Doug / smarc-fsl-linux-kernel

1

/*

1

/*

2

3

4

5

* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>

5

* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>

6

* kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>

6

* kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>

7

* - July2000

7

* - July2000

8

* bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001

8

* bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001

9

*/

9

*/

10

11

/*

11

/*

12

* This handles all read/write requests to block devices

12

* This handles all read/write requests to block devices

13

*/

13

*/

14

#include <linux/kernel.h>

14

#include <linux/kernel.h>

15

#include <linux/module.h>

15

#include <linux/module.h>

16

#include <linux/backing-dev.h>

16

#include <linux/backing-dev.h>

17

#include <linux/bio.h>

17

#include <linux/bio.h>

18

#include <linux/blkdev.h>

18

#include <linux/blkdev.h>

19

#include <linux/highmem.h>

19

#include <linux/highmem.h>

20

#include <linux/mm.h>

20

#include <linux/mm.h>

21

#include <linux/kernel_stat.h>

21

#include <linux/kernel_stat.h>

22

#include <linux/string.h>

22

#include <linux/string.h>

23

#include <linux/init.h>

23

#include <linux/init.h>

24

#include <linux/completion.h>

24

#include <linux/completion.h>

25

#include <linux/slab.h>

25

#include <linux/slab.h>

26

#include <linux/swap.h>

26

#include <linux/swap.h>

27

#include <linux/writeback.h>

27

#include <linux/writeback.h>

28

#include <linux/task_io_accounting_ops.h>

28

#include <linux/task_io_accounting_ops.h>

29

#include <linux/fault-inject.h>

29

#include <linux/fault-inject.h>

30

#include <linux/list_sort.h>

30

#include <linux/list_sort.h>

31

32

#define CREATE_TRACE_POINTS

32

#define CREATE_TRACE_POINTS

33

#include <trace/events/block.h>

33

#include <trace/events/block.h>

34

35

#include "blk.h"

35

#include "blk.h"

36

37

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

static int __make_request(struct request_queue *q, struct bio *bio);

/* Slab cache backing the allocated request tables. */
static struct kmem_cache *request_cachep;

/* Slab cache used for request_queue allocation. */
struct kmem_cache *blk_requestq_cachep;

/* Controlling structure for kblockd: the workqueue delayed runs go to. */
static struct workqueue_struct *kblockd_workqueue;

57

58

static void drive_stat_acct(struct request *rq, int new_io)

58

static void drive_stat_acct(struct request *rq, int new_io)

59

{

59

{

60

struct hd_struct *part;

60

struct hd_struct *part;

61

int rw = rq_data_dir(rq);

61

int rw = rq_data_dir(rq);

62

int cpu;

62

int cpu;

63

64

if (!blk_do_io_stat(rq))

64

if (!blk_do_io_stat(rq))

65

return;

65

return;

66

67

cpu = part_stat_lock();

67

cpu = part_stat_lock();

68

69

if (!new_io) {

69

if (!new_io) {

70

part = rq->part;

70

part = rq->part;

71

part_stat_inc(cpu, part, merges[rw]);

71

part_stat_inc(cpu, part, merges[rw]);

72

} else {

72

} else {

73

part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

73

part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

74

if (!hd_struct_try_get(part)) {

74

if (!hd_struct_try_get(part)) {

75

/*

75

/*

76

* The partition is already being removed,

76

* The partition is already being removed,

77

* the request will be accounted on the disk only

77

* the request will be accounted on the disk only

78

*

78

*

79

* We take a reference on disk->part0 although that

79

* We take a reference on disk->part0 although that

80

* partition will never be deleted, so we can treat

80

* partition will never be deleted, so we can treat

81

* it as any other partition.

81

* it as any other partition.

82

*/

82

*/

83

part = &rq->rq_disk->part0;

83

part = &rq->rq_disk->part0;

84

hd_struct_get(part);

84

hd_struct_get(part);

85

}

85

}

86

part_round_stats(cpu, part);

86

part_round_stats(cpu, part);

87

part_inc_in_flight(part, rw);

87

part_inc_in_flight(part, rw);

88

rq->part = part;

88

rq->part = part;

89

}

89

}

90

91

part_stat_unlock();

91

part_stat_unlock();

92

}

92

}

93

94

void blk_queue_congestion_threshold(struct request_queue *q)

94

void blk_queue_congestion_threshold(struct request_queue *q)

95

{

95

{

96

int nr;

96

int nr;

97

98

nr = q->nr_requests - (q->nr_requests / 8) + 1;

98

nr = q->nr_requests - (q->nr_requests / 8) + 1;

99

if (nr > q->nr_requests)

99

if (nr > q->nr_requests)

100

nr = q->nr_requests;

100

nr = q->nr_requests;

101

q->nr_congestion_on = nr;

101

q->nr_congestion_on = nr;

102

103

nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;

103

nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;

104

if (nr < 1)

104

if (nr < 1)

105

nr = 1;

105

nr = 1;

106

q->nr_congestion_off = nr;

106

q->nr_congestion_off = nr;

107

}

107

}

108

109

/**

109

/**

110

* blk_get_backing_dev_info - get the address of a queue's backing_dev_info

110

* blk_get_backing_dev_info - get the address of a queue's backing_dev_info

111

* @bdev: device

111

* @bdev: device

112

*

112

*

113

* Locates the passed device's request queue and returns the address of its

113

* Locates the passed device's request queue and returns the address of its

114

* backing_dev_info

114

* backing_dev_info

115

*

115

*

116

* Will return NULL if the request queue cannot be located.

116

* Will return NULL if the request queue cannot be located.

117

*/

117

*/

118

struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)

118

struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)

119

{

119

{

120

struct backing_dev_info *ret = NULL;

120

struct backing_dev_info *ret = NULL;

121

struct request_queue *q = bdev_get_queue(bdev);

121

struct request_queue *q = bdev_get_queue(bdev);

122

123

if (q)

123

if (q)

124

ret = &q->backing_dev_info;

124

ret = &q->backing_dev_info;

125

return ret;

125

return ret;

126

}

126

}

127

EXPORT_SYMBOL(blk_get_backing_dev_info);

127

EXPORT_SYMBOL(blk_get_backing_dev_info);

128

129

void blk_rq_init(struct request_queue *q, struct request *rq)

129

void blk_rq_init(struct request_queue *q, struct request *rq)

130

{

130

{

131

memset(rq, 0, sizeof(*rq));

131

memset(rq, 0, sizeof(*rq));

132

133

INIT_LIST_HEAD(&rq->queuelist);

133

INIT_LIST_HEAD(&rq->queuelist);

134

INIT_LIST_HEAD(&rq->timeout_list);

134

INIT_LIST_HEAD(&rq->timeout_list);

135

rq->cpu = -1;

135

rq->cpu = -1;

136

rq->q = q;

136

rq->q = q;

137

rq->__sector = (sector_t) -1;

137

rq->__sector = (sector_t) -1;

138

INIT_HLIST_NODE(&rq->hash);

138

INIT_HLIST_NODE(&rq->hash);

139

RB_CLEAR_NODE(&rq->rb_node);

139

RB_CLEAR_NODE(&rq->rb_node);

140

rq->cmd = rq->__cmd;

140

rq->cmd = rq->__cmd;

141

rq->cmd_len = BLK_MAX_CDB;

141

rq->cmd_len = BLK_MAX_CDB;

142

rq->tag = -1;

142

rq->tag = -1;

143

rq->ref_count = 1;

143

rq->ref_count = 1;

144

rq->start_time = jiffies;

144

rq->start_time = jiffies;

145

set_start_time_ns(rq);

145

set_start_time_ns(rq);

146

rq->part = NULL;

146

rq->part = NULL;

147

}

147

}

148

EXPORT_SYMBOL(blk_rq_init);

148

EXPORT_SYMBOL(blk_rq_init);

149

150

static void req_bio_endio(struct request *rq, struct bio *bio,

150

static void req_bio_endio(struct request *rq, struct bio *bio,

151

unsigned int nbytes, int error)

151

unsigned int nbytes, int error)

152

{

152

{

153

if (error)

153

if (error)

154

clear_bit(BIO_UPTODATE, &bio->bi_flags);

154

clear_bit(BIO_UPTODATE, &bio->bi_flags);

155

else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))

155

else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))

156

error = -EIO;

156

error = -EIO;

157

158

if (unlikely(nbytes > bio->bi_size)) {

158

if (unlikely(nbytes > bio->bi_size)) {

159

printk(KERN_ERR "%s: want %u bytes done, %u left\n",

159

printk(KERN_ERR "%s: want %u bytes done, %u left\n",

160

__func__, nbytes, bio->bi_size);

160

__func__, nbytes, bio->bi_size);

161

nbytes = bio->bi_size;

161

nbytes = bio->bi_size;

162

}

162

}

163

164

if (unlikely(rq->cmd_flags & REQ_QUIET))

164

if (unlikely(rq->cmd_flags & REQ_QUIET))

165

set_bit(BIO_QUIET, &bio->bi_flags);

165

set_bit(BIO_QUIET, &bio->bi_flags);

166

167

bio->bi_size -= nbytes;

167

bio->bi_size -= nbytes;

168

bio->bi_sector += (nbytes >> 9);

168

bio->bi_sector += (nbytes >> 9);

169

170

if (bio_integrity(bio))

170

if (bio_integrity(bio))

171

bio_integrity_advance(bio, nbytes);

171

bio_integrity_advance(bio, nbytes);

172

173

/* don't actually finish bio if it's part of flush sequence */

173

/* don't actually finish bio if it's part of flush sequence */

174

if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))

174

if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))

175

bio_endio(bio, error);

175

bio_endio(bio, error);

176

}

176

}

177

178

void blk_dump_rq_flags(struct request *rq, char *msg)

178

void blk_dump_rq_flags(struct request *rq, char *msg)

179

{

179

{

180

int bit;

180

int bit;

181

182

printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,

182

printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,

183

rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,

183

rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,

184

rq->cmd_flags);

184

rq->cmd_flags);

185

186

printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",

186

printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",

187

(unsigned long long)blk_rq_pos(rq),

187

(unsigned long long)blk_rq_pos(rq),

188

blk_rq_sectors(rq), blk_rq_cur_sectors(rq));

188

blk_rq_sectors(rq), blk_rq_cur_sectors(rq));

189

printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",

189

printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",

190

rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

190

rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

191

192

if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {

192

if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {

193

printk(KERN_INFO " cdb: ");

193

printk(KERN_INFO " cdb: ");

194

for (bit = 0; bit < BLK_MAX_CDB; bit++)

194

for (bit = 0; bit < BLK_MAX_CDB; bit++)

195

printk("%02x ", rq->cmd[bit]);

195

printk("%02x ", rq->cmd[bit]);

196

printk("\n");

196

printk("\n");

197

}

197

}

198

}

198

}

199

EXPORT_SYMBOL(blk_dump_rq_flags);

199

EXPORT_SYMBOL(blk_dump_rq_flags);

200

201

static void blk_delay_work(struct work_struct *work)

201

static void blk_delay_work(struct work_struct *work)

202

{

202

{

203

struct request_queue *q;

203

struct request_queue *q;

204

205

q = container_of(work, struct request_queue, delay_work.work);

205

q = container_of(work, struct request_queue, delay_work.work);

206

spin_lock_irq(q->queue_lock);

206

spin_lock_irq(q->queue_lock);

207

__blk_run_queue(q);

207

__blk_run_queue(q);

208

spin_unlock_irq(q->queue_lock);

208

spin_unlock_irq(q->queue_lock);

209

}

209

}

210

211

/**

211

/**

212

* blk_delay_queue - restart queueing after defined interval

212

* blk_delay_queue - restart queueing after defined interval

213

* @q: The &struct request_queue in question

213

* @q: The &struct request_queue in question

214

* @msecs: Delay in msecs

214

* @msecs: Delay in msecs

215

*

215

*

216

* Description:

216

* Description:

217

* Sometimes queueing needs to be postponed for a little while, to allow

217

* Sometimes queueing needs to be postponed for a little while, to allow

218

* resources to come back. This function will make sure that queueing is

218

* resources to come back. This function will make sure that queueing is

219

* restarted around the specified time.

219

* restarted around the specified time.

220

*/

220

*/

221

void blk_delay_queue(struct request_queue *q, unsigned long msecs)

221

void blk_delay_queue(struct request_queue *q, unsigned long msecs)

222

{

222

{

223

queue_delayed_work(kblockd_workqueue, &q->delay_work,

223

queue_delayed_work(kblockd_workqueue, &q->delay_work,

224

msecs_to_jiffies(msecs));

224

msecs_to_jiffies(msecs));

225

}

225

}

226

EXPORT_SYMBOL(blk_delay_queue);

226

EXPORT_SYMBOL(blk_delay_queue);

227

228

/**

228

/**

229

* blk_start_queue - restart a previously stopped queue

229

* blk_start_queue - restart a previously stopped queue

230

* @q: The &struct request_queue in question

230

* @q: The &struct request_queue in question

231

*

231

*

232

* Description:

232

* Description:

233

* blk_start_queue() will clear the stop flag on the queue, and call

233

* blk_start_queue() will clear the stop flag on the queue, and call

234

* the request_fn for the queue if it was in a stopped state when

234

* the request_fn for the queue if it was in a stopped state when

235

* entered. Also see blk_stop_queue(). Queue lock must be held.

235

* entered. Also see blk_stop_queue(). Queue lock must be held.

236

**/

236

**/

237

void blk_start_queue(struct request_queue *q)

237

void blk_start_queue(struct request_queue *q)

238

{

238

{

239

WARN_ON(!irqs_disabled());

239

WARN_ON(!irqs_disabled());

240

241

queue_flag_clear(QUEUE_FLAG_STOPPED, q);

241

queue_flag_clear(QUEUE_FLAG_STOPPED, q);

242

__blk_run_queue(q);

242

__blk_run_queue(q);

243

}

243

}

244

EXPORT_SYMBOL(blk_start_queue);

244

EXPORT_SYMBOL(blk_start_queue);

245

246

/**

246

/**

247

* blk_stop_queue - stop a queue

247

* blk_stop_queue - stop a queue

248

* @q: The &struct request_queue in question

248

* @q: The &struct request_queue in question

249

*

249

*

250

* Description:

250

* Description:

251

* The Linux block layer assumes that a block driver will consume all

251

* The Linux block layer assumes that a block driver will consume all

252

* entries on the request queue when the request_fn strategy is called.

252

* entries on the request queue when the request_fn strategy is called.

253

* Often this will not happen, because of hardware limitations (queue

253

* Often this will not happen, because of hardware limitations (queue

254

* depth settings). If a device driver gets a 'queue full' response,

254

* depth settings). If a device driver gets a 'queue full' response,

255

* or if it simply chooses not to queue more I/O at one point, it can

255

* or if it simply chooses not to queue more I/O at one point, it can

256

* call this function to prevent the request_fn from being called until

256

* call this function to prevent the request_fn from being called until

257

* the driver has signalled it's ready to go again. This happens by calling

257

* the driver has signalled it's ready to go again. This happens by calling

258

* blk_start_queue() to restart queue operations. Queue lock must be held.

258

* blk_start_queue() to restart queue operations. Queue lock must be held.

259

**/

259

**/

260

void blk_stop_queue(struct request_queue *q)

260

void blk_stop_queue(struct request_queue *q)

261

{

261

{

262

__cancel_delayed_work(&q->delay_work);

262

__cancel_delayed_work(&q->delay_work);

263

queue_flag_set(QUEUE_FLAG_STOPPED, q);

263

queue_flag_set(QUEUE_FLAG_STOPPED, q);

264

}

264

}

265

EXPORT_SYMBOL(blk_stop_queue);

265

EXPORT_SYMBOL(blk_stop_queue);

266

267

/**

267

/**

268

* blk_sync_queue - cancel any pending callbacks on a queue

268

* blk_sync_queue - cancel any pending callbacks on a queue

269

* @q: the queue

269

* @q: the queue

270

*

270

*

271

* Description:

271

* Description:

272

* The block layer may perform asynchronous callback activity

272

* The block layer may perform asynchronous callback activity

273

* on a queue, such as calling the unplug function after a timeout.

273

* on a queue, such as calling the unplug function after a timeout.

274

* A block device may call blk_sync_queue to ensure that any

274

* A block device may call blk_sync_queue to ensure that any

275

* such activity is cancelled, thus allowing it to release resources

275

* such activity is cancelled, thus allowing it to release resources

276

* that the callbacks might use. The caller must already have made sure

276

* that the callbacks might use. The caller must already have made sure

277

* that its ->make_request_fn will not re-add plugging prior to calling

277

* that its ->make_request_fn will not re-add plugging prior to calling

278

* this function.

278

* this function.

279

*

279

*

280

* This function does not cancel any asynchronous activity arising

280

* This function does not cancel any asynchronous activity arising

281

* out of elevator or throttling code. That would require elevaotor_exit()

281

* out of elevator or throttling code. That would require elevaotor_exit()

282

* and blk_throtl_exit() to be called with queue lock initialized.

282

* and blk_throtl_exit() to be called with queue lock initialized.

283

*

283

*

284

*/

284

*/

285

void blk_sync_queue(struct request_queue *q)

285

void blk_sync_queue(struct request_queue *q)

286

{

286

{

287

del_timer_sync(&q->timeout);

287

del_timer_sync(&q->timeout);

288

cancel_delayed_work_sync(&q->delay_work);

288

cancel_delayed_work_sync(&q->delay_work);

289

}

289

}

290

EXPORT_SYMBOL(blk_sync_queue);

290

EXPORT_SYMBOL(blk_sync_queue);

291

292

/**

292

/**

293

* __blk_run_queue - run a single device queue

293

* __blk_run_queue - run a single device queue

294

* @q: The queue to run

294

* @q: The queue to run

295

*

295

*

296

* Description:

296

* Description:

297

* See @blk_run_queue. This variant must be called with the queue lock

297

* See @blk_run_queue. This variant must be called with the queue lock

298

* held and interrupts disabled.

298

* held and interrupts disabled.

299

*/

299

*/

300

void __blk_run_queue(struct request_queue *q)

300

void __blk_run_queue(struct request_queue *q)

301

{

301

{

302

if (unlikely(blk_queue_stopped(q)))

302

if (unlikely(blk_queue_stopped(q)))

303

return;

303

return;

304

305

q->request_fn(q);

305

q->request_fn(q);

306

}

306

}

307

EXPORT_SYMBOL(__blk_run_queue);

307

EXPORT_SYMBOL(__blk_run_queue);

308

309

/**

309

/**

310

* blk_run_queue_async - run a single device queue in workqueue context

310

* blk_run_queue_async - run a single device queue in workqueue context

311

* @q: The queue to run

311

* @q: The queue to run

312

*

312

*

313

* Description:

313

* Description:

314

* Tells kblockd to perform the equivalent of @blk_run_queue on behalf

314

* Tells kblockd to perform the equivalent of @blk_run_queue on behalf

315

* of us.

315

* of us.

316

*/

316

*/

317

void blk_run_queue_async(struct request_queue *q)

317

void blk_run_queue_async(struct request_queue *q)

318

{

318

{

319

if (likely(!blk_queue_stopped(q)))

319

if (likely(!blk_queue_stopped(q))) {

320

__cancel_delayed_work(&q->delay_work);

320

queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);

321

queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);

322

}

321

}

323

}

322

EXPORT_SYMBOL(blk_run_queue_async);

324

EXPORT_SYMBOL(blk_run_queue_async);

323

325

324

/**

326

/**

325

* blk_run_queue - run a single device queue

327

* blk_run_queue - run a single device queue

326

* @q: The queue to run

328

* @q: The queue to run

327

*

329

*

328

* Description:

330

* Description:

329

* Invoke request handling on this queue, if it has pending work to do.

331

* Invoke request handling on this queue, if it has pending work to do.

330

* May be used to restart queueing when a request has completed.

332

* May be used to restart queueing when a request has completed.

331

*/

333

*/

332

void blk_run_queue(struct request_queue *q)

334

void blk_run_queue(struct request_queue *q)

333

{

335

{

334

unsigned long flags;

336

unsigned long flags;

335

337

336

spin_lock_irqsave(q->queue_lock, flags);

338

spin_lock_irqsave(q->queue_lock, flags);

337

__blk_run_queue(q);

339

__blk_run_queue(q);

338

spin_unlock_irqrestore(q->queue_lock, flags);

340

spin_unlock_irqrestore(q->queue_lock, flags);

339

}

341

}

340

EXPORT_SYMBOL(blk_run_queue);

342

EXPORT_SYMBOL(blk_run_queue);

341

343

342

void blk_put_queue(struct request_queue *q)

344

void blk_put_queue(struct request_queue *q)

343

{

345

{

344

kobject_put(&q->kobj);

346

kobject_put(&q->kobj);

345

}

347

}

346

348

347

/*

349

/*

348

* Note: If a driver supplied the queue lock, it should not zap that lock

350

* Note: If a driver supplied the queue lock, it should not zap that lock

349

* unexpectedly as some queue cleanup components like elevator_exit() and

351

* unexpectedly as some queue cleanup components like elevator_exit() and

350

* blk_throtl_exit() need queue lock.

352

* blk_throtl_exit() need queue lock.

351

*/

353

*/

352

void blk_cleanup_queue(struct request_queue *q)

354

void blk_cleanup_queue(struct request_queue *q)

353

{

355

{

354

/*

356

/*

355

* We know we have process context here, so we can be a little

357

* We know we have process context here, so we can be a little

356

* cautious and ensure that pending block actions on this device

358

* cautious and ensure that pending block actions on this device

357

* are done before moving on. Going into this function, we should

359

* are done before moving on. Going into this function, we should

358

* not have processes doing IO to this device.

360

* not have processes doing IO to this device.

359

*/

361

*/

360

blk_sync_queue(q);

362

blk_sync_queue(q);

361

363

362

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);

364

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);

363

mutex_lock(&q->sysfs_lock);

365

mutex_lock(&q->sysfs_lock);

364

queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);

366

queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);

365

mutex_unlock(&q->sysfs_lock);

367

mutex_unlock(&q->sysfs_lock);

366

368

367

if (q->elevator)

369

if (q->elevator)

368

elevator_exit(q->elevator);

370

elevator_exit(q->elevator);

369

371

370

blk_throtl_exit(q);

372

blk_throtl_exit(q);

371

373

372

blk_put_queue(q);

374

blk_put_queue(q);

373

}

375

}

374

EXPORT_SYMBOL(blk_cleanup_queue);

376

EXPORT_SYMBOL(blk_cleanup_queue);

375

377

376

static int blk_init_free_list(struct request_queue *q)

378

static int blk_init_free_list(struct request_queue *q)

377

{

379

{

378

struct request_list *rl = &q->rq;

380

struct request_list *rl = &q->rq;

379

381

380

if (unlikely(rl->rq_pool))

382

if (unlikely(rl->rq_pool))

381

return 0;

383

return 0;

382

384

383

rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;

385

rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;

384

rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;

386

rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;

385

rl->elvpriv = 0;

387

rl->elvpriv = 0;

386

init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);

388

init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);

387

init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

389

init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

388

390

389

rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,

391

rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,

390

mempool_free_slab, request_cachep, q->node);

392

mempool_free_slab, request_cachep, q->node);

391

393

392

if (!rl->rq_pool)

394

if (!rl->rq_pool)

393

return -ENOMEM;

395

return -ENOMEM;

394

396

395

return 0;

397

return 0;

396

}

398

}

397

399

398

/*
 * Allocate a request queue with @gfp_mask, passing -1 as the node id to
 * blk_alloc_queue_node().  Returns whatever that function returns.
 */
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, -1);
}
EXPORT_SYMBOL(blk_alloc_queue);

403

405

404

/*
 * Allocate and minimally initialise a request queue.
 *
 * @gfp_mask: allocation flags; __GFP_ZERO is added so the queue starts zeroed
 * @node_id:  node passed to the slab allocator for the queue structure
 *
 * Sets up the embedded backing_dev_info, throttling, the timeout and
 * laptop-mode timers, the flush/timeout lists, the delayed work, the
 * kobject, and the locks.  queue_lock initially points at the internal
 * __queue_lock.
 *
 * Returns the new queue, or NULL on failure.  On failure everything that
 * was set up so far is undone, including the backing_dev_info when
 * throttling initialisation fails after bdi_init() succeeded.
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->backing_dev_info.ra_pages =
			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	q->backing_dev_info.name = "block";

	err = bdi_init(&q->backing_dev_info);
	if (err)
		goto fail_q;

	if (blk_throtl_init(q))
		goto fail_bdi;

	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q);
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_LIST_HEAD(&q->flush_queue[0]);
	INIT_LIST_HEAD(&q->flush_queue[1]);
	INIT_LIST_HEAD(&q->flush_data_in_flight);
	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	/*
	 * By default initialize queue_lock to internal lock and driver can
	 * override it later if need be.
	 */
	q->queue_lock = &q->__queue_lock;

	return q;

fail_bdi:
	/*
	 * bdi_init() succeeded, so undo it before the queue memory goes
	 * away.  NOTE(review): relies on bdi_destroy() being safe for a
	 * bdi that was never registered - confirm against mm/backing-dev.c.
	 */
	bdi_destroy(&q->backing_dev_info);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

454

456

455

/**

457

/**

456

* blk_init_queue - prepare a request queue for use with a block device

458

* blk_init_queue - prepare a request queue for use with a block device

457

* @rfn: The function to be called to process requests that have been

459

* @rfn: The function to be called to process requests that have been

458

* placed on the queue.

460

* placed on the queue.

459

* @lock: Request queue spin lock

461

* @lock: Request queue spin lock

460

*

462

*

461

* Description:

463

* Description:

462

* If a block device wishes to use the standard request handling procedures,

464

* If a block device wishes to use the standard request handling procedures,

463

* which sorts requests and coalesces adjacent requests, then it must

465

* which sorts requests and coalesces adjacent requests, then it must

464

* call blk_init_queue(). The function @rfn will be called when there

466

* call blk_init_queue(). The function @rfn will be called when there

465

* are requests on the queue that need to be processed. If the device

467

* are requests on the queue that need to be processed. If the device

466

* supports plugging, then @rfn may not be called immediately when requests

468

* supports plugging, then @rfn may not be called immediately when requests

467

* are available on the queue, but may be called at some time later instead.

469

* are available on the queue, but may be called at some time later instead.

468

* Plugged queues are generally unplugged when a buffer belonging to one

470

* Plugged queues are generally unplugged when a buffer belonging to one

469

* of the requests on the queue is needed, or due to memory pressure.

471

* of the requests on the queue is needed, or due to memory pressure.

470

*

472

*

471

* @rfn is not required, or even expected, to remove all requests off the

473

* @rfn is not required, or even expected, to remove all requests off the

472

* queue, but only as many as it can handle at a time. If it does leave

474

* queue, but only as many as it can handle at a time. If it does leave

473

* requests on the queue, it is responsible for arranging that the requests

475

* requests on the queue, it is responsible for arranging that the requests

474

* get dealt with eventually.

476

* get dealt with eventually.

475

*

477

*

476

* The queue spin lock must be held while manipulating the requests on the

478

* The queue spin lock must be held while manipulating the requests on the

477

* request queue; this lock will be taken also from interrupt context, so irq

479

* request queue; this lock will be taken also from interrupt context, so irq

478

* disabling is needed for it.

480

* disabling is needed for it.

479

*

481

*

480

* Function returns a pointer to the initialized request queue, or %NULL if

482

* Function returns a pointer to the initialized request queue, or %NULL if

481

* it didn't succeed.

483

* it didn't succeed.

482

*

484

*

483

* Note:

485

* Note:

484

* blk_init_queue() must be paired with a blk_cleanup_queue() call

486

* blk_init_queue() must be paired with a blk_cleanup_queue() call

485

* when the block device is deactivated (such as at module unload).

487

* when the block device is deactivated (such as at module unload).

486

**/

488

**/

487

489

488

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)

490

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)

489

{

491

{

490

return blk_init_queue_node(rfn, lock, -1);

492

return blk_init_queue_node(rfn, lock, -1);

491

}

493

}

492

EXPORT_SYMBOL(blk_init_queue);

494

EXPORT_SYMBOL(blk_init_queue);

493

495

494

struct request_queue *

496

struct request_queue *

495

blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

497

blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

496

{

498

{

497

struct request_queue *uninit_q, *q;

499

struct request_queue *uninit_q, *q;

498

500

499

uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);

501

uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);

500

if (!uninit_q)

502

if (!uninit_q)

501

return NULL;

503

return NULL;

502

504

503

q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);

505

q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);

504

if (!q)

506

if (!q)

505

blk_cleanup_queue(uninit_q);

507

blk_cleanup_queue(uninit_q);

506

508

507

return q;

509

return q;

508

}

510

}

509

EXPORT_SYMBOL(blk_init_queue_node);

511

EXPORT_SYMBOL(blk_init_queue_node);

510

512

511

struct request_queue *

513

struct request_queue *

512

blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,

514

blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,

513

spinlock_t *lock)

515

spinlock_t *lock)

514

{

516

{

515

return blk_init_allocated_queue_node(q, rfn, lock, -1);

517

return blk_init_allocated_queue_node(q, rfn, lock, -1);

516

}

518

}

517

EXPORT_SYMBOL(blk_init_allocated_queue);

519

EXPORT_SYMBOL(blk_init_allocated_queue);

518

520

519

struct request_queue *

521

struct request_queue *

520

blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,

522

blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,

521

spinlock_t *lock, int node_id)

523

spinlock_t *lock, int node_id)

522

{

524

{

523

if (!q)

525

if (!q)

524

return NULL;

526

return NULL;

525

527

526

q->node = node_id;

528

q->node = node_id;

527

if (blk_init_free_list(q))

529

if (blk_init_free_list(q))

528

return NULL;

530

return NULL;

529

531

530

q->request_fn = rfn;

532

q->request_fn = rfn;

531

q->prep_rq_fn = NULL;

533

q->prep_rq_fn = NULL;

532

q->unprep_rq_fn = NULL;

534

q->unprep_rq_fn = NULL;

533

q->queue_flags = QUEUE_FLAG_DEFAULT;

535

q->queue_flags = QUEUE_FLAG_DEFAULT;

534

536

535

/* Override internal queue lock with supplied lock pointer */

537

/* Override internal queue lock with supplied lock pointer */

536

if (lock)

538

if (lock)

537

q->queue_lock = lock;

539

q->queue_lock = lock;

538

540

539

/*

541

/*

540

* This also sets hw/phys segments, boundary and size

542

* This also sets hw/phys segments, boundary and size

541

*/

543

*/

542

blk_queue_make_request(q, __make_request);

544

blk_queue_make_request(q, __make_request);

543

545

544

q->sg_reserved_size = INT_MAX;

546

q->sg_reserved_size = INT_MAX;

545

547

546

/*

548

/*

547

* all done

549

* all done

548

*/

550

*/

549

if (!elevator_init(q, NULL)) {

551

if (!elevator_init(q, NULL)) {

550

blk_queue_congestion_threshold(q);

552

blk_queue_congestion_threshold(q);

551

return q;

553

return q;

552

}

554

}

553

555

554

return NULL;

556

return NULL;

555

}

557

}

556

EXPORT_SYMBOL(blk_init_allocated_queue_node);

558

EXPORT_SYMBOL(blk_init_allocated_queue_node);

557

559

558

int blk_get_queue(struct request_queue *q)

560

int blk_get_queue(struct request_queue *q)

559

{

561

{

560

if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {

562

if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {

561

kobject_get(&q->kobj);

563

kobject_get(&q->kobj);

562

return 0;

564

return 0;

563

}

565

}

564

566

565

return 1;

567

return 1;

566

}

568

}

567

569

568

static inline void blk_free_request(struct request_queue *q, struct request *rq)

570

static inline void blk_free_request(struct request_queue *q, struct request *rq)

569

{

571

{

570

BUG_ON(rq->cmd_flags & REQ_ON_PLUG);

572

BUG_ON(rq->cmd_flags & REQ_ON_PLUG);

571

573

572

if (rq->cmd_flags & REQ_ELVPRIV)

574

if (rq->cmd_flags & REQ_ELVPRIV)

573

elv_put_request(q, rq);

575

elv_put_request(q, rq);

574

mempool_free(rq, q->rq.rq_pool);

576

mempool_free(rq, q->rq.rq_pool);

575

}

577

}

576

578

577

static struct request *

579

static struct request *

578

blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)

580

blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)

579

{

581

{

580

struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

582

struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

581

583

582

if (!rq)

584

if (!rq)

583

return NULL;

585

return NULL;

584

586

585

blk_rq_init(q, rq);

587

blk_rq_init(q, rq);

586

588

587

rq->cmd_flags = flags | REQ_ALLOCED;

589

rq->cmd_flags = flags | REQ_ALLOCED;

588

590

589

if (priv) {

591

if (priv) {

590

if (unlikely(elv_set_request(q, rq, gfp_mask))) {

592

if (unlikely(elv_set_request(q, rq, gfp_mask))) {

591

mempool_free(rq, q->rq.rq_pool);

593

mempool_free(rq, q->rq.rq_pool);

592

return NULL;

594

return NULL;

593

}

595

}

594

rq->cmd_flags |= REQ_ELVPRIV;

596

rq->cmd_flags |= REQ_ELVPRIV;

595

}

597

}

596

598

597

return rq;

599

return rq;

598

}

600

}

599

601

600

/*

602

/*

601

* ioc_batching returns true if the ioc is a valid batching request and

603

* ioc_batching returns true if the ioc is a valid batching request and

602

* should be given priority access to a request.

604

* should be given priority access to a request.

603

*/

605

*/

604

static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)

606

static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)

605

{

607

{

606

if (!ioc)

608

if (!ioc)

607

return 0;

609

return 0;

608

610

609

/*

611

/*

610

* Make sure the process is able to allocate at least 1 request

612

* Make sure the process is able to allocate at least 1 request

611

* even if the batch times out, otherwise we could theoretically

613

* even if the batch times out, otherwise we could theoretically

612

* lose wakeups.

614

* lose wakeups.

613

*/

615

*/

614

return ioc->nr_batch_requests == q->nr_batching ||

616

return ioc->nr_batch_requests == q->nr_batching ||

615

(ioc->nr_batch_requests > 0

617

(ioc->nr_batch_requests > 0

616

&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));

618

&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));

617

}

619

}

618

620

619

/*

621

/*

620

* ioc_set_batching sets ioc to be a new "batcher" if it is not one. This

622

* ioc_set_batching sets ioc to be a new "batcher" if it is not one. This

621

* will cause the process to be a "batcher" on all queues in the system. This

623

* will cause the process to be a "batcher" on all queues in the system. This

622

* is the behaviour we want though - once it gets a wakeup it should be given

624

* is the behaviour we want though - once it gets a wakeup it should be given

623

* a nice run.

625

* a nice run.

624

*/

626

*/

625

static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)

627

static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)

626

{

628

{

627

if (!ioc || ioc_batching(q, ioc))

629

if (!ioc || ioc_batching(q, ioc))

628

return;

630

return;

629

631

630

ioc->nr_batch_requests = q->nr_batching;

632

ioc->nr_batch_requests = q->nr_batching;

631

ioc->last_waited = jiffies;

633

ioc->last_waited = jiffies;

632

}

634

}

633

635

634

static void __freed_request(struct request_queue *q, int sync)

636

static void __freed_request(struct request_queue *q, int sync)

635

{

637

{

636

struct request_list *rl = &q->rq;

638

struct request_list *rl = &q->rq;

637

639

638

if (rl->count[sync] < queue_congestion_off_threshold(q))

640

if (rl->count[sync] < queue_congestion_off_threshold(q))

639

blk_clear_queue_congested(q, sync);

641

blk_clear_queue_congested(q, sync);

640

642

641

if (rl->count[sync] + 1 <= q->nr_requests) {

643

if (rl->count[sync] + 1 <= q->nr_requests) {

642

if (waitqueue_active(&rl->wait[sync]))

644

if (waitqueue_active(&rl->wait[sync]))

643

wake_up(&rl->wait[sync]);

645

wake_up(&rl->wait[sync]);

644

646

645

blk_clear_queue_full(q, sync);

647

blk_clear_queue_full(q, sync);

646

}

648

}

647

}

649

}

648

650

649

/*

651

/*

650

* A request has just been released. Account for it, update the full and

652

* A request has just been released. Account for it, update the full and

651

* congestion status, wake up any waiters. Called under q->queue_lock.

653

* congestion status, wake up any waiters. Called under q->queue_lock.

652

*/

654

*/

653

static void freed_request(struct request_queue *q, int sync, int priv)

655

static void freed_request(struct request_queue *q, int sync, int priv)

654

{

656

{

655

struct request_list *rl = &q->rq;

657

struct request_list *rl = &q->rq;

656

658

657

rl->count[sync]--;

659

rl->count[sync]--;

658

if (priv)

660

if (priv)

659

rl->elvpriv--;

661

rl->elvpriv--;

660

662

661

__freed_request(q, sync);

663

__freed_request(q, sync);

662

664

663

if (unlikely(rl->starved[sync ^ 1]))

665

if (unlikely(rl->starved[sync ^ 1]))

664

__freed_request(q, sync ^ 1);

666

__freed_request(q, sync ^ 1);

665

}

667

}

666

668

667

/*

669

/*

668

* Determine if elevator data should be initialized when allocating the

670

* Determine if elevator data should be initialized when allocating the

669

* request associated with @bio.

671

* request associated with @bio.

670

*/

672

*/

671

static bool blk_rq_should_init_elevator(struct bio *bio)

673

static bool blk_rq_should_init_elevator(struct bio *bio)

672

{

674

{

673

if (!bio)

675

if (!bio)

674

return true;

676

return true;

675

677

676

/*

678

/*

677

* Flush requests do not use the elevator so skip initialization.

679

* Flush requests do not use the elevator so skip initialization.

678

* This allows a request to share the flush and elevator data.

680

* This allows a request to share the flush and elevator data.

679

*/

681

*/

680

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))

682

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))

681

return false;

683

return false;

682

684

683

return true;

685

return true;

684

}

686

}

685

687

686

/*

688

/*

687

* Get a free request, queue_lock must be held.

689

* Get a free request, queue_lock must be held.

688

* Returns NULL on failure, with queue_lock held.

690

* Returns NULL on failure, with queue_lock held.

689

* Returns !NULL on success, with queue_lock *not held*.

691

* Returns !NULL on success, with queue_lock *not held*.

690

*/

692

*/

691

static struct request *get_request(struct request_queue *q, int rw_flags,

693

static struct request *get_request(struct request_queue *q, int rw_flags,

692

struct bio *bio, gfp_t gfp_mask)

694

struct bio *bio, gfp_t gfp_mask)

693

{

695

{

694

struct request *rq = NULL;

696

struct request *rq = NULL;

695

struct request_list *rl = &q->rq;

697

struct request_list *rl = &q->rq;

696

struct io_context *ioc = NULL;

698

struct io_context *ioc = NULL;

697

const bool is_sync = rw_is_sync(rw_flags) != 0;

699

const bool is_sync = rw_is_sync(rw_flags) != 0;

698

int may_queue, priv = 0;

700

int may_queue, priv = 0;

699

701

700

may_queue = elv_may_queue(q, rw_flags);

702

may_queue = elv_may_queue(q, rw_flags);

701

if (may_queue == ELV_MQUEUE_NO)

703

if (may_queue == ELV_MQUEUE_NO)

702

goto rq_starved;

704

goto rq_starved;

703

705

704

if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {

706

if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {

705

if (rl->count[is_sync]+1 >= q->nr_requests) {

707

if (rl->count[is_sync]+1 >= q->nr_requests) {

706

ioc = current_io_context(GFP_ATOMIC, q->node);

708

ioc = current_io_context(GFP_ATOMIC, q->node);

707

/*

709

/*

708

* The queue will fill after this allocation, so set

710

* The queue will fill after this allocation, so set

709

* it as full, and mark this process as "batching".

711

* it as full, and mark this process as "batching".

710

* This process will be allowed to complete a batch of

712

* This process will be allowed to complete a batch of

711

* requests, others will be blocked.

713

* requests, others will be blocked.

712

*/

714

*/

713

if (!blk_queue_full(q, is_sync)) {

715

if (!blk_queue_full(q, is_sync)) {

714

ioc_set_batching(q, ioc);

716

ioc_set_batching(q, ioc);

715

blk_set_queue_full(q, is_sync);

717

blk_set_queue_full(q, is_sync);

716

} else {

718

} else {

717

if (may_queue != ELV_MQUEUE_MUST

719

if (may_queue != ELV_MQUEUE_MUST

718

&& !ioc_batching(q, ioc)) {

720

&& !ioc_batching(q, ioc)) {

719

/*

721

/*

720

* The queue is full and the allocating

722

* The queue is full and the allocating

721

* process is not a "batcher", and not

723

* process is not a "batcher", and not

722

* exempted by the IO scheduler

724

* exempted by the IO scheduler

723

*/

725

*/

724

goto out;

726

goto out;

725

}

727

}

726

}

728

}

727

}

729

}

728

blk_set_queue_congested(q, is_sync);

730

blk_set_queue_congested(q, is_sync);

729

}

731

}

730

732

731

/*

733

/*

732

* Only allow batching queuers to allocate up to 50% over the defined

734

* Only allow batching queuers to allocate up to 50% over the defined

733

* limit of requests, otherwise we could have thousands of requests

735

* limit of requests, otherwise we could have thousands of requests

734

* allocated with any setting of ->nr_requests

736

* allocated with any setting of ->nr_requests

735

*/

737

*/

736

if (rl->count[is_sync] >= (3 * q->nr_requests / 2))

738

if (rl->count[is_sync] >= (3 * q->nr_requests / 2))

737

goto out;

739

goto out;

738

740

739

rl->count[is_sync]++;

741

rl->count[is_sync]++;

740

rl->starved[is_sync] = 0;

742

rl->starved[is_sync] = 0;

741

743

742

if (blk_rq_should_init_elevator(bio)) {

744

if (blk_rq_should_init_elevator(bio)) {

743

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

745

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

744

if (priv)

746

if (priv)

745

rl->elvpriv++;

747

rl->elvpriv++;

746

}

748

}

747

749

748

if (blk_queue_io_stat(q))

750

if (blk_queue_io_stat(q))

749

rw_flags |= REQ_IO_STAT;

751

rw_flags |= REQ_IO_STAT;

750

spin_unlock_irq(q->queue_lock);

752

spin_unlock_irq(q->queue_lock);

751

753

752

rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

754

rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

753

if (unlikely(!rq)) {

755

if (unlikely(!rq)) {

754

/*

756

/*

755

* Allocation failed presumably due to memory. Undo anything

757

* Allocation failed presumably due to memory. Undo anything

756

* we might have messed up.

758

* we might have messed up.

757

*

759

*

758

* Allocating task should really be put onto the front of the

760

* Allocating task should really be put onto the front of the

759

* wait queue, but this is pretty rare.

761

* wait queue, but this is pretty rare.

760

*/

762

*/

761

spin_lock_irq(q->queue_lock);

763

spin_lock_irq(q->queue_lock);

762

freed_request(q, is_sync, priv);

764

freed_request(q, is_sync, priv);

763

765

764

/*

766

/*

765

* in the very unlikely event that allocation failed and no

767

* in the very unlikely event that allocation failed and no

766

* requests for this direction was pending, mark us starved

768

* requests for this direction was pending, mark us starved

767

* so that freeing of a request in the other direction will

769

* so that freeing of a request in the other direction will

768

* notice us. another possible fix would be to split the

770

* notice us. another possible fix would be to split the

769

* rq mempool into READ and WRITE

771

* rq mempool into READ and WRITE

770

*/

772

*/

771

rq_starved:

773

rq_starved:

772

if (unlikely(rl->count[is_sync] == 0))

774

if (unlikely(rl->count[is_sync] == 0))

773

rl->starved[is_sync] = 1;

775

rl->starved[is_sync] = 1;

774

776

775

goto out;

777

goto out;

776

}

778

}

777

779

778

/*

780

/*

779

* ioc may be NULL here, and ioc_batching will be false. That's

781

* ioc may be NULL here, and ioc_batching will be false. That's

780

* OK, if the queue is under the request limit then requests need

782

* OK, if the queue is under the request limit then requests need

781

* not count toward the nr_batch_requests limit. There will always

783

* not count toward the nr_batch_requests limit. There will always

782

* be some limit enforced by BLK_BATCH_TIME.

784

* be some limit enforced by BLK_BATCH_TIME.

783

*/

785

*/

784

if (ioc_batching(q, ioc))

786

if (ioc_batching(q, ioc))

785

ioc->nr_batch_requests--;

787

ioc->nr_batch_requests--;

786

788

787

trace_block_getrq(q, bio, rw_flags & 1);

789

trace_block_getrq(q, bio, rw_flags & 1);

788

out:

790

out:

789

return rq;

791

return rq;

790

}

792

}

791

793

792

/*

794

/*

793

* No available requests for this queue, wait for some requests to become

795

* No available requests for this queue, wait for some requests to become

794

* available.

796

* available.

795

*

797

*

796

* Called with q->queue_lock held, and returns with it unlocked.

798

* Called with q->queue_lock held, and returns with it unlocked.

797

*/

799

*/

798

static struct request *get_request_wait(struct request_queue *q, int rw_flags,

800

static struct request *get_request_wait(struct request_queue *q, int rw_flags,

799

struct bio *bio)

801

struct bio *bio)

800

{

802

{

801

const bool is_sync = rw_is_sync(rw_flags) != 0;

803

const bool is_sync = rw_is_sync(rw_flags) != 0;

802

struct request *rq;

804

struct request *rq;

803

805

804

rq = get_request(q, rw_flags, bio, GFP_NOIO);

806

rq = get_request(q, rw_flags, bio, GFP_NOIO);

805

while (!rq) {

807

while (!rq) {

806

DEFINE_WAIT(wait);

808

DEFINE_WAIT(wait);

807

struct io_context *ioc;

809

struct io_context *ioc;

808

struct request_list *rl = &q->rq;

810

struct request_list *rl = &q->rq;

809

811

810

prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,

812

prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,

811

TASK_UNINTERRUPTIBLE);

813

TASK_UNINTERRUPTIBLE);

812

814

813

trace_block_sleeprq(q, bio, rw_flags & 1);

815

trace_block_sleeprq(q, bio, rw_flags & 1);

814

816

815

spin_unlock_irq(q->queue_lock);

817

spin_unlock_irq(q->queue_lock);

816

io_schedule();

818

io_schedule();

817

819

818

/*

820

/*

819

* After sleeping, we become a "batching" process and

821

* After sleeping, we become a "batching" process and

820

* will be able to allocate at least one request, and

822

* will be able to allocate at least one request, and

821

* up to a big batch of them for a small period time.

823

* up to a big batch of them for a small period time.

822

* See ioc_batching, ioc_set_batching

824

* See ioc_batching, ioc_set_batching

823

*/

825

*/

824

ioc = current_io_context(GFP_NOIO, q->node);

826

ioc = current_io_context(GFP_NOIO, q->node);

825

ioc_set_batching(q, ioc);

827

ioc_set_batching(q, ioc);

826

828

827

spin_lock_irq(q->queue_lock);

829

spin_lock_irq(q->queue_lock);

828

finish_wait(&rl->wait[is_sync], &wait);

830

finish_wait(&rl->wait[is_sync], &wait);

829

831

830

rq = get_request(q, rw_flags, bio, GFP_NOIO);

832

rq = get_request(q, rw_flags, bio, GFP_NOIO);

831

};

833

};

832

834

833

return rq;

835

return rq;

834

}

836

}

835

837

836

/*
 * Allocate a request for @q in direction @rw, which must be READ or WRITE.
 *
 * With __GFP_WAIT set in @gfp_mask the call goes through
 * get_request_wait(); otherwise a single get_request() attempt is made and
 * NULL is returned if it fails.  q->queue_lock is taken here and is no
 * longer held when the function returns.
 */
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(!(rw == READ || rw == WRITE));

	spin_lock_irq(q->queue_lock);

	/* The waiting variant returns with the lock already dropped. */
	if (gfp_mask & __GFP_WAIT)
		return get_request_wait(q, rw, NULL);

	rq = get_request(q, rw, NULL, gfp_mask);
	if (rq == NULL)
		spin_unlock_irq(q->queue_lock);	/* failure leaves it held */

	/* q->queue_lock is unlocked at this point */
	return rq;
}
EXPORT_SYMBOL(blk_get_request);

855

857

856

/**

858

/**

857

* blk_make_request - given a bio, allocate a corresponding struct request.

859

* blk_make_request - given a bio, allocate a corresponding struct request.

858

* @q: target request queue

860

* @q: target request queue

859

* @bio: The bio describing the memory mappings that will be submitted for IO.

861

* @bio: The bio describing the memory mappings that will be submitted for IO.

860

* It may be a chained-bio properly constructed by block/bio layer.

862

* It may be a chained-bio properly constructed by block/bio layer.

861

* @gfp_mask: gfp flags to be used for memory allocation

863

* @gfp_mask: gfp flags to be used for memory allocation

862

*

864

*

863

* blk_make_request is the parallel of generic_make_request for BLOCK_PC

865

* blk_make_request is the parallel of generic_make_request for BLOCK_PC

864

* type commands. Where the struct request needs to be farther initialized by

866

* type commands. Where the struct request needs to be farther initialized by

865

* the caller. It is passed a &struct bio, which describes the memory info of

867

* the caller. It is passed a &struct bio, which describes the memory info of

866

* the I/O transfer.

868

* the I/O transfer.

867

*

869

*

868

* The caller of blk_make_request must make sure that bi_io_vec

870

* The caller of blk_make_request must make sure that bi_io_vec

869

* are set to describe the memory buffers. That bio_data_dir() will return

871

* are set to describe the memory buffers. That bio_data_dir() will return

870

* the needed direction of the request. (And all bio's in the passed bio-chain

872

* the needed direction of the request. (And all bio's in the passed bio-chain

871

* are properly set accordingly)

873

* are properly set accordingly)

872

*

874

*

873

* If called under none-sleepable conditions, mapped bio buffers must not

875

* If called under none-sleepable conditions, mapped bio buffers must not

874

* need bouncing, by calling the appropriate masked or flagged allocator,

876

* need bouncing, by calling the appropriate masked or flagged allocator,

875

* suitable for the target device. Otherwise the call to blk_queue_bounce will

877

* suitable for the target device. Otherwise the call to blk_queue_bounce will

876

* BUG.

878

* BUG.

877

*

879

*

878

* WARNING: When allocating/cloning a bio-chain, careful consideration should be

880

* WARNING: When allocating/cloning a bio-chain, careful consideration should be

879

* given to how you allocate bios. In particular, you cannot use __GFP_WAIT for

881

* given to how you allocate bios. In particular, you cannot use __GFP_WAIT for

880

* anything but the first bio in the chain. Otherwise you risk waiting for IO

882

* anything but the first bio in the chain. Otherwise you risk waiting for IO

881

* completion of a bio that hasn't been submitted yet, thus resulting in a

883

* completion of a bio that hasn't been submitted yet, thus resulting in a

882

* deadlock. Alternatively bios should be allocated using bio_kmalloc() instead

884

* deadlock. Alternatively bios should be allocated using bio_kmalloc() instead

883

* of bio_alloc(), as that avoids the mempool deadlock.

885

* of bio_alloc(), as that avoids the mempool deadlock.

884

* If possible a big IO should be split into smaller parts when allocation

886

* If possible a big IO should be split into smaller parts when allocation

885

* fails. Partial allocation should not be an error, or you risk a live-lock.

887

* fails. Partial allocation should not be an error, or you risk a live-lock.

886

*/

888

*/

887

struct request *blk_make_request(struct request_queue *q, struct bio *bio,

889

struct request *blk_make_request(struct request_queue *q, struct bio *bio,

888

gfp_t gfp_mask)

890

gfp_t gfp_mask)

889

{

891

{

890

struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);

892

struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);

891

893

892

if (unlikely(!rq))

894

if (unlikely(!rq))

893

return ERR_PTR(-ENOMEM);

895

return ERR_PTR(-ENOMEM);

894

896

895

for_each_bio(bio) {

897

for_each_bio(bio) {

896

struct bio *bounce_bio = bio;

898

struct bio *bounce_bio = bio;

897

int ret;

899

int ret;

898

900

899

blk_queue_bounce(q, &bounce_bio);

901

blk_queue_bounce(q, &bounce_bio);

900

ret = blk_rq_append_bio(q, rq, bounce_bio);

902

ret = blk_rq_append_bio(q, rq, bounce_bio);

901

if (unlikely(ret)) {

903

if (unlikely(ret)) {

902

blk_put_request(rq);

904

blk_put_request(rq);

903

return ERR_PTR(ret);

905

return ERR_PTR(ret);

904

}

906

}

905

}

907

}

906

908

907

return rq;

909

return rq;

908

}

910

}

909

EXPORT_SYMBOL(blk_make_request);

911

EXPORT_SYMBOL(blk_make_request);

910

912

911

/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more.  When that happens the request has to be put back on the queue.
 *    Must be called with the queue lock held.
 */
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	/* Stop the request's timer and clear its completion mark. */
	blk_delete_timer(rq);
	blk_clear_rq_complete(rq);
	trace_block_rq_requeue(q, rq);

	/* A tagged request gives its tag back first. */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	/* The request must not be reported as queued at this point. */
	BUG_ON(blk_queued_rq(rq));

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);

935

937

936

/*
 * Run drive_stat_acct() for @rq as a new request, then hand it to the
 * elevator at position @where.
 */
static void add_acct_request(struct request_queue *q, struct request *rq,
			     int where)
{
	const int new_io = 1;

	drive_stat_acct(rq, new_io);
	__elv_add_request(q, rq, where);
}

942

944

943

/**

945

/**

944

* blk_insert_request - insert a special request into a request queue

946

* blk_insert_request - insert a special request into a request queue

945

* @q: request queue where request should be inserted

947

* @q: request queue where request should be inserted

946

* @rq: request to be inserted

948

* @rq: request to be inserted

947

* @at_head: insert request at head or tail of queue

949

* @at_head: insert request at head or tail of queue

948

* @data: private data

950

* @data: private data

949

*

951

*

950

* Description:

952

* Description:

951

* Many block devices need to execute commands asynchronously, so they don't

953

* Many block devices need to execute commands asynchronously, so they don't

952

* block the whole kernel from preemption during request execution. This is

954

* block the whole kernel from preemption during request execution. This is

953

* accomplished normally by inserting aritficial requests tagged as

955

* accomplished normally by inserting aritficial requests tagged as

954

* REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them

956

* REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them

955

* be scheduled for actual execution by the request queue.

957

* be scheduled for actual execution by the request queue.

956

*

958

*

957

* We have the option of inserting the head or the tail of the queue.

959

* We have the option of inserting the head or the tail of the queue.

958

* Typically we use the tail for new ioctls and so forth. We use the head

960

* Typically we use the tail for new ioctls and so forth. We use the head

959

* of the queue for things like a QUEUE_FULL message from a device, or a

961

* of the queue for things like a QUEUE_FULL message from a device, or a

960

* host that is unable to accept a particular command.

962

* host that is unable to accept a particular command.

961

*/

963

*/

962

void blk_insert_request(struct request_queue *q, struct request *rq,

964

void blk_insert_request(struct request_queue *q, struct request *rq,

963

int at_head, void *data)

965

int at_head, void *data)

964

{

966

{

965

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

967

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

966

unsigned long flags;

968

unsigned long flags;

967

969

968

/*

970

/*

969

* tell I/O scheduler that this isn't a regular read/write (ie it

971

* tell I/O scheduler that this isn't a regular read/write (ie it

970

* must not attempt merges on this) and that it acts as a soft

972

* must not attempt merges on this) and that it acts as a soft

971

* barrier

973

* barrier

972

*/

974

*/

973

rq->cmd_type = REQ_TYPE_SPECIAL;

975

rq->cmd_type = REQ_TYPE_SPECIAL;

974

976

975

rq->special = data;

977

rq->special = data;

976

978

977

spin_lock_irqsave(q->queue_lock, flags);

979

spin_lock_irqsave(q->queue_lock, flags);

978

980

979

/*

981

/*

980

* If command is tagged, release the tag

982

* If command is tagged, release the tag

981

*/

983

*/

982

if (blk_rq_tagged(rq))

984

if (blk_rq_tagged(rq))

983

blk_queue_end_tag(q, rq);

985

blk_queue_end_tag(q, rq);

984

986

985

add_acct_request(q, rq, where);

987

add_acct_request(q, rq, where);

986

__blk_run_queue(q);

988

__blk_run_queue(q);

987

spin_unlock_irqrestore(q->queue_lock, flags);

989

spin_unlock_irqrestore(q->queue_lock, flags);

988

}

990

}

989

EXPORT_SYMBOL(blk_insert_request);

991

EXPORT_SYMBOL(blk_insert_request);

990

992

991

static void part_round_stats_single(int cpu, struct hd_struct *part,

993

static void part_round_stats_single(int cpu, struct hd_struct *part,

992

unsigned long now)

994

unsigned long now)

993

{

995

{

994

if (now == part->stamp)

996

if (now == part->stamp)

995

return;

997

return;

996

998

997

if (part_in_flight(part)) {

999

if (part_in_flight(part)) {

998

__part_stat_add(cpu, part, time_in_queue,

1000

__part_stat_add(cpu, part, time_in_queue,

999

part_in_flight(part) * (now - part->stamp));

1001

part_in_flight(part) * (now - part->stamp));

1000

__part_stat_add(cpu, part, io_ticks, (now - part->stamp));

1002

__part_stat_add(cpu, part, io_ticks, (now - part->stamp));

1001

}

1003

}

1002

part->stamp = now;

1004

part->stamp = now;

1003

}

1005

}

1004

1006

1005

/**

1007

/**

1006

* part_round_stats() - Round off the performance stats on a struct disk_stats.

1008

* part_round_stats() - Round off the performance stats on a struct disk_stats.

1007

* @cpu: cpu number for stats access

1009

* @cpu: cpu number for stats access

1008

* @part: target partition

1010

* @part: target partition

1009

*

1011

*

1010

* The average IO queue length and utilisation statistics are maintained

1012

* The average IO queue length and utilisation statistics are maintained

1011

* by observing the current state of the queue length and the amount of

1013

* by observing the current state of the queue length and the amount of

1012

* time it has been in this state for.

1014

* time it has been in this state for.

1013

*

1015

*

1014

* Normally, that accounting is done on IO completion, but that can result

1016

* Normally, that accounting is done on IO completion, but that can result

1015

* in more than a second's worth of IO being accounted for within any one

1017

* in more than a second's worth of IO being accounted for within any one

1016

* second, leading to >100% utilisation. To deal with that, we call this

1018

* second, leading to >100% utilisation. To deal with that, we call this

1017

* function to do a round-off before returning the results when reading

1019

* function to do a round-off before returning the results when reading

1018

* /proc/diskstats. This accounts immediately for all queue usage up to

1020

* /proc/diskstats. This accounts immediately for all queue usage up to

1019

* the current jiffies and restarts the counters again.

1021

* the current jiffies and restarts the counters again.

1020

*/

1022

*/

1021

void part_round_stats(int cpu, struct hd_struct *part)

1023

void part_round_stats(int cpu, struct hd_struct *part)

1022

{

1024

{

1023

unsigned long now = jiffies;

1025

unsigned long now = jiffies;

1024

1026

1025

if (part->partno)

1027

if (part->partno)

1026

part_round_stats_single(cpu, &part_to_disk(part)->part0, now);

1028

part_round_stats_single(cpu, &part_to_disk(part)->part0, now);

1027

part_round_stats_single(cpu, part, now);

1029

part_round_stats_single(cpu, part, now);

1028

}

1030

}

1029

EXPORT_SYMBOL_GPL(part_round_stats);

1031

EXPORT_SYMBOL_GPL(part_round_stats);

1030

1032

1031

/*

1033

/*

1032

* queue lock must be held

1034

* queue lock must be held

1033

*/

1035

*/

1034

void __blk_put_request(struct request_queue *q, struct request *req)

1036

void __blk_put_request(struct request_queue *q, struct request *req)

1035

{

1037

{

1036

if (unlikely(!q))

1038

if (unlikely(!q))

1037

return;

1039

return;

1038

if (unlikely(--req->ref_count))

1040

if (unlikely(--req->ref_count))

1039

return;

1041

return;

1040

1042

1041

elv_completed_request(q, req);

1043

elv_completed_request(q, req);

1042

1044

1043

/* this is a bio leak */

1045

/* this is a bio leak */

1044

WARN_ON(req->bio != NULL);

1046

WARN_ON(req->bio != NULL);

1045

1047

1046

/*

1048

/*

1047

* Request may not have originated from ll_rw_blk. if not,

1049

* Request may not have originated from ll_rw_blk. if not,

1048

* it didn't come out of our reserved rq pools

1050

* it didn't come out of our reserved rq pools

1049

*/

1051

*/

1050

if (req->cmd_flags & REQ_ALLOCED) {

1052

if (req->cmd_flags & REQ_ALLOCED) {

1051

int is_sync = rq_is_sync(req) != 0;

1053

int is_sync = rq_is_sync(req) != 0;

1052

int priv = req->cmd_flags & REQ_ELVPRIV;

1054

int priv = req->cmd_flags & REQ_ELVPRIV;

1053

1055

1054

BUG_ON(!list_empty(&req->queuelist));

1056

BUG_ON(!list_empty(&req->queuelist));

1055

BUG_ON(!hlist_unhashed(&req->hash));

1057

BUG_ON(!hlist_unhashed(&req->hash));

1056

1058

1057

blk_free_request(q, req);

1059

blk_free_request(q, req);

1058

freed_request(q, is_sync, priv);

1060

freed_request(q, is_sync, priv);

1059

}

1061

}

1060

}

1062

}

1061

EXPORT_SYMBOL_GPL(__blk_put_request);

1063

EXPORT_SYMBOL_GPL(__blk_put_request);

1062

1064

1063

void blk_put_request(struct request *req)

1065

void blk_put_request(struct request *req)

1064

{

1066

{

1065

unsigned long flags;

1067

unsigned long flags;

1066

struct request_queue *q = req->q;

1068

struct request_queue *q = req->q;

1067

1069

1068

spin_lock_irqsave(q->queue_lock, flags);

1070

spin_lock_irqsave(q->queue_lock, flags);

1069

__blk_put_request(q, req);

1071

__blk_put_request(q, req);

1070

spin_unlock_irqrestore(q->queue_lock, flags);

1072

spin_unlock_irqrestore(q->queue_lock, flags);

1071

}

1073

}

1072

EXPORT_SYMBOL(blk_put_request);

1074

EXPORT_SYMBOL(blk_put_request);

1073

1075

1074

/**

1076

/**

1075

* blk_add_request_payload - add a payload to a request

1077

* blk_add_request_payload - add a payload to a request

1076

* @rq: request to update

1078

* @rq: request to update

1077

* @page: page backing the payload

1079

* @page: page backing the payload

1078

* @len: length of the payload.

1080

* @len: length of the payload.

1079

*

1081

*

1080

* This allows to later add a payload to an already submitted request by

1082

* This allows to later add a payload to an already submitted request by

1081

* a block driver. The driver needs to take care of freeing the payload

1083

* a block driver. The driver needs to take care of freeing the payload

1082

* itself.

1084

* itself.

1083

*

1085

*

1084

* Note that this is a quite horrible hack and nothing but handling of

1086

* Note that this is a quite horrible hack and nothing but handling of

1085

* discard requests should ever use it.

1087

* discard requests should ever use it.

1086

*/

1088

*/

1087

void blk_add_request_payload(struct request *rq, struct page *page,

1089

void blk_add_request_payload(struct request *rq, struct page *page,

1088

unsigned int len)

1090

unsigned int len)

1089

{

1091

{

1090

struct bio *bio = rq->bio;

1092

struct bio *bio = rq->bio;

1091

1093

1092

bio->bi_io_vec->bv_page = page;

1094

bio->bi_io_vec->bv_page = page;

1093

bio->bi_io_vec->bv_offset = 0;

1095

bio->bi_io_vec->bv_offset = 0;

1094

bio->bi_io_vec->bv_len = len;

1096

bio->bi_io_vec->bv_len = len;

1095

1097

1096

bio->bi_size = len;

1098

bio->bi_size = len;

1097

bio->bi_vcnt = 1;

1099

bio->bi_vcnt = 1;

1098

bio->bi_phys_segments = 1;

1100

bio->bi_phys_segments = 1;

1099

1101

1100

rq->__data_len = rq->resid_len = len;

1102

rq->__data_len = rq->resid_len = len;

1101

rq->nr_phys_segments = 1;

1103

rq->nr_phys_segments = 1;

1102

rq->buffer = bio_data(bio);

1104

rq->buffer = bio_data(bio);

1103

}

1105

}

1104

EXPORT_SYMBOL_GPL(blk_add_request_payload);

1106

EXPORT_SYMBOL_GPL(blk_add_request_payload);

1105

1107

1106

static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,

1108

static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,

1107

struct bio *bio)

1109

struct bio *bio)

1108

{

1110

{

1109

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1111

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1110

1112

1111

/*

1113

/*

1112

* Debug stuff, kill later

1114

* Debug stuff, kill later

1113

*/

1115

*/

1114

if (!rq_mergeable(req)) {

1116

if (!rq_mergeable(req)) {

1115

blk_dump_rq_flags(req, "back");

1117

blk_dump_rq_flags(req, "back");

1116

return false;

1118

return false;

1117

}

1119

}

1118

1120

1119

if (!ll_back_merge_fn(q, req, bio))

1121

if (!ll_back_merge_fn(q, req, bio))

1120

return false;

1122

return false;

1121

1123

1122

trace_block_bio_backmerge(q, bio);

1124

trace_block_bio_backmerge(q, bio);

1123

1125

1124

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1126

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1125

blk_rq_set_mixed_merge(req);

1127

blk_rq_set_mixed_merge(req);

1126

1128

1127

req->biotail->bi_next = bio;

1129

req->biotail->bi_next = bio;

1128

req->biotail = bio;

1130

req->biotail = bio;

1129

req->__data_len += bio->bi_size;

1131

req->__data_len += bio->bi_size;

1130

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1132

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1131

1133

1132

drive_stat_acct(req, 0);

1134

drive_stat_acct(req, 0);

1133

return true;

1135

return true;

1134

}

1136

}

1135

1137

1136

static bool bio_attempt_front_merge(struct request_queue *q,

1138

static bool bio_attempt_front_merge(struct request_queue *q,

1137

struct request *req, struct bio *bio)

1139

struct request *req, struct bio *bio)

1138

{

1140

{

1139

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1141

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1140

sector_t sector;

1142

sector_t sector;

1141

1143

1142

/*

1144

/*

1143

* Debug stuff, kill later

1145

* Debug stuff, kill later

1144

*/

1146

*/

1145

if (!rq_mergeable(req)) {

1147

if (!rq_mergeable(req)) {

1146

blk_dump_rq_flags(req, "front");

1148

blk_dump_rq_flags(req, "front");

1147

return false;

1149

return false;

1148

}

1150

}

1149

1151

1150

if (!ll_front_merge_fn(q, req, bio))

1152

if (!ll_front_merge_fn(q, req, bio))

1151

return false;

1153

return false;

1152

1154

1153

trace_block_bio_frontmerge(q, bio);

1155

trace_block_bio_frontmerge(q, bio);

1154

1156

1155

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1157

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1156

blk_rq_set_mixed_merge(req);

1158

blk_rq_set_mixed_merge(req);

1157

1159

1158

sector = bio->bi_sector;

1160

sector = bio->bi_sector;

1159

1161

1160

bio->bi_next = req->bio;

1162

bio->bi_next = req->bio;

1161

req->bio = bio;

1163

req->bio = bio;

1162

1164

1163

/*

1165

/*

1164

* may not be valid. if the low level driver said

1166

* may not be valid. if the low level driver said

1165

* it didn't need a bounce buffer then it better

1167

* it didn't need a bounce buffer then it better

1166

* not touch req->buffer either...

1168

* not touch req->buffer either...

1167

*/

1169

*/

1168

req->buffer = bio_data(bio);

1170

req->buffer = bio_data(bio);

1169

req->__sector = bio->bi_sector;

1171

req->__sector = bio->bi_sector;

1170

req->__data_len += bio->bi_size;

1172

req->__data_len += bio->bi_size;

1171

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1173

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1172

1174

1173

drive_stat_acct(req, 0);

1175

drive_stat_acct(req, 0);

1174

return true;

1176

return true;

1175

}

1177

}

1176

1178

1177

/*

1179

/*

1178

* Attempts to merge with the plugged list in the current process. Returns

1180

* Attempts to merge with the plugged list in the current process. Returns

1179

* true if merge was successful, otherwise false.

1181

* true if merge was successful, otherwise false.

1180

*/

1182

*/

1181

static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,

1183

static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,

1182

struct bio *bio)

1184

struct bio *bio)

1183

{

1185

{

1184

struct blk_plug *plug;

1186

struct blk_plug *plug;

1185

struct request *rq;

1187

struct request *rq;

1186

bool ret = false;

1188

bool ret = false;

1187

1189

1188

plug = tsk->plug;

1190

plug = tsk->plug;

1189

if (!plug)

1191

if (!plug)

1190

goto out;

1192

goto out;

1191

1193

1192

list_for_each_entry_reverse(rq, &plug->list, queuelist) {

1194

list_for_each_entry_reverse(rq, &plug->list, queuelist) {

1193

int el_ret;

1195

int el_ret;

1194

1196

1195

if (rq->q != q)

1197

if (rq->q != q)

1196

continue;

1198

continue;

1197

1199

1198

el_ret = elv_try_merge(rq, bio);

1200

el_ret = elv_try_merge(rq, bio);

1199

if (el_ret == ELEVATOR_BACK_MERGE) {

1201

if (el_ret == ELEVATOR_BACK_MERGE) {

1200

ret = bio_attempt_back_merge(q, rq, bio);

1202

ret = bio_attempt_back_merge(q, rq, bio);

1201

if (ret)

1203

if (ret)

1202

break;

1204

break;

1203

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1205

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1204

ret = bio_attempt_front_merge(q, rq, bio);

1206

ret = bio_attempt_front_merge(q, rq, bio);

1205

if (ret)

1207

if (ret)

1206

break;

1208

break;

1207

}

1209

}

1208

}

1210

}

1209

out:

1211

out:

1210

return ret;

1212

return ret;

1211

}

1213

}

1212

1214

1213

void init_request_from_bio(struct request *req, struct bio *bio)

1215

void init_request_from_bio(struct request *req, struct bio *bio)

1214

{

1216

{

1215

req->cpu = bio->bi_comp_cpu;

1217

req->cpu = bio->bi_comp_cpu;

1216

req->cmd_type = REQ_TYPE_FS;

1218

req->cmd_type = REQ_TYPE_FS;

1217

1219

1218

req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;

1220

req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;

1219

if (bio->bi_rw & REQ_RAHEAD)

1221

if (bio->bi_rw & REQ_RAHEAD)

1220

req->cmd_flags |= REQ_FAILFAST_MASK;

1222

req->cmd_flags |= REQ_FAILFAST_MASK;

1221

1223

1222

req->errors = 0;

1224

req->errors = 0;

1223

req->__sector = bio->bi_sector;

1225

req->__sector = bio->bi_sector;

1224

req->ioprio = bio_prio(bio);

1226

req->ioprio = bio_prio(bio);

1225

blk_rq_bio_prep(req->q, req, bio);

1227

blk_rq_bio_prep(req->q, req, bio);

1226

}

1228

}

1227

1229

1228

static int __make_request(struct request_queue *q, struct bio *bio)

1230

static int __make_request(struct request_queue *q, struct bio *bio)

1229

{

1231

{

1230

const bool sync = !!(bio->bi_rw & REQ_SYNC);

1232

const bool sync = !!(bio->bi_rw & REQ_SYNC);

1231

struct blk_plug *plug;

1233

struct blk_plug *plug;

1232

int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;

1234

int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;

1233

struct request *req;

1235

struct request *req;

1234

1236

1235

/*

1237

/*

1236

* low level driver can indicate that it wants pages above a

1238

* low level driver can indicate that it wants pages above a

1237

* certain limit bounced to low memory (ie for highmem, or even

1239

* certain limit bounced to low memory (ie for highmem, or even

1238

* ISA dma in theory)

1240

* ISA dma in theory)

1239

*/

1241

*/

1240

blk_queue_bounce(q, &bio);

1242

blk_queue_bounce(q, &bio);

1241

1243

1242

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {

1244

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {

1243

spin_lock_irq(q->queue_lock);

1245

spin_lock_irq(q->queue_lock);

1244

where = ELEVATOR_INSERT_FLUSH;

1246

where = ELEVATOR_INSERT_FLUSH;

1245

goto get_rq;

1247

goto get_rq;

1246

}

1248

}

1247

1249

1248

/*

1250

/*

1249

* Check if we can merge with the plugged list before grabbing

1251

* Check if we can merge with the plugged list before grabbing

1250

* any locks.

1252

* any locks.

1251

*/

1253

*/

1252

if (attempt_plug_merge(current, q, bio))

1254

if (attempt_plug_merge(current, q, bio))

1253

goto out;

1255

goto out;

1254

1256

1255

spin_lock_irq(q->queue_lock);

1257

spin_lock_irq(q->queue_lock);

1256

1258

1257

el_ret = elv_merge(q, &req, bio);

1259

el_ret = elv_merge(q, &req, bio);

1258

if (el_ret == ELEVATOR_BACK_MERGE) {

1260

if (el_ret == ELEVATOR_BACK_MERGE) {

1259

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1261

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1260

if (bio_attempt_back_merge(q, req, bio)) {

1262

if (bio_attempt_back_merge(q, req, bio)) {

1261

if (!attempt_back_merge(q, req))

1263

if (!attempt_back_merge(q, req))

1262

elv_merged_request(q, req, el_ret);

1264

elv_merged_request(q, req, el_ret);

1263

goto out_unlock;

1265

goto out_unlock;

1264

}

1266

}

1265

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1267

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1266

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1268

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1267

if (bio_attempt_front_merge(q, req, bio)) {

1269

if (bio_attempt_front_merge(q, req, bio)) {

1268

if (!attempt_front_merge(q, req))

1270

if (!attempt_front_merge(q, req))

1269

elv_merged_request(q, req, el_ret);

1271

elv_merged_request(q, req, el_ret);

1270

goto out_unlock;

1272

goto out_unlock;

1271

}

1273

}

1272

}

1274

}

1273

1275

1274

get_rq:

1276

get_rq:

1275

/*

1277

/*

1276

* This sync check and mask will be re-done in init_request_from_bio(),

1278

* This sync check and mask will be re-done in init_request_from_bio(),

1277

* but we need to set it earlier to expose the sync flag to the

1279

* but we need to set it earlier to expose the sync flag to the

1278

* rq allocator and io schedulers.

1280

* rq allocator and io schedulers.

1279

*/

1281

*/

1280

rw_flags = bio_data_dir(bio);

1282

rw_flags = bio_data_dir(bio);

1281

if (sync)

1283

if (sync)

1282

rw_flags |= REQ_SYNC;

1284

rw_flags |= REQ_SYNC;

1283

1285

1284

/*

1286

/*

1285

* Grab a free request. This is might sleep but can not fail.

1287

* Grab a free request. This is might sleep but can not fail.

1286

* Returns with the queue unlocked.

1288

* Returns with the queue unlocked.

1287

*/

1289

*/

1288

req = get_request_wait(q, rw_flags, bio);

1290

req = get_request_wait(q, rw_flags, bio);

1289

1291

1290

/*

1292

/*

1291

* After dropping the lock and possibly sleeping here, our request

1293

* After dropping the lock and possibly sleeping here, our request

1292

* may now be mergeable after it had proven unmergeable (above).

1294

* may now be mergeable after it had proven unmergeable (above).

1293

* We don't worry about that case for efficiency. It won't happen

1295

* We don't worry about that case for efficiency. It won't happen

1294

* often, and the elevators are able to handle it.

1296

* often, and the elevators are able to handle it.

1295

*/

1297

*/

1296

init_request_from_bio(req, bio);

1298

init_request_from_bio(req, bio);

1297

1299

1298

if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||

1300

if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||

1299

bio_flagged(bio, BIO_CPU_AFFINE)) {

1301

bio_flagged(bio, BIO_CPU_AFFINE)) {

1300

req->cpu = blk_cpu_to_group(get_cpu());

1302

req->cpu = blk_cpu_to_group(get_cpu());

1301

put_cpu();

1303

put_cpu();

1302

}

1304

}

1303

1305

1304

plug = current->plug;

1306

plug = current->plug;

1305

if (plug) {

1307

if (plug) {

1306

/*

1308

/*

1307

* If this is the first request added after a plug, fire

1309

* If this is the first request added after a plug, fire

1308

* of a plug trace. If others have been added before, check

1310

* of a plug trace. If others have been added before, check

1309

* if we have multiple devices in this plug. If so, make a

1311

* if we have multiple devices in this plug. If so, make a

1310

* note to sort the list before dispatch.

1312

* note to sort the list before dispatch.

1311

*/

1313

*/

1312

if (list_empty(&plug->list))

1314

if (list_empty(&plug->list))

1313

trace_block_plug(q);

1315

trace_block_plug(q);

1314

else if (!plug->should_sort) {

1316

else if (!plug->should_sort) {

1315

struct request *__rq;

1317

struct request *__rq;

1316

1318

1317

__rq = list_entry_rq(plug->list.prev);

1319

__rq = list_entry_rq(plug->list.prev);

1318

if (__rq->q != q)

1320

if (__rq->q != q)

1319

plug->should_sort = 1;

1321

plug->should_sort = 1;

1320

}

1322

}

1321

/*

1323

/*

1322

* Debug flag, kill later

1324

* Debug flag, kill later

1323

*/

1325

*/

1324

req->cmd_flags |= REQ_ON_PLUG;

1326

req->cmd_flags |= REQ_ON_PLUG;

1325

list_add_tail(&req->queuelist, &plug->list);

1327

list_add_tail(&req->queuelist, &plug->list);

1326

drive_stat_acct(req, 1);

1328

drive_stat_acct(req, 1);

1327

} else {

1329

} else {

1328

spin_lock_irq(q->queue_lock);

1330

spin_lock_irq(q->queue_lock);

1329

add_acct_request(q, req, where);

1331

add_acct_request(q, req, where);

1330

__blk_run_queue(q);

1332

__blk_run_queue(q);

1331

out_unlock:

1333

out_unlock:

1332

spin_unlock_irq(q->queue_lock);

1334

spin_unlock_irq(q->queue_lock);

1333

}

1335

}

1334

out:

1336

out:

1335

return 0;

1337

return 0;

1336

}

1338

}

1337

1339

1338

/*

1340

/*

1339

* If bio->bi_dev is a partition, remap the location

1341

* If bio->bi_dev is a partition, remap the location

1340

*/

1342

*/

1341

static inline void blk_partition_remap(struct bio *bio)

1343

static inline void blk_partition_remap(struct bio *bio)

1342

{

1344

{

1343

struct block_device *bdev = bio->bi_bdev;

1345

struct block_device *bdev = bio->bi_bdev;

1344

1346

1345

if (bio_sectors(bio) && bdev != bdev->bd_contains) {

1347

if (bio_sectors(bio) && bdev != bdev->bd_contains) {

1346

struct hd_struct *p = bdev->bd_part;

1348

struct hd_struct *p = bdev->bd_part;

1347

1349

1348

bio->bi_sector += p->start_sect;

1350

bio->bi_sector += p->start_sect;

1349

bio->bi_bdev = bdev->bd_contains;

1351

bio->bi_bdev = bdev->bd_contains;

1350

1352

1351

trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,

1353

trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,

1352

bdev->bd_dev,

1354

bdev->bd_dev,

1353

bio->bi_sector - p->start_sect);

1355

bio->bi_sector - p->start_sect);

1354

}

1356

}

1355

}

1357

}

1356

1358

1357

static void handle_bad_sector(struct bio *bio)

1359

static void handle_bad_sector(struct bio *bio)

1358

{

1360

{

1359

char b[BDEVNAME_SIZE];

1361

char b[BDEVNAME_SIZE];

1360

1362

1361

printk(KERN_INFO "attempt to access beyond end of device\n");

1363

printk(KERN_INFO "attempt to access beyond end of device\n");

1362

printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",

1364

printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",

1363

bdevname(bio->bi_bdev, b),

1365

bdevname(bio->bi_bdev, b),

1364

bio->bi_rw,

1366

bio->bi_rw,

1365

(unsigned long long)bio->bi_sector + bio_sectors(bio),

1367

(unsigned long long)bio->bi_sector + bio_sectors(bio),

1366

(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));

1368

(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));

1367

1369

1368

set_bit(BIO_EOF, &bio->bi_flags);

1370

set_bit(BIO_EOF, &bio->bi_flags);

1369

}

1371

}

1370

1372

1371

#ifdef CONFIG_FAIL_MAKE_REQUEST

/* Fault attribute shared by the helpers below. */
static DECLARE_FAULT_ATTR(fail_make_request);

/*
 * Handler registered through __setup() for "fail_make_request="; hands the
 * option string to setup_fault_attr().
 */
static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

/*
 * Decide whether @bio should be failed on purpose. should_fail() is only
 * consulted when make_it_fail is set on the bio's partition or on part0 of
 * its disk; otherwise 0 is returned.
 */
static int should_fail_request(struct bio *bio)
{
	struct hd_struct *part = bio->bi_bdev->bd_part;

	if (!part_to_disk(part)->part0.make_it_fail && !part->make_it_fail)
		return 0;

	return should_fail(&fail_make_request, bio->bi_size);
}

/* Late initcall: create the "fail_make_request" fault-attribute dentries. */
static int __init fail_make_request_debugfs(void)
{
	return init_fault_attr_dentries(&fail_make_request,
					"fail_make_request");
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

/* Without CONFIG_FAIL_MAKE_REQUEST no bio is ever failed artificially. */
static inline int should_fail_request(struct bio *bio)
{
	return 0;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */

1407

1409

1408

/*

1410

/*

1409

* Check whether this bio extends beyond the end of the device.

1411

* Check whether this bio extends beyond the end of the device.

1410

*/

1412

*/

1411

static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)

1413

static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)

1412

{

1414

{

1413

sector_t maxsector;

1415

sector_t maxsector;

1414

1416

1415

if (!nr_sectors)

1417

if (!nr_sectors)

1416

return 0;

1418

return 0;

1417

1419

1418

/* Test device or partition size, when known. */

1420

/* Test device or partition size, when known. */

1419

maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;

1421

maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;

1420

if (maxsector) {

1422

if (maxsector) {

1421

sector_t sector = bio->bi_sector;

1423

sector_t sector = bio->bi_sector;

1422

1424

1423

if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {

1425

if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {

1424

/*

1426

/*

1425

* This may well happen - the kernel calls bread()

1427

* This may well happen - the kernel calls bread()

1426

* without checking the size of the device, e.g., when

1428

* without checking the size of the device, e.g., when

1427

* mounting a device.

1429

* mounting a device.

1428

*/

1430

*/

1429

handle_bad_sector(bio);

1431

handle_bad_sector(bio);

1430

return 1;

1432

return 1;

1431

}

1433

}

1432

}

1434

}

1433

1435

1434

return 0;

1436

return 0;

1435

}

1437

}

1436

1438

1437

/**

1439

/**

1438

* generic_make_request - hand a buffer to its device driver for I/O

1440

* generic_make_request - hand a buffer to its device driver for I/O

1439

* @bio: The bio describing the location in memory and on the device.

1441

* @bio: The bio describing the location in memory and on the device.

1440

*

1442

*

1441

* generic_make_request() is used to make I/O requests of block

1443

* generic_make_request() is used to make I/O requests of block

1442

* devices. It is passed a &struct bio, which describes the I/O that needs

1444

* devices. It is passed a &struct bio, which describes the I/O that needs

1443

* to be done.

1445

* to be done.

1444

*

1446

*

1445

* generic_make_request() does not return any status. The

1447

* generic_make_request() does not return any status. The

1446

* success/failure status of the request, along with notification of

1448

* success/failure status of the request, along with notification of

1447

* completion, is delivered asynchronously through the bio->bi_end_io

1449

* completion, is delivered asynchronously through the bio->bi_end_io

1448

* function described (one day) else where.

1450

* function described (one day) else where.

1449

*

1451

*

1450

* The caller of generic_make_request must make sure that bi_io_vec

1452

* The caller of generic_make_request must make sure that bi_io_vec

1451

* are set to describe the memory buffer, and that bi_dev and bi_sector are

1453

* are set to describe the memory buffer, and that bi_dev and bi_sector are

1452

* set to describe the device address, and the

1454

* set to describe the device address, and the

1453

* bi_end_io and optionally bi_private are set to describe how

1455

* bi_end_io and optionally bi_private are set to describe how

1454

* completion notification should be signaled.

1456

* completion notification should be signaled.

1455

*

1457

*

1456

* generic_make_request and the drivers it calls may use bi_next if this

1458

* generic_make_request and the drivers it calls may use bi_next if this

1457

* bio happens to be merged with someone else, and may change bi_dev and

1459

* bio happens to be merged with someone else, and may change bi_dev and

1458

* bi_sector for remaps as it sees fit. So the values of these fields

1460

* bi_sector for remaps as it sees fit. So the values of these fields

1459

* should NOT be depended on after the call to generic_make_request.

1461

* should NOT be depended on after the call to generic_make_request.

1460

*/

1462

*/

1461

static inline void __generic_make_request(struct bio *bio)

1463

static inline void __generic_make_request(struct bio *bio)

1462

{

1464

{

1463

struct request_queue *q;

1465

struct request_queue *q;

1464

sector_t old_sector;

1466

sector_t old_sector;

1465

int ret, nr_sectors = bio_sectors(bio);

1467

int ret, nr_sectors = bio_sectors(bio);

1466

dev_t old_dev;

1468

dev_t old_dev;

1467

int err = -EIO;

1469

int err = -EIO;

1468

1470

1469

might_sleep();

1471

might_sleep();

1470

1472

1471

if (bio_check_eod(bio, nr_sectors))

1473

if (bio_check_eod(bio, nr_sectors))

1472

goto end_io;

1474

goto end_io;

1473

1475

1474

/*

1476

/*

1475

* Resolve the mapping until finished. (drivers are

1477

* Resolve the mapping until finished. (drivers are

1476

* still free to implement/resolve their own stacking

1478

* still free to implement/resolve their own stacking

1477

* by explicitly returning 0)

1479

* by explicitly returning 0)

1478

*

1480

*

1479

* NOTE: we don't repeat the blk_size check for each new device.

1481

* NOTE: we don't repeat the blk_size check for each new device.

1480

* Stacking drivers are expected to know what they are doing.

1482

* Stacking drivers are expected to know what they are doing.

1481

*/

1483

*/

1482

old_sector = -1;

1484

old_sector = -1;

1483

old_dev = 0;

1485

old_dev = 0;

1484

do {

1486

do {

1485

char b[BDEVNAME_SIZE];

1487

char b[BDEVNAME_SIZE];

1486

1488

1487

q = bdev_get_queue(bio->bi_bdev);

1489

q = bdev_get_queue(bio->bi_bdev);

1488

if (unlikely(!q)) {

1490

if (unlikely(!q)) {

1489

printk(KERN_ERR

1491

printk(KERN_ERR

1490

"generic_make_request: Trying to access "

1492

"generic_make_request: Trying to access "

1491

"nonexistent block-device %s (%Lu)\n",

1493

"nonexistent block-device %s (%Lu)\n",

1492

bdevname(bio->bi_bdev, b),

1494

bdevname(bio->bi_bdev, b),

1493

(long long) bio->bi_sector);

1495

(long long) bio->bi_sector);

1494

goto end_io;

1496

goto end_io;

1495

}

1497

}

1496

1498

1497

if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&

1499

if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&

1498

nr_sectors > queue_max_hw_sectors(q))) {

1500

nr_sectors > queue_max_hw_sectors(q))) {

1499

printk(KERN_ERR "bio too big device %s (%u > %u)\n",

1501

printk(KERN_ERR "bio too big device %s (%u > %u)\n",

1500

bdevname(bio->bi_bdev, b),

1502

bdevname(bio->bi_bdev, b),

1501

bio_sectors(bio),

1503

bio_sectors(bio),

1502

queue_max_hw_sectors(q));

1504

queue_max_hw_sectors(q));

1503

goto end_io;

1505

goto end_io;

1504

}

1506

}

1505

1507

1506

if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))

1508

if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))

1507

goto end_io;

1509

goto end_io;

1508

1510

1509

if (should_fail_request(bio))

1511

if (should_fail_request(bio))

1510

goto end_io;

1512

goto end_io;

1511

1513

1512

/*

1514

/*

1513

* If this device has partitions, remap block n

1515

* If this device has partitions, remap block n

1514

* of partition p to block n+start(p) of the disk.

1516

* of partition p to block n+start(p) of the disk.

1515

*/

1517

*/

1516

blk_partition_remap(bio);

1518

blk_partition_remap(bio);

1517

1519

1518

if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))

1520

if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))

1519

goto end_io;

1521

goto end_io;

1520

1522

1521

if (old_sector != -1)

1523

if (old_sector != -1)

1522

trace_block_bio_remap(q, bio, old_dev, old_sector);

1524

trace_block_bio_remap(q, bio, old_dev, old_sector);

1523

1525

1524

old_sector = bio->bi_sector;

1526

old_sector = bio->bi_sector;

1525

old_dev = bio->bi_bdev->bd_dev;

1527

old_dev = bio->bi_bdev->bd_dev;

1526

1528

1527

if (bio_check_eod(bio, nr_sectors))

1529

if (bio_check_eod(bio, nr_sectors))

1528

goto end_io;

1530

goto end_io;

1529

1531

1530

/*

1532

/*

1531

* Filter flush bio's early so that make_request based

1533

* Filter flush bio's early so that make_request based

1532

* drivers without flush support don't have to worry

1534

* drivers without flush support don't have to worry

1533

* about them.

1535

* about them.

1534

*/

1536

*/

1535

if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {

1537

if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {

1536

bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);

1538

bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);

1537

if (!nr_sectors) {

1539

if (!nr_sectors) {

1538

err = 0;

1540

err = 0;

1539

goto end_io;

1541

goto end_io;

1540

}

1542

}

1541

}

1543

}

1542

1544

1543

if ((bio->bi_rw & REQ_DISCARD) &&

1545

if ((bio->bi_rw & REQ_DISCARD) &&

1544

(!blk_queue_discard(q) ||

1546

(!blk_queue_discard(q) ||

1545

((bio->bi_rw & REQ_SECURE) &&

1547

((bio->bi_rw & REQ_SECURE) &&

1546

!blk_queue_secdiscard(q)))) {

1548

!blk_queue_secdiscard(q)))) {

1547

err = -EOPNOTSUPP;

1549

err = -EOPNOTSUPP;

1548

goto end_io;

1550

goto end_io;

1549

}

1551

}

1550

1552

1551

blk_throtl_bio(q, &bio);

1553

blk_throtl_bio(q, &bio);

1552

1554

1553

/*

1555

/*

1554

* If bio = NULL, bio has been throttled and will be submitted

1556

* If bio = NULL, bio has been throttled and will be submitted

1555

* later.

1557

* later.

1556

*/

1558

*/

1557

if (!bio)

1559

if (!bio)

1558

break;

1560

break;

1559

1561

1560

trace_block_bio_queue(q, bio);

1562

trace_block_bio_queue(q, bio);

1561

1563

1562

ret = q->make_request_fn(q, bio);

1564

ret = q->make_request_fn(q, bio);

1563

} while (ret);

1565

} while (ret);

1564

1566

1565

return;

1567

return;

1566

1568

1567

end_io:

1569

end_io:

1568

bio_endio(bio, err);

1570

bio_endio(bio, err);

1569

}

1571

}

1570

1572

1571

/*

1573

/*

1572

* We only want one ->make_request_fn to be active at a time,

1574

* We only want one ->make_request_fn to be active at a time,

1573

* else stack usage with stacked devices could be a problem.

1575

* else stack usage with stacked devices could be a problem.

1574

* So use current->bio_list to keep a list of requests

1576

* So use current->bio_list to keep a list of requests

1575

* submited by a make_request_fn function.

1577

* submited by a make_request_fn function.

1576

* current->bio_list is also used as a flag to say if

1578

* current->bio_list is also used as a flag to say if

1577

* generic_make_request is currently active in this task or not.

1579

* generic_make_request is currently active in this task or not.

1578

* If it is NULL, then no make_request is active. If it is non-NULL,

1580

* If it is NULL, then no make_request is active. If it is non-NULL,

1579

* then a make_request is active, and new requests should be added

1581

* then a make_request is active, and new requests should be added

1580

* at the tail

1582

* at the tail

1581

*/

1583

*/

1582

void generic_make_request(struct bio *bio)

1584

void generic_make_request(struct bio *bio)

1583

{

1585

{

1584

struct bio_list bio_list_on_stack;

1586

struct bio_list bio_list_on_stack;

1585

1587

1586

if (current->bio_list) {

1588

if (current->bio_list) {

1587

/* make_request is active */

1589

/* make_request is active */

1588

bio_list_add(current->bio_list, bio);

1590

bio_list_add(current->bio_list, bio);

1589

return;

1591

return;

1590

}

1592

}

1591

/* following loop may be a bit non-obvious, and so deserves some

1593

/* following loop may be a bit non-obvious, and so deserves some

1592

* explanation.

1594

* explanation.

1593

* Before entering the loop, bio->bi_next is NULL (as all callers

1595

* Before entering the loop, bio->bi_next is NULL (as all callers

1594

* ensure that) so we have a list with a single bio.

1596

* ensure that) so we have a list with a single bio.

1595

* We pretend that we have just taken it off a longer list, so

1597

* We pretend that we have just taken it off a longer list, so

1596

* we assign bio_list to a pointer to the bio_list_on_stack,

1598

* we assign bio_list to a pointer to the bio_list_on_stack,

1597

* thus initialising the bio_list of new bios to be

1599

* thus initialising the bio_list of new bios to be

1598

* added. __generic_make_request may indeed add some more bios

1600

* added. __generic_make_request may indeed add some more bios

1599

* through a recursive call to generic_make_request. If it

1601

* through a recursive call to generic_make_request. If it

1600

* did, we find a non-NULL value in bio_list and re-enter the loop

1602

* did, we find a non-NULL value in bio_list and re-enter the loop

1601

* from the top. In this case we really did just take the bio

1603

* from the top. In this case we really did just take the bio

1602

* of the top of the list (no pretending) and so remove it from

1604

* of the top of the list (no pretending) and so remove it from

1603

* bio_list, and call into __generic_make_request again.

1605

* bio_list, and call into __generic_make_request again.

1604

*

1606

*

1605

* The loop was structured like this to make only one call to

1607

* The loop was structured like this to make only one call to

1606

* __generic_make_request (which is important as it is large and

1608

* __generic_make_request (which is important as it is large and

1607

* inlined) and to keep the structure simple.

1609

* inlined) and to keep the structure simple.

1608

*/

1610

*/

1609

BUG_ON(bio->bi_next);

1611

BUG_ON(bio->bi_next);

1610

bio_list_init(&bio_list_on_stack);

1612

bio_list_init(&bio_list_on_stack);

1611

current->bio_list = &bio_list_on_stack;

1613

current->bio_list = &bio_list_on_stack;

1612

do {

1614

do {

1613

__generic_make_request(bio);

1615

__generic_make_request(bio);

1614

bio = bio_list_pop(current->bio_list);

1616

bio = bio_list_pop(current->bio_list);

1615

} while (bio);

1617

} while (bio);

1616

current->bio_list = NULL; /* deactivate */

1618

current->bio_list = NULL; /* deactivate */

1617

}

1619

}

1618

EXPORT_SYMBOL(generic_make_request);

1620

EXPORT_SYMBOL(generic_make_request);

1619

1621

1620

/**

1622

/**

1621

* submit_bio - submit a bio to the block device layer for I/O

1623

* submit_bio - submit a bio to the block device layer for I/O

1622

* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)

1624

* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)

1623

* @bio: The &struct bio which describes the I/O

1625

* @bio: The &struct bio which describes the I/O

1624

*

1626

*

1625

* submit_bio() is very similar in purpose to generic_make_request(), and

1627

* submit_bio() is very similar in purpose to generic_make_request(), and

1626

* uses that function to do most of the work. Both are fairly rough

1628

* uses that function to do most of the work. Both are fairly rough

1627

* interfaces; @bio must be presetup and ready for I/O.

1629

* interfaces; @bio must be presetup and ready for I/O.

1628

*

1630

*

1629

*/

1631

*/

1630

void submit_bio(int rw, struct bio *bio)

1632

void submit_bio(int rw, struct bio *bio)

1631

{

1633

{

1632

int count = bio_sectors(bio);

1634

int count = bio_sectors(bio);

1633

1635

1634

bio->bi_rw |= rw;

1636

bio->bi_rw |= rw;

1635

1637

1636

/*

1638

/*

1637

* If it's a regular read/write or a barrier with data attached,

1639

* If it's a regular read/write or a barrier with data attached,

1638

* go through the normal accounting stuff before submission.

1640

* go through the normal accounting stuff before submission.

1639

*/

1641

*/

1640

if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {

1642

if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {

1641

if (rw & WRITE) {

1643

if (rw & WRITE) {

1642

count_vm_events(PGPGOUT, count);

1644

count_vm_events(PGPGOUT, count);

1643

} else {

1645

} else {

1644

task_io_account_read(bio->bi_size);

1646

task_io_account_read(bio->bi_size);

1645

count_vm_events(PGPGIN, count);

1647

count_vm_events(PGPGIN, count);

1646

}

1648

}

1647

1649

1648

if (unlikely(block_dump)) {

1650

if (unlikely(block_dump)) {

1649

char b[BDEVNAME_SIZE];

1651

char b[BDEVNAME_SIZE];

1650

printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",

1652

printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",

1651

current->comm, task_pid_nr(current),

1653

current->comm, task_pid_nr(current),

1652

(rw & WRITE) ? "WRITE" : "READ",

1654

(rw & WRITE) ? "WRITE" : "READ",

1653

(unsigned long long)bio->bi_sector,

1655

(unsigned long long)bio->bi_sector,

1654

bdevname(bio->bi_bdev, b),

1656

bdevname(bio->bi_bdev, b),

1655

count);

1657

count);

1656

}

1658

}

1657

}

1659

}

1658

1660

1659

generic_make_request(bio);

1661

generic_make_request(bio);

1660

}

1662

}

1661

EXPORT_SYMBOL(submit_bio);

1663

EXPORT_SYMBOL(submit_bio);

1662

1664

1663

/**

1665

/**

1664

* blk_rq_check_limits - Helper function to check a request for the queue limit

1666

* blk_rq_check_limits - Helper function to check a request for the queue limit

1665

* @q: the queue

1667

* @q: the queue

1666

* @rq: the request being checked

1668

* @rq: the request being checked

1667

*

1669

*

1668

* Description:

1670

* Description:

1669

* @rq may have been made based on weaker limitations of upper-level queues

1671

* @rq may have been made based on weaker limitations of upper-level queues

1670

* in request stacking drivers, and it may violate the limitation of @q.

1672

* in request stacking drivers, and it may violate the limitation of @q.

1671

* Since the block layer and the underlying device driver trust @rq

1673

* Since the block layer and the underlying device driver trust @rq

1672

* after it is inserted to @q, it should be checked against @q before

1674

* after it is inserted to @q, it should be checked against @q before

1673

* the insertion using this generic function.

1675

* the insertion using this generic function.

1674

*

1676

*

1675

* This function should also be useful for request stacking drivers

1677

* This function should also be useful for request stacking drivers

1676

* in some cases below, so export this function.

1678

* in some cases below, so export this function.

1677

* Request stacking drivers like request-based dm may change the queue

1679

* Request stacking drivers like request-based dm may change the queue

1678

* limits while requests are in the queue (e.g. dm's table swapping).

1680

* limits while requests are in the queue (e.g. dm's table swapping).

1679

* Such request stacking drivers should check those requests agaist

1681

* Such request stacking drivers should check those requests agaist

1680

* the new queue limits again when they dispatch those requests,

1682

* the new queue limits again when they dispatch those requests,

1681

* although such checkings are also done against the old queue limits

1683

* although such checkings are also done against the old queue limits

1682

* when submitting requests.

1684

* when submitting requests.

1683

*/

1685

*/

1684

int blk_rq_check_limits(struct request_queue *q, struct request *rq)

1686

int blk_rq_check_limits(struct request_queue *q, struct request *rq)

1685

{

1687

{

1686

if (rq->cmd_flags & REQ_DISCARD)

1688

if (rq->cmd_flags & REQ_DISCARD)

1687

return 0;

1689

return 0;

1688

1690

1689

if (blk_rq_sectors(rq) > queue_max_sectors(q) ||

1691

if (blk_rq_sectors(rq) > queue_max_sectors(q) ||

1690

blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {

1692

blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {

1691

printk(KERN_ERR "%s: over max size limit.\n", __func__);

1693

printk(KERN_ERR "%s: over max size limit.\n", __func__);

1692

return -EIO;

1694

return -EIO;

1693

}

1695

}

1694

1696

1695

/*

1697

/*

1696

* queue's settings related to segment counting like q->bounce_pfn

1698

* queue's settings related to segment counting like q->bounce_pfn

1697

* may differ from that of other stacking queues.

1699

* may differ from that of other stacking queues.

1698

* Recalculate it to check the request correctly on this queue's

1700

* Recalculate it to check the request correctly on this queue's

1699

* limitation.

1701

* limitation.

1700

*/

1702

*/

1701

blk_recalc_rq_segments(rq);

1703

blk_recalc_rq_segments(rq);

1702

if (rq->nr_phys_segments > queue_max_segments(q)) {

1704

if (rq->nr_phys_segments > queue_max_segments(q)) {

1703

printk(KERN_ERR "%s: over max segments limit.\n", __func__);

1705

printk(KERN_ERR "%s: over max segments limit.\n", __func__);

1704

return -EIO;

1706

return -EIO;

1705

}

1707

}

1706

1708

1707

return 0;

1709

return 0;

1708

}

1710

}

1709

EXPORT_SYMBOL_GPL(blk_rq_check_limits);

1711

EXPORT_SYMBOL_GPL(blk_rq_check_limits);

1710

1712

1711

/**

1713

/**

1712

* blk_insert_cloned_request - Helper for stacking drivers to submit a request

1714

* blk_insert_cloned_request - Helper for stacking drivers to submit a request

1713

* @q: the queue to submit the request

1715

* @q: the queue to submit the request

1714

* @rq: the request being queued

1716

* @rq: the request being queued

1715

*/

1717

*/

1716

int blk_insert_cloned_request(struct request_queue *q, struct request *rq)

1718

int blk_insert_cloned_request(struct request_queue *q, struct request *rq)

1717

{

1719

{

1718

unsigned long flags;

1720

unsigned long flags;

1719

1721

1720

if (blk_rq_check_limits(q, rq))

1722

if (blk_rq_check_limits(q, rq))

1721

return -EIO;

1723

return -EIO;

1722

1724

1723

#ifdef CONFIG_FAIL_MAKE_REQUEST

1725

#ifdef CONFIG_FAIL_MAKE_REQUEST

1724

if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&

1726

if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&

1725

should_fail(&fail_make_request, blk_rq_bytes(rq)))

1727

should_fail(&fail_make_request, blk_rq_bytes(rq)))

1726

return -EIO;

1728

return -EIO;

1727

#endif

1729

#endif

1728

1730

1729

spin_lock_irqsave(q->queue_lock, flags);

1731

spin_lock_irqsave(q->queue_lock, flags);

1730

1732

1731

/*

1733

/*

1732

* Submitting request must be dequeued before calling this function

1734

* Submitting request must be dequeued before calling this function

1733

* because it will be linked to another request_queue

1735

* because it will be linked to another request_queue

1734

*/

1736

*/

1735

BUG_ON(blk_queued_rq(rq));

1737

BUG_ON(blk_queued_rq(rq));

1736

1738

1737

add_acct_request(q, rq, ELEVATOR_INSERT_BACK);

1739

add_acct_request(q, rq, ELEVATOR_INSERT_BACK);

1738

spin_unlock_irqrestore(q->queue_lock, flags);

1740

spin_unlock_irqrestore(q->queue_lock, flags);

1739

1741

1740

return 0;

1742

return 0;

1741

}

1743

}

1742

EXPORT_SYMBOL_GPL(blk_insert_cloned_request);

1744

EXPORT_SYMBOL_GPL(blk_insert_cloned_request);

1743

1745

1744

/**

1746

/**

1745

* blk_rq_err_bytes - determine number of bytes till the next failure boundary

1747

* blk_rq_err_bytes - determine number of bytes till the next failure boundary

1746

* @rq: request to examine

1748

* @rq: request to examine

1747

*

1749

*

1748

* Description:

1750

* Description:

1749

* A request could be merge of IOs which require different failure

1751

* A request could be merge of IOs which require different failure

1750

* handling. This function determines the number of bytes which

1752

* handling. This function determines the number of bytes which

1751

* can be failed from the beginning of the request without

1753

* can be failed from the beginning of the request without

1752

* crossing into area which need to be retried further.

1754

* crossing into area which need to be retried further.

1753

*

1755

*

1754

* Return:

1756

* Return:

1755

* The number of bytes to fail.

1757

* The number of bytes to fail.

1756

*

1758

*

1757

* Context:

1759

* Context:

1758

* queue_lock must be held.

1760

* queue_lock must be held.

1759

*/

1761

*/

1760

unsigned int blk_rq_err_bytes(const struct request *rq)

1762

unsigned int blk_rq_err_bytes(const struct request *rq)

1761

{

1763

{

1762

unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;

1764

unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;

1763

unsigned int bytes = 0;

1765

unsigned int bytes = 0;

1764

struct bio *bio;

1766

struct bio *bio;

1765

1767

1766

if (!(rq->cmd_flags & REQ_MIXED_MERGE))

1768

if (!(rq->cmd_flags & REQ_MIXED_MERGE))

1767

return blk_rq_bytes(rq);

1769

return blk_rq_bytes(rq);

1768

1770

1769

/*

1771

/*

1770

* Currently the only 'mixing' which can happen is between

1772

* Currently the only 'mixing' which can happen is between

1771

* different fastfail types. We can safely fail portions

1773

* different fastfail types. We can safely fail portions

1772

* which have all the failfast bits that the first one has -

1774

* which have all the failfast bits that the first one has -

1773

* the ones which are at least as eager to fail as the first

1775

* the ones which are at least as eager to fail as the first

1774

* one.

1776

* one.

1775

*/

1777

*/

1776

for (bio = rq->bio; bio; bio = bio->bi_next) {

1778

for (bio = rq->bio; bio; bio = bio->bi_next) {

1777

if ((bio->bi_rw & ff) != ff)

1779

if ((bio->bi_rw & ff) != ff)

1778

break;

1780

break;

1779

bytes += bio->bi_size;

1781

bytes += bio->bi_size;

1780

}

1782

}

1781

1783

1782

/* this could lead to infinite loop */

1784

/* this could lead to infinite loop */

1783

BUG_ON(blk_rq_bytes(rq) && !bytes);

1785

BUG_ON(blk_rq_bytes(rq) && !bytes);

1784

return bytes;

1786

return bytes;

1785

}

1787

}

1786

EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

1788

EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

1787

1789

1788

static void blk_account_io_completion(struct request *req, unsigned int bytes)

1790

static void blk_account_io_completion(struct request *req, unsigned int bytes)

1789

{

1791

{

1790

if (blk_do_io_stat(req)) {

1792

if (blk_do_io_stat(req)) {

1791

const int rw = rq_data_dir(req);

1793

const int rw = rq_data_dir(req);

1792

struct hd_struct *part;

1794

struct hd_struct *part;

1793

int cpu;

1795

int cpu;

1794

1796

1795

cpu = part_stat_lock();

1797

cpu = part_stat_lock();

1796

part = req->part;

1798

part = req->part;

1797

part_stat_add(cpu, part, sectors[rw], bytes >> 9);

1799

part_stat_add(cpu, part, sectors[rw], bytes >> 9);

1798

part_stat_unlock();

1800

part_stat_unlock();

1799

}

1801

}

1800

}

1802

}

1801

1803

1802

static void blk_account_io_done(struct request *req)

1804

static void blk_account_io_done(struct request *req)

1803

{

1805

{

1804

/*

1806

/*

1805

* Account IO completion. flush_rq isn't accounted as a

1807

* Account IO completion. flush_rq isn't accounted as a

1806

* normal IO on queueing nor completion. Accounting the

1808

* normal IO on queueing nor completion. Accounting the

1807

* containing request is enough.

1809

* containing request is enough.

1808

*/

1810

*/

1809

if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {

1811

if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {

1810

unsigned long duration = jiffies - req->start_time;

1812

unsigned long duration = jiffies - req->start_time;

1811

const int rw = rq_data_dir(req);

1813

const int rw = rq_data_dir(req);

1812

struct hd_struct *part;

1814

struct hd_struct *part;

1813

int cpu;

1815

int cpu;

1814

1816

1815

cpu = part_stat_lock();

1817

cpu = part_stat_lock();

1816

part = req->part;

1818

part = req->part;

1817

1819

1818

part_stat_inc(cpu, part, ios[rw]);

1820

part_stat_inc(cpu, part, ios[rw]);

1819

part_stat_add(cpu, part, ticks[rw], duration);

1821

part_stat_add(cpu, part, ticks[rw], duration);

1820

part_round_stats(cpu, part);

1822

part_round_stats(cpu, part);

1821

part_dec_in_flight(part, rw);

1823

part_dec_in_flight(part, rw);

1822

1824

1823

hd_struct_put(part);

1825

hd_struct_put(part);

1824

part_stat_unlock();

1826

part_stat_unlock();

1825

}

1827

}

1826

}

1828

}

1827

1829

1828

/**

1830

/**

1829

* blk_peek_request - peek at the top of a request queue

1831

* blk_peek_request - peek at the top of a request queue

1830

* @q: request queue to peek at

1832

* @q: request queue to peek at

1831

*

1833

*

1832

* Description:

1834

* Description:

1833

* Return the request at the top of @q. The returned request

1835

* Return the request at the top of @q. The returned request

1834

* should be started using blk_start_request() before LLD starts

1836

* should be started using blk_start_request() before LLD starts

1835

* processing it.

1837

* processing it.

1836

*

1838

*

1837

* Return:

1839

* Return:

1838

* Pointer to the request at the top of @q if available. Null

1840

* Pointer to the request at the top of @q if available. Null

1839

* otherwise.

1841

* otherwise.

1840

*

1842

*

1841

* Context:

1843

* Context:

1842

* queue_lock must be held.

1844

* queue_lock must be held.

1843

*/

1845

*/

1844

struct request *blk_peek_request(struct request_queue *q)

1846

struct request *blk_peek_request(struct request_queue *q)

1845

{

1847

{

1846

struct request *rq;

1848

struct request *rq;

1847

int ret;

1849

int ret;

1848

1850

1849

while ((rq = __elv_next_request(q)) != NULL) {

1851

while ((rq = __elv_next_request(q)) != NULL) {

1850

if (!(rq->cmd_flags & REQ_STARTED)) {

1852

if (!(rq->cmd_flags & REQ_STARTED)) {

1851

/*

1853

/*

1852

* This is the first time the device driver

1854

* This is the first time the device driver

1853

* sees this request (possibly after

1855

* sees this request (possibly after

1854

* requeueing). Notify IO scheduler.

1856

* requeueing). Notify IO scheduler.

1855

*/

1857

*/

1856

if (rq->cmd_flags & REQ_SORTED)

1858

if (rq->cmd_flags & REQ_SORTED)

1857

elv_activate_rq(q, rq);

1859

elv_activate_rq(q, rq);

1858

1860

1859

/*

1861

/*

1860

* just mark as started even if we don't start

1862

* just mark as started even if we don't start

1861

* it, a request that has been delayed should

1863

* it, a request that has been delayed should

1862

* not be passed by new incoming requests

1864

* not be passed by new incoming requests

1863

*/

1865

*/

1864

rq->cmd_flags |= REQ_STARTED;

1866

rq->cmd_flags |= REQ_STARTED;

1865

trace_block_rq_issue(q, rq);

1867

trace_block_rq_issue(q, rq);

1866

}

1868

}

1867

1869

1868

if (!q->boundary_rq || q->boundary_rq == rq) {

1870

if (!q->boundary_rq || q->boundary_rq == rq) {

1869

q->end_sector = rq_end_sector(rq);

1871

q->end_sector = rq_end_sector(rq);

1870

q->boundary_rq = NULL;

1872

q->boundary_rq = NULL;

1871

}

1873

}

1872

1874

1873

if (rq->cmd_flags & REQ_DONTPREP)

1875

if (rq->cmd_flags & REQ_DONTPREP)

1874

break;

1876

break;

1875

1877

1876

if (q->dma_drain_size && blk_rq_bytes(rq)) {

1878

if (q->dma_drain_size && blk_rq_bytes(rq)) {

1877

/*

1879

/*

1878

* make sure space for the drain appears we

1880

* make sure space for the drain appears we

1879

* know we can do this because max_hw_segments

1881

* know we can do this because max_hw_segments

1880

* has been adjusted to be one fewer than the

1882

* has been adjusted to be one fewer than the

1881

* device can handle

1883

* device can handle

1882

*/

1884

*/

1883

rq->nr_phys_segments++;

1885

rq->nr_phys_segments++;

1884

}

1886

}

1885

1887

1886

if (!q->prep_rq_fn)

1888

if (!q->prep_rq_fn)

1887

break;

1889

break;

1888

1890

1889

ret = q->prep_rq_fn(q, rq);

1891

ret = q->prep_rq_fn(q, rq);

1890

if (ret == BLKPREP_OK) {

1892

if (ret == BLKPREP_OK) {

1891

break;

1893

break;

1892

} else if (ret == BLKPREP_DEFER) {

1894

} else if (ret == BLKPREP_DEFER) {

1893

/*

1895

/*

1894

* the request may have been (partially) prepped.

1896

* the request may have been (partially) prepped.

1895

* we need to keep this request in the front to

1897

* we need to keep this request in the front to

1896

* avoid resource deadlock. REQ_STARTED will

1898

* avoid resource deadlock. REQ_STARTED will

1897

* prevent other fs requests from passing this one.

1899

* prevent other fs requests from passing this one.

1898

*/

1900

*/

1899

if (q->dma_drain_size && blk_rq_bytes(rq) &&

1901

if (q->dma_drain_size && blk_rq_bytes(rq) &&

1900

!(rq->cmd_flags & REQ_DONTPREP)) {

1902

!(rq->cmd_flags & REQ_DONTPREP)) {

1901

/*

1903

/*

1902

* remove the space for the drain we added

1904

* remove the space for the drain we added

1903

* so that we don't add it again

1905

* so that we don't add it again

1904

*/

1906

*/

1905

--rq->nr_phys_segments;

1907

--rq->nr_phys_segments;

1906

}

1908

}

1907

1909

1908

rq = NULL;

1910

rq = NULL;

1909

break;

1911

break;

1910

} else if (ret == BLKPREP_KILL) {

1912

} else if (ret == BLKPREP_KILL) {

1911

rq->cmd_flags |= REQ_QUIET;

1913

rq->cmd_flags |= REQ_QUIET;

1912

/*

1914

/*

1913

* Mark this request as started so we don't trigger

1915

* Mark this request as started so we don't trigger

1914

* any debug logic in the end I/O path.

1916

* any debug logic in the end I/O path.

1915

*/

1917

*/

1916

blk_start_request(rq);

1918

blk_start_request(rq);

1917

__blk_end_request_all(rq, -EIO);

1919

__blk_end_request_all(rq, -EIO);

1918

} else {

1920

} else {

1919

printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);

1921

printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);

1920

break;

1922

break;

1921

}

1923

}

1922

}

1924

}

1923

1925

1924

return rq;

1926

return rq;

1925

}

1927

}

1926

EXPORT_SYMBOL(blk_peek_request);

1928

EXPORT_SYMBOL(blk_peek_request);

1927

1929

1928

void blk_dequeue_request(struct request *rq)

1930

void blk_dequeue_request(struct request *rq)

1929

{

1931

{

1930

struct request_queue *q = rq->q;

1932

struct request_queue *q = rq->q;

1931

1933

1932

BUG_ON(list_empty(&rq->queuelist));

1934

BUG_ON(list_empty(&rq->queuelist));

1933

BUG_ON(ELV_ON_HASH(rq));

1935

BUG_ON(ELV_ON_HASH(rq));

1934

1936

1935

list_del_init(&rq->queuelist);

1937

list_del_init(&rq->queuelist);

1936

1938

1937

/*

1939

/*

1938

* the time frame between a request being removed from the lists

1940

* the time frame between a request being removed from the lists

1939

* and to it is freed is accounted as io that is in progress at

1941

* and to it is freed is accounted as io that is in progress at

1940

* the driver side.

1942

* the driver side.

1941

*/

1943

*/

1942

if (blk_account_rq(rq)) {

1944

if (blk_account_rq(rq)) {

1943

q->in_flight[rq_is_sync(rq)]++;

1945

q->in_flight[rq_is_sync(rq)]++;

1944

set_io_start_time_ns(rq);

1946

set_io_start_time_ns(rq);

1945

}

1947

}

1946

}

1948

}

1947

1949

1948

/**

1950

/**

1949

* blk_start_request - start request processing on the driver

1951

* blk_start_request - start request processing on the driver

1950

* @req: request to dequeue

1952

* @req: request to dequeue

1951

*

1953

*

1952

* Description:

1954

* Description:

1953

* Dequeue @req and start timeout timer on it. This hands off the

1955

* Dequeue @req and start timeout timer on it. This hands off the

1954

* request to the driver.

1956

* request to the driver.

1955

*

1957

*

1956

* Block internal functions which don't want to start timer should

1958

* Block internal functions which don't want to start timer should

1957

* call blk_dequeue_request().

1959

* call blk_dequeue_request().

1958

*

1960

*

1959

* Context:

1961

* Context:

1960

* queue_lock must be held.

1962

* queue_lock must be held.

1961

*/

1963

*/

1962

void blk_start_request(struct request *req)

1964

void blk_start_request(struct request *req)

1963

{

1965

{

1964

blk_dequeue_request(req);

1966

blk_dequeue_request(req);

1965

1967

1966

/*

1968

/*

1967

* We are now handing the request to the hardware, initialize

1969

* We are now handing the request to the hardware, initialize

1968

* resid_len to full count and add the timeout handler.

1970

* resid_len to full count and add the timeout handler.

1969

*/

1971

*/

1970

req->resid_len = blk_rq_bytes(req);

1972

req->resid_len = blk_rq_bytes(req);

1971

if (unlikely(blk_bidi_rq(req)))

1973

if (unlikely(blk_bidi_rq(req)))

1972

req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

1974

req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

1973

1975

1974

blk_add_timer(req);

1976

blk_add_timer(req);

1975

}

1977

}

1976

EXPORT_SYMBOL(blk_start_request);

1978

EXPORT_SYMBOL(blk_start_request);

1977

1979

1978

/**
 * blk_fetch_request - fetch a request from a request queue
 * @q: request queue to fetch a request from
 *
 * Description:
 *     Peek at the request at the top of @q and, if there is one, start
 *     it via blk_start_request() before returning it, so the LLD can
 *     begin processing it immediately.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 *
 * Context:
 *     queue_lock must be held.
 */
struct request *blk_fetch_request(struct request_queue *q)
{
	struct request *head = blk_peek_request(q);

	/* Only an existing request is started; otherwise hand back NULL. */
	if (head)
		blk_start_request(head);

	return head;
}
EXPORT_SYMBOL(blk_fetch_request);

2003

2005

2004

/**

2006

/**

2005

* blk_update_request - Special helper function for request stacking drivers

2007

* blk_update_request - Special helper function for request stacking drivers

2006

* @req: the request being processed

2008

* @req: the request being processed

2007

* @error: %0 for success, < %0 for error

2009

* @error: %0 for success, < %0 for error

2008

* @nr_bytes: number of bytes to complete @req

2010

* @nr_bytes: number of bytes to complete @req

2009

*

2011

*

2010

* Description:

2012

* Description:

2011

* Ends I/O on a number of bytes attached to @req, but doesn't complete

2013

* Ends I/O on a number of bytes attached to @req, but doesn't complete

2012

* the request structure even if @req doesn't have leftover.

2014

* the request structure even if @req doesn't have leftover.

2013

* If @req has leftover, sets it up for the next range of segments.

2015

* If @req has leftover, sets it up for the next range of segments.

2014

*

2016

*

2015

* This special helper function is only for request stacking drivers

2017

* This special helper function is only for request stacking drivers

2016

* (e.g. request-based dm) so that they can handle partial completion.

2018

* (e.g. request-based dm) so that they can handle partial completion.

2017

* Actual device drivers should use blk_end_request instead.

2019

* Actual device drivers should use blk_end_request instead.

2018

*

2020

*

2019

* Passing the result of blk_rq_bytes() as @nr_bytes guarantees

2021

* Passing the result of blk_rq_bytes() as @nr_bytes guarantees

2020

* %false return from this function.

2022

* %false return from this function.

2021

*

2023

*

2022

* Return:

2024

* Return:

2023

* %false - this request doesn't have any more data

2025

* %false - this request doesn't have any more data

2024

* %true - this request has more data

2026

* %true - this request has more data

2025

**/

2027

**/

2026

bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)

2028

bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)

2027

{

2029

{

2028

int total_bytes, bio_nbytes, next_idx = 0;

2030

int total_bytes, bio_nbytes, next_idx = 0;

2029

struct bio *bio;

2031

struct bio *bio;

2030

2032

2031

if (!req->bio)

2033

if (!req->bio)

2032

return false;

2034

return false;

2033

2035

2034

trace_block_rq_complete(req->q, req);

2036

trace_block_rq_complete(req->q, req);

2035

2037

2036

/*

2038

/*

2037

* For fs requests, rq is just carrier of independent bio's

2039

* For fs requests, rq is just carrier of independent bio's

2038

* and each partial completion should be handled separately.

2040

* and each partial completion should be handled separately.

2039

* Reset per-request error on each partial completion.

2041

* Reset per-request error on each partial completion.

2040

*

2042

*

2041

* TODO: tj: This is too subtle. It would be better to let

2043

* TODO: tj: This is too subtle. It would be better to let

2042

* low level drivers do what they see fit.

2044

* low level drivers do what they see fit.

2043

*/

2045

*/

2044

if (req->cmd_type == REQ_TYPE_FS)

2046

if (req->cmd_type == REQ_TYPE_FS)

2045

req->errors = 0;

2047

req->errors = 0;

2046

2048

2047

if (error && req->cmd_type == REQ_TYPE_FS &&

2049

if (error && req->cmd_type == REQ_TYPE_FS &&

2048

!(req->cmd_flags & REQ_QUIET)) {

2050

!(req->cmd_flags & REQ_QUIET)) {

2049

char *error_type;

2051

char *error_type;

2050

2052

2051

switch (error) {

2053

switch (error) {

2052

case -ENOLINK:

2054

case -ENOLINK:

2053

error_type = "recoverable transport";

2055

error_type = "recoverable transport";

2054

break;

2056

break;

2055

case -EREMOTEIO:

2057

case -EREMOTEIO:

2056

error_type = "critical target";

2058

error_type = "critical target";

2057

break;

2059

break;

2058

case -EBADE:

2060

case -EBADE:

2059

error_type = "critical nexus";

2061

error_type = "critical nexus";

2060

break;

2062

break;

2061

case -EIO:

2063

case -EIO:

2062

default:

2064

default:

2063

error_type = "I/O";

2065

error_type = "I/O";

2064

break;

2066

break;

2065

}

2067

}

2066

printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",

2068

printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",

2067

error_type, req->rq_disk ? req->rq_disk->disk_name : "?",

2069

error_type, req->rq_disk ? req->rq_disk->disk_name : "?",

2068

(unsigned long long)blk_rq_pos(req));

2070

(unsigned long long)blk_rq_pos(req));

2069

}

2071

}

2070

2072

2071

blk_account_io_completion(req, nr_bytes);

2073

blk_account_io_completion(req, nr_bytes);

2072

2074

2073

total_bytes = bio_nbytes = 0;

2075

total_bytes = bio_nbytes = 0;

2074

while ((bio = req->bio) != NULL) {

2076

while ((bio = req->bio) != NULL) {

2075

int nbytes;

2077

int nbytes;

2076

2078

2077

if (nr_bytes >= bio->bi_size) {

2079

if (nr_bytes >= bio->bi_size) {

2078

req->bio = bio->bi_next;

2080

req->bio = bio->bi_next;

2079

nbytes = bio->bi_size;

2081

nbytes = bio->bi_size;

2080

req_bio_endio(req, bio, nbytes, error);

2082

req_bio_endio(req, bio, nbytes, error);

2081

next_idx = 0;

2083

next_idx = 0;

2082

bio_nbytes = 0;

2084

bio_nbytes = 0;

2083

} else {

2085

} else {

2084

int idx = bio->bi_idx + next_idx;

2086

int idx = bio->bi_idx + next_idx;

2085

2087

2086

if (unlikely(idx >= bio->bi_vcnt)) {

2088

if (unlikely(idx >= bio->bi_vcnt)) {

2087

blk_dump_rq_flags(req, "__end_that");

2089

blk_dump_rq_flags(req, "__end_that");

2088

printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",

2090

printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",

2089

__func__, idx, bio->bi_vcnt);

2091

__func__, idx, bio->bi_vcnt);

2090

break;

2092

break;

2091

}

2093

}

2092

2094

2093

nbytes = bio_iovec_idx(bio, idx)->bv_len;

2095

nbytes = bio_iovec_idx(bio, idx)->bv_len;

2094

BIO_BUG_ON(nbytes > bio->bi_size);

2096

BIO_BUG_ON(nbytes > bio->bi_size);

2095

2097

2096

/*

2098

/*

2097

* not a complete bvec done

2099

* not a complete bvec done

2098

*/

2100

*/

2099

if (unlikely(nbytes > nr_bytes)) {

2101

if (unlikely(nbytes > nr_bytes)) {

2100

bio_nbytes += nr_bytes;

2102

bio_nbytes += nr_bytes;

2101

total_bytes += nr_bytes;

2103

total_bytes += nr_bytes;

2102

break;

2104

break;

2103

}

2105

}

2104

2106

2105

/*

2107

/*

2106

* advance to the next vector

2108

* advance to the next vector

2107

*/

2109

*/

2108

next_idx++;

2110

next_idx++;

2109

bio_nbytes += nbytes;

2111

bio_nbytes += nbytes;

2110

}

2112

}

2111

2113

2112

total_bytes += nbytes;

2114

total_bytes += nbytes;

2113

nr_bytes -= nbytes;

2115

nr_bytes -= nbytes;

2114

2116

2115

bio = req->bio;

2117

bio = req->bio;

2116

if (bio) {

2118

if (bio) {

2117

/*

2119

/*

2118

* end more in this run, or just return 'not-done'

2120

* end more in this run, or just return 'not-done'

2119

*/

2121

*/

2120

if (unlikely(nr_bytes <= 0))

2122

if (unlikely(nr_bytes <= 0))

2121

break;

2123

break;

2122

}

2124

}

2123

}

2125

}

2124

2126

2125

/*

2127

/*

2126

* completely done

2128

* completely done

2127

*/

2129

*/

2128

if (!req->bio) {

2130

if (!req->bio) {

2129

/*

2131

/*

2130

* Reset counters so that the request stacking driver

2132

* Reset counters so that the request stacking driver

2131

* can find how many bytes remain in the request

2133

* can find how many bytes remain in the request

2132

* later.

2134

* later.

2133

*/

2135

*/

2134

req->__data_len = 0;

2136

req->__data_len = 0;

2135

return false;

2137

return false;

2136

}

2138

}

2137

2139

2138

/*

2140

/*

2139

* if the request wasn't completed, update state

2141

* if the request wasn't completed, update state

2140

*/

2142

*/

2141

if (bio_nbytes) {

2143

if (bio_nbytes) {

2142

req_bio_endio(req, bio, bio_nbytes, error);

2144

req_bio_endio(req, bio, bio_nbytes, error);

2143

bio->bi_idx += next_idx;

2145

bio->bi_idx += next_idx;

2144

bio_iovec(bio)->bv_offset += nr_bytes;

2146

bio_iovec(bio)->bv_offset += nr_bytes;

2145

bio_iovec(bio)->bv_len -= nr_bytes;

2147

bio_iovec(bio)->bv_len -= nr_bytes;

2146

}

2148

}

2147

2149

2148

req->__data_len -= total_bytes;

2150

req->__data_len -= total_bytes;

2149

req->buffer = bio_data(req->bio);

2151

req->buffer = bio_data(req->bio);

2150

2152

2151

/* update sector only for requests with clear definition of sector */

2153

/* update sector only for requests with clear definition of sector */

2152

if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))

2154

if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))

2153

req->__sector += total_bytes >> 9;

2155

req->__sector += total_bytes >> 9;

2154

2156

2155

/* mixed attributes always follow the first bio */

2157

/* mixed attributes always follow the first bio */

2156

if (req->cmd_flags & REQ_MIXED_MERGE) {

2158

if (req->cmd_flags & REQ_MIXED_MERGE) {

2157

req->cmd_flags &= ~REQ_FAILFAST_MASK;

2159

req->cmd_flags &= ~REQ_FAILFAST_MASK;

2158

req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;

2160

req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;

2159

}

2161

}

2160

2162

2161

/*

2163

/*

2162

* If total number of sectors is less than the first segment

2164

* If total number of sectors is less than the first segment

2163

* size, something has gone terribly wrong.

2165

* size, something has gone terribly wrong.

2164

*/

2166

*/

2165

if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {

2167

if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {

2166

blk_dump_rq_flags(req, "request botched");

2168

blk_dump_rq_flags(req, "request botched");

2167

req->__data_len = blk_rq_cur_bytes(req);

2169

req->__data_len = blk_rq_cur_bytes(req);

2168

}

2170

}

2169

2171

2170

/* recalculate the number of segments */

2172

/* recalculate the number of segments */

2171

blk_recalc_rq_segments(req);

2173

blk_recalc_rq_segments(req);

2172

2174

2173

return true;

2175

return true;

2174

}

2176

}

2175

EXPORT_SYMBOL_GPL(blk_update_request);

2177

EXPORT_SYMBOL_GPL(blk_update_request);

2176

2178

2177

static bool blk_update_bidi_request(struct request *rq, int error,

2179

static bool blk_update_bidi_request(struct request *rq, int error,

2178

unsigned int nr_bytes,

2180

unsigned int nr_bytes,

2179

unsigned int bidi_bytes)

2181

unsigned int bidi_bytes)

2180

{

2182

{

2181

if (blk_update_request(rq, error, nr_bytes))

2183

if (blk_update_request(rq, error, nr_bytes))

2182

return true;

2184

return true;

2183

2185

2184

/* Bidi request must be completed as a whole */

2186

/* Bidi request must be completed as a whole */

2185

if (unlikely(blk_bidi_rq(rq)) &&

2187

if (unlikely(blk_bidi_rq(rq)) &&

2186

blk_update_request(rq->next_rq, error, bidi_bytes))

2188

blk_update_request(rq->next_rq, error, bidi_bytes))

2187

return true;

2189

return true;

2188

2190

2189

if (blk_queue_add_random(rq->q))

2191

if (blk_queue_add_random(rq->q))

2190

add_disk_randomness(rq->rq_disk);

2192

add_disk_randomness(rq->rq_disk);

2191

2193

2192

return false;

2194

return false;

2193

}

2195

}

2194

2196

2195

/**

2197

/**

2196

* blk_unprep_request - unprepare a request

2198

* blk_unprep_request - unprepare a request

2197

* @req: the request

2199

* @req: the request

2198

*

2200

*

2199

* This function makes a request ready for complete resubmission (or

2201

* This function makes a request ready for complete resubmission (or

2200

* completion). It happens only after all error handling is complete,

2202

* completion). It happens only after all error handling is complete,

2201

* so represents the appropriate moment to deallocate any resources

2203

* so represents the appropriate moment to deallocate any resources

2202

* that were allocated to the request in the prep_rq_fn. The queue

2204

* that were allocated to the request in the prep_rq_fn. The queue

2203

* lock is held when calling this.

2205

* lock is held when calling this.

2204

*/

2206

*/

2205

void blk_unprep_request(struct request *req)

2207

void blk_unprep_request(struct request *req)

2206

{

2208

{

2207

struct request_queue *q = req->q;

2209

struct request_queue *q = req->q;

2208

2210

2209

req->cmd_flags &= ~REQ_DONTPREP;

2211

req->cmd_flags &= ~REQ_DONTPREP;

2210

if (q->unprep_rq_fn)

2212

if (q->unprep_rq_fn)

2211

q->unprep_rq_fn(q, req);

2213

q->unprep_rq_fn(q, req);

2212

}

2214

}

2213

EXPORT_SYMBOL_GPL(blk_unprep_request);

2215

EXPORT_SYMBOL_GPL(blk_unprep_request);

2214

2216

2215

/*

2217

/*

2216

* queue lock must be held

2218

* queue lock must be held

2217

*/

2219

*/

2218

static void blk_finish_request(struct request *req, int error)

2220

static void blk_finish_request(struct request *req, int error)

2219

{

2221

{

2220

if (blk_rq_tagged(req))

2222

if (blk_rq_tagged(req))

2221

blk_queue_end_tag(req->q, req);

2223

blk_queue_end_tag(req->q, req);

2222

2224

2223

BUG_ON(blk_queued_rq(req));

2225

BUG_ON(blk_queued_rq(req));

2224

2226

2225

if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)

2227

if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)

2226

laptop_io_completion(&req->q->backing_dev_info);

2228

laptop_io_completion(&req->q->backing_dev_info);

2227

2229

2228

blk_delete_timer(req);

2230

blk_delete_timer(req);

2229

2231

2230

if (req->cmd_flags & REQ_DONTPREP)

2232

if (req->cmd_flags & REQ_DONTPREP)

2231

blk_unprep_request(req);

2233

blk_unprep_request(req);

2232

2234

2233

2235

2234

blk_account_io_done(req);

2236

blk_account_io_done(req);

2235

2237

2236

if (req->end_io)

2238

if (req->end_io)

2237

req->end_io(req, error);

2239

req->end_io(req, error);

2238

else {

2240

else {

2239

if (blk_bidi_rq(req))

2241

if (blk_bidi_rq(req))

2240

__blk_put_request(req->next_rq->q, req->next_rq);

2242

__blk_put_request(req->next_rq->q, req->next_rq);

2241

2243

2242

__blk_put_request(req->q, req);

2244

__blk_put_request(req->q, req);

2243

}

2245

}

2244

}

2246

}

2245

2247

2246

/**

2248

/**

2247

* blk_end_bidi_request - Complete a bidi request

2249

* blk_end_bidi_request - Complete a bidi request

2248

* @rq: the request to complete

2250

* @rq: the request to complete

2249

* @error: %0 for success, < %0 for error

2251

* @error: %0 for success, < %0 for error

2250

* @nr_bytes: number of bytes to complete @rq

2252

* @nr_bytes: number of bytes to complete @rq

2251

* @bidi_bytes: number of bytes to complete @rq->next_rq

2253

* @bidi_bytes: number of bytes to complete @rq->next_rq

2252

*

2254

*

2253

* Description:

2255

* Description:

2254

* Ends I/O on a number of bytes attached to @rq and @rq->next_rq.

2256

* Ends I/O on a number of bytes attached to @rq and @rq->next_rq.

2255

* Drivers that supports bidi can safely call this member for any

2257

* Drivers that supports bidi can safely call this member for any

2256

* type of request, bidi or uni. In the later case @bidi_bytes is

2258

* type of request, bidi or uni. In the later case @bidi_bytes is

2257

* just ignored.

2259

* just ignored.

2258

*

2260

*

2259

* Return:

2261

* Return:

2260

* %false - we are done with this request

2262

* %false - we are done with this request

2261

* %true - still buffers pending for this request

2263

* %true - still buffers pending for this request

2262

**/

2264

**/

2263

static bool blk_end_bidi_request(struct request *rq, int error,

2265

static bool blk_end_bidi_request(struct request *rq, int error,

2264

unsigned int nr_bytes, unsigned int bidi_bytes)

2266

unsigned int nr_bytes, unsigned int bidi_bytes)

2265

{

2267

{

2266

struct request_queue *q = rq->q;

2268

struct request_queue *q = rq->q;

2267

unsigned long flags;

2269

unsigned long flags;

2268

2270

2269

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2271

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2270

return true;

2272

return true;

2271

2273

2272

spin_lock_irqsave(q->queue_lock, flags);

2274

spin_lock_irqsave(q->queue_lock, flags);

2273

blk_finish_request(rq, error);

2275

blk_finish_request(rq, error);

2274

spin_unlock_irqrestore(q->queue_lock, flags);

2276

spin_unlock_irqrestore(q->queue_lock, flags);

2275

2277

2276

return false;

2278

return false;

2277

}

2279

}

2278

2280

2279

/**

2281

/**

2280

* __blk_end_bidi_request - Complete a bidi request with queue lock held

2282

* __blk_end_bidi_request - Complete a bidi request with queue lock held

2281

* @rq: the request to complete

2283

* @rq: the request to complete

2282

* @error: %0 for success, < %0 for error

2284

* @error: %0 for success, < %0 for error

2283

* @nr_bytes: number of bytes to complete @rq

2285

* @nr_bytes: number of bytes to complete @rq

2284

* @bidi_bytes: number of bytes to complete @rq->next_rq

2286

* @bidi_bytes: number of bytes to complete @rq->next_rq

2285

*

2287

*

2286

* Description:

2288

* Description:

2287

* Identical to blk_end_bidi_request() except that queue lock is

2289

* Identical to blk_end_bidi_request() except that queue lock is

2288

* assumed to be locked on entry and remains so on return.

2290

* assumed to be locked on entry and remains so on return.

2289

*

2291

*

2290

* Return:

2292

* Return:

2291

* %false - we are done with this request

2293

* %false - we are done with this request

2292

* %true - still buffers pending for this request

2294

* %true - still buffers pending for this request

2293

**/

2295

**/

2294

static bool __blk_end_bidi_request(struct request *rq, int error,

2296

static bool __blk_end_bidi_request(struct request *rq, int error,

2295

unsigned int nr_bytes, unsigned int bidi_bytes)

2297

unsigned int nr_bytes, unsigned int bidi_bytes)

2296

{

2298

{

2297

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2299

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2298

return true;

2300

return true;

2299

2301

2300

blk_finish_request(rq, error);

2302

blk_finish_request(rq, error);

2301

2303

2302

return false;

2304

return false;

2303

}

2305

}

2304

2306

2305

/**

2307

/**

2306

* blk_end_request - Helper function for drivers to complete the request.

2308

* blk_end_request - Helper function for drivers to complete the request.

2307

* @rq: the request being processed

2309

* @rq: the request being processed

2308

* @error: %0 for success, < %0 for error

2310

* @error: %0 for success, < %0 for error

2309

* @nr_bytes: number of bytes to complete

2311

* @nr_bytes: number of bytes to complete

2310

*

2312

*

2311

* Description:

2313

* Description:

2312

* Ends I/O on a number of bytes attached to @rq.

2314

* Ends I/O on a number of bytes attached to @rq.

2313

* If @rq has leftover, sets it up for the next range of segments.

2315

* If @rq has leftover, sets it up for the next range of segments.

2314

*

2316

*

2315

* Return:

2317

* Return:

2316

* %false - we are done with this request

2318

* %false - we are done with this request

2317

* %true - still buffers pending for this request

2319

* %true - still buffers pending for this request

2318

**/

2320

**/

2319

bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2321

bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2320

{

2322

{

2321

return blk_end_bidi_request(rq, error, nr_bytes, 0);

2323

return blk_end_bidi_request(rq, error, nr_bytes, 0);

2322

}

2324

}

2323

EXPORT_SYMBOL(blk_end_request);

2325

EXPORT_SYMBOL(blk_end_request);

2324

2326

2325

/**

2327

/**

2326

* blk_end_request_all - Helper function for drives to finish the request.

2328

* blk_end_request_all - Helper function for drives to finish the request.

2327

* @rq: the request to finish

2329

* @rq: the request to finish

2328

* @error: %0 for success, < %0 for error

2330

* @error: %0 for success, < %0 for error

2329

*

2331

*

2330

* Description:

2332

* Description:

2331

* Completely finish @rq.

2333

* Completely finish @rq.

2332

*/

2334

*/

2333

void blk_end_request_all(struct request *rq, int error)

2335

void blk_end_request_all(struct request *rq, int error)

2334

{

2336

{

2335

bool pending;

2337

bool pending;

2336

unsigned int bidi_bytes = 0;

2338

unsigned int bidi_bytes = 0;

2337

2339

2338

if (unlikely(blk_bidi_rq(rq)))

2340

if (unlikely(blk_bidi_rq(rq)))

2339

bidi_bytes = blk_rq_bytes(rq->next_rq);

2341

bidi_bytes = blk_rq_bytes(rq->next_rq);

2340

2342

2341

pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2343

pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2342

BUG_ON(pending);

2344

BUG_ON(pending);

2343

}

2345

}

2344

EXPORT_SYMBOL(blk_end_request_all);

2346

EXPORT_SYMBOL(blk_end_request_all);

2345

2347

2346

/**

2348

/**

2347

* blk_end_request_cur - Helper function to finish the current request chunk.

2349

* blk_end_request_cur - Helper function to finish the current request chunk.

2348

* @rq: the request to finish the current chunk for

2350

* @rq: the request to finish the current chunk for

2349

* @error: %0 for success, < %0 for error

2351

* @error: %0 for success, < %0 for error

2350

*

2352

*

2351

* Description:

2353

* Description:

2352

* Complete the current consecutively mapped chunk from @rq.

2354

* Complete the current consecutively mapped chunk from @rq.

2353

*

2355

*

2354

* Return:

2356

* Return:

2355

* %false - we are done with this request

2357

* %false - we are done with this request

2356

* %true - still buffers pending for this request

2358

* %true - still buffers pending for this request

2357

*/

2359

*/

2358

bool blk_end_request_cur(struct request *rq, int error)

2360

bool blk_end_request_cur(struct request *rq, int error)

2359

{

2361

{

2360

return blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2362

return blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2361

}

2363

}

2362

EXPORT_SYMBOL(blk_end_request_cur);

2364

EXPORT_SYMBOL(blk_end_request_cur);

2363

2365

2364

/**

2366

/**

2365

* blk_end_request_err - Finish a request till the next failure boundary.

2367

* blk_end_request_err - Finish a request till the next failure boundary.

2366

* @rq: the request to finish till the next failure boundary for

2368

* @rq: the request to finish till the next failure boundary for

2367

* @error: must be negative errno

2369

* @error: must be negative errno

2368

*

2370

*

2369

* Description:

2371

* Description:

2370

* Complete @rq till the next failure boundary.

2372

* Complete @rq till the next failure boundary.

2371

*

2373

*

2372

* Return:

2374

* Return:

2373

* %false - we are done with this request

2375

* %false - we are done with this request

2374

* %true - still buffers pending for this request

2376

* %true - still buffers pending for this request

2375

*/

2377

*/

2376

bool blk_end_request_err(struct request *rq, int error)

2378

bool blk_end_request_err(struct request *rq, int error)

2377

{

2379

{

2378

WARN_ON(error >= 0);

2380

WARN_ON(error >= 0);

2379

return blk_end_request(rq, error, blk_rq_err_bytes(rq));

2381

return blk_end_request(rq, error, blk_rq_err_bytes(rq));

2380

}

2382

}

2381

EXPORT_SYMBOL_GPL(blk_end_request_err);

2383

EXPORT_SYMBOL_GPL(blk_end_request_err);

2382

2384

2383

/**

2385

/**

2384

* __blk_end_request - Helper function for drivers to complete the request.

2386

* __blk_end_request - Helper function for drivers to complete the request.

2385

* @rq: the request being processed

2387

* @rq: the request being processed

2386

* @error: %0 for success, < %0 for error

2388

* @error: %0 for success, < %0 for error

2387

* @nr_bytes: number of bytes to complete

2389

* @nr_bytes: number of bytes to complete

2388

*

2390

*

2389

* Description:

2391

* Description:

2390

* Must be called with queue lock held unlike blk_end_request().

2392

* Must be called with queue lock held unlike blk_end_request().

2391

*

2393

*

2392

* Return:

2394

* Return:

2393

* %false - we are done with this request

2395

* %false - we are done with this request

2394

* %true - still buffers pending for this request

2396

* %true - still buffers pending for this request

2395

**/

2397

**/

2396

bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2398

bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2397

{

2399

{

2398

return __blk_end_bidi_request(rq, error, nr_bytes, 0);

2400

return __blk_end_bidi_request(rq, error, nr_bytes, 0);

2399

}

2401

}

2400

EXPORT_SYMBOL(__blk_end_request);

2402

EXPORT_SYMBOL(__blk_end_request);

2401

2403

2402

/**

2404

/**

2403

* __blk_end_request_all - Helper function for drives to finish the request.

2405

* __blk_end_request_all - Helper function for drives to finish the request.

2404

* @rq: the request to finish

2406

* @rq: the request to finish

2405

* @error: %0 for success, < %0 for error

2407

* @error: %0 for success, < %0 for error

2406

*

2408

*

2407

* Description:

2409

* Description:

2408

* Completely finish @rq. Must be called with queue lock held.

2410

* Completely finish @rq. Must be called with queue lock held.

2409

*/

2411

*/

2410

void __blk_end_request_all(struct request *rq, int error)

2412

void __blk_end_request_all(struct request *rq, int error)

2411

{

2413

{

2412

bool pending;

2414

bool pending;

2413

unsigned int bidi_bytes = 0;

2415

unsigned int bidi_bytes = 0;

2414

2416

2415

if (unlikely(blk_bidi_rq(rq)))

2417

if (unlikely(blk_bidi_rq(rq)))

2416

bidi_bytes = blk_rq_bytes(rq->next_rq);

2418

bidi_bytes = blk_rq_bytes(rq->next_rq);

2417

2419

2418

pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2420

pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2419

BUG_ON(pending);

2421

BUG_ON(pending);

2420

}

2422

}

2421

EXPORT_SYMBOL(__blk_end_request_all);

2423

EXPORT_SYMBOL(__blk_end_request_all);

2422

2424

2423

/**

2425

/**

2424

* __blk_end_request_cur - Helper function to finish the current request chunk.

2426

* __blk_end_request_cur - Helper function to finish the current request chunk.

2425

* @rq: the request to finish the current chunk for

2427

* @rq: the request to finish the current chunk for

2426

* @error: %0 for success, < %0 for error

2428

* @error: %0 for success, < %0 for error

2427

*

2429

*

2428

* Description:

2430

* Description:

2429

* Complete the current consecutively mapped chunk from @rq. Must

2431

* Complete the current consecutively mapped chunk from @rq. Must

2430

* be called with queue lock held.

2432

* be called with queue lock held.

2431

*

2433

*

2432

* Return:

2434

* Return:

2433

* %false - we are done with this request

2435

* %false - we are done with this request

2434

* %true - still buffers pending for this request

2436

* %true - still buffers pending for this request

2435

*/

2437

*/

2436

bool __blk_end_request_cur(struct request *rq, int error)

2438

bool __blk_end_request_cur(struct request *rq, int error)

2437

{

2439

{

2438

return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2440

return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2439

}

2441

}

2440

EXPORT_SYMBOL(__blk_end_request_cur);

2442

EXPORT_SYMBOL(__blk_end_request_cur);

2441

2443

2442

/**

2444

/**

2443

* __blk_end_request_err - Finish a request till the next failure boundary.

2445

* __blk_end_request_err - Finish a request till the next failure boundary.

2444

* @rq: the request to finish till the next failure boundary for

2446

* @rq: the request to finish till the next failure boundary for

2445

* @error: must be negative errno

2447

* @error: must be negative errno

2446

*

2448

*

2447

* Description:

2449

* Description:

2448

* Complete @rq till the next failure boundary. Must be called

2450

* Complete @rq till the next failure boundary. Must be called

2449

* with queue lock held.

2451

* with queue lock held.

2450

*

2452

*

2451

* Return:

2453

* Return:

2452

* %false - we are done with this request

2454

* %false - we are done with this request

2453

* %true - still buffers pending for this request

2455

* %true - still buffers pending for this request

2454

*/

2456

*/

2455

bool __blk_end_request_err(struct request *rq, int error)

2457

bool __blk_end_request_err(struct request *rq, int error)

2456

{

2458

{

2457

WARN_ON(error >= 0);

2459

WARN_ON(error >= 0);

2458

return __blk_end_request(rq, error, blk_rq_err_bytes(rq));

2460

return __blk_end_request(rq, error, blk_rq_err_bytes(rq));

2459

}

2461

}

2460

EXPORT_SYMBOL_GPL(__blk_end_request_err);

2462

EXPORT_SYMBOL_GPL(__blk_end_request_err);

2461

2463

2462

void blk_rq_bio_prep(struct request_queue *q, struct request *rq,

2464

void blk_rq_bio_prep(struct request_queue *q, struct request *rq,

2463

struct bio *bio)

2465

struct bio *bio)

2464

{

2466

{

2465

/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */

2467

/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */

2466

rq->cmd_flags |= bio->bi_rw & REQ_WRITE;

2468

rq->cmd_flags |= bio->bi_rw & REQ_WRITE;

2467

2469

2468

if (bio_has_data(bio)) {

2470

if (bio_has_data(bio)) {

2469

rq->nr_phys_segments = bio_phys_segments(q, bio);

2471

rq->nr_phys_segments = bio_phys_segments(q, bio);

2470

rq->buffer = bio_data(bio);

2472

rq->buffer = bio_data(bio);

2471

}

2473

}

2472

rq->__data_len = bio->bi_size;

2474

rq->__data_len = bio->bi_size;

2473

rq->bio = rq->biotail = bio;

2475

rq->bio = rq->biotail = bio;

2474

2476

2475

if (bio->bi_bdev)

2477

if (bio->bi_bdev)

2476

rq->rq_disk = bio->bi_bdev->bd_disk;

2478

rq->rq_disk = bio->bi_bdev->bd_disk;

2477

}

2479

}

2478

2480

2479

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - Helper function to flush all pages in a request
 * @rq: the request to be flushed
 *
 * Description:
 *     Walk every segment of @rq and call flush_dcache_page() on the
 *     page backing it.
 */
void rq_flush_dcache_pages(struct request *rq)
{
	struct req_iterator cursor;
	struct bio_vec *seg;

	rq_for_each_segment(seg, rq, cursor) {
		flush_dcache_page(seg->bv_page);
	}
}
EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
#endif

2497

2499

2498

/**

2500

/**

2499

* blk_lld_busy - Check if underlying low-level drivers of a device are busy

2501

* blk_lld_busy - Check if underlying low-level drivers of a device are busy

2500

* @q : the queue of the device being checked

2502

* @q : the queue of the device being checked

2501

*

2503

*

2502

* Description:

2504

* Description:

2503

* Check if underlying low-level drivers of a device are busy.

2505

* Check if underlying low-level drivers of a device are busy.

2504

* If the drivers want to export their busy state, they must set own

2506

* If the drivers want to export their busy state, they must set own

2505

* exporting function using blk_queue_lld_busy() first.

2507

* exporting function using blk_queue_lld_busy() first.

2506

*

2508

*

2507

* Basically, this function is used only by request stacking drivers

2509

* Basically, this function is used only by request stacking drivers

2508

* to stop dispatching requests to underlying devices when underlying

2510

* to stop dispatching requests to underlying devices when underlying

2509

* devices are busy. This behavior helps more I/O merging on the queue

2511

* devices are busy. This behavior helps more I/O merging on the queue

2510

* of the request stacking driver and prevents I/O throughput regression

2512

* of the request stacking driver and prevents I/O throughput regression

2511

* on burst I/O load.

2513

* on burst I/O load.

2512

*

2514

*

2513

* Return:

2515

* Return:

2514

* 0 - Not busy (The request stacking driver should dispatch request)

2516

* 0 - Not busy (The request stacking driver should dispatch request)

2515

* 1 - Busy (The request stacking driver should stop dispatching request)

2517

* 1 - Busy (The request stacking driver should stop dispatching request)

2516

*/

2518

*/

2517

int blk_lld_busy(struct request_queue *q)

2519

int blk_lld_busy(struct request_queue *q)

2518

{

2520

{

2519

if (q->lld_busy_fn)

2521

if (q->lld_busy_fn)

2520

return q->lld_busy_fn(q);

2522

return q->lld_busy_fn(q);

2521

2523

2522

return 0;

2524

return 0;

2523

}

2525

}

2524

EXPORT_SYMBOL_GPL(blk_lld_busy);

2526

EXPORT_SYMBOL_GPL(blk_lld_busy);

2525

2527

2526

/**

2528

/**

2527

* blk_rq_unprep_clone - Helper function to free all bios in a cloned request

2529

* blk_rq_unprep_clone - Helper function to free all bios in a cloned request

2528

* @rq: the clone request to be cleaned up

2530

* @rq: the clone request to be cleaned up

2529

*

2531

*

2530

* Description:

2532

* Description:

2531

* Free all bios in @rq for a cloned request.

2533

* Free all bios in @rq for a cloned request.

2532

*/

2534

*/

2533

void blk_rq_unprep_clone(struct request *rq)

2535

void blk_rq_unprep_clone(struct request *rq)

2534

{

2536

{

2535

struct bio *bio;

2537

struct bio *bio;

2536

2538

2537

while ((bio = rq->bio) != NULL) {

2539

while ((bio = rq->bio) != NULL) {

2538

rq->bio = bio->bi_next;

2540

rq->bio = bio->bi_next;

2539

2541

2540

bio_put(bio);

2542

bio_put(bio);

2541

}

2543

}

2542

}

2544

}

2543

EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);

2545

EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);

2544

2546

2545

/*

2547

/*

2546

* Copy attributes of the original request to the clone request.

2548

* Copy attributes of the original request to the clone request.

2547

* The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.

2549

* The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.

2548

*/

2550

*/

2549

static void __blk_rq_prep_clone(struct request *dst, struct request *src)

2551

static void __blk_rq_prep_clone(struct request *dst, struct request *src)

2550

{

2552

{

2551

dst->cpu = src->cpu;

2553

dst->cpu = src->cpu;

2552

dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;

2554

dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;

2553

dst->cmd_type = src->cmd_type;

2555

dst->cmd_type = src->cmd_type;

2554

dst->__sector = blk_rq_pos(src);

2556

dst->__sector = blk_rq_pos(src);

2555

dst->__data_len = blk_rq_bytes(src);

2557

dst->__data_len = blk_rq_bytes(src);

2556

dst->nr_phys_segments = src->nr_phys_segments;

2558

dst->nr_phys_segments = src->nr_phys_segments;

2557

dst->ioprio = src->ioprio;

2559

dst->ioprio = src->ioprio;

2558

dst->extra_len = src->extra_len;

2560

dst->extra_len = src->extra_len;

2559

}

2561

}

2560

2562

2561

/**

2563

/**

2562

* blk_rq_prep_clone - Helper function to setup clone request

2564

* blk_rq_prep_clone - Helper function to setup clone request

2563

* @rq: the request to be setup

2565

* @rq: the request to be setup

2564

* @rq_src: original request to be cloned

2566

* @rq_src: original request to be cloned

2565

* @bs: bio_set that bios for clone are allocated from

2567

* @bs: bio_set that bios for clone are allocated from

2566

* @gfp_mask: memory allocation mask for bio

2568

* @gfp_mask: memory allocation mask for bio

2567

* @bio_ctr: setup function to be called for each clone bio.

2569

* @bio_ctr: setup function to be called for each clone bio.

2568

* Returns %0 for success, non %0 for failure.

2570

* Returns %0 for success, non %0 for failure.

2569

* @data: private data to be passed to @bio_ctr

2571

* @data: private data to be passed to @bio_ctr

2570

*

2572

*

2571

* Description:

2573

* Description:

2572

* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.

2574

* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.

2573

* The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)

2575

* The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)

2574

* are not copied, and copying such parts is the caller's responsibility.

2576

* are not copied, and copying such parts is the caller's responsibility.

2575

* Also, pages which the original bios are pointing to are not copied

2577

* Also, pages which the original bios are pointing to are not copied

2576

* and the cloned bios just point same pages.

2578

* and the cloned bios just point same pages.

2577

* So cloned bios must be completed before original bios, which means

2579

* So cloned bios must be completed before original bios, which means

2578

* the caller must complete @rq before @rq_src.

2580

* the caller must complete @rq before @rq_src.

2579

*/

2581

*/

2580

int blk_rq_prep_clone(struct request *rq, struct request *rq_src,

2582

int blk_rq_prep_clone(struct request *rq, struct request *rq_src,

2581

struct bio_set *bs, gfp_t gfp_mask,

2583

struct bio_set *bs, gfp_t gfp_mask,

2582

int (*bio_ctr)(struct bio *, struct bio *, void *),

2584

int (*bio_ctr)(struct bio *, struct bio *, void *),

2583

void *data)

2585

void *data)

2584

{

2586

{

2585

struct bio *bio, *bio_src;

2587

struct bio *bio, *bio_src;

2586

2588

2587

if (!bs)

2589

if (!bs)

2588

bs = fs_bio_set;

2590

bs = fs_bio_set;

2589

2591

2590

blk_rq_init(NULL, rq);

2592

blk_rq_init(NULL, rq);

2591

2593

2592

__rq_for_each_bio(bio_src, rq_src) {

2594

__rq_for_each_bio(bio_src, rq_src) {

2593

bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);

2595

bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);

2594

if (!bio)

2596

if (!bio)

2595

goto free_and_out;

2597

goto free_and_out;

2596

2598

2597

__bio_clone(bio, bio_src);

2599

__bio_clone(bio, bio_src);

2598

2600

2599

if (bio_integrity(bio_src) &&

2601

if (bio_integrity(bio_src) &&

2600

bio_integrity_clone(bio, bio_src, gfp_mask, bs))

2602

bio_integrity_clone(bio, bio_src, gfp_mask, bs))

2601

goto free_and_out;

2603

goto free_and_out;

2602

2604

2603

if (bio_ctr && bio_ctr(bio, bio_src, data))

2605

if (bio_ctr && bio_ctr(bio, bio_src, data))

2604

goto free_and_out;

2606

goto free_and_out;

2605

2607

2606

if (rq->bio) {

2608

if (rq->bio) {

2607

rq->biotail->bi_next = bio;

2609

rq->biotail->bi_next = bio;

2608

rq->biotail = bio;

2610

rq->biotail = bio;

2609

} else

2611

} else

2610

rq->bio = rq->biotail = bio;

2612

rq->bio = rq->biotail = bio;

2611

}

2613

}

2612

2614

2613

__blk_rq_prep_clone(rq, rq_src);

2615

__blk_rq_prep_clone(rq, rq_src);

2614

2616

2615

return 0;

2617

return 0;

2616

2618

2617

free_and_out:

2619

free_and_out:

2618

if (bio)

2620

if (bio)

2619

bio_free(bio, bs);

2621

bio_free(bio, bs);

2620

blk_rq_unprep_clone(rq);

2622

blk_rq_unprep_clone(rq);

2621

2623

2622

return -ENOMEM;

2624

return -ENOMEM;

2623

}

2625

}

2624

EXPORT_SYMBOL_GPL(blk_rq_prep_clone);

2626

EXPORT_SYMBOL_GPL(blk_rq_prep_clone);

2625

2627

2626

int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)

2628

int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)

2627

{

2629

{

2628

return queue_work(kblockd_workqueue, work);

2630

return queue_work(kblockd_workqueue, work);

2629

}

2631

}

2630

EXPORT_SYMBOL(kblockd_schedule_work);

2632

EXPORT_SYMBOL(kblockd_schedule_work);

2631

2633

2632

int kblockd_schedule_delayed_work(struct request_queue *q,

2634

int kblockd_schedule_delayed_work(struct request_queue *q,

2633

struct delayed_work *dwork, unsigned long delay)

2635

struct delayed_work *dwork, unsigned long delay)

2634

{

2636

{

2635

return queue_delayed_work(kblockd_workqueue, dwork, delay);

2637

return queue_delayed_work(kblockd_workqueue, dwork, delay);

2636

}

2638

}

2637

EXPORT_SYMBOL(kblockd_schedule_delayed_work);

2639

EXPORT_SYMBOL(kblockd_schedule_delayed_work);

2638

2640

2639

#define PLUG_MAGIC 0x91827364

2641

#define PLUG_MAGIC 0x91827364

2640

2642

2641

void blk_start_plug(struct blk_plug *plug)

2643

void blk_start_plug(struct blk_plug *plug)

2642

{

2644

{

2643

struct task_struct *tsk = current;

2645

struct task_struct *tsk = current;

2644

2646

2645

plug->magic = PLUG_MAGIC;

2647

plug->magic = PLUG_MAGIC;

2646

INIT_LIST_HEAD(&plug->list);

2648

INIT_LIST_HEAD(&plug->list);

2647

INIT_LIST_HEAD(&plug->cb_list);

2649

INIT_LIST_HEAD(&plug->cb_list);

2648

plug->should_sort = 0;

2650

plug->should_sort = 0;

2649

2651

2650

/*

2652

/*

2651

* If this is a nested plug, don't actually assign it. It will be

2653

* If this is a nested plug, don't actually assign it. It will be

2652

* flushed on its own.

2654

* flushed on its own.

2653

*/

2655

*/

2654

if (!tsk->plug) {

2656

if (!tsk->plug) {

2655

/*

2657

/*

2656

* Store ordering should not be needed here, since a potential

2658

* Store ordering should not be needed here, since a potential

2657

* preempt will imply a full memory barrier

2659

* preempt will imply a full memory barrier

2658

*/

2660

*/

2659

tsk->plug = plug;

2661

tsk->plug = plug;

2660

}

2662

}

2661

}

2663

}

2662

EXPORT_SYMBOL(blk_start_plug);

2664

EXPORT_SYMBOL(blk_start_plug);

2663

2665

2664

static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)

2666

static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)

2665

{

2667

{

2666

struct request *rqa = container_of(a, struct request, queuelist);

2668

struct request *rqa = container_of(a, struct request, queuelist);

2667

struct request *rqb = container_of(b, struct request, queuelist);

2669

struct request *rqb = container_of(b, struct request, queuelist);

2668

2670

2669

return !(rqa->q <= rqb->q);

2671

return !(rqa->q <= rqb->q);

2670

}

2672

}

2671

2673

2672

/*

2674

/*

2673

* If 'from_schedule' is true, then postpone the dispatch of requests

2675

* If 'from_schedule' is true, then postpone the dispatch of requests

2674

* until a safe kblockd context. We due this to avoid accidental big

2676

* until a safe kblockd context. We due this to avoid accidental big

2675

* additional stack usage in driver dispatch, in places where the originally

2677

* additional stack usage in driver dispatch, in places where the originally

2676

* plugger did not intend it.

2678

* plugger did not intend it.

2677

*/

2679

*/

2678

static void queue_unplugged(struct request_queue *q, unsigned int depth,

2680

static void queue_unplugged(struct request_queue *q, unsigned int depth,

2679

bool from_schedule)

2681

bool from_schedule)

2680

__releases(q->queue_lock)

2682

__releases(q->queue_lock)

2681

{

2683

{

2682

trace_block_unplug(q, depth, !from_schedule);

2684

trace_block_unplug(q, depth, !from_schedule);

2683

2685

2684

/*

2686

/*

2685

* If we are punting this to kblockd, then we can safely drop

2687

* If we are punting this to kblockd, then we can safely drop

2686

* the queue_lock before waking kblockd (which needs to take

2688

* the queue_lock before waking kblockd (which needs to take

2687

* this lock).

2689

* this lock).

2688

*/

2690

*/

2689

if (from_schedule) {

2691

if (from_schedule) {

2690

spin_unlock(q->queue_lock);

2692

spin_unlock(q->queue_lock);

2691

blk_run_queue_async(q);

2693

blk_run_queue_async(q);

2692

} else {

2694

} else {

2693

__blk_run_queue(q);

2695

__blk_run_queue(q);

2694

spin_unlock(q->queue_lock);

2696

spin_unlock(q->queue_lock);

2695

}

2697

}

2696

2698

2697

}

2699

}

2698

2700

2699

static void flush_plug_callbacks(struct blk_plug *plug)

2701

static void flush_plug_callbacks(struct blk_plug *plug)

2700

{

2702

{

2701

LIST_HEAD(callbacks);

2703

LIST_HEAD(callbacks);

2702

2704

2703

if (list_empty(&plug->cb_list))

2705

if (list_empty(&plug->cb_list))

2704

return;

2706

return;

2705

2707

2706

list_splice_init(&plug->cb_list, &callbacks);

2708

list_splice_init(&plug->cb_list, &callbacks);

2707

2709

2708

while (!list_empty(&callbacks)) {

2710

while (!list_empty(&callbacks)) {

2709

struct blk_plug_cb *cb = list_first_entry(&callbacks,

2711

struct blk_plug_cb *cb = list_first_entry(&callbacks,

2710

struct blk_plug_cb,

2712

struct blk_plug_cb,

2711

list);

2713

list);

2712

list_del(&cb->list);

2714

list_del(&cb->list);

2713

cb->callback(cb);

2715

cb->callback(cb);

2714

}

2716

}

2715

}

2717

}

2716

2718

2717

/*
 * Flush everything held on @plug: run its callbacks, then move each
 * plugged request onto its queue and kick that queue.
 *
 * The requests are moved to a private list (and sorted by queue when
 * plug->should_sort is set) and processed in order.  A queue's lock is
 * taken when the first request for that queue is seen and is released by
 * queue_unplugged() when the next request belongs to a different queue
 * or the list is exhausted.  @from_schedule is passed through to
 * queue_unplugged().
 */
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
	struct request_queue *cur_q = NULL;	/* queue whose lock we hold */
	unsigned int queued = 0;		/* requests added to cur_q */
	unsigned long irq_flags;
	LIST_HEAD(pending);

	BUG_ON(plug->magic != PLUG_MAGIC);

	flush_plug_callbacks(plug);
	if (list_empty(&plug->list))
		return;

	list_splice_init(&plug->list, &pending);

	if (plug->should_sort) {
		list_sort(NULL, &pending, plug_rq_cmp);
		plug->should_sort = 0;
	}

	/*
	 * Save and disable interrupts here, to avoid doing it for every
	 * queue lock we have to take.
	 */
	local_irq_save(irq_flags);
	while (!list_empty(&pending)) {
		struct request *rq = list_entry_rq(pending.next);
		int where;

		list_del_init(&rq->queuelist);
		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
		BUG_ON(!rq->q);

		if (rq->q != cur_q) {
			/*
			 * Switching queues: finish with the previous one
			 * first.  This drops the queue lock.
			 */
			if (cur_q)
				queue_unplugged(cur_q, queued, from_schedule);
			cur_q = rq->q;
			queued = 0;
			spin_lock(cur_q->queue_lock);
		}
		rq->cmd_flags &= ~REQ_ON_PLUG;

		/*
		 * rq is already accounted, so use raw insert
		 */
		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
			where = ELEVATOR_INSERT_FLUSH;
		else
			where = ELEVATOR_INSERT_SORT_MERGE;
		__elv_add_request(cur_q, rq, where);

		queued++;
	}

	/*
	 * This drops the queue lock
	 */
	if (cur_q)
		queue_unplugged(cur_q, queued, from_schedule);

	local_irq_restore(irq_flags);
}

2782

2784

2783

void blk_finish_plug(struct blk_plug *plug)

2785

void blk_finish_plug(struct blk_plug *plug)

2784

{

2786

{

2785

blk_flush_plug_list(plug, false);

2787

blk_flush_plug_list(plug, false);

2786

2788

2787

if (plug == current->plug)

2789

if (plug == current->plug)

2788

current->plug = NULL;

2790

current->plug = NULL;

2789

}

2791

}

2790

EXPORT_SYMBOL(blk_finish_plug);

2792

EXPORT_SYMBOL(blk_finish_plug);

2791

2793

2792

/*
 * Boot-time setup for the block layer core: creates the kblockd
 * workqueue and the slab caches for struct request and
 * struct request_queue.  Always returns 0; failure to create the
 * workqueue panics.
 */
int __init blk_dev_init(void)
{
	/* Every request flag bit must fit in request->cmd_flags. */
	BUILD_BUG_ON(8 * sizeof(((struct request *)0)->cmd_flags) <
		     __REQ_NR_BITS);

	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
	kblockd_workqueue = alloc_workqueue("kblockd",
					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (kblockd_workqueue == NULL)
		panic("Failed to create kblockd\n");

	/* Both caches are created with SLAB_PANIC; no NULL check is made. */
	request_cachep = kmem_cache_create("blkdev_requests",
					   sizeof(struct request), 0,
					   SLAB_PANIC, NULL);

	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
						sizeof(struct request_queue),
						0, SLAB_PANIC, NULL);

	return 0;
}

2811

2813

GITLAB

block: don't delay blk_run_queue_async

 /*
  * Copyright (C) 1991, 1992 Linus Torvalds
  * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
  * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
  * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
  * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
  *	-  July2000
  * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
  */
 /*
  * This handles all read/write requests to block devices
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/fault-inject.h>
 #include <linux/list_sort.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
 #include "blk.h"
 /* Tracepoints made available to GPL modules. */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 /* Forward declaration: installed as the make_request handler further down. */
 static int __make_request(struct request_queue *q, struct bio *bio);
 /*
  * Slab cache for the allocated request tables (struct request);
  * created as "blkdev_requests" in blk_dev_init().
  */
 static struct kmem_cache *request_cachep;
 /*
  * Slab cache for queue allocation (struct request_queue);
  * created as "blkdev_queue" in blk_dev_init().
  */
 struct kmem_cache *blk_requestq_cachep;
 /*
  * Controlling structure to kblockd: the workqueue on which the per-queue
  * delay_work items are queued.
  */
 static struct workqueue_struct *kblockd_workqueue;
 /*
  * Per-partition I/O accounting for @rq.
  *
  * @new_io != 0: a new request is being started; look up the partition that
  *               covers the request's start sector, pin it, bump its
  *               in-flight counter and remember it in rq->part.
  * @new_io == 0: a bio was merged into an existing request; only the merge
  *               counter of the already recorded partition is incremented.
  *
  * Nothing is done when blk_do_io_stat() says the request is not accounted.
  */
 static void drive_stat_acct(struct request *rq, int new_io)
 {
 	const int dir = rq_data_dir(rq);
 	struct hd_struct *hd;
 	int cpu;
 	if (!blk_do_io_stat(rq))
 		return;
 	cpu = part_stat_lock();
 	if (new_io) {
 		hd = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 		if (!hd_struct_try_get(hd)) {
 			/*
 			 * The partition is already being removed, so the
 			 * request is accounted on the whole disk only.
 			 *
 			 * A reference is taken on disk->part0 even though
 			 * that partition is never deleted; this lets the
 			 * rest of the code treat it like any other
 			 * partition.
 			 */
 			hd = &rq->rq_disk->part0;
 			hd_struct_get(hd);
 		}
 		part_round_stats(cpu, hd);
 		part_inc_in_flight(hd, dir);
 		rq->part = hd;
 	} else {
 		hd = rq->part;
 		part_stat_inc(cpu, hd, merges[dir]);
 	}
 	part_stat_unlock();
 }
 /*
  * Recompute the congestion thresholds of @q from q->nr_requests.
  *
  * nr_congestion_on  = nr_requests - nr_requests/8 + 1, capped at nr_requests
  * nr_congestion_off = nr_requests - nr_requests/8 - nr_requests/16 - 1,
  *                     but never less than 1
  */
 void blk_queue_congestion_threshold(struct request_queue *q)
 {
 	int on, off;
 	on = q->nr_requests - (q->nr_requests / 8) + 1;
 	if (on > q->nr_requests)
 		on = q->nr_requests;
 	q->nr_congestion_on = on;
 	off = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
 	if (off < 1)
 		off = 1;
 	q->nr_congestion_off = off;
 }
 /**
  * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
  * @bdev:	device
  *
  * Looks up the request queue of @bdev and hands back a pointer to the
  * backing_dev_info embedded in that queue.
  *
  * Returns NULL when no request queue can be found for the device.
  */
 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	return q ? &q->backing_dev_info : NULL;
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 /*
  * Bring @rq into its initial state and attach it to @q.  The structure is
  * zeroed first; afterwards only the members that need a non-zero start
  * value or a dedicated initialiser are touched.
  */
 void blk_rq_init(struct request_queue *q, struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
 	/* owner, reference count and "unset" markers */
 	rq->q = q;
 	rq->ref_count = 1;
 	rq->cpu = -1;
 	rq->tag = -1;
 	rq->part = NULL;
 	rq->__sector = (sector_t) -1;
 	/* list, hash and rbtree linkage all start out empty */
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->timeout_list);
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	/* command pointer refers to the storage embedded in the request */
 	rq->cmd = rq->__cmd;
 	rq->cmd_len = BLK_MAX_CDB;
 	/* record when the request was set up */
 	rq->start_time = jiffies;
 	set_start_time_ns(rq);
 }
 EXPORT_SYMBOL(blk_rq_init);
 /*
  * Account @nbytes of completed I/O against @bio on behalf of @rq.
  *
  * @error: completion status; a non-zero value clears BIO_UPTODATE, and a
  *         bio that is already !BIO_UPTODATE turns a zero status into -EIO.
  *
  * The bio is ended via bio_endio() once its remaining size reaches zero,
  * unless @rq is part of a flush sequence (REQ_FLUSH_SEQ).
  */
 static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, int error)
 {
 	if (error)
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
 	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
 		error = -EIO;
 	/* never complete more than the bio still has outstanding */
 	if (unlikely(nbytes > bio->bi_size)) {
 		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
 		       __func__, nbytes, bio->bi_size);
 		nbytes = bio->bi_size;
 	}
 	if (unlikely(rq->cmd_flags & REQ_QUIET))
 		set_bit(BIO_QUIET, &bio->bi_flags);
 	/* advance the bio past the completed part (nbytes >> 9 = 512-byte units) */
 	bio->bi_size -= nbytes;
 	bio->bi_sector += (nbytes >> 9);
 	if (bio_integrity(bio))
 		bio_integrity_advance(bio, nbytes);
 	/* don't actually finish bio if it's part of flush sequence */
 	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
 		bio_endio(bio, error);
 }
 /*
  * Print a description of @rq to the kernel log, prefixed with @msg:
  * device name (or "?" when the request has no disk), type and flags,
  * position and sector counts, bio/buffer pointers, and - for
  * REQ_TYPE_BLOCK_PC requests - the command bytes in hex.
  */
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	const char *disk = rq->rq_disk ? rq->rq_disk->disk_name : "?";
 	int i;
 	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
 		disk, rq->cmd_type, rq->cmd_flags);
 	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
 	       (unsigned long long)blk_rq_pos(rq),
 	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 	if (rq->cmd_type != REQ_TYPE_BLOCK_PC)
 		return;
 	/* dump all BLK_MAX_CDB command bytes on one line */
 	printk(KERN_INFO "  cdb: ");
 	for (i = 0; i < BLK_MAX_CDB; i++)
 		printk("%02x ", rq->cmd[i]);
 	printk("\n");
 }
 EXPORT_SYMBOL(blk_dump_rq_flags);
 /*
  * Work handler for q->delay_work: recover the owning queue from @work and
  * run it with the queue lock held and interrupts disabled.
  */
 static void blk_delay_work(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, delay_work.work);
 	spin_lock_irq(q->queue_lock);
 	__blk_run_queue(q);
 	spin_unlock_irq(q->queue_lock);
 }
 /**
  * blk_delay_queue - restart queueing after defined interval
  * @q:		The &struct request_queue in question
  * @msecs:	Delay in msecs
  *
  * Description:
  *   Queueing sometimes has to be put off for a short while so that
  *   resources can come back. This schedules the queue's delay_work on
  *   kblockd so that queueing is restarted around the requested time.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
 	unsigned long delay = msecs_to_jiffies(msecs);
 	queue_delayed_work(kblockd_workqueue, &q->delay_work, delay);
 }
 EXPORT_SYMBOL(blk_delay_queue);
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
  *
  * Description:
  *   blk_start_queue() will clear the stop flag on the queue, and call
  *   the request_fn for the queue if it was in a stopped state when
  *   entered. Also see blk_stop_queue(). Queue lock must be held.
  **/
 void blk_start_queue(struct request_queue *q)
 {
 	/* complain if the caller did not disable interrupts */
 	WARN_ON(!irqs_disabled());
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 	__blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 /**
  * blk_stop_queue - stop a queue
  * @q:    The &struct request_queue in question
  *
  * Description:
  *   The Linux block layer assumes that a block driver will consume all
  *   entries on the request queue when the request_fn strategy is called.
  *   Often this will not happen, because of hardware limitations (queue
  *   depth settings). If a device driver gets a 'queue full' response,
  *   or if it simply chooses not to queue more I/O at one point, it can
  *   call this function to prevent the request_fn from being called until
  *   the driver has signalled it's ready to go again. This happens by calling
  *   blk_start_queue() to restart queue operations. Queue lock must be held.
  **/
 void blk_stop_queue(struct request_queue *q)
 {
 	/* drop any delayed queue run that is still pending */
 	__cancel_delayed_work(&q->delay_work);
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
 /**
  * blk_sync_queue - cancel any pending callbacks on a queue
  * @q: the queue
  *
  * Description:
  *     The block layer may perform asynchronous callback activity
  *     on a queue, such as calling the unplug function after a timeout.
  *     A block device may call blk_sync_queue to ensure that any
  *     such activity is cancelled, thus allowing it to release resources
  *     that the callbacks might use. The caller must already have made sure
  *     that its ->make_request_fn will not re-add plugging prior to calling
  *     this function.
  *
  *     This function does not cancel any asynchronous activity arising
  *     out of elevator or throttling code. That would require elevator_exit()
  *     and blk_throtl_exit() to be called with queue lock initialized.
  *
  */
 void blk_sync_queue(struct request_queue *q)
 {
 	/* stop the request timeout timer, then the delayed queue-run work */
 	del_timer_sync(&q->timeout);
 	cancel_delayed_work_sync(&q->delay_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 /**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
  *
  * Description:
  *    See @blk_run_queue. The caller of this variant must hold the queue
  *    lock and have interrupts disabled. The queue's request_fn is invoked
  *    unless the queue is currently stopped.
  */
 void __blk_run_queue(struct request_queue *q)
 {
 	if (likely(!blk_queue_stopped(q)))
 		q->request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 /**
  * blk_run_queue_async - run a single device queue in workqueue context
  * @q:	The queue to run
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
  *    of us. Nothing is queued when the queue is stopped.
  *
  *    NOTE(review): the lines below are a unified-diff hunk; the added side
  *    cancels any already pending delay_work before queueing it with zero
  *    delay, presumably so an earlier, longer delay cannot postpone this run.
  */
 void blk_run_queue_async(struct request_queue *q)
 {
-	if (likely(!blk_queue_stopped(q)))
+	if (likely(!blk_queue_stopped(q))) {
+		__cancel_delayed_work(&q->delay_work);
 		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+	}
 }
 EXPORT_SYMBOL(blk_run_queue_async);
 /**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
  *
  * Description:
  *    Invoke request handling on this queue, if it has pending work to do.
  *    May be used to restart queueing when a request has completed.
  *    Takes q->queue_lock itself (saving and restoring the interrupt state)
  *    around the call to __blk_run_queue().
  */
 void blk_run_queue(struct request_queue *q)
 {
 	unsigned long flags;
 	spin_lock_irqsave(q->queue_lock, flags);
 	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
 /*
  * Drop one reference on @q by putting the kobject embedded in the queue.
  * Counterpart of blk_get_queue().
  */
 void blk_put_queue(struct request_queue *q)
 {
 	kobject_put(&q->kobj);
 }
 /*
  * Tear down @q: cancel pending asynchronous activity, mark the queue dead,
  * shut down the elevator and throttling, and drop a queue reference.
  *
  * Note: If a driver supplied the queue lock, it should not zap that lock
  * unexpectedly as some queue cleanup components like elevator_exit() and
  * blk_throtl_exit() need queue lock.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
 	/*
 	 * We know we have process context here, so we can be a little
 	 * cautious and ensure that pending block actions on this device
 	 * are done before moving on. Going into this function, we should
 	 * not have processes doing IO to this device.
 	 */
 	blk_sync_queue(q);
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
 	/* flag the queue as dead while holding sysfs_lock */
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
 	/* a queue may have no elevator attached */
 	if (q->elevator)
 		elevator_exit(q->elevator);
 	blk_throtl_exit(q);
 	blk_put_queue(q);
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
 /*
  * Set up the request list of @q: zero the per-direction counters, initialise
  * the wait queues and create the request mempool on the queue's node.
  *
  * Returns 0 on success (also when the pool already exists) and -ENOMEM if
  * the mempool cannot be created.
  */
 static int blk_init_free_list(struct request_queue *q)
 {
 	struct request_list *rl = &q->rq;
 	/* already initialised, nothing to do */
 	if (unlikely(rl->rq_pool))
 		return 0;
 	rl->count[BLK_RW_ASYNC] = 0;
 	rl->count[BLK_RW_SYNC] = 0;
 	rl->starved[BLK_RW_ASYNC] = 0;
 	rl->starved[BLK_RW_SYNC] = 0;
 	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, request_cachep, q->node);
 	return rl->rq_pool ? 0 : -ENOMEM;
 }
 /*
  * Allocate a request queue with @gfp_mask; shorthand for
  * blk_alloc_queue_node() with a node id of -1 (presumably "no particular
  * node" - confirm against the allocator). Returns NULL on failure.
  */
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 /*
  * Allocate and minimally initialise a request queue.
  *
  * @gfp_mask: allocation flags; __GFP_ZERO is always added
  * @node_id:  node passed to the slab allocator
  *
  * Sets up the backing_dev_info, throttling, timers, list heads, delayed
  * work, kobject and locks. The queue lock defaults to the internal
  * __queue_lock.
  *
  * Returns the new queue, or NULL if the allocation, bdi_init() or
  * blk_throtl_init() fails. Every failure path releases what was set up
  * before it; in particular a successful bdi_init() is undone with
  * bdi_destroy() when throttling setup fails.
  */
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	struct request_queue *q;
 	int err;
 	q = kmem_cache_alloc_node(blk_requestq_cachep,
 				gfp_mask | __GFP_ZERO, node_id);
 	if (!q)
 		return NULL;
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
 	err = bdi_init(&q->backing_dev_info);
 	if (err)
 		goto fail_q;
 	if (blk_throtl_init(q))
 		goto fail_bdi;
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
 	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
 	kobject_init(&q->kobj, &blk_queue_ktype);
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 	/*
 	 * By default initialize queue_lock to internal lock and driver can
 	 * override it later if need be.
 	 */
 	q->queue_lock = &q->__queue_lock;
 	return q;
 fail_bdi:
 	/* bdi_init() succeeded, so its resources must be released again */
 	bdi_destroy(&q->backing_dev_info);
 fail_q:
 	kmem_cache_free(blk_requestq_cachep, q);
 	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 /**
  * blk_init_queue  - prepare a request queue for use with a block device
  * @rfn:  The function to be called to process requests that have been
  *        placed on the queue.
  * @lock: Request queue spin lock
  *
  * Description:
  *    If a block device wishes to use the standard request handling procedures,
  *    which sorts requests and coalesces adjacent requests, then it must
  *    call blk_init_queue().  The function @rfn will be called when there
  *    are requests on the queue that need to be processed.  If the device
  *    supports plugging, then @rfn may not be called immediately when requests
  *    are available on the queue, but may be called at some time later instead.
  *    Plugged queues are generally unplugged when a buffer belonging to one
  *    of the requests on the queue is needed, or due to memory pressure.
  *
  *    @rfn is not required, or even expected, to remove all requests off the
  *    queue, but only as many as it can handle at a time.  If it does leave
  *    requests on the queue, it is responsible for arranging that the requests
  *    get dealt with eventually.
  *
  *    The queue spin lock must be held while manipulating the requests on the
  *    request queue; this lock will be taken also from interrupt context, so irq
  *    disabling is needed for it.
  *
  *    Function returns a pointer to the initialized request queue, or %NULL if
  *    it didn't succeed.
  *
  *    This is blk_init_queue_node() called with a node id of -1.
  *
  * Note:
  *    blk_init_queue() must be paired with a blk_cleanup_queue() call
  *    when the block device is deactivated (such as at module unload).
  **/
 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 {
 	return blk_init_queue_node(rfn, lock, -1);
 }
 EXPORT_SYMBOL(blk_init_queue);
 /*
  * Allocate a queue on @node_id with GFP_KERNEL and initialise it with
  * request function @rfn and (optional) lock @lock.  If initialisation of
  * the freshly allocated queue fails, the queue is cleaned up again.
  *
  * Returns the ready queue or NULL.
  */
 struct request_queue *
 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 {
 	struct request_queue *fresh, *ready;
 	fresh = blk_alloc_queue_node(GFP_KERNEL, node_id);
 	if (!fresh)
 		return NULL;
 	ready = blk_init_allocated_queue_node(fresh, rfn, lock, node_id);
 	if (ready)
 		return ready;
 	/* initialisation failed: release the allocated queue */
 	blk_cleanup_queue(fresh);
 	return NULL;
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 /*
  * Initialise an already allocated queue @q with request function @rfn and
  * (optional) lock @lock; shorthand for blk_init_allocated_queue_node()
  * with a node id of -1. Returns @q on success, NULL on failure.
  */
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 			 spinlock_t *lock)
 {
 	return blk_init_allocated_queue_node(q, rfn, lock, -1);
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 /*
  * Finish setting up the pre-allocated queue @q for request-based operation.
  *
  * @rfn:     request function stored in q->request_fn
  * @lock:    if non-NULL, replaces the queue's default lock
  * @node_id: node recorded in q->node and used for the request mempool
  *
  * Returns @q on success.  Returns NULL if @q is NULL, if the free list
  * cannot be set up, or if elevator_init() fails.
  */
 struct request_queue *
 blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 			      spinlock_t *lock, int node_id)
 {
 	if (!q)
 		return NULL;
 	q->node = node_id;
 	if (blk_init_free_list(q))
 		return NULL;
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unprep_rq_fn		= NULL;
 	q->queue_flags		= QUEUE_FLAG_DEFAULT;
 	/* A caller-supplied lock takes precedence over the internal one */
 	if (lock)
 		q->queue_lock		= lock;
 	/*
 	 * This also sets hw/phys segments, boundary and size
 	 */
 	blk_queue_make_request(q, __make_request);
 	q->sg_reserved_size = INT_MAX;
 	/* last step: attach the default elevator */
 	if (elevator_init(q, NULL))
 		return NULL;
 	blk_queue_congestion_threshold(q);
 	return q;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue_node);
 /*
  * Take a reference on @q unless it has been marked dead.
  * Returns 0 when the reference was taken, 1 when the queue is dead.
  */
 int blk_get_queue(struct request_queue *q)
 {
 	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 		return 1;
 	kobject_get(&q->kobj);
 	return 0;
 }
 /*
  * Return @rq to the request mempool of @q, releasing its elevator private
  * data first when REQ_ELVPRIV is set. A request still flagged REQ_ON_PLUG
  * must never get here.
  */
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
 	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
 /*
  * Allocate and initialise a request from the mempool of @q.
  *
  * @flags:    initial cmd_flags; REQ_ALLOCED is always added
  * @priv:     non-zero to also set up elevator private data (REQ_ELVPRIV)
  * @gfp_mask: allocation flags for the mempool and the elevator
  *
  * Returns the request, or NULL if the mempool or the elevator setup fails.
  */
 static struct request *
 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
 {
 	struct request *rq;
 	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 	if (!rq)
 		return NULL;
 	blk_rq_init(q, rq);
 	rq->cmd_flags = flags | REQ_ALLOCED;
 	if (!priv)
 		return rq;
 	if (unlikely(elv_set_request(q, rq, gfp_mask))) {
 		/* elevator setup failed: give the request back */
 		mempool_free(rq, q->rq.rq_pool);
 		return NULL;
 	}
 	rq->cmd_flags |= REQ_ELVPRIV;
 	return rq;
 }
 /*
  * ioc_batching tells whether @ioc currently counts as a batching context
  * on @q and should therefore get priority access to a request.
  * A NULL @ioc never batches.
  */
 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
 {
 	if (!ioc)
 		return 0;
 	/*
 	 * A full, untouched batch always qualifies, so the process can
 	 * allocate at least one request even if the batch has timed out;
 	 * otherwise wakeups could theoretically be lost.
 	 */
 	if (ioc->nr_batch_requests == q->nr_batching)
 		return 1;
 	if (ioc->nr_batch_requests <= 0)
 		return 0;
 	/* a partly used batch only counts while its time window is open */
 	return time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME);
 }
 /*
  * ioc_set_batching turns @ioc into a new "batcher" unless it already is
  * one.  The process then batches on all queues in the system, which is the
  * intended behaviour: once it gets a wakeup it should be given a nice run.
  */
 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
 {
 	if (ioc && !ioc_batching(q, ioc)) {
 		ioc->nr_batch_requests = q->nr_batching;
 		ioc->last_waited = jiffies;
 	}
 }
 /*
  * Re-evaluate the congestion and "full" state of direction @sync on @q
  * after its request count changed, waking a waiter if there is room again.
  */
 static void __freed_request(struct request_queue *q, int sync)
 {
 	struct request_list *rl = &q->rq;
 	if (rl->count[sync] < queue_congestion_off_threshold(q))
 		blk_clear_queue_congested(q, sync);
 	/* still no room for another request: leave the full state alone */
 	if (rl->count[sync] + 1 > q->nr_requests)
 		return;
 	if (waitqueue_active(&rl->wait[sync]))
 		wake_up(&rl->wait[sync]);
 	blk_clear_queue_full(q, sync);
 }
 /*
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  *
  * @sync: direction index of the released request
  * @priv: non-zero if the request carried elevator private data
  */
 static void freed_request(struct request_queue *q, int sync, int priv)
 {
 	struct request_list *rl = &q->rq;
 	rl->count[sync]--;
 	if (priv)
 		rl->elvpriv--;
 	__freed_request(q, sync);
 	/* the other direction was starved: give it a chance to proceed too */
 	if (unlikely(rl->starved[sync ^ 1]))
 		__freed_request(q, sync ^ 1);
 }
 /*
  * Decide whether elevator data should be initialised for the request that
  * is being allocated on behalf of @bio.
  *
  * A NULL @bio always gets elevator data.  Flush requests (REQ_FLUSH or
  * REQ_FUA set) do not use the elevator, so initialisation is skipped for
  * them; this lets a request share the flush and elevator data.
  */
 static bool blk_rq_should_init_elevator(struct bio *bio)
 {
 	return !bio || !(bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 }
 /*
  * Get a free request, queue_lock must be held.
  * Returns NULL on failure, with queue_lock held.
  * Returns !NULL on success, with queue_lock *not held*.
  *
  * @rw_flags: request flags; also selects the sync/async accounting bucket
  * @bio:      bio the request is allocated for, may be NULL
  * @gfp_mask: allocation flags handed to blk_alloc_request()
  */
 static struct request *get_request(struct request_queue *q, int rw_flags,
 				   struct bio *bio, gfp_t gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue, priv = 0;
 	/* let the elevator veto the allocation first */
 	may_queue = elv_may_queue(q, rw_flags);
 	if (may_queue == ELV_MQUEUE_NO)
 		goto rq_starved;
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
 			ioc = current_io_context(GFP_ATOMIC, q->node);
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
 			 * This process will be allowed to complete a batch of
 			 * requests, others will be blocked.
 			 */
 			if (!blk_queue_full(q, is_sync)) {
 				ioc_set_batching(q, ioc);
 				blk_set_queue_full(q, is_sync);
 			} else {
 				if (may_queue != ELV_MQUEUE_MUST
 						&& !ioc_batching(q, ioc)) {
 					/*
 					 * The queue is full and the allocating
 					 * process is not a "batcher", and not
 					 * exempted by the IO scheduler
 					 */
 					goto out;
 				}
 			}
 		}
 		blk_set_queue_congested(q, is_sync);
 	}
 	/*
 	 * Only allow batching queuers to allocate up to 50% over the defined
 	 * limit of requests, otherwise we could have thousands of requests
 	 * allocated with any setting of ->nr_requests
 	 */
 	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
 		goto out;
 	/* account for the request before the lock is dropped */
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 	if (blk_rq_should_init_elevator(bio)) {
 		priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 		if (priv)
 			rl->elvpriv++;
 	}
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	/* the allocation itself runs without the queue lock */
 	spin_unlock_irq(q->queue_lock);
 	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
 		 * we might have messed up.
 		 *
 		 * Allocating task should really be put onto the front of the
 		 * wait queue, but this is pretty rare.
 		 */
 		spin_lock_irq(q->queue_lock);
 		freed_request(q, is_sync, priv);
 		/*
 		 * in the very unlikely event that allocation failed and no
 		 * requests for this direction was pending, mark us starved
 		 * so that freeing of a request in the other direction will
 		 * notice us. another possible fix would be to split the
 		 * rq mempool into READ and WRITE
 		 */
 rq_starved:
 		if (unlikely(rl->count[is_sync] == 0))
 			rl->starved[is_sync] = 1;
 		goto out;
 	}
 	/*
 	 * ioc may be NULL here, and ioc_batching will be false. That's
 	 * OK, if the queue is under the request limit then requests need
 	 * not count toward the nr_batch_requests limit. There will always
 	 * be some limit enforced by BLK_BATCH_TIME.
 	 */
 	if (ioc_batching(q, ioc))
 		ioc->nr_batch_requests--;
 	trace_block_getrq(q, bio, rw_flags & 1);
 out:
 	return rq;
 }
 /*
  * No available requests for this queue, wait for some requests to become
  * available.
  *
  * Called with q->queue_lock held, and returns with it unlocked.
  *
  * Keeps retrying get_request() with GFP_NOIO, sleeping on the request
  * list's wait queue for the matching direction between attempts, so it
  * only returns once a request has been obtained.
  */
 static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 					struct bio *bio)
 {
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	struct request *rq;
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
 		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 		/* register as waiter before dropping the lock */
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
 				TASK_UNINTERRUPTIBLE);
 		trace_block_sleeprq(q, bio, rw_flags & 1);
 		spin_unlock_irq(q->queue_lock);
 		io_schedule();
 		/*
 		 * After sleeping, we become a "batching" process and
 		 * will be able to allocate at least one request, and
 		 * up to a big batch of them for a small period time.
 		 * See ioc_batching, ioc_set_batching
 		 */
 		ioc = current_io_context(GFP_NOIO, q->node);
 		ioc_set_batching(q, ioc);
 		/* get_request() expects the queue lock to be held */
 		spin_lock_irq(q->queue_lock);
 		finish_wait(&rl->wait[is_sync], &wait);
 		rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	};
 	return rq;
 }
 /*
  * Obtain a request for @q in direction @rw (READ or WRITE, anything else
  * is a BUG).
  *
  * With __GFP_WAIT set in @gfp_mask the call waits until a request is
  * available; without it a single attempt is made and NULL is returned if
  * that fails.  The queue lock is taken internally and is never held on
  * return.
  */
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	struct request *rq;
 	BUG_ON(rw != READ && rw != WRITE);
 	spin_lock_irq(q->queue_lock);
 	/* the waiting variant returns with the lock already dropped */
 	if (gfp_mask & __GFP_WAIT)
 		return get_request_wait(q, rw, NULL);
 	rq = get_request(q, rw, NULL, gfp_mask);
 	/* on failure get_request() leaves the lock held, release it here */
 	if (!rq)
 		spin_unlock_irq(q->queue_lock);
 	return rq;
 }
 EXPORT_SYMBOL(blk_get_request);
 /**
  * blk_make_request - given a bio, allocate a corresponding struct request.
  * @q: target request queue
  * @bio:  The bio describing the memory mappings that will be submitted for IO.
  *        It may be a chained-bio properly constructed by block/bio layer.
  * @gfp_mask: gfp flags to be used for memory allocation
  *
  * blk_make_request is the parallel of generic_make_request for BLOCK_PC
  * type commands. Where the struct request needs to be further initialized by
  * the caller. It is passed a &struct bio, which describes the memory info of
  * the I/O transfer.
  *
  * The caller of blk_make_request must make sure that bi_io_vec
  * are set to describe the memory buffers. That bio_data_dir() will return
  * the needed direction of the request. (And all bio's in the passed bio-chain
  * are properly set accordingly)
  *
  * If called under non-sleepable conditions, mapped bio buffers must not
  * need bouncing, by calling the appropriate masked or flagged allocator,
  * suitable for the target device. Otherwise the call to blk_queue_bounce will
  * BUG.
  *
  * WARNING: When allocating/cloning a bio-chain, careful consideration should be
  * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
  * anything but the first bio in the chain. Otherwise you risk waiting for IO
  * completion of a bio that hasn't been submitted yet, thus resulting in a
  * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
  * of bio_alloc(), as that avoids the mempool deadlock.
  * If possible a big IO should be split into smaller parts when allocation
  * fails. Partial allocation should not be an error, or you risk a live-lock.
  *
  * Returns the new request on success, ERR_PTR(-ENOMEM) if no request could
  * be obtained, or ERR_PTR() of the error reported by blk_rq_append_bio();
  * in the latter case the request has already been put again.
  */
 struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 				 gfp_t gfp_mask)
 {
 	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
 	if (unlikely(!rq))
 		return ERR_PTR(-ENOMEM);
 	/* walk the bio chain, bouncing each bio if needed before appending */
 	for_each_bio(bio) {
 		struct bio *bounce_bio = bio;
 		int ret;
 		blk_queue_bounce(q, &bounce_bio);
 		ret = blk_rq_append_bio(q, rq, bounce_bio);
 		if (unlikely(ret)) {
 			blk_put_request(rq);
 			return ERR_PTR(ret);
 		}
 	}
 	return rq;
 }
 EXPORT_SYMBOL(blk_make_request);
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
  *
  * Description:
  *    Drivers often keep queueing requests until the hardware cannot accept
  *    more, when that condition happens we need to put the request back
  *    on the queue. Must be called with queue lock held.
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
 	/* stop the request's timeout and clear its completion marker */
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
 	/* a tagged request gives its tag back before being requeued */
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
 	/* the request must not already be linked on a queue list */
 	BUG_ON(blk_queued_rq(rq));
 	elv_requeue_request(q, rq);
 }
 EXPORT_SYMBOL(blk_requeue_request);
 /*
  * Account @rq as new I/O and then hand it to the elevator at position
  * @where (one of the ELEVATOR_INSERT_* values).
  */
 static void add_acct_request(struct request_queue *q, struct request *rq,
 			     int where)
 {
 	drive_stat_acct(rq, 1);
 	__elv_add_request(q, rq, where);
 }
 /**
  * blk_insert_request - insert a special request into a request queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
  * @at_head:	insert request at head or tail of queue
  * @data:	private data
  *
  * Description:
  *    Many block devices need to execute commands asynchronously, so they don't
  *    block the whole kernel from preemption during request execution.  This is
  *    accomplished normally by inserting artificial requests tagged as
  *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
  *    be scheduled for actual execution by the request queue.
  *
  *    We have the option of inserting the head or the tail of the queue.
  *    Typically we use the tail for new ioctls and so forth.  We use the head
  *    of the queue for things like a QUEUE_FULL message from a device, or a
  *    host that is unable to accept a particular command.
  *
  *    The queue lock is taken internally and the queue is run right after
  *    the insertion.
  */
 void blk_insert_request(struct request_queue *q, struct request *rq,
 			int at_head, void *data)
 {
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 	unsigned long flags;
 	/*
 	 * tell I/O scheduler that this isn't a regular read/write (ie it
 	 * must not attempt merges on this) and that it acts as a soft
 	 * barrier
 	 */
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->special = data;
 	spin_lock_irqsave(q->queue_lock, flags);
 	/*
 	 * If command is tagged, release the tag
 	 */
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
 	add_acct_request(q, rq, where);
 	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
 /*
  * Bring the time based statistics of @part up to @now.
  *
  * If requests are in flight, the time since the last stamp is added to
  * io_ticks and, multiplied by the in-flight count, to time_in_queue.
  * The stamp is then moved to @now.  Nothing happens when the stamp
  * already equals @now.
  */
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
 	unsigned long elapsed;
 	if (now == part->stamp)
 		return;
 	if (part_in_flight(part)) {
 		elapsed = now - part->stamp;
 		__part_stat_add(cpu, part, time_in_queue,
 				part_in_flight(part) * elapsed);
 		__part_stat_add(cpu, part, io_ticks, elapsed);
 	}
 	part->stamp = now;
 }
 /**
  * part_round_stats() - Round off the performance stats on a struct disk_stats.
  * @cpu: cpu number for stats access
  * @part: target partition
  *
  * The average IO queue length and utilisation statistics are kept by
  * observing the current queue length and how long the queue has been in
  * that state.
  *
  * That accounting normally happens on IO completion, which can lead to
  * more than a second's worth of IO being accounted within one second and
  * thus to >100% utilisation.  To avoid that, this function is called to
  * round off before the results are returned when /proc/diskstats is read:
  * all queue usage up to the current jiffies is accounted immediately and
  * the counters restart from there.
  *
  * For a real partition (non-zero partno) the whole-disk entry part0 is
  * rounded as well, using the same timestamp.
  */
 void part_round_stats(int cpu, struct hd_struct *part)
 {
 	const unsigned long now = jiffies;
 	struct hd_struct *whole;
 	if (part->partno) {
 		whole = &part_to_disk(part)->part0;
 		part_round_stats_single(cpu, whole, now);
 	}
 	part_round_stats_single(cpu, part, now);
 }
 EXPORT_SYMBOL_GPL(part_round_stats);
 /*
  * Drop one reference on @req; when it was the last one, complete the
  * request towards the elevator and, if it came from the queue's own pool,
  * free it and update the request accounting.
  *
  * queue lock must be held
  */
 void __blk_put_request(struct request_queue *q, struct request *req)
 {
 	int is_sync, priv;
 	if (unlikely(!q))
 		return;
 	/* other references remain */
 	if (unlikely(--req->ref_count))
 		return;
 	elv_completed_request(q, req);
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
 	 */
 	if (!(req->cmd_flags & REQ_ALLOCED))
 		return;
 	/* sample the flags before the request memory is released */
 	is_sync = rq_is_sync(req) != 0;
 	priv = req->cmd_flags & REQ_ELVPRIV;
 	BUG_ON(!list_empty(&req->queuelist));
 	BUG_ON(!hlist_unhashed(&req->hash));
 	blk_free_request(q, req);
 	freed_request(q, is_sync, priv);
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
 /*
  * Locked wrapper around __blk_put_request(): takes the queue lock of the
  * request's own queue (saving and restoring the interrupt state) around
  * the call.
  */
 void blk_put_request(struct request *req)
 {
 	unsigned long flags;
 	struct request_queue *q = req->q;
 	spin_lock_irqsave(q->queue_lock, flags);
 	__blk_put_request(q, req);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_put_request);
 /**
  * blk_add_request_payload - add a payload to a request
  * @rq: request to update
  * @page: page backing the payload
  * @len: length of the payload.
  *
  * This allows to later add a payload to an already submitted request by
  * a block driver.  The driver needs to take care of freeing the payload
  * itself.
  *
  * Note that this is a quite horrible hack and nothing but handling of
  * discard requests should ever use it.
  */
 void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len)
 {
 	struct bio *payload = rq->bio;
 	/* Describe the payload with the bio's first io vector entry. */
 	payload->bi_io_vec->bv_page = page;
 	payload->bi_io_vec->bv_offset = 0;
 	payload->bi_io_vec->bv_len = len;
 	/* The bio now carries exactly one vector / one physical segment. */
 	payload->bi_vcnt = 1;
 	payload->bi_phys_segments = 1;
 	payload->bi_size = len;
 	/* Mirror the new size and segment count in the request itself. */
 	rq->resid_len = len;
 	rq->__data_len = rq->resid_len;
 	rq->nr_phys_segments = 1;
 	/* Must come last: bio_data() is taken from the bio set up above. */
 	rq->buffer = bio_data(payload);
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 /*
  * Try to append @bio to the tail of @req.  Returns true when the bio has
  * been linked into the request, false when the merge was refused.
  */
 static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 				   struct bio *bio)
 {
 	const int bio_failfast = bio->bi_rw & REQ_FAILFAST_MASK;
 	/* Debug aid (to be removed later): dump unmergeable candidates. */
 	if (!rq_mergeable(req)) {
 		blk_dump_rq_flags(req, "back");
 		return false;
 	}
 	if (!ll_back_merge_fn(q, req, bio))
 		return false;
 	trace_block_bio_backmerge(q, bio);
 	/* Failfast bits differ between request and bio: mark a mixed merge. */
 	if (bio_failfast != (req->cmd_flags & REQ_FAILFAST_MASK))
 		blk_rq_set_mixed_merge(req);
 	/* Link the bio behind the current tail and grow the request. */
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
 	req->__data_len += bio->bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 	drive_stat_acct(req, 0);
 	return true;
 }
 /*
  * Try to prepend @bio to the head of @req.  Returns true when the bio has
  * been linked into the request, false when the merge was refused.
  */
 static bool bio_attempt_front_merge(struct request_queue *q,
 				    struct request *req, struct bio *bio)
 {
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	/*
 	 * Debug stuff, kill later
 	 */
 	if (!rq_mergeable(req)) {
 		blk_dump_rq_flags(req, "front");
 		return false;
 	}
 	if (!ll_front_merge_fn(q, req, bio))
 		return false;
 	trace_block_bio_frontmerge(q, bio);
 	/* Failfast bits differ between request and bio: mark a mixed merge. */
 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
 		blk_rq_set_mixed_merge(req);
 	/* The bio becomes the new head of the request's bio chain. */
 	bio->bi_next = req->bio;
 	req->bio = bio;
 	/*
 	 * may not be valid. if the low level driver said
 	 * it didn't need a bounce buffer then it better
 	 * not touch req->buffer either...
 	 */
 	req->buffer = bio_data(bio);
 	/* The request now starts where the prepended bio starts. */
 	req->__sector = bio->bi_sector;
 	req->__data_len += bio->bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 	drive_stat_acct(req, 0);
 	return true;
 }
 /*
  * Attempts to merge with the plugged list in the current process. Returns
  * true if merge was successful, otherwise false.
  */
 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
 			       struct bio *bio)
 {
 	struct blk_plug *plug = tsk->plug;
 	struct request *cand;
 	/* No plug on this task means there is nothing to merge into. */
 	if (!plug)
 		return false;
 	/* Walk the plug list from its tail, considering only requests for @q. */
 	list_for_each_entry_reverse(cand, &plug->list, queuelist) {
 		int kind;
 		if (cand->q != q)
 			continue;
 		kind = elv_try_merge(cand, bio);
 		if (kind == ELEVATOR_BACK_MERGE) {
 			if (bio_attempt_back_merge(q, cand, bio))
 				return true;
 		} else if (kind == ELEVATOR_FRONT_MERGE) {
 			if (bio_attempt_front_merge(q, cand, bio))
 				return true;
 		}
 	}
 	return false;
 }
 /*
  * Initialise the fields of @req that are derived from @bio, then attach
  * the bio through blk_rq_bio_prep().
  */
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cmd_type = REQ_TYPE_FS;
 	req->errors = 0;
 	req->cpu = bio->bi_comp_cpu;
 	req->__sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
 	/* Carry over the flag bits covered by REQ_COMMON_MASK. */
 	req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
 	/* A REQ_RAHEAD bio gets every failfast bit set on the request. */
 	if (bio->bi_rw & REQ_RAHEAD)
 		req->cmd_flags |= REQ_FAILFAST_MASK;
 	blk_rq_bio_prep(req->q, req, bio);
 }
 /*
  * Queue @bio on @q.  The bio is first offered to the current task's plug
  * list and then to the request nominated by elv_merge(); if neither merge
  * succeeds, a new request is allocated and added either to the plug list
  * (when the task has a plug) or to the queue.  Always returns 0.
  */
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
 	struct blk_plug *plug;
 	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
 	 * ISA dma in theory)
 	 */
 	blk_queue_bounce(q, &bio);
 	/*
 	 * FLUSH/FUA bios skip both merge attempts and go straight to
 	 * request allocation, with ELEVATOR_INSERT_FLUSH as insertion mode.
 	 * The lock is taken here because get_rq is reached with it held.
 	 */
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
 		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
 	}
 	/*
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
 	if (attempt_plug_merge(current, q, bio))
 		goto out;
 	spin_lock_irq(q->queue_lock);
 	/*
 	 * Ask the elevator for a merge candidate.  On a successful bio
 	 * merge, first try to merge the grown request with its neighbour;
 	 * only if that does not happen is the elevator told the request
 	 * changed.  Both success paths leave through out_unlock with the
 	 * queue lock still held.
 	 */
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
 		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_back_merge(q, req, bio)) {
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
 		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_front_merge(q, req, bio)) {
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	}
 get_rq:
 	/*
 	 * This sync check and mask will be re-done in init_request_from_bio(),
 	 * but we need to set it earlier to expose the sync flag to the
 	 * rq allocator and io schedulers.
 	 */
 	rw_flags = bio_data_dir(bio);
 	if (sync)
 		rw_flags |= REQ_SYNC;
 	/*
 	 * Grab a free request. This might sleep but cannot fail.
 	 * Returns with the queue unlocked.
 	 */
 	req = get_request_wait(q, rw_flags, bio);
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
 	 * may now be mergeable after it had proven unmergeable (above).
 	 * We don't worry about that case for efficiency. It won't happen
 	 * often, and the elevators are able to handle it.
 	 */
 	init_request_from_bio(req, bio);
 	/*
 	 * With QUEUE_FLAG_SAME_COMP set on the queue or BIO_CPU_AFFINE set
 	 * on the bio, override req->cpu with the group of the current cpu.
 	 */
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
 	    bio_flagged(bio, BIO_CPU_AFFINE)) {
 		req->cpu = blk_cpu_to_group(get_cpu());
 		put_cpu();
 	}
 	plug = current->plug;
 	if (plug) {
 		/*
 		 * If this is the first request added after a plug, fire
 		 * of a plug trace. If others have been added before, check
 		 * if we have multiple devices in this plug. If so, make a
 		 * note to sort the list before dispatch.
 		 */
 		if (list_empty(&plug->list))
 			trace_block_plug(q);
 		else if (!plug->should_sort) {
 			struct request *__rq;
 			/* Only the most recently plugged request is compared. */
 			__rq = list_entry_rq(plug->list.prev);
 			if (__rq->q != q)
 				plug->should_sort = 1;
 		}
 		/*
 		 * Debug flag, kill later
 		 */
 		req->cmd_flags |= REQ_ON_PLUG;
 		/* The plug list is appended to without taking the queue lock. */
 		list_add_tail(&req->queuelist, &plug->list);
 		drive_stat_acct(req, 1);
 	} else {
 		/* No plug: retake the queue lock and insert the request directly. */
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
 		__blk_run_queue(q);
 		/*
 		 * The merge paths above jump here with the queue lock held,
 		 * sharing the unlock with the direct-insert path.
 		 */
 out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
 out:
 	return 0;
 }
 /*
  * If bio->bi_dev is a partition, remap the location
  */
 static inline void blk_partition_remap(struct bio *bio)
 {
 	struct block_device *part_bdev = bio->bi_bdev;
 	struct hd_struct *part;
 	/*
 	 * Nothing to remap for a bio without sectors, or for one that
 	 * already targets the containing device.
 	 */
 	if (!bio_sectors(bio) || part_bdev == part_bdev->bd_contains)
 		return;
 	part = part_bdev->bd_part;
 	/* Shift by the partition start and retarget the containing device. */
 	bio->bi_sector += part->start_sect;
 	bio->bi_bdev = part_bdev->bd_contains;
 	/* The trace reports the original device and pre-remap sector. */
 	trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
 			      part_bdev->bd_dev,
 			      bio->bi_sector - part->start_sect);
 }
 /*
  * Log an access beyond the end of the device for @bio and flag the bio
  * with BIO_EOF.
  */
 static void handle_bad_sector(struct bio *bio)
 {
 	char name[BDEVNAME_SIZE];
 	unsigned long long want;
 	long long limit;
 	/* First sector past the requested range, and the device size in sectors. */
 	want = (unsigned long long)bio->bi_sector + bio_sectors(bio);
 	limit = (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9);
 	printk(KERN_INFO "attempt to access beyond end of device\n");
 	printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
 	       bdevname(bio->bi_bdev, name), bio->bi_rw, want, limit);
 	set_bit(BIO_EOF, &bio->bi_flags);
 }
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static DECLARE_FAULT_ATTR(fail_make_request);
 /* Boot parameter handler: hand @str to the fail_make_request fault attr. */
 static int __init setup_fail_make_request(char *str)
 {
 	int rc = setup_fault_attr(&fail_make_request, str);
 	return rc;
 }
 __setup("fail_make_request=", setup_fail_make_request);
 /*
  * Decide whether @bio should be failed artificially.  Fault injection is
  * consulted only when make_it_fail is set on the disk's part0 or on the
  * bio's own partition; otherwise 0 is returned.
  */
 static int should_fail_request(struct bio *bio)
 {
 	struct hd_struct *target = bio->bi_bdev->bd_part;
 	if (!part_to_disk(target)->part0.make_it_fail && !target->make_it_fail)
 		return 0;
 	return should_fail(&fail_make_request, bio->bi_size);
 }
 /* Create the "fail_make_request" fault-attr entries at late init. */
 static int __init fail_make_request_debugfs(void)
 {
 	int rc = init_fault_attr_dentries(&fail_make_request,
 					  "fail_make_request");
 	return rc;
 }
 late_initcall(fail_make_request_debugfs);
 #else /* CONFIG_FAIL_MAKE_REQUEST */
 /* Stub used when CONFIG_FAIL_MAKE_REQUEST is not set: never fail a bio. */
 static inline int should_fail_request(struct bio *bio)
 {
 	return 0;
 }
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 /*
  * Check whether this bio extends beyond the end of the device.
  */
 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
 {
 	sector_t dev_sectors, start;
 	/* A bio without sectors cannot run past the end. */
 	if (!nr_sectors)
 		return 0;
 	/* Device or partition size in sectors; zero is treated as unknown. */
 	dev_sectors = i_size_read(bio->bi_bdev->bd_inode) >> 9;
 	if (!dev_sectors)
 		return 0;
 	start = bio->bi_sector;
 	/* In range: the bio fits and ends no later than the last sector. */
 	if (dev_sectors >= nr_sectors && dev_sectors - nr_sectors >= start)
 		return 0;
 	/*
 	 * Out of range.  This is not unusual: the kernel calls bread()
 	 * without checking the size of the device, e.g. when mounting.
 	 */
 	handle_bad_sector(bio);
 	return 1;
 }
 /**
  * generic_make_request - hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
  *
  * generic_make_request() is used to make I/O requests of block
  * devices. It is passed a &struct bio, which describes the I/O that needs
  * to be done.
  *
  * generic_make_request() does not return any status.  The
  * success/failure status of the request, along with notification of
  * completion, is delivered asynchronously through the bio->bi_end_io
  * function described (one day) elsewhere.
  *
  * The caller of generic_make_request must make sure that bi_io_vec
  * are set to describe the memory buffer, and that bi_dev and bi_sector are
  * set to describe the device address, and the
  * bi_end_io and optionally bi_private are set to describe how
  * completion notification should be signaled.
  *
  * generic_make_request and the drivers it calls may use bi_next if this
  * bio happens to be merged with someone else, and may change bi_dev and
  * bi_sector for remaps as it sees fit.  So the values of these fields
  * should NOT be depended on after the call to generic_make_request.
  */
 /*
  * Validate @bio and hand it to the make_request_fn of the queue behind
  * bio->bi_bdev, repeating for as long as make_request_fn returns non-zero.
  * Any failed check completes the bio through bio_endio(); the error is
  * -EIO unless a check sets a different value.
  */
 static inline void __generic_make_request(struct bio *bio)
 {
 	struct request_queue *q;
 	sector_t old_sector;
 	int ret, nr_sectors = bio_sectors(bio);
 	dev_t old_dev;
 	int err = -EIO;
 	might_sleep();
 	if (bio_check_eod(bio, nr_sectors))
 		goto end_io;
 	/*
 	 * Resolve the mapping until finished. (drivers are
 	 * still free to implement/resolve their own stacking
 	 * by explicitly returning 0)
 	 *
 	 * NOTE: we don't repeat the blk_size check for each new device.
 	 * Stacking drivers are expected to know what they are doing.
 	 */
 	/* old_sector == -1 marks the first pass: no remap to trace yet. */
 	old_sector = -1;
 	old_dev = 0;
 	do {
 		char b[BDEVNAME_SIZE];
 		q = bdev_get_queue(bio->bi_bdev);
 		if (unlikely(!q)) {
 			printk(KERN_ERR
 			       "generic_make_request: Trying to access "
 				"nonexistent block-device %s (%Lu)\n",
 				bdevname(bio->bi_bdev, b),
 				(long long) bio->bi_sector);
 			goto end_io;
 		}
 		/* Discards are exempt from the max_hw_sectors size limit. */
 		if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
 			     nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
 			       bdevname(bio->bi_bdev, b),
 			       bio_sectors(bio),
 			       queue_max_hw_sectors(q));
 			goto end_io;
 		}
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 		if (should_fail_request(bio))
 			goto end_io;
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
 		 */
 		blk_partition_remap(bio);
 		if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
 			goto end_io;
 		/*
 		 * From the second pass on, trace the remap performed by the
 		 * previous make_request_fn using the values saved below.
 		 */
 		if (old_sector != -1)
 			trace_block_bio_remap(q, bio, old_dev, old_sector);
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
 		/* Re-check the end of device against the (re)mapped target. */
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
 		/*
 		 * Filter flush bio's early so that make_request based
 		 * drivers without flush support don't have to worry
 		 * about them.
 		 */
 		if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
 			bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
 			/* A flush carrying no data completes successfully here. */
 			if (!nr_sectors) {
 				err = 0;
 				goto end_io;
 			}
 		}
 		/*
 		 * Reject discards the queue does not support, and secure
 		 * discards on queues without secure discard support.
 		 */
 		if ((bio->bi_rw & REQ_DISCARD) &&
 		    (!blk_queue_discard(q) ||
 		     ((bio->bi_rw & REQ_SECURE) &&
 		      !blk_queue_secdiscard(q)))) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
 		blk_throtl_bio(q, &bio);
 		/*
 		 * If bio = NULL, bio has been throttled and will be submitted
 		 * later.
 		 */
 		if (!bio)
 			break;
 		trace_block_bio_queue(q, bio);
 		/* A non-zero return asks for another pass over the bio. */
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
 	return;
 end_io:
 	bio_endio(bio, err);
 }
 /*
  * We only want one ->make_request_fn to be active at a time,
  * else stack usage with stacked devices could be a problem.
  * So use current->bio_list to keep a list of requests
  * submitted by a make_request_fn function.
  * current->bio_list is also used as a flag to say if
  * generic_make_request is currently active in this task or not.
  * If it is NULL, then no make_request is active.  If it is non-NULL,
  * then a make_request is active, and new requests should be added
  * at the tail
  */
 void generic_make_request(struct bio *bio)
 {
 	struct bio_list bio_list_on_stack;
 	if (current->bio_list) {
 		/* make_request is active */
 		bio_list_add(current->bio_list, bio);
 		return;
 	}
 	/* following loop may be a bit non-obvious, and so deserves some
 	 * explanation.
 	 * Before entering the loop, bio->bi_next is NULL (as all callers
 	 * ensure that) so we have a list with a single bio.
 	 * We pretend that we have just taken it off a longer list, so
 	 * we assign bio_list to a pointer to the bio_list_on_stack,
 	 * thus initialising the bio_list of new bios to be
 	 * added.  __generic_make_request may indeed add some more bios
 	 * through a recursive call to generic_make_request.  If it
 	 * did, we find a non-NULL value in bio_list and re-enter the loop
 	 * from the top.  In this case we really did just take the bio
 	 * of the top of the list (no pretending) and so remove it from
 	 * bio_list, and call into __generic_make_request again.
 	 *
 	 * The loop was structured like this to make only one call to
 	 * __generic_make_request (which is important as it is large and
 	 * inlined) and to keep the structure simple.
 	 */
 	BUG_ON(bio->bi_next);
 	bio_list_init(&bio_list_on_stack);
 	current->bio_list = &bio_list_on_stack;
 	do {
 		__generic_make_request(bio);
 		bio = bio_list_pop(current->bio_list);
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
 }
 EXPORT_SYMBOL(generic_make_request);
 /**
  * submit_bio - submit a bio to the block device layer for I/O
  * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
  * uses that function to do most of the work. Both are fairly rough
  * interfaces; @bio must be presetup and ready for I/O.
  *
  */
 void submit_bio(int rw, struct bio *bio)
 {
 	int count = bio_sectors(bio);
 	bio->bi_rw |= rw;
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
 	 */
 	if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
 			task_io_account_read(bio->bi_size);
 			count_vm_events(PGPGIN, count);
 		}
 		if (unlikely(block_dump)) {
 			char b[BDEVNAME_SIZE];
 			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
 			current->comm, task_pid_nr(current),
 				(rw & WRITE) ? "WRITE" : "READ",
 				(unsigned long long)bio->bi_sector,
 				bdevname(bio->bi_bdev, b),
 				count);
 		}
 	}
 	generic_make_request(bio);
 }
 EXPORT_SYMBOL(submit_bio);
 /**
  * blk_rq_check_limits - Helper function to check a request for the queue limit
  * @q:  the queue
  * @rq: the request being checked
  *
  * Description:
  *    @rq may have been made based on weaker limitations of upper-level queues
  *    in request stacking drivers, and it may violate the limitation of @q.
  *    Since the block layer and the underlying device driver trust @rq
  *    after it is inserted to @q, it should be checked against @q before
  *    the insertion using this generic function.
  *
  *    This function should also be useful for request stacking drivers
  *    in some cases below, so export this function.
  *    Request stacking drivers like request-based dm may change the queue
  *    limits while requests are in the queue (e.g. dm's table swapping).
  *    Such request stacking drivers should check those requests against
  *    the new queue limits again when they dispatch those requests,
  *    although such checkings are also done against the old queue limits
  *    when submitting requests.
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
 	/* Discard requests are not subject to these limits. */
 	if (rq->cmd_flags & REQ_DISCARD)
 		return 0;
 	/*
 	 * The byte limit is computed in 64 bits: shifting the sector limit
 	 * left by 9 in unsigned int arithmetic would wrap for limits of
 	 * 2^23 sectors or more (assuming a 32-bit unsigned int) and could
 	 * then reject requests that are within the limit.
 	 */
 	if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
 	    blk_rq_bytes(rq) > (unsigned long long)queue_max_hw_sectors(q) << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
 	}
 	/*
 	 * queue's settings related to segment counting like q->bounce_pfn
 	 * may differ from that of other stacking queues.
 	 * Recalculate it to check the request correctly on this queue's
 	 * limitation.
 	 */
 	blk_recalc_rq_segments(rq);
 	if (rq->nr_phys_segments > queue_max_segments(q)) {
 		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
 		return -EIO;
 	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_rq_check_limits);
 /**
  * blk_insert_cloned_request - Helper for stacking drivers to submit a request
  * @q:  the queue to submit the request
  * @rq: the request being queued
  */
 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned long irq_flags;
 	/* Any limit violation reported for @q is returned as -EIO. */
 	if (blk_rq_check_limits(q, rq) != 0)
 		return -EIO;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	/* Fault injection, keyed on make_it_fail of the disk's part0. */
 	if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
 	    should_fail(&fail_make_request, blk_rq_bytes(rq)))
 		return -EIO;
 #endif
 	spin_lock_irqsave(q->queue_lock, irq_flags);
 	/*
 	 * The request is about to be linked to another request_queue, so
 	 * the caller must have dequeued it before calling this function.
 	 */
 	BUG_ON(blk_queued_rq(rq));
 	add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
 	spin_unlock_irqrestore(q->queue_lock, irq_flags);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 /**
  * blk_rq_err_bytes - determine number of bytes till the next failure boundary
  * @rq: request to examine
  *
  * Description:
  *     A request could be merge of IOs which require different failure
  *     handling.  This function determines the number of bytes which
  *     can be failed from the beginning of the request without
  *     crossing into area which need to be retried further.
  *
  * Return:
  *     The number of bytes to fail.
  *
  * Context:
  *     queue_lock must be held.
  */
 unsigned int blk_rq_err_bytes(const struct request *rq)
 {
 	const unsigned int rq_ff = rq->cmd_flags & REQ_FAILFAST_MASK;
 	unsigned int failable = 0;
 	struct bio *cur;
 	/* Without a mixed merge the whole request can be failed. */
 	if (!(rq->cmd_flags & REQ_MIXED_MERGE))
 		return blk_rq_bytes(rq);
 	/*
 	 * The only mixing that currently happens is between different
 	 * failfast types.  Count the leading bios that have every failfast
 	 * bit the request has (at least as eager to fail) and stop at the
 	 * first one that does not.
 	 */
 	cur = rq->bio;
 	while (cur && (cur->bi_rw & rq_ff) == rq_ff) {
 		failable += cur->bi_size;
 		cur = cur->bi_next;
 	}
 	/* Zero bytes for a non-empty request could lead to an infinite loop. */
 	BUG_ON(blk_rq_bytes(rq) && !failable);
 	return failable;
 }
 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 /*
  * Add the sectors covered by @bytes to the per-partition statistics of
  * @req, if I/O statistics are enabled for the request.
  */
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	struct hd_struct *part;
 	int rw, cpu;
 	if (!blk_do_io_stat(req))
 		return;
 	rw = rq_data_dir(req);
 	cpu = part_stat_lock();
 	part = req->part;
 	/* Statistics are kept in 512-byte sectors. */
 	part_stat_add(cpu, part, sectors[rw], bytes >> 9);
 	part_stat_unlock();
 }
 static void blk_account_io_done(struct request *req)
 {
 	unsigned long duration;
 	struct hd_struct *part;
 	int rw, cpu;
 	/*
 	 * Requests carrying REQ_FLUSH_SEQ are skipped: flush_rq isn't
 	 * accounted as a normal IO on queueing nor completion, accounting
 	 * the containing request is enough.
 	 */
 	if (!blk_do_io_stat(req) || (req->cmd_flags & REQ_FLUSH_SEQ))
 		return;
 	duration = jiffies - req->start_time;
 	rw = rq_data_dir(req);
 	cpu = part_stat_lock();
 	part = req->part;
 	/* One more completed I/O, plus the jiffies it took. */
 	part_stat_inc(cpu, part, ios[rw]);
 	part_stat_add(cpu, part, ticks[rw], duration);
 	part_round_stats(cpu, part);
 	part_dec_in_flight(part, rw);
 	hd_struct_put(part);
 	part_stat_unlock();
 }
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
  *
  * Description:
  *     Return the request at the top of @q.  The returned request
  *     should be started using blk_start_request() before LLD starts
  *     processing it.
  *
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
  *
  * Context:
  *     queue_lock must be held.
  */
 struct request *blk_peek_request(struct request_queue *q)
 {
 	struct request *rq;
 	int ret;
 	/*
 	 * Loop until a request is ready to be returned.  The loop only
 	 * iterates again after a BLKPREP_KILL, when the killed request has
 	 * been ended and the next one is examined.
 	 */
 	while ((rq = __elv_next_request(q)) != NULL) {
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
 			 * sees this request (possibly after
 			 * requeueing).  Notify IO scheduler.
 			 */
 			if (rq->cmd_flags & REQ_SORTED)
 				elv_activate_rq(q, rq);
 			/*
 			 * just mark as started even if we don't start
 			 * it, a request that has been delayed should
 			 * not be passed by new incoming requests
 			 */
 			rq->cmd_flags |= REQ_STARTED;
 			trace_block_rq_issue(q, rq);
 		}
 		/*
 		 * With no boundary request pending, or when this request is
 		 * the boundary, record its end sector and clear the boundary.
 		 */
 		if (!q->boundary_rq || q->boundary_rq == rq) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = NULL;
 		}
 		/* REQ_DONTPREP: return the request without preparing it. */
 		if (rq->cmd_flags & REQ_DONTPREP)
 			break;
 		if (q->dma_drain_size && blk_rq_bytes(rq)) {
 			/*
 			 * make sure space for the drain appears we
 			 * know we can do this because max_hw_segments
 			 * has been adjusted to be one fewer than the
 			 * device can handle
 			 */
 			rq->nr_phys_segments++;
 		}
 		/* Without a prep function the request is ready as it is. */
 		if (!q->prep_rq_fn)
 			break;
 		ret = q->prep_rq_fn(q, rq);
 		if (ret == BLKPREP_OK) {
 			break;
 		} else if (ret == BLKPREP_DEFER) {
 			/*
 			 * the request may have been (partially) prepped.
 			 * we need to keep this request in the front to
 			 * avoid resource deadlock.  REQ_STARTED will
 			 * prevent other fs requests from passing this one.
 			 */
 			if (q->dma_drain_size && blk_rq_bytes(rq) &&
 			    !(rq->cmd_flags & REQ_DONTPREP)) {
 				/*
 				 * remove the space for the drain we added
 				 * so that we don't add it again
 				 */
 				--rq->nr_phys_segments;
 			}
 			/* Deferred: report that no request is available now. */
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
 			rq->cmd_flags |= REQ_QUIET;
 			/*
 			 * Mark this request as started so we don't trigger
 			 * any debug logic in the end I/O path.
 			 */
 			blk_start_request(rq);
 			__blk_end_request_all(rq, -EIO);
 			/* No break: continue with the next request. */
 		} else {
 			/* Unknown return value: log it and return rq anyway. */
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
 		}
 	}
 	return rq;
 }
 EXPORT_SYMBOL(blk_peek_request);
 /*
  * Unlink @rq from the list it is queued on and, for requests that are
  * accounted, start counting it as in flight on its queue.
  */
 void blk_dequeue_request(struct request *rq)
 {
 	struct request_queue *queue = rq->q;
 	BUG_ON(list_empty(&rq->queuelist));
 	BUG_ON(ELV_ON_HASH(rq));
 	list_del_init(&rq->queuelist);
 	if (!blk_account_rq(rq))
 		return;
 	/*
 	 * From removal from the lists until the request is freed, it is
 	 * accounted as I/O that is in progress at the driver side.
 	 */
 	queue->in_flight[rq_is_sync(rq)]++;
 	set_io_start_time_ns(rq);
 }
 /**
  * blk_start_request - start request processing on the driver
  * @req: request to dequeue
  *
  * Description:
  *     Dequeue @req and start timeout timer on it.  This hands off the
  *     request to the driver.
  *
  *     Block internal functions which don't want to start timer should
  *     call blk_dequeue_request().
  *
  * Context:
  *     queue_lock must be held.
  */
 void blk_start_request(struct request *req)
 {
 	struct request *bidi;
 	blk_dequeue_request(req);
 	/*
 	 * The request is being handed to the hardware: set resid_len to
 	 * the full byte count (for the paired request of a bidi request
 	 * as well) and add the timeout handler.
 	 */
 	req->resid_len = blk_rq_bytes(req);
 	if (unlikely(blk_bidi_rq(req))) {
 		bidi = req->next_rq;
 		bidi->resid_len = blk_rq_bytes(bidi);
 	}
 	blk_add_timer(req);
 }
 EXPORT_SYMBOL(blk_start_request);
 /**
  * blk_fetch_request - fetch a request from a request queue
  * @q: request queue to fetch a request from
  *
  * Description:
  *     Return the request at the top of @q.  The request is started on
  *     return and LLD can start processing it immediately.
  *
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
  *
  * Context:
  *     queue_lock must be held.
  */
 struct request *blk_fetch_request(struct request_queue *q)
 {
 	struct request *rq;
 	rq = blk_peek_request(q);
 	if (rq)
 		blk_start_request(rq);
 	return rq;
 }
 EXPORT_SYMBOL(blk_fetch_request);
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @req:      the request being processed
  * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete @req
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @req, but doesn't complete
  *     the request structure even if @req doesn't have leftover.
  *     If @req has leftover, sets it up for the next range of segments.
  *
  *     This special helper function is only for request stacking drivers
  *     (e.g. request-based dm) so that they can handle partial completion.
  *     Actual device drivers should use blk_end_request instead.
  *
  *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
  *     %false return from this function.
  *
  * Return:
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
  **/
 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	/*
 	 * total_bytes: bytes completed by this call.
 	 * bio_nbytes:  bytes completed within a bio that is only partly done.
 	 * next_idx:    fully completed vectors of that partly done bio.
 	 */
 	int total_bytes, bio_nbytes, next_idx = 0;
 	struct bio *bio;
 	/* A request without bios has no data left to complete. */
 	if (!req->bio)
 		return false;
 	trace_block_rq_complete(req->q, req);
 	/*
 	 * For fs requests, rq is just carrier of independent bio's
 	 * and each partial completion should be handled separately.
 	 * Reset per-request error on each partial completion.
 	 *
 	 * TODO: tj: This is too subtle.  It would be better to let
 	 * low level drivers do what they see fit.
 	 */
 	if (req->cmd_type == REQ_TYPE_FS)
 		req->errors = 0;
 	/* Log errors on fs requests unless the request is marked quiet. */
 	if (error && req->cmd_type == REQ_TYPE_FS &&
 	    !(req->cmd_flags & REQ_QUIET)) {
 		char *error_type;
 		switch (error) {
 		case -ENOLINK:
 			error_type = "recoverable transport";
 			break;
 		case -EREMOTEIO:
 			error_type = "critical target";
 			break;
 		case -EBADE:
 			error_type = "critical nexus";
 			break;
 		case -EIO:
 		default:
 			error_type = "I/O";
 			break;
 		}
 		printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
 		       error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
 		       (unsigned long long)blk_rq_pos(req));
 	}
 	blk_account_io_completion(req, nr_bytes);
 	total_bytes = bio_nbytes = 0;
 	while ((bio = req->bio) != NULL) {
 		int nbytes;
 		if (nr_bytes >= bio->bi_size) {
 			/* The whole bio is covered: unlink it and end it. */
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
 			req_bio_endio(req, bio, nbytes, error);
 			next_idx = 0;
 			bio_nbytes = 0;
 		} else {
 			/* Only part of this bio is covered: walk its vectors. */
 			int idx = bio->bi_idx + next_idx;
 			if (unlikely(idx >= bio->bi_vcnt)) {
 				blk_dump_rq_flags(req, "__end_that");
 				printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
 				       __func__, idx, bio->bi_vcnt);
 				break;
 			}
 			nbytes = bio_iovec_idx(bio, idx)->bv_len;
 			BIO_BUG_ON(nbytes > bio->bi_size);
 			/*
 			 * not a complete bvec done
 			 */
 			if (unlikely(nbytes > nr_bytes)) {
 				/*
 				 * nr_bytes is left untouched here; it is the
 				 * amount consumed from this vector and is
 				 * applied to bv_offset/bv_len after the loop.
 				 */
 				bio_nbytes += nr_bytes;
 				total_bytes += nr_bytes;
 				break;
 			}
 			/*
 			 * advance to the next vector
 			 */
 			next_idx++;
 			bio_nbytes += nbytes;
 		}
 		total_bytes += nbytes;
 		nr_bytes -= nbytes;
 		bio = req->bio;
 		if (bio) {
 			/*
 			 * end more in this run, or just return 'not-done'
 			 */
 			/* nr_bytes is unsigned, so this tests for zero. */
 			if (unlikely(nr_bytes <= 0))
 				break;
 		}
 	}
 	/*
 	 * completely done
 	 */
 	if (!req->bio) {
 		/*
 		 * Reset counters so that the request stacking driver
 		 * can find how many bytes remain in the request
 		 * later.
 		 */
 		req->__data_len = 0;
 		return false;
 	}
 	/*
 	 * if the request wasn't completed, update state
 	 */
 	if (bio_nbytes) {
 		/*
 		 * End the completed part of the bio, skip its finished
 		 * vectors, and trim the leftover nr_bytes off the front of
 		 * the vector that was only partly consumed.
 		 */
 		req_bio_endio(req, bio, bio_nbytes, error);
 		bio->bi_idx += next_idx;
 		bio_iovec(bio)->bv_offset += nr_bytes;
 		bio_iovec(bio)->bv_len -= nr_bytes;
 	}
 	req->__data_len -= total_bytes;
 	req->buffer = bio_data(req->bio);
 	/* update sector only for requests with clear definition of sector */
 	if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
 		req->__sector += total_bytes >> 9;
 	/* mixed attributes always follow the first bio */
 	if (req->cmd_flags & REQ_MIXED_MERGE) {
 		req->cmd_flags &= ~REQ_FAILFAST_MASK;
 		req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
 	}
 	/*
 	 * If total number of sectors is less than the first segment
 	 * size, something has gone terribly wrong.
 	 */
 	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
 		blk_dump_rq_flags(req, "request botched");
 		req->__data_len = blk_rq_cur_bytes(req);
 	}
 	/* recalculate the number of segments */
 	blk_recalc_rq_segments(req);
 	return true;
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 static bool blk_update_bidi_request(struct request *rq, int error,
 				    unsigned int nr_bytes,
 				    unsigned int bidi_bytes)
 {
 	if (blk_update_request(rq, error, nr_bytes))
 		return true;
 	/* Bidi request must be completed as a whole */
 	if (unlikely(blk_bidi_rq(rq)) &&
 	    blk_update_request(rq->next_rq, error, bidi_bytes))
 		return true;
 	if (blk_queue_add_random(rq->q))
 		add_disk_randomness(rq->rq_disk);
 	return false;
 }
 /**
  * blk_unprep_request - unprepare a request
  * @req:	the request
  *
  * This function makes a request ready for complete resubmission (or
  * completion).  It happens only after all error handling is complete,
  * so represents the appropriate moment to deallocate any resources
  * that were allocated to the request in the prep_rq_fn.  The queue
  * lock is held when calling this.
  */
 void blk_unprep_request(struct request *req)
 {
 	struct request_queue *queue = req->q;
 	/* Clear REQ_DONTPREP first, then run the queue's unprep hook if any. */
 	req->cmd_flags &= ~REQ_DONTPREP;
 	if (!queue->unprep_rq_fn)
 		return;
 	queue->unprep_rq_fn(queue, req);
 }
 EXPORT_SYMBOL_GPL(blk_unprep_request);
 /*
  * queue lock must be held
  */
 static void blk_finish_request(struct request *req, int error)
 {
 	/* Give back the tag of a tagged request. */
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 	BUG_ON(blk_queued_rq(req));
 	if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
 		laptop_io_completion(&req->q->backing_dev_info);
 	blk_delete_timer(req);
 	/* Requests still marked REQ_DONTPREP are unprepared first. */
 	if (req->cmd_flags & REQ_DONTPREP)
 		blk_unprep_request(req);
 	blk_account_io_done(req);
 	/* A completion callback, when set, takes over from here. */
 	if (req->end_io) {
 		req->end_io(req, error);
 		return;
 	}
 	/* No callback: drop the references, the paired bidi request first. */
 	if (blk_bidi_rq(req))
 		__blk_put_request(req->next_rq->q, req->next_rq);
 	__blk_put_request(req->q, req);
 }
 /**
  * blk_end_bidi_request - Complete a bidi request
  * @rq:         the request to complete
  * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
  *     Drivers that support bidi can safely call this member for any
  *     type of request, bidi or uni.  In the latter case @bidi_bytes is
  *     just ignored.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
 static bool blk_end_bidi_request(struct request *rq, int error,
 				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *queue = rq->q;
 	unsigned long irq_flags;
 	/* Data still pending: the request is not finished yet. */
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
 	/* Everything completed: finish the request under the queue lock. */
 	spin_lock_irqsave(queue->queue_lock, irq_flags);
 	blk_finish_request(rq, error);
 	spin_unlock_irqrestore(queue->queue_lock, irq_flags);
 	return false;
 }
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
  * @rq:         the request to complete
  * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
  *     Identical to blk_end_bidi_request() except that queue lock is
  *     assumed to be locked on entry and remains so on return.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
 static bool __blk_end_bidi_request(struct request *rq, int error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
 	blk_finish_request(rq, error);
 	return false;
 }
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
  * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq.
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(blk_end_request);
 /**
  * blk_end_request_all - Helper function for drivers to finish the request.
  * @rq: the request to finish
  * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Completely finish @rq: all of its bytes and, for a bidi request,
  *     all bytes of @rq->next_rq are completed.  It is a BUG if anything
  *     is still pending afterwards.
  */
 void blk_end_request_all(struct request *rq, int error)
 {
 	unsigned int bidi_bytes;
 	bool pending;
 	/* Only a bidi request has a partner whose bytes need completing. */
 	if (unlikely(blk_bidi_rq(rq)))
 		bidi_bytes = blk_rq_bytes(rq->next_rq);
 	else
 		bidi_bytes = 0;
 	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
 EXPORT_SYMBOL(blk_end_request_all);
 /**
  * blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
  * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
 bool blk_end_request_cur(struct request *rq, int error)
 {
 	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
 EXPORT_SYMBOL(blk_end_request_cur);
 /**
  * blk_end_request_err - Finish a request till the next failure boundary.
  * @rq: the request to finish till the next failure boundary for
  * @error: must be negative errno
  *
  * Description:
  *     Complete @rq till the next failure boundary.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
 bool blk_end_request_err(struct request *rq, int error)
 {
 	WARN_ON(error >= 0);
 	return blk_end_request(rq, error, blk_rq_err_bytes(rq));
 }
 EXPORT_SYMBOL_GPL(blk_end_request_err);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
  * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  *     Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(__blk_end_request);
 /**
  * __blk_end_request_all - Helper function for drivers to finish the request.
  * @rq: the request to finish
  * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Completely finish @rq: all of its bytes and, for a bidi request,
  *     all bytes of @rq->next_rq are completed.  It is a BUG if anything
  *     is still pending afterwards.  Must be called with queue lock held.
  */
 void __blk_end_request_all(struct request *rq, int error)
 {
 	unsigned int bidi_bytes;
 	bool pending;
 	/* Only a bidi request has a partner whose bytes need completing. */
 	if (unlikely(blk_bidi_rq(rq)))
 		bidi_bytes = blk_rq_bytes(rq->next_rq);
 	else
 		bidi_bytes = 0;
 	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
 EXPORT_SYMBOL(__blk_end_request_all);
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
  * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
  *     be called with queue lock held.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
 bool __blk_end_request_cur(struct request *rq, int error)
 {
 	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
 EXPORT_SYMBOL(__blk_end_request_cur);
 /**
  * __blk_end_request_err - Finish a request till the next failure boundary.
  * @rq: the request to finish till the next failure boundary for
  * @error: must be negative errno
  *
  * Description:
  *     Complete @rq till the next failure boundary.  Must be called
  *     with queue lock held.
  *
  * Return:
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
 bool __blk_end_request_err(struct request *rq, int error)
 {
 	WARN_ON(error >= 0);
 	return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
 }
 EXPORT_SYMBOL_GPL(__blk_end_request_err);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
 	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
 	rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
 	if (bio_has_data(bio)) {
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
 	rq->__data_len = bio->bi_size;
 	rq->bio = rq->biotail = bio;
 	if (bio->bi_bdev)
 		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 /**
  * rq_flush_dcache_pages - Helper function to flush all pages in a request
  * @rq: the request to be flushed
  *
  * Description:
  *     Walk every segment of @rq and call flush_dcache_page() on the
  *     page backing it.
  */
 void rq_flush_dcache_pages(struct request *rq)
 {
 	struct bio_vec *segment;
 	struct req_iterator seg_iter;
 	rq_for_each_segment(segment, rq, seg_iter) {
 		flush_dcache_page(segment->bv_page);
 	}
 }
 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
 #endif
 /**
  * blk_lld_busy - Check if underlying low-level drivers of a device are busy
  * @q : the queue of the device being checked
  *
  * Description:
  *    Ask the queue's lld_busy_fn hook, if one is set, whether the
  *    underlying low-level drivers are busy.  Drivers that want to
  *    export their busy state must register their own function with
  *    blk_queue_lld_busy() first; without one the queue is reported as
  *    not busy.
  *
  *    Basically, this function is used only by request stacking drivers
  *    to stop dispatching requests to underlying devices when underlying
  *    devices are busy.  This behavior helps more I/O merging on the queue
  *    of the request stacking driver and prevents I/O throughput regression
  *    on burst I/O load.
  *
  * Return:
  *    0 - Not busy (The request stacking driver should dispatch request)
  *    1 - Busy (The request stacking driver should stop dispatching request)
  */
 int blk_lld_busy(struct request_queue *q)
 {
 	/* No hook registered: report "not busy". */
 	if (!q->lld_busy_fn)
 		return 0;
 	return q->lld_busy_fn(q);
 }
 EXPORT_SYMBOL_GPL(blk_lld_busy);
 /**
  * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
  * @rq: the clone request to be cleaned up
  *
  * Description:
  *     Unlink every bio from @rq's chain and drop it with bio_put().
  *     On return @rq->bio is NULL.
  */
 void blk_rq_unprep_clone(struct request *rq)
 {
 	struct bio *head;
 	for (head = rq->bio; head != NULL; head = rq->bio) {
 		/* Advance the chain before releasing the detached bio. */
 		rq->bio = head->bi_next;
 		bio_put(head);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 /*
  * Copy the attributes of the original request @src to the clone @dst.
  * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
  */
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
 	/* Keep only the flags in REQ_CLONE_MASK and always set REQ_NOMERGE. */
 	dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
 	dst->cmd_type = src->cmd_type;
 	/* Position and size of the transfer. */
 	dst->__sector = blk_rq_pos(src);
 	dst->__data_len = blk_rq_bytes(src);
 	dst->nr_phys_segments = src->nr_phys_segments;
 	dst->extra_len = src->extra_len;
 	/* Scheduling-related attributes. */
 	dst->cpu = src->cpu;
 	dst->ioprio = src->ioprio;
 }
 /**
  * blk_rq_prep_clone - Helper function to setup clone request
  * @rq: the request to be setup
  * @rq_src: original request to be cloned
  * @bs: bio_set that bios for clone are allocated from; fs_bio_set is
  *      used when this is %NULL
  * @gfp_mask: memory allocation mask for bio
  * @bio_ctr: optional setup function to be called for each clone bio.
  *           Returns %0 for success, non %0 for failure.
  * @data: private data to be passed to @bio_ctr
  *
  * Description:
  *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
  *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
  *     are not copied, and copying such parts is the caller's responsibility.
  *     Also, pages which the original bios are pointing to are not copied
  *     and the cloned bios just point to the same pages.
  *     So cloned bios must be completed before original bios, which means
  *     the caller must complete @rq before @rq_src.
  *
  * Return:
  *     %0 on success.  -ENOMEM if a bio allocation, an integrity clone or
  *     @bio_ctr fails; in that case all bios cloned so far are freed.
  */
 int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		      struct bio_set *bs, gfp_t gfp_mask,
 		      int (*bio_ctr)(struct bio *, struct bio *, void *),
 		      void *data)
 {
 	struct bio *bio, *bio_src;
 	if (bs == NULL)
 		bs = fs_bio_set;
 	blk_rq_init(NULL, rq);
 	__rq_for_each_bio(bio_src, rq_src) {
 		bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
 		if (bio == NULL)
 			goto free_and_out;
 		__bio_clone(bio, bio_src);
 		if (bio_integrity(bio_src) &&
 		    bio_integrity_clone(bio, bio_src, gfp_mask, bs))
 			goto free_and_out;
 		if (bio_ctr && bio_ctr(bio, bio_src, data))
 			goto free_and_out;
 		/* Append the finished clone to the tail of @rq's bio chain. */
 		if (!rq->bio)
 			rq->bio = bio;
 		else
 			rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 	}
 	__blk_rq_prep_clone(rq, rq_src);
 	return 0;
 free_and_out:
 	/*
 	 * The bio being worked on when the failure hit was not yet linked
 	 * into @rq, so it is freed separately from the chain.
 	 */
 	if (bio != NULL)
 		bio_free(bio, bs);
 	blk_rq_unprep_clone(rq);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
 /*
  * Queue @work on the kblockd workqueue and return queue_work()'s result.
  * @q is not used here.
  */
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	int queued = queue_work(kblockd_workqueue, work);
 	return queued;
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 /*
  * Queue @dwork on the kblockd workqueue with the given @delay and return
  * queue_delayed_work()'s result.  @q is not used here.
  */
 int kblockd_schedule_delayed_work(struct request_queue *q,
 			struct delayed_work *dwork, unsigned long delay)
 {
 	int queued = queue_delayed_work(kblockd_workqueue, dwork, delay);
 	return queued;
 }
 EXPORT_SYMBOL(kblockd_schedule_delayed_work);
 /* Stamped into every initialised plug and checked when it is flushed. */
 #define PLUG_MAGIC	0x91827364
 /*
  * Initialise @plug (magic, empty request and callback lists, no sort
  * pending) and install it as the current task's plug unless the task
  * already has one.
  */
 void blk_start_plug(struct blk_plug *plug)
 {
 	struct task_struct *task = current;
 	plug->magic = PLUG_MAGIC;
 	plug->should_sort = 0;
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
 	/*
 	 * If this is a nested plug, don't actually assign it. It will be
 	 * flushed on its own.
 	 */
 	if (task->plug)
 		return;
 	/*
 	 * Store ordering should not be needed here, since a potential
 	 * preempt will imply a full memory barrier
 	 */
 	task->plug = plug;
 }
 EXPORT_SYMBOL(blk_start_plug);
 /*
  * list_sort() comparator for plugged requests: orders entries by the
  * address of the queue they belong to.  Returns 1 when @a's queue
  * pointer is greater than @b's and 0 otherwise.  @priv is unused.
  */
 static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
 	struct request *first = container_of(a, struct request, queuelist);
 	struct request *second = container_of(b, struct request, queuelist);
 	return first->q > second->q;
 }
 /*
  * If 'from_schedule' is true, then postpone the dispatch of requests
  * until a safe kblockd context. We do this to avoid accidental big
  * additional stack usage in driver dispatch, in places where the original
  * plugger did not intend it.
  *
  * Called with q->queue_lock held; the lock is released on both paths.
  */
 static void queue_unplugged(struct request_queue *q, unsigned int depth,
 			    bool from_schedule)
 	__releases(q->queue_lock)
 {
 	trace_block_unplug(q, depth, !from_schedule);
 	if (!from_schedule) {
 		/* Direct unplug: run the queue while still holding the lock. */
 		__blk_run_queue(q);
 		spin_unlock(q->queue_lock);
 		return;
 	}
 	/*
 	 * If we are punting this to kblockd, then we can safely drop
 	 * the queue_lock before waking kblockd (which needs to take
 	 * this lock).
 	 */
 	spin_unlock(q->queue_lock);
 	blk_run_queue_async(q);
 }
 static void flush_plug_callbacks(struct blk_plug *plug)
 {
 	LIST_HEAD(callbacks);
 	if (list_empty(&plug->cb_list))
 		return;
 	list_splice_init(&plug->cb_list, &callbacks);
 	while (!list_empty(&callbacks)) {
 		struct blk_plug_cb *cb = list_first_entry(&callbacks,
 							  struct blk_plug_cb,
 							  list);
 		list_del(&cb->list);
 		cb->callback(cb);
 	}
 }
 /*
  * Flush everything held on @plug: run its callbacks, then move all
  * plugged requests to their queues' elevators and unplug each queue.
  *
  * Requests are taken off plug->list onto a local list (sorted with
  * plug_rq_cmp when plug->should_sort is set).  The walk then takes a
  * queue's lock each time the request's queue differs from the previous
  * one, so consecutive requests for the same queue share one lock hold.
  *
  * @from_schedule is passed through to queue_unplugged(), which decides
  * whether the queue is run directly or punted to kblockd.
  */
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
 	unsigned long flags;
 	struct request *rq;
 	LIST_HEAD(list);
 	unsigned int depth;
 	/* Catch plugs that were never set up by blk_start_plug(). */
 	BUG_ON(plug->magic != PLUG_MAGIC);
 	flush_plug_callbacks(plug);
 	if (list_empty(&plug->list))
 		return;
 	/* Work on a private list; plug->list is left empty. */
 	list_splice_init(&plug->list, &list);
 	if (plug->should_sort) {
 		list_sort(NULL, &list, plug_rq_cmp);
 		plug->should_sort = 0;
 	}
 	/* q is the queue whose lock is currently held (NULL: none yet). */
 	q = NULL;
 	/* depth counts the requests added to the current queue. */
 	depth = 0;
 	/*
 	 * Save and disable interrupts here, to avoid doing it for every
 	 * queue lock we have to take.
 	 */
 	local_irq_save(flags);
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
 		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
 		BUG_ON(!rq->q);
 		/* Switching queues: unplug the previous one, lock the new one. */
 		if (rq->q != q) {
 			/*
 			 * This drops the queue lock
 			 */
 			if (q)
 				queue_unplugged(q, depth, from_schedule);
 			q = rq->q;
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
 		rq->cmd_flags &= ~REQ_ON_PLUG;
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
 		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
 			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
 		else
 			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
 		depth++;
 	}
 	/*
 	 * This drops the queue lock
 	 */
 	if (q)
 		queue_unplugged(q, depth, from_schedule);
 	local_irq_restore(flags);
 }
 /*
  * Flush @plug (not from the scheduler path) and, if it is the plug
  * installed on the current task, uninstall it.
  */
 void blk_finish_plug(struct blk_plug *plug)
 {
 	blk_flush_plug_list(plug, false);
 	/* A nested plug was never installed on the task; leave it alone. */
 	if (current->plug != plug)
 		return;
 	current->plug = NULL;
 }
 EXPORT_SYMBOL(blk_finish_plug);
 /*
  * Block layer initialisation: create the kblockd workqueue and the slab
  * caches for struct request and struct request_queue.  Panics if the
  * workqueue cannot be allocated; both caches are created with
  * SLAB_PANIC.  Returns 0.
  */
 int __init blk_dev_init(void)
 {
 	/* All request flag bits must fit into request->cmd_flags. */
 	BUILD_BUG_ON(__REQ_NR_BITS >
 		     8 * sizeof(((struct request *)0)->cmd_flags));
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
 					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
 	if (kblockd_workqueue == NULL)
 		panic("Failed to create kblockd\n");
 	request_cachep = kmem_cache_create("blkdev_requests",
 					   sizeof(struct request), 0,
 					   SLAB_PANIC, NULL);
 	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
 						sizeof(struct request_queue), 0,
 						SLAB_PANIC, NULL);
 	return 0;
 }