Commit 55c022bbddb2c056b5dff1bd1b1758d31b6d64c9
Committed by: Jens Axboe
1 parent: 719c0c5906
Exists in: master and in 4 other branches
block: avoid building too big plug list
When I tested an fio script with a big I/O depth, I found that total throughput dropped compared to a relatively small I/O depth. The reason is that the thread accumulates a large number of requests in its plug list, which causes some delay (how much surely depends on CPU speed). We'd better have a threshold for plugged requests: once the threshold is reached, there is no request merging anyway and queue lock contention isn't severe when pushing the per-task requests to the queue, so the main advantages of block plugging no longer exist. We can force a plug list flush in this case.

With this, my test throughput actually increases and is almost equal to the small I/O depth case. Another side effect is that irq-off time in blk_flush_plug_list() decreases for big I/O depths.

BLK_MAX_REQUEST_COUNT is chosen arbitrarily, but 16 is sufficient to reduce lock contention for me. I'm open here; 32 is OK in my test too.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
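The hunks that add the threshold are not visible in the diff context shown below; the 8 added lines presumably sit in the plugging path of blk-core.c plus a header that defines BLK_MAX_REQUEST_COUNT. What follows is only a minimal sketch of the mechanism the message describes: the helper name plug_add_request and the field layout of struct blk_plug are assumptions for illustration, not the literal patch.

/*
 * Illustrative sketch only -- not the literal patch.  plug_add_request()
 * and the struct fields below are assumed names; the real change lives in
 * the __make_request() plugging path.
 */
#define BLK_MAX_REQUEST_COUNT	16

struct blk_plug {
	struct list_head list;	/* per-task list of plugged requests */
	unsigned int count;	/* assumed: requests currently on the plug list */
	/* ... */
};

static void plug_add_request(struct blk_plug *plug, struct request *req)
{
	/*
	 * Past the threshold there is no more merging to gain and the
	 * eventual flush only gets more expensive, so drain the
	 * accumulated requests to the queue before plugging the new one.
	 * (blk_flush_plug_list() signature as of this kernel era is
	 * assumed here; the second argument means "not from schedule()".)
	 */
	if (plug->count >= BLK_MAX_REQUEST_COUNT) {
		blk_flush_plug_list(plug, false);
		plug->count = 0;
	}
	list_add_tail(&req->queuelist, &plug->list);
	plug->count++;
}

Flushing at a small fixed count bounds both the work done under the queue lock while draining the list and the irq-off window inside blk_flush_plug_list(), which matches the throughput and irq-off observations in the message above.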
Showing 2 changed files with 8 additions and 0 deletions
Inline Diff
block/blk-core.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> |
7 | * - July2000 | 7 | * - July2000 |
8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * This handles all read/write requests to block devices | 12 | * This handles all read/write requests to block devices |
13 | */ | 13 | */ |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/kernel_stat.h> | 21 | #include <linux/kernel_stat.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | #include <linux/list_sort.h> | 30 | #include <linux/list_sort.h> |
31 | 31 | ||
32 | #define CREATE_TRACE_POINTS | 32 | #define CREATE_TRACE_POINTS |
33 | #include <trace/events/block.h> | 33 | #include <trace/events/block.h> |
34 | 34 | ||
35 | #include "blk.h" | 35 | #include "blk.h" |
36 | 36 | ||
37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | 37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | 38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
40 | 40 | ||
41 | static int __make_request(struct request_queue *q, struct bio *bio); | 41 | static int __make_request(struct request_queue *q, struct bio *bio); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * For the allocated request tables | 44 | * For the allocated request tables |
45 | */ | 45 | */ |
46 | static struct kmem_cache *request_cachep; | 46 | static struct kmem_cache *request_cachep; |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * For queue allocation | 49 | * For queue allocation |
50 | */ | 50 | */ |
51 | struct kmem_cache *blk_requestq_cachep; | 51 | struct kmem_cache *blk_requestq_cachep; |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Controlling structure to kblockd | 54 | * Controlling structure to kblockd |
55 | */ | 55 | */ |
56 | static struct workqueue_struct *kblockd_workqueue; | 56 | static struct workqueue_struct *kblockd_workqueue; |
57 | 57 | ||
58 | static void drive_stat_acct(struct request *rq, int new_io) | 58 | static void drive_stat_acct(struct request *rq, int new_io) |
59 | { | 59 | { |
60 | struct hd_struct *part; | 60 | struct hd_struct *part; |
61 | int rw = rq_data_dir(rq); | 61 | int rw = rq_data_dir(rq); |
62 | int cpu; | 62 | int cpu; |
63 | 63 | ||
64 | if (!blk_do_io_stat(rq)) | 64 | if (!blk_do_io_stat(rq)) |
65 | return; | 65 | return; |
66 | 66 | ||
67 | cpu = part_stat_lock(); | 67 | cpu = part_stat_lock(); |
68 | 68 | ||
69 | if (!new_io) { | 69 | if (!new_io) { |
70 | part = rq->part; | 70 | part = rq->part; |
71 | part_stat_inc(cpu, part, merges[rw]); | 71 | part_stat_inc(cpu, part, merges[rw]); |
72 | } else { | 72 | } else { |
73 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | 73 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
74 | if (!hd_struct_try_get(part)) { | 74 | if (!hd_struct_try_get(part)) { |
75 | /* | 75 | /* |
76 | * The partition is already being removed, | 76 | * The partition is already being removed, |
77 | * the request will be accounted on the disk only | 77 | * the request will be accounted on the disk only |
78 | * | 78 | * |
79 | * We take a reference on disk->part0 although that | 79 | * We take a reference on disk->part0 although that |
80 | * partition will never be deleted, so we can treat | 80 | * partition will never be deleted, so we can treat |
81 | * it as any other partition. | 81 | * it as any other partition. |
82 | */ | 82 | */ |
83 | part = &rq->rq_disk->part0; | 83 | part = &rq->rq_disk->part0; |
84 | hd_struct_get(part); | 84 | hd_struct_get(part); |
85 | } | 85 | } |
86 | part_round_stats(cpu, part); | 86 | part_round_stats(cpu, part); |
87 | part_inc_in_flight(part, rw); | 87 | part_inc_in_flight(part, rw); |
88 | rq->part = part; | 88 | rq->part = part; |
89 | } | 89 | } |
90 | 90 | ||
91 | part_stat_unlock(); | 91 | part_stat_unlock(); |
92 | } | 92 | } |
93 | 93 | ||
94 | void blk_queue_congestion_threshold(struct request_queue *q) | 94 | void blk_queue_congestion_threshold(struct request_queue *q) |
95 | { | 95 | { |
96 | int nr; | 96 | int nr; |
97 | 97 | ||
98 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 98 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
99 | if (nr > q->nr_requests) | 99 | if (nr > q->nr_requests) |
100 | nr = q->nr_requests; | 100 | nr = q->nr_requests; |
101 | q->nr_congestion_on = nr; | 101 | q->nr_congestion_on = nr; |
102 | 102 | ||
103 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 103 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
104 | if (nr < 1) | 104 | if (nr < 1) |
105 | nr = 1; | 105 | nr = 1; |
106 | q->nr_congestion_off = nr; | 106 | q->nr_congestion_off = nr; |
107 | } | 107 | } |
108 | 108 | ||
109 | /** | 109 | /** |
110 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 110 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
111 | * @bdev: device | 111 | * @bdev: device |
112 | * | 112 | * |
113 | * Locates the passed device's request queue and returns the address of its | 113 | * Locates the passed device's request queue and returns the address of its |
114 | * backing_dev_info | 114 | * backing_dev_info |
115 | * | 115 | * |
116 | * Will return NULL if the request queue cannot be located. | 116 | * Will return NULL if the request queue cannot be located. |
117 | */ | 117 | */ |
118 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 118 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
119 | { | 119 | { |
120 | struct backing_dev_info *ret = NULL; | 120 | struct backing_dev_info *ret = NULL; |
121 | struct request_queue *q = bdev_get_queue(bdev); | 121 | struct request_queue *q = bdev_get_queue(bdev); |
122 | 122 | ||
123 | if (q) | 123 | if (q) |
124 | ret = &q->backing_dev_info; | 124 | ret = &q->backing_dev_info; |
125 | return ret; | 125 | return ret; |
126 | } | 126 | } |
127 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 127 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
128 | 128 | ||
129 | void blk_rq_init(struct request_queue *q, struct request *rq) | 129 | void blk_rq_init(struct request_queue *q, struct request *rq) |
130 | { | 130 | { |
131 | memset(rq, 0, sizeof(*rq)); | 131 | memset(rq, 0, sizeof(*rq)); |
132 | 132 | ||
133 | INIT_LIST_HEAD(&rq->queuelist); | 133 | INIT_LIST_HEAD(&rq->queuelist); |
134 | INIT_LIST_HEAD(&rq->timeout_list); | 134 | INIT_LIST_HEAD(&rq->timeout_list); |
135 | rq->cpu = -1; | 135 | rq->cpu = -1; |
136 | rq->q = q; | 136 | rq->q = q; |
137 | rq->__sector = (sector_t) -1; | 137 | rq->__sector = (sector_t) -1; |
138 | INIT_HLIST_NODE(&rq->hash); | 138 | INIT_HLIST_NODE(&rq->hash); |
139 | RB_CLEAR_NODE(&rq->rb_node); | 139 | RB_CLEAR_NODE(&rq->rb_node); |
140 | rq->cmd = rq->__cmd; | 140 | rq->cmd = rq->__cmd; |
141 | rq->cmd_len = BLK_MAX_CDB; | 141 | rq->cmd_len = BLK_MAX_CDB; |
142 | rq->tag = -1; | 142 | rq->tag = -1; |
143 | rq->ref_count = 1; | 143 | rq->ref_count = 1; |
144 | rq->start_time = jiffies; | 144 | rq->start_time = jiffies; |
145 | set_start_time_ns(rq); | 145 | set_start_time_ns(rq); |
146 | rq->part = NULL; | 146 | rq->part = NULL; |
147 | } | 147 | } |
148 | EXPORT_SYMBOL(blk_rq_init); | 148 | EXPORT_SYMBOL(blk_rq_init); |
149 | 149 | ||
150 | static void req_bio_endio(struct request *rq, struct bio *bio, | 150 | static void req_bio_endio(struct request *rq, struct bio *bio, |
151 | unsigned int nbytes, int error) | 151 | unsigned int nbytes, int error) |
152 | { | 152 | { |
153 | if (error) | 153 | if (error) |
154 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 154 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
155 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 155 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
156 | error = -EIO; | 156 | error = -EIO; |
157 | 157 | ||
158 | if (unlikely(nbytes > bio->bi_size)) { | 158 | if (unlikely(nbytes > bio->bi_size)) { |
159 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", | 159 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", |
160 | __func__, nbytes, bio->bi_size); | 160 | __func__, nbytes, bio->bi_size); |
161 | nbytes = bio->bi_size; | 161 | nbytes = bio->bi_size; |
162 | } | 162 | } |
163 | 163 | ||
164 | if (unlikely(rq->cmd_flags & REQ_QUIET)) | 164 | if (unlikely(rq->cmd_flags & REQ_QUIET)) |
165 | set_bit(BIO_QUIET, &bio->bi_flags); | 165 | set_bit(BIO_QUIET, &bio->bi_flags); |
166 | 166 | ||
167 | bio->bi_size -= nbytes; | 167 | bio->bi_size -= nbytes; |
168 | bio->bi_sector += (nbytes >> 9); | 168 | bio->bi_sector += (nbytes >> 9); |
169 | 169 | ||
170 | if (bio_integrity(bio)) | 170 | if (bio_integrity(bio)) |
171 | bio_integrity_advance(bio, nbytes); | 171 | bio_integrity_advance(bio, nbytes); |
172 | 172 | ||
173 | /* don't actually finish bio if it's part of flush sequence */ | 173 | /* don't actually finish bio if it's part of flush sequence */ |
174 | if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) | 174 | if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) |
175 | bio_endio(bio, error); | 175 | bio_endio(bio, error); |
176 | } | 176 | } |
177 | 177 | ||
178 | void blk_dump_rq_flags(struct request *rq, char *msg) | 178 | void blk_dump_rq_flags(struct request *rq, char *msg) |
179 | { | 179 | { |
180 | int bit; | 180 | int bit; |
181 | 181 | ||
182 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, | 182 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, |
183 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 183 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
184 | rq->cmd_flags); | 184 | rq->cmd_flags); |
185 | 185 | ||
186 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", | 186 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", |
187 | (unsigned long long)blk_rq_pos(rq), | 187 | (unsigned long long)blk_rq_pos(rq), |
188 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); | 188 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); |
189 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", | 189 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", |
190 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); | 190 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); |
191 | 191 | ||
192 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 192 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
193 | printk(KERN_INFO " cdb: "); | 193 | printk(KERN_INFO " cdb: "); |
194 | for (bit = 0; bit < BLK_MAX_CDB; bit++) | 194 | for (bit = 0; bit < BLK_MAX_CDB; bit++) |
195 | printk("%02x ", rq->cmd[bit]); | 195 | printk("%02x ", rq->cmd[bit]); |
196 | printk("\n"); | 196 | printk("\n"); |
197 | } | 197 | } |
198 | } | 198 | } |
199 | EXPORT_SYMBOL(blk_dump_rq_flags); | 199 | EXPORT_SYMBOL(blk_dump_rq_flags); |
200 | 200 | ||
201 | static void blk_delay_work(struct work_struct *work) | 201 | static void blk_delay_work(struct work_struct *work) |
202 | { | 202 | { |
203 | struct request_queue *q; | 203 | struct request_queue *q; |
204 | 204 | ||
205 | q = container_of(work, struct request_queue, delay_work.work); | 205 | q = container_of(work, struct request_queue, delay_work.work); |
206 | spin_lock_irq(q->queue_lock); | 206 | spin_lock_irq(q->queue_lock); |
207 | __blk_run_queue(q); | 207 | __blk_run_queue(q); |
208 | spin_unlock_irq(q->queue_lock); | 208 | spin_unlock_irq(q->queue_lock); |
209 | } | 209 | } |
210 | 210 | ||
211 | /** | 211 | /** |
212 | * blk_delay_queue - restart queueing after defined interval | 212 | * blk_delay_queue - restart queueing after defined interval |
213 | * @q: The &struct request_queue in question | 213 | * @q: The &struct request_queue in question |
214 | * @msecs: Delay in msecs | 214 | * @msecs: Delay in msecs |
215 | * | 215 | * |
216 | * Description: | 216 | * Description: |
217 | * Sometimes queueing needs to be postponed for a little while, to allow | 217 | * Sometimes queueing needs to be postponed for a little while, to allow |
218 | * resources to come back. This function will make sure that queueing is | 218 | * resources to come back. This function will make sure that queueing is |
219 | * restarted around the specified time. | 219 | * restarted around the specified time. |
220 | */ | 220 | */ |
221 | void blk_delay_queue(struct request_queue *q, unsigned long msecs) | 221 | void blk_delay_queue(struct request_queue *q, unsigned long msecs) |
222 | { | 222 | { |
223 | queue_delayed_work(kblockd_workqueue, &q->delay_work, | 223 | queue_delayed_work(kblockd_workqueue, &q->delay_work, |
224 | msecs_to_jiffies(msecs)); | 224 | msecs_to_jiffies(msecs)); |
225 | } | 225 | } |
226 | EXPORT_SYMBOL(blk_delay_queue); | 226 | EXPORT_SYMBOL(blk_delay_queue); |
227 | 227 | ||
228 | /** | 228 | /** |
229 | * blk_start_queue - restart a previously stopped queue | 229 | * blk_start_queue - restart a previously stopped queue |
230 | * @q: The &struct request_queue in question | 230 | * @q: The &struct request_queue in question |
231 | * | 231 | * |
232 | * Description: | 232 | * Description: |
233 | * blk_start_queue() will clear the stop flag on the queue, and call | 233 | * blk_start_queue() will clear the stop flag on the queue, and call |
234 | * the request_fn for the queue if it was in a stopped state when | 234 | * the request_fn for the queue if it was in a stopped state when |
235 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 235 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
236 | **/ | 236 | **/ |
237 | void blk_start_queue(struct request_queue *q) | 237 | void blk_start_queue(struct request_queue *q) |
238 | { | 238 | { |
239 | WARN_ON(!irqs_disabled()); | 239 | WARN_ON(!irqs_disabled()); |
240 | 240 | ||
241 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | 241 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
242 | __blk_run_queue(q); | 242 | __blk_run_queue(q); |
243 | } | 243 | } |
244 | EXPORT_SYMBOL(blk_start_queue); | 244 | EXPORT_SYMBOL(blk_start_queue); |
245 | 245 | ||
246 | /** | 246 | /** |
247 | * blk_stop_queue - stop a queue | 247 | * blk_stop_queue - stop a queue |
248 | * @q: The &struct request_queue in question | 248 | * @q: The &struct request_queue in question |
249 | * | 249 | * |
250 | * Description: | 250 | * Description: |
251 | * The Linux block layer assumes that a block driver will consume all | 251 | * The Linux block layer assumes that a block driver will consume all |
252 | * entries on the request queue when the request_fn strategy is called. | 252 | * entries on the request queue when the request_fn strategy is called. |
253 | * Often this will not happen, because of hardware limitations (queue | 253 | * Often this will not happen, because of hardware limitations (queue |
254 | * depth settings). If a device driver gets a 'queue full' response, | 254 | * depth settings). If a device driver gets a 'queue full' response, |
255 | * or if it simply chooses not to queue more I/O at one point, it can | 255 | * or if it simply chooses not to queue more I/O at one point, it can |
256 | * call this function to prevent the request_fn from being called until | 256 | * call this function to prevent the request_fn from being called until |
257 | * the driver has signalled it's ready to go again. This happens by calling | 257 | * the driver has signalled it's ready to go again. This happens by calling |
258 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 258 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
259 | **/ | 259 | **/ |
260 | void blk_stop_queue(struct request_queue *q) | 260 | void blk_stop_queue(struct request_queue *q) |
261 | { | 261 | { |
262 | __cancel_delayed_work(&q->delay_work); | 262 | __cancel_delayed_work(&q->delay_work); |
263 | queue_flag_set(QUEUE_FLAG_STOPPED, q); | 263 | queue_flag_set(QUEUE_FLAG_STOPPED, q); |
264 | } | 264 | } |
265 | EXPORT_SYMBOL(blk_stop_queue); | 265 | EXPORT_SYMBOL(blk_stop_queue); |
266 | 266 | ||
267 | /** | 267 | /** |
268 | * blk_sync_queue - cancel any pending callbacks on a queue | 268 | * blk_sync_queue - cancel any pending callbacks on a queue |
269 | * @q: the queue | 269 | * @q: the queue |
270 | * | 270 | * |
271 | * Description: | 271 | * Description: |
272 | * The block layer may perform asynchronous callback activity | 272 | * The block layer may perform asynchronous callback activity |
273 | * on a queue, such as calling the unplug function after a timeout. | 273 | * on a queue, such as calling the unplug function after a timeout. |
274 | * A block device may call blk_sync_queue to ensure that any | 274 | * A block device may call blk_sync_queue to ensure that any |
275 | * such activity is cancelled, thus allowing it to release resources | 275 | * such activity is cancelled, thus allowing it to release resources |
276 | * that the callbacks might use. The caller must already have made sure | 276 | * that the callbacks might use. The caller must already have made sure |
277 | * that its ->make_request_fn will not re-add plugging prior to calling | 277 | * that its ->make_request_fn will not re-add plugging prior to calling |
278 | * this function. | 278 | * this function. |
279 | * | 279 | * |
280 | * This function does not cancel any asynchronous activity arising | 280 | * This function does not cancel any asynchronous activity arising |
281 | * out of elevator or throttling code. That would require elevaotor_exit() | 281 | * out of elevator or throttling code. That would require elevaotor_exit() |
282 | * and blk_throtl_exit() to be called with queue lock initialized. | 282 | * and blk_throtl_exit() to be called with queue lock initialized. |
283 | * | 283 | * |
284 | */ | 284 | */ |
285 | void blk_sync_queue(struct request_queue *q) | 285 | void blk_sync_queue(struct request_queue *q) |
286 | { | 286 | { |
287 | del_timer_sync(&q->timeout); | 287 | del_timer_sync(&q->timeout); |
288 | cancel_delayed_work_sync(&q->delay_work); | 288 | cancel_delayed_work_sync(&q->delay_work); |
289 | } | 289 | } |
290 | EXPORT_SYMBOL(blk_sync_queue); | 290 | EXPORT_SYMBOL(blk_sync_queue); |
291 | 291 | ||
292 | /** | 292 | /** |
293 | * __blk_run_queue - run a single device queue | 293 | * __blk_run_queue - run a single device queue |
294 | * @q: The queue to run | 294 | * @q: The queue to run |
295 | * | 295 | * |
296 | * Description: | 296 | * Description: |
297 | * See @blk_run_queue. This variant must be called with the queue lock | 297 | * See @blk_run_queue. This variant must be called with the queue lock |
298 | * held and interrupts disabled. | 298 | * held and interrupts disabled. |
299 | */ | 299 | */ |
300 | void __blk_run_queue(struct request_queue *q) | 300 | void __blk_run_queue(struct request_queue *q) |
301 | { | 301 | { |
302 | if (unlikely(blk_queue_stopped(q))) | 302 | if (unlikely(blk_queue_stopped(q))) |
303 | return; | 303 | return; |
304 | 304 | ||
305 | q->request_fn(q); | 305 | q->request_fn(q); |
306 | } | 306 | } |
307 | EXPORT_SYMBOL(__blk_run_queue); | 307 | EXPORT_SYMBOL(__blk_run_queue); |
308 | 308 | ||
309 | /** | 309 | /** |
310 | * blk_run_queue_async - run a single device queue in workqueue context | 310 | * blk_run_queue_async - run a single device queue in workqueue context |
311 | * @q: The queue to run | 311 | * @q: The queue to run |
312 | * | 312 | * |
313 | * Description: | 313 | * Description: |
314 | * Tells kblockd to perform the equivalent of @blk_run_queue on behalf | 314 | * Tells kblockd to perform the equivalent of @blk_run_queue on behalf |
315 | * of us. | 315 | * of us. |
316 | */ | 316 | */ |
317 | void blk_run_queue_async(struct request_queue *q) | 317 | void blk_run_queue_async(struct request_queue *q) |
318 | { | 318 | { |
319 | if (likely(!blk_queue_stopped(q))) { | 319 | if (likely(!blk_queue_stopped(q))) { |
320 | __cancel_delayed_work(&q->delay_work); | 320 | __cancel_delayed_work(&q->delay_work); |
321 | queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); | 321 | queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); |
322 | } | 322 | } |
323 | } | 323 | } |
324 | EXPORT_SYMBOL(blk_run_queue_async); | 324 | EXPORT_SYMBOL(blk_run_queue_async); |
325 | 325 | ||
326 | /** | 326 | /** |
327 | * blk_run_queue - run a single device queue | 327 | * blk_run_queue - run a single device queue |
328 | * @q: The queue to run | 328 | * @q: The queue to run |
329 | * | 329 | * |
330 | * Description: | 330 | * Description: |
331 | * Invoke request handling on this queue, if it has pending work to do. | 331 | * Invoke request handling on this queue, if it has pending work to do. |
332 | * May be used to restart queueing when a request has completed. | 332 | * May be used to restart queueing when a request has completed. |
333 | */ | 333 | */ |
334 | void blk_run_queue(struct request_queue *q) | 334 | void blk_run_queue(struct request_queue *q) |
335 | { | 335 | { |
336 | unsigned long flags; | 336 | unsigned long flags; |
337 | 337 | ||
338 | spin_lock_irqsave(q->queue_lock, flags); | 338 | spin_lock_irqsave(q->queue_lock, flags); |
339 | __blk_run_queue(q); | 339 | __blk_run_queue(q); |
340 | spin_unlock_irqrestore(q->queue_lock, flags); | 340 | spin_unlock_irqrestore(q->queue_lock, flags); |
341 | } | 341 | } |
342 | EXPORT_SYMBOL(blk_run_queue); | 342 | EXPORT_SYMBOL(blk_run_queue); |
343 | 343 | ||
344 | void blk_put_queue(struct request_queue *q) | 344 | void blk_put_queue(struct request_queue *q) |
345 | { | 345 | { |
346 | kobject_put(&q->kobj); | 346 | kobject_put(&q->kobj); |
347 | } | 347 | } |
348 | EXPORT_SYMBOL(blk_put_queue); | 348 | EXPORT_SYMBOL(blk_put_queue); |
349 | 349 | ||
350 | /* | 350 | /* |
351 | * Note: If a driver supplied the queue lock, it should not zap that lock | 351 | * Note: If a driver supplied the queue lock, it should not zap that lock |
352 | * unexpectedly as some queue cleanup components like elevator_exit() and | 352 | * unexpectedly as some queue cleanup components like elevator_exit() and |
353 | * blk_throtl_exit() need queue lock. | 353 | * blk_throtl_exit() need queue lock. |
354 | */ | 354 | */ |
355 | void blk_cleanup_queue(struct request_queue *q) | 355 | void blk_cleanup_queue(struct request_queue *q) |
356 | { | 356 | { |
357 | /* | 357 | /* |
358 | * We know we have process context here, so we can be a little | 358 | * We know we have process context here, so we can be a little |
359 | * cautious and ensure that pending block actions on this device | 359 | * cautious and ensure that pending block actions on this device |
360 | * are done before moving on. Going into this function, we should | 360 | * are done before moving on. Going into this function, we should |
361 | * not have processes doing IO to this device. | 361 | * not have processes doing IO to this device. |
362 | */ | 362 | */ |
363 | blk_sync_queue(q); | 363 | blk_sync_queue(q); |
364 | 364 | ||
365 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | 365 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); |
366 | mutex_lock(&q->sysfs_lock); | 366 | mutex_lock(&q->sysfs_lock); |
367 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 367 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
368 | mutex_unlock(&q->sysfs_lock); | 368 | mutex_unlock(&q->sysfs_lock); |
369 | 369 | ||
370 | if (q->elevator) | 370 | if (q->elevator) |
371 | elevator_exit(q->elevator); | 371 | elevator_exit(q->elevator); |
372 | 372 | ||
373 | blk_throtl_exit(q); | 373 | blk_throtl_exit(q); |
374 | 374 | ||
375 | blk_put_queue(q); | 375 | blk_put_queue(q); |
376 | } | 376 | } |
377 | EXPORT_SYMBOL(blk_cleanup_queue); | 377 | EXPORT_SYMBOL(blk_cleanup_queue); |
378 | 378 | ||
379 | static int blk_init_free_list(struct request_queue *q) | 379 | static int blk_init_free_list(struct request_queue *q) |
380 | { | 380 | { |
381 | struct request_list *rl = &q->rq; | 381 | struct request_list *rl = &q->rq; |
382 | 382 | ||
383 | if (unlikely(rl->rq_pool)) | 383 | if (unlikely(rl->rq_pool)) |
384 | return 0; | 384 | return 0; |
385 | 385 | ||
386 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; | 386 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
387 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; | 387 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
388 | rl->elvpriv = 0; | 388 | rl->elvpriv = 0; |
389 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); | 389 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
390 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); | 390 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
391 | 391 | ||
392 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 392 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
393 | mempool_free_slab, request_cachep, q->node); | 393 | mempool_free_slab, request_cachep, q->node); |
394 | 394 | ||
395 | if (!rl->rq_pool) | 395 | if (!rl->rq_pool) |
396 | return -ENOMEM; | 396 | return -ENOMEM; |
397 | 397 | ||
398 | return 0; | 398 | return 0; |
399 | } | 399 | } |
400 | 400 | ||
401 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 401 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
402 | { | 402 | { |
403 | return blk_alloc_queue_node(gfp_mask, -1); | 403 | return blk_alloc_queue_node(gfp_mask, -1); |
404 | } | 404 | } |
405 | EXPORT_SYMBOL(blk_alloc_queue); | 405 | EXPORT_SYMBOL(blk_alloc_queue); |
406 | 406 | ||
407 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 407 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
408 | { | 408 | { |
409 | struct request_queue *q; | 409 | struct request_queue *q; |
410 | int err; | 410 | int err; |
411 | 411 | ||
412 | q = kmem_cache_alloc_node(blk_requestq_cachep, | 412 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
413 | gfp_mask | __GFP_ZERO, node_id); | 413 | gfp_mask | __GFP_ZERO, node_id); |
414 | if (!q) | 414 | if (!q) |
415 | return NULL; | 415 | return NULL; |
416 | 416 | ||
417 | q->backing_dev_info.ra_pages = | 417 | q->backing_dev_info.ra_pages = |
418 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 418 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
419 | q->backing_dev_info.state = 0; | 419 | q->backing_dev_info.state = 0; |
420 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 420 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
421 | q->backing_dev_info.name = "block"; | 421 | q->backing_dev_info.name = "block"; |
422 | 422 | ||
423 | err = bdi_init(&q->backing_dev_info); | 423 | err = bdi_init(&q->backing_dev_info); |
424 | if (err) { | 424 | if (err) { |
425 | kmem_cache_free(blk_requestq_cachep, q); | 425 | kmem_cache_free(blk_requestq_cachep, q); |
426 | return NULL; | 426 | return NULL; |
427 | } | 427 | } |
428 | 428 | ||
429 | if (blk_throtl_init(q)) { | 429 | if (blk_throtl_init(q)) { |
430 | kmem_cache_free(blk_requestq_cachep, q); | 430 | kmem_cache_free(blk_requestq_cachep, q); |
431 | return NULL; | 431 | return NULL; |
432 | } | 432 | } |
433 | 433 | ||
434 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | 434 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
435 | laptop_mode_timer_fn, (unsigned long) q); | 435 | laptop_mode_timer_fn, (unsigned long) q); |
436 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 436 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
437 | INIT_LIST_HEAD(&q->timeout_list); | 437 | INIT_LIST_HEAD(&q->timeout_list); |
438 | INIT_LIST_HEAD(&q->flush_queue[0]); | 438 | INIT_LIST_HEAD(&q->flush_queue[0]); |
439 | INIT_LIST_HEAD(&q->flush_queue[1]); | 439 | INIT_LIST_HEAD(&q->flush_queue[1]); |
440 | INIT_LIST_HEAD(&q->flush_data_in_flight); | 440 | INIT_LIST_HEAD(&q->flush_data_in_flight); |
441 | INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); | 441 | INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); |
442 | 442 | ||
443 | kobject_init(&q->kobj, &blk_queue_ktype); | 443 | kobject_init(&q->kobj, &blk_queue_ktype); |
444 | 444 | ||
445 | mutex_init(&q->sysfs_lock); | 445 | mutex_init(&q->sysfs_lock); |
446 | spin_lock_init(&q->__queue_lock); | 446 | spin_lock_init(&q->__queue_lock); |
447 | 447 | ||
448 | /* | 448 | /* |
449 | * By default initialize queue_lock to internal lock and driver can | 449 | * By default initialize queue_lock to internal lock and driver can |
450 | * override it later if need be. | 450 | * override it later if need be. |
451 | */ | 451 | */ |
452 | q->queue_lock = &q->__queue_lock; | 452 | q->queue_lock = &q->__queue_lock; |
453 | 453 | ||
454 | return q; | 454 | return q; |
455 | } | 455 | } |
456 | EXPORT_SYMBOL(blk_alloc_queue_node); | 456 | EXPORT_SYMBOL(blk_alloc_queue_node); |
457 | 457 | ||
458 | /** | 458 | /** |
459 | * blk_init_queue - prepare a request queue for use with a block device | 459 | * blk_init_queue - prepare a request queue for use with a block device |
460 | * @rfn: The function to be called to process requests that have been | 460 | * @rfn: The function to be called to process requests that have been |
461 | * placed on the queue. | 461 | * placed on the queue. |
462 | * @lock: Request queue spin lock | 462 | * @lock: Request queue spin lock |
463 | * | 463 | * |
464 | * Description: | 464 | * Description: |
465 | * If a block device wishes to use the standard request handling procedures, | 465 | * If a block device wishes to use the standard request handling procedures, |
466 | * which sorts requests and coalesces adjacent requests, then it must | 466 | * which sorts requests and coalesces adjacent requests, then it must |
467 | * call blk_init_queue(). The function @rfn will be called when there | 467 | * call blk_init_queue(). The function @rfn will be called when there |
468 | * are requests on the queue that need to be processed. If the device | 468 | * are requests on the queue that need to be processed. If the device |
469 | * supports plugging, then @rfn may not be called immediately when requests | 469 | * supports plugging, then @rfn may not be called immediately when requests |
470 | * are available on the queue, but may be called at some time later instead. | 470 | * are available on the queue, but may be called at some time later instead. |
471 | * Plugged queues are generally unplugged when a buffer belonging to one | 471 | * Plugged queues are generally unplugged when a buffer belonging to one |
472 | * of the requests on the queue is needed, or due to memory pressure. | 472 | * of the requests on the queue is needed, or due to memory pressure. |
473 | * | 473 | * |
474 | * @rfn is not required, or even expected, to remove all requests off the | 474 | * @rfn is not required, or even expected, to remove all requests off the |
475 | * queue, but only as many as it can handle at a time. If it does leave | 475 | * queue, but only as many as it can handle at a time. If it does leave |
476 | * requests on the queue, it is responsible for arranging that the requests | 476 | * requests on the queue, it is responsible for arranging that the requests |
477 | * get dealt with eventually. | 477 | * get dealt with eventually. |
478 | * | 478 | * |
479 | * The queue spin lock must be held while manipulating the requests on the | 479 | * The queue spin lock must be held while manipulating the requests on the |
480 | * request queue; this lock will be taken also from interrupt context, so irq | 480 | * request queue; this lock will be taken also from interrupt context, so irq |
481 | * disabling is needed for it. | 481 | * disabling is needed for it. |
482 | * | 482 | * |
483 | * Function returns a pointer to the initialized request queue, or %NULL if | 483 | * Function returns a pointer to the initialized request queue, or %NULL if |
484 | * it didn't succeed. | 484 | * it didn't succeed. |
485 | * | 485 | * |
486 | * Note: | 486 | * Note: |
487 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 487 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
488 | * when the block device is deactivated (such as at module unload). | 488 | * when the block device is deactivated (such as at module unload). |
489 | **/ | 489 | **/ |
490 | 490 | ||
491 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 491 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
492 | { | 492 | { |
493 | return blk_init_queue_node(rfn, lock, -1); | 493 | return blk_init_queue_node(rfn, lock, -1); |
494 | } | 494 | } |
495 | EXPORT_SYMBOL(blk_init_queue); | 495 | EXPORT_SYMBOL(blk_init_queue); |
496 | 496 | ||
497 | struct request_queue * | 497 | struct request_queue * |
498 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 498 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
499 | { | 499 | { |
500 | struct request_queue *uninit_q, *q; | 500 | struct request_queue *uninit_q, *q; |
501 | 501 | ||
502 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 502 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
503 | if (!uninit_q) | 503 | if (!uninit_q) |
504 | return NULL; | 504 | return NULL; |
505 | 505 | ||
506 | q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); | 506 | q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); |
507 | if (!q) | 507 | if (!q) |
508 | blk_cleanup_queue(uninit_q); | 508 | blk_cleanup_queue(uninit_q); |
509 | 509 | ||
510 | return q; | 510 | return q; |
511 | } | 511 | } |
512 | EXPORT_SYMBOL(blk_init_queue_node); | 512 | EXPORT_SYMBOL(blk_init_queue_node); |
513 | 513 | ||
514 | struct request_queue * | 514 | struct request_queue * |
515 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | 515 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, |
516 | spinlock_t *lock) | 516 | spinlock_t *lock) |
517 | { | 517 | { |
518 | return blk_init_allocated_queue_node(q, rfn, lock, -1); | 518 | return blk_init_allocated_queue_node(q, rfn, lock, -1); |
519 | } | 519 | } |
520 | EXPORT_SYMBOL(blk_init_allocated_queue); | 520 | EXPORT_SYMBOL(blk_init_allocated_queue); |
521 | 521 | ||
522 | struct request_queue * | 522 | struct request_queue * |
523 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | 523 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, |
524 | spinlock_t *lock, int node_id) | 524 | spinlock_t *lock, int node_id) |
525 | { | 525 | { |
526 | if (!q) | 526 | if (!q) |
527 | return NULL; | 527 | return NULL; |
528 | 528 | ||
529 | q->node = node_id; | 529 | q->node = node_id; |
530 | if (blk_init_free_list(q)) | 530 | if (blk_init_free_list(q)) |
531 | return NULL; | 531 | return NULL; |
532 | 532 | ||
533 | q->request_fn = rfn; | 533 | q->request_fn = rfn; |
534 | q->prep_rq_fn = NULL; | 534 | q->prep_rq_fn = NULL; |
535 | q->unprep_rq_fn = NULL; | 535 | q->unprep_rq_fn = NULL; |
536 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 536 | q->queue_flags = QUEUE_FLAG_DEFAULT; |
537 | 537 | ||
538 | /* Override internal queue lock with supplied lock pointer */ | 538 | /* Override internal queue lock with supplied lock pointer */ |
539 | if (lock) | 539 | if (lock) |
540 | q->queue_lock = lock; | 540 | q->queue_lock = lock; |
541 | 541 | ||
542 | /* | 542 | /* |
543 | * This also sets hw/phys segments, boundary and size | 543 | * This also sets hw/phys segments, boundary and size |
544 | */ | 544 | */ |
545 | blk_queue_make_request(q, __make_request); | 545 | blk_queue_make_request(q, __make_request); |
546 | 546 | ||
547 | q->sg_reserved_size = INT_MAX; | 547 | q->sg_reserved_size = INT_MAX; |
548 | 548 | ||
549 | /* | 549 | /* |
550 | * all done | 550 | * all done |
551 | */ | 551 | */ |
552 | if (!elevator_init(q, NULL)) { | 552 | if (!elevator_init(q, NULL)) { |
553 | blk_queue_congestion_threshold(q); | 553 | blk_queue_congestion_threshold(q); |
554 | return q; | 554 | return q; |
555 | } | 555 | } |
556 | 556 | ||
557 | return NULL; | 557 | return NULL; |
558 | } | 558 | } |
559 | EXPORT_SYMBOL(blk_init_allocated_queue_node); | 559 | EXPORT_SYMBOL(blk_init_allocated_queue_node); |
560 | 560 | ||
561 | int blk_get_queue(struct request_queue *q) | 561 | int blk_get_queue(struct request_queue *q) |
562 | { | 562 | { |
563 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 563 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
564 | kobject_get(&q->kobj); | 564 | kobject_get(&q->kobj); |
565 | return 0; | 565 | return 0; |
566 | } | 566 | } |
567 | 567 | ||
568 | return 1; | 568 | return 1; |
569 | } | 569 | } |
570 | EXPORT_SYMBOL(blk_get_queue); | 570 | EXPORT_SYMBOL(blk_get_queue); |
571 | 571 | ||
572 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 572 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
573 | { | 573 | { |
574 | if (rq->cmd_flags & REQ_ELVPRIV) | 574 | if (rq->cmd_flags & REQ_ELVPRIV) |
575 | elv_put_request(q, rq); | 575 | elv_put_request(q, rq); |
576 | mempool_free(rq, q->rq.rq_pool); | 576 | mempool_free(rq, q->rq.rq_pool); |
577 | } | 577 | } |
578 | 578 | ||
579 | static struct request * | 579 | static struct request * |
580 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) | 580 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) |
581 | { | 581 | { |
582 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 582 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
583 | 583 | ||
584 | if (!rq) | 584 | if (!rq) |
585 | return NULL; | 585 | return NULL; |
586 | 586 | ||
587 | blk_rq_init(q, rq); | 587 | blk_rq_init(q, rq); |
588 | 588 | ||
589 | rq->cmd_flags = flags | REQ_ALLOCED; | 589 | rq->cmd_flags = flags | REQ_ALLOCED; |
590 | 590 | ||
591 | if (priv) { | 591 | if (priv) { |
592 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { | 592 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { |
593 | mempool_free(rq, q->rq.rq_pool); | 593 | mempool_free(rq, q->rq.rq_pool); |
594 | return NULL; | 594 | return NULL; |
595 | } | 595 | } |
596 | rq->cmd_flags |= REQ_ELVPRIV; | 596 | rq->cmd_flags |= REQ_ELVPRIV; |
597 | } | 597 | } |
598 | 598 | ||
599 | return rq; | 599 | return rq; |
600 | } | 600 | } |
601 | 601 | ||
602 | /* | 602 | /* |
603 | * ioc_batching returns true if the ioc is a valid batching request and | 603 | * ioc_batching returns true if the ioc is a valid batching request and |
604 | * should be given priority access to a request. | 604 | * should be given priority access to a request. |
605 | */ | 605 | */ |
606 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) | 606 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
607 | { | 607 | { |
608 | if (!ioc) | 608 | if (!ioc) |
609 | return 0; | 609 | return 0; |
610 | 610 | ||
611 | /* | 611 | /* |
612 | * Make sure the process is able to allocate at least 1 request | 612 | * Make sure the process is able to allocate at least 1 request |
613 | * even if the batch times out, otherwise we could theoretically | 613 | * even if the batch times out, otherwise we could theoretically |
614 | * lose wakeups. | 614 | * lose wakeups. |
615 | */ | 615 | */ |
616 | return ioc->nr_batch_requests == q->nr_batching || | 616 | return ioc->nr_batch_requests == q->nr_batching || |
617 | (ioc->nr_batch_requests > 0 | 617 | (ioc->nr_batch_requests > 0 |
618 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 618 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
619 | } | 619 | } |
620 | 620 | ||
621 | /* | 621 | /* |
622 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 622 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
623 | * will cause the process to be a "batcher" on all queues in the system. This | 623 | * will cause the process to be a "batcher" on all queues in the system. This |
624 | * is the behaviour we want though - once it gets a wakeup it should be given | 624 | * is the behaviour we want though - once it gets a wakeup it should be given |
625 | * a nice run. | 625 | * a nice run. |
626 | */ | 626 | */ |
627 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | 627 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
628 | { | 628 | { |
629 | if (!ioc || ioc_batching(q, ioc)) | 629 | if (!ioc || ioc_batching(q, ioc)) |
630 | return; | 630 | return; |
631 | 631 | ||
632 | ioc->nr_batch_requests = q->nr_batching; | 632 | ioc->nr_batch_requests = q->nr_batching; |
633 | ioc->last_waited = jiffies; | 633 | ioc->last_waited = jiffies; |
634 | } | 634 | } |
635 | 635 | ||
636 | static void __freed_request(struct request_queue *q, int sync) | 636 | static void __freed_request(struct request_queue *q, int sync) |
637 | { | 637 | { |
638 | struct request_list *rl = &q->rq; | 638 | struct request_list *rl = &q->rq; |
639 | 639 | ||
640 | if (rl->count[sync] < queue_congestion_off_threshold(q)) | 640 | if (rl->count[sync] < queue_congestion_off_threshold(q)) |
641 | blk_clear_queue_congested(q, sync); | 641 | blk_clear_queue_congested(q, sync); |
642 | 642 | ||
643 | if (rl->count[sync] + 1 <= q->nr_requests) { | 643 | if (rl->count[sync] + 1 <= q->nr_requests) { |
644 | if (waitqueue_active(&rl->wait[sync])) | 644 | if (waitqueue_active(&rl->wait[sync])) |
645 | wake_up(&rl->wait[sync]); | 645 | wake_up(&rl->wait[sync]); |
646 | 646 | ||
647 | blk_clear_queue_full(q, sync); | 647 | blk_clear_queue_full(q, sync); |
648 | } | 648 | } |
649 | } | 649 | } |
650 | 650 | ||
651 | /* | 651 | /* |
652 | * A request has just been released. Account for it, update the full and | 652 | * A request has just been released. Account for it, update the full and |
653 | * congestion status, wake up any waiters. Called under q->queue_lock. | 653 | * congestion status, wake up any waiters. Called under q->queue_lock. |
654 | */ | 654 | */ |
655 | static void freed_request(struct request_queue *q, int sync, int priv) | 655 | static void freed_request(struct request_queue *q, int sync, int priv) |
656 | { | 656 | { |
657 | struct request_list *rl = &q->rq; | 657 | struct request_list *rl = &q->rq; |
658 | 658 | ||
659 | rl->count[sync]--; | 659 | rl->count[sync]--; |
660 | if (priv) | 660 | if (priv) |
661 | rl->elvpriv--; | 661 | rl->elvpriv--; |
662 | 662 | ||
663 | __freed_request(q, sync); | 663 | __freed_request(q, sync); |
664 | 664 | ||
665 | if (unlikely(rl->starved[sync ^ 1])) | 665 | if (unlikely(rl->starved[sync ^ 1])) |
666 | __freed_request(q, sync ^ 1); | 666 | __freed_request(q, sync ^ 1); |
667 | } | 667 | } |
668 | 668 | ||
669 | /* | 669 | /* |
670 | * Determine if elevator data should be initialized when allocating the | 670 | * Determine if elevator data should be initialized when allocating the |
671 | * request associated with @bio. | 671 | * request associated with @bio. |
672 | */ | 672 | */ |
673 | static bool blk_rq_should_init_elevator(struct bio *bio) | 673 | static bool blk_rq_should_init_elevator(struct bio *bio) |
674 | { | 674 | { |
675 | if (!bio) | 675 | if (!bio) |
676 | return true; | 676 | return true; |
677 | 677 | ||
678 | /* | 678 | /* |
679 | * Flush requests do not use the elevator so skip initialization. | 679 | * Flush requests do not use the elevator so skip initialization. |
680 | * This allows a request to share the flush and elevator data. | 680 | * This allows a request to share the flush and elevator data. |
681 | */ | 681 | */ |
682 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) | 682 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) |
683 | return false; | 683 | return false; |
684 | 684 | ||
685 | return true; | 685 | return true; |
686 | } | 686 | } |
687 | 687 | ||
688 | /* | 688 | /* |
689 | * Get a free request, queue_lock must be held. | 689 | * Get a free request, queue_lock must be held. |
690 | * Returns NULL on failure, with queue_lock held. | 690 | * Returns NULL on failure, with queue_lock held. |
691 | * Returns !NULL on success, with queue_lock *not held*. | 691 | * Returns !NULL on success, with queue_lock *not held*. |
692 | */ | 692 | */ |
693 | static struct request *get_request(struct request_queue *q, int rw_flags, | 693 | static struct request *get_request(struct request_queue *q, int rw_flags, |
694 | struct bio *bio, gfp_t gfp_mask) | 694 | struct bio *bio, gfp_t gfp_mask) |
695 | { | 695 | { |
696 | struct request *rq = NULL; | 696 | struct request *rq = NULL; |
697 | struct request_list *rl = &q->rq; | 697 | struct request_list *rl = &q->rq; |
698 | struct io_context *ioc = NULL; | 698 | struct io_context *ioc = NULL; |
699 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 699 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
700 | int may_queue, priv = 0; | 700 | int may_queue, priv = 0; |
701 | 701 | ||
702 | may_queue = elv_may_queue(q, rw_flags); | 702 | may_queue = elv_may_queue(q, rw_flags); |
703 | if (may_queue == ELV_MQUEUE_NO) | 703 | if (may_queue == ELV_MQUEUE_NO) |
704 | goto rq_starved; | 704 | goto rq_starved; |
705 | 705 | ||
706 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { | 706 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
707 | if (rl->count[is_sync]+1 >= q->nr_requests) { | 707 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
708 | ioc = current_io_context(GFP_ATOMIC, q->node); | 708 | ioc = current_io_context(GFP_ATOMIC, q->node); |
709 | /* | 709 | /* |
710 | * The queue will fill after this allocation, so set | 710 | * The queue will fill after this allocation, so set |
711 | * it as full, and mark this process as "batching". | 711 | * it as full, and mark this process as "batching". |
712 | * This process will be allowed to complete a batch of | 712 | * This process will be allowed to complete a batch of |
713 | * requests, others will be blocked. | 713 | * requests, others will be blocked. |
714 | */ | 714 | */ |
715 | if (!blk_queue_full(q, is_sync)) { | 715 | if (!blk_queue_full(q, is_sync)) { |
716 | ioc_set_batching(q, ioc); | 716 | ioc_set_batching(q, ioc); |
717 | blk_set_queue_full(q, is_sync); | 717 | blk_set_queue_full(q, is_sync); |
718 | } else { | 718 | } else { |
719 | if (may_queue != ELV_MQUEUE_MUST | 719 | if (may_queue != ELV_MQUEUE_MUST |
720 | && !ioc_batching(q, ioc)) { | 720 | && !ioc_batching(q, ioc)) { |
721 | /* | 721 | /* |
722 | * The queue is full and the allocating | 722 | * The queue is full and the allocating |
723 | * process is not a "batcher", and not | 723 | * process is not a "batcher", and not |
724 | * exempted by the IO scheduler | 724 | * exempted by the IO scheduler |
725 | */ | 725 | */ |
726 | goto out; | 726 | goto out; |
727 | } | 727 | } |
728 | } | 728 | } |
729 | } | 729 | } |
730 | blk_set_queue_congested(q, is_sync); | 730 | blk_set_queue_congested(q, is_sync); |
731 | } | 731 | } |
732 | 732 | ||
733 | /* | 733 | /* |
734 | * Only allow batching queuers to allocate up to 50% over the defined | 734 | * Only allow batching queuers to allocate up to 50% over the defined |
735 | * limit of requests, otherwise we could have thousands of requests | 735 | * limit of requests, otherwise we could have thousands of requests |
736 | * allocated with any setting of ->nr_requests | 736 | * allocated with any setting of ->nr_requests |
737 | */ | 737 | */ |
738 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) | 738 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
739 | goto out; | 739 | goto out; |
740 | 740 | ||
741 | rl->count[is_sync]++; | 741 | rl->count[is_sync]++; |
742 | rl->starved[is_sync] = 0; | 742 | rl->starved[is_sync] = 0; |
743 | 743 | ||
744 | if (blk_rq_should_init_elevator(bio)) { | 744 | if (blk_rq_should_init_elevator(bio)) { |
745 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 745 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
746 | if (priv) | 746 | if (priv) |
747 | rl->elvpriv++; | 747 | rl->elvpriv++; |
748 | } | 748 | } |
749 | 749 | ||
750 | if (blk_queue_io_stat(q)) | 750 | if (blk_queue_io_stat(q)) |
751 | rw_flags |= REQ_IO_STAT; | 751 | rw_flags |= REQ_IO_STAT; |
752 | spin_unlock_irq(q->queue_lock); | 752 | spin_unlock_irq(q->queue_lock); |
753 | 753 | ||
754 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); | 754 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); |
755 | if (unlikely(!rq)) { | 755 | if (unlikely(!rq)) { |
756 | /* | 756 | /* |
757 | * Allocation failed presumably due to memory. Undo anything | 757 | * Allocation failed presumably due to memory. Undo anything |
758 | * we might have messed up. | 758 | * we might have messed up. |
759 | * | 759 | * |
760 | * Allocating task should really be put onto the front of the | 760 | * Allocating task should really be put onto the front of the |
761 | * wait queue, but this is pretty rare. | 761 | * wait queue, but this is pretty rare. |
762 | */ | 762 | */ |
763 | spin_lock_irq(q->queue_lock); | 763 | spin_lock_irq(q->queue_lock); |
764 | freed_request(q, is_sync, priv); | 764 | freed_request(q, is_sync, priv); |
765 | 765 | ||
766 | /* | 766 | /* |
767 | * in the very unlikely event that allocation failed and no | 767 | * in the very unlikely event that allocation failed and no |
768 | * requests for this direction was pending, mark us starved | 768 | * requests for this direction was pending, mark us starved |
769 | * so that freeing of a request in the other direction will | 769 | * so that freeing of a request in the other direction will |
770 | * notice us. another possible fix would be to split the | 770 | * notice us. another possible fix would be to split the |
771 | * rq mempool into READ and WRITE | 771 | * rq mempool into READ and WRITE |
772 | */ | 772 | */ |
773 | rq_starved: | 773 | rq_starved: |
774 | if (unlikely(rl->count[is_sync] == 0)) | 774 | if (unlikely(rl->count[is_sync] == 0)) |
775 | rl->starved[is_sync] = 1; | 775 | rl->starved[is_sync] = 1; |
776 | 776 | ||
777 | goto out; | 777 | goto out; |
778 | } | 778 | } |
779 | 779 | ||
780 | /* | 780 | /* |
781 | * ioc may be NULL here, and ioc_batching will be false. That's | 781 | * ioc may be NULL here, and ioc_batching will be false. That's |
782 | * OK, if the queue is under the request limit then requests need | 782 | * OK, if the queue is under the request limit then requests need |
783 | * not count toward the nr_batch_requests limit. There will always | 783 | * not count toward the nr_batch_requests limit. There will always |
784 | * be some limit enforced by BLK_BATCH_TIME. | 784 | * be some limit enforced by BLK_BATCH_TIME. |
785 | */ | 785 | */ |
786 | if (ioc_batching(q, ioc)) | 786 | if (ioc_batching(q, ioc)) |
787 | ioc->nr_batch_requests--; | 787 | ioc->nr_batch_requests--; |
788 | 788 | ||
789 | trace_block_getrq(q, bio, rw_flags & 1); | 789 | trace_block_getrq(q, bio, rw_flags & 1); |
790 | out: | 790 | out: |
791 | return rq; | 791 | return rq; |
792 | } | 792 | } |
793 | 793 | ||
794 | /* | 794 | /* |
795 | * No available requests for this queue, wait for some requests to become | 795 | * No available requests for this queue, wait for some requests to become |
796 | * available. | 796 | * available. |
797 | * | 797 | * |
798 | * Called with q->queue_lock held, and returns with it unlocked. | 798 | * Called with q->queue_lock held, and returns with it unlocked. |
799 | */ | 799 | */ |
800 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 800 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, |
801 | struct bio *bio) | 801 | struct bio *bio) |
802 | { | 802 | { |
803 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 803 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
804 | struct request *rq; | 804 | struct request *rq; |
805 | 805 | ||
806 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 806 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
807 | while (!rq) { | 807 | while (!rq) { |
808 | DEFINE_WAIT(wait); | 808 | DEFINE_WAIT(wait); |
809 | struct io_context *ioc; | 809 | struct io_context *ioc; |
810 | struct request_list *rl = &q->rq; | 810 | struct request_list *rl = &q->rq; |
811 | 811 | ||
812 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 812 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
813 | TASK_UNINTERRUPTIBLE); | 813 | TASK_UNINTERRUPTIBLE); |
814 | 814 | ||
815 | trace_block_sleeprq(q, bio, rw_flags & 1); | 815 | trace_block_sleeprq(q, bio, rw_flags & 1); |
816 | 816 | ||
817 | spin_unlock_irq(q->queue_lock); | 817 | spin_unlock_irq(q->queue_lock); |
818 | io_schedule(); | 818 | io_schedule(); |
819 | 819 | ||
820 | /* | 820 | /* |
821 | * After sleeping, we become a "batching" process and | 821 | * After sleeping, we become a "batching" process and |
822 | * will be able to allocate at least one request, and | 822 | * will be able to allocate at least one request, and |
823 | * up to a big batch of them for a small period time. | 823 | * up to a big batch of them for a small period time. |
824 | * See ioc_batching, ioc_set_batching | 824 | * See ioc_batching, ioc_set_batching |
825 | */ | 825 | */ |
826 | ioc = current_io_context(GFP_NOIO, q->node); | 826 | ioc = current_io_context(GFP_NOIO, q->node); |
827 | ioc_set_batching(q, ioc); | 827 | ioc_set_batching(q, ioc); |
828 | 828 | ||
829 | spin_lock_irq(q->queue_lock); | 829 | spin_lock_irq(q->queue_lock); |
830 | finish_wait(&rl->wait[is_sync], &wait); | 830 | finish_wait(&rl->wait[is_sync], &wait); |
831 | 831 | ||
832 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 832 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
833 | }; | 833 | }; |
834 | 834 | ||
835 | return rq; | 835 | return rq; |
836 | } | 836 | } |
837 | 837 | ||
838 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 838 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
839 | { | 839 | { |
840 | struct request *rq; | 840 | struct request *rq; |
841 | 841 | ||
842 | BUG_ON(rw != READ && rw != WRITE); | 842 | BUG_ON(rw != READ && rw != WRITE); |
843 | 843 | ||
844 | spin_lock_irq(q->queue_lock); | 844 | spin_lock_irq(q->queue_lock); |
845 | if (gfp_mask & __GFP_WAIT) { | 845 | if (gfp_mask & __GFP_WAIT) { |
846 | rq = get_request_wait(q, rw, NULL); | 846 | rq = get_request_wait(q, rw, NULL); |
847 | } else { | 847 | } else { |
848 | rq = get_request(q, rw, NULL, gfp_mask); | 848 | rq = get_request(q, rw, NULL, gfp_mask); |
849 | if (!rq) | 849 | if (!rq) |
850 | spin_unlock_irq(q->queue_lock); | 850 | spin_unlock_irq(q->queue_lock); |
851 | } | 851 | } |
852 | /* q->queue_lock is unlocked at this point */ | 852 | /* q->queue_lock is unlocked at this point */ |
853 | 853 | ||
854 | return rq; | 854 | return rq; |
855 | } | 855 | } |
856 | EXPORT_SYMBOL(blk_get_request); | 856 | EXPORT_SYMBOL(blk_get_request); |
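[Editor's note, not part of the diff] blk_get_request() above is the entry point drivers use to pull a request out of the reserved pools. A minimal sketch of the era's calling convention, assuming a queue q obtained elsewhere; send_vendor_cmd(), the cdb bytes and the 60-second timeout are hypothetical, and real callers add fuller error handling:

	/* Editor's sketch: allocate a request, fill in a pass-through command,
	 * run it synchronously, then drop the reference. */
	static int send_vendor_cmd(struct request_queue *q,
				   const unsigned char *cdb, unsigned int len)
	{
		struct request *rq;
		int err;

		rq = blk_get_request(q, WRITE, GFP_KERNEL);	/* __GFP_WAIT set: may sleep */
		if (!rq)
			return -ENOMEM;

		rq->cmd_type = REQ_TYPE_BLOCK_PC;
		rq->cmd_len  = len;				/* must fit in rq->cmd[] */
		memcpy(rq->cmd, cdb, len);
		rq->timeout  = 60 * HZ;

		err = blk_execute_rq(q, NULL, rq, 0);		/* waits for completion */
		blk_put_request(rq);
		return err;
	}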
857 | 857 | ||
858 | /** | 858 | /** |
859 | * blk_make_request - given a bio, allocate a corresponding struct request. | 859 | * blk_make_request - given a bio, allocate a corresponding struct request. |
860 | * @q: target request queue | 860 | * @q: target request queue |
861 | * @bio: The bio describing the memory mappings that will be submitted for IO. | 861 | * @bio: The bio describing the memory mappings that will be submitted for IO. |
862 | * It may be a chained-bio properly constructed by block/bio layer. | 862 | * It may be a chained-bio properly constructed by block/bio layer. |
863 | * @gfp_mask: gfp flags to be used for memory allocation | 863 | * @gfp_mask: gfp flags to be used for memory allocation |
864 | * | 864 | * |
865 | * blk_make_request is the parallel of generic_make_request for BLOCK_PC | 865 | * blk_make_request is the parallel of generic_make_request for BLOCK_PC |
866 | * type commands, where the struct request needs to be further initialized by | 866 | * type commands, where the struct request needs to be further initialized by |
867 | * the caller. It is passed a &struct bio, which describes the memory info of | 867 | * the caller. It is passed a &struct bio, which describes the memory info of |
868 | * the I/O transfer. | 868 | * the I/O transfer. |
869 | * | 869 | * |
870 | * The caller of blk_make_request must make sure that bi_io_vec | 870 | * The caller of blk_make_request must make sure that bi_io_vec |
871 | * are set to describe the memory buffers, and that bio_data_dir() will return | 871 | * are set to describe the memory buffers, and that bio_data_dir() will return |
872 | * the needed direction of the request. (And all bio's in the passed bio-chain | 872 | * the needed direction of the request. (And all bio's in the passed bio-chain |
873 | * are properly set accordingly) | 873 | * are properly set accordingly) |
874 | * | 874 | * |
875 | * If called under non-sleepable conditions, mapped bio buffers must not | 875 | * If called under non-sleepable conditions, mapped bio buffers must not |
876 | * need bouncing, by calling the appropriate masked or flagged allocator, | 876 | * need bouncing, by calling the appropriate masked or flagged allocator, |
877 | * suitable for the target device. Otherwise the call to blk_queue_bounce will | 877 | * suitable for the target device. Otherwise the call to blk_queue_bounce will |
878 | * BUG. | 878 | * BUG. |
879 | * | 879 | * |
880 | * WARNING: When allocating/cloning a bio-chain, careful consideration should be | 880 | * WARNING: When allocating/cloning a bio-chain, careful consideration should be |
881 | * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for | 881 | * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for |
882 | * anything but the first bio in the chain. Otherwise you risk waiting for IO | 882 | * anything but the first bio in the chain. Otherwise you risk waiting for IO |
883 | * completion of a bio that hasn't been submitted yet, thus resulting in a | 883 | * completion of a bio that hasn't been submitted yet, thus resulting in a |
884 | * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead | 884 | * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead |
885 | * of bio_alloc(), as that avoids the mempool deadlock. | 885 | * of bio_alloc(), as that avoids the mempool deadlock. |
886 | * If possible a big IO should be split into smaller parts when allocation | 886 | * If possible a big IO should be split into smaller parts when allocation |
887 | * fails. Partial allocation should not be an error, or you risk a live-lock. | 887 | * fails. Partial allocation should not be an error, or you risk a live-lock. |
888 | */ | 888 | */ |
889 | struct request *blk_make_request(struct request_queue *q, struct bio *bio, | 889 | struct request *blk_make_request(struct request_queue *q, struct bio *bio, |
890 | gfp_t gfp_mask) | 890 | gfp_t gfp_mask) |
891 | { | 891 | { |
892 | struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); | 892 | struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); |
893 | 893 | ||
894 | if (unlikely(!rq)) | 894 | if (unlikely(!rq)) |
895 | return ERR_PTR(-ENOMEM); | 895 | return ERR_PTR(-ENOMEM); |
896 | 896 | ||
897 | for_each_bio(bio) { | 897 | for_each_bio(bio) { |
898 | struct bio *bounce_bio = bio; | 898 | struct bio *bounce_bio = bio; |
899 | int ret; | 899 | int ret; |
900 | 900 | ||
901 | blk_queue_bounce(q, &bounce_bio); | 901 | blk_queue_bounce(q, &bounce_bio); |
902 | ret = blk_rq_append_bio(q, rq, bounce_bio); | 902 | ret = blk_rq_append_bio(q, rq, bounce_bio); |
903 | if (unlikely(ret)) { | 903 | if (unlikely(ret)) { |
904 | blk_put_request(rq); | 904 | blk_put_request(rq); |
905 | return ERR_PTR(ret); | 905 | return ERR_PTR(ret); |
906 | } | 906 | } |
907 | } | 907 | } |
908 | 908 | ||
909 | return rq; | 909 | return rq; |
910 | } | 910 | } |
911 | EXPORT_SYMBOL(blk_make_request); | 911 | EXPORT_SYMBOL(blk_make_request); |
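[Editor's note] A hedged sketch of the pattern the kernel-doc above describes: the caller builds a bio describing its data buffers (with bio_kmalloc()/bio_add_page(), per the warning above), lets blk_make_request() wrap it, and then does the "further initialization" itself. cdb and cdb_len are hypothetical:

	struct request *rq;
	int err;

	rq = blk_make_request(q, bio, GFP_KERNEL);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* the initialization left to the caller */
	rq->cmd_len  = cdb_len;
	memcpy(rq->cmd, cdb, cdb_len);

	err = blk_execute_rq(q, NULL, rq, 0);	/* data direction comes from the bio */
	blk_put_request(rq);
	return err;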
912 | 912 | ||
913 | /** | 913 | /** |
914 | * blk_requeue_request - put a request back on queue | 914 | * blk_requeue_request - put a request back on queue |
915 | * @q: request queue where request should be inserted | 915 | * @q: request queue where request should be inserted |
916 | * @rq: request to be inserted | 916 | * @rq: request to be inserted |
917 | * | 917 | * |
918 | * Description: | 918 | * Description: |
919 | * Drivers often keep queueing requests until the hardware cannot accept | 919 | * Drivers often keep queueing requests until the hardware cannot accept |
920 | * more; when that condition happens we need to put the request back | 920 | * more; when that condition happens we need to put the request back |
921 | * on the queue. Must be called with queue lock held. | 921 | * on the queue. Must be called with queue lock held. |
922 | */ | 922 | */ |
923 | void blk_requeue_request(struct request_queue *q, struct request *rq) | 923 | void blk_requeue_request(struct request_queue *q, struct request *rq) |
924 | { | 924 | { |
925 | blk_delete_timer(rq); | 925 | blk_delete_timer(rq); |
926 | blk_clear_rq_complete(rq); | 926 | blk_clear_rq_complete(rq); |
927 | trace_block_rq_requeue(q, rq); | 927 | trace_block_rq_requeue(q, rq); |
928 | 928 | ||
929 | if (blk_rq_tagged(rq)) | 929 | if (blk_rq_tagged(rq)) |
930 | blk_queue_end_tag(q, rq); | 930 | blk_queue_end_tag(q, rq); |
931 | 931 | ||
932 | BUG_ON(blk_queued_rq(rq)); | 932 | BUG_ON(blk_queued_rq(rq)); |
933 | 933 | ||
934 | elv_requeue_request(q, rq); | 934 | elv_requeue_request(q, rq); |
935 | } | 935 | } |
936 | EXPORT_SYMBOL(blk_requeue_request); | 936 | EXPORT_SYMBOL(blk_requeue_request); |
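[Editor's note] blk_requeue_request() is normally called from a driver's request_fn when the hardware pushes back, under the queue lock as the description requires. A sketch; hw_busy() and hw_queue_rq() are hypothetical stand-ins for driver internals:

	static void my_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			if (hw_busy()) {
				/* q->queue_lock is already held inside ->request_fn */
				blk_requeue_request(q, rq);
				blk_delay_queue(q, 100);	/* re-run the queue in ~100ms */
				break;
			}
			hw_queue_rq(rq);			/* hand it to the hardware */
		}
	}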
937 | 937 | ||
938 | static void add_acct_request(struct request_queue *q, struct request *rq, | 938 | static void add_acct_request(struct request_queue *q, struct request *rq, |
939 | int where) | 939 | int where) |
940 | { | 940 | { |
941 | drive_stat_acct(rq, 1); | 941 | drive_stat_acct(rq, 1); |
942 | __elv_add_request(q, rq, where); | 942 | __elv_add_request(q, rq, where); |
943 | } | 943 | } |
944 | 944 | ||
945 | /** | 945 | /** |
946 | * blk_insert_request - insert a special request into a request queue | 946 | * blk_insert_request - insert a special request into a request queue |
947 | * @q: request queue where request should be inserted | 947 | * @q: request queue where request should be inserted |
948 | * @rq: request to be inserted | 948 | * @rq: request to be inserted |
949 | * @at_head: insert request at head or tail of queue | 949 | * @at_head: insert request at head or tail of queue |
950 | * @data: private data | 950 | * @data: private data |
951 | * | 951 | * |
952 | * Description: | 952 | * Description: |
953 | * Many block devices need to execute commands asynchronously, so they don't | 953 | * Many block devices need to execute commands asynchronously, so they don't |
954 | * block the whole kernel from preemption during request execution. This is | 954 | * block the whole kernel from preemption during request execution. This is |
955 | * accomplished normally by inserting artificial requests tagged as | 955 | * accomplished normally by inserting artificial requests tagged as |
956 | * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them | 956 | * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them |
957 | * be scheduled for actual execution by the request queue. | 957 | * be scheduled for actual execution by the request queue. |
958 | * | 958 | * |
959 | * We have the option of inserting at the head or the tail of the queue. | 959 | * We have the option of inserting at the head or the tail of the queue. |
960 | * Typically we use the tail for new ioctls and so forth. We use the head | 960 | * Typically we use the tail for new ioctls and so forth. We use the head |
961 | * of the queue for things like a QUEUE_FULL message from a device, or a | 961 | * of the queue for things like a QUEUE_FULL message from a device, or a |
962 | * host that is unable to accept a particular command. | 962 | * host that is unable to accept a particular command. |
963 | */ | 963 | */ |
964 | void blk_insert_request(struct request_queue *q, struct request *rq, | 964 | void blk_insert_request(struct request_queue *q, struct request *rq, |
965 | int at_head, void *data) | 965 | int at_head, void *data) |
966 | { | 966 | { |
967 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 967 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
968 | unsigned long flags; | 968 | unsigned long flags; |
969 | 969 | ||
970 | /* | 970 | /* |
971 | * tell I/O scheduler that this isn't a regular read/write (ie it | 971 | * tell I/O scheduler that this isn't a regular read/write (ie it |
972 | * must not attempt merges on this) and that it acts as a soft | 972 | * must not attempt merges on this) and that it acts as a soft |
973 | * barrier | 973 | * barrier |
974 | */ | 974 | */ |
975 | rq->cmd_type = REQ_TYPE_SPECIAL; | 975 | rq->cmd_type = REQ_TYPE_SPECIAL; |
976 | 976 | ||
977 | rq->special = data; | 977 | rq->special = data; |
978 | 978 | ||
979 | spin_lock_irqsave(q->queue_lock, flags); | 979 | spin_lock_irqsave(q->queue_lock, flags); |
980 | 980 | ||
981 | /* | 981 | /* |
982 | * If command is tagged, release the tag | 982 | * If command is tagged, release the tag |
983 | */ | 983 | */ |
984 | if (blk_rq_tagged(rq)) | 984 | if (blk_rq_tagged(rq)) |
985 | blk_queue_end_tag(q, rq); | 985 | blk_queue_end_tag(q, rq); |
986 | 986 | ||
987 | add_acct_request(q, rq, where); | 987 | add_acct_request(q, rq, where); |
988 | __blk_run_queue(q); | 988 | __blk_run_queue(q); |
989 | spin_unlock_irqrestore(q->queue_lock, flags); | 989 | spin_unlock_irqrestore(q->queue_lock, flags); |
990 | } | 990 | } |
991 | EXPORT_SYMBOL(blk_insert_request); | 991 | EXPORT_SYMBOL(blk_insert_request); |
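[Editor's note] A minimal sketch of the asynchronous REQ_TYPE_SPECIAL pattern described above; blk_insert_request() tags the request and kicks the queue, so the caller only allocates and fires. my_cmd_cookie is hypothetical and is what the driver's request_fn later finds in rq->special:

	struct request *rq;

	rq = blk_get_request(q, WRITE, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	blk_insert_request(q, rq, 0, my_cmd_cookie);	/* 0 = insert at the tail */
	return 0;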
992 | 992 | ||
993 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 993 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
994 | unsigned long now) | 994 | unsigned long now) |
995 | { | 995 | { |
996 | if (now == part->stamp) | 996 | if (now == part->stamp) |
997 | return; | 997 | return; |
998 | 998 | ||
999 | if (part_in_flight(part)) { | 999 | if (part_in_flight(part)) { |
1000 | __part_stat_add(cpu, part, time_in_queue, | 1000 | __part_stat_add(cpu, part, time_in_queue, |
1001 | part_in_flight(part) * (now - part->stamp)); | 1001 | part_in_flight(part) * (now - part->stamp)); |
1002 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | 1002 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
1003 | } | 1003 | } |
1004 | part->stamp = now; | 1004 | part->stamp = now; |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | /** | 1007 | /** |
1008 | * part_round_stats() - Round off the performance stats on a struct disk_stats. | 1008 | * part_round_stats() - Round off the performance stats on a struct disk_stats. |
1009 | * @cpu: cpu number for stats access | 1009 | * @cpu: cpu number for stats access |
1010 | * @part: target partition | 1010 | * @part: target partition |
1011 | * | 1011 | * |
1012 | * The average IO queue length and utilisation statistics are maintained | 1012 | * The average IO queue length and utilisation statistics are maintained |
1013 | * by observing the current state of the queue length and the amount of | 1013 | * by observing the current state of the queue length and the amount of |
1014 | * time it has been in this state for. | 1014 | * time it has been in this state for. |
1015 | * | 1015 | * |
1016 | * Normally, that accounting is done on IO completion, but that can result | 1016 | * Normally, that accounting is done on IO completion, but that can result |
1017 | * in more than a second's worth of IO being accounted for within any one | 1017 | * in more than a second's worth of IO being accounted for within any one |
1018 | * second, leading to >100% utilisation. To deal with that, we call this | 1018 | * second, leading to >100% utilisation. To deal with that, we call this |
1019 | * function to do a round-off before returning the results when reading | 1019 | * function to do a round-off before returning the results when reading |
1020 | * /proc/diskstats. This accounts immediately for all queue usage up to | 1020 | * /proc/diskstats. This accounts immediately for all queue usage up to |
1021 | * the current jiffies and restarts the counters again. | 1021 | * the current jiffies and restarts the counters again. |
1022 | */ | 1022 | */ |
1023 | void part_round_stats(int cpu, struct hd_struct *part) | 1023 | void part_round_stats(int cpu, struct hd_struct *part) |
1024 | { | 1024 | { |
1025 | unsigned long now = jiffies; | 1025 | unsigned long now = jiffies; |
1026 | 1026 | ||
1027 | if (part->partno) | 1027 | if (part->partno) |
1028 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); | 1028 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); |
1029 | part_round_stats_single(cpu, part, now); | 1029 | part_round_stats_single(cpu, part, now); |
1030 | } | 1030 | } |
1031 | EXPORT_SYMBOL_GPL(part_round_stats); | 1031 | EXPORT_SYMBOL_GPL(part_round_stats); |
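[Editor's note] A small worked example of the rounding above: if part_in_flight() is 4 and 10 jiffies have passed since part->stamp, the call adds 4 * 10 = 40 jiffies to time_in_queue and 10 jiffies to io_ticks, then moves the stamp to "now", so the same interval cannot be accounted again when the requests eventually complete.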
1032 | 1032 | ||
1033 | /* | 1033 | /* |
1034 | * queue lock must be held | 1034 | * queue lock must be held |
1035 | */ | 1035 | */ |
1036 | void __blk_put_request(struct request_queue *q, struct request *req) | 1036 | void __blk_put_request(struct request_queue *q, struct request *req) |
1037 | { | 1037 | { |
1038 | if (unlikely(!q)) | 1038 | if (unlikely(!q)) |
1039 | return; | 1039 | return; |
1040 | if (unlikely(--req->ref_count)) | 1040 | if (unlikely(--req->ref_count)) |
1041 | return; | 1041 | return; |
1042 | 1042 | ||
1043 | elv_completed_request(q, req); | 1043 | elv_completed_request(q, req); |
1044 | 1044 | ||
1045 | /* this is a bio leak */ | 1045 | /* this is a bio leak */ |
1046 | WARN_ON(req->bio != NULL); | 1046 | WARN_ON(req->bio != NULL); |
1047 | 1047 | ||
1048 | /* | 1048 | /* |
1049 | * Request may not have originated from ll_rw_blk. If not, | 1049 | * Request may not have originated from ll_rw_blk. If not, |
1050 | * it didn't come out of our reserved rq pools | 1050 | * it didn't come out of our reserved rq pools |
1051 | */ | 1051 | */ |
1052 | if (req->cmd_flags & REQ_ALLOCED) { | 1052 | if (req->cmd_flags & REQ_ALLOCED) { |
1053 | int is_sync = rq_is_sync(req) != 0; | 1053 | int is_sync = rq_is_sync(req) != 0; |
1054 | int priv = req->cmd_flags & REQ_ELVPRIV; | 1054 | int priv = req->cmd_flags & REQ_ELVPRIV; |
1055 | 1055 | ||
1056 | BUG_ON(!list_empty(&req->queuelist)); | 1056 | BUG_ON(!list_empty(&req->queuelist)); |
1057 | BUG_ON(!hlist_unhashed(&req->hash)); | 1057 | BUG_ON(!hlist_unhashed(&req->hash)); |
1058 | 1058 | ||
1059 | blk_free_request(q, req); | 1059 | blk_free_request(q, req); |
1060 | freed_request(q, is_sync, priv); | 1060 | freed_request(q, is_sync, priv); |
1061 | } | 1061 | } |
1062 | } | 1062 | } |
1063 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1063 | EXPORT_SYMBOL_GPL(__blk_put_request); |
1064 | 1064 | ||
1065 | void blk_put_request(struct request *req) | 1065 | void blk_put_request(struct request *req) |
1066 | { | 1066 | { |
1067 | unsigned long flags; | 1067 | unsigned long flags; |
1068 | struct request_queue *q = req->q; | 1068 | struct request_queue *q = req->q; |
1069 | 1069 | ||
1070 | spin_lock_irqsave(q->queue_lock, flags); | 1070 | spin_lock_irqsave(q->queue_lock, flags); |
1071 | __blk_put_request(q, req); | 1071 | __blk_put_request(q, req); |
1072 | spin_unlock_irqrestore(q->queue_lock, flags); | 1072 | spin_unlock_irqrestore(q->queue_lock, flags); |
1073 | } | 1073 | } |
1074 | EXPORT_SYMBOL(blk_put_request); | 1074 | EXPORT_SYMBOL(blk_put_request); |
1075 | 1075 | ||
1076 | /** | 1076 | /** |
1077 | * blk_add_request_payload - add a payload to a request | 1077 | * blk_add_request_payload - add a payload to a request |
1078 | * @rq: request to update | 1078 | * @rq: request to update |
1079 | * @page: page backing the payload | 1079 | * @page: page backing the payload |
1080 | * @len: length of the payload. | 1080 | * @len: length of the payload. |
1081 | * | 1081 | * |
1082 | * This allows a block driver to later add a payload to an already | 1082 | * This allows a block driver to later add a payload to an already |
1083 | * submitted request. The driver needs to take care of freeing the payload | 1083 | * submitted request. The driver needs to take care of freeing the payload |
1084 | * itself. | 1084 | * itself. |
1085 | * | 1085 | * |
1086 | * Note that this is a quite horrible hack and nothing but handling of | 1086 | * Note that this is a quite horrible hack and nothing but handling of |
1087 | * discard requests should ever use it. | 1087 | * discard requests should ever use it. |
1088 | */ | 1088 | */ |
1089 | void blk_add_request_payload(struct request *rq, struct page *page, | 1089 | void blk_add_request_payload(struct request *rq, struct page *page, |
1090 | unsigned int len) | 1090 | unsigned int len) |
1091 | { | 1091 | { |
1092 | struct bio *bio = rq->bio; | 1092 | struct bio *bio = rq->bio; |
1093 | 1093 | ||
1094 | bio->bi_io_vec->bv_page = page; | 1094 | bio->bi_io_vec->bv_page = page; |
1095 | bio->bi_io_vec->bv_offset = 0; | 1095 | bio->bi_io_vec->bv_offset = 0; |
1096 | bio->bi_io_vec->bv_len = len; | 1096 | bio->bi_io_vec->bv_len = len; |
1097 | 1097 | ||
1098 | bio->bi_size = len; | 1098 | bio->bi_size = len; |
1099 | bio->bi_vcnt = 1; | 1099 | bio->bi_vcnt = 1; |
1100 | bio->bi_phys_segments = 1; | 1100 | bio->bi_phys_segments = 1; |
1101 | 1101 | ||
1102 | rq->__data_len = rq->resid_len = len; | 1102 | rq->__data_len = rq->resid_len = len; |
1103 | rq->nr_phys_segments = 1; | 1103 | rq->nr_phys_segments = 1; |
1104 | rq->buffer = bio_data(bio); | 1104 | rq->buffer = bio_data(bio); |
1105 | } | 1105 | } |
1106 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1106 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
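[Editor's note] The "horrible hack" above exists for discard preparation. A hedged sketch of a prep-time user that backs a discard request with one zeroed page; the 512-byte payload length and the surrounding BLKPREP-style return codes are illustrative, not taken from a specific driver:

	struct page *page;

	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
	if (!page)
		return BLKPREP_DEFER;		/* retry the prep later */

	/* fill the page with the device's discard descriptor, then attach it */
	blk_add_request_payload(rq, page, 512);
	return BLKPREP_OK;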
1107 | 1107 | ||
1108 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | 1108 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, |
1109 | struct bio *bio) | 1109 | struct bio *bio) |
1110 | { | 1110 | { |
1111 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1111 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
1112 | 1112 | ||
1113 | if (!ll_back_merge_fn(q, req, bio)) | 1113 | if (!ll_back_merge_fn(q, req, bio)) |
1114 | return false; | 1114 | return false; |
1115 | 1115 | ||
1116 | trace_block_bio_backmerge(q, bio); | 1116 | trace_block_bio_backmerge(q, bio); |
1117 | 1117 | ||
1118 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | 1118 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
1119 | blk_rq_set_mixed_merge(req); | 1119 | blk_rq_set_mixed_merge(req); |
1120 | 1120 | ||
1121 | req->biotail->bi_next = bio; | 1121 | req->biotail->bi_next = bio; |
1122 | req->biotail = bio; | 1122 | req->biotail = bio; |
1123 | req->__data_len += bio->bi_size; | 1123 | req->__data_len += bio->bi_size; |
1124 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1124 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
1125 | 1125 | ||
1126 | drive_stat_acct(req, 0); | 1126 | drive_stat_acct(req, 0); |
1127 | elv_bio_merged(q, req, bio); | 1127 | elv_bio_merged(q, req, bio); |
1128 | return true; | 1128 | return true; |
1129 | } | 1129 | } |
1130 | 1130 | ||
1131 | static bool bio_attempt_front_merge(struct request_queue *q, | 1131 | static bool bio_attempt_front_merge(struct request_queue *q, |
1132 | struct request *req, struct bio *bio) | 1132 | struct request *req, struct bio *bio) |
1133 | { | 1133 | { |
1134 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1134 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
1135 | 1135 | ||
1136 | if (!ll_front_merge_fn(q, req, bio)) | 1136 | if (!ll_front_merge_fn(q, req, bio)) |
1137 | return false; | 1137 | return false; |
1138 | 1138 | ||
1139 | trace_block_bio_frontmerge(q, bio); | 1139 | trace_block_bio_frontmerge(q, bio); |
1140 | 1140 | ||
1141 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | 1141 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
1142 | blk_rq_set_mixed_merge(req); | 1142 | blk_rq_set_mixed_merge(req); |
1143 | 1143 | ||
1144 | bio->bi_next = req->bio; | 1144 | bio->bi_next = req->bio; |
1145 | req->bio = bio; | 1145 | req->bio = bio; |
1146 | 1146 | ||
1147 | /* | 1147 | /* |
1148 | * may not be valid. if the low level driver said | 1148 | * may not be valid. if the low level driver said |
1149 | * it didn't need a bounce buffer then it better | 1149 | * it didn't need a bounce buffer then it better |
1150 | * not touch req->buffer either... | 1150 | * not touch req->buffer either... |
1151 | */ | 1151 | */ |
1152 | req->buffer = bio_data(bio); | 1152 | req->buffer = bio_data(bio); |
1153 | req->__sector = bio->bi_sector; | 1153 | req->__sector = bio->bi_sector; |
1154 | req->__data_len += bio->bi_size; | 1154 | req->__data_len += bio->bi_size; |
1155 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1155 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
1156 | 1156 | ||
1157 | drive_stat_acct(req, 0); | 1157 | drive_stat_acct(req, 0); |
1158 | elv_bio_merged(q, req, bio); | 1158 | elv_bio_merged(q, req, bio); |
1159 | return true; | 1159 | return true; |
1160 | } | 1160 | } |
1161 | 1161 | ||
1162 | /* | 1162 | /* |
1163 | * Attempts to merge with the plugged list in the current process. Returns | 1163 | * Attempts to merge with the plugged list in the current process. Returns |
1164 | * true if merge was successful, otherwise false. | 1164 | * true if merge was successful, otherwise false. |
1165 | */ | 1165 | */ |
1166 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, | 1166 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, |
1167 | struct bio *bio) | 1167 | struct bio *bio) |
1168 | { | 1168 | { |
1169 | struct blk_plug *plug; | 1169 | struct blk_plug *plug; |
1170 | struct request *rq; | 1170 | struct request *rq; |
1171 | bool ret = false; | 1171 | bool ret = false; |
1172 | 1172 | ||
1173 | plug = tsk->plug; | 1173 | plug = tsk->plug; |
1174 | if (!plug) | 1174 | if (!plug) |
1175 | goto out; | 1175 | goto out; |
1176 | 1176 | ||
1177 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { | 1177 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { |
1178 | int el_ret; | 1178 | int el_ret; |
1179 | 1179 | ||
1180 | if (rq->q != q) | 1180 | if (rq->q != q) |
1181 | continue; | 1181 | continue; |
1182 | 1182 | ||
1183 | el_ret = elv_try_merge(rq, bio); | 1183 | el_ret = elv_try_merge(rq, bio); |
1184 | if (el_ret == ELEVATOR_BACK_MERGE) { | 1184 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1185 | ret = bio_attempt_back_merge(q, rq, bio); | 1185 | ret = bio_attempt_back_merge(q, rq, bio); |
1186 | if (ret) | 1186 | if (ret) |
1187 | break; | 1187 | break; |
1188 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | 1188 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { |
1189 | ret = bio_attempt_front_merge(q, rq, bio); | 1189 | ret = bio_attempt_front_merge(q, rq, bio); |
1190 | if (ret) | 1190 | if (ret) |
1191 | break; | 1191 | break; |
1192 | } | 1192 | } |
1193 | } | 1193 | } |
1194 | out: | 1194 | out: |
1195 | return ret; | 1195 | return ret; |
1196 | } | 1196 | } |
1197 | 1197 | ||
1198 | void init_request_from_bio(struct request *req, struct bio *bio) | 1198 | void init_request_from_bio(struct request *req, struct bio *bio) |
1199 | { | 1199 | { |
1200 | req->cpu = bio->bi_comp_cpu; | 1200 | req->cpu = bio->bi_comp_cpu; |
1201 | req->cmd_type = REQ_TYPE_FS; | 1201 | req->cmd_type = REQ_TYPE_FS; |
1202 | 1202 | ||
1203 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; | 1203 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; |
1204 | if (bio->bi_rw & REQ_RAHEAD) | 1204 | if (bio->bi_rw & REQ_RAHEAD) |
1205 | req->cmd_flags |= REQ_FAILFAST_MASK; | 1205 | req->cmd_flags |= REQ_FAILFAST_MASK; |
1206 | 1206 | ||
1207 | req->errors = 0; | 1207 | req->errors = 0; |
1208 | req->__sector = bio->bi_sector; | 1208 | req->__sector = bio->bi_sector; |
1209 | req->ioprio = bio_prio(bio); | 1209 | req->ioprio = bio_prio(bio); |
1210 | blk_rq_bio_prep(req->q, req, bio); | 1210 | blk_rq_bio_prep(req->q, req, bio); |
1211 | } | 1211 | } |
1212 | 1212 | ||
1213 | static int __make_request(struct request_queue *q, struct bio *bio) | 1213 | static int __make_request(struct request_queue *q, struct bio *bio) |
1214 | { | 1214 | { |
1215 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1215 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1216 | struct blk_plug *plug; | 1216 | struct blk_plug *plug; |
1217 | int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; | 1217 | int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; |
1218 | struct request *req; | 1218 | struct request *req; |
1219 | 1219 | ||
1220 | /* | 1220 | /* |
1221 | * low level driver can indicate that it wants pages above a | 1221 | * low level driver can indicate that it wants pages above a |
1222 | * certain limit bounced to low memory (ie for highmem, or even | 1222 | * certain limit bounced to low memory (ie for highmem, or even |
1223 | * ISA dma in theory) | 1223 | * ISA dma in theory) |
1224 | */ | 1224 | */ |
1225 | blk_queue_bounce(q, &bio); | 1225 | blk_queue_bounce(q, &bio); |
1226 | 1226 | ||
1227 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { | 1227 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { |
1228 | spin_lock_irq(q->queue_lock); | 1228 | spin_lock_irq(q->queue_lock); |
1229 | where = ELEVATOR_INSERT_FLUSH; | 1229 | where = ELEVATOR_INSERT_FLUSH; |
1230 | goto get_rq; | 1230 | goto get_rq; |
1231 | } | 1231 | } |
1232 | 1232 | ||
1233 | /* | 1233 | /* |
1234 | * Check if we can merge with the plugged list before grabbing | 1234 | * Check if we can merge with the plugged list before grabbing |
1235 | * any locks. | 1235 | * any locks. |
1236 | */ | 1236 | */ |
1237 | if (attempt_plug_merge(current, q, bio)) | 1237 | if (attempt_plug_merge(current, q, bio)) |
1238 | goto out; | 1238 | goto out; |
1239 | 1239 | ||
1240 | spin_lock_irq(q->queue_lock); | 1240 | spin_lock_irq(q->queue_lock); |
1241 | 1241 | ||
1242 | el_ret = elv_merge(q, &req, bio); | 1242 | el_ret = elv_merge(q, &req, bio); |
1243 | if (el_ret == ELEVATOR_BACK_MERGE) { | 1243 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1244 | if (bio_attempt_back_merge(q, req, bio)) { | 1244 | if (bio_attempt_back_merge(q, req, bio)) { |
1245 | if (!attempt_back_merge(q, req)) | 1245 | if (!attempt_back_merge(q, req)) |
1246 | elv_merged_request(q, req, el_ret); | 1246 | elv_merged_request(q, req, el_ret); |
1247 | goto out_unlock; | 1247 | goto out_unlock; |
1248 | } | 1248 | } |
1249 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | 1249 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { |
1250 | if (bio_attempt_front_merge(q, req, bio)) { | 1250 | if (bio_attempt_front_merge(q, req, bio)) { |
1251 | if (!attempt_front_merge(q, req)) | 1251 | if (!attempt_front_merge(q, req)) |
1252 | elv_merged_request(q, req, el_ret); | 1252 | elv_merged_request(q, req, el_ret); |
1253 | goto out_unlock; | 1253 | goto out_unlock; |
1254 | } | 1254 | } |
1255 | } | 1255 | } |
1256 | 1256 | ||
1257 | get_rq: | 1257 | get_rq: |
1258 | /* | 1258 | /* |
1259 | * This sync check and mask will be re-done in init_request_from_bio(), | 1259 | * This sync check and mask will be re-done in init_request_from_bio(), |
1260 | * but we need to set it earlier to expose the sync flag to the | 1260 | * but we need to set it earlier to expose the sync flag to the |
1261 | * rq allocator and io schedulers. | 1261 | * rq allocator and io schedulers. |
1262 | */ | 1262 | */ |
1263 | rw_flags = bio_data_dir(bio); | 1263 | rw_flags = bio_data_dir(bio); |
1264 | if (sync) | 1264 | if (sync) |
1265 | rw_flags |= REQ_SYNC; | 1265 | rw_flags |= REQ_SYNC; |
1266 | 1266 | ||
1267 | /* | 1267 | /* |
1268 | * Grab a free request. This might sleep but cannot fail. | 1268 | * Grab a free request. This might sleep but cannot fail. |
1269 | * Returns with the queue unlocked. | 1269 | * Returns with the queue unlocked. |
1270 | */ | 1270 | */ |
1271 | req = get_request_wait(q, rw_flags, bio); | 1271 | req = get_request_wait(q, rw_flags, bio); |
1272 | 1272 | ||
1273 | /* | 1273 | /* |
1274 | * After dropping the lock and possibly sleeping here, our request | 1274 | * After dropping the lock and possibly sleeping here, our request |
1275 | * may now be mergeable after it had proven unmergeable (above). | 1275 | * may now be mergeable after it had proven unmergeable (above). |
1276 | * We don't worry about that case for efficiency. It won't happen | 1276 | * We don't worry about that case for efficiency. It won't happen |
1277 | * often, and the elevators are able to handle it. | 1277 | * often, and the elevators are able to handle it. |
1278 | */ | 1278 | */ |
1279 | init_request_from_bio(req, bio); | 1279 | init_request_from_bio(req, bio); |
1280 | 1280 | ||
1281 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1281 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || |
1282 | bio_flagged(bio, BIO_CPU_AFFINE)) { | 1282 | bio_flagged(bio, BIO_CPU_AFFINE)) { |
1283 | req->cpu = blk_cpu_to_group(get_cpu()); | 1283 | req->cpu = blk_cpu_to_group(get_cpu()); |
1284 | put_cpu(); | 1284 | put_cpu(); |
1285 | } | 1285 | } |
1286 | 1286 | ||
1287 | plug = current->plug; | 1287 | plug = current->plug; |
1288 | if (plug) { | 1288 | if (plug) { |
1289 | /* | 1289 | /* |
1290 | * If this is the first request added after a plug, fire | 1290 | * If this is the first request added after a plug, fire |
1291 | * off a plug trace. If others have been added before, check | 1291 | * off a plug trace. If others have been added before, check |
1292 | * if we have multiple devices in this plug. If so, make a | 1292 | * if we have multiple devices in this plug. If so, make a |
1293 | * note to sort the list before dispatch. | 1293 | * note to sort the list before dispatch. |
1294 | */ | 1294 | */ |
1295 | if (list_empty(&plug->list)) | 1295 | if (list_empty(&plug->list)) |
1296 | trace_block_plug(q); | 1296 | trace_block_plug(q); |
1297 | else if (!plug->should_sort) { | 1297 | else if (!plug->should_sort) { |
1298 | struct request *__rq; | 1298 | struct request *__rq; |
1299 | 1299 | ||
1300 | __rq = list_entry_rq(plug->list.prev); | 1300 | __rq = list_entry_rq(plug->list.prev); |
1301 | if (__rq->q != q) | 1301 | if (__rq->q != q) |
1302 | plug->should_sort = 1; | 1302 | plug->should_sort = 1; |
1303 | } | 1303 | } |
1304 | list_add_tail(&req->queuelist, &plug->list); | 1304 | list_add_tail(&req->queuelist, &plug->list); |
1305 | plug->count++; | ||
1305 | drive_stat_acct(req, 1); | 1306 | drive_stat_acct(req, 1); |
1307 | if (plug->count >= BLK_MAX_REQUEST_COUNT) | ||
1308 | blk_flush_plug_list(plug, false); | ||
1306 | } else { | 1309 | } else { |
1307 | spin_lock_irq(q->queue_lock); | 1310 | spin_lock_irq(q->queue_lock); |
1308 | add_acct_request(q, req, where); | 1311 | add_acct_request(q, req, where); |
1309 | __blk_run_queue(q); | 1312 | __blk_run_queue(q); |
1310 | out_unlock: | 1313 | out_unlock: |
1311 | spin_unlock_irq(q->queue_lock); | 1314 | spin_unlock_irq(q->queue_lock); |
1312 | } | 1315 | } |
1313 | out: | 1316 | out: |
1314 | return 0; | 1317 | return 0; |
1315 | } | 1318 | } |
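[Editor's note] The three added lines above are the point of this commit: plug->count is bumped for every request parked on the plug list, and once it reaches BLK_MAX_REQUEST_COUNT the list is flushed with blk_flush_plug_list() instead of growing without bound. For reference, a sketch of the submitter-side plugging pattern that feeds this path; nr_bios and bios[] are hypothetical:

	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);			/* current->plug now points at 'plug' */
	for (i = 0; i < nr_bios; i++)
		submit_bio(WRITE, bios[i]);	/* requests pile up on plug->list above */
	blk_finish_plug(&plug);			/* flushes whatever the threshold left behind */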
1316 | 1319 | ||
1317 | /* | 1320 | /* |
1318 | * If bio->bi_bdev is a partition, remap the location | 1321 | * If bio->bi_bdev is a partition, remap the location |
1319 | */ | 1322 | */ |
1320 | static inline void blk_partition_remap(struct bio *bio) | 1323 | static inline void blk_partition_remap(struct bio *bio) |
1321 | { | 1324 | { |
1322 | struct block_device *bdev = bio->bi_bdev; | 1325 | struct block_device *bdev = bio->bi_bdev; |
1323 | 1326 | ||
1324 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { | 1327 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { |
1325 | struct hd_struct *p = bdev->bd_part; | 1328 | struct hd_struct *p = bdev->bd_part; |
1326 | 1329 | ||
1327 | bio->bi_sector += p->start_sect; | 1330 | bio->bi_sector += p->start_sect; |
1328 | bio->bi_bdev = bdev->bd_contains; | 1331 | bio->bi_bdev = bdev->bd_contains; |
1329 | 1332 | ||
1330 | trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, | 1333 | trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, |
1331 | bdev->bd_dev, | 1334 | bdev->bd_dev, |
1332 | bio->bi_sector - p->start_sect); | 1335 | bio->bi_sector - p->start_sect); |
1333 | } | 1336 | } |
1334 | } | 1337 | } |
1335 | 1338 | ||
1336 | static void handle_bad_sector(struct bio *bio) | 1339 | static void handle_bad_sector(struct bio *bio) |
1337 | { | 1340 | { |
1338 | char b[BDEVNAME_SIZE]; | 1341 | char b[BDEVNAME_SIZE]; |
1339 | 1342 | ||
1340 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 1343 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
1341 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 1344 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
1342 | bdevname(bio->bi_bdev, b), | 1345 | bdevname(bio->bi_bdev, b), |
1343 | bio->bi_rw, | 1346 | bio->bi_rw, |
1344 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 1347 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
1345 | (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); | 1348 | (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); |
1346 | 1349 | ||
1347 | set_bit(BIO_EOF, &bio->bi_flags); | 1350 | set_bit(BIO_EOF, &bio->bi_flags); |
1348 | } | 1351 | } |
1349 | 1352 | ||
1350 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1353 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1351 | 1354 | ||
1352 | static DECLARE_FAULT_ATTR(fail_make_request); | 1355 | static DECLARE_FAULT_ATTR(fail_make_request); |
1353 | 1356 | ||
1354 | static int __init setup_fail_make_request(char *str) | 1357 | static int __init setup_fail_make_request(char *str) |
1355 | { | 1358 | { |
1356 | return setup_fault_attr(&fail_make_request, str); | 1359 | return setup_fault_attr(&fail_make_request, str); |
1357 | } | 1360 | } |
1358 | __setup("fail_make_request=", setup_fail_make_request); | 1361 | __setup("fail_make_request=", setup_fail_make_request); |
1359 | 1362 | ||
1360 | static int should_fail_request(struct bio *bio) | 1363 | static int should_fail_request(struct bio *bio) |
1361 | { | 1364 | { |
1362 | struct hd_struct *part = bio->bi_bdev->bd_part; | 1365 | struct hd_struct *part = bio->bi_bdev->bd_part; |
1363 | 1366 | ||
1364 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) | 1367 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) |
1365 | return should_fail(&fail_make_request, bio->bi_size); | 1368 | return should_fail(&fail_make_request, bio->bi_size); |
1366 | 1369 | ||
1367 | return 0; | 1370 | return 0; |
1368 | } | 1371 | } |
1369 | 1372 | ||
1370 | static int __init fail_make_request_debugfs(void) | 1373 | static int __init fail_make_request_debugfs(void) |
1371 | { | 1374 | { |
1372 | return init_fault_attr_dentries(&fail_make_request, | 1375 | return init_fault_attr_dentries(&fail_make_request, |
1373 | "fail_make_request"); | 1376 | "fail_make_request"); |
1374 | } | 1377 | } |
1375 | 1378 | ||
1376 | late_initcall(fail_make_request_debugfs); | 1379 | late_initcall(fail_make_request_debugfs); |
1377 | 1380 | ||
1378 | #else /* CONFIG_FAIL_MAKE_REQUEST */ | 1381 | #else /* CONFIG_FAIL_MAKE_REQUEST */ |
1379 | 1382 | ||
1380 | static inline int should_fail_request(struct bio *bio) | 1383 | static inline int should_fail_request(struct bio *bio) |
1381 | { | 1384 | { |
1382 | return 0; | 1385 | return 0; |
1383 | } | 1386 | } |
1384 | 1387 | ||
1385 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ | 1388 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ |
1386 | 1389 | ||
1387 | /* | 1390 | /* |
1388 | * Check whether this bio extends beyond the end of the device. | 1391 | * Check whether this bio extends beyond the end of the device. |
1389 | */ | 1392 | */ |
1390 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | 1393 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) |
1391 | { | 1394 | { |
1392 | sector_t maxsector; | 1395 | sector_t maxsector; |
1393 | 1396 | ||
1394 | if (!nr_sectors) | 1397 | if (!nr_sectors) |
1395 | return 0; | 1398 | return 0; |
1396 | 1399 | ||
1397 | /* Test device or partition size, when known. */ | 1400 | /* Test device or partition size, when known. */ |
1398 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; | 1401 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; |
1399 | if (maxsector) { | 1402 | if (maxsector) { |
1400 | sector_t sector = bio->bi_sector; | 1403 | sector_t sector = bio->bi_sector; |
1401 | 1404 | ||
1402 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 1405 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
1403 | /* | 1406 | /* |
1404 | * This may well happen - the kernel calls bread() | 1407 | * This may well happen - the kernel calls bread() |
1405 | * without checking the size of the device, e.g., when | 1408 | * without checking the size of the device, e.g., when |
1406 | * mounting a device. | 1409 | * mounting a device. |
1407 | */ | 1410 | */ |
1408 | handle_bad_sector(bio); | 1411 | handle_bad_sector(bio); |
1409 | return 1; | 1412 | return 1; |
1410 | } | 1413 | } |
1411 | } | 1414 | } |
1412 | 1415 | ||
1413 | return 0; | 1416 | return 0; |
1414 | } | 1417 | } |
1415 | 1418 | ||
1416 | /** | 1419 | /** |
1417 | * generic_make_request - hand a buffer to its device driver for I/O | 1420 | * generic_make_request - hand a buffer to its device driver for I/O |
1418 | * @bio: The bio describing the location in memory and on the device. | 1421 | * @bio: The bio describing the location in memory and on the device. |
1419 | * | 1422 | * |
1420 | * generic_make_request() is used to make I/O requests of block | 1423 | * generic_make_request() is used to make I/O requests of block |
1421 | * devices. It is passed a &struct bio, which describes the I/O that needs | 1424 | * devices. It is passed a &struct bio, which describes the I/O that needs |
1422 | * to be done. | 1425 | * to be done. |
1423 | * | 1426 | * |
1424 | * generic_make_request() does not return any status. The | 1427 | * generic_make_request() does not return any status. The |
1425 | * success/failure status of the request, along with notification of | 1428 | * success/failure status of the request, along with notification of |
1426 | * completion, is delivered asynchronously through the bio->bi_end_io | 1429 | * completion, is delivered asynchronously through the bio->bi_end_io |
1427 | * function described (one day) elsewhere. | 1430 | * function described (one day) elsewhere. |
1428 | * | 1431 | * |
1429 | * The caller of generic_make_request must make sure that bi_io_vec | 1432 | * The caller of generic_make_request must make sure that bi_io_vec |
1430 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | 1433 | * are set to describe the memory buffer, and that bi_dev and bi_sector are |
1431 | * set to describe the device address, and the | 1434 | * set to describe the device address, and the |
1432 | * bi_end_io and optionally bi_private are set to describe how | 1435 | * bi_end_io and optionally bi_private are set to describe how |
1433 | * completion notification should be signaled. | 1436 | * completion notification should be signaled. |
1434 | * | 1437 | * |
1435 | * generic_make_request and the drivers it calls may use bi_next if this | 1438 | * generic_make_request and the drivers it calls may use bi_next if this |
1436 | * bio happens to be merged with someone else, and may change bi_dev and | 1439 | * bio happens to be merged with someone else, and may change bi_dev and |
1437 | * bi_sector for remaps as it sees fit. So the values of these fields | 1440 | * bi_sector for remaps as it sees fit. So the values of these fields |
1438 | * should NOT be depended on after the call to generic_make_request. | 1441 | * should NOT be depended on after the call to generic_make_request. |
1439 | */ | 1442 | */ |
1440 | static inline void __generic_make_request(struct bio *bio) | 1443 | static inline void __generic_make_request(struct bio *bio) |
1441 | { | 1444 | { |
1442 | struct request_queue *q; | 1445 | struct request_queue *q; |
1443 | sector_t old_sector; | 1446 | sector_t old_sector; |
1444 | int ret, nr_sectors = bio_sectors(bio); | 1447 | int ret, nr_sectors = bio_sectors(bio); |
1445 | dev_t old_dev; | 1448 | dev_t old_dev; |
1446 | int err = -EIO; | 1449 | int err = -EIO; |
1447 | 1450 | ||
1448 | might_sleep(); | 1451 | might_sleep(); |
1449 | 1452 | ||
1450 | if (bio_check_eod(bio, nr_sectors)) | 1453 | if (bio_check_eod(bio, nr_sectors)) |
1451 | goto end_io; | 1454 | goto end_io; |
1452 | 1455 | ||
1453 | /* | 1456 | /* |
1454 | * Resolve the mapping until finished. (drivers are | 1457 | * Resolve the mapping until finished. (drivers are |
1455 | * still free to implement/resolve their own stacking | 1458 | * still free to implement/resolve their own stacking |
1456 | * by explicitly returning 0) | 1459 | * by explicitly returning 0) |
1457 | * | 1460 | * |
1458 | * NOTE: we don't repeat the blk_size check for each new device. | 1461 | * NOTE: we don't repeat the blk_size check for each new device. |
1459 | * Stacking drivers are expected to know what they are doing. | 1462 | * Stacking drivers are expected to know what they are doing. |
1460 | */ | 1463 | */ |
1461 | old_sector = -1; | 1464 | old_sector = -1; |
1462 | old_dev = 0; | 1465 | old_dev = 0; |
1463 | do { | 1466 | do { |
1464 | char b[BDEVNAME_SIZE]; | 1467 | char b[BDEVNAME_SIZE]; |
1465 | 1468 | ||
1466 | q = bdev_get_queue(bio->bi_bdev); | 1469 | q = bdev_get_queue(bio->bi_bdev); |
1467 | if (unlikely(!q)) { | 1470 | if (unlikely(!q)) { |
1468 | printk(KERN_ERR | 1471 | printk(KERN_ERR |
1469 | "generic_make_request: Trying to access " | 1472 | "generic_make_request: Trying to access " |
1470 | "nonexistent block-device %s (%Lu)\n", | 1473 | "nonexistent block-device %s (%Lu)\n", |
1471 | bdevname(bio->bi_bdev, b), | 1474 | bdevname(bio->bi_bdev, b), |
1472 | (long long) bio->bi_sector); | 1475 | (long long) bio->bi_sector); |
1473 | goto end_io; | 1476 | goto end_io; |
1474 | } | 1477 | } |
1475 | 1478 | ||
1476 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && | 1479 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && |
1477 | nr_sectors > queue_max_hw_sectors(q))) { | 1480 | nr_sectors > queue_max_hw_sectors(q))) { |
1478 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1481 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
1479 | bdevname(bio->bi_bdev, b), | 1482 | bdevname(bio->bi_bdev, b), |
1480 | bio_sectors(bio), | 1483 | bio_sectors(bio), |
1481 | queue_max_hw_sectors(q)); | 1484 | queue_max_hw_sectors(q)); |
1482 | goto end_io; | 1485 | goto end_io; |
1483 | } | 1486 | } |
1484 | 1487 | ||
1485 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 1488 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
1486 | goto end_io; | 1489 | goto end_io; |
1487 | 1490 | ||
1488 | if (should_fail_request(bio)) | 1491 | if (should_fail_request(bio)) |
1489 | goto end_io; | 1492 | goto end_io; |
1490 | 1493 | ||
1491 | /* | 1494 | /* |
1492 | * If this device has partitions, remap block n | 1495 | * If this device has partitions, remap block n |
1493 | * of partition p to block n+start(p) of the disk. | 1496 | * of partition p to block n+start(p) of the disk. |
1494 | */ | 1497 | */ |
1495 | blk_partition_remap(bio); | 1498 | blk_partition_remap(bio); |
1496 | 1499 | ||
1497 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) | 1500 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) |
1498 | goto end_io; | 1501 | goto end_io; |
1499 | 1502 | ||
1500 | if (old_sector != -1) | 1503 | if (old_sector != -1) |
1501 | trace_block_bio_remap(q, bio, old_dev, old_sector); | 1504 | trace_block_bio_remap(q, bio, old_dev, old_sector); |
1502 | 1505 | ||
1503 | old_sector = bio->bi_sector; | 1506 | old_sector = bio->bi_sector; |
1504 | old_dev = bio->bi_bdev->bd_dev; | 1507 | old_dev = bio->bi_bdev->bd_dev; |
1505 | 1508 | ||
1506 | if (bio_check_eod(bio, nr_sectors)) | 1509 | if (bio_check_eod(bio, nr_sectors)) |
1507 | goto end_io; | 1510 | goto end_io; |
1508 | 1511 | ||
1509 | /* | 1512 | /* |
1510 | * Filter flush bio's early so that make_request based | 1513 | * Filter flush bio's early so that make_request based |
1511 | * drivers without flush support don't have to worry | 1514 | * drivers without flush support don't have to worry |
1512 | * about them. | 1515 | * about them. |
1513 | */ | 1516 | */ |
1514 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { | 1517 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { |
1515 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); | 1518 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); |
1516 | if (!nr_sectors) { | 1519 | if (!nr_sectors) { |
1517 | err = 0; | 1520 | err = 0; |
1518 | goto end_io; | 1521 | goto end_io; |
1519 | } | 1522 | } |
1520 | } | 1523 | } |
1521 | 1524 | ||
1522 | if ((bio->bi_rw & REQ_DISCARD) && | 1525 | if ((bio->bi_rw & REQ_DISCARD) && |
1523 | (!blk_queue_discard(q) || | 1526 | (!blk_queue_discard(q) || |
1524 | ((bio->bi_rw & REQ_SECURE) && | 1527 | ((bio->bi_rw & REQ_SECURE) && |
1525 | !blk_queue_secdiscard(q)))) { | 1528 | !blk_queue_secdiscard(q)))) { |
1526 | err = -EOPNOTSUPP; | 1529 | err = -EOPNOTSUPP; |
1527 | goto end_io; | 1530 | goto end_io; |
1528 | } | 1531 | } |
1529 | 1532 | ||
1530 | if (blk_throtl_bio(q, &bio)) | 1533 | if (blk_throtl_bio(q, &bio)) |
1531 | goto end_io; | 1534 | goto end_io; |
1532 | 1535 | ||
1533 | /* | 1536 | /* |
1534 | * If bio = NULL, bio has been throttled and will be submitted | 1537 | * If bio = NULL, bio has been throttled and will be submitted |
1535 | * later. | 1538 | * later. |
1536 | */ | 1539 | */ |
1537 | if (!bio) | 1540 | if (!bio) |
1538 | break; | 1541 | break; |
1539 | 1542 | ||
1540 | trace_block_bio_queue(q, bio); | 1543 | trace_block_bio_queue(q, bio); |
1541 | 1544 | ||
1542 | ret = q->make_request_fn(q, bio); | 1545 | ret = q->make_request_fn(q, bio); |
1543 | } while (ret); | 1546 | } while (ret); |
1544 | 1547 | ||
1545 | return; | 1548 | return; |
1546 | 1549 | ||
1547 | end_io: | 1550 | end_io: |
1548 | bio_endio(bio, err); | 1551 | bio_endio(bio, err); |
1549 | } | 1552 | } |
1550 | 1553 | ||
1551 | /* | 1554 | /* |
1552 | * We only want one ->make_request_fn to be active at a time, | 1555 | * We only want one ->make_request_fn to be active at a time, |
1553 | * else stack usage with stacked devices could be a problem. | 1556 | * else stack usage with stacked devices could be a problem. |
1554 | * So use current->bio_list to keep a list of requests | 1557 | * So use current->bio_list to keep a list of requests |
1555 | * submitted by a make_request_fn function. | 1558 | * submitted by a make_request_fn function. |
1556 | * current->bio_list is also used as a flag to say if | 1559 | * current->bio_list is also used as a flag to say if |
1557 | * generic_make_request is currently active in this task or not. | 1560 | * generic_make_request is currently active in this task or not. |
1558 | * If it is NULL, then no make_request is active. If it is non-NULL, | 1561 | * If it is NULL, then no make_request is active. If it is non-NULL, |
1559 | * then a make_request is active, and new requests should be added | 1562 | * then a make_request is active, and new requests should be added |
1560 | * at the tail | 1563 | * at the tail |
1561 | */ | 1564 | */ |
1562 | void generic_make_request(struct bio *bio) | 1565 | void generic_make_request(struct bio *bio) |
1563 | { | 1566 | { |
1564 | struct bio_list bio_list_on_stack; | 1567 | struct bio_list bio_list_on_stack; |
1565 | 1568 | ||
1566 | if (current->bio_list) { | 1569 | if (current->bio_list) { |
1567 | /* make_request is active */ | 1570 | /* make_request is active */ |
1568 | bio_list_add(current->bio_list, bio); | 1571 | bio_list_add(current->bio_list, bio); |
1569 | return; | 1572 | return; |
1570 | } | 1573 | } |
1571 | /* following loop may be a bit non-obvious, and so deserves some | 1574 | /* following loop may be a bit non-obvious, and so deserves some |
1572 | * explanation. | 1575 | * explanation. |
1573 | * Before entering the loop, bio->bi_next is NULL (as all callers | 1576 | * Before entering the loop, bio->bi_next is NULL (as all callers |
1574 | * ensure that) so we have a list with a single bio. | 1577 | * ensure that) so we have a list with a single bio. |
1575 | * We pretend that we have just taken it off a longer list, so | 1578 | * We pretend that we have just taken it off a longer list, so |
1576 | * we assign bio_list to a pointer to the bio_list_on_stack, | 1579 | * we assign bio_list to a pointer to the bio_list_on_stack, |
1577 | * thus initialising the bio_list of new bios to be | 1580 | * thus initialising the bio_list of new bios to be |
1578 | * added. __generic_make_request may indeed add some more bios | 1581 | * added. __generic_make_request may indeed add some more bios |
1579 | * through a recursive call to generic_make_request. If it | 1582 | * through a recursive call to generic_make_request. If it |
1580 | * did, we find a non-NULL value in bio_list and re-enter the loop | 1583 | * did, we find a non-NULL value in bio_list and re-enter the loop |
1581 | * from the top. In this case we really did just take the bio | 1584 | * from the top. In this case we really did just take the bio |
1582 | * of the top of the list (no pretending) and so remove it from | 1585 | * of the top of the list (no pretending) and so remove it from |
1583 | * bio_list, and call into __generic_make_request again. | 1586 | * bio_list, and call into __generic_make_request again. |
1584 | * | 1587 | * |
1585 | * The loop was structured like this to make only one call to | 1588 | * The loop was structured like this to make only one call to |
1586 | * __generic_make_request (which is important as it is large and | 1589 | * __generic_make_request (which is important as it is large and |
1587 | * inlined) and to keep the structure simple. | 1590 | * inlined) and to keep the structure simple. |
1588 | */ | 1591 | */ |
1589 | BUG_ON(bio->bi_next); | 1592 | BUG_ON(bio->bi_next); |
1590 | bio_list_init(&bio_list_on_stack); | 1593 | bio_list_init(&bio_list_on_stack); |
1591 | current->bio_list = &bio_list_on_stack; | 1594 | current->bio_list = &bio_list_on_stack; |
1592 | do { | 1595 | do { |
1593 | __generic_make_request(bio); | 1596 | __generic_make_request(bio); |
1594 | bio = bio_list_pop(current->bio_list); | 1597 | bio = bio_list_pop(current->bio_list); |
1595 | } while (bio); | 1598 | } while (bio); |
1596 | current->bio_list = NULL; /* deactivate */ | 1599 | current->bio_list = NULL; /* deactivate */ |
1597 | } | 1600 | } |
1598 | EXPORT_SYMBOL(generic_make_request); | 1601 | EXPORT_SYMBOL(generic_make_request); |
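[Editor's note] The on-stack bio_list above is what keeps stacked drivers (md, dm) from recursing through generic_make_request(). A hedged sketch of a remapping make_request_fn under this era's int-returning prototype; my_choose_lower_bdev() is hypothetical:

	static int my_make_request(struct request_queue *q, struct bio *bio)
	{
		bio->bi_bdev = my_choose_lower_bdev(bio);
		/* Does not recurse: the bio lands on current->bio_list and is
		 * picked up by the loop in generic_make_request() above. */
		generic_make_request(bio);
		return 0;
	}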
1599 | 1602 | ||
1600 | /** | 1603 | /** |
1601 | * submit_bio - submit a bio to the block device layer for I/O | 1604 | * submit_bio - submit a bio to the block device layer for I/O |
1602 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 1605 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
1603 | * @bio: The &struct bio which describes the I/O | 1606 | * @bio: The &struct bio which describes the I/O |
1604 | * | 1607 | * |
1605 | * submit_bio() is very similar in purpose to generic_make_request(), and | 1608 | * submit_bio() is very similar in purpose to generic_make_request(), and |
1606 | * uses that function to do most of the work. Both are fairly rough | 1609 | * uses that function to do most of the work. Both are fairly rough |
1607 | * interfaces; @bio must be presetup and ready for I/O. | 1610 | * interfaces; @bio must be presetup and ready for I/O. |
1608 | * | 1611 | * |
1609 | */ | 1612 | */ |
1610 | void submit_bio(int rw, struct bio *bio) | 1613 | void submit_bio(int rw, struct bio *bio) |
1611 | { | 1614 | { |
1612 | int count = bio_sectors(bio); | 1615 | int count = bio_sectors(bio); |
1613 | 1616 | ||
1614 | bio->bi_rw |= rw; | 1617 | bio->bi_rw |= rw; |
1615 | 1618 | ||
1616 | /* | 1619 | /* |
1617 | * If it's a regular read/write or a barrier with data attached, | 1620 | * If it's a regular read/write or a barrier with data attached, |
1618 | * go through the normal accounting stuff before submission. | 1621 | * go through the normal accounting stuff before submission. |
1619 | */ | 1622 | */ |
1620 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { | 1623 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { |
1621 | if (rw & WRITE) { | 1624 | if (rw & WRITE) { |
1622 | count_vm_events(PGPGOUT, count); | 1625 | count_vm_events(PGPGOUT, count); |
1623 | } else { | 1626 | } else { |
1624 | task_io_account_read(bio->bi_size); | 1627 | task_io_account_read(bio->bi_size); |
1625 | count_vm_events(PGPGIN, count); | 1628 | count_vm_events(PGPGIN, count); |
1626 | } | 1629 | } |
1627 | 1630 | ||
1628 | if (unlikely(block_dump)) { | 1631 | if (unlikely(block_dump)) { |
1629 | char b[BDEVNAME_SIZE]; | 1632 | char b[BDEVNAME_SIZE]; |
1630 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", | 1633 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", |
1631 | current->comm, task_pid_nr(current), | 1634 | current->comm, task_pid_nr(current), |
1632 | (rw & WRITE) ? "WRITE" : "READ", | 1635 | (rw & WRITE) ? "WRITE" : "READ", |
1633 | (unsigned long long)bio->bi_sector, | 1636 | (unsigned long long)bio->bi_sector, |
1634 | bdevname(bio->bi_bdev, b), | 1637 | bdevname(bio->bi_bdev, b), |
1635 | count); | 1638 | count); |
1636 | } | 1639 | } |
1637 | } | 1640 | } |
1638 | 1641 | ||
1639 | generic_make_request(bio); | 1642 | generic_make_request(bio); |
1640 | } | 1643 | } |
1641 | EXPORT_SYMBOL(submit_bio); | 1644 | EXPORT_SYMBOL(submit_bio); |
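[Editor's note] A minimal sketch of building one bio and pushing it through submit_bio(); bdev, page, my_end_io and my_ctx are hypothetical, and the field names match this era's struct bio (bi_sector/bi_bdev rather than the later bi_iter):

	struct bio *bio;

	bio = bio_alloc(GFP_NOIO, 1);		/* room for a single bio_vec */
	bio->bi_bdev    = bdev;
	bio->bi_sector  = 0;			/* first sector of bdev */
	bio->bi_end_io  = my_end_io;
	bio->bi_private = my_ctx;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	submit_bio(READ, bio);			/* completion arrives via my_end_io */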
1642 | 1645 | ||
1643 | /** | 1646 | /** |
1644 | * blk_rq_check_limits - Helper function to check a request for the queue limit | 1647 | * blk_rq_check_limits - Helper function to check a request for the queue limit |
1645 | * @q: the queue | 1648 | * @q: the queue |
1646 | * @rq: the request being checked | 1649 | * @rq: the request being checked |
1647 | * | 1650 | * |
1648 | * Description: | 1651 | * Description: |
1649 | * @rq may have been made based on weaker limitations of upper-level queues | 1652 | * @rq may have been made based on weaker limitations of upper-level queues |
1650 | * in request stacking drivers, and it may violate the limitation of @q. | 1653 | * in request stacking drivers, and it may violate the limitation of @q. |
1651 | * Since the block layer and the underlying device driver trust @rq | 1654 | * Since the block layer and the underlying device driver trust @rq |
1652 | * after it is inserted to @q, it should be checked against @q before | 1655 | * after it is inserted to @q, it should be checked against @q before |
1653 | * the insertion using this generic function. | 1656 | * the insertion using this generic function. |
1654 | * | 1657 | * |
1655 | * This function should also be useful for request stacking drivers | 1658 | * This function should also be useful for request stacking drivers |
1656 | * in some cases below, so export this function. | 1659 | * in some cases below, so export this function. |
1657 | * Request stacking drivers like request-based dm may change the queue | 1660 | * Request stacking drivers like request-based dm may change the queue |
1658 | * limits while requests are in the queue (e.g. dm's table swapping). | 1661 | * limits while requests are in the queue (e.g. dm's table swapping). |
1659 | * Such request stacking drivers should check those requests against | 1662 | * Such request stacking drivers should check those requests against |
1660 | * the new queue limits again when they dispatch those requests, | 1663 | * the new queue limits again when they dispatch those requests, |
1661 | * although such checks are also done against the old queue limits | 1664 | * although such checks are also done against the old queue limits |
1662 | * when submitting requests. | 1665 | * when submitting requests. |
1663 | */ | 1666 | */ |
1664 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) | 1667 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) |
1665 | { | 1668 | { |
1666 | if (rq->cmd_flags & REQ_DISCARD) | 1669 | if (rq->cmd_flags & REQ_DISCARD) |
1667 | return 0; | 1670 | return 0; |
1668 | 1671 | ||
1669 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || | 1672 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || |
1670 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { | 1673 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { |
1671 | printk(KERN_ERR "%s: over max size limit.\n", __func__); | 1674 | printk(KERN_ERR "%s: over max size limit.\n", __func__); |
1672 | return -EIO; | 1675 | return -EIO; |
1673 | } | 1676 | } |
1674 | 1677 | ||
1675 | /* | 1678 | /* |
1676 | * queue's settings related to segment counting like q->bounce_pfn | 1679 | * queue's settings related to segment counting like q->bounce_pfn |
1677 | * may differ from that of other stacking queues. | 1680 | * may differ from that of other stacking queues. |
1678 | * Recalculate it to check the request correctly on this queue's | 1681 | * Recalculate it to check the request correctly on this queue's |
1679 | * limitation. | 1682 | * limitation. |
1680 | */ | 1683 | */ |
1681 | blk_recalc_rq_segments(rq); | 1684 | blk_recalc_rq_segments(rq); |
1682 | if (rq->nr_phys_segments > queue_max_segments(q)) { | 1685 | if (rq->nr_phys_segments > queue_max_segments(q)) { |
1683 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); | 1686 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); |
1684 | return -EIO; | 1687 | return -EIO; |
1685 | } | 1688 | } |
1686 | 1689 | ||
1687 | return 0; | 1690 | return 0; |
1688 | } | 1691 | } |
1689 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); | 1692 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); |
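
As a hedged illustration of the scenario the comment describes (limits changing underneath already-built clones, e.g. a dm table swap), a stacking driver might re-check a clone right before re-dispatching it. Note that blk_insert_cloned_request() below performs the same check internally, so an explicit call is mainly useful for failing early; the example_* name is hypothetical.

#include <linux/blkdev.h>

static int example_check_before_dispatch(struct request_queue *bottom_q,
                                         struct request *clone)
{
        /*
         * The lower queue's limits may have shrunk since the clone was
         * built (e.g. after a dm table swap).  Catch that here instead of
         * handing the device a request it cannot handle.
         */
        if (blk_rq_check_limits(bottom_q, clone)) {
                /* the caller would requeue or fail the original request */
                return -EIO;
        }
        return 0;
}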
1690 | 1693 | ||
1691 | /** | 1694 | /** |
1692 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request | 1695 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request |
1693 | * @q: the queue to submit the request | 1696 | * @q: the queue to submit the request |
1694 | * @rq: the request being queued | 1697 | * @rq: the request being queued |
1695 | */ | 1698 | */ |
1696 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | 1699 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) |
1697 | { | 1700 | { |
1698 | unsigned long flags; | 1701 | unsigned long flags; |
1699 | 1702 | ||
1700 | if (blk_rq_check_limits(q, rq)) | 1703 | if (blk_rq_check_limits(q, rq)) |
1701 | return -EIO; | 1704 | return -EIO; |
1702 | 1705 | ||
1703 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1706 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1704 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && | 1707 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && |
1705 | should_fail(&fail_make_request, blk_rq_bytes(rq))) | 1708 | should_fail(&fail_make_request, blk_rq_bytes(rq))) |
1706 | return -EIO; | 1709 | return -EIO; |
1707 | #endif | 1710 | #endif |
1708 | 1711 | ||
1709 | spin_lock_irqsave(q->queue_lock, flags); | 1712 | spin_lock_irqsave(q->queue_lock, flags); |
1710 | 1713 | ||
1711 | /* | 1714 | /* |
1712 | * Submitting request must be dequeued before calling this function | 1715 | * Submitting request must be dequeued before calling this function |
1713 | * because it will be linked to another request_queue | 1716 | * because it will be linked to another request_queue |
1714 | */ | 1717 | */ |
1715 | BUG_ON(blk_queued_rq(rq)); | 1718 | BUG_ON(blk_queued_rq(rq)); |
1716 | 1719 | ||
1717 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); | 1720 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); |
1718 | spin_unlock_irqrestore(q->queue_lock, flags); | 1721 | spin_unlock_irqrestore(q->queue_lock, flags); |
1719 | 1722 | ||
1720 | return 0; | 1723 | return 0; |
1721 | } | 1724 | } |
1722 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); | 1725 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); |
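
A sketch of the dispatch side of a request-based stacking driver (dm-multipath style): the prepared clone is pushed to the underlying queue, and any insertion error is left for the caller to turn into completion of the original request. The example_* name and the reduced error handling are assumptions of this sketch.

#include <linux/blkdev.h>
#include <linux/jiffies.h>

static int example_dispatch_clone(struct request *clone)
{
        int ret;

        clone->start_time = jiffies;    /* account time spent on the lower device */
        ret = blk_insert_cloned_request(clone->q, clone);
        if (ret)
                /* e.g. requeue or fail the original request with @ret */
                return ret;

        return 0;
}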
1723 | 1726 | ||
1724 | /** | 1727 | /** |
1725 | * blk_rq_err_bytes - determine number of bytes till the next failure boundary | 1728 | * blk_rq_err_bytes - determine number of bytes till the next failure boundary |
1726 | * @rq: request to examine | 1729 | * @rq: request to examine |
1727 | * | 1730 | * |
1728 | * Description: | 1731 | * Description: |
1729 | * A request could be a merge of IOs which require different failure | 1732 | * A request could be a merge of IOs which require different failure |
1730 | * handling. This function determines the number of bytes which | 1733 | * handling. This function determines the number of bytes which |
1731 | * can be failed from the beginning of the request without | 1734 | * can be failed from the beginning of the request without |
1732 | * crossing into areas which need to be retried further. | 1735 | * crossing into areas which need to be retried further. |
1733 | * | 1736 | * |
1734 | * Return: | 1737 | * Return: |
1735 | * The number of bytes to fail. | 1738 | * The number of bytes to fail. |
1736 | * | 1739 | * |
1737 | * Context: | 1740 | * Context: |
1738 | * queue_lock must be held. | 1741 | * queue_lock must be held. |
1739 | */ | 1742 | */ |
1740 | unsigned int blk_rq_err_bytes(const struct request *rq) | 1743 | unsigned int blk_rq_err_bytes(const struct request *rq) |
1741 | { | 1744 | { |
1742 | unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; | 1745 | unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; |
1743 | unsigned int bytes = 0; | 1746 | unsigned int bytes = 0; |
1744 | struct bio *bio; | 1747 | struct bio *bio; |
1745 | 1748 | ||
1746 | if (!(rq->cmd_flags & REQ_MIXED_MERGE)) | 1749 | if (!(rq->cmd_flags & REQ_MIXED_MERGE)) |
1747 | return blk_rq_bytes(rq); | 1750 | return blk_rq_bytes(rq); |
1748 | 1751 | ||
1749 | /* | 1752 | /* |
1750 | * Currently the only 'mixing' which can happen is between | 1753 | * Currently the only 'mixing' which can happen is between |
1751 | * different fastfail types. We can safely fail portions | 1754 | * different fastfail types. We can safely fail portions |
1752 | * which have all the failfast bits that the first one has - | 1755 | * which have all the failfast bits that the first one has - |
1753 | * the ones which are at least as eager to fail as the first | 1756 | * the ones which are at least as eager to fail as the first |
1754 | * one. | 1757 | * one. |
1755 | */ | 1758 | */ |
1756 | for (bio = rq->bio; bio; bio = bio->bi_next) { | 1759 | for (bio = rq->bio; bio; bio = bio->bi_next) { |
1757 | if ((bio->bi_rw & ff) != ff) | 1760 | if ((bio->bi_rw & ff) != ff) |
1758 | break; | 1761 | break; |
1759 | bytes += bio->bi_size; | 1762 | bytes += bio->bi_size; |
1760 | } | 1763 | } |
1761 | 1764 | ||
1762 | /* this could lead to infinite loop */ | 1765 | /* this could lead to infinite loop */ |
1763 | BUG_ON(blk_rq_bytes(rq) && !bytes); | 1766 | BUG_ON(blk_rq_bytes(rq) && !bytes); |
1764 | return bytes; | 1767 | return bytes; |
1765 | } | 1768 | } |
1766 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); | 1769 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); |
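
A sketch of how a driver would use the boundary: fail only the prefix that must not be retried and keep the rest attached to the request. This is essentially what the blk_end_request_err()/__blk_end_request_err() helpers further down wrap; the example_* name is illustrative, and the caller is assumed to hold the queue lock (hence the __blk_* variant).

#include <linux/blkdev.h>

static void example_fail_to_boundary(struct request *rq)
{
        /* fail bytes up to the next failure boundary with -EIO */
        if (__blk_end_request(rq, -EIO, blk_rq_err_bytes(rq))) {
                /* the retryable remainder is still attached to @rq */
        }
}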
1767 | 1770 | ||
1768 | static void blk_account_io_completion(struct request *req, unsigned int bytes) | 1771 | static void blk_account_io_completion(struct request *req, unsigned int bytes) |
1769 | { | 1772 | { |
1770 | if (blk_do_io_stat(req)) { | 1773 | if (blk_do_io_stat(req)) { |
1771 | const int rw = rq_data_dir(req); | 1774 | const int rw = rq_data_dir(req); |
1772 | struct hd_struct *part; | 1775 | struct hd_struct *part; |
1773 | int cpu; | 1776 | int cpu; |
1774 | 1777 | ||
1775 | cpu = part_stat_lock(); | 1778 | cpu = part_stat_lock(); |
1776 | part = req->part; | 1779 | part = req->part; |
1777 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); | 1780 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); |
1778 | part_stat_unlock(); | 1781 | part_stat_unlock(); |
1779 | } | 1782 | } |
1780 | } | 1783 | } |
1781 | 1784 | ||
1782 | static void blk_account_io_done(struct request *req) | 1785 | static void blk_account_io_done(struct request *req) |
1783 | { | 1786 | { |
1784 | /* | 1787 | /* |
1785 | * Account IO completion. flush_rq isn't accounted as a | 1788 | * Account IO completion. flush_rq isn't accounted as a |
1786 | * normal IO on queueing or completion. Accounting the | 1789 | * normal IO on queueing or completion. Accounting the |
1787 | * containing request is enough. | 1790 | * containing request is enough. |
1788 | */ | 1791 | */ |
1789 | if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { | 1792 | if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { |
1790 | unsigned long duration = jiffies - req->start_time; | 1793 | unsigned long duration = jiffies - req->start_time; |
1791 | const int rw = rq_data_dir(req); | 1794 | const int rw = rq_data_dir(req); |
1792 | struct hd_struct *part; | 1795 | struct hd_struct *part; |
1793 | int cpu; | 1796 | int cpu; |
1794 | 1797 | ||
1795 | cpu = part_stat_lock(); | 1798 | cpu = part_stat_lock(); |
1796 | part = req->part; | 1799 | part = req->part; |
1797 | 1800 | ||
1798 | part_stat_inc(cpu, part, ios[rw]); | 1801 | part_stat_inc(cpu, part, ios[rw]); |
1799 | part_stat_add(cpu, part, ticks[rw], duration); | 1802 | part_stat_add(cpu, part, ticks[rw], duration); |
1800 | part_round_stats(cpu, part); | 1803 | part_round_stats(cpu, part); |
1801 | part_dec_in_flight(part, rw); | 1804 | part_dec_in_flight(part, rw); |
1802 | 1805 | ||
1803 | hd_struct_put(part); | 1806 | hd_struct_put(part); |
1804 | part_stat_unlock(); | 1807 | part_stat_unlock(); |
1805 | } | 1808 | } |
1806 | } | 1809 | } |
1807 | 1810 | ||
1808 | /** | 1811 | /** |
1809 | * blk_peek_request - peek at the top of a request queue | 1812 | * blk_peek_request - peek at the top of a request queue |
1810 | * @q: request queue to peek at | 1813 | * @q: request queue to peek at |
1811 | * | 1814 | * |
1812 | * Description: | 1815 | * Description: |
1813 | * Return the request at the top of @q. The returned request | 1816 | * Return the request at the top of @q. The returned request |
1814 | * should be started using blk_start_request() before LLD starts | 1817 | * should be started using blk_start_request() before LLD starts |
1815 | * processing it. | 1818 | * processing it. |
1816 | * | 1819 | * |
1817 | * Return: | 1820 | * Return: |
1818 | * Pointer to the request at the top of @q if available. Null | 1821 | * Pointer to the request at the top of @q if available. Null |
1819 | * otherwise. | 1822 | * otherwise. |
1820 | * | 1823 | * |
1821 | * Context: | 1824 | * Context: |
1822 | * queue_lock must be held. | 1825 | * queue_lock must be held. |
1823 | */ | 1826 | */ |
1824 | struct request *blk_peek_request(struct request_queue *q) | 1827 | struct request *blk_peek_request(struct request_queue *q) |
1825 | { | 1828 | { |
1826 | struct request *rq; | 1829 | struct request *rq; |
1827 | int ret; | 1830 | int ret; |
1828 | 1831 | ||
1829 | while ((rq = __elv_next_request(q)) != NULL) { | 1832 | while ((rq = __elv_next_request(q)) != NULL) { |
1830 | if (!(rq->cmd_flags & REQ_STARTED)) { | 1833 | if (!(rq->cmd_flags & REQ_STARTED)) { |
1831 | /* | 1834 | /* |
1832 | * This is the first time the device driver | 1835 | * This is the first time the device driver |
1833 | * sees this request (possibly after | 1836 | * sees this request (possibly after |
1834 | * requeueing). Notify IO scheduler. | 1837 | * requeueing). Notify IO scheduler. |
1835 | */ | 1838 | */ |
1836 | if (rq->cmd_flags & REQ_SORTED) | 1839 | if (rq->cmd_flags & REQ_SORTED) |
1837 | elv_activate_rq(q, rq); | 1840 | elv_activate_rq(q, rq); |
1838 | 1841 | ||
1839 | /* | 1842 | /* |
1840 | * just mark as started even if we don't start | 1843 | * just mark as started even if we don't start |
1841 | * it, a request that has been delayed should | 1844 | * it, a request that has been delayed should |
1842 | * not be passed by new incoming requests | 1845 | * not be passed by new incoming requests |
1843 | */ | 1846 | */ |
1844 | rq->cmd_flags |= REQ_STARTED; | 1847 | rq->cmd_flags |= REQ_STARTED; |
1845 | trace_block_rq_issue(q, rq); | 1848 | trace_block_rq_issue(q, rq); |
1846 | } | 1849 | } |
1847 | 1850 | ||
1848 | if (!q->boundary_rq || q->boundary_rq == rq) { | 1851 | if (!q->boundary_rq || q->boundary_rq == rq) { |
1849 | q->end_sector = rq_end_sector(rq); | 1852 | q->end_sector = rq_end_sector(rq); |
1850 | q->boundary_rq = NULL; | 1853 | q->boundary_rq = NULL; |
1851 | } | 1854 | } |
1852 | 1855 | ||
1853 | if (rq->cmd_flags & REQ_DONTPREP) | 1856 | if (rq->cmd_flags & REQ_DONTPREP) |
1854 | break; | 1857 | break; |
1855 | 1858 | ||
1856 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | 1859 | if (q->dma_drain_size && blk_rq_bytes(rq)) { |
1857 | /* | 1860 | /* |
1858 | * make sure space for the drain appears. We | 1861 | * make sure space for the drain appears. We |
1859 | * know we can do this because max_hw_segments | 1862 | * know we can do this because max_hw_segments |
1860 | * has been adjusted to be one fewer than the | 1863 | * has been adjusted to be one fewer than the |
1861 | * device can handle | 1864 | * device can handle |
1862 | */ | 1865 | */ |
1863 | rq->nr_phys_segments++; | 1866 | rq->nr_phys_segments++; |
1864 | } | 1867 | } |
1865 | 1868 | ||
1866 | if (!q->prep_rq_fn) | 1869 | if (!q->prep_rq_fn) |
1867 | break; | 1870 | break; |
1868 | 1871 | ||
1869 | ret = q->prep_rq_fn(q, rq); | 1872 | ret = q->prep_rq_fn(q, rq); |
1870 | if (ret == BLKPREP_OK) { | 1873 | if (ret == BLKPREP_OK) { |
1871 | break; | 1874 | break; |
1872 | } else if (ret == BLKPREP_DEFER) { | 1875 | } else if (ret == BLKPREP_DEFER) { |
1873 | /* | 1876 | /* |
1874 | * the request may have been (partially) prepped. | 1877 | * the request may have been (partially) prepped. |
1875 | * we need to keep this request in the front to | 1878 | * we need to keep this request in the front to |
1876 | * avoid resource deadlock. REQ_STARTED will | 1879 | * avoid resource deadlock. REQ_STARTED will |
1877 | * prevent other fs requests from passing this one. | 1880 | * prevent other fs requests from passing this one. |
1878 | */ | 1881 | */ |
1879 | if (q->dma_drain_size && blk_rq_bytes(rq) && | 1882 | if (q->dma_drain_size && blk_rq_bytes(rq) && |
1880 | !(rq->cmd_flags & REQ_DONTPREP)) { | 1883 | !(rq->cmd_flags & REQ_DONTPREP)) { |
1881 | /* | 1884 | /* |
1882 | * remove the space for the drain we added | 1885 | * remove the space for the drain we added |
1883 | * so that we don't add it again | 1886 | * so that we don't add it again |
1884 | */ | 1887 | */ |
1885 | --rq->nr_phys_segments; | 1888 | --rq->nr_phys_segments; |
1886 | } | 1889 | } |
1887 | 1890 | ||
1888 | rq = NULL; | 1891 | rq = NULL; |
1889 | break; | 1892 | break; |
1890 | } else if (ret == BLKPREP_KILL) { | 1893 | } else if (ret == BLKPREP_KILL) { |
1891 | rq->cmd_flags |= REQ_QUIET; | 1894 | rq->cmd_flags |= REQ_QUIET; |
1892 | /* | 1895 | /* |
1893 | * Mark this request as started so we don't trigger | 1896 | * Mark this request as started so we don't trigger |
1894 | * any debug logic in the end I/O path. | 1897 | * any debug logic in the end I/O path. |
1895 | */ | 1898 | */ |
1896 | blk_start_request(rq); | 1899 | blk_start_request(rq); |
1897 | __blk_end_request_all(rq, -EIO); | 1900 | __blk_end_request_all(rq, -EIO); |
1898 | } else { | 1901 | } else { |
1899 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); | 1902 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); |
1900 | break; | 1903 | break; |
1901 | } | 1904 | } |
1902 | } | 1905 | } |
1903 | 1906 | ||
1904 | return rq; | 1907 | return rq; |
1905 | } | 1908 | } |
1906 | EXPORT_SYMBOL(blk_peek_request); | 1909 | EXPORT_SYMBOL(blk_peek_request); |
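
For context, a sketch of a request_fn built around the peek-then-start pattern described above: the request is dequeued only once the (hypothetical) hardware can take it, otherwise it stays at the head of the queue. The example_hw_* helpers are assumptions of this sketch; request_fn is invoked with queue_lock held, which is what blk_peek_request() and blk_start_request() expect.

#include <linux/blkdev.h>

/* Hypothetical hardware helpers, assumed to exist for this sketch. */
extern int example_hw_slot_free(void *hw);
extern void example_hw_issue(void *hw, struct request *rq);

static void example_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_peek_request(q)) != NULL) {
                if (!example_hw_slot_free(q->queuedata))
                        break;          /* not dequeued; stays at the head */

                blk_start_request(rq);  /* dequeue and arm the timeout timer */
                example_hw_issue(q->queuedata, rq);
        }
}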
1907 | 1910 | ||
1908 | void blk_dequeue_request(struct request *rq) | 1911 | void blk_dequeue_request(struct request *rq) |
1909 | { | 1912 | { |
1910 | struct request_queue *q = rq->q; | 1913 | struct request_queue *q = rq->q; |
1911 | 1914 | ||
1912 | BUG_ON(list_empty(&rq->queuelist)); | 1915 | BUG_ON(list_empty(&rq->queuelist)); |
1913 | BUG_ON(ELV_ON_HASH(rq)); | 1916 | BUG_ON(ELV_ON_HASH(rq)); |
1914 | 1917 | ||
1915 | list_del_init(&rq->queuelist); | 1918 | list_del_init(&rq->queuelist); |
1916 | 1919 | ||
1917 | /* | 1920 | /* |
1918 | * the time frame between a request being removed from the lists | 1921 | * the time frame between a request being removed from the lists |
1919 | * and when it is freed is accounted as io that is in progress at | 1922 | * and when it is freed is accounted as io that is in progress at |
1920 | * the driver side. | 1923 | * the driver side. |
1921 | */ | 1924 | */ |
1922 | if (blk_account_rq(rq)) { | 1925 | if (blk_account_rq(rq)) { |
1923 | q->in_flight[rq_is_sync(rq)]++; | 1926 | q->in_flight[rq_is_sync(rq)]++; |
1924 | set_io_start_time_ns(rq); | 1927 | set_io_start_time_ns(rq); |
1925 | } | 1928 | } |
1926 | } | 1929 | } |
1927 | 1930 | ||
1928 | /** | 1931 | /** |
1929 | * blk_start_request - start request processing on the driver | 1932 | * blk_start_request - start request processing on the driver |
1930 | * @req: request to dequeue | 1933 | * @req: request to dequeue |
1931 | * | 1934 | * |
1932 | * Description: | 1935 | * Description: |
1933 | * Dequeue @req and start timeout timer on it. This hands off the | 1936 | * Dequeue @req and start timeout timer on it. This hands off the |
1934 | * request to the driver. | 1937 | * request to the driver. |
1935 | * | 1938 | * |
1936 | * Block internal functions which don't want to start timer should | 1939 | * Block internal functions which don't want to start timer should |
1937 | * call blk_dequeue_request(). | 1940 | * call blk_dequeue_request(). |
1938 | * | 1941 | * |
1939 | * Context: | 1942 | * Context: |
1940 | * queue_lock must be held. | 1943 | * queue_lock must be held. |
1941 | */ | 1944 | */ |
1942 | void blk_start_request(struct request *req) | 1945 | void blk_start_request(struct request *req) |
1943 | { | 1946 | { |
1944 | blk_dequeue_request(req); | 1947 | blk_dequeue_request(req); |
1945 | 1948 | ||
1946 | /* | 1949 | /* |
1947 | * We are now handing the request to the hardware, initialize | 1950 | * We are now handing the request to the hardware, initialize |
1948 | * resid_len to full count and add the timeout handler. | 1951 | * resid_len to full count and add the timeout handler. |
1949 | */ | 1952 | */ |
1950 | req->resid_len = blk_rq_bytes(req); | 1953 | req->resid_len = blk_rq_bytes(req); |
1951 | if (unlikely(blk_bidi_rq(req))) | 1954 | if (unlikely(blk_bidi_rq(req))) |
1952 | req->next_rq->resid_len = blk_rq_bytes(req->next_rq); | 1955 | req->next_rq->resid_len = blk_rq_bytes(req->next_rq); |
1953 | 1956 | ||
1954 | blk_add_timer(req); | 1957 | blk_add_timer(req); |
1955 | } | 1958 | } |
1956 | EXPORT_SYMBOL(blk_start_request); | 1959 | EXPORT_SYMBOL(blk_start_request); |
1957 | 1960 | ||
1958 | /** | 1961 | /** |
1959 | * blk_fetch_request - fetch a request from a request queue | 1962 | * blk_fetch_request - fetch a request from a request queue |
1960 | * @q: request queue to fetch a request from | 1963 | * @q: request queue to fetch a request from |
1961 | * | 1964 | * |
1962 | * Description: | 1965 | * Description: |
1963 | * Return the request at the top of @q. The request is started on | 1966 | * Return the request at the top of @q. The request is started on |
1964 | * return and LLD can start processing it immediately. | 1967 | * return and LLD can start processing it immediately. |
1965 | * | 1968 | * |
1966 | * Return: | 1969 | * Return: |
1967 | * Pointer to the request at the top of @q if available. Null | 1970 | * Pointer to the request at the top of @q if available. Null |
1968 | * otherwise. | 1971 | * otherwise. |
1969 | * | 1972 | * |
1970 | * Context: | 1973 | * Context: |
1971 | * queue_lock must be held. | 1974 | * queue_lock must be held. |
1972 | */ | 1975 | */ |
1973 | struct request *blk_fetch_request(struct request_queue *q) | 1976 | struct request *blk_fetch_request(struct request_queue *q) |
1974 | { | 1977 | { |
1975 | struct request *rq; | 1978 | struct request *rq; |
1976 | 1979 | ||
1977 | rq = blk_peek_request(q); | 1980 | rq = blk_peek_request(q); |
1978 | if (rq) | 1981 | if (rq) |
1979 | blk_start_request(rq); | 1982 | blk_start_request(rq); |
1980 | return rq; | 1983 | return rq; |
1981 | } | 1984 | } |
1982 | EXPORT_SYMBOL(blk_fetch_request); | 1985 | EXPORT_SYMBOL(blk_fetch_request); |
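
blk_fetch_request() is the peek+start shorthand; a minimal request_fn for a memory-backed device might look like the sketch below. example_transfer() is a hypothetical copy routine, and since request_fn runs with queue_lock held, the __blk_end_* variants are used.

#include <linux/blkdev.h>

/* Hypothetical copy routine between the device's storage and rq->buffer. */
extern void example_transfer(void *dev, sector_t pos, unsigned int nsect,
                             char *buffer, int write);

static void example_mem_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                if (rq->cmd_type != REQ_TYPE_FS) {
                        __blk_end_request_all(rq, -EIO);
                        continue;
                }
                /* complete the request one contiguous chunk at a time */
                do {
                        example_transfer(q->queuedata, blk_rq_pos(rq),
                                         blk_rq_cur_sectors(rq), rq->buffer,
                                         rq_data_dir(rq) == WRITE);
                } while (__blk_end_request_cur(rq, 0));
        }
}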
1983 | 1986 | ||
1984 | /** | 1987 | /** |
1985 | * blk_update_request - Special helper function for request stacking drivers | 1988 | * blk_update_request - Special helper function for request stacking drivers |
1986 | * @req: the request being processed | 1989 | * @req: the request being processed |
1987 | * @error: %0 for success, < %0 for error | 1990 | * @error: %0 for success, < %0 for error |
1988 | * @nr_bytes: number of bytes to complete @req | 1991 | * @nr_bytes: number of bytes to complete @req |
1989 | * | 1992 | * |
1990 | * Description: | 1993 | * Description: |
1991 | * Ends I/O on a number of bytes attached to @req, but doesn't complete | 1994 | * Ends I/O on a number of bytes attached to @req, but doesn't complete |
1992 | * the request structure even if @req doesn't have leftover. | 1995 | * the request structure even if @req doesn't have leftover. |
1993 | * If @req has leftover, sets it up for the next range of segments. | 1996 | * If @req has leftover, sets it up for the next range of segments. |
1994 | * | 1997 | * |
1995 | * This special helper function is only for request stacking drivers | 1998 | * This special helper function is only for request stacking drivers |
1996 | * (e.g. request-based dm) so that they can handle partial completion. | 1999 | * (e.g. request-based dm) so that they can handle partial completion. |
1997 | * Actual device drivers should use blk_end_request instead. | 2000 | * Actual device drivers should use blk_end_request instead. |
1998 | * | 2001 | * |
1999 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees | 2002 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees |
2000 | * %false return from this function. | 2003 | * %false return from this function. |
2001 | * | 2004 | * |
2002 | * Return: | 2005 | * Return: |
2003 | * %false - this request doesn't have any more data | 2006 | * %false - this request doesn't have any more data |
2004 | * %true - this request has more data | 2007 | * %true - this request has more data |
2005 | **/ | 2008 | **/ |
2006 | bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | 2009 | bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) |
2007 | { | 2010 | { |
2008 | int total_bytes, bio_nbytes, next_idx = 0; | 2011 | int total_bytes, bio_nbytes, next_idx = 0; |
2009 | struct bio *bio; | 2012 | struct bio *bio; |
2010 | 2013 | ||
2011 | if (!req->bio) | 2014 | if (!req->bio) |
2012 | return false; | 2015 | return false; |
2013 | 2016 | ||
2014 | trace_block_rq_complete(req->q, req); | 2017 | trace_block_rq_complete(req->q, req); |
2015 | 2018 | ||
2016 | /* | 2019 | /* |
2017 | * For fs requests, rq is just a carrier of independent bios | 2020 | * For fs requests, rq is just a carrier of independent bios |
2018 | * and each partial completion should be handled separately. | 2021 | * and each partial completion should be handled separately. |
2019 | * Reset per-request error on each partial completion. | 2022 | * Reset per-request error on each partial completion. |
2020 | * | 2023 | * |
2021 | * TODO: tj: This is too subtle. It would be better to let | 2024 | * TODO: tj: This is too subtle. It would be better to let |
2022 | * low level drivers do what they see fit. | 2025 | * low level drivers do what they see fit. |
2023 | */ | 2026 | */ |
2024 | if (req->cmd_type == REQ_TYPE_FS) | 2027 | if (req->cmd_type == REQ_TYPE_FS) |
2025 | req->errors = 0; | 2028 | req->errors = 0; |
2026 | 2029 | ||
2027 | if (error && req->cmd_type == REQ_TYPE_FS && | 2030 | if (error && req->cmd_type == REQ_TYPE_FS && |
2028 | !(req->cmd_flags & REQ_QUIET)) { | 2031 | !(req->cmd_flags & REQ_QUIET)) { |
2029 | char *error_type; | 2032 | char *error_type; |
2030 | 2033 | ||
2031 | switch (error) { | 2034 | switch (error) { |
2032 | case -ENOLINK: | 2035 | case -ENOLINK: |
2033 | error_type = "recoverable transport"; | 2036 | error_type = "recoverable transport"; |
2034 | break; | 2037 | break; |
2035 | case -EREMOTEIO: | 2038 | case -EREMOTEIO: |
2036 | error_type = "critical target"; | 2039 | error_type = "critical target"; |
2037 | break; | 2040 | break; |
2038 | case -EBADE: | 2041 | case -EBADE: |
2039 | error_type = "critical nexus"; | 2042 | error_type = "critical nexus"; |
2040 | break; | 2043 | break; |
2041 | case -EIO: | 2044 | case -EIO: |
2042 | default: | 2045 | default: |
2043 | error_type = "I/O"; | 2046 | error_type = "I/O"; |
2044 | break; | 2047 | break; |
2045 | } | 2048 | } |
2046 | printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", | 2049 | printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", |
2047 | error_type, req->rq_disk ? req->rq_disk->disk_name : "?", | 2050 | error_type, req->rq_disk ? req->rq_disk->disk_name : "?", |
2048 | (unsigned long long)blk_rq_pos(req)); | 2051 | (unsigned long long)blk_rq_pos(req)); |
2049 | } | 2052 | } |
2050 | 2053 | ||
2051 | blk_account_io_completion(req, nr_bytes); | 2054 | blk_account_io_completion(req, nr_bytes); |
2052 | 2055 | ||
2053 | total_bytes = bio_nbytes = 0; | 2056 | total_bytes = bio_nbytes = 0; |
2054 | while ((bio = req->bio) != NULL) { | 2057 | while ((bio = req->bio) != NULL) { |
2055 | int nbytes; | 2058 | int nbytes; |
2056 | 2059 | ||
2057 | if (nr_bytes >= bio->bi_size) { | 2060 | if (nr_bytes >= bio->bi_size) { |
2058 | req->bio = bio->bi_next; | 2061 | req->bio = bio->bi_next; |
2059 | nbytes = bio->bi_size; | 2062 | nbytes = bio->bi_size; |
2060 | req_bio_endio(req, bio, nbytes, error); | 2063 | req_bio_endio(req, bio, nbytes, error); |
2061 | next_idx = 0; | 2064 | next_idx = 0; |
2062 | bio_nbytes = 0; | 2065 | bio_nbytes = 0; |
2063 | } else { | 2066 | } else { |
2064 | int idx = bio->bi_idx + next_idx; | 2067 | int idx = bio->bi_idx + next_idx; |
2065 | 2068 | ||
2066 | if (unlikely(idx >= bio->bi_vcnt)) { | 2069 | if (unlikely(idx >= bio->bi_vcnt)) { |
2067 | blk_dump_rq_flags(req, "__end_that"); | 2070 | blk_dump_rq_flags(req, "__end_that"); |
2068 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", | 2071 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", |
2069 | __func__, idx, bio->bi_vcnt); | 2072 | __func__, idx, bio->bi_vcnt); |
2070 | break; | 2073 | break; |
2071 | } | 2074 | } |
2072 | 2075 | ||
2073 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 2076 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
2074 | BIO_BUG_ON(nbytes > bio->bi_size); | 2077 | BIO_BUG_ON(nbytes > bio->bi_size); |
2075 | 2078 | ||
2076 | /* | 2079 | /* |
2077 | * not a complete bvec done | 2080 | * not a complete bvec done |
2078 | */ | 2081 | */ |
2079 | if (unlikely(nbytes > nr_bytes)) { | 2082 | if (unlikely(nbytes > nr_bytes)) { |
2080 | bio_nbytes += nr_bytes; | 2083 | bio_nbytes += nr_bytes; |
2081 | total_bytes += nr_bytes; | 2084 | total_bytes += nr_bytes; |
2082 | break; | 2085 | break; |
2083 | } | 2086 | } |
2084 | 2087 | ||
2085 | /* | 2088 | /* |
2086 | * advance to the next vector | 2089 | * advance to the next vector |
2087 | */ | 2090 | */ |
2088 | next_idx++; | 2091 | next_idx++; |
2089 | bio_nbytes += nbytes; | 2092 | bio_nbytes += nbytes; |
2090 | } | 2093 | } |
2091 | 2094 | ||
2092 | total_bytes += nbytes; | 2095 | total_bytes += nbytes; |
2093 | nr_bytes -= nbytes; | 2096 | nr_bytes -= nbytes; |
2094 | 2097 | ||
2095 | bio = req->bio; | 2098 | bio = req->bio; |
2096 | if (bio) { | 2099 | if (bio) { |
2097 | /* | 2100 | /* |
2098 | * end more in this run, or just return 'not-done' | 2101 | * end more in this run, or just return 'not-done' |
2099 | */ | 2102 | */ |
2100 | if (unlikely(nr_bytes <= 0)) | 2103 | if (unlikely(nr_bytes <= 0)) |
2101 | break; | 2104 | break; |
2102 | } | 2105 | } |
2103 | } | 2106 | } |
2104 | 2107 | ||
2105 | /* | 2108 | /* |
2106 | * completely done | 2109 | * completely done |
2107 | */ | 2110 | */ |
2108 | if (!req->bio) { | 2111 | if (!req->bio) { |
2109 | /* | 2112 | /* |
2110 | * Reset counters so that the request stacking driver | 2113 | * Reset counters so that the request stacking driver |
2111 | * can find how many bytes remain in the request | 2114 | * can find how many bytes remain in the request |
2112 | * later. | 2115 | * later. |
2113 | */ | 2116 | */ |
2114 | req->__data_len = 0; | 2117 | req->__data_len = 0; |
2115 | return false; | 2118 | return false; |
2116 | } | 2119 | } |
2117 | 2120 | ||
2118 | /* | 2121 | /* |
2119 | * if the request wasn't completed, update state | 2122 | * if the request wasn't completed, update state |
2120 | */ | 2123 | */ |
2121 | if (bio_nbytes) { | 2124 | if (bio_nbytes) { |
2122 | req_bio_endio(req, bio, bio_nbytes, error); | 2125 | req_bio_endio(req, bio, bio_nbytes, error); |
2123 | bio->bi_idx += next_idx; | 2126 | bio->bi_idx += next_idx; |
2124 | bio_iovec(bio)->bv_offset += nr_bytes; | 2127 | bio_iovec(bio)->bv_offset += nr_bytes; |
2125 | bio_iovec(bio)->bv_len -= nr_bytes; | 2128 | bio_iovec(bio)->bv_len -= nr_bytes; |
2126 | } | 2129 | } |
2127 | 2130 | ||
2128 | req->__data_len -= total_bytes; | 2131 | req->__data_len -= total_bytes; |
2129 | req->buffer = bio_data(req->bio); | 2132 | req->buffer = bio_data(req->bio); |
2130 | 2133 | ||
2131 | /* update sector only for requests with clear definition of sector */ | 2134 | /* update sector only for requests with clear definition of sector */ |
2132 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) | 2135 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) |
2133 | req->__sector += total_bytes >> 9; | 2136 | req->__sector += total_bytes >> 9; |
2134 | 2137 | ||
2135 | /* mixed attributes always follow the first bio */ | 2138 | /* mixed attributes always follow the first bio */ |
2136 | if (req->cmd_flags & REQ_MIXED_MERGE) { | 2139 | if (req->cmd_flags & REQ_MIXED_MERGE) { |
2137 | req->cmd_flags &= ~REQ_FAILFAST_MASK; | 2140 | req->cmd_flags &= ~REQ_FAILFAST_MASK; |
2138 | req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; | 2141 | req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; |
2139 | } | 2142 | } |
2140 | 2143 | ||
2141 | /* | 2144 | /* |
2142 | * If total number of sectors is less than the first segment | 2145 | * If total number of sectors is less than the first segment |
2143 | * size, something has gone terribly wrong. | 2146 | * size, something has gone terribly wrong. |
2144 | */ | 2147 | */ |
2145 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { | 2148 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { |
2146 | blk_dump_rq_flags(req, "request botched"); | 2149 | blk_dump_rq_flags(req, "request botched"); |
2147 | req->__data_len = blk_rq_cur_bytes(req); | 2150 | req->__data_len = blk_rq_cur_bytes(req); |
2148 | } | 2151 | } |
2149 | 2152 | ||
2150 | /* recalculate the number of segments */ | 2153 | /* recalculate the number of segments */ |
2151 | blk_recalc_rq_segments(req); | 2154 | blk_recalc_rq_segments(req); |
2152 | 2155 | ||
2153 | return true; | 2156 | return true; |
2154 | } | 2157 | } |
2155 | EXPORT_SYMBOL_GPL(blk_update_request); | 2158 | EXPORT_SYMBOL_GPL(blk_update_request); |
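
A sketch of the partial-completion use case named above: when a fragment of a clone finishes, a request-based stacking driver pushes the same number of bytes into the original request. Names are illustrative; finishing the original once everything is accounted for is left as a comment because it must happen under the original queue's lock.

#include <linux/blkdev.h>

static void example_clone_fragment_done(struct request *orig, int error,
                                        unsigned int nr_bytes)
{
        if (blk_update_request(orig, error, nr_bytes))
                return;         /* the original still has outstanding bytes */

        /*
         * All bytes of the original are accounted for; the driver would now
         * finish it (e.g. via blk_end_request_all()) once the clone itself
         * has fully completed.
         */
}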
2156 | 2159 | ||
2157 | static bool blk_update_bidi_request(struct request *rq, int error, | 2160 | static bool blk_update_bidi_request(struct request *rq, int error, |
2158 | unsigned int nr_bytes, | 2161 | unsigned int nr_bytes, |
2159 | unsigned int bidi_bytes) | 2162 | unsigned int bidi_bytes) |
2160 | { | 2163 | { |
2161 | if (blk_update_request(rq, error, nr_bytes)) | 2164 | if (blk_update_request(rq, error, nr_bytes)) |
2162 | return true; | 2165 | return true; |
2163 | 2166 | ||
2164 | /* Bidi request must be completed as a whole */ | 2167 | /* Bidi request must be completed as a whole */ |
2165 | if (unlikely(blk_bidi_rq(rq)) && | 2168 | if (unlikely(blk_bidi_rq(rq)) && |
2166 | blk_update_request(rq->next_rq, error, bidi_bytes)) | 2169 | blk_update_request(rq->next_rq, error, bidi_bytes)) |
2167 | return true; | 2170 | return true; |
2168 | 2171 | ||
2169 | if (blk_queue_add_random(rq->q)) | 2172 | if (blk_queue_add_random(rq->q)) |
2170 | add_disk_randomness(rq->rq_disk); | 2173 | add_disk_randomness(rq->rq_disk); |
2171 | 2174 | ||
2172 | return false; | 2175 | return false; |
2173 | } | 2176 | } |
2174 | 2177 | ||
2175 | /** | 2178 | /** |
2176 | * blk_unprep_request - unprepare a request | 2179 | * blk_unprep_request - unprepare a request |
2177 | * @req: the request | 2180 | * @req: the request |
2178 | * | 2181 | * |
2179 | * This function makes a request ready for complete resubmission (or | 2182 | * This function makes a request ready for complete resubmission (or |
2180 | * completion). It happens only after all error handling is complete, | 2183 | * completion). It happens only after all error handling is complete, |
2181 | * so represents the appropriate moment to deallocate any resources | 2184 | * so represents the appropriate moment to deallocate any resources |
2182 | * that were allocated to the request in the prep_rq_fn. The queue | 2185 | * that were allocated to the request in the prep_rq_fn. The queue |
2183 | * lock is held when calling this. | 2186 | * lock is held when calling this. |
2184 | */ | 2187 | */ |
2185 | void blk_unprep_request(struct request *req) | 2188 | void blk_unprep_request(struct request *req) |
2186 | { | 2189 | { |
2187 | struct request_queue *q = req->q; | 2190 | struct request_queue *q = req->q; |
2188 | 2191 | ||
2189 | req->cmd_flags &= ~REQ_DONTPREP; | 2192 | req->cmd_flags &= ~REQ_DONTPREP; |
2190 | if (q->unprep_rq_fn) | 2193 | if (q->unprep_rq_fn) |
2191 | q->unprep_rq_fn(q, req); | 2194 | q->unprep_rq_fn(q, req); |
2192 | } | 2195 | } |
2193 | EXPORT_SYMBOL_GPL(blk_unprep_request); | 2196 | EXPORT_SYMBOL_GPL(blk_unprep_request); |
2194 | 2197 | ||
2195 | /* | 2198 | /* |
2196 | * queue lock must be held | 2199 | * queue lock must be held |
2197 | */ | 2200 | */ |
2198 | static void blk_finish_request(struct request *req, int error) | 2201 | static void blk_finish_request(struct request *req, int error) |
2199 | { | 2202 | { |
2200 | if (blk_rq_tagged(req)) | 2203 | if (blk_rq_tagged(req)) |
2201 | blk_queue_end_tag(req->q, req); | 2204 | blk_queue_end_tag(req->q, req); |
2202 | 2205 | ||
2203 | BUG_ON(blk_queued_rq(req)); | 2206 | BUG_ON(blk_queued_rq(req)); |
2204 | 2207 | ||
2205 | if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) | 2208 | if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) |
2206 | laptop_io_completion(&req->q->backing_dev_info); | 2209 | laptop_io_completion(&req->q->backing_dev_info); |
2207 | 2210 | ||
2208 | blk_delete_timer(req); | 2211 | blk_delete_timer(req); |
2209 | 2212 | ||
2210 | if (req->cmd_flags & REQ_DONTPREP) | 2213 | if (req->cmd_flags & REQ_DONTPREP) |
2211 | blk_unprep_request(req); | 2214 | blk_unprep_request(req); |
2212 | 2215 | ||
2213 | 2216 | ||
2214 | blk_account_io_done(req); | 2217 | blk_account_io_done(req); |
2215 | 2218 | ||
2216 | if (req->end_io) | 2219 | if (req->end_io) |
2217 | req->end_io(req, error); | 2220 | req->end_io(req, error); |
2218 | else { | 2221 | else { |
2219 | if (blk_bidi_rq(req)) | 2222 | if (blk_bidi_rq(req)) |
2220 | __blk_put_request(req->next_rq->q, req->next_rq); | 2223 | __blk_put_request(req->next_rq->q, req->next_rq); |
2221 | 2224 | ||
2222 | __blk_put_request(req->q, req); | 2225 | __blk_put_request(req->q, req); |
2223 | } | 2226 | } |
2224 | } | 2227 | } |
2225 | 2228 | ||
2226 | /** | 2229 | /** |
2227 | * blk_end_bidi_request - Complete a bidi request | 2230 | * blk_end_bidi_request - Complete a bidi request |
2228 | * @rq: the request to complete | 2231 | * @rq: the request to complete |
2229 | * @error: %0 for success, < %0 for error | 2232 | * @error: %0 for success, < %0 for error |
2230 | * @nr_bytes: number of bytes to complete @rq | 2233 | * @nr_bytes: number of bytes to complete @rq |
2231 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 2234 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
2232 | * | 2235 | * |
2233 | * Description: | 2236 | * Description: |
2234 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | 2237 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
2235 | * Drivers that support bidi can safely call this member for any | 2238 | * Drivers that support bidi can safely call this member for any |
2236 | * type of request, bidi or uni. In the latter case @bidi_bytes is | 2239 | * type of request, bidi or uni. In the latter case @bidi_bytes is |
2237 | * just ignored. | 2240 | * just ignored. |
2238 | * | 2241 | * |
2239 | * Return: | 2242 | * Return: |
2240 | * %false - we are done with this request | 2243 | * %false - we are done with this request |
2241 | * %true - still buffers pending for this request | 2244 | * %true - still buffers pending for this request |
2242 | **/ | 2245 | **/ |
2243 | static bool blk_end_bidi_request(struct request *rq, int error, | 2246 | static bool blk_end_bidi_request(struct request *rq, int error, |
2244 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2247 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2245 | { | 2248 | { |
2246 | struct request_queue *q = rq->q; | 2249 | struct request_queue *q = rq->q; |
2247 | unsigned long flags; | 2250 | unsigned long flags; |
2248 | 2251 | ||
2249 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2252 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
2250 | return true; | 2253 | return true; |
2251 | 2254 | ||
2252 | spin_lock_irqsave(q->queue_lock, flags); | 2255 | spin_lock_irqsave(q->queue_lock, flags); |
2253 | blk_finish_request(rq, error); | 2256 | blk_finish_request(rq, error); |
2254 | spin_unlock_irqrestore(q->queue_lock, flags); | 2257 | spin_unlock_irqrestore(q->queue_lock, flags); |
2255 | 2258 | ||
2256 | return false; | 2259 | return false; |
2257 | } | 2260 | } |
2258 | 2261 | ||
2259 | /** | 2262 | /** |
2260 | * __blk_end_bidi_request - Complete a bidi request with queue lock held | 2263 | * __blk_end_bidi_request - Complete a bidi request with queue lock held |
2261 | * @rq: the request to complete | 2264 | * @rq: the request to complete |
2262 | * @error: %0 for success, < %0 for error | 2265 | * @error: %0 for success, < %0 for error |
2263 | * @nr_bytes: number of bytes to complete @rq | 2266 | * @nr_bytes: number of bytes to complete @rq |
2264 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 2267 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
2265 | * | 2268 | * |
2266 | * Description: | 2269 | * Description: |
2267 | * Identical to blk_end_bidi_request() except that queue lock is | 2270 | * Identical to blk_end_bidi_request() except that queue lock is |
2268 | * assumed to be locked on entry and remains so on return. | 2271 | * assumed to be locked on entry and remains so on return. |
2269 | * | 2272 | * |
2270 | * Return: | 2273 | * Return: |
2271 | * %false - we are done with this request | 2274 | * %false - we are done with this request |
2272 | * %true - still buffers pending for this request | 2275 | * %true - still buffers pending for this request |
2273 | **/ | 2276 | **/ |
2274 | static bool __blk_end_bidi_request(struct request *rq, int error, | 2277 | static bool __blk_end_bidi_request(struct request *rq, int error, |
2275 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2278 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2276 | { | 2279 | { |
2277 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2280 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
2278 | return true; | 2281 | return true; |
2279 | 2282 | ||
2280 | blk_finish_request(rq, error); | 2283 | blk_finish_request(rq, error); |
2281 | 2284 | ||
2282 | return false; | 2285 | return false; |
2283 | } | 2286 | } |
2284 | 2287 | ||
2285 | /** | 2288 | /** |
2286 | * blk_end_request - Helper function for drivers to complete the request. | 2289 | * blk_end_request - Helper function for drivers to complete the request. |
2287 | * @rq: the request being processed | 2290 | * @rq: the request being processed |
2288 | * @error: %0 for success, < %0 for error | 2291 | * @error: %0 for success, < %0 for error |
2289 | * @nr_bytes: number of bytes to complete | 2292 | * @nr_bytes: number of bytes to complete |
2290 | * | 2293 | * |
2291 | * Description: | 2294 | * Description: |
2292 | * Ends I/O on a number of bytes attached to @rq. | 2295 | * Ends I/O on a number of bytes attached to @rq. |
2293 | * If @rq has leftover, sets it up for the next range of segments. | 2296 | * If @rq has leftover, sets it up for the next range of segments. |
2294 | * | 2297 | * |
2295 | * Return: | 2298 | * Return: |
2296 | * %false - we are done with this request | 2299 | * %false - we are done with this request |
2297 | * %true - still buffers pending for this request | 2300 | * %true - still buffers pending for this request |
2298 | **/ | 2301 | **/ |
2299 | bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 2302 | bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
2300 | { | 2303 | { |
2301 | return blk_end_bidi_request(rq, error, nr_bytes, 0); | 2304 | return blk_end_bidi_request(rq, error, nr_bytes, 0); |
2302 | } | 2305 | } |
2303 | EXPORT_SYMBOL(blk_end_request); | 2306 | EXPORT_SYMBOL(blk_end_request); |
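
A sketch of the usual completion path from a driver's interrupt handler, which does not hold queue_lock (hence the non-__ variant). blk_end_request() finishes and frees the request when it returns %false, so the queue pointer is saved first; the example_* naming and the final blk_run_queue() kick are assumptions of this sketch.

#include <linux/blkdev.h>

static void example_irq_complete(struct request *rq, int error,
                                 unsigned int bytes_done)
{
        struct request_queue *q = rq->q;

        if (blk_end_request(rq, error, bytes_done))
                return;         /* partial completion; @rq is still live */

        /* @rq has been finished and freed; ask the queue for more work */
        blk_run_queue(q);
}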
2304 | 2307 | ||
2305 | /** | 2308 | /** |
2306 | * blk_end_request_all - Helper function for drivers to finish the request. | 2309 | * blk_end_request_all - Helper function for drivers to finish the request. |
2307 | * @rq: the request to finish | 2310 | * @rq: the request to finish |
2308 | * @error: %0 for success, < %0 for error | 2311 | * @error: %0 for success, < %0 for error |
2309 | * | 2312 | * |
2310 | * Description: | 2313 | * Description: |
2311 | * Completely finish @rq. | 2314 | * Completely finish @rq. |
2312 | */ | 2315 | */ |
2313 | void blk_end_request_all(struct request *rq, int error) | 2316 | void blk_end_request_all(struct request *rq, int error) |
2314 | { | 2317 | { |
2315 | bool pending; | 2318 | bool pending; |
2316 | unsigned int bidi_bytes = 0; | 2319 | unsigned int bidi_bytes = 0; |
2317 | 2320 | ||
2318 | if (unlikely(blk_bidi_rq(rq))) | 2321 | if (unlikely(blk_bidi_rq(rq))) |
2319 | bidi_bytes = blk_rq_bytes(rq->next_rq); | 2322 | bidi_bytes = blk_rq_bytes(rq->next_rq); |
2320 | 2323 | ||
2321 | pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); | 2324 | pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
2322 | BUG_ON(pending); | 2325 | BUG_ON(pending); |
2323 | } | 2326 | } |
2324 | EXPORT_SYMBOL(blk_end_request_all); | 2327 | EXPORT_SYMBOL(blk_end_request_all); |
2325 | 2328 | ||
2326 | /** | 2329 | /** |
2327 | * blk_end_request_cur - Helper function to finish the current request chunk. | 2330 | * blk_end_request_cur - Helper function to finish the current request chunk. |
2328 | * @rq: the request to finish the current chunk for | 2331 | * @rq: the request to finish the current chunk for |
2329 | * @error: %0 for success, < %0 for error | 2332 | * @error: %0 for success, < %0 for error |
2330 | * | 2333 | * |
2331 | * Description: | 2334 | * Description: |
2332 | * Complete the current consecutively mapped chunk from @rq. | 2335 | * Complete the current consecutively mapped chunk from @rq. |
2333 | * | 2336 | * |
2334 | * Return: | 2337 | * Return: |
2335 | * %false - we are done with this request | 2338 | * %false - we are done with this request |
2336 | * %true - still buffers pending for this request | 2339 | * %true - still buffers pending for this request |
2337 | */ | 2340 | */ |
2338 | bool blk_end_request_cur(struct request *rq, int error) | 2341 | bool blk_end_request_cur(struct request *rq, int error) |
2339 | { | 2342 | { |
2340 | return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); | 2343 | return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
2341 | } | 2344 | } |
2342 | EXPORT_SYMBOL(blk_end_request_cur); | 2345 | EXPORT_SYMBOL(blk_end_request_cur); |
2343 | 2346 | ||
2344 | /** | 2347 | /** |
2345 | * blk_end_request_err - Finish a request till the next failure boundary. | 2348 | * blk_end_request_err - Finish a request till the next failure boundary. |
2346 | * @rq: the request to finish till the next failure boundary for | 2349 | * @rq: the request to finish till the next failure boundary for |
2347 | * @error: must be negative errno | 2350 | * @error: must be negative errno |
2348 | * | 2351 | * |
2349 | * Description: | 2352 | * Description: |
2350 | * Complete @rq till the next failure boundary. | 2353 | * Complete @rq till the next failure boundary. |
2351 | * | 2354 | * |
2352 | * Return: | 2355 | * Return: |
2353 | * %false - we are done with this request | 2356 | * %false - we are done with this request |
2354 | * %true - still buffers pending for this request | 2357 | * %true - still buffers pending for this request |
2355 | */ | 2358 | */ |
2356 | bool blk_end_request_err(struct request *rq, int error) | 2359 | bool blk_end_request_err(struct request *rq, int error) |
2357 | { | 2360 | { |
2358 | WARN_ON(error >= 0); | 2361 | WARN_ON(error >= 0); |
2359 | return blk_end_request(rq, error, blk_rq_err_bytes(rq)); | 2362 | return blk_end_request(rq, error, blk_rq_err_bytes(rq)); |
2360 | } | 2363 | } |
2361 | EXPORT_SYMBOL_GPL(blk_end_request_err); | 2364 | EXPORT_SYMBOL_GPL(blk_end_request_err); |
2362 | 2365 | ||
2363 | /** | 2366 | /** |
2364 | * __blk_end_request - Helper function for drivers to complete the request. | 2367 | * __blk_end_request - Helper function for drivers to complete the request. |
2365 | * @rq: the request being processed | 2368 | * @rq: the request being processed |
2366 | * @error: %0 for success, < %0 for error | 2369 | * @error: %0 for success, < %0 for error |
2367 | * @nr_bytes: number of bytes to complete | 2370 | * @nr_bytes: number of bytes to complete |
2368 | * | 2371 | * |
2369 | * Description: | 2372 | * Description: |
2370 | * Must be called with queue lock held unlike blk_end_request(). | 2373 | * Must be called with queue lock held unlike blk_end_request(). |
2371 | * | 2374 | * |
2372 | * Return: | 2375 | * Return: |
2373 | * %false - we are done with this request | 2376 | * %false - we are done with this request |
2374 | * %true - still buffers pending for this request | 2377 | * %true - still buffers pending for this request |
2375 | **/ | 2378 | **/ |
2376 | bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 2379 | bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
2377 | { | 2380 | { |
2378 | return __blk_end_bidi_request(rq, error, nr_bytes, 0); | 2381 | return __blk_end_bidi_request(rq, error, nr_bytes, 0); |
2379 | } | 2382 | } |
2380 | EXPORT_SYMBOL(__blk_end_request); | 2383 | EXPORT_SYMBOL(__blk_end_request); |
2381 | 2384 | ||
2382 | /** | 2385 | /** |
2383 | * __blk_end_request_all - Helper function for drivers to finish the request. | 2386 | * __blk_end_request_all - Helper function for drivers to finish the request. |
2384 | * @rq: the request to finish | 2387 | * @rq: the request to finish |
2385 | * @error: %0 for success, < %0 for error | 2388 | * @error: %0 for success, < %0 for error |
2386 | * | 2389 | * |
2387 | * Description: | 2390 | * Description: |
2388 | * Completely finish @rq. Must be called with queue lock held. | 2391 | * Completely finish @rq. Must be called with queue lock held. |
2389 | */ | 2392 | */ |
2390 | void __blk_end_request_all(struct request *rq, int error) | 2393 | void __blk_end_request_all(struct request *rq, int error) |
2391 | { | 2394 | { |
2392 | bool pending; | 2395 | bool pending; |
2393 | unsigned int bidi_bytes = 0; | 2396 | unsigned int bidi_bytes = 0; |
2394 | 2397 | ||
2395 | if (unlikely(blk_bidi_rq(rq))) | 2398 | if (unlikely(blk_bidi_rq(rq))) |
2396 | bidi_bytes = blk_rq_bytes(rq->next_rq); | 2399 | bidi_bytes = blk_rq_bytes(rq->next_rq); |
2397 | 2400 | ||
2398 | pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); | 2401 | pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
2399 | BUG_ON(pending); | 2402 | BUG_ON(pending); |
2400 | } | 2403 | } |
2401 | EXPORT_SYMBOL(__blk_end_request_all); | 2404 | EXPORT_SYMBOL(__blk_end_request_all); |
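
A sketch of a teardown path that uses the lock-held variant: with queue_lock already taken, everything still queued is fetched and failed, e.g. when the device disappears. The function name is illustrative.

#include <linux/blkdev.h>

/* Called with q->queue_lock held, as blk_fetch_request() requires. */
static void example_fail_all_queued(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL)
                __blk_end_request_all(rq, -ENODEV);
}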
2402 | 2405 | ||
2403 | /** | 2406 | /** |
2404 | * __blk_end_request_cur - Helper function to finish the current request chunk. | 2407 | * __blk_end_request_cur - Helper function to finish the current request chunk. |
2405 | * @rq: the request to finish the current chunk for | 2408 | * @rq: the request to finish the current chunk for |
2406 | * @error: %0 for success, < %0 for error | 2409 | * @error: %0 for success, < %0 for error |
2407 | * | 2410 | * |
2408 | * Description: | 2411 | * Description: |
2409 | * Complete the current consecutively mapped chunk from @rq. Must | 2412 | * Complete the current consecutively mapped chunk from @rq. Must |
2410 | * be called with queue lock held. | 2413 | * be called with queue lock held. |
2411 | * | 2414 | * |
2412 | * Return: | 2415 | * Return: |
2413 | * %false - we are done with this request | 2416 | * %false - we are done with this request |
2414 | * %true - still buffers pending for this request | 2417 | * %true - still buffers pending for this request |
2415 | */ | 2418 | */ |
2416 | bool __blk_end_request_cur(struct request *rq, int error) | 2419 | bool __blk_end_request_cur(struct request *rq, int error) |
2417 | { | 2420 | { |
2418 | return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); | 2421 | return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
2419 | } | 2422 | } |
2420 | EXPORT_SYMBOL(__blk_end_request_cur); | 2423 | EXPORT_SYMBOL(__blk_end_request_cur); |
2421 | 2424 | ||
2422 | /** | 2425 | /** |
2423 | * __blk_end_request_err - Finish a request till the next failure boundary. | 2426 | * __blk_end_request_err - Finish a request till the next failure boundary. |
2424 | * @rq: the request to finish till the next failure boundary for | 2427 | * @rq: the request to finish till the next failure boundary for |
2425 | * @error: must be negative errno | 2428 | * @error: must be negative errno |
2426 | * | 2429 | * |
2427 | * Description: | 2430 | * Description: |
2428 | * Complete @rq till the next failure boundary. Must be called | 2431 | * Complete @rq till the next failure boundary. Must be called |
2429 | * with queue lock held. | 2432 | * with queue lock held. |
2430 | * | 2433 | * |
2431 | * Return: | 2434 | * Return: |
2432 | * %false - we are done with this request | 2435 | * %false - we are done with this request |
2433 | * %true - still buffers pending for this request | 2436 | * %true - still buffers pending for this request |
2434 | */ | 2437 | */ |
2435 | bool __blk_end_request_err(struct request *rq, int error) | 2438 | bool __blk_end_request_err(struct request *rq, int error) |
2436 | { | 2439 | { |
2437 | WARN_ON(error >= 0); | 2440 | WARN_ON(error >= 0); |
2438 | return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); | 2441 | return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); |
2439 | } | 2442 | } |
2440 | EXPORT_SYMBOL_GPL(__blk_end_request_err); | 2443 | EXPORT_SYMBOL_GPL(__blk_end_request_err); |
2441 | 2444 | ||
2442 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 2445 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
2443 | struct bio *bio) | 2446 | struct bio *bio) |
2444 | { | 2447 | { |
2445 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ | 2448 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ |
2446 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; | 2449 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; |
2447 | 2450 | ||
2448 | if (bio_has_data(bio)) { | 2451 | if (bio_has_data(bio)) { |
2449 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 2452 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
2450 | rq->buffer = bio_data(bio); | 2453 | rq->buffer = bio_data(bio); |
2451 | } | 2454 | } |
2452 | rq->__data_len = bio->bi_size; | 2455 | rq->__data_len = bio->bi_size; |
2453 | rq->bio = rq->biotail = bio; | 2456 | rq->bio = rq->biotail = bio; |
2454 | 2457 | ||
2455 | if (bio->bi_bdev) | 2458 | if (bio->bi_bdev) |
2456 | rq->rq_disk = bio->bi_bdev->bd_disk; | 2459 | rq->rq_disk = bio->bi_bdev->bd_disk; |
2457 | } | 2460 | } |
2458 | 2461 | ||
2459 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 2462 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
2460 | /** | 2463 | /** |
2461 | * rq_flush_dcache_pages - Helper function to flush all pages in a request | 2464 | * rq_flush_dcache_pages - Helper function to flush all pages in a request |
2462 | * @rq: the request to be flushed | 2465 | * @rq: the request to be flushed |
2463 | * | 2466 | * |
2464 | * Description: | 2467 | * Description: |
2465 | * Flush all pages in @rq. | 2468 | * Flush all pages in @rq. |
2466 | */ | 2469 | */ |
2467 | void rq_flush_dcache_pages(struct request *rq) | 2470 | void rq_flush_dcache_pages(struct request *rq) |
2468 | { | 2471 | { |
2469 | struct req_iterator iter; | 2472 | struct req_iterator iter; |
2470 | struct bio_vec *bvec; | 2473 | struct bio_vec *bvec; |
2471 | 2474 | ||
2472 | rq_for_each_segment(bvec, rq, iter) | 2475 | rq_for_each_segment(bvec, rq, iter) |
2473 | flush_dcache_page(bvec->bv_page); | 2476 | flush_dcache_page(bvec->bv_page); |
2474 | } | 2477 | } |
2475 | EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); | 2478 | EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); |
2476 | #endif | 2479 | #endif |
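
A sketch of the intended caller: a driver that fills a read request's pages by CPU copy (PIO) flushes the D-cache before completing, so user mappings observe the data on architectures that need it (the helper compiles to a no-op elsewhere). The example_* name is illustrative.

#include <linux/blkdev.h>

static void example_pio_read_done(struct request *rq, unsigned int bytes_done)
{
        if (rq_data_dir(rq) == READ)
                rq_flush_dcache_pages(rq);

        blk_end_request(rq, 0, bytes_done);
}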
2477 | 2480 | ||
2478 | /** | 2481 | /** |
2479 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy | 2482 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy |
2480 | * @q : the queue of the device being checked | 2483 | * @q : the queue of the device being checked |
2481 | * | 2484 | * |
2482 | * Description: | 2485 | * Description: |
2483 | * Check if underlying low-level drivers of a device are busy. | 2486 | * Check if underlying low-level drivers of a device are busy. |
2484 | * If the drivers want to export their busy state, they must set their own | 2487 | * If the drivers want to export their busy state, they must set their own |
2485 | * exporting function using blk_queue_lld_busy() first. | 2488 | * exporting function using blk_queue_lld_busy() first. |
2486 | * | 2489 | * |
2487 | * Basically, this function is used only by request stacking drivers | 2490 | * Basically, this function is used only by request stacking drivers |
2488 | * to stop dispatching requests to underlying devices when underlying | 2491 | * to stop dispatching requests to underlying devices when underlying |
2489 | * devices are busy. This behavior helps more I/O merging on the queue | 2492 | * devices are busy. This behavior helps more I/O merging on the queue |
2490 | * of the request stacking driver and prevents I/O throughput regression | 2493 | * of the request stacking driver and prevents I/O throughput regression |
2491 | * on burst I/O load. | 2494 | * on burst I/O load. |
2492 | * | 2495 | * |
2493 | * Return: | 2496 | * Return: |
2494 | * 0 - Not busy (The request stacking driver should dispatch request) | 2497 | * 0 - Not busy (The request stacking driver should dispatch request) |
2495 | * 1 - Busy (The request stacking driver should stop dispatching request) | 2498 | * 1 - Busy (The request stacking driver should stop dispatching request) |
2496 | */ | 2499 | */ |
2497 | int blk_lld_busy(struct request_queue *q) | 2500 | int blk_lld_busy(struct request_queue *q) |
2498 | { | 2501 | { |
2499 | if (q->lld_busy_fn) | 2502 | if (q->lld_busy_fn) |
2500 | return q->lld_busy_fn(q); | 2503 | return q->lld_busy_fn(q); |
2501 | 2504 | ||
2502 | return 0; | 2505 | return 0; |
2503 | } | 2506 | } |
2504 | EXPORT_SYMBOL_GPL(blk_lld_busy); | 2507 | EXPORT_SYMBOL_GPL(blk_lld_busy); |
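For context, here is a hedged sketch of how a request-stacking driver might wire this up. The driver name, queuedata layout and busy test are purely illustrative; only blk_queue_lld_busy() and blk_lld_busy() are real block-layer API:

	/* Illustrative only: not part of this diff. */
	struct stackdev {			/* hypothetical per-device state */
		int lower_busy;
	};

	static int stackdev_lld_busy(struct request_queue *q)
	{
		struct stackdev *sd = q->queuedata;

		/* report busy while the underlying device cannot take more I/O */
		return sd->lower_busy ? 1 : 0;
	}

	static void stackdev_init_queue(struct request_queue *q, struct stackdev *sd)
	{
		q->queuedata = sd;
		blk_queue_lld_busy(q, stackdev_lld_busy);	/* export busy state */
	}

After this, blk_lld_busy(q) on the stacking queue returns whatever the callback reports, and the dispatcher can hold requests back while it is non-zero.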
2505 | 2508 | ||
2506 | /** | 2509 | /** |
2507 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request | 2510 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request |
2508 | * @rq: the clone request to be cleaned up | 2511 | * @rq: the clone request to be cleaned up |
2509 | * | 2512 | * |
2510 | * Description: | 2513 | * Description: |
2511 | * Free all bios in @rq for a cloned request. | 2514 | * Free all bios in @rq for a cloned request. |
2512 | */ | 2515 | */ |
2513 | void blk_rq_unprep_clone(struct request *rq) | 2516 | void blk_rq_unprep_clone(struct request *rq) |
2514 | { | 2517 | { |
2515 | struct bio *bio; | 2518 | struct bio *bio; |
2516 | 2519 | ||
2517 | while ((bio = rq->bio) != NULL) { | 2520 | while ((bio = rq->bio) != NULL) { |
2518 | rq->bio = bio->bi_next; | 2521 | rq->bio = bio->bi_next; |
2519 | 2522 | ||
2520 | bio_put(bio); | 2523 | bio_put(bio); |
2521 | } | 2524 | } |
2522 | } | 2525 | } |
2523 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); | 2526 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); |
2524 | 2527 | ||
2525 | /* | 2528 | /* |
2526 | * Copy attributes of the original request to the clone request. | 2529 | * Copy attributes of the original request to the clone request. |
2527 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. | 2530 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. |
2528 | */ | 2531 | */ |
2529 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | 2532 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
2530 | { | 2533 | { |
2531 | dst->cpu = src->cpu; | 2534 | dst->cpu = src->cpu; |
2532 | dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; | 2535 | dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; |
2533 | dst->cmd_type = src->cmd_type; | 2536 | dst->cmd_type = src->cmd_type; |
2534 | dst->__sector = blk_rq_pos(src); | 2537 | dst->__sector = blk_rq_pos(src); |
2535 | dst->__data_len = blk_rq_bytes(src); | 2538 | dst->__data_len = blk_rq_bytes(src); |
2536 | dst->nr_phys_segments = src->nr_phys_segments; | 2539 | dst->nr_phys_segments = src->nr_phys_segments; |
2537 | dst->ioprio = src->ioprio; | 2540 | dst->ioprio = src->ioprio; |
2538 | dst->extra_len = src->extra_len; | 2541 | dst->extra_len = src->extra_len; |
2539 | } | 2542 | } |
2540 | 2543 | ||
2541 | /** | 2544 | /** |
2542 | * blk_rq_prep_clone - Helper function to setup clone request | 2545 | * blk_rq_prep_clone - Helper function to setup clone request |
2543 | * @rq: the request to be setup | 2546 | * @rq: the request to be setup |
2544 | * @rq_src: original request to be cloned | 2547 | * @rq_src: original request to be cloned |
2545 | * @bs: bio_set that bios for clone are allocated from | 2548 | * @bs: bio_set that bios for clone are allocated from |
2546 | * @gfp_mask: memory allocation mask for bio | 2549 | * @gfp_mask: memory allocation mask for bio |
2547 | * @bio_ctr: setup function to be called for each clone bio. | 2550 | * @bio_ctr: setup function to be called for each clone bio. |
2548 | * Returns %0 for success, non %0 for failure. | 2551 | * Returns %0 for success, non %0 for failure. |
2549 | * @data: private data to be passed to @bio_ctr | 2552 | * @data: private data to be passed to @bio_ctr |
2550 | * | 2553 | * |
2551 | * Description: | 2554 | * Description: |
2552 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. | 2555 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. |
2553 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) | 2556 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) |
2554 | * are not copied, and copying such parts is the caller's responsibility. | 2557 | * are not copied, and copying such parts is the caller's responsibility. |
2555 | * Also, pages which the original bios are pointing to are not copied | 2558 | * Also, pages which the original bios are pointing to are not copied |
2556 | * and the cloned bios just point to the same pages. | 2559 | * and the cloned bios just point to the same pages. |
2557 | * So cloned bios must be completed before original bios, which means | 2560 | * So cloned bios must be completed before original bios, which means |
2558 | * the caller must complete @rq before @rq_src. | 2561 | * the caller must complete @rq before @rq_src. |
2559 | */ | 2562 | */ |
2560 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 2563 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
2561 | struct bio_set *bs, gfp_t gfp_mask, | 2564 | struct bio_set *bs, gfp_t gfp_mask, |
2562 | int (*bio_ctr)(struct bio *, struct bio *, void *), | 2565 | int (*bio_ctr)(struct bio *, struct bio *, void *), |
2563 | void *data) | 2566 | void *data) |
2564 | { | 2567 | { |
2565 | struct bio *bio, *bio_src; | 2568 | struct bio *bio, *bio_src; |
2566 | 2569 | ||
2567 | if (!bs) | 2570 | if (!bs) |
2568 | bs = fs_bio_set; | 2571 | bs = fs_bio_set; |
2569 | 2572 | ||
2570 | blk_rq_init(NULL, rq); | 2573 | blk_rq_init(NULL, rq); |
2571 | 2574 | ||
2572 | __rq_for_each_bio(bio_src, rq_src) { | 2575 | __rq_for_each_bio(bio_src, rq_src) { |
2573 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); | 2576 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); |
2574 | if (!bio) | 2577 | if (!bio) |
2575 | goto free_and_out; | 2578 | goto free_and_out; |
2576 | 2579 | ||
2577 | __bio_clone(bio, bio_src); | 2580 | __bio_clone(bio, bio_src); |
2578 | 2581 | ||
2579 | if (bio_integrity(bio_src) && | 2582 | if (bio_integrity(bio_src) && |
2580 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) | 2583 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) |
2581 | goto free_and_out; | 2584 | goto free_and_out; |
2582 | 2585 | ||
2583 | if (bio_ctr && bio_ctr(bio, bio_src, data)) | 2586 | if (bio_ctr && bio_ctr(bio, bio_src, data)) |
2584 | goto free_and_out; | 2587 | goto free_and_out; |
2585 | 2588 | ||
2586 | if (rq->bio) { | 2589 | if (rq->bio) { |
2587 | rq->biotail->bi_next = bio; | 2590 | rq->biotail->bi_next = bio; |
2588 | rq->biotail = bio; | 2591 | rq->biotail = bio; |
2589 | } else | 2592 | } else |
2590 | rq->bio = rq->biotail = bio; | 2593 | rq->bio = rq->biotail = bio; |
2591 | } | 2594 | } |
2592 | 2595 | ||
2593 | __blk_rq_prep_clone(rq, rq_src); | 2596 | __blk_rq_prep_clone(rq, rq_src); |
2594 | 2597 | ||
2595 | return 0; | 2598 | return 0; |
2596 | 2599 | ||
2597 | free_and_out: | 2600 | free_and_out: |
2598 | if (bio) | 2601 | if (bio) |
2599 | bio_free(bio, bs); | 2602 | bio_free(bio, bs); |
2600 | blk_rq_unprep_clone(rq); | 2603 | blk_rq_unprep_clone(rq); |
2601 | 2604 | ||
2602 | return -ENOMEM; | 2605 | return -ENOMEM; |
2603 | } | 2606 | } |
2604 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); | 2607 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); |
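As a rough usage sketch, loosely modelled on what a request-stacking driver such as dm-multipath does (the helper names, GFP choice and end_io handler below are illustrative, not taken from this diff): the clone is prepared, the data parts that blk_rq_prep_clone() deliberately does not copy are filled in by hand, and the result is handed to the lower queue with blk_insert_cloned_request():

	/* Illustrative only: not part of this diff. */
	static void clone_end_io(struct request *clone, int error)
	{
		/* complete the original request once the clone finishes */
	}

	static int dispatch_clone(struct request_queue *lower_q, struct request *rq,
				  struct request *clone, struct bio_set *bs)
	{
		if (blk_rq_prep_clone(clone, rq, bs, GFP_ATOMIC, NULL, NULL))
			return -ENOMEM;

		/* ->cmd, ->buffer, ->sense are NOT copied; the caller owns them */
		clone->cmd = rq->cmd;
		clone->cmd_len = rq->cmd_len;
		clone->sense = rq->sense;
		clone->end_io = clone_end_io;

		return blk_insert_cloned_request(lower_q, clone);
	}

The clone must be completed before the original, matching the ordering requirement documented above.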
2605 | 2608 | ||
2606 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | 2609 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) |
2607 | { | 2610 | { |
2608 | return queue_work(kblockd_workqueue, work); | 2611 | return queue_work(kblockd_workqueue, work); |
2609 | } | 2612 | } |
2610 | EXPORT_SYMBOL(kblockd_schedule_work); | 2613 | EXPORT_SYMBOL(kblockd_schedule_work); |
2611 | 2614 | ||
2612 | int kblockd_schedule_delayed_work(struct request_queue *q, | 2615 | int kblockd_schedule_delayed_work(struct request_queue *q, |
2613 | struct delayed_work *dwork, unsigned long delay) | 2616 | struct delayed_work *dwork, unsigned long delay) |
2614 | { | 2617 | { |
2615 | return queue_delayed_work(kblockd_workqueue, dwork, delay); | 2618 | return queue_delayed_work(kblockd_workqueue, dwork, delay); |
2616 | } | 2619 | } |
2617 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); | 2620 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); |
2618 | 2621 | ||
2619 | #define PLUG_MAGIC 0x91827364 | 2622 | #define PLUG_MAGIC 0x91827364 |
2620 | 2623 | ||
2621 | void blk_start_plug(struct blk_plug *plug) | 2624 | void blk_start_plug(struct blk_plug *plug) |
2622 | { | 2625 | { |
2623 | struct task_struct *tsk = current; | 2626 | struct task_struct *tsk = current; |
2624 | 2627 | ||
2625 | plug->magic = PLUG_MAGIC; | 2628 | plug->magic = PLUG_MAGIC; |
2626 | INIT_LIST_HEAD(&plug->list); | 2629 | INIT_LIST_HEAD(&plug->list); |
2627 | INIT_LIST_HEAD(&plug->cb_list); | 2630 | INIT_LIST_HEAD(&plug->cb_list); |
2628 | plug->should_sort = 0; | 2631 | plug->should_sort = 0; |
2632 | plug->count = 0; | ||
2629 | 2633 | ||
2630 | /* | 2634 | /* |
2631 | * If this is a nested plug, don't actually assign it. It will be | 2635 | * If this is a nested plug, don't actually assign it. It will be |
2632 | * flushed on its own. | 2636 | * flushed on its own. |
2633 | */ | 2637 | */ |
2634 | if (!tsk->plug) { | 2638 | if (!tsk->plug) { |
2635 | /* | 2639 | /* |
2636 | * Store ordering should not be needed here, since a potential | 2640 | * Store ordering should not be needed here, since a potential |
2637 | * preempt will imply a full memory barrier | 2641 | * preempt will imply a full memory barrier |
2638 | */ | 2642 | */ |
2639 | tsk->plug = plug; | 2643 | tsk->plug = plug; |
2640 | } | 2644 | } |
2641 | } | 2645 | } |
2642 | EXPORT_SYMBOL(blk_start_plug); | 2646 | EXPORT_SYMBOL(blk_start_plug); |
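For reference, a minimal illustrative sketch of how a submitter brackets a batch of bios with a per-task plug; blk_finish_plug(), defined further down, flushes whatever has accumulated, and the plug->count field initialised above is the counter the submission path (not shown in this hunk) checks against the new request-count threshold to force an earlier flush:

	/* Illustrative only: typical plugging around a submission loop. */
	static void submit_batch(int rw, struct bio **bios, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* requests now collect in plug->list */
		for (i = 0; i < nr; i++)
			submit_bio(rw, bios[i]);
		blk_finish_plug(&plug);		/* flush anything still plugged */
	}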
2643 | 2647 | ||
2644 | static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) | 2648 | static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) |
2645 | { | 2649 | { |
2646 | struct request *rqa = container_of(a, struct request, queuelist); | 2650 | struct request *rqa = container_of(a, struct request, queuelist); |
2647 | struct request *rqb = container_of(b, struct request, queuelist); | 2651 | struct request *rqb = container_of(b, struct request, queuelist); |
2648 | 2652 | ||
2649 | return !(rqa->q <= rqb->q); | 2653 | return !(rqa->q <= rqb->q); |
2650 | } | 2654 | } |
2651 | 2655 | ||
2652 | /* | 2656 | /* |
2653 | * If 'from_schedule' is true, then postpone the dispatch of requests | 2657 | * If 'from_schedule' is true, then postpone the dispatch of requests |
2654 | * until a safe kblockd context. We do this to avoid accidental big | 2658 | * until a safe kblockd context. We do this to avoid accidental big |
2655 | * additional stack usage in driver dispatch, in places where the original | 2659 | * additional stack usage in driver dispatch, in places where the original |
2656 | * plugger did not intend it. | 2660 | * plugger did not intend it. |
2657 | */ | 2661 | */ |
2658 | static void queue_unplugged(struct request_queue *q, unsigned int depth, | 2662 | static void queue_unplugged(struct request_queue *q, unsigned int depth, |
2659 | bool from_schedule) | 2663 | bool from_schedule) |
2660 | __releases(q->queue_lock) | 2664 | __releases(q->queue_lock) |
2661 | { | 2665 | { |
2662 | trace_block_unplug(q, depth, !from_schedule); | 2666 | trace_block_unplug(q, depth, !from_schedule); |
2663 | 2667 | ||
2664 | /* | 2668 | /* |
2665 | * If we are punting this to kblockd, then we can safely drop | 2669 | * If we are punting this to kblockd, then we can safely drop |
2666 | * the queue_lock before waking kblockd (which needs to take | 2670 | * the queue_lock before waking kblockd (which needs to take |
2667 | * this lock). | 2671 | * this lock). |
2668 | */ | 2672 | */ |
2669 | if (from_schedule) { | 2673 | if (from_schedule) { |
2670 | spin_unlock(q->queue_lock); | 2674 | spin_unlock(q->queue_lock); |
2671 | blk_run_queue_async(q); | 2675 | blk_run_queue_async(q); |
2672 | } else { | 2676 | } else { |
2673 | __blk_run_queue(q); | 2677 | __blk_run_queue(q); |
2674 | spin_unlock(q->queue_lock); | 2678 | spin_unlock(q->queue_lock); |
2675 | } | 2679 | } |
2676 | 2680 | ||
2677 | } | 2681 | } |
2678 | 2682 | ||
2679 | static void flush_plug_callbacks(struct blk_plug *plug) | 2683 | static void flush_plug_callbacks(struct blk_plug *plug) |
2680 | { | 2684 | { |
2681 | LIST_HEAD(callbacks); | 2685 | LIST_HEAD(callbacks); |
2682 | 2686 | ||
2683 | if (list_empty(&plug->cb_list)) | 2687 | if (list_empty(&plug->cb_list)) |
2684 | return; | 2688 | return; |
2685 | 2689 | ||
2686 | list_splice_init(&plug->cb_list, &callbacks); | 2690 | list_splice_init(&plug->cb_list, &callbacks); |
2687 | 2691 | ||
2688 | while (!list_empty(&callbacks)) { | 2692 | while (!list_empty(&callbacks)) { |
2689 | struct blk_plug_cb *cb = list_first_entry(&callbacks, | 2693 | struct blk_plug_cb *cb = list_first_entry(&callbacks, |
2690 | struct blk_plug_cb, | 2694 | struct blk_plug_cb, |
2691 | list); | 2695 | list); |
2692 | list_del(&cb->list); | 2696 | list_del(&cb->list); |
2693 | cb->callback(cb); | 2697 | cb->callback(cb); |
2694 | } | 2698 | } |
2695 | } | 2699 | } |
2696 | 2700 | ||
2697 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | 2701 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
2698 | { | 2702 | { |
2699 | struct request_queue *q; | 2703 | struct request_queue *q; |
2700 | unsigned long flags; | 2704 | unsigned long flags; |
2701 | struct request *rq; | 2705 | struct request *rq; |
2702 | LIST_HEAD(list); | 2706 | LIST_HEAD(list); |
2703 | unsigned int depth; | 2707 | unsigned int depth; |
2704 | 2708 | ||
2705 | BUG_ON(plug->magic != PLUG_MAGIC); | 2709 | BUG_ON(plug->magic != PLUG_MAGIC); |
2706 | 2710 | ||
2707 | flush_plug_callbacks(plug); | 2711 | flush_plug_callbacks(plug); |
2708 | if (list_empty(&plug->list)) | 2712 | if (list_empty(&plug->list)) |
2709 | return; | 2713 | return; |
2710 | 2714 | ||
2711 | list_splice_init(&plug->list, &list); | 2715 | list_splice_init(&plug->list, &list); |
2716 | plug->count = 0; | ||
2712 | 2717 | ||
2713 | if (plug->should_sort) { | 2718 | if (plug->should_sort) { |
2714 | list_sort(NULL, &list, plug_rq_cmp); | 2719 | list_sort(NULL, &list, plug_rq_cmp); |
2715 | plug->should_sort = 0; | 2720 | plug->should_sort = 0; |
2716 | } | 2721 | } |
2717 | 2722 | ||
2718 | q = NULL; | 2723 | q = NULL; |
2719 | depth = 0; | 2724 | depth = 0; |
2720 | 2725 | ||
2721 | /* | 2726 | /* |
2722 | * Save and disable interrupts here, to avoid doing it for every | 2727 | * Save and disable interrupts here, to avoid doing it for every |
2723 | * queue lock we have to take. | 2728 | * queue lock we have to take. |
2724 | */ | 2729 | */ |
2725 | local_irq_save(flags); | 2730 | local_irq_save(flags); |
2726 | while (!list_empty(&list)) { | 2731 | while (!list_empty(&list)) { |
2727 | rq = list_entry_rq(list.next); | 2732 | rq = list_entry_rq(list.next); |
2728 | list_del_init(&rq->queuelist); | 2733 | list_del_init(&rq->queuelist); |
2729 | BUG_ON(!rq->q); | 2734 | BUG_ON(!rq->q); |
2730 | if (rq->q != q) { | 2735 | if (rq->q != q) { |
2731 | /* | 2736 | /* |
2732 | * This drops the queue lock | 2737 | * This drops the queue lock |
2733 | */ | 2738 | */ |
2734 | if (q) | 2739 | if (q) |
2735 | queue_unplugged(q, depth, from_schedule); | 2740 | queue_unplugged(q, depth, from_schedule); |
2736 | q = rq->q; | 2741 | q = rq->q; |
2737 | depth = 0; | 2742 | depth = 0; |
2738 | spin_lock(q->queue_lock); | 2743 | spin_lock(q->queue_lock); |
2739 | } | 2744 | } |
2740 | /* | 2745 | /* |
2741 | * rq is already accounted, so use raw insert | 2746 | * rq is already accounted, so use raw insert |
2742 | */ | 2747 | */ |
2743 | if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) | 2748 | if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) |
2744 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); | 2749 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); |
2745 | else | 2750 | else |
2746 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); | 2751 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); |
2747 | 2752 | ||
2748 | depth++; | 2753 | depth++; |
2749 | } | 2754 | } |
2750 | 2755 | ||
2751 | /* | 2756 | /* |
2752 | * This drops the queue lock | 2757 | * This drops the queue lock |
2753 | */ | 2758 | */ |
2754 | if (q) | 2759 | if (q) |
2755 | queue_unplugged(q, depth, from_schedule); | 2760 | queue_unplugged(q, depth, from_schedule); |
2756 | 2761 | ||
2757 | local_irq_restore(flags); | 2762 | local_irq_restore(flags); |
2758 | } | 2763 | } |
2759 | 2764 | ||
2760 | void blk_finish_plug(struct blk_plug *plug) | 2765 | void blk_finish_plug(struct blk_plug *plug) |
2761 | { | 2766 | { |
2762 | blk_flush_plug_list(plug, false); | 2767 | blk_flush_plug_list(plug, false); |
2763 | 2768 | ||
2764 | if (plug == current->plug) | 2769 | if (plug == current->plug) |
2765 | current->plug = NULL; | 2770 | current->plug = NULL; |
2766 | } | 2771 | } |
2767 | EXPORT_SYMBOL(blk_finish_plug); | 2772 | EXPORT_SYMBOL(blk_finish_plug); |
2768 | 2773 | ||
2769 | int __init blk_dev_init(void) | 2774 | int __init blk_dev_init(void) |
2770 | { | 2775 | { |
2771 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * | 2776 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
2772 | sizeof(((struct request *)0)->cmd_flags)); | 2777 | sizeof(((struct request *)0)->cmd_flags)); |
2773 | 2778 | ||
2774 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ | 2779 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ |
2775 | kblockd_workqueue = alloc_workqueue("kblockd", | 2780 | kblockd_workqueue = alloc_workqueue("kblockd", |
2776 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); | 2781 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); |
2777 | if (!kblockd_workqueue) | 2782 | if (!kblockd_workqueue) |
2778 | panic("Failed to create kblockd\n"); | 2783 | panic("Failed to create kblockd\n"); |
2779 | 2784 | ||
2780 | request_cachep = kmem_cache_create("blkdev_requests", | 2785 | request_cachep = kmem_cache_create("blkdev_requests", |
2781 | sizeof(struct request), 0, SLAB_PANIC, NULL); | 2786 | sizeof(struct request), 0, SLAB_PANIC, NULL); |
2782 | 2787 | ||
2783 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", | 2788 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", |
2784 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 2789 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
2785 | 2790 | ||
2786 | return 0; | 2791 | return 0; |
2787 | } | 2792 | } |
2788 | 2793 |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #ifdef CONFIG_BLOCK | 4 | #ifdef CONFIG_BLOCK |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/major.h> | 7 | #include <linux/major.h> |
8 | #include <linux/genhd.h> | 8 | #include <linux/genhd.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/timer.h> | 10 | #include <linux/timer.h> |
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
15 | #include <linux/mempool.h> | 15 | #include <linux/mempool.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/stringify.h> | 18 | #include <linux/stringify.h> |
19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
20 | #include <linux/bsg.h> | 20 | #include <linux/bsg.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | 22 | ||
23 | #include <asm/scatterlist.h> | 23 | #include <asm/scatterlist.h> |
24 | 24 | ||
25 | struct scsi_ioctl_command; | 25 | struct scsi_ioctl_command; |
26 | 26 | ||
27 | struct request_queue; | 27 | struct request_queue; |
28 | struct elevator_queue; | 28 | struct elevator_queue; |
29 | struct request_pm_state; | 29 | struct request_pm_state; |
30 | struct blk_trace; | 30 | struct blk_trace; |
31 | struct request; | 31 | struct request; |
32 | struct sg_io_hdr; | 32 | struct sg_io_hdr; |
33 | 33 | ||
34 | #define BLKDEV_MIN_RQ 4 | 34 | #define BLKDEV_MIN_RQ 4 |
35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
36 | 36 | ||
37 | struct request; | 37 | struct request; |
38 | typedef void (rq_end_io_fn)(struct request *, int); | 38 | typedef void (rq_end_io_fn)(struct request *, int); |
39 | 39 | ||
40 | struct request_list { | 40 | struct request_list { |
41 | /* | 41 | /* |
42 | * count[], starved[], and wait[] are indexed by | 42 | * count[], starved[], and wait[] are indexed by |
43 | * BLK_RW_SYNC/BLK_RW_ASYNC | 43 | * BLK_RW_SYNC/BLK_RW_ASYNC |
44 | */ | 44 | */ |
45 | int count[2]; | 45 | int count[2]; |
46 | int starved[2]; | 46 | int starved[2]; |
47 | int elvpriv; | 47 | int elvpriv; |
48 | mempool_t *rq_pool; | 48 | mempool_t *rq_pool; |
49 | wait_queue_head_t wait[2]; | 49 | wait_queue_head_t wait[2]; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * request command types | 53 | * request command types |
54 | */ | 54 | */ |
55 | enum rq_cmd_type_bits { | 55 | enum rq_cmd_type_bits { |
56 | REQ_TYPE_FS = 1, /* fs request */ | 56 | REQ_TYPE_FS = 1, /* fs request */ |
57 | REQ_TYPE_BLOCK_PC, /* scsi command */ | 57 | REQ_TYPE_BLOCK_PC, /* scsi command */ |
58 | REQ_TYPE_SENSE, /* sense request */ | 58 | REQ_TYPE_SENSE, /* sense request */ |
59 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | 59 | REQ_TYPE_PM_SUSPEND, /* suspend request */ |
60 | REQ_TYPE_PM_RESUME, /* resume request */ | 60 | REQ_TYPE_PM_RESUME, /* resume request */ |
61 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | 61 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ |
62 | REQ_TYPE_SPECIAL, /* driver defined type */ | 62 | REQ_TYPE_SPECIAL, /* driver defined type */ |
63 | /* | 63 | /* |
64 | * for ATA/ATAPI devices. this really doesn't belong here, ide should | 64 | * for ATA/ATAPI devices. this really doesn't belong here, ide should |
65 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver | 65 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver |
66 | * private REQ_LB opcodes to differentiate what type of request this is | 66 | * private REQ_LB opcodes to differentiate what type of request this is |
67 | */ | 67 | */ |
68 | REQ_TYPE_ATA_TASKFILE, | 68 | REQ_TYPE_ATA_TASKFILE, |
69 | REQ_TYPE_ATA_PC, | 69 | REQ_TYPE_ATA_PC, |
70 | }; | 70 | }; |
71 | 71 | ||
72 | #define BLK_MAX_CDB 16 | 72 | #define BLK_MAX_CDB 16 |
73 | 73 | ||
74 | /* | 74 | /* |
75 | * try to put the fields that are referenced together in the same cacheline. | 75 | * try to put the fields that are referenced together in the same cacheline. |
76 | * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() | 76 | * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() |
77 | * as well! | 77 | * as well! |
78 | */ | 78 | */ |
79 | struct request { | 79 | struct request { |
80 | struct list_head queuelist; | 80 | struct list_head queuelist; |
81 | struct call_single_data csd; | 81 | struct call_single_data csd; |
82 | 82 | ||
83 | struct request_queue *q; | 83 | struct request_queue *q; |
84 | 84 | ||
85 | unsigned int cmd_flags; | 85 | unsigned int cmd_flags; |
86 | enum rq_cmd_type_bits cmd_type; | 86 | enum rq_cmd_type_bits cmd_type; |
87 | unsigned long atomic_flags; | 87 | unsigned long atomic_flags; |
88 | 88 | ||
89 | int cpu; | 89 | int cpu; |
90 | 90 | ||
91 | /* the following two fields are internal, NEVER access directly */ | 91 | /* the following two fields are internal, NEVER access directly */ |
92 | unsigned int __data_len; /* total data len */ | 92 | unsigned int __data_len; /* total data len */ |
93 | sector_t __sector; /* sector cursor */ | 93 | sector_t __sector; /* sector cursor */ |
94 | 94 | ||
95 | struct bio *bio; | 95 | struct bio *bio; |
96 | struct bio *biotail; | 96 | struct bio *biotail; |
97 | 97 | ||
98 | struct hlist_node hash; /* merge hash */ | 98 | struct hlist_node hash; /* merge hash */ |
99 | /* | 99 | /* |
100 | * The rb_node is only used inside the io scheduler, requests | 100 | * The rb_node is only used inside the io scheduler, requests |
101 | * are pruned when moved to the dispatch queue. So let the | 101 | * are pruned when moved to the dispatch queue. So let the |
102 | * completion_data share space with the rb_node. | 102 | * completion_data share space with the rb_node. |
103 | */ | 103 | */ |
104 | union { | 104 | union { |
105 | struct rb_node rb_node; /* sort/lookup */ | 105 | struct rb_node rb_node; /* sort/lookup */ |
106 | void *completion_data; | 106 | void *completion_data; |
107 | }; | 107 | }; |
108 | 108 | ||
109 | /* | 109 | /* |
110 | * Three pointers are available for the IO schedulers, if they need | 110 | * Three pointers are available for the IO schedulers, if they need |
111 | * more they have to dynamically allocate it. Flush requests are | 111 | * more they have to dynamically allocate it. Flush requests are |
112 | * never put on the IO scheduler. So let the flush fields share | 112 | * never put on the IO scheduler. So let the flush fields share |
113 | * space with the three elevator_private pointers. | 113 | * space with the three elevator_private pointers. |
114 | */ | 114 | */ |
115 | union { | 115 | union { |
116 | void *elevator_private[3]; | 116 | void *elevator_private[3]; |
117 | struct { | 117 | struct { |
118 | unsigned int seq; | 118 | unsigned int seq; |
119 | struct list_head list; | 119 | struct list_head list; |
120 | } flush; | 120 | } flush; |
121 | }; | 121 | }; |
122 | 122 | ||
123 | struct gendisk *rq_disk; | 123 | struct gendisk *rq_disk; |
124 | struct hd_struct *part; | 124 | struct hd_struct *part; |
125 | unsigned long start_time; | 125 | unsigned long start_time; |
126 | #ifdef CONFIG_BLK_CGROUP | 126 | #ifdef CONFIG_BLK_CGROUP |
127 | unsigned long long start_time_ns; | 127 | unsigned long long start_time_ns; |
128 | unsigned long long io_start_time_ns; /* when passed to hardware */ | 128 | unsigned long long io_start_time_ns; /* when passed to hardware */ |
129 | #endif | 129 | #endif |
130 | /* Number of scatter-gather DMA addr+len pairs after | 130 | /* Number of scatter-gather DMA addr+len pairs after |
131 | * physical address coalescing is performed. | 131 | * physical address coalescing is performed. |
132 | */ | 132 | */ |
133 | unsigned short nr_phys_segments; | 133 | unsigned short nr_phys_segments; |
134 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 134 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
135 | unsigned short nr_integrity_segments; | 135 | unsigned short nr_integrity_segments; |
136 | #endif | 136 | #endif |
137 | 137 | ||
138 | unsigned short ioprio; | 138 | unsigned short ioprio; |
139 | 139 | ||
140 | int ref_count; | 140 | int ref_count; |
141 | 141 | ||
142 | void *special; /* opaque pointer available for LLD use */ | 142 | void *special; /* opaque pointer available for LLD use */ |
143 | char *buffer; /* kaddr of the current segment if available */ | 143 | char *buffer; /* kaddr of the current segment if available */ |
144 | 144 | ||
145 | int tag; | 145 | int tag; |
146 | int errors; | 146 | int errors; |
147 | 147 | ||
148 | /* | 148 | /* |
149 | * when request is used as a packet command carrier | 149 | * when request is used as a packet command carrier |
150 | */ | 150 | */ |
151 | unsigned char __cmd[BLK_MAX_CDB]; | 151 | unsigned char __cmd[BLK_MAX_CDB]; |
152 | unsigned char *cmd; | 152 | unsigned char *cmd; |
153 | unsigned short cmd_len; | 153 | unsigned short cmd_len; |
154 | 154 | ||
155 | unsigned int extra_len; /* length of alignment and padding */ | 155 | unsigned int extra_len; /* length of alignment and padding */ |
156 | unsigned int sense_len; | 156 | unsigned int sense_len; |
157 | unsigned int resid_len; /* residual count */ | 157 | unsigned int resid_len; /* residual count */ |
158 | void *sense; | 158 | void *sense; |
159 | 159 | ||
160 | unsigned long deadline; | 160 | unsigned long deadline; |
161 | struct list_head timeout_list; | 161 | struct list_head timeout_list; |
162 | unsigned int timeout; | 162 | unsigned int timeout; |
163 | int retries; | 163 | int retries; |
164 | 164 | ||
165 | /* | 165 | /* |
166 | * completion callback. | 166 | * completion callback. |
167 | */ | 167 | */ |
168 | rq_end_io_fn *end_io; | 168 | rq_end_io_fn *end_io; |
169 | void *end_io_data; | 169 | void *end_io_data; |
170 | 170 | ||
171 | /* for bidi */ | 171 | /* for bidi */ |
172 | struct request *next_rq; | 172 | struct request *next_rq; |
173 | }; | 173 | }; |
174 | 174 | ||
175 | static inline unsigned short req_get_ioprio(struct request *req) | 175 | static inline unsigned short req_get_ioprio(struct request *req) |
176 | { | 176 | { |
177 | return req->ioprio; | 177 | return req->ioprio; |
178 | } | 178 | } |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | 181 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME |
182 | * requests. Some step values could eventually be made generic. | 182 | * requests. Some step values could eventually be made generic. |
183 | */ | 183 | */ |
184 | struct request_pm_state | 184 | struct request_pm_state |
185 | { | 185 | { |
186 | /* PM state machine step value, currently driver specific */ | 186 | /* PM state machine step value, currently driver specific */ |
187 | int pm_step; | 187 | int pm_step; |
188 | /* requested PM state value (S1, S2, S3, S4, ...) */ | 188 | /* requested PM state value (S1, S2, S3, S4, ...) */ |
189 | u32 pm_state; | 189 | u32 pm_state; |
190 | void* data; /* for driver use */ | 190 | void* data; /* for driver use */ |
191 | }; | 191 | }; |
192 | 192 | ||
193 | #include <linux/elevator.h> | 193 | #include <linux/elevator.h> |
194 | 194 | ||
195 | typedef void (request_fn_proc) (struct request_queue *q); | 195 | typedef void (request_fn_proc) (struct request_queue *q); |
196 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); | 196 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); |
197 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 197 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
198 | typedef void (unprep_rq_fn) (struct request_queue *, struct request *); | 198 | typedef void (unprep_rq_fn) (struct request_queue *, struct request *); |
199 | 199 | ||
200 | struct bio_vec; | 200 | struct bio_vec; |
201 | struct bvec_merge_data { | 201 | struct bvec_merge_data { |
202 | struct block_device *bi_bdev; | 202 | struct block_device *bi_bdev; |
203 | sector_t bi_sector; | 203 | sector_t bi_sector; |
204 | unsigned bi_size; | 204 | unsigned bi_size; |
205 | unsigned long bi_rw; | 205 | unsigned long bi_rw; |
206 | }; | 206 | }; |
207 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, | 207 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, |
208 | struct bio_vec *); | 208 | struct bio_vec *); |
209 | typedef void (softirq_done_fn)(struct request *); | 209 | typedef void (softirq_done_fn)(struct request *); |
210 | typedef int (dma_drain_needed_fn)(struct request *); | 210 | typedef int (dma_drain_needed_fn)(struct request *); |
211 | typedef int (lld_busy_fn) (struct request_queue *q); | 211 | typedef int (lld_busy_fn) (struct request_queue *q); |
212 | 212 | ||
213 | enum blk_eh_timer_return { | 213 | enum blk_eh_timer_return { |
214 | BLK_EH_NOT_HANDLED, | 214 | BLK_EH_NOT_HANDLED, |
215 | BLK_EH_HANDLED, | 215 | BLK_EH_HANDLED, |
216 | BLK_EH_RESET_TIMER, | 216 | BLK_EH_RESET_TIMER, |
217 | }; | 217 | }; |
218 | 218 | ||
219 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); | 219 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); |
220 | 220 | ||
221 | enum blk_queue_state { | 221 | enum blk_queue_state { |
222 | Queue_down, | 222 | Queue_down, |
223 | Queue_up, | 223 | Queue_up, |
224 | }; | 224 | }; |
225 | 225 | ||
226 | struct blk_queue_tag { | 226 | struct blk_queue_tag { |
227 | struct request **tag_index; /* map of busy tags */ | 227 | struct request **tag_index; /* map of busy tags */ |
228 | unsigned long *tag_map; /* bit map of free/busy tags */ | 228 | unsigned long *tag_map; /* bit map of free/busy tags */ |
229 | int busy; /* current depth */ | 229 | int busy; /* current depth */ |
230 | int max_depth; /* what we will send to device */ | 230 | int max_depth; /* what we will send to device */ |
231 | int real_max_depth; /* what the array can hold */ | 231 | int real_max_depth; /* what the array can hold */ |
232 | atomic_t refcnt; /* map can be shared */ | 232 | atomic_t refcnt; /* map can be shared */ |
233 | }; | 233 | }; |
234 | 234 | ||
235 | #define BLK_SCSI_MAX_CMDS (256) | 235 | #define BLK_SCSI_MAX_CMDS (256) |
236 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | 236 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
237 | 237 | ||
238 | struct queue_limits { | 238 | struct queue_limits { |
239 | unsigned long bounce_pfn; | 239 | unsigned long bounce_pfn; |
240 | unsigned long seg_boundary_mask; | 240 | unsigned long seg_boundary_mask; |
241 | 241 | ||
242 | unsigned int max_hw_sectors; | 242 | unsigned int max_hw_sectors; |
243 | unsigned int max_sectors; | 243 | unsigned int max_sectors; |
244 | unsigned int max_segment_size; | 244 | unsigned int max_segment_size; |
245 | unsigned int physical_block_size; | 245 | unsigned int physical_block_size; |
246 | unsigned int alignment_offset; | 246 | unsigned int alignment_offset; |
247 | unsigned int io_min; | 247 | unsigned int io_min; |
248 | unsigned int io_opt; | 248 | unsigned int io_opt; |
249 | unsigned int max_discard_sectors; | 249 | unsigned int max_discard_sectors; |
250 | unsigned int discard_granularity; | 250 | unsigned int discard_granularity; |
251 | unsigned int discard_alignment; | 251 | unsigned int discard_alignment; |
252 | 252 | ||
253 | unsigned short logical_block_size; | 253 | unsigned short logical_block_size; |
254 | unsigned short max_segments; | 254 | unsigned short max_segments; |
255 | unsigned short max_integrity_segments; | 255 | unsigned short max_integrity_segments; |
256 | 256 | ||
257 | unsigned char misaligned; | 257 | unsigned char misaligned; |
258 | unsigned char discard_misaligned; | 258 | unsigned char discard_misaligned; |
259 | unsigned char cluster; | 259 | unsigned char cluster; |
260 | unsigned char discard_zeroes_data; | 260 | unsigned char discard_zeroes_data; |
261 | }; | 261 | }; |
262 | 262 | ||
263 | struct request_queue | 263 | struct request_queue |
264 | { | 264 | { |
265 | /* | 265 | /* |
266 | * Together with queue_head for cacheline sharing | 266 | * Together with queue_head for cacheline sharing |
267 | */ | 267 | */ |
268 | struct list_head queue_head; | 268 | struct list_head queue_head; |
269 | struct request *last_merge; | 269 | struct request *last_merge; |
270 | struct elevator_queue *elevator; | 270 | struct elevator_queue *elevator; |
271 | 271 | ||
272 | /* | 272 | /* |
273 | * the queue request freelist, one for reads and one for writes | 273 | * the queue request freelist, one for reads and one for writes |
274 | */ | 274 | */ |
275 | struct request_list rq; | 275 | struct request_list rq; |
276 | 276 | ||
277 | request_fn_proc *request_fn; | 277 | request_fn_proc *request_fn; |
278 | make_request_fn *make_request_fn; | 278 | make_request_fn *make_request_fn; |
279 | prep_rq_fn *prep_rq_fn; | 279 | prep_rq_fn *prep_rq_fn; |
280 | unprep_rq_fn *unprep_rq_fn; | 280 | unprep_rq_fn *unprep_rq_fn; |
281 | merge_bvec_fn *merge_bvec_fn; | 281 | merge_bvec_fn *merge_bvec_fn; |
282 | softirq_done_fn *softirq_done_fn; | 282 | softirq_done_fn *softirq_done_fn; |
283 | rq_timed_out_fn *rq_timed_out_fn; | 283 | rq_timed_out_fn *rq_timed_out_fn; |
284 | dma_drain_needed_fn *dma_drain_needed; | 284 | dma_drain_needed_fn *dma_drain_needed; |
285 | lld_busy_fn *lld_busy_fn; | 285 | lld_busy_fn *lld_busy_fn; |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * Dispatch queue sorting | 288 | * Dispatch queue sorting |
289 | */ | 289 | */ |
290 | sector_t end_sector; | 290 | sector_t end_sector; |
291 | struct request *boundary_rq; | 291 | struct request *boundary_rq; |
292 | 292 | ||
293 | /* | 293 | /* |
294 | * Delayed queue handling | 294 | * Delayed queue handling |
295 | */ | 295 | */ |
296 | struct delayed_work delay_work; | 296 | struct delayed_work delay_work; |
297 | 297 | ||
298 | struct backing_dev_info backing_dev_info; | 298 | struct backing_dev_info backing_dev_info; |
299 | 299 | ||
300 | /* | 300 | /* |
301 | * The queue owner gets to use this for whatever they like. | 301 | * The queue owner gets to use this for whatever they like. |
302 | * ll_rw_blk doesn't touch it. | 302 | * ll_rw_blk doesn't touch it. |
303 | */ | 303 | */ |
304 | void *queuedata; | 304 | void *queuedata; |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * queue needs bounce pages for pages above this limit | 307 | * queue needs bounce pages for pages above this limit |
308 | */ | 308 | */ |
309 | gfp_t bounce_gfp; | 309 | gfp_t bounce_gfp; |
310 | 310 | ||
311 | /* | 311 | /* |
312 | * various queue flags, see QUEUE_* below | 312 | * various queue flags, see QUEUE_* below |
313 | */ | 313 | */ |
314 | unsigned long queue_flags; | 314 | unsigned long queue_flags; |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * protects queue structures from reentrancy. ->__queue_lock should | 317 | * protects queue structures from reentrancy. ->__queue_lock should |
318 | * _never_ be used directly, it is queue private. always use | 318 | * _never_ be used directly, it is queue private. always use |
319 | * ->queue_lock. | 319 | * ->queue_lock. |
320 | */ | 320 | */ |
321 | spinlock_t __queue_lock; | 321 | spinlock_t __queue_lock; |
322 | spinlock_t *queue_lock; | 322 | spinlock_t *queue_lock; |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * queue kobject | 325 | * queue kobject |
326 | */ | 326 | */ |
327 | struct kobject kobj; | 327 | struct kobject kobj; |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * queue settings | 330 | * queue settings |
331 | */ | 331 | */ |
332 | unsigned long nr_requests; /* Max # of requests */ | 332 | unsigned long nr_requests; /* Max # of requests */ |
333 | unsigned int nr_congestion_on; | 333 | unsigned int nr_congestion_on; |
334 | unsigned int nr_congestion_off; | 334 | unsigned int nr_congestion_off; |
335 | unsigned int nr_batching; | 335 | unsigned int nr_batching; |
336 | 336 | ||
337 | void *dma_drain_buffer; | 337 | void *dma_drain_buffer; |
338 | unsigned int dma_drain_size; | 338 | unsigned int dma_drain_size; |
339 | unsigned int dma_pad_mask; | 339 | unsigned int dma_pad_mask; |
340 | unsigned int dma_alignment; | 340 | unsigned int dma_alignment; |
341 | 341 | ||
342 | struct blk_queue_tag *queue_tags; | 342 | struct blk_queue_tag *queue_tags; |
343 | struct list_head tag_busy_list; | 343 | struct list_head tag_busy_list; |
344 | 344 | ||
345 | unsigned int nr_sorted; | 345 | unsigned int nr_sorted; |
346 | unsigned int in_flight[2]; | 346 | unsigned int in_flight[2]; |
347 | 347 | ||
348 | unsigned int rq_timeout; | 348 | unsigned int rq_timeout; |
349 | struct timer_list timeout; | 349 | struct timer_list timeout; |
350 | struct list_head timeout_list; | 350 | struct list_head timeout_list; |
351 | 351 | ||
352 | struct queue_limits limits; | 352 | struct queue_limits limits; |
353 | 353 | ||
354 | /* | 354 | /* |
355 | * sg stuff | 355 | * sg stuff |
356 | */ | 356 | */ |
357 | unsigned int sg_timeout; | 357 | unsigned int sg_timeout; |
358 | unsigned int sg_reserved_size; | 358 | unsigned int sg_reserved_size; |
359 | int node; | 359 | int node; |
360 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 360 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
361 | struct blk_trace *blk_trace; | 361 | struct blk_trace *blk_trace; |
362 | #endif | 362 | #endif |
363 | /* | 363 | /* |
364 | * for flush operations | 364 | * for flush operations |
365 | */ | 365 | */ |
366 | unsigned int flush_flags; | 366 | unsigned int flush_flags; |
367 | unsigned int flush_not_queueable:1; | 367 | unsigned int flush_not_queueable:1; |
368 | unsigned int flush_queue_delayed:1; | 368 | unsigned int flush_queue_delayed:1; |
369 | unsigned int flush_pending_idx:1; | 369 | unsigned int flush_pending_idx:1; |
370 | unsigned int flush_running_idx:1; | 370 | unsigned int flush_running_idx:1; |
371 | unsigned long flush_pending_since; | 371 | unsigned long flush_pending_since; |
372 | struct list_head flush_queue[2]; | 372 | struct list_head flush_queue[2]; |
373 | struct list_head flush_data_in_flight; | 373 | struct list_head flush_data_in_flight; |
374 | struct request flush_rq; | 374 | struct request flush_rq; |
375 | 375 | ||
376 | struct mutex sysfs_lock; | 376 | struct mutex sysfs_lock; |
377 | 377 | ||
378 | #if defined(CONFIG_BLK_DEV_BSG) | 378 | #if defined(CONFIG_BLK_DEV_BSG) |
379 | struct bsg_class_device bsg_dev; | 379 | struct bsg_class_device bsg_dev; |
380 | #endif | 380 | #endif |
381 | 381 | ||
382 | #ifdef CONFIG_BLK_DEV_THROTTLING | 382 | #ifdef CONFIG_BLK_DEV_THROTTLING |
383 | /* Throttle data */ | 383 | /* Throttle data */ |
384 | struct throtl_data *td; | 384 | struct throtl_data *td; |
385 | #endif | 385 | #endif |
386 | }; | 386 | }; |
387 | 387 | ||
388 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 388 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
389 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ | 389 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ |
390 | #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ | 390 | #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ |
391 | #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ | 391 | #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ |
392 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 392 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
393 | #define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ | 393 | #define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ |
394 | #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ | 394 | #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ |
395 | #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ | 395 | #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ |
396 | #define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ | 396 | #define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ |
397 | #define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ | 397 | #define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ |
398 | #define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ | 398 | #define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ |
399 | #define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ | 399 | #define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ |
400 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ | 400 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ |
401 | #define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ | 401 | #define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ |
402 | #define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ | 402 | #define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ |
403 | #define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ | 403 | #define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ |
404 | #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ | 404 | #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ |
405 | #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ | 405 | #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ |
406 | 406 | ||
407 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 407 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
408 | (1 << QUEUE_FLAG_STACKABLE) | \ | 408 | (1 << QUEUE_FLAG_STACKABLE) | \ |
409 | (1 << QUEUE_FLAG_SAME_COMP) | \ | 409 | (1 << QUEUE_FLAG_SAME_COMP) | \ |
410 | (1 << QUEUE_FLAG_ADD_RANDOM)) | 410 | (1 << QUEUE_FLAG_ADD_RANDOM)) |
411 | 411 | ||
412 | static inline int queue_is_locked(struct request_queue *q) | 412 | static inline int queue_is_locked(struct request_queue *q) |
413 | { | 413 | { |
414 | #ifdef CONFIG_SMP | 414 | #ifdef CONFIG_SMP |
415 | spinlock_t *lock = q->queue_lock; | 415 | spinlock_t *lock = q->queue_lock; |
416 | return lock && spin_is_locked(lock); | 416 | return lock && spin_is_locked(lock); |
417 | #else | 417 | #else |
418 | return 1; | 418 | return 1; |
419 | #endif | 419 | #endif |
420 | } | 420 | } |
421 | 421 | ||
422 | static inline void queue_flag_set_unlocked(unsigned int flag, | 422 | static inline void queue_flag_set_unlocked(unsigned int flag, |
423 | struct request_queue *q) | 423 | struct request_queue *q) |
424 | { | 424 | { |
425 | __set_bit(flag, &q->queue_flags); | 425 | __set_bit(flag, &q->queue_flags); |
426 | } | 426 | } |
427 | 427 | ||
428 | static inline int queue_flag_test_and_clear(unsigned int flag, | 428 | static inline int queue_flag_test_and_clear(unsigned int flag, |
429 | struct request_queue *q) | 429 | struct request_queue *q) |
430 | { | 430 | { |
431 | WARN_ON_ONCE(!queue_is_locked(q)); | 431 | WARN_ON_ONCE(!queue_is_locked(q)); |
432 | 432 | ||
433 | if (test_bit(flag, &q->queue_flags)) { | 433 | if (test_bit(flag, &q->queue_flags)) { |
434 | __clear_bit(flag, &q->queue_flags); | 434 | __clear_bit(flag, &q->queue_flags); |
435 | return 1; | 435 | return 1; |
436 | } | 436 | } |
437 | 437 | ||
438 | return 0; | 438 | return 0; |
439 | } | 439 | } |
440 | 440 | ||
441 | static inline int queue_flag_test_and_set(unsigned int flag, | 441 | static inline int queue_flag_test_and_set(unsigned int flag, |
442 | struct request_queue *q) | 442 | struct request_queue *q) |
443 | { | 443 | { |
444 | WARN_ON_ONCE(!queue_is_locked(q)); | 444 | WARN_ON_ONCE(!queue_is_locked(q)); |
445 | 445 | ||
446 | if (!test_bit(flag, &q->queue_flags)) { | 446 | if (!test_bit(flag, &q->queue_flags)) { |
447 | __set_bit(flag, &q->queue_flags); | 447 | __set_bit(flag, &q->queue_flags); |
448 | return 0; | 448 | return 0; |
449 | } | 449 | } |
450 | 450 | ||
451 | return 1; | 451 | return 1; |
452 | } | 452 | } |
453 | 453 | ||
454 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) | 454 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) |
455 | { | 455 | { |
456 | WARN_ON_ONCE(!queue_is_locked(q)); | 456 | WARN_ON_ONCE(!queue_is_locked(q)); |
457 | __set_bit(flag, &q->queue_flags); | 457 | __set_bit(flag, &q->queue_flags); |
458 | } | 458 | } |
459 | 459 | ||
460 | static inline void queue_flag_clear_unlocked(unsigned int flag, | 460 | static inline void queue_flag_clear_unlocked(unsigned int flag, |
461 | struct request_queue *q) | 461 | struct request_queue *q) |
462 | { | 462 | { |
463 | __clear_bit(flag, &q->queue_flags); | 463 | __clear_bit(flag, &q->queue_flags); |
464 | } | 464 | } |
465 | 465 | ||
466 | static inline int queue_in_flight(struct request_queue *q) | 466 | static inline int queue_in_flight(struct request_queue *q) |
467 | { | 467 | { |
468 | return q->in_flight[0] + q->in_flight[1]; | 468 | return q->in_flight[0] + q->in_flight[1]; |
469 | } | 469 | } |
470 | 470 | ||
471 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | 471 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) |
472 | { | 472 | { |
473 | WARN_ON_ONCE(!queue_is_locked(q)); | 473 | WARN_ON_ONCE(!queue_is_locked(q)); |
474 | __clear_bit(flag, &q->queue_flags); | 474 | __clear_bit(flag, &q->queue_flags); |
475 | } | 475 | } |
476 | 476 | ||
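A short illustrative example (not part of this diff): a driver that knows its device has no seek penalty typically sets the relevant flags with the unlocked variants during queue setup, before any I/O can race with it; once the queue is live, the locked helpers above must be used under ->queue_lock instead.

	/* Illustrative only: hypothetical SSD driver queue setup. */
	static void ssddev_setup_queue(struct request_queue *q)
	{
		/* mark the queue non-rotational */
		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
		/* and don't feed its completions into the entropy pool */
		queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
	}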
477 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 477 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
478 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 478 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
479 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) | 479 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) |
480 | #define blk_queue_noxmerges(q) \ | 480 | #define blk_queue_noxmerges(q) \ |
481 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) | 481 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) |
482 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) | 482 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) |
483 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) | 483 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) |
484 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) | 484 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) |
485 | #define blk_queue_stackable(q) \ | 485 | #define blk_queue_stackable(q) \ |
486 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) | 486 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) |
487 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) | 487 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) |
488 | #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ | 488 | #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ |
489 | test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) | 489 | test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) |
490 | 490 | ||
491 | #define blk_noretry_request(rq) \ | 491 | #define blk_noretry_request(rq) \ |
492 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ | 492 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ |
493 | REQ_FAILFAST_DRIVER)) | 493 | REQ_FAILFAST_DRIVER)) |
494 | 494 | ||
495 | #define blk_account_rq(rq) \ | 495 | #define blk_account_rq(rq) \ |
496 | (((rq)->cmd_flags & REQ_STARTED) && \ | 496 | (((rq)->cmd_flags & REQ_STARTED) && \ |
497 | ((rq)->cmd_type == REQ_TYPE_FS || \ | 497 | ((rq)->cmd_type == REQ_TYPE_FS || \ |
498 | ((rq)->cmd_flags & REQ_DISCARD))) | 498 | ((rq)->cmd_flags & REQ_DISCARD))) |
499 | 499 | ||
500 | #define blk_pm_request(rq) \ | 500 | #define blk_pm_request(rq) \ |
501 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ | 501 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ |
502 | (rq)->cmd_type == REQ_TYPE_PM_RESUME) | 502 | (rq)->cmd_type == REQ_TYPE_PM_RESUME) |
503 | 503 | ||
504 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) | 504 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) |
505 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) | 505 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
506 | /* rq->queuelist of dequeued request must be list_empty() */ | 506 | /* rq->queuelist of dequeued request must be list_empty() */ |
507 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) | 507 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) |
508 | 508 | ||
509 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 509 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
510 | 510 | ||
511 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 511 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) |
512 | 512 | ||
513 | static inline unsigned int blk_queue_cluster(struct request_queue *q) | 513 | static inline unsigned int blk_queue_cluster(struct request_queue *q) |
514 | { | 514 | { |
515 | return q->limits.cluster; | 515 | return q->limits.cluster; |
516 | } | 516 | } |
517 | 517 | ||
518 | /* | 518 | /* |
519 | * We regard a request as sync if it is either a read or a sync write | 519 | * We regard a request as sync if it is either a read or a sync write |
520 | */ | 520 | */ |
521 | static inline bool rw_is_sync(unsigned int rw_flags) | 521 | static inline bool rw_is_sync(unsigned int rw_flags) |
522 | { | 522 | { |
523 | return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); | 523 | return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); |
524 | } | 524 | } |
525 | 525 | ||
526 | static inline bool rq_is_sync(struct request *rq) | 526 | static inline bool rq_is_sync(struct request *rq) |
527 | { | 527 | { |
528 | return rw_is_sync(rq->cmd_flags); | 528 | return rw_is_sync(rq->cmd_flags); |
529 | } | 529 | } |
530 | 530 | ||
531 | static inline int blk_queue_full(struct request_queue *q, int sync) | 531 | static inline int blk_queue_full(struct request_queue *q, int sync) |
532 | { | 532 | { |
533 | if (sync) | 533 | if (sync) |
534 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); | 534 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); |
535 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); | 535 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); |
536 | } | 536 | } |
537 | 537 | ||
538 | static inline void blk_set_queue_full(struct request_queue *q, int sync) | 538 | static inline void blk_set_queue_full(struct request_queue *q, int sync) |
539 | { | 539 | { |
540 | if (sync) | 540 | if (sync) |
541 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); | 541 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); |
542 | else | 542 | else |
543 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); | 543 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); |
544 | } | 544 | } |
545 | 545 | ||
546 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) | 546 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) |
547 | { | 547 | { |
548 | if (sync) | 548 | if (sync) |
549 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); | 549 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); |
550 | else | 550 | else |
551 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); | 551 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); |
552 | } | 552 | } |
553 | 553 | ||
554 | 554 | ||
555 | /* | 555 | /* |
556 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may | 556 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may |
557 | * it already be started by driver. | 557 | * it already be started by driver. |
558 | */ | 558 | */ |
559 | #define RQ_NOMERGE_FLAGS \ | 559 | #define RQ_NOMERGE_FLAGS \ |
560 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) | 560 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) |
561 | #define rq_mergeable(rq) \ | 561 | #define rq_mergeable(rq) \ |
562 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 562 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ |
563 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 563 | (((rq)->cmd_flags & REQ_DISCARD) || \ |
564 | (rq)->cmd_type == REQ_TYPE_FS)) | 564 | (rq)->cmd_type == REQ_TYPE_FS)) |
565 | 565 | ||
566 | /* | 566 | /* |
567 | * q->prep_rq_fn return values | 567 | * q->prep_rq_fn return values |
568 | */ | 568 | */ |
569 | #define BLKPREP_OK 0 /* serve it */ | 569 | #define BLKPREP_OK 0 /* serve it */ |
570 | #define BLKPREP_KILL 1 /* fatal error, kill */ | 570 | #define BLKPREP_KILL 1 /* fatal error, kill */ |
571 | #define BLKPREP_DEFER 2 /* leave on queue */ | 571 | #define BLKPREP_DEFER 2 /* leave on queue */ |
572 | 572 | ||
573 | extern unsigned long blk_max_low_pfn, blk_max_pfn; | 573 | extern unsigned long blk_max_low_pfn, blk_max_pfn; |
574 | 574 | ||
575 | /* | 575 | /* |
576 | * standard bounce addresses: | 576 | * standard bounce addresses: |
577 | * | 577 | * |
578 | * BLK_BOUNCE_HIGH : bounce all highmem pages | 578 | * BLK_BOUNCE_HIGH : bounce all highmem pages |
579 | * BLK_BOUNCE_ANY : don't bounce anything | 579 | * BLK_BOUNCE_ANY : don't bounce anything |
580 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary | 580 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary |
581 | */ | 581 | */ |
582 | 582 | ||
583 | #if BITS_PER_LONG == 32 | 583 | #if BITS_PER_LONG == 32 |
584 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) | 584 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) |
585 | #else | 585 | #else |
586 | #define BLK_BOUNCE_HIGH -1ULL | 586 | #define BLK_BOUNCE_HIGH -1ULL |
587 | #endif | 587 | #endif |
588 | #define BLK_BOUNCE_ANY (-1ULL) | 588 | #define BLK_BOUNCE_ANY (-1ULL) |
589 | #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) | 589 | #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) |
590 | 590 | ||
591 | /* | 591 | /* |
592 | * default timeout for SG_IO if none specified | 592 | * default timeout for SG_IO if none specified |
593 | */ | 593 | */ |
594 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) | 594 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) |
595 | #define BLK_MIN_SG_TIMEOUT (7 * HZ) | 595 | #define BLK_MIN_SG_TIMEOUT (7 * HZ) |
596 | 596 | ||
597 | #ifdef CONFIG_BOUNCE | 597 | #ifdef CONFIG_BOUNCE |
598 | extern int init_emergency_isa_pool(void); | 598 | extern int init_emergency_isa_pool(void); |
599 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); | 599 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); |
600 | #else | 600 | #else |
601 | static inline int init_emergency_isa_pool(void) | 601 | static inline int init_emergency_isa_pool(void) |
602 | { | 602 | { |
603 | return 0; | 603 | return 0; |
604 | } | 604 | } |
605 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) | 605 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) |
606 | { | 606 | { |
607 | } | 607 | } |
608 | #endif /* CONFIG_BOUNCE */ | 608 | #endif /* CONFIG_BOUNCE */ |
609 | 609 | ||
610 | struct rq_map_data { | 610 | struct rq_map_data { |
611 | struct page **pages; | 611 | struct page **pages; |
612 | int page_order; | 612 | int page_order; |
613 | int nr_entries; | 613 | int nr_entries; |
614 | unsigned long offset; | 614 | unsigned long offset; |
615 | int null_mapped; | 615 | int null_mapped; |
616 | int from_user; | 616 | int from_user; |
617 | }; | 617 | }; |
618 | 618 | ||
619 | struct req_iterator { | 619 | struct req_iterator { |
620 | int i; | 620 | int i; |
621 | struct bio *bio; | 621 | struct bio *bio; |
622 | }; | 622 | }; |
623 | 623 | ||
624 | /* This should not be used directly - use rq_for_each_segment */ | 624 | /* This should not be used directly - use rq_for_each_segment */ |
625 | #define for_each_bio(_bio) \ | 625 | #define for_each_bio(_bio) \ |
626 | for (; _bio; _bio = _bio->bi_next) | 626 | for (; _bio; _bio = _bio->bi_next) |
627 | #define __rq_for_each_bio(_bio, rq) \ | 627 | #define __rq_for_each_bio(_bio, rq) \ |
628 | if ((rq->bio)) \ | 628 | if ((rq->bio)) \ |
629 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 629 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
630 | 630 | ||
631 | #define rq_for_each_segment(bvl, _rq, _iter) \ | 631 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
632 | __rq_for_each_bio(_iter.bio, _rq) \ | 632 | __rq_for_each_bio(_iter.bio, _rq) \ |
633 | bio_for_each_segment(bvl, _iter.bio, _iter.i) | 633 | bio_for_each_segment(bvl, _iter.bio, _iter.i) |
634 | 634 | ||
635 | #define rq_iter_last(rq, _iter) \ | 635 | #define rq_iter_last(rq, _iter) \ |
636 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) | 636 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) |
637 | 637 | ||
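As an aside, not part of this diff: a minimal sketch of how a driver might walk a request with the iterator macros above. The helper name is hypothetical; the struct bio_vec pointer form matches bio_for_each_segment() of this kernel generation.

	/* hypothetical helper: sum the data bytes of every segment in a request */
	static unsigned int sketch_count_bytes(struct request *rq)
	{
		struct req_iterator iter;
		struct bio_vec *bvec;
		unsigned int bytes = 0;

		rq_for_each_segment(bvec, rq, iter)	/* every bio_vec of every bio */
			bytes += bvec->bv_len;

		return bytes;
	}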
638 | #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 638 | #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
639 | # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" | 639 | # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" |
640 | #endif | 640 | #endif |
641 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 641 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
642 | extern void rq_flush_dcache_pages(struct request *rq); | 642 | extern void rq_flush_dcache_pages(struct request *rq); |
643 | #else | 643 | #else |
644 | static inline void rq_flush_dcache_pages(struct request *rq) | 644 | static inline void rq_flush_dcache_pages(struct request *rq) |
645 | { | 645 | { |
646 | } | 646 | } |
647 | #endif | 647 | #endif |
648 | 648 | ||
649 | extern int blk_register_queue(struct gendisk *disk); | 649 | extern int blk_register_queue(struct gendisk *disk); |
650 | extern void blk_unregister_queue(struct gendisk *disk); | 650 | extern void blk_unregister_queue(struct gendisk *disk); |
651 | extern void generic_make_request(struct bio *bio); | 651 | extern void generic_make_request(struct bio *bio); |
652 | extern void blk_rq_init(struct request_queue *q, struct request *rq); | 652 | extern void blk_rq_init(struct request_queue *q, struct request *rq); |
653 | extern void blk_put_request(struct request *); | 653 | extern void blk_put_request(struct request *); |
654 | extern void __blk_put_request(struct request_queue *, struct request *); | 654 | extern void __blk_put_request(struct request_queue *, struct request *); |
655 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); | 655 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
656 | extern struct request *blk_make_request(struct request_queue *, struct bio *, | 656 | extern struct request *blk_make_request(struct request_queue *, struct bio *, |
657 | gfp_t); | 657 | gfp_t); |
658 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | 658 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); |
659 | extern void blk_requeue_request(struct request_queue *, struct request *); | 659 | extern void blk_requeue_request(struct request_queue *, struct request *); |
660 | extern void blk_add_request_payload(struct request *rq, struct page *page, | 660 | extern void blk_add_request_payload(struct request *rq, struct page *page, |
661 | unsigned int len); | 661 | unsigned int len); |
662 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); | 662 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); |
663 | extern int blk_lld_busy(struct request_queue *q); | 663 | extern int blk_lld_busy(struct request_queue *q); |
664 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 664 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
665 | struct bio_set *bs, gfp_t gfp_mask, | 665 | struct bio_set *bs, gfp_t gfp_mask, |
666 | int (*bio_ctr)(struct bio *, struct bio *, void *), | 666 | int (*bio_ctr)(struct bio *, struct bio *, void *), |
667 | void *data); | 667 | void *data); |
668 | extern void blk_rq_unprep_clone(struct request *rq); | 668 | extern void blk_rq_unprep_clone(struct request *rq); |
669 | extern int blk_insert_cloned_request(struct request_queue *q, | 669 | extern int blk_insert_cloned_request(struct request_queue *q, |
670 | struct request *rq); | 670 | struct request *rq); |
671 | extern void blk_delay_queue(struct request_queue *, unsigned long); | 671 | extern void blk_delay_queue(struct request_queue *, unsigned long); |
672 | extern void blk_recount_segments(struct request_queue *, struct bio *); | 672 | extern void blk_recount_segments(struct request_queue *, struct bio *); |
673 | extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, | 673 | extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, |
674 | unsigned int, void __user *); | 674 | unsigned int, void __user *); |
675 | extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, | 675 | extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, |
676 | struct scsi_ioctl_command __user *); | 676 | struct scsi_ioctl_command __user *); |
677 | 677 | ||
678 | /* | 678 | /* |
679 | * A queue has just exited congestion. Note this in the global counter of | 679 | * A queue has just exited congestion. Note this in the global counter of |
680 | * congested queues, and wake up anyone who was waiting for requests to be | 680 | * congested queues, and wake up anyone who was waiting for requests to be |
681 | * put back. | 681 | * put back. |
682 | */ | 682 | */ |
683 | static inline void blk_clear_queue_congested(struct request_queue *q, int sync) | 683 | static inline void blk_clear_queue_congested(struct request_queue *q, int sync) |
684 | { | 684 | { |
685 | clear_bdi_congested(&q->backing_dev_info, sync); | 685 | clear_bdi_congested(&q->backing_dev_info, sync); |
686 | } | 686 | } |
687 | 687 | ||
688 | /* | 688 | /* |
689 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 689 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
690 | * state flags and increment the global counter of congested queues. | 690 | * state flags and increment the global counter of congested queues. |
691 | */ | 691 | */ |
692 | static inline void blk_set_queue_congested(struct request_queue *q, int sync) | 692 | static inline void blk_set_queue_congested(struct request_queue *q, int sync) |
693 | { | 693 | { |
694 | set_bdi_congested(&q->backing_dev_info, sync); | 694 | set_bdi_congested(&q->backing_dev_info, sync); |
695 | } | 695 | } |
696 | 696 | ||
697 | extern void blk_start_queue(struct request_queue *q); | 697 | extern void blk_start_queue(struct request_queue *q); |
698 | extern void blk_stop_queue(struct request_queue *q); | 698 | extern void blk_stop_queue(struct request_queue *q); |
699 | extern void blk_sync_queue(struct request_queue *q); | 699 | extern void blk_sync_queue(struct request_queue *q); |
700 | extern void __blk_stop_queue(struct request_queue *q); | 700 | extern void __blk_stop_queue(struct request_queue *q); |
701 | extern void __blk_run_queue(struct request_queue *q); | 701 | extern void __blk_run_queue(struct request_queue *q); |
702 | extern void blk_run_queue(struct request_queue *); | 702 | extern void blk_run_queue(struct request_queue *); |
703 | extern void blk_run_queue_async(struct request_queue *q); | 703 | extern void blk_run_queue_async(struct request_queue *q); |
704 | extern int blk_rq_map_user(struct request_queue *, struct request *, | 704 | extern int blk_rq_map_user(struct request_queue *, struct request *, |
705 | struct rq_map_data *, void __user *, unsigned long, | 705 | struct rq_map_data *, void __user *, unsigned long, |
706 | gfp_t); | 706 | gfp_t); |
707 | extern int blk_rq_unmap_user(struct bio *); | 707 | extern int blk_rq_unmap_user(struct bio *); |
708 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); | 708 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); |
709 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, | 709 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, |
710 | struct rq_map_data *, struct sg_iovec *, int, | 710 | struct rq_map_data *, struct sg_iovec *, int, |
711 | unsigned int, gfp_t); | 711 | unsigned int, gfp_t); |
712 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, | 712 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, |
713 | struct request *, int); | 713 | struct request *, int); |
714 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, | 714 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, |
715 | struct request *, int, rq_end_io_fn *); | 715 | struct request *, int, rq_end_io_fn *); |
716 | 716 | ||
717 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) | 717 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) |
718 | { | 718 | { |
719 | return bdev->bd_disk->queue; | 719 | return bdev->bd_disk->queue; |
720 | } | 720 | } |
721 | 721 | ||
722 | /* | 722 | /* |
723 | * blk_rq_pos() : the current sector | 723 | * blk_rq_pos() : the current sector |
724 | * blk_rq_bytes() : bytes left in the entire request | 724 | * blk_rq_bytes() : bytes left in the entire request |
725 | * blk_rq_cur_bytes() : bytes left in the current segment | 725 | * blk_rq_cur_bytes() : bytes left in the current segment |
726 | * blk_rq_err_bytes() : bytes left till the next error boundary | 726 | * blk_rq_err_bytes() : bytes left till the next error boundary |
727 | * blk_rq_sectors() : sectors left in the entire request | 727 | * blk_rq_sectors() : sectors left in the entire request |
728 | * blk_rq_cur_sectors() : sectors left in the current segment | 728 | * blk_rq_cur_sectors() : sectors left in the current segment |
729 | */ | 729 | */ |
730 | static inline sector_t blk_rq_pos(const struct request *rq) | 730 | static inline sector_t blk_rq_pos(const struct request *rq) |
731 | { | 731 | { |
732 | return rq->__sector; | 732 | return rq->__sector; |
733 | } | 733 | } |
734 | 734 | ||
735 | static inline unsigned int blk_rq_bytes(const struct request *rq) | 735 | static inline unsigned int blk_rq_bytes(const struct request *rq) |
736 | { | 736 | { |
737 | return rq->__data_len; | 737 | return rq->__data_len; |
738 | } | 738 | } |
739 | 739 | ||
740 | static inline int blk_rq_cur_bytes(const struct request *rq) | 740 | static inline int blk_rq_cur_bytes(const struct request *rq) |
741 | { | 741 | { |
742 | return rq->bio ? bio_cur_bytes(rq->bio) : 0; | 742 | return rq->bio ? bio_cur_bytes(rq->bio) : 0; |
743 | } | 743 | } |
744 | 744 | ||
745 | extern unsigned int blk_rq_err_bytes(const struct request *rq); | 745 | extern unsigned int blk_rq_err_bytes(const struct request *rq); |
746 | 746 | ||
747 | static inline unsigned int blk_rq_sectors(const struct request *rq) | 747 | static inline unsigned int blk_rq_sectors(const struct request *rq) |
748 | { | 748 | { |
749 | return blk_rq_bytes(rq) >> 9; | 749 | return blk_rq_bytes(rq) >> 9; |
750 | } | 750 | } |
751 | 751 | ||
752 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | 752 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) |
753 | { | 753 | { |
754 | return blk_rq_cur_bytes(rq) >> 9; | 754 | return blk_rq_cur_bytes(rq) >> 9; |
755 | } | 755 | } |
756 | 756 | ||
757 | /* | 757 | /* |
758 | * Request issue related functions. | 758 | * Request issue related functions. |
759 | */ | 759 | */ |
760 | extern struct request *blk_peek_request(struct request_queue *q); | 760 | extern struct request *blk_peek_request(struct request_queue *q); |
761 | extern void blk_start_request(struct request *rq); | 761 | extern void blk_start_request(struct request *rq); |
762 | extern struct request *blk_fetch_request(struct request_queue *q); | 762 | extern struct request *blk_fetch_request(struct request_queue *q); |
763 | 763 | ||
764 | /* | 764 | /* |
765 | * Request completion related functions. | 765 | * Request completion related functions. |
766 | * | 766 | * |
767 | * blk_update_request() completes given number of bytes and updates | 767 | * blk_update_request() completes given number of bytes and updates |
768 | * the request without completing it. | 768 | * the request without completing it. |
769 | * | 769 | * |
770 | * blk_end_request() and friends. __blk_end_request() must be called | 770 | * blk_end_request() and friends. __blk_end_request() must be called |
771 | * with the request queue spinlock acquired. | 771 | * with the request queue spinlock acquired. |
772 | * | 772 | * |
773 | * Several drivers define their own end_request and call | 773 | * Several drivers define their own end_request and call |
774 | * blk_end_request() for parts of the original function. | 774 | * blk_end_request() for parts of the original function. |
775 | * This prevents code duplication in drivers. | 775 | * This prevents code duplication in drivers. |
776 | */ | 776 | */ |
777 | extern bool blk_update_request(struct request *rq, int error, | 777 | extern bool blk_update_request(struct request *rq, int error, |
778 | unsigned int nr_bytes); | 778 | unsigned int nr_bytes); |
779 | extern bool blk_end_request(struct request *rq, int error, | 779 | extern bool blk_end_request(struct request *rq, int error, |
780 | unsigned int nr_bytes); | 780 | unsigned int nr_bytes); |
781 | extern void blk_end_request_all(struct request *rq, int error); | 781 | extern void blk_end_request_all(struct request *rq, int error); |
782 | extern bool blk_end_request_cur(struct request *rq, int error); | 782 | extern bool blk_end_request_cur(struct request *rq, int error); |
783 | extern bool blk_end_request_err(struct request *rq, int error); | 783 | extern bool blk_end_request_err(struct request *rq, int error); |
784 | extern bool __blk_end_request(struct request *rq, int error, | 784 | extern bool __blk_end_request(struct request *rq, int error, |
785 | unsigned int nr_bytes); | 785 | unsigned int nr_bytes); |
786 | extern void __blk_end_request_all(struct request *rq, int error); | 786 | extern void __blk_end_request_all(struct request *rq, int error); |
787 | extern bool __blk_end_request_cur(struct request *rq, int error); | 787 | extern bool __blk_end_request_cur(struct request *rq, int error); |
788 | extern bool __blk_end_request_err(struct request *rq, int error); | 788 | extern bool __blk_end_request_err(struct request *rq, int error); |
789 | 789 | ||
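For orientation only, not from this change: a minimal request_fn sketch tying the issue and completion helpers together. The function name is hypothetical, the queue lock is assumed to be held on entry as the API requires, and the actual data transfer is elided.

	static void sketch_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			if (rq->cmd_type != REQ_TYPE_FS) {
				__blk_end_request_all(rq, -EIO);	/* not a fs request */
				continue;
			}
			/* transfer blk_rq_sectors(rq) sectors starting at blk_rq_pos(rq) */
			__blk_end_request_all(rq, 0);	/* ok: queue lock is still held */
		}
	}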
790 | extern void blk_complete_request(struct request *); | 790 | extern void blk_complete_request(struct request *); |
791 | extern void __blk_complete_request(struct request *); | 791 | extern void __blk_complete_request(struct request *); |
792 | extern void blk_abort_request(struct request *); | 792 | extern void blk_abort_request(struct request *); |
793 | extern void blk_abort_queue(struct request_queue *); | 793 | extern void blk_abort_queue(struct request_queue *); |
794 | extern void blk_unprep_request(struct request *); | 794 | extern void blk_unprep_request(struct request *); |
795 | 795 | ||
796 | /* | 796 | /* |
797 | * Access functions for manipulating queue properties | 797 | * Access functions for manipulating queue properties |
798 | */ | 798 | */ |
799 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, | 799 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, |
800 | spinlock_t *lock, int node_id); | 800 | spinlock_t *lock, int node_id); |
801 | extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, | 801 | extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, |
802 | request_fn_proc *, | 802 | request_fn_proc *, |
803 | spinlock_t *, int node_id); | 803 | spinlock_t *, int node_id); |
804 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); | 804 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); |
805 | extern struct request_queue *blk_init_allocated_queue(struct request_queue *, | 805 | extern struct request_queue *blk_init_allocated_queue(struct request_queue *, |
806 | request_fn_proc *, spinlock_t *); | 806 | request_fn_proc *, spinlock_t *); |
807 | extern void blk_cleanup_queue(struct request_queue *); | 807 | extern void blk_cleanup_queue(struct request_queue *); |
808 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); | 808 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); |
809 | extern void blk_queue_bounce_limit(struct request_queue *, u64); | 809 | extern void blk_queue_bounce_limit(struct request_queue *, u64); |
810 | extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); | 810 | extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); |
811 | extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); | 811 | extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); |
812 | extern void blk_queue_max_segments(struct request_queue *, unsigned short); | 812 | extern void blk_queue_max_segments(struct request_queue *, unsigned short); |
813 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 813 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
814 | extern void blk_queue_max_discard_sectors(struct request_queue *q, | 814 | extern void blk_queue_max_discard_sectors(struct request_queue *q, |
815 | unsigned int max_discard_sectors); | 815 | unsigned int max_discard_sectors); |
816 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); | 816 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); |
817 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); | 817 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); |
818 | extern void blk_queue_alignment_offset(struct request_queue *q, | 818 | extern void blk_queue_alignment_offset(struct request_queue *q, |
819 | unsigned int alignment); | 819 | unsigned int alignment); |
820 | extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); | 820 | extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); |
821 | extern void blk_queue_io_min(struct request_queue *q, unsigned int min); | 821 | extern void blk_queue_io_min(struct request_queue *q, unsigned int min); |
822 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); | 822 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); |
823 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); | 823 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); |
824 | extern void blk_set_default_limits(struct queue_limits *lim); | 824 | extern void blk_set_default_limits(struct queue_limits *lim); |
825 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | 825 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
826 | sector_t offset); | 826 | sector_t offset); |
827 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, | 827 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, |
828 | sector_t offset); | 828 | sector_t offset); |
829 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | 829 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, |
830 | sector_t offset); | 830 | sector_t offset); |
831 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 831 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
832 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); | 832 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); |
833 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); | 833 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); |
834 | extern int blk_queue_dma_drain(struct request_queue *q, | 834 | extern int blk_queue_dma_drain(struct request_queue *q, |
835 | dma_drain_needed_fn *dma_drain_needed, | 835 | dma_drain_needed_fn *dma_drain_needed, |
836 | void *buf, unsigned int size); | 836 | void *buf, unsigned int size); |
837 | extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); | 837 | extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); |
838 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); | 838 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); |
839 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); | 839 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); |
840 | extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); | 840 | extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); |
841 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | 841 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); |
842 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 842 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
843 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); | 843 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); |
844 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 844 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
845 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); | 845 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); |
846 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); | 846 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); |
847 | extern void blk_queue_flush(struct request_queue *q, unsigned int flush); | 847 | extern void blk_queue_flush(struct request_queue *q, unsigned int flush); |
848 | extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); | 848 | extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); |
849 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 849 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
850 | 850 | ||
851 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 851 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
852 | extern void blk_dump_rq_flags(struct request *, char *); | 852 | extern void blk_dump_rq_flags(struct request *, char *); |
853 | extern long nr_blockdev_pages(void); | 853 | extern long nr_blockdev_pages(void); |
854 | 854 | ||
855 | int blk_get_queue(struct request_queue *); | 855 | int blk_get_queue(struct request_queue *); |
856 | struct request_queue *blk_alloc_queue(gfp_t); | 856 | struct request_queue *blk_alloc_queue(gfp_t); |
857 | struct request_queue *blk_alloc_queue_node(gfp_t, int); | 857 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
858 | extern void blk_put_queue(struct request_queue *); | 858 | extern void blk_put_queue(struct request_queue *); |
859 | 859 | ||
860 | struct blk_plug { | 860 | struct blk_plug { |
861 | unsigned long magic; | 861 | unsigned long magic; |
862 | struct list_head list; | 862 | struct list_head list; |
863 | struct list_head cb_list; | 863 | struct list_head cb_list; |
864 | unsigned int should_sort; | 864 | unsigned int should_sort; |
| | 865 | unsigned int count; |
865 | }; | 866 | }; |
| | 867 | #define BLK_MAX_REQUEST_COUNT 16 |
| | 868 | |
866 | struct blk_plug_cb { | 869 | struct blk_plug_cb { |
867 | struct list_head list; | 870 | struct list_head list; |
868 | void (*callback)(struct blk_plug_cb *); | 871 | void (*callback)(struct blk_plug_cb *); |
869 | }; | 872 | }; |
870 | 873 | ||
871 | extern void blk_start_plug(struct blk_plug *); | 874 | extern void blk_start_plug(struct blk_plug *); |
872 | extern void blk_finish_plug(struct blk_plug *); | 875 | extern void blk_finish_plug(struct blk_plug *); |
873 | extern void blk_flush_plug_list(struct blk_plug *, bool); | 876 | extern void blk_flush_plug_list(struct blk_plug *, bool); |
874 | 877 | ||
875 | static inline void blk_flush_plug(struct task_struct *tsk) | 878 | static inline void blk_flush_plug(struct task_struct *tsk) |
876 | { | 879 | { |
877 | struct blk_plug *plug = tsk->plug; | 880 | struct blk_plug *plug = tsk->plug; |
878 | 881 | ||
879 | if (plug) | 882 | if (plug) |
880 | blk_flush_plug_list(plug, false); | 883 | blk_flush_plug_list(plug, false); |
881 | } | 884 | } |
882 | 885 | ||
883 | static inline void blk_schedule_flush_plug(struct task_struct *tsk) | 886 | static inline void blk_schedule_flush_plug(struct task_struct *tsk) |
884 | { | 887 | { |
885 | struct blk_plug *plug = tsk->plug; | 888 | struct blk_plug *plug = tsk->plug; |
886 | 889 | ||
887 | if (plug) | 890 | if (plug) |
888 | blk_flush_plug_list(plug, true); | 891 | blk_flush_plug_list(plug, true); |
889 | } | 892 | } |
890 | 893 | ||
891 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) | 894 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) |
892 | { | 895 | { |
893 | struct blk_plug *plug = tsk->plug; | 896 | struct blk_plug *plug = tsk->plug; |
894 | 897 | ||
895 | return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); | 898 | return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); |
896 | } | 899 | } |
897 | 900 | ||
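This is the interface the new count/BLK_MAX_REQUEST_COUNT pair feeds into. As an illustration (the submit helper is hypothetical, not from this diff), a submitter brackets its I/O with a plug as below; with this commit the plugged list is also flushed automatically once 16 requests accumulate, instead of growing until blk_finish_plug() or a schedule.

	static void submit_many(struct bio **bios, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* requests now collect on current->plug */
		for (i = 0; i < nr; i++)
			submit_bio(WRITE, bios[i]);	/* assume prepared write bios */
		blk_finish_plug(&plug);		/* push whatever is still plugged */
	}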
898 | /* | 901 | /* |
899 | * tag stuff | 902 | * tag stuff |
900 | */ | 903 | */ |
901 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 904 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
902 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 905 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
903 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 906 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
904 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 907 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
905 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); | 908 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); |
906 | extern void blk_queue_free_tags(struct request_queue *); | 909 | extern void blk_queue_free_tags(struct request_queue *); |
907 | extern int blk_queue_resize_tags(struct request_queue *, int); | 910 | extern int blk_queue_resize_tags(struct request_queue *, int); |
908 | extern void blk_queue_invalidate_tags(struct request_queue *); | 911 | extern void blk_queue_invalidate_tags(struct request_queue *); |
909 | extern struct blk_queue_tag *blk_init_tags(int); | 912 | extern struct blk_queue_tag *blk_init_tags(int); |
910 | extern void blk_free_tags(struct blk_queue_tag *); | 913 | extern void blk_free_tags(struct blk_queue_tag *); |
911 | 914 | ||
912 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | 915 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, |
913 | int tag) | 916 | int tag) |
914 | { | 917 | { |
915 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 918 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
916 | return NULL; | 919 | return NULL; |
917 | return bqt->tag_index[tag]; | 920 | return bqt->tag_index[tag]; |
918 | } | 921 | } |
919 | 922 | ||
920 | #define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ | 923 | #define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ |
921 | 924 | ||
922 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); | 925 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); |
923 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | 926 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, |
924 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 927 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
925 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 928 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
926 | sector_t nr_sects, gfp_t gfp_mask); | 929 | sector_t nr_sects, gfp_t gfp_mask); |
927 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, | 930 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, |
928 | sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) | 931 | sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) |
929 | { | 932 | { |
930 | return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), | 933 | return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), |
931 | nr_blocks << (sb->s_blocksize_bits - 9), | 934 | nr_blocks << (sb->s_blocksize_bits - 9), |
932 | gfp_mask, flags); | 935 | gfp_mask, flags); |
933 | } | 936 | } |
934 | static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, | 937 | static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, |
935 | sector_t nr_blocks, gfp_t gfp_mask) | 938 | sector_t nr_blocks, gfp_t gfp_mask) |
936 | { | 939 | { |
937 | return blkdev_issue_zeroout(sb->s_bdev, | 940 | return blkdev_issue_zeroout(sb->s_bdev, |
938 | block << (sb->s_blocksize_bits - 9), | 941 | block << (sb->s_blocksize_bits - 9), |
939 | nr_blocks << (sb->s_blocksize_bits - 9), | 942 | nr_blocks << (sb->s_blocksize_bits - 9), |
940 | gfp_mask); | 943 | gfp_mask); |
941 | } | 944 | } |
942 | 945 | ||
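A minimal sketch of a filesystem-side caller of the wrapper above; the function name and the GFP_NOFS choice are assumptions for illustration.

	/* hypothetical: discard nr_blks filesystem blocks starting at start_blk */
	static int sketch_trim_range(struct super_block *sb, sector_t start_blk,
				     sector_t nr_blks)
	{
		return sb_issue_discard(sb, start_blk, nr_blks, GFP_NOFS, 0);
	}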
943 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); | 946 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); |
944 | 947 | ||
945 | enum blk_default_limits { | 948 | enum blk_default_limits { |
946 | BLK_MAX_SEGMENTS = 128, | 949 | BLK_MAX_SEGMENTS = 128, |
947 | BLK_SAFE_MAX_SECTORS = 255, | 950 | BLK_SAFE_MAX_SECTORS = 255, |
948 | BLK_DEF_MAX_SECTORS = 1024, | 951 | BLK_DEF_MAX_SECTORS = 1024, |
949 | BLK_MAX_SEGMENT_SIZE = 65536, | 952 | BLK_MAX_SEGMENT_SIZE = 65536, |
950 | BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, | 953 | BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, |
951 | }; | 954 | }; |
952 | 955 | ||
953 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 956 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
954 | 957 | ||
955 | static inline unsigned long queue_bounce_pfn(struct request_queue *q) | 958 | static inline unsigned long queue_bounce_pfn(struct request_queue *q) |
956 | { | 959 | { |
957 | return q->limits.bounce_pfn; | 960 | return q->limits.bounce_pfn; |
958 | } | 961 | } |
959 | 962 | ||
960 | static inline unsigned long queue_segment_boundary(struct request_queue *q) | 963 | static inline unsigned long queue_segment_boundary(struct request_queue *q) |
961 | { | 964 | { |
962 | return q->limits.seg_boundary_mask; | 965 | return q->limits.seg_boundary_mask; |
963 | } | 966 | } |
964 | 967 | ||
965 | static inline unsigned int queue_max_sectors(struct request_queue *q) | 968 | static inline unsigned int queue_max_sectors(struct request_queue *q) |
966 | { | 969 | { |
967 | return q->limits.max_sectors; | 970 | return q->limits.max_sectors; |
968 | } | 971 | } |
969 | 972 | ||
970 | static inline unsigned int queue_max_hw_sectors(struct request_queue *q) | 973 | static inline unsigned int queue_max_hw_sectors(struct request_queue *q) |
971 | { | 974 | { |
972 | return q->limits.max_hw_sectors; | 975 | return q->limits.max_hw_sectors; |
973 | } | 976 | } |
974 | 977 | ||
975 | static inline unsigned short queue_max_segments(struct request_queue *q) | 978 | static inline unsigned short queue_max_segments(struct request_queue *q) |
976 | { | 979 | { |
977 | return q->limits.max_segments; | 980 | return q->limits.max_segments; |
978 | } | 981 | } |
979 | 982 | ||
980 | static inline unsigned int queue_max_segment_size(struct request_queue *q) | 983 | static inline unsigned int queue_max_segment_size(struct request_queue *q) |
981 | { | 984 | { |
982 | return q->limits.max_segment_size; | 985 | return q->limits.max_segment_size; |
983 | } | 986 | } |
984 | 987 | ||
985 | static inline unsigned short queue_logical_block_size(struct request_queue *q) | 988 | static inline unsigned short queue_logical_block_size(struct request_queue *q) |
986 | { | 989 | { |
987 | int retval = 512; | 990 | int retval = 512; |
988 | 991 | ||
989 | if (q && q->limits.logical_block_size) | 992 | if (q && q->limits.logical_block_size) |
990 | retval = q->limits.logical_block_size; | 993 | retval = q->limits.logical_block_size; |
991 | 994 | ||
992 | return retval; | 995 | return retval; |
993 | } | 996 | } |
994 | 997 | ||
995 | static inline unsigned short bdev_logical_block_size(struct block_device *bdev) | 998 | static inline unsigned short bdev_logical_block_size(struct block_device *bdev) |
996 | { | 999 | { |
997 | return queue_logical_block_size(bdev_get_queue(bdev)); | 1000 | return queue_logical_block_size(bdev_get_queue(bdev)); |
998 | } | 1001 | } |
999 | 1002 | ||
1000 | static inline unsigned int queue_physical_block_size(struct request_queue *q) | 1003 | static inline unsigned int queue_physical_block_size(struct request_queue *q) |
1001 | { | 1004 | { |
1002 | return q->limits.physical_block_size; | 1005 | return q->limits.physical_block_size; |
1003 | } | 1006 | } |
1004 | 1007 | ||
1005 | static inline unsigned int bdev_physical_block_size(struct block_device *bdev) | 1008 | static inline unsigned int bdev_physical_block_size(struct block_device *bdev) |
1006 | { | 1009 | { |
1007 | return queue_physical_block_size(bdev_get_queue(bdev)); | 1010 | return queue_physical_block_size(bdev_get_queue(bdev)); |
1008 | } | 1011 | } |
1009 | 1012 | ||
1010 | static inline unsigned int queue_io_min(struct request_queue *q) | 1013 | static inline unsigned int queue_io_min(struct request_queue *q) |
1011 | { | 1014 | { |
1012 | return q->limits.io_min; | 1015 | return q->limits.io_min; |
1013 | } | 1016 | } |
1014 | 1017 | ||
1015 | static inline int bdev_io_min(struct block_device *bdev) | 1018 | static inline int bdev_io_min(struct block_device *bdev) |
1016 | { | 1019 | { |
1017 | return queue_io_min(bdev_get_queue(bdev)); | 1020 | return queue_io_min(bdev_get_queue(bdev)); |
1018 | } | 1021 | } |
1019 | 1022 | ||
1020 | static inline unsigned int queue_io_opt(struct request_queue *q) | 1023 | static inline unsigned int queue_io_opt(struct request_queue *q) |
1021 | { | 1024 | { |
1022 | return q->limits.io_opt; | 1025 | return q->limits.io_opt; |
1023 | } | 1026 | } |
1024 | 1027 | ||
1025 | static inline int bdev_io_opt(struct block_device *bdev) | 1028 | static inline int bdev_io_opt(struct block_device *bdev) |
1026 | { | 1029 | { |
1027 | return queue_io_opt(bdev_get_queue(bdev)); | 1030 | return queue_io_opt(bdev_get_queue(bdev)); |
1028 | } | 1031 | } |
1029 | 1032 | ||
1030 | static inline int queue_alignment_offset(struct request_queue *q) | 1033 | static inline int queue_alignment_offset(struct request_queue *q) |
1031 | { | 1034 | { |
1032 | if (q->limits.misaligned) | 1035 | if (q->limits.misaligned) |
1033 | return -1; | 1036 | return -1; |
1034 | 1037 | ||
1035 | return q->limits.alignment_offset; | 1038 | return q->limits.alignment_offset; |
1036 | } | 1039 | } |
1037 | 1040 | ||
1038 | static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) | 1041 | static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) |
1039 | { | 1042 | { |
1040 | unsigned int granularity = max(lim->physical_block_size, lim->io_min); | 1043 | unsigned int granularity = max(lim->physical_block_size, lim->io_min); |
1041 | unsigned int alignment = (sector << 9) & (granularity - 1); | 1044 | unsigned int alignment = (sector << 9) & (granularity - 1); |
1042 | 1045 | ||
1043 | return (granularity + lim->alignment_offset - alignment) | 1046 | return (granularity + lim->alignment_offset - alignment) |
1044 | & (granularity - 1); | 1047 | & (granularity - 1); |
1045 | } | 1048 | } |
1046 | 1049 | ||
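Plugging in illustrative numbers (not taken from this diff): with physical_block_size = io_min = 4096 and alignment_offset = 0, a sector at byte offset 3584 yields alignment = 3584 and the helper returns (4096 + 0 - 3584) & 4095 = 512, i.e. the position is 512 bytes short of the next aligned boundary; a 4096-aligned sector returns 0.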
1047 | static inline int bdev_alignment_offset(struct block_device *bdev) | 1050 | static inline int bdev_alignment_offset(struct block_device *bdev) |
1048 | { | 1051 | { |
1049 | struct request_queue *q = bdev_get_queue(bdev); | 1052 | struct request_queue *q = bdev_get_queue(bdev); |
1050 | 1053 | ||
1051 | if (q->limits.misaligned) | 1054 | if (q->limits.misaligned) |
1052 | return -1; | 1055 | return -1; |
1053 | 1056 | ||
1054 | if (bdev != bdev->bd_contains) | 1057 | if (bdev != bdev->bd_contains) |
1055 | return bdev->bd_part->alignment_offset; | 1058 | return bdev->bd_part->alignment_offset; |
1056 | 1059 | ||
1057 | return q->limits.alignment_offset; | 1060 | return q->limits.alignment_offset; |
1058 | } | 1061 | } |
1059 | 1062 | ||
1060 | static inline int queue_discard_alignment(struct request_queue *q) | 1063 | static inline int queue_discard_alignment(struct request_queue *q) |
1061 | { | 1064 | { |
1062 | if (q->limits.discard_misaligned) | 1065 | if (q->limits.discard_misaligned) |
1063 | return -1; | 1066 | return -1; |
1064 | 1067 | ||
1065 | return q->limits.discard_alignment; | 1068 | return q->limits.discard_alignment; |
1066 | } | 1069 | } |
1067 | 1070 | ||
1068 | static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) | 1071 | static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) |
1069 | { | 1072 | { |
1070 | unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); | 1073 | unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); |
1071 | 1074 | ||
1072 | if (!lim->max_discard_sectors) | 1075 | if (!lim->max_discard_sectors) |
1073 | return 0; | 1076 | return 0; |
1074 | 1077 | ||
1075 | return (lim->discard_granularity + lim->discard_alignment - alignment) | 1078 | return (lim->discard_granularity + lim->discard_alignment - alignment) |
1076 | & (lim->discard_granularity - 1); | 1079 | & (lim->discard_granularity - 1); |
1077 | } | 1080 | } |
1078 | 1081 | ||
1079 | static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) | 1082 | static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) |
1080 | { | 1083 | { |
1081 | if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) | 1084 | if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) |
1082 | return 1; | 1085 | return 1; |
1083 | 1086 | ||
1084 | return 0; | 1087 | return 0; |
1085 | } | 1088 | } |
1086 | 1089 | ||
1087 | static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) | 1090 | static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) |
1088 | { | 1091 | { |
1089 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); | 1092 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); |
1090 | } | 1093 | } |
1091 | 1094 | ||
1092 | static inline int queue_dma_alignment(struct request_queue *q) | 1095 | static inline int queue_dma_alignment(struct request_queue *q) |
1093 | { | 1096 | { |
1094 | return q ? q->dma_alignment : 511; | 1097 | return q ? q->dma_alignment : 511; |
1095 | } | 1098 | } |
1096 | 1099 | ||
1097 | static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, | 1100 | static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, |
1098 | unsigned int len) | 1101 | unsigned int len) |
1099 | { | 1102 | { |
1100 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; | 1103 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; |
1101 | return !(addr & alignment) && !(len & alignment); | 1104 | return !(addr & alignment) && !(len & alignment); |
1102 | } | 1105 | } |
1103 | 1106 | ||
1104 | /* assumes size > 256 */ | 1107 | /* assumes size > 256 */ |
1105 | static inline unsigned int blksize_bits(unsigned int size) | 1108 | static inline unsigned int blksize_bits(unsigned int size) |
1106 | { | 1109 | { |
1107 | unsigned int bits = 8; | 1110 | unsigned int bits = 8; |
1108 | do { | 1111 | do { |
1109 | bits++; | 1112 | bits++; |
1110 | size >>= 1; | 1113 | size >>= 1; |
1111 | } while (size > 256); | 1114 | } while (size > 256); |
1112 | return bits; | 1115 | return bits; |
1113 | } | 1116 | } |
1114 | 1117 | ||
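For example, blksize_bits(512) returns 9 and blksize_bits(4096) returns 12; the loop stops once size has been shifted down to 256, which is why the helper assumes size > 256.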
1115 | static inline unsigned int block_size(struct block_device *bdev) | 1118 | static inline unsigned int block_size(struct block_device *bdev) |
1116 | { | 1119 | { |
1117 | return bdev->bd_block_size; | 1120 | return bdev->bd_block_size; |
1118 | } | 1121 | } |
1119 | 1122 | ||
1120 | static inline bool queue_flush_queueable(struct request_queue *q) | 1123 | static inline bool queue_flush_queueable(struct request_queue *q) |
1121 | { | 1124 | { |
1122 | return !q->flush_not_queueable; | 1125 | return !q->flush_not_queueable; |
1123 | } | 1126 | } |
1124 | 1127 | ||
1125 | typedef struct {struct page *v;} Sector; | 1128 | typedef struct {struct page *v;} Sector; |
1126 | 1129 | ||
1127 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 1130 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
1128 | 1131 | ||
1129 | static inline void put_dev_sector(Sector p) | 1132 | static inline void put_dev_sector(Sector p) |
1130 | { | 1133 | { |
1131 | page_cache_release(p.v); | 1134 | page_cache_release(p.v); |
1132 | } | 1135 | } |
1133 | 1136 | ||
1134 | struct work_struct; | 1137 | struct work_struct; |
1135 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1138 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
1136 | 1139 | ||
1137 | #ifdef CONFIG_BLK_CGROUP | 1140 | #ifdef CONFIG_BLK_CGROUP |
1138 | /* | 1141 | /* |
1139 | * This should not be using sched_clock(). A real patch is in progress | 1142 | * This should not be using sched_clock(). A real patch is in progress |
1140 | * to fix this up, until that is in place we need to disable preemption | 1143 | * to fix this up, until that is in place we need to disable preemption |
1141 | * around sched_clock() in this function and set_io_start_time_ns(). | 1144 | * around sched_clock() in this function and set_io_start_time_ns(). |
1142 | */ | 1145 | */ |
1143 | static inline void set_start_time_ns(struct request *req) | 1146 | static inline void set_start_time_ns(struct request *req) |
1144 | { | 1147 | { |
1145 | preempt_disable(); | 1148 | preempt_disable(); |
1146 | req->start_time_ns = sched_clock(); | 1149 | req->start_time_ns = sched_clock(); |
1147 | preempt_enable(); | 1150 | preempt_enable(); |
1148 | } | 1151 | } |
1149 | 1152 | ||
1150 | static inline void set_io_start_time_ns(struct request *req) | 1153 | static inline void set_io_start_time_ns(struct request *req) |
1151 | { | 1154 | { |
1152 | preempt_disable(); | 1155 | preempt_disable(); |
1153 | req->io_start_time_ns = sched_clock(); | 1156 | req->io_start_time_ns = sched_clock(); |
1154 | preempt_enable(); | 1157 | preempt_enable(); |
1155 | } | 1158 | } |
1156 | 1159 | ||
1157 | static inline uint64_t rq_start_time_ns(struct request *req) | 1160 | static inline uint64_t rq_start_time_ns(struct request *req) |
1158 | { | 1161 | { |
1159 | return req->start_time_ns; | 1162 | return req->start_time_ns; |
1160 | } | 1163 | } |
1161 | 1164 | ||
1162 | static inline uint64_t rq_io_start_time_ns(struct request *req) | 1165 | static inline uint64_t rq_io_start_time_ns(struct request *req) |
1163 | { | 1166 | { |
1164 | return req->io_start_time_ns; | 1167 | return req->io_start_time_ns; |
1165 | } | 1168 | } |
1166 | #else | 1169 | #else |
1167 | static inline void set_start_time_ns(struct request *req) {} | 1170 | static inline void set_start_time_ns(struct request *req) {} |
1168 | static inline void set_io_start_time_ns(struct request *req) {} | 1171 | static inline void set_io_start_time_ns(struct request *req) {} |
1169 | static inline uint64_t rq_start_time_ns(struct request *req) | 1172 | static inline uint64_t rq_start_time_ns(struct request *req) |
1170 | { | 1173 | { |
1171 | return 0; | 1174 | return 0; |
1172 | } | 1175 | } |
1173 | static inline uint64_t rq_io_start_time_ns(struct request *req) | 1176 | static inline uint64_t rq_io_start_time_ns(struct request *req) |
1174 | { | 1177 | { |
1175 | return 0; | 1178 | return 0; |
1176 | } | 1179 | } |
1177 | #endif | 1180 | #endif |
1178 | 1181 | ||
1179 | #ifdef CONFIG_BLK_DEV_THROTTLING | 1182 | #ifdef CONFIG_BLK_DEV_THROTTLING |
1180 | extern int blk_throtl_init(struct request_queue *q); | 1183 | extern int blk_throtl_init(struct request_queue *q); |
1181 | extern void blk_throtl_exit(struct request_queue *q); | 1184 | extern void blk_throtl_exit(struct request_queue *q); |
1182 | extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); | 1185 | extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); |
1183 | #else /* CONFIG_BLK_DEV_THROTTLING */ | 1186 | #else /* CONFIG_BLK_DEV_THROTTLING */ |
1184 | static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) | 1187 | static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) |
1185 | { | 1188 | { |
1186 | return 0; | 1189 | return 0; |
1187 | } | 1190 | } |
1188 | 1191 | ||
1189 | static inline int blk_throtl_init(struct request_queue *q) { return 0; } | 1192 | static inline int blk_throtl_init(struct request_queue *q) { return 0; } |
1190 | static inline int blk_throtl_exit(struct request_queue *q) { return 0; } | 1193 | static inline int blk_throtl_exit(struct request_queue *q) { return 0; } |
1191 | #endif /* CONFIG_BLK_DEV_THROTTLING */ | 1194 | #endif /* CONFIG_BLK_DEV_THROTTLING */ |
1192 | 1195 | ||
1193 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 1196 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
1194 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 1197 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
1195 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 1198 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
1196 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 1199 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
1197 | 1200 | ||
1198 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 1201 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
1199 | 1202 | ||
1200 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ | 1203 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ |
1201 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ | 1204 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ |
1202 | 1205 | ||
1203 | struct blk_integrity_exchg { | 1206 | struct blk_integrity_exchg { |
1204 | void *prot_buf; | 1207 | void *prot_buf; |
1205 | void *data_buf; | 1208 | void *data_buf; |
1206 | sector_t sector; | 1209 | sector_t sector; |
1207 | unsigned int data_size; | 1210 | unsigned int data_size; |
1208 | unsigned short sector_size; | 1211 | unsigned short sector_size; |
1209 | const char *disk_name; | 1212 | const char *disk_name; |
1210 | }; | 1213 | }; |
1211 | 1214 | ||
1212 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); | 1215 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); |
1213 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); | 1216 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); |
1214 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); | 1217 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); |
1215 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); | 1218 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); |
1216 | 1219 | ||
1217 | struct blk_integrity { | 1220 | struct blk_integrity { |
1218 | integrity_gen_fn *generate_fn; | 1221 | integrity_gen_fn *generate_fn; |
1219 | integrity_vrfy_fn *verify_fn; | 1222 | integrity_vrfy_fn *verify_fn; |
1220 | integrity_set_tag_fn *set_tag_fn; | 1223 | integrity_set_tag_fn *set_tag_fn; |
1221 | integrity_get_tag_fn *get_tag_fn; | 1224 | integrity_get_tag_fn *get_tag_fn; |
1222 | 1225 | ||
1223 | unsigned short flags; | 1226 | unsigned short flags; |
1224 | unsigned short tuple_size; | 1227 | unsigned short tuple_size; |
1225 | unsigned short sector_size; | 1228 | unsigned short sector_size; |
1226 | unsigned short tag_size; | 1229 | unsigned short tag_size; |
1227 | 1230 | ||
1228 | const char *name; | 1231 | const char *name; |
1229 | 1232 | ||
1230 | struct kobject kobj; | 1233 | struct kobject kobj; |
1231 | }; | 1234 | }; |
1232 | 1235 | ||
1233 | extern bool blk_integrity_is_initialized(struct gendisk *); | 1236 | extern bool blk_integrity_is_initialized(struct gendisk *); |
1234 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); | 1237 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); |
1235 | extern void blk_integrity_unregister(struct gendisk *); | 1238 | extern void blk_integrity_unregister(struct gendisk *); |
1236 | extern int blk_integrity_compare(struct gendisk *, struct gendisk *); | 1239 | extern int blk_integrity_compare(struct gendisk *, struct gendisk *); |
1237 | extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, | 1240 | extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, |
1238 | struct scatterlist *); | 1241 | struct scatterlist *); |
1239 | extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); | 1242 | extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); |
1240 | extern int blk_integrity_merge_rq(struct request_queue *, struct request *, | 1243 | extern int blk_integrity_merge_rq(struct request_queue *, struct request *, |
1241 | struct request *); | 1244 | struct request *); |
1242 | extern int blk_integrity_merge_bio(struct request_queue *, struct request *, | 1245 | extern int blk_integrity_merge_bio(struct request_queue *, struct request *, |
1243 | struct bio *); | 1246 | struct bio *); |
1244 | 1247 | ||
1245 | static inline | 1248 | static inline |
1246 | struct blk_integrity *bdev_get_integrity(struct block_device *bdev) | 1249 | struct blk_integrity *bdev_get_integrity(struct block_device *bdev) |
1247 | { | 1250 | { |
1248 | return bdev->bd_disk->integrity; | 1251 | return bdev->bd_disk->integrity; |
1249 | } | 1252 | } |
1250 | 1253 | ||
1251 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | 1254 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) |
1252 | { | 1255 | { |
1253 | return disk->integrity; | 1256 | return disk->integrity; |
1254 | } | 1257 | } |
1255 | 1258 | ||
1256 | static inline int blk_integrity_rq(struct request *rq) | 1259 | static inline int blk_integrity_rq(struct request *rq) |
1257 | { | 1260 | { |
1258 | if (rq->bio == NULL) | 1261 | if (rq->bio == NULL) |
1259 | return 0; | 1262 | return 0; |
1260 | 1263 | ||
1261 | return bio_integrity(rq->bio); | 1264 | return bio_integrity(rq->bio); |
1262 | } | 1265 | } |
1263 | 1266 | ||
1264 | static inline void blk_queue_max_integrity_segments(struct request_queue *q, | 1267 | static inline void blk_queue_max_integrity_segments(struct request_queue *q, |
1265 | unsigned int segs) | 1268 | unsigned int segs) |
1266 | { | 1269 | { |
1267 | q->limits.max_integrity_segments = segs; | 1270 | q->limits.max_integrity_segments = segs; |
1268 | } | 1271 | } |
1269 | 1272 | ||
1270 | static inline unsigned short | 1273 | static inline unsigned short |
1271 | queue_max_integrity_segments(struct request_queue *q) | 1274 | queue_max_integrity_segments(struct request_queue *q) |
1272 | { | 1275 | { |
1273 | return q->limits.max_integrity_segments; | 1276 | return q->limits.max_integrity_segments; |
1274 | } | 1277 | } |
1275 | 1278 | ||
1276 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | 1279 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
1277 | 1280 | ||
1278 | #define blk_integrity_rq(rq) (0) | 1281 | #define blk_integrity_rq(rq) (0) |
1279 | #define blk_rq_count_integrity_sg(a, b) (0) | 1282 | #define blk_rq_count_integrity_sg(a, b) (0) |
1280 | #define blk_rq_map_integrity_sg(a, b, c) (0) | 1283 | #define blk_rq_map_integrity_sg(a, b, c) (0) |
1281 | #define bdev_get_integrity(a) (0) | 1284 | #define bdev_get_integrity(a) (0) |
1282 | #define blk_get_integrity(a) (0) | 1285 | #define blk_get_integrity(a) (0) |
1283 | #define blk_integrity_compare(a, b) (0) | 1286 | #define blk_integrity_compare(a, b) (0) |
1284 | #define blk_integrity_register(a, b) (0) | 1287 | #define blk_integrity_register(a, b) (0) |
1285 | #define blk_integrity_unregister(a) do { } while (0) | 1288 | #define blk_integrity_unregister(a) do { } while (0) |
1286 | #define blk_queue_max_integrity_segments(a, b) do { } while (0) | 1289 | #define blk_queue_max_integrity_segments(a, b) do { } while (0) |
1287 | #define queue_max_integrity_segments(a) (0) | 1290 | #define queue_max_integrity_segments(a) (0) |
1288 | #define blk_integrity_merge_rq(a, b, c) (0) | 1291 | #define blk_integrity_merge_rq(a, b, c) (0) |
1289 | #define blk_integrity_merge_bio(a, b, c) (0) | 1292 | #define blk_integrity_merge_bio(a, b, c) (0) |
1290 | #define blk_integrity_is_initialized(a) (0) | 1293 | #define blk_integrity_is_initialized(a) (0) |
1291 | 1294 | ||
1292 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 1295 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1293 | 1296 | ||
1294 | struct block_device_operations { | 1297 | struct block_device_operations { |
1295 | int (*open) (struct block_device *, fmode_t); | 1298 | int (*open) (struct block_device *, fmode_t); |
1296 | int (*release) (struct gendisk *, fmode_t); | 1299 | int (*release) (struct gendisk *, fmode_t); |
1297 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1300 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1298 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1301 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1299 | int (*direct_access) (struct block_device *, sector_t, | 1302 | int (*direct_access) (struct block_device *, sector_t, |
1300 | void **, unsigned long *); | 1303 | void **, unsigned long *); |
1301 | unsigned int (*check_events) (struct gendisk *disk, | 1304 | unsigned int (*check_events) (struct gendisk *disk, |
1302 | unsigned int clearing); | 1305 | unsigned int clearing); |
1303 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ | 1306 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ |
1304 | int (*media_changed) (struct gendisk *); | 1307 | int (*media_changed) (struct gendisk *); |
1305 | void (*unlock_native_capacity) (struct gendisk *); | 1308 | void (*unlock_native_capacity) (struct gendisk *); |
1306 | int (*revalidate_disk) (struct gendisk *); | 1309 | int (*revalidate_disk) (struct gendisk *); |
1307 | int (*getgeo)(struct block_device *, struct hd_geometry *); | 1310 | int (*getgeo)(struct block_device *, struct hd_geometry *); |
1308 | /* this callback is with swap_lock and sometimes page table lock held */ | 1311 | /* this callback is with swap_lock and sometimes page table lock held */ |
1309 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 1312 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
1310 | struct module *owner; | 1313 | struct module *owner; |
1311 | }; | 1314 | }; |
1312 | 1315 | ||
1313 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | 1316 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, |
1314 | unsigned long); | 1317 | unsigned long); |
1315 | #else /* CONFIG_BLOCK */ | 1318 | #else /* CONFIG_BLOCK */ |
1316 | /* | 1319 | /* |
1317 | * stubs for when the block layer is configured out | 1320 | * stubs for when the block layer is configured out |
1318 | */ | 1321 | */ |
1319 | #define buffer_heads_over_limit 0 | 1322 | #define buffer_heads_over_limit 0 |
1320 | 1323 | ||
1321 | static inline long nr_blockdev_pages(void) | 1324 | static inline long nr_blockdev_pages(void) |
1322 | { | 1325 | { |
1323 | return 0; | 1326 | return 0; |
1324 | } | 1327 | } |
1325 | 1328 | ||
1326 | struct blk_plug { | 1329 | struct blk_plug { |
1327 | }; | 1330 | }; |
1328 | 1331 | ||
1329 | static inline void blk_start_plug(struct blk_plug *plug) | 1332 | static inline void blk_start_plug(struct blk_plug *plug) |
1330 | { | 1333 | { |
1331 | } | 1334 | } |
1332 | 1335 | ||
1333 | static inline void blk_finish_plug(struct blk_plug *plug) | 1336 | static inline void blk_finish_plug(struct blk_plug *plug) |
1334 | { | 1337 | { |
1335 | } | 1338 | } |
1336 | 1339 | ||
1337 | static inline void blk_flush_plug(struct task_struct *task) | 1340 | static inline void blk_flush_plug(struct task_struct *task) |
1338 | { | 1341 | { |
1339 | } | 1342 | } |
1340 | 1343 | ||
1341 | static inline void blk_schedule_flush_plug(struct task_struct *task) | 1344 | static inline void blk_schedule_flush_plug(struct task_struct *task) |
1342 | { | 1345 | { |
1343 | } | 1346 | } |
1344 | 1347 | ||
1345 | 1348 | ||
1346 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) | 1349 | static inline bool blk_needs_flush_plug(struct task_struct *tsk) |
1347 | { | 1350 | { |
1348 | return false; | 1351 | return false; |
1349 | } | 1352 | } |
1350 | 1353 | ||
1351 | #endif /* CONFIG_BLOCK */ | 1354 | #endif /* CONFIG_BLOCK */ |
1352 | 1355 | ||
1353 | #endif | 1356 | #endif |
1354 | 1357 |
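
The CONFIG_BLK_DEV_INTEGRITY helpers in the context above (blk_integrity_rq(), blk_rq_count_integrity_sg(), queue_max_integrity_segments()) are what a low-level driver consults before mapping protection information; when the option is off they collapse to the no-op macros in the #else branch, so callers need no ifdefs of their own. A minimal sketch of such a check follows; my_prep_rq() and my_map_integrity() are invented names for illustration, not kernel interfaces:

#include <linux/blkdev.h>
#include <linux/errno.h>

/* Stand-in for the driver-specific mapping of protection buffers. */
static int my_map_integrity(struct request_queue *q, struct request *rq)
{
	/* DMA-mapping of the integrity segments would go here. */
	return 0;
}

/*
 * Sketch only: bail out cheaply when a request carries no integrity
 * payload, and refuse requests that would exceed the queue's limit.
 */
static int my_prep_rq(struct request_queue *q, struct request *rq)
{
	if (!blk_integrity_rq(rq))
		return 0;		/* no protection info attached */

	if (blk_rq_count_integrity_sg(q, rq->bio) >
	    queue_max_integrity_segments(q))
		return -EIO;		/* exceeds the controller's segment limit */

	return my_map_integrity(q, rq);
}

With CONFIG_BLK_DEV_INTEGRITY=n the same code compiles down to an immediate "return 0" thanks to the stub macros, which is the point of keeping them in the header.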
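
struct block_device_operations, also shown unchanged in this hunk, is the dispatch table a driver hangs off its gendisk; only ->owner and the handlers the device actually needs have to be filled in. The following is a hypothetical skeleton (my_open, my_release, my_getgeo and mybd_fops are made-up names), matching the callback signatures listed above:

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>

static int my_open(struct block_device *bdev, fmode_t mode)
{
	return 0;			/* nothing to set up in this sketch */
}

static int my_release(struct gendisk *disk, fmode_t mode)
{
	return 0;
}

static int my_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	/* invent a CHS geometry so partitioning tools have something to work with */
	geo->heads = 4;
	geo->sectors = 16;
	geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
	return 0;
}

static const struct block_device_operations mybd_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.release	= my_release,
	.getgeo		= my_getgeo,
};

A driver would typically assign disk->fops = &mybd_fops before add_disk(); callbacks left NULL are simply skipped by the block layer.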
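
Finally, the empty struct blk_plug and the no-op blk_start_plug()/blk_finish_plug()/blk_flush_plug() definitions at the bottom are the CONFIG_BLOCK=n stand-ins for the plugging interface this commit tunes. A rough sketch of how a submitter uses the real versions (my_submit_batch() and the bios array are placeholders, not kernel code):

#include <linux/blkdev.h>
#include <linux/bio.h>

/*
 * Illustrative only: batch several reads under one on-stack plug so the
 * resulting requests collect in the per-task plug list and are pushed to
 * the device queue together.
 */
static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* requests now queue up in the task's plug list */
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);
	blk_finish_plug(&plug);		/* flush whatever accumulated */
}

The change to block/blk-core.c earlier in this diff bounds how many requests can pile up in that per-task list before it is flushed early, so blk_finish_plug() is no longer the only flush point for a long batch.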