Commit e2e1a148bc45855816ae6b4692ce29d0020fa22e

Authored by Jens Axboe
1 parent 841fdffdd3

block: add sysfs knob for turning off disk entropy contributions

There are two reasons for doing this:

- On SSD disks, the completion times aren't as random as they
  are for rotational drives. So it's questionable whether they
  should contribute to the random pool in the first place.

- Calling add_disk_randomness() has a lot of overhead.

This adds /sys/block/<dev>/queue/add_random, which lets you switch off
the entropy contribution on a per-device basis. The default setting is
on, so there should be no functional change from this patch.
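
As a rough illustration of the knob (not part of the patch itself), the
attribute can be flipped from user space like any other sysfs file; the
device name "sda" below is only a placeholder:

/*
 * Minimal sketch: disable the entropy contribution for one disk by
 * writing '0' to the new sysfs attribute. "sda" is an assumed name.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/sda/queue/add_random", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("0\n", f);	/* "0" = off, "1" = on (the default) */
	return fclose(f) ? 1 : 0;
}

The same effect can be had from a shell with
echo 0 > /sys/block/sda/queue/add_random; reading the file back returns
the current setting.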

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 3 changed files with 34 additions and 2 deletions

block/blk-core.c

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 * - July2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

static int __make_request(struct request_queue *q, struct bio *bio);

/*
 * For the allocated request tables
 */
static struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

static void drive_stat_acct(struct request *rq, int new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

	if (!new_io)
		part_stat_inc(cpu, part, merges[rw]);
	else {
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
	}

	part_stat_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}

/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev: device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info
 *
 * Will return NULL if the request queue cannot be located.
 */
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct backing_dev_info *ret = NULL;
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		ret = &q->backing_dev_info;
	return ret;
}
EXPORT_SYMBOL(blk_get_backing_dev_info);

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->timeout_list);
	rq->cpu = -1;
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->cmd = rq->__cmd;
	rq->cmd_len = BLK_MAX_CDB;
	rq->tag = -1;
	rq->ref_count = 1;
	rq->start_time = jiffies;
	set_start_time_ns(rq);
}
EXPORT_SYMBOL(blk_rq_init);

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	struct request_queue *q = rq->q;

	if (&q->bar_rq != rq) {
		if (error)
			clear_bit(BIO_UPTODATE, &bio->bi_flags);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			error = -EIO;

		if (unlikely(nbytes > bio->bi_size)) {
			printk(KERN_ERR "%s: want %u bytes done, %u left\n",
			       __func__, nbytes, bio->bi_size);
			nbytes = bio->bi_size;
		}

		if (unlikely(rq->cmd_flags & REQ_QUIET))
			set_bit(BIO_QUIET, &bio->bi_flags);

		bio->bi_size -= nbytes;
		bio->bi_sector += (nbytes >> 9);

		if (bio_integrity(bio))
			bio_integrity_advance(bio, nbytes);

		if (bio->bi_size == 0)
			bio_endio(bio, error);
	} else {

		/*
		 * Okay, this is the barrier request in progress, just
		 * record the error;
		 */
		if (error && !q->orderr)
			q->orderr = error;
	}
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	int bit;

	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
		rq->cmd_flags);

	printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

	if (blk_pc_request(rq)) {
		printk(KERN_INFO " cdb: ");
		for (bit = 0; bit < BLK_MAX_CDB; bit++)
			printk("%02x ", rq->cmd[bit]);
		printk("\n");
	}
}
EXPORT_SYMBOL(blk_dump_rq_flags);

/*
 * "plug" the device if there are no outstanding requests: this will
 * force the transfer to start only after we have put all the requests
 * on the list.
 *
 * This is called with interrupts off and no requests on the queue and
 * with the queue lock held.
 */
void blk_plug_device(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	/*
	 * don't plug a stopped queue, it must be paired with blk_start_queue()
	 * which will restart the queueing
	 */
	if (blk_queue_stopped(q))
		return;

	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
		trace_block_plug(q);
	}
}
EXPORT_SYMBOL(blk_plug_device);

/**
 * blk_plug_device_unlocked - plug a device without queue lock held
 * @q: The &struct request_queue to plug
 *
 * Description:
 * Like @blk_plug_device(), but grabs the queue lock and disables
 * interrupts.
 **/
void blk_plug_device_unlocked(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_plug_device(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_plug_device_unlocked);

/*
 * remove the queue from the plugged list, if present. called with
 * queue lock held and interrupts disabled.
 */
int blk_remove_plug(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
		return 0;

	del_timer(&q->unplug_timer);
	return 1;
}
EXPORT_SYMBOL(blk_remove_plug);

/*
 * remove the plug and let it rip..
 */
void __generic_unplug_device(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;
	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
		return;

	q->request_fn(q);
}

/**
 * generic_unplug_device - fire a request queue
 * @q: The &struct request_queue in question
 *
 * Description:
 * Linux uses plugging to build bigger requests queues before letting
 * the device have at them. If a queue is plugged, the I/O scheduler
 * is still adding and merging requests on the queue. Once the queue
 * gets unplugged, the request_fn defined for the queue is invoked and
 * transfers started.
 **/
void generic_unplug_device(struct request_queue *q)
{
	if (blk_queue_plugged(q)) {
		spin_lock_irq(q->queue_lock);
		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
	}
}
EXPORT_SYMBOL(generic_unplug_device);

static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
				   struct page *page)
{
	struct request_queue *q = bdi->unplug_io_data;

	blk_unplug(q);
}

void blk_unplug_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, unplug_work);

	trace_block_unplug_io(q);
	q->unplug_fn(q);
}

void blk_unplug_timeout(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	trace_block_unplug_timer(q);
	kblockd_schedule_work(q, &q->unplug_work);
}

void blk_unplug(struct request_queue *q)
{
	/*
	 * devices don't necessarily have an ->unplug_fn defined
	 */
	if (q->unplug_fn) {
		trace_block_unplug_io(q);
		q->unplug_fn(q);
	}
}
EXPORT_SYMBOL(blk_unplug);

/**
 * blk_start_queue - restart a previously stopped queue
 * @q: The &struct request_queue in question
 *
 * Description:
 * blk_start_queue() will clear the stop flag on the queue, and call
 * the request_fn for the queue if it was in a stopped state when
 * entered. Also see blk_stop_queue(). Queue lock must be held.
 **/
void blk_start_queue(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	__blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);

/**
 * blk_stop_queue - stop a queue
 * @q: The &struct request_queue in question
 *
 * Description:
 * The Linux block layer assumes that a block driver will consume all
 * entries on the request queue when the request_fn strategy is called.
 * Often this will not happen, because of hardware limitations (queue
 * depth settings). If a device driver gets a 'queue full' response,
 * or if it simply chooses not to queue more I/O at one point, it can
 * call this function to prevent the request_fn from being called until
 * the driver has signalled it's ready to go again. This happens by calling
 * blk_start_queue() to restart queue operations. Queue lock must be held.
 **/
void blk_stop_queue(struct request_queue *q)
{
	blk_remove_plug(q);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
EXPORT_SYMBOL(blk_stop_queue);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 * The block layer may perform asynchronous callback activity
 * on a queue, such as calling the unplug function after a timeout.
 * A block device may call blk_sync_queue to ensure that any
 * such activity is cancelled, thus allowing it to release resources
 * that the callbacks might use. The caller must already have made sure
 * that its ->make_request_fn will not re-add plugging prior to calling
 * this function.
 *
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->unplug_timer);
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->unplug_work);
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * __blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 * See @blk_run_queue. This variant must be called with the queue lock
 * held and interrupts disabled.
 *
 */
void __blk_run_queue(struct request_queue *q)
{
	blk_remove_plug(q);

	if (unlikely(blk_queue_stopped(q)))
		return;

	if (elv_queue_empty(q))
		return;

	/*
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
		q->request_fn(q);
		queue_flag_clear(QUEUE_FLAG_REENTER, q);
	} else {
		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
		kblockd_schedule_work(q, &q->unplug_work);
	}
}
EXPORT_SYMBOL(__blk_run_queue);

/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 * Invoke request handling on this queue, if it has pending work to do.
 * May be used to restart queueing when a request has completed.
 */
void blk_run_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__blk_run_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}

void blk_cleanup_queue(struct request_queue *q)
{
	/*
	 * We know we have process context here, so we can be a little
	 * cautious and ensure that pending block actions on this device
	 * are done before moving on. Going into this function, we should
	 * not have processes doing IO to this device.
	 */
	blk_sync_queue(q);

	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
	mutex_lock(&q->sysfs_lock);
	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
	mutex_unlock(&q->sysfs_lock);

	if (q->elevator)
		elevator_exit(q->elevator);

	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

static int blk_init_free_list(struct request_queue *q)
{
	struct request_list *rl = &q->rq;

	if (unlikely(rl->rq_pool))
		return 0;

	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->elvpriv = 0;
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);

	if (!rl->rq_pool)
		return -ENOMEM;

	return 0;
}

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, -1);
}
EXPORT_SYMBOL(blk_alloc_queue);

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
	q->backing_dev_info.unplug_io_data = q;
	q->backing_dev_info.ra_pages =
			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	q->backing_dev_info.name = "block";

	err = bdi_init(&q->backing_dev_info);
	if (err) {
		kmem_cache_free(blk_requestq_cachep, q);
		return NULL;
	}

	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q);
	init_timer(&q->unplug_timer);
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_WORK(&q->unplug_work, blk_unplug_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn: The function to be called to process requests that have been
 * placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 * If a block device wishes to use the standard request handling procedures,
 * which sorts requests and coalesces adjacent requests, then it must
 * call blk_init_queue(). The function @rfn will be called when there
 * are requests on the queue that need to be processed. If the device
 * supports plugging, then @rfn may not be called immediately when requests
 * are available on the queue, but may be called at some time later instead.
 * Plugged queues are generally unplugged when a buffer belonging to one
 * of the requests on the queue is needed, or due to memory pressure.
 *
 * @rfn is not required, or even expected, to remove all requests off the
 * queue, but only as many as it can handle at a time. If it does leave
 * requests on the queue, it is responsible for arranging that the requests
 * get dealt with eventually.
 *
 * The queue spin lock must be held while manipulating the requests on the
 * request queue; this lock will be taken also from interrupt context, so irq
 * disabling is needed for it.
 *
 * Function returns a pointer to the initialized request queue, or %NULL if
 * it didn't succeed.
 *
 * Note:
 * blk_init_queue() must be paired with a blk_cleanup_queue() call
 * when the block device is deactivated (such as at module unload).
 **/

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	return blk_init_queue_node(rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_queue);

struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	struct request_queue *uninit_q, *q;

	uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
	if (!uninit_q)
		return NULL;

	q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
	if (!q)
		blk_cleanup_queue(uninit_q);

	return q;
}
EXPORT_SYMBOL(blk_init_queue_node);

struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
			 spinlock_t *lock)
{
	return blk_init_allocated_queue_node(q, rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_allocated_queue);

struct request_queue *
blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
			      spinlock_t *lock, int node_id)
{
	if (!q)
		return NULL;

	q->node = node_id;
	if (blk_init_free_list(q))
		return NULL;

	q->request_fn = rfn;
	q->prep_rq_fn = NULL;
	q->unplug_fn = generic_unplug_device;
	q->queue_flags = QUEUE_FLAG_DEFAULT;
	q->queue_lock = lock;

	/*
	 * This also sets hw/phys segments, boundary and size
	 */
	blk_queue_make_request(q, __make_request);

	q->sg_reserved_size = INT_MAX;

	/*
	 * all done
	 */
	if (!elevator_init(q, NULL)) {
		blk_queue_congestion_threshold(q);
		return q;
	}

	return NULL;
}
EXPORT_SYMBOL(blk_init_allocated_queue_node);

int blk_get_queue(struct request_queue *q)
{
	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
		kobject_get(&q->kobj);
		return 0;
	}

	return 1;
}

static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_flags & REQ_ELVPRIV)
		elv_put_request(q, rq);
	mempool_free(rq, q->rq.rq_pool);
}

static struct request *
blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
{
	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

	if (!rq)
		return NULL;

	blk_rq_init(q, rq);

	rq->cmd_flags = flags | REQ_ALLOCED;

	if (priv) {
		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
			mempool_free(rq, q->rq.rq_pool);
			return NULL;
		}
		rq->cmd_flags |= REQ_ELVPRIV;
	}

	return rq;
}

/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 */
static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc)
		return 0;

	/*
	 * Make sure the process is able to allocate at least 1 request
	 * even if the batch times out, otherwise we could theoretically
	 * lose wakeups.
	 */
	return ioc->nr_batch_requests == q->nr_batching ||
		(ioc->nr_batch_requests > 0
		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}

/*
 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
 * will cause the process to be a "batcher" on all queues in the system. This
 * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc || ioc_batching(q, ioc))
		return;

	ioc->nr_batch_requests = q->nr_batching;
	ioc->last_waited = jiffies;
}

static void __freed_request(struct request_queue *q, int sync)
{
	struct request_list *rl = &q->rq;

	if (rl->count[sync] < queue_congestion_off_threshold(q))
		blk_clear_queue_congested(q, sync);

	if (rl->count[sync] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[sync]))
			wake_up(&rl->wait[sync]);

		blk_clear_queue_full(q, sync);
	}
}

/*
 * A request has just been released. Account for it, update the full and
 * congestion status, wake up any waiters. Called under q->queue_lock.
 */
static void freed_request(struct request_queue *q, int sync, int priv)
{
	struct request_list *rl = &q->rq;

	rl->count[sync]--;
	if (priv)
		rl->elvpriv--;

	__freed_request(q, sync);

	if (unlikely(rl->starved[sync ^ 1]))
		__freed_request(q, sync ^ 1);
}

/*
 * Get a free request, queue_lock must be held.
 * Returns NULL on failure, with queue_lock held.
 * Returns !NULL on success, with queue_lock *not held*.
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	int may_queue, priv;

	may_queue = elv_may_queue(q, rw_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_queue_full(q, is_sync)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, is_sync);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, is_sync);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		goto out;

	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;

	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, is_sync, priv);

		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction was pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
rq_starved:
		if (unlikely(rl->count[is_sync] == 0))
			rl->starved[is_sync] = 1;

		goto out;
	}

	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	trace_block_getrq(q, bio, rw_flags & 1);
out:
	return rq;
}

/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked.
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
					struct bio *bio)
{
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	struct request *rq;

	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct io_context *ioc;
		struct request_list *rl = &q->rq;

		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				TASK_UNINTERRUPTIBLE);

		trace_block_sleeprq(q, bio, rw_flags & 1);

		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
		io_schedule();

		/*
		 * After sleeping, we become a "batching" process and
		 * will be able to allocate at least one request, and
		 * up to a big batch of them for a small period time.
		 * See ioc_batching, ioc_set_batching
		 */
		ioc = current_io_context(GFP_NOIO, q->node);
		ioc_set_batching(q, ioc);

		spin_lock_irq(q->queue_lock);
		finish_wait(&rl->wait[is_sync], &wait);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	};

	return rq;
}

struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT) {
		rq = get_request_wait(q, rw, NULL);
	} else {
		rq = get_request(q, rw, NULL, gfp_mask);
		if (!rq)
			spin_unlock_irq(q->queue_lock);
	}
	/* q->queue_lock is unlocked at this point */

	return rq;
}
EXPORT_SYMBOL(blk_get_request);
909 909
910 /** 910 /**
911 * blk_make_request - given a bio, allocate a corresponding struct request. 911 * blk_make_request - given a bio, allocate a corresponding struct request.
912 * @q: target request queue 912 * @q: target request queue
913 * @bio: The bio describing the memory mappings that will be submitted for IO. 913 * @bio: The bio describing the memory mappings that will be submitted for IO.
914 * It may be a chained-bio properly constructed by block/bio layer. 914 * It may be a chained-bio properly constructed by block/bio layer.
915 * @gfp_mask: gfp flags to be used for memory allocation 915 * @gfp_mask: gfp flags to be used for memory allocation
916 * 916 *
917 * blk_make_request is the parallel of generic_make_request for BLOCK_PC 917 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
918 * type commands. Where the struct request needs to be farther initialized by 918 * type commands. Where the struct request needs to be farther initialized by
919 * the caller. It is passed a &struct bio, which describes the memory info of 919 * the caller. It is passed a &struct bio, which describes the memory info of
920 * the I/O transfer. 920 * the I/O transfer.
921 * 921 *
922 * The caller of blk_make_request must make sure that bi_io_vec 922 * The caller of blk_make_request must make sure that bi_io_vec
923 * are set to describe the memory buffers. That bio_data_dir() will return 923 * are set to describe the memory buffers. That bio_data_dir() will return
924 * the needed direction of the request. (And all bio's in the passed bio-chain 924 * the needed direction of the request. (And all bio's in the passed bio-chain
925 * are properly set accordingly) 925 * are properly set accordingly)
926 * 926 *
927 * If called under none-sleepable conditions, mapped bio buffers must not 927 * If called under none-sleepable conditions, mapped bio buffers must not
928 * need bouncing, by calling the appropriate masked or flagged allocator, 928 * need bouncing, by calling the appropriate masked or flagged allocator,
929 * suitable for the target device. Otherwise the call to blk_queue_bounce will 929 * suitable for the target device. Otherwise the call to blk_queue_bounce will
930 * BUG. 930 * BUG.
931 * 931 *
932 * WARNING: When allocating/cloning a bio-chain, careful consideration should be 932 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
933 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for 933 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
934 * anything but the first bio in the chain. Otherwise you risk waiting for IO 934 * anything but the first bio in the chain. Otherwise you risk waiting for IO
935 * completion of a bio that hasn't been submitted yet, thus resulting in a 935 * completion of a bio that hasn't been submitted yet, thus resulting in a
936 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead 936 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
937 * of bio_alloc(), as that avoids the mempool deadlock. 937 * of bio_alloc(), as that avoids the mempool deadlock.
938 * If possible a big IO should be split into smaller parts when allocation 938 * If possible a big IO should be split into smaller parts when allocation
939 * fails. Partial allocation should not be an error, or you risk a live-lock. 939 * fails. Partial allocation should not be an error, or you risk a live-lock.
940 */ 940 */
941 struct request *blk_make_request(struct request_queue *q, struct bio *bio, 941 struct request *blk_make_request(struct request_queue *q, struct bio *bio,
942 gfp_t gfp_mask) 942 gfp_t gfp_mask)
943 { 943 {
944 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); 944 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
945 945
946 if (unlikely(!rq)) 946 if (unlikely(!rq))
947 return ERR_PTR(-ENOMEM); 947 return ERR_PTR(-ENOMEM);
948 948
949 for_each_bio(bio) { 949 for_each_bio(bio) {
950 struct bio *bounce_bio = bio; 950 struct bio *bounce_bio = bio;
951 int ret; 951 int ret;
952 952
953 blk_queue_bounce(q, &bounce_bio); 953 blk_queue_bounce(q, &bounce_bio);
954 ret = blk_rq_append_bio(q, rq, bounce_bio); 954 ret = blk_rq_append_bio(q, rq, bounce_bio);
955 if (unlikely(ret)) { 955 if (unlikely(ret)) {
956 blk_put_request(rq); 956 blk_put_request(rq);
957 return ERR_PTR(ret); 957 return ERR_PTR(ret);
958 } 958 }
959 } 959 }
960 960
961 return rq; 961 return rq;
962 } 962 }
963 EXPORT_SYMBOL(blk_make_request); 963 EXPORT_SYMBOL(blk_make_request);
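
For illustration only, a minimal sketch of a caller of blk_make_request() as described above; build_pc_rq() and its setup are assumptions of the example, not part of this file.

/* Hedged sketch: wrap a pre-built bio chain in a BLOCK_PC request. */
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/err.h>

static struct request *build_pc_rq(struct request_queue *q, struct bio *bio)
{
	struct request *rq;

	/* bio (and any chained bios) must already have bi_io_vec set up */
	rq = blk_make_request(q, bio, GFP_KERNEL);
	if (IS_ERR(rq))
		return rq;

	/* the caller finishes initialization, e.g. cmd[], cmd_len, timeout */
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	return rq;
}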
964 964
965 /** 965 /**
966 * blk_requeue_request - put a request back on queue 966 * blk_requeue_request - put a request back on queue
967 * @q: request queue where request should be inserted 967 * @q: request queue where request should be inserted
968 * @rq: request to be inserted 968 * @rq: request to be inserted
969 * 969 *
970 * Description: 970 * Description:
971 * Drivers often keep queueing requests until the hardware cannot accept 971 * Drivers often keep queueing requests until the hardware cannot accept
972 * more; when that condition happens we need to put the request back 972 * more; when that condition happens we need to put the request back
973 * on the queue. Must be called with queue lock held. 973 * on the queue. Must be called with queue lock held.
974 */ 974 */
975 void blk_requeue_request(struct request_queue *q, struct request *rq) 975 void blk_requeue_request(struct request_queue *q, struct request *rq)
976 { 976 {
977 blk_delete_timer(rq); 977 blk_delete_timer(rq);
978 blk_clear_rq_complete(rq); 978 blk_clear_rq_complete(rq);
979 trace_block_rq_requeue(q, rq); 979 trace_block_rq_requeue(q, rq);
980 980
981 if (blk_rq_tagged(rq)) 981 if (blk_rq_tagged(rq))
982 blk_queue_end_tag(q, rq); 982 blk_queue_end_tag(q, rq);
983 983
984 BUG_ON(blk_queued_rq(rq)); 984 BUG_ON(blk_queued_rq(rq));
985 985
986 elv_requeue_request(q, rq); 986 elv_requeue_request(q, rq);
987 } 987 }
988 EXPORT_SYMBOL(blk_requeue_request); 988 EXPORT_SYMBOL(blk_requeue_request);
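
As a hedged illustration of the requeue pattern described above: a driver that fetches requests and finds the hardware full puts the request back and stops the queue until the hardware drains. my_hw_submit() is a hypothetical helper, not kernel API.

/* Illustrative request_fn fragment; my_hw_submit() is hypothetical. */
static int my_hw_submit(struct request *rq);	/* returns -EBUSY when full */

static void my_request_fn(struct request_queue *q)
{
	struct request *rq;

	/* request_fn runs with the queue lock held, as blk_requeue_request()
	 * requires */
	while ((rq = blk_fetch_request(q)) != NULL) {
		if (my_hw_submit(rq) == -EBUSY) {
			blk_requeue_request(q, rq);
			blk_stop_queue(q);	/* restart when hardware drains */
			break;
		}
	}
}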
989 989
990 /** 990 /**
991 * blk_insert_request - insert a special request into a request queue 991 * blk_insert_request - insert a special request into a request queue
992 * @q: request queue where request should be inserted 992 * @q: request queue where request should be inserted
993 * @rq: request to be inserted 993 * @rq: request to be inserted
994 * @at_head: insert request at head or tail of queue 994 * @at_head: insert request at head or tail of queue
995 * @data: private data 995 * @data: private data
996 * 996 *
997 * Description: 997 * Description:
998 * Many block devices need to execute commands asynchronously, so they don't 998 * Many block devices need to execute commands asynchronously, so they don't
999 * block the whole kernel from preemption during request execution. This is 999 * block the whole kernel from preemption during request execution. This is
1000 * accomplished normally by inserting artificial requests tagged as 1000 * accomplished normally by inserting artificial requests tagged as
1001 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them 1001 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
1002 * be scheduled for actual execution by the request queue. 1002 * be scheduled for actual execution by the request queue.
1003 * 1003 *
1004 * We have the option of inserting at the head or the tail of the queue. 1004 * We have the option of inserting at the head or the tail of the queue.
1005 * Typically we use the tail for new ioctls and so forth. We use the head 1005 * Typically we use the tail for new ioctls and so forth. We use the head
1006 * of the queue for things like a QUEUE_FULL message from a device, or a 1006 * of the queue for things like a QUEUE_FULL message from a device, or a
1007 * host that is unable to accept a particular command. 1007 * host that is unable to accept a particular command.
1008 */ 1008 */
1009 void blk_insert_request(struct request_queue *q, struct request *rq, 1009 void blk_insert_request(struct request_queue *q, struct request *rq,
1010 int at_head, void *data) 1010 int at_head, void *data)
1011 { 1011 {
1012 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 1012 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
1013 unsigned long flags; 1013 unsigned long flags;
1014 1014
1015 /* 1015 /*
1016 * tell I/O scheduler that this isn't a regular read/write (ie it 1016 * tell I/O scheduler that this isn't a regular read/write (ie it
1017 * must not attempt merges on this) and that it acts as a soft 1017 * must not attempt merges on this) and that it acts as a soft
1018 * barrier 1018 * barrier
1019 */ 1019 */
1020 rq->cmd_type = REQ_TYPE_SPECIAL; 1020 rq->cmd_type = REQ_TYPE_SPECIAL;
1021 1021
1022 rq->special = data; 1022 rq->special = data;
1023 1023
1024 spin_lock_irqsave(q->queue_lock, flags); 1024 spin_lock_irqsave(q->queue_lock, flags);
1025 1025
1026 /* 1026 /*
1027 * If command is tagged, release the tag 1027 * If command is tagged, release the tag
1028 */ 1028 */
1029 if (blk_rq_tagged(rq)) 1029 if (blk_rq_tagged(rq))
1030 blk_queue_end_tag(q, rq); 1030 blk_queue_end_tag(q, rq);
1031 1031
1032 drive_stat_acct(rq, 1); 1032 drive_stat_acct(rq, 1);
1033 __elv_add_request(q, rq, where, 0); 1033 __elv_add_request(q, rq, where, 0);
1034 __blk_run_queue(q); 1034 __blk_run_queue(q);
1035 spin_unlock_irqrestore(q->queue_lock, flags); 1035 spin_unlock_irqrestore(q->queue_lock, flags);
1036 } 1036 }
1037 EXPORT_SYMBOL(blk_insert_request); 1037 EXPORT_SYMBOL(blk_insert_request);
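
A minimal sketch of the call described above, assuming a driver wants its private command scheduled ahead of normal I/O; my_issue_special() and drv_data are illustrative names only.

/* Hedged sketch: queue a driver-private command at the head of the queue. */
static int my_issue_special(struct request_queue *q, void *drv_data)
{
	struct request *rq = blk_get_request(q, WRITE, GFP_KERNEL);

	if (!rq)
		return -ENOMEM;

	/* blk_insert_request() tags it REQ_TYPE_SPECIAL, stores drv_data in
	 * rq->special and runs the queue */
	blk_insert_request(q, rq, 1, drv_data);
	return 0;
}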
1038 1038
1039 /* 1039 /*
1040 * add-request adds a request to the linked list. 1040 * add-request adds a request to the linked list.
1041 * queue lock is held and interrupts disabled, as we muck with the 1041 * queue lock is held and interrupts disabled, as we muck with the
1042 * request queue list. 1042 * request queue list.
1043 */ 1043 */
1044 static inline void add_request(struct request_queue *q, struct request *req) 1044 static inline void add_request(struct request_queue *q, struct request *req)
1045 { 1045 {
1046 drive_stat_acct(req, 1); 1046 drive_stat_acct(req, 1);
1047 1047
1048 /* 1048 /*
1049 * elevator indicated where it wants this request to be 1049 * elevator indicated where it wants this request to be
1050 * inserted at elevator_merge time 1050 * inserted at elevator_merge time
1051 */ 1051 */
1052 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 1052 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
1053 } 1053 }
1054 1054
1055 static void part_round_stats_single(int cpu, struct hd_struct *part, 1055 static void part_round_stats_single(int cpu, struct hd_struct *part,
1056 unsigned long now) 1056 unsigned long now)
1057 { 1057 {
1058 if (now == part->stamp) 1058 if (now == part->stamp)
1059 return; 1059 return;
1060 1060
1061 if (part_in_flight(part)) { 1061 if (part_in_flight(part)) {
1062 __part_stat_add(cpu, part, time_in_queue, 1062 __part_stat_add(cpu, part, time_in_queue,
1063 part_in_flight(part) * (now - part->stamp)); 1063 part_in_flight(part) * (now - part->stamp));
1064 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 1064 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1065 } 1065 }
1066 part->stamp = now; 1066 part->stamp = now;
1067 } 1067 }
1068 1068
1069 /** 1069 /**
1070 * part_round_stats() - Round off the performance stats on a struct disk_stats. 1070 * part_round_stats() - Round off the performance stats on a struct disk_stats.
1071 * @cpu: cpu number for stats access 1071 * @cpu: cpu number for stats access
1072 * @part: target partition 1072 * @part: target partition
1073 * 1073 *
1074 * The average IO queue length and utilisation statistics are maintained 1074 * The average IO queue length and utilisation statistics are maintained
1075 * by observing the current state of the queue length and the amount of 1075 * by observing the current state of the queue length and the amount of
1076 * time it has been in this state for. 1076 * time it has been in this state for.
1077 * 1077 *
1078 * Normally, that accounting is done on IO completion, but that can result 1078 * Normally, that accounting is done on IO completion, but that can result
1079 * in more than a second's worth of IO being accounted for within any one 1079 * in more than a second's worth of IO being accounted for within any one
1080 * second, leading to >100% utilisation. To deal with that, we call this 1080 * second, leading to >100% utilisation. To deal with that, we call this
1081 * function to do a round-off before returning the results when reading 1081 * function to do a round-off before returning the results when reading
1082 * /proc/diskstats. This accounts immediately for all queue usage up to 1082 * /proc/diskstats. This accounts immediately for all queue usage up to
1083 * the current jiffies and restarts the counters again. 1083 * the current jiffies and restarts the counters again.
1084 */ 1084 */
1085 void part_round_stats(int cpu, struct hd_struct *part) 1085 void part_round_stats(int cpu, struct hd_struct *part)
1086 { 1086 {
1087 unsigned long now = jiffies; 1087 unsigned long now = jiffies;
1088 1088
1089 if (part->partno) 1089 if (part->partno)
1090 part_round_stats_single(cpu, &part_to_disk(part)->part0, now); 1090 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1091 part_round_stats_single(cpu, part, now); 1091 part_round_stats_single(cpu, part, now);
1092 } 1092 }
1093 EXPORT_SYMBOL_GPL(part_round_stats); 1093 EXPORT_SYMBOL_GPL(part_round_stats);
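
As a worked example of the rounding arithmetic above (numbers assumed for illustration): if a partition last had its stats stamped 10 jiffies ago and currently has 3 requests in flight, part_round_stats() adds 3 * 10 = 30 to time_in_queue and 10 to io_ticks, then resets part->stamp to the current jiffies so the next interval starts from zero.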
1094 1094
1095 /* 1095 /*
1096 * queue lock must be held 1096 * queue lock must be held
1097 */ 1097 */
1098 void __blk_put_request(struct request_queue *q, struct request *req) 1098 void __blk_put_request(struct request_queue *q, struct request *req)
1099 { 1099 {
1100 if (unlikely(!q)) 1100 if (unlikely(!q))
1101 return; 1101 return;
1102 if (unlikely(--req->ref_count)) 1102 if (unlikely(--req->ref_count))
1103 return; 1103 return;
1104 1104
1105 elv_completed_request(q, req); 1105 elv_completed_request(q, req);
1106 1106
1107 /* this is a bio leak */ 1107 /* this is a bio leak */
1108 WARN_ON(req->bio != NULL); 1108 WARN_ON(req->bio != NULL);
1109 1109
1110 /* 1110 /*
1111 * Request may not have originated from ll_rw_blk. If not, 1111 * Request may not have originated from ll_rw_blk. If not,
1112 * it didn't come out of our reserved rq pools 1112 * it didn't come out of our reserved rq pools
1113 */ 1113 */
1114 if (req->cmd_flags & REQ_ALLOCED) { 1114 if (req->cmd_flags & REQ_ALLOCED) {
1115 int is_sync = rq_is_sync(req) != 0; 1115 int is_sync = rq_is_sync(req) != 0;
1116 int priv = req->cmd_flags & REQ_ELVPRIV; 1116 int priv = req->cmd_flags & REQ_ELVPRIV;
1117 1117
1118 BUG_ON(!list_empty(&req->queuelist)); 1118 BUG_ON(!list_empty(&req->queuelist));
1119 BUG_ON(!hlist_unhashed(&req->hash)); 1119 BUG_ON(!hlist_unhashed(&req->hash));
1120 1120
1121 blk_free_request(q, req); 1121 blk_free_request(q, req);
1122 freed_request(q, is_sync, priv); 1122 freed_request(q, is_sync, priv);
1123 } 1123 }
1124 } 1124 }
1125 EXPORT_SYMBOL_GPL(__blk_put_request); 1125 EXPORT_SYMBOL_GPL(__blk_put_request);
1126 1126
1127 void blk_put_request(struct request *req) 1127 void blk_put_request(struct request *req)
1128 { 1128 {
1129 unsigned long flags; 1129 unsigned long flags;
1130 struct request_queue *q = req->q; 1130 struct request_queue *q = req->q;
1131 1131
1132 spin_lock_irqsave(q->queue_lock, flags); 1132 spin_lock_irqsave(q->queue_lock, flags);
1133 __blk_put_request(q, req); 1133 __blk_put_request(q, req);
1134 spin_unlock_irqrestore(q->queue_lock, flags); 1134 spin_unlock_irqrestore(q->queue_lock, flags);
1135 } 1135 }
1136 EXPORT_SYMBOL(blk_put_request); 1136 EXPORT_SYMBOL(blk_put_request);
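
A hedged sketch of the usual get/put pairing for the reference counting above: the request obtained from blk_get_request() is dropped with blk_put_request() once the (here synchronous) execution finishes. The command setup is elided and my_send_cmd() is an illustrative name.

/* Illustrative only: synchronous use of the request get/put pairing. */
static int my_send_cmd(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq = blk_get_request(q, READ, GFP_KERNEL);
	int err;

	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->timeout = 30 * HZ;
	/* rq->cmd[], rq->cmd_len etc. would be filled in here */

	err = blk_execute_rq(q, disk, rq, 0);	/* waits for completion */
	blk_put_request(rq);			/* drops the allocation ref */
	return err;
}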
1137 1137
1138 void init_request_from_bio(struct request *req, struct bio *bio) 1138 void init_request_from_bio(struct request *req, struct bio *bio)
1139 { 1139 {
1140 req->cpu = bio->bi_comp_cpu; 1140 req->cpu = bio->bi_comp_cpu;
1141 req->cmd_type = REQ_TYPE_FS; 1141 req->cmd_type = REQ_TYPE_FS;
1142 1142
1143 /* 1143 /*
1144 * Inherit FAILFAST from bio (for read-ahead, and explicit 1144 * Inherit FAILFAST from bio (for read-ahead, and explicit
1145 * FAILFAST). FAILFAST flags are identical for req and bio. 1145 * FAILFAST). FAILFAST flags are identical for req and bio.
1146 */ 1146 */
1147 if (bio_rw_flagged(bio, BIO_RW_AHEAD)) 1147 if (bio_rw_flagged(bio, BIO_RW_AHEAD))
1148 req->cmd_flags |= REQ_FAILFAST_MASK; 1148 req->cmd_flags |= REQ_FAILFAST_MASK;
1149 else 1149 else
1150 req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK; 1150 req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
1151 1151
1152 if (bio_rw_flagged(bio, BIO_RW_DISCARD)) 1152 if (bio_rw_flagged(bio, BIO_RW_DISCARD))
1153 req->cmd_flags |= REQ_DISCARD; 1153 req->cmd_flags |= REQ_DISCARD;
1154 if (bio_rw_flagged(bio, BIO_RW_BARRIER)) 1154 if (bio_rw_flagged(bio, BIO_RW_BARRIER))
1155 req->cmd_flags |= REQ_HARDBARRIER; 1155 req->cmd_flags |= REQ_HARDBARRIER;
1156 if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) 1156 if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
1157 req->cmd_flags |= REQ_RW_SYNC; 1157 req->cmd_flags |= REQ_RW_SYNC;
1158 if (bio_rw_flagged(bio, BIO_RW_META)) 1158 if (bio_rw_flagged(bio, BIO_RW_META))
1159 req->cmd_flags |= REQ_RW_META; 1159 req->cmd_flags |= REQ_RW_META;
1160 if (bio_rw_flagged(bio, BIO_RW_NOIDLE)) 1160 if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
1161 req->cmd_flags |= REQ_NOIDLE; 1161 req->cmd_flags |= REQ_NOIDLE;
1162 1162
1163 req->errors = 0; 1163 req->errors = 0;
1164 req->__sector = bio->bi_sector; 1164 req->__sector = bio->bi_sector;
1165 req->ioprio = bio_prio(bio); 1165 req->ioprio = bio_prio(bio);
1166 blk_rq_bio_prep(req->q, req, bio); 1166 blk_rq_bio_prep(req->q, req, bio);
1167 } 1167 }
1168 1168
1169 /* 1169 /*
1170 * Only disable plugging for non-rotational devices if the device does 1170 * Only disable plugging for non-rotational devices if the device does
1171 * tagging as well, otherwise we do need the proper merging 1171 * tagging as well, otherwise we do need the proper merging
1172 */ 1172 */
1173 static inline bool queue_should_plug(struct request_queue *q) 1173 static inline bool queue_should_plug(struct request_queue *q)
1174 { 1174 {
1175 return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); 1175 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1176 } 1176 }
1177 1177
1178 static int __make_request(struct request_queue *q, struct bio *bio) 1178 static int __make_request(struct request_queue *q, struct bio *bio)
1179 { 1179 {
1180 struct request *req; 1180 struct request *req;
1181 int el_ret; 1181 int el_ret;
1182 unsigned int bytes = bio->bi_size; 1182 unsigned int bytes = bio->bi_size;
1183 const unsigned short prio = bio_prio(bio); 1183 const unsigned short prio = bio_prio(bio);
1184 const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); 1184 const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
1185 const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG); 1185 const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
1186 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; 1186 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1187 int rw_flags; 1187 int rw_flags;
1188 1188
1189 if (bio_rw_flagged(bio, BIO_RW_BARRIER) && 1189 if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
1190 (q->next_ordered == QUEUE_ORDERED_NONE)) { 1190 (q->next_ordered == QUEUE_ORDERED_NONE)) {
1191 bio_endio(bio, -EOPNOTSUPP); 1191 bio_endio(bio, -EOPNOTSUPP);
1192 return 0; 1192 return 0;
1193 } 1193 }
1194 /* 1194 /*
1195 * low level driver can indicate that it wants pages above a 1195 * low level driver can indicate that it wants pages above a
1196 * certain limit bounced to low memory (ie for highmem, or even 1196 * certain limit bounced to low memory (ie for highmem, or even
1197 * ISA dma in theory) 1197 * ISA dma in theory)
1198 */ 1198 */
1199 blk_queue_bounce(q, &bio); 1199 blk_queue_bounce(q, &bio);
1200 1200
1201 spin_lock_irq(q->queue_lock); 1201 spin_lock_irq(q->queue_lock);
1202 1202
1203 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q)) 1203 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
1204 goto get_rq; 1204 goto get_rq;
1205 1205
1206 el_ret = elv_merge(q, &req, bio); 1206 el_ret = elv_merge(q, &req, bio);
1207 switch (el_ret) { 1207 switch (el_ret) {
1208 case ELEVATOR_BACK_MERGE: 1208 case ELEVATOR_BACK_MERGE:
1209 BUG_ON(!rq_mergeable(req)); 1209 BUG_ON(!rq_mergeable(req));
1210 1210
1211 if (!ll_back_merge_fn(q, req, bio)) 1211 if (!ll_back_merge_fn(q, req, bio))
1212 break; 1212 break;
1213 1213
1214 trace_block_bio_backmerge(q, bio); 1214 trace_block_bio_backmerge(q, bio);
1215 1215
1216 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) 1216 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1217 blk_rq_set_mixed_merge(req); 1217 blk_rq_set_mixed_merge(req);
1218 1218
1219 req->biotail->bi_next = bio; 1219 req->biotail->bi_next = bio;
1220 req->biotail = bio; 1220 req->biotail = bio;
1221 req->__data_len += bytes; 1221 req->__data_len += bytes;
1222 req->ioprio = ioprio_best(req->ioprio, prio); 1222 req->ioprio = ioprio_best(req->ioprio, prio);
1223 if (!blk_rq_cpu_valid(req)) 1223 if (!blk_rq_cpu_valid(req))
1224 req->cpu = bio->bi_comp_cpu; 1224 req->cpu = bio->bi_comp_cpu;
1225 drive_stat_acct(req, 0); 1225 drive_stat_acct(req, 0);
1226 elv_bio_merged(q, req, bio); 1226 elv_bio_merged(q, req, bio);
1227 if (!attempt_back_merge(q, req)) 1227 if (!attempt_back_merge(q, req))
1228 elv_merged_request(q, req, el_ret); 1228 elv_merged_request(q, req, el_ret);
1229 goto out; 1229 goto out;
1230 1230
1231 case ELEVATOR_FRONT_MERGE: 1231 case ELEVATOR_FRONT_MERGE:
1232 BUG_ON(!rq_mergeable(req)); 1232 BUG_ON(!rq_mergeable(req));
1233 1233
1234 if (!ll_front_merge_fn(q, req, bio)) 1234 if (!ll_front_merge_fn(q, req, bio))
1235 break; 1235 break;
1236 1236
1237 trace_block_bio_frontmerge(q, bio); 1237 trace_block_bio_frontmerge(q, bio);
1238 1238
1239 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { 1239 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1240 blk_rq_set_mixed_merge(req); 1240 blk_rq_set_mixed_merge(req);
1241 req->cmd_flags &= ~REQ_FAILFAST_MASK; 1241 req->cmd_flags &= ~REQ_FAILFAST_MASK;
1242 req->cmd_flags |= ff; 1242 req->cmd_flags |= ff;
1243 } 1243 }
1244 1244
1245 bio->bi_next = req->bio; 1245 bio->bi_next = req->bio;
1246 req->bio = bio; 1246 req->bio = bio;
1247 1247
1248 /* 1248 /*
1249 * may not be valid. If the low level driver said 1249 * may not be valid. If the low level driver said
1250 * it didn't need a bounce buffer then it better 1250 * it didn't need a bounce buffer then it better
1251 * not touch req->buffer either... 1251 * not touch req->buffer either...
1252 */ 1252 */
1253 req->buffer = bio_data(bio); 1253 req->buffer = bio_data(bio);
1254 req->__sector = bio->bi_sector; 1254 req->__sector = bio->bi_sector;
1255 req->__data_len += bytes; 1255 req->__data_len += bytes;
1256 req->ioprio = ioprio_best(req->ioprio, prio); 1256 req->ioprio = ioprio_best(req->ioprio, prio);
1257 if (!blk_rq_cpu_valid(req)) 1257 if (!blk_rq_cpu_valid(req))
1258 req->cpu = bio->bi_comp_cpu; 1258 req->cpu = bio->bi_comp_cpu;
1259 drive_stat_acct(req, 0); 1259 drive_stat_acct(req, 0);
1260 elv_bio_merged(q, req, bio); 1260 elv_bio_merged(q, req, bio);
1261 if (!attempt_front_merge(q, req)) 1261 if (!attempt_front_merge(q, req))
1262 elv_merged_request(q, req, el_ret); 1262 elv_merged_request(q, req, el_ret);
1263 goto out; 1263 goto out;
1264 1264
1265 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 1265 /* ELV_NO_MERGE: elevator says don't/can't merge. */
1266 default: 1266 default:
1267 ; 1267 ;
1268 } 1268 }
1269 1269
1270 get_rq: 1270 get_rq:
1271 /* 1271 /*
1272 * This sync check and mask will be re-done in init_request_from_bio(), 1272 * This sync check and mask will be re-done in init_request_from_bio(),
1273 * but we need to set it earlier to expose the sync flag to the 1273 * but we need to set it earlier to expose the sync flag to the
1274 * rq allocator and io schedulers. 1274 * rq allocator and io schedulers.
1275 */ 1275 */
1276 rw_flags = bio_data_dir(bio); 1276 rw_flags = bio_data_dir(bio);
1277 if (sync) 1277 if (sync)
1278 rw_flags |= REQ_RW_SYNC; 1278 rw_flags |= REQ_RW_SYNC;
1279 1279
1280 /* 1280 /*
1281 * Grab a free request. This might sleep but cannot fail. 1281 * Grab a free request. This might sleep but cannot fail.
1282 * Returns with the queue unlocked. 1282 * Returns with the queue unlocked.
1283 */ 1283 */
1284 req = get_request_wait(q, rw_flags, bio); 1284 req = get_request_wait(q, rw_flags, bio);
1285 1285
1286 /* 1286 /*
1287 * After dropping the lock and possibly sleeping here, our request 1287 * After dropping the lock and possibly sleeping here, our request
1288 * may now be mergeable after it had proven unmergeable (above). 1288 * may now be mergeable after it had proven unmergeable (above).
1289 * We don't worry about that case for efficiency. It won't happen 1289 * We don't worry about that case for efficiency. It won't happen
1290 * often, and the elevators are able to handle it. 1290 * often, and the elevators are able to handle it.
1291 */ 1291 */
1292 init_request_from_bio(req, bio); 1292 init_request_from_bio(req, bio);
1293 1293
1294 spin_lock_irq(q->queue_lock); 1294 spin_lock_irq(q->queue_lock);
1295 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1295 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1296 bio_flagged(bio, BIO_CPU_AFFINE)) 1296 bio_flagged(bio, BIO_CPU_AFFINE))
1297 req->cpu = blk_cpu_to_group(smp_processor_id()); 1297 req->cpu = blk_cpu_to_group(smp_processor_id());
1298 if (queue_should_plug(q) && elv_queue_empty(q)) 1298 if (queue_should_plug(q) && elv_queue_empty(q))
1299 blk_plug_device(q); 1299 blk_plug_device(q);
1300 add_request(q, req); 1300 add_request(q, req);
1301 out: 1301 out:
1302 if (unplug || !queue_should_plug(q)) 1302 if (unplug || !queue_should_plug(q))
1303 __generic_unplug_device(q); 1303 __generic_unplug_device(q);
1304 spin_unlock_irq(q->queue_lock); 1304 spin_unlock_irq(q->queue_lock);
1305 return 0; 1305 return 0;
1306 } 1306 }
1307 1307
1308 /* 1308 /*
1309 * If bio->bi_bdev is a partition, remap the location 1309 * If bio->bi_bdev is a partition, remap the location
1310 */ 1310 */
1311 static inline void blk_partition_remap(struct bio *bio) 1311 static inline void blk_partition_remap(struct bio *bio)
1312 { 1312 {
1313 struct block_device *bdev = bio->bi_bdev; 1313 struct block_device *bdev = bio->bi_bdev;
1314 1314
1315 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1315 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1316 struct hd_struct *p = bdev->bd_part; 1316 struct hd_struct *p = bdev->bd_part;
1317 1317
1318 bio->bi_sector += p->start_sect; 1318 bio->bi_sector += p->start_sect;
1319 bio->bi_bdev = bdev->bd_contains; 1319 bio->bi_bdev = bdev->bd_contains;
1320 1320
1321 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1321 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1322 bdev->bd_dev, 1322 bdev->bd_dev,
1323 bio->bi_sector - p->start_sect); 1323 bio->bi_sector - p->start_sect);
1324 } 1324 }
1325 } 1325 }
1326 1326
1327 static void handle_bad_sector(struct bio *bio) 1327 static void handle_bad_sector(struct bio *bio)
1328 { 1328 {
1329 char b[BDEVNAME_SIZE]; 1329 char b[BDEVNAME_SIZE];
1330 1330
1331 printk(KERN_INFO "attempt to access beyond end of device\n"); 1331 printk(KERN_INFO "attempt to access beyond end of device\n");
1332 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1332 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1333 bdevname(bio->bi_bdev, b), 1333 bdevname(bio->bi_bdev, b),
1334 bio->bi_rw, 1334 bio->bi_rw,
1335 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1335 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1336 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 1336 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
1337 1337
1338 set_bit(BIO_EOF, &bio->bi_flags); 1338 set_bit(BIO_EOF, &bio->bi_flags);
1339 } 1339 }
1340 1340
1341 #ifdef CONFIG_FAIL_MAKE_REQUEST 1341 #ifdef CONFIG_FAIL_MAKE_REQUEST
1342 1342
1343 static DECLARE_FAULT_ATTR(fail_make_request); 1343 static DECLARE_FAULT_ATTR(fail_make_request);
1344 1344
1345 static int __init setup_fail_make_request(char *str) 1345 static int __init setup_fail_make_request(char *str)
1346 { 1346 {
1347 return setup_fault_attr(&fail_make_request, str); 1347 return setup_fault_attr(&fail_make_request, str);
1348 } 1348 }
1349 __setup("fail_make_request=", setup_fail_make_request); 1349 __setup("fail_make_request=", setup_fail_make_request);
1350 1350
1351 static int should_fail_request(struct bio *bio) 1351 static int should_fail_request(struct bio *bio)
1352 { 1352 {
1353 struct hd_struct *part = bio->bi_bdev->bd_part; 1353 struct hd_struct *part = bio->bi_bdev->bd_part;
1354 1354
1355 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) 1355 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1356 return should_fail(&fail_make_request, bio->bi_size); 1356 return should_fail(&fail_make_request, bio->bi_size);
1357 1357
1358 return 0; 1358 return 0;
1359 } 1359 }
1360 1360
1361 static int __init fail_make_request_debugfs(void) 1361 static int __init fail_make_request_debugfs(void)
1362 { 1362 {
1363 return init_fault_attr_dentries(&fail_make_request, 1363 return init_fault_attr_dentries(&fail_make_request,
1364 "fail_make_request"); 1364 "fail_make_request");
1365 } 1365 }
1366 1366
1367 late_initcall(fail_make_request_debugfs); 1367 late_initcall(fail_make_request_debugfs);
1368 1368
1369 #else /* CONFIG_FAIL_MAKE_REQUEST */ 1369 #else /* CONFIG_FAIL_MAKE_REQUEST */
1370 1370
1371 static inline int should_fail_request(struct bio *bio) 1371 static inline int should_fail_request(struct bio *bio)
1372 { 1372 {
1373 return 0; 1373 return 0;
1374 } 1374 }
1375 1375
1376 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 1376 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1377 1377
1378 /* 1378 /*
1379 * Check whether this bio extends beyond the end of the device. 1379 * Check whether this bio extends beyond the end of the device.
1380 */ 1380 */
1381 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) 1381 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1382 { 1382 {
1383 sector_t maxsector; 1383 sector_t maxsector;
1384 1384
1385 if (!nr_sectors) 1385 if (!nr_sectors)
1386 return 0; 1386 return 0;
1387 1387
1388 /* Test device or partition size, when known. */ 1388 /* Test device or partition size, when known. */
1389 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 1389 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
1390 if (maxsector) { 1390 if (maxsector) {
1391 sector_t sector = bio->bi_sector; 1391 sector_t sector = bio->bi_sector;
1392 1392
1393 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 1393 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1394 /* 1394 /*
1395 * This may well happen - the kernel calls bread() 1395 * This may well happen - the kernel calls bread()
1396 * without checking the size of the device, e.g., when 1396 * without checking the size of the device, e.g., when
1397 * mounting a device. 1397 * mounting a device.
1398 */ 1398 */
1399 handle_bad_sector(bio); 1399 handle_bad_sector(bio);
1400 return 1; 1400 return 1;
1401 } 1401 }
1402 } 1402 }
1403 1403
1404 return 0; 1404 return 0;
1405 } 1405 }
1406 1406
1407 /** 1407 /**
1408 * generic_make_request - hand a buffer to its device driver for I/O 1408 * generic_make_request - hand a buffer to its device driver for I/O
1409 * @bio: The bio describing the location in memory and on the device. 1409 * @bio: The bio describing the location in memory and on the device.
1410 * 1410 *
1411 * generic_make_request() is used to make I/O requests of block 1411 * generic_make_request() is used to make I/O requests of block
1412 * devices. It is passed a &struct bio, which describes the I/O that needs 1412 * devices. It is passed a &struct bio, which describes the I/O that needs
1413 * to be done. 1413 * to be done.
1414 * 1414 *
1415 * generic_make_request() does not return any status. The 1415 * generic_make_request() does not return any status. The
1416 * success/failure status of the request, along with notification of 1416 * success/failure status of the request, along with notification of
1417 * completion, is delivered asynchronously through the bio->bi_end_io 1417 * completion, is delivered asynchronously through the bio->bi_end_io
1418 * function described (one day) elsewhere. 1418 * function described (one day) elsewhere.
1419 * 1419 *
1420 * The caller of generic_make_request must make sure that bi_io_vec 1420 * The caller of generic_make_request must make sure that bi_io_vec
1421 * are set to describe the memory buffer, and that bi_dev and bi_sector are 1421 * are set to describe the memory buffer, and that bi_dev and bi_sector are
1422 * set to describe the device address, and the 1422 * set to describe the device address, and the
1423 * bi_end_io and optionally bi_private are set to describe how 1423 * bi_end_io and optionally bi_private are set to describe how
1424 * completion notification should be signaled. 1424 * completion notification should be signaled.
1425 * 1425 *
1426 * generic_make_request and the drivers it calls may use bi_next if this 1426 * generic_make_request and the drivers it calls may use bi_next if this
1427 * bio happens to be merged with someone else, and may change bi_dev and 1427 * bio happens to be merged with someone else, and may change bi_dev and
1428 * bi_sector for remaps as it sees fit. So the values of these fields 1428 * bi_sector for remaps as it sees fit. So the values of these fields
1429 * should NOT be depended on after the call to generic_make_request. 1429 * should NOT be depended on after the call to generic_make_request.
1430 */ 1430 */
1431 static inline void __generic_make_request(struct bio *bio) 1431 static inline void __generic_make_request(struct bio *bio)
1432 { 1432 {
1433 struct request_queue *q; 1433 struct request_queue *q;
1434 sector_t old_sector; 1434 sector_t old_sector;
1435 int ret, nr_sectors = bio_sectors(bio); 1435 int ret, nr_sectors = bio_sectors(bio);
1436 dev_t old_dev; 1436 dev_t old_dev;
1437 int err = -EIO; 1437 int err = -EIO;
1438 1438
1439 might_sleep(); 1439 might_sleep();
1440 1440
1441 if (bio_check_eod(bio, nr_sectors)) 1441 if (bio_check_eod(bio, nr_sectors))
1442 goto end_io; 1442 goto end_io;
1443 1443
1444 /* 1444 /*
1445 * Resolve the mapping until finished. (drivers are 1445 * Resolve the mapping until finished. (drivers are
1446 * still free to implement/resolve their own stacking 1446 * still free to implement/resolve their own stacking
1447 * by explicitly returning 0) 1447 * by explicitly returning 0)
1448 * 1448 *
1449 * NOTE: we don't repeat the blk_size check for each new device. 1449 * NOTE: we don't repeat the blk_size check for each new device.
1450 * Stacking drivers are expected to know what they are doing. 1450 * Stacking drivers are expected to know what they are doing.
1451 */ 1451 */
1452 old_sector = -1; 1452 old_sector = -1;
1453 old_dev = 0; 1453 old_dev = 0;
1454 do { 1454 do {
1455 char b[BDEVNAME_SIZE]; 1455 char b[BDEVNAME_SIZE];
1456 1456
1457 q = bdev_get_queue(bio->bi_bdev); 1457 q = bdev_get_queue(bio->bi_bdev);
1458 if (unlikely(!q)) { 1458 if (unlikely(!q)) {
1459 printk(KERN_ERR 1459 printk(KERN_ERR
1460 "generic_make_request: Trying to access " 1460 "generic_make_request: Trying to access "
1461 "nonexistent block-device %s (%Lu)\n", 1461 "nonexistent block-device %s (%Lu)\n",
1462 bdevname(bio->bi_bdev, b), 1462 bdevname(bio->bi_bdev, b),
1463 (long long) bio->bi_sector); 1463 (long long) bio->bi_sector);
1464 goto end_io; 1464 goto end_io;
1465 } 1465 }
1466 1466
1467 if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && 1467 if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
1468 nr_sectors > queue_max_hw_sectors(q))) { 1468 nr_sectors > queue_max_hw_sectors(q))) {
1469 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1469 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1470 bdevname(bio->bi_bdev, b), 1470 bdevname(bio->bi_bdev, b),
1471 bio_sectors(bio), 1471 bio_sectors(bio),
1472 queue_max_hw_sectors(q)); 1472 queue_max_hw_sectors(q));
1473 goto end_io; 1473 goto end_io;
1474 } 1474 }
1475 1475
1476 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 1476 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1477 goto end_io; 1477 goto end_io;
1478 1478
1479 if (should_fail_request(bio)) 1479 if (should_fail_request(bio))
1480 goto end_io; 1480 goto end_io;
1481 1481
1482 /* 1482 /*
1483 * If this device has partitions, remap block n 1483 * If this device has partitions, remap block n
1484 * of partition p to block n+start(p) of the disk. 1484 * of partition p to block n+start(p) of the disk.
1485 */ 1485 */
1486 blk_partition_remap(bio); 1486 blk_partition_remap(bio);
1487 1487
1488 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) 1488 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1489 goto end_io; 1489 goto end_io;
1490 1490
1491 if (old_sector != -1) 1491 if (old_sector != -1)
1492 trace_block_remap(q, bio, old_dev, old_sector); 1492 trace_block_remap(q, bio, old_dev, old_sector);
1493 1493
1494 old_sector = bio->bi_sector; 1494 old_sector = bio->bi_sector;
1495 old_dev = bio->bi_bdev->bd_dev; 1495 old_dev = bio->bi_bdev->bd_dev;
1496 1496
1497 if (bio_check_eod(bio, nr_sectors)) 1497 if (bio_check_eod(bio, nr_sectors))
1498 goto end_io; 1498 goto end_io;
1499 1499
1500 if (bio_rw_flagged(bio, BIO_RW_DISCARD) && 1500 if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
1501 !blk_queue_discard(q)) { 1501 !blk_queue_discard(q)) {
1502 err = -EOPNOTSUPP; 1502 err = -EOPNOTSUPP;
1503 goto end_io; 1503 goto end_io;
1504 } 1504 }
1505 1505
1506 trace_block_bio_queue(q, bio); 1506 trace_block_bio_queue(q, bio);
1507 1507
1508 ret = q->make_request_fn(q, bio); 1508 ret = q->make_request_fn(q, bio);
1509 } while (ret); 1509 } while (ret);
1510 1510
1511 return; 1511 return;
1512 1512
1513 end_io: 1513 end_io:
1514 bio_endio(bio, err); 1514 bio_endio(bio, err);
1515 } 1515 }
1516 1516
1517 /* 1517 /*
1518 * We only want one ->make_request_fn to be active at a time, 1518 * We only want one ->make_request_fn to be active at a time,
1519 * else stack usage with stacked devices could be a problem. 1519 * else stack usage with stacked devices could be a problem.
1520 * So use current->bio_list to keep a list of requests 1520 * So use current->bio_list to keep a list of requests
1521 * submitted by a make_request_fn function. 1521 * submitted by a make_request_fn function.
1522 * current->bio_list is also used as a flag to say if 1522 * current->bio_list is also used as a flag to say if
1523 * generic_make_request is currently active in this task or not. 1523 * generic_make_request is currently active in this task or not.
1524 * If it is NULL, then no make_request is active. If it is non-NULL, 1524 * If it is NULL, then no make_request is active. If it is non-NULL,
1525 * then a make_request is active, and new requests should be added 1525 * then a make_request is active, and new requests should be added
1526 * at the tail 1526 * at the tail
1527 */ 1527 */
1528 void generic_make_request(struct bio *bio) 1528 void generic_make_request(struct bio *bio)
1529 { 1529 {
1530 struct bio_list bio_list_on_stack; 1530 struct bio_list bio_list_on_stack;
1531 1531
1532 if (current->bio_list) { 1532 if (current->bio_list) {
1533 /* make_request is active */ 1533 /* make_request is active */
1534 bio_list_add(current->bio_list, bio); 1534 bio_list_add(current->bio_list, bio);
1535 return; 1535 return;
1536 } 1536 }
1537 /* following loop may be a bit non-obvious, and so deserves some 1537 /* following loop may be a bit non-obvious, and so deserves some
1538 * explanation. 1538 * explanation.
1539 * Before entering the loop, bio->bi_next is NULL (as all callers 1539 * Before entering the loop, bio->bi_next is NULL (as all callers
1540 * ensure that) so we have a list with a single bio. 1540 * ensure that) so we have a list with a single bio.
1541 * We pretend that we have just taken it off a longer list, so 1541 * We pretend that we have just taken it off a longer list, so
1542 * we assign bio_list to a pointer to the bio_list_on_stack, 1542 * we assign bio_list to a pointer to the bio_list_on_stack,
1543 * thus initialising the bio_list of new bios to be 1543 * thus initialising the bio_list of new bios to be
1544 * added. __generic_make_request may indeed add some more bios 1544 * added. __generic_make_request may indeed add some more bios
1545 * through a recursive call to generic_make_request. If it 1545 * through a recursive call to generic_make_request. If it
1546 * did, we find a non-NULL value in bio_list and re-enter the loop 1546 * did, we find a non-NULL value in bio_list and re-enter the loop
1547 * from the top. In this case we really did just take the bio 1547 * from the top. In this case we really did just take the bio
1548 * off the top of the list (no pretending) and so remove it from 1548 * off the top of the list (no pretending) and so remove it from
1549 * bio_list, and call into __generic_make_request again. 1549 * bio_list, and call into __generic_make_request again.
1550 * 1550 *
1551 * The loop was structured like this to make only one call to 1551 * The loop was structured like this to make only one call to
1552 * __generic_make_request (which is important as it is large and 1552 * __generic_make_request (which is important as it is large and
1553 * inlined) and to keep the structure simple. 1553 * inlined) and to keep the structure simple.
1554 */ 1554 */
1555 BUG_ON(bio->bi_next); 1555 BUG_ON(bio->bi_next);
1556 bio_list_init(&bio_list_on_stack); 1556 bio_list_init(&bio_list_on_stack);
1557 current->bio_list = &bio_list_on_stack; 1557 current->bio_list = &bio_list_on_stack;
1558 do { 1558 do {
1559 __generic_make_request(bio); 1559 __generic_make_request(bio);
1560 bio = bio_list_pop(current->bio_list); 1560 bio = bio_list_pop(current->bio_list);
1561 } while (bio); 1561 } while (bio);
1562 current->bio_list = NULL; /* deactivate */ 1562 current->bio_list = NULL; /* deactivate */
1563 } 1563 }
1564 EXPORT_SYMBOL(generic_make_request); 1564 EXPORT_SYMBOL(generic_make_request);
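
To make the caller contract above concrete, a minimal sketch that reads one page through generic_make_request(); the completion-based waiting and the my_* names are assumptions of the example, and error-status checking is elided.

/* Hedged sketch: submit a one-page read and wait for it to complete. */
static void my_end_io(struct bio *bio, int error)
{
	complete(bio->bi_private);	/* wake the submitter */
	bio_put(bio);
}

static int my_read_page(struct block_device *bdev, sector_t sector,
			struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;			/* device address ... */
	bio->bi_sector = sector;		/* ... and starting sector */
	bio_add_page(bio, page, PAGE_SIZE, 0);	/* bi_io_vec setup */
	bio->bi_end_io = my_end_io;
	bio->bi_private = &done;
	bio->bi_rw = READ;			/* direction; READ is 0 */

	generic_make_request(bio);
	wait_for_completion(&done);		/* signalled from bi_end_io */
	return 0;
}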
1565 1565
1566 /** 1566 /**
1567 * submit_bio - submit a bio to the block device layer for I/O 1567 * submit_bio - submit a bio to the block device layer for I/O
1568 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1568 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1569 * @bio: The &struct bio which describes the I/O 1569 * @bio: The &struct bio which describes the I/O
1570 * 1570 *
1571 * submit_bio() is very similar in purpose to generic_make_request(), and 1571 * submit_bio() is very similar in purpose to generic_make_request(), and
1572 * uses that function to do most of the work. Both are fairly rough 1572 * uses that function to do most of the work. Both are fairly rough
1573 * interfaces; @bio must be set up and ready for I/O. 1573 * interfaces; @bio must be set up and ready for I/O.
1574 * 1574 *
1575 */ 1575 */
1576 void submit_bio(int rw, struct bio *bio) 1576 void submit_bio(int rw, struct bio *bio)
1577 { 1577 {
1578 int count = bio_sectors(bio); 1578 int count = bio_sectors(bio);
1579 1579
1580 bio->bi_rw |= rw; 1580 bio->bi_rw |= rw;
1581 1581
1582 /* 1582 /*
1583 * If it's a regular read/write or a barrier with data attached, 1583 * If it's a regular read/write or a barrier with data attached,
1584 * go through the normal accounting stuff before submission. 1584 * go through the normal accounting stuff before submission.
1585 */ 1585 */
1586 if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) { 1586 if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) {
1587 if (rw & WRITE) { 1587 if (rw & WRITE) {
1588 count_vm_events(PGPGOUT, count); 1588 count_vm_events(PGPGOUT, count);
1589 } else { 1589 } else {
1590 task_io_account_read(bio->bi_size); 1590 task_io_account_read(bio->bi_size);
1591 count_vm_events(PGPGIN, count); 1591 count_vm_events(PGPGIN, count);
1592 } 1592 }
1593 1593
1594 if (unlikely(block_dump)) { 1594 if (unlikely(block_dump)) {
1595 char b[BDEVNAME_SIZE]; 1595 char b[BDEVNAME_SIZE];
1596 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 1596 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
1597 current->comm, task_pid_nr(current), 1597 current->comm, task_pid_nr(current),
1598 (rw & WRITE) ? "WRITE" : "READ", 1598 (rw & WRITE) ? "WRITE" : "READ",
1599 (unsigned long long)bio->bi_sector, 1599 (unsigned long long)bio->bi_sector,
1600 bdevname(bio->bi_bdev, b)); 1600 bdevname(bio->bi_bdev, b));
1601 } 1601 }
1602 } 1602 }
1603 1603
1604 generic_make_request(bio); 1604 generic_make_request(bio);
1605 } 1605 }
1606 EXPORT_SYMBOL(submit_bio); 1606 EXPORT_SYMBOL(submit_bio);
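
A hedged fragment showing the @rw argument described above; the bios are assumed to be fully initialized as for generic_make_request(), and the wrapper function exists only for illustration.

/* Illustrative only: the rw argument carries direction and hint bits. */
static void my_submit_examples(struct bio *rd, struct bio *ra, struct bio *wr)
{
	submit_bio(READ, rd);				/* plain read */
	submit_bio(READA, ra);				/* read-ahead */
	submit_bio(WRITE | (1 << BIO_RW_SYNCIO), wr);	/* sync write */
}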
1607 1607
1608 /** 1608 /**
1609 * blk_rq_check_limits - Helper function to check a request for the queue limit 1609 * blk_rq_check_limits - Helper function to check a request for the queue limit
1610 * @q: the queue 1610 * @q: the queue
1611 * @rq: the request being checked 1611 * @rq: the request being checked
1612 * 1612 *
1613 * Description: 1613 * Description:
1614 * @rq may have been made based on weaker limitations of upper-level queues 1614 * @rq may have been made based on weaker limitations of upper-level queues
1615 * in request stacking drivers, and it may violate the limitation of @q. 1615 * in request stacking drivers, and it may violate the limitation of @q.
1616 * Since the block layer and the underlying device driver trust @rq 1616 * Since the block layer and the underlying device driver trust @rq
1617 * after it is inserted to @q, it should be checked against @q before 1617 * after it is inserted to @q, it should be checked against @q before
1618 * the insertion using this generic function. 1618 * the insertion using this generic function.
1619 * 1619 *
1620 * This function should also be useful for request stacking drivers 1620 * This function should also be useful for request stacking drivers
1621 * in some cases below, so export this function. 1621 * in some cases below, so export this function.
1622 * Request stacking drivers like request-based dm may change the queue 1622 * Request stacking drivers like request-based dm may change the queue
1623 * limits while requests are in the queue (e.g. dm's table swapping). 1623 * limits while requests are in the queue (e.g. dm's table swapping).
1624 * Such request stacking drivers should check those requests against 1624 * Such request stacking drivers should check those requests against
1625 * the new queue limits again when they dispatch those requests, 1625 * the new queue limits again when they dispatch those requests,
1626 * although such checks are also done against the old queue limits 1626 * although such checks are also done against the old queue limits
1627 * when submitting requests. 1627 * when submitting requests.
1628 */ 1628 */
1629 int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1629 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1630 { 1630 {
1631 if (blk_rq_sectors(rq) > queue_max_sectors(q) || 1631 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1632 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { 1632 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1633 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1633 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1634 return -EIO; 1634 return -EIO;
1635 } 1635 }
1636 1636
1637 /* 1637 /*
1638 * queue's settings related to segment counting like q->bounce_pfn 1638 * queue's settings related to segment counting like q->bounce_pfn
1639 * may differ from that of other stacking queues. 1639 * may differ from that of other stacking queues.
1640 * Recalculate it to check the request correctly on this queue's 1640 * Recalculate it to check the request correctly on this queue's
1641 * limitation. 1641 * limitation.
1642 */ 1642 */
1643 blk_recalc_rq_segments(rq); 1643 blk_recalc_rq_segments(rq);
1644 if (rq->nr_phys_segments > queue_max_segments(q)) { 1644 if (rq->nr_phys_segments > queue_max_segments(q)) {
1645 printk(KERN_ERR "%s: over max segments limit.\n", __func__); 1645 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1646 return -EIO; 1646 return -EIO;
1647 } 1647 }
1648 1648
1649 return 0; 1649 return 0;
1650 } 1650 }
1651 EXPORT_SYMBOL_GPL(blk_rq_check_limits); 1651 EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1652 1652
1653 /** 1653 /**
1654 * blk_insert_cloned_request - Helper for stacking drivers to submit a request 1654 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
1655 * @q: the queue to submit the request 1655 * @q: the queue to submit the request
1656 * @rq: the request being queued 1656 * @rq: the request being queued
1657 */ 1657 */
1658 int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1658 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1659 { 1659 {
1660 unsigned long flags; 1660 unsigned long flags;
1661 1661
1662 if (blk_rq_check_limits(q, rq)) 1662 if (blk_rq_check_limits(q, rq))
1663 return -EIO; 1663 return -EIO;
1664 1664
1665 #ifdef CONFIG_FAIL_MAKE_REQUEST 1665 #ifdef CONFIG_FAIL_MAKE_REQUEST
1666 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && 1666 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1667 should_fail(&fail_make_request, blk_rq_bytes(rq))) 1667 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1668 return -EIO; 1668 return -EIO;
1669 #endif 1669 #endif
1670 1670
1671 spin_lock_irqsave(q->queue_lock, flags); 1671 spin_lock_irqsave(q->queue_lock, flags);
1672 1672
1673 /* 1673 /*
1674 * Submitting request must be dequeued before calling this function 1674 * Submitting request must be dequeued before calling this function
1675 * because it will be linked to another request_queue 1675 * because it will be linked to another request_queue
1676 */ 1676 */
1677 BUG_ON(blk_queued_rq(rq)); 1677 BUG_ON(blk_queued_rq(rq));
1678 1678
1679 drive_stat_acct(rq, 1); 1679 drive_stat_acct(rq, 1);
1680 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1680 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1681 1681
1682 spin_unlock_irqrestore(q->queue_lock, flags); 1682 spin_unlock_irqrestore(q->queue_lock, flags);
1683 1683
1684 return 0; 1684 return 0;
1685 } 1685 }
1686 EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1686 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
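
A hedged sketch of the intended caller, a request-based stacking driver dispatching a clone to an underlying queue; how the clone was prepared (e.g. with blk_rq_prep_clone()) is assumed, and the failure path is abbreviated.

/* Illustrative only: push a prepared clone down to the lower queue. */
static int my_dispatch_clone(struct request_queue *lower_q,
			     struct request *orig, struct request *clone)
{
	int ret;

	clone->rq_disk = orig->rq_disk;

	/* checks the clone against lower_q's limits before inserting it */
	ret = blk_insert_cloned_request(lower_q, clone);
	if (ret)
		return ret;	/* caller would fail/retry the original */
	return 0;
}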
1687 1687
1688 /** 1688 /**
1689 * blk_rq_err_bytes - determine number of bytes till the next failure boundary 1689 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
1690 * @rq: request to examine 1690 * @rq: request to examine
1691 * 1691 *
1692 * Description: 1692 * Description:
1693 * A request could be a merge of IOs which require different failure 1693 * A request could be a merge of IOs which require different failure
1694 * handling. This function determines the number of bytes which 1694 * handling. This function determines the number of bytes which
1695 * can be failed from the beginning of the request without 1695 * can be failed from the beginning of the request without
1696 * crossing into an area which needs to be retried further. 1696 * crossing into an area which needs to be retried further.
1697 * 1697 *
1698 * Return: 1698 * Return:
1699 * The number of bytes to fail. 1699 * The number of bytes to fail.
1700 * 1700 *
1701 * Context: 1701 * Context:
1702 * queue_lock must be held. 1702 * queue_lock must be held.
1703 */ 1703 */
1704 unsigned int blk_rq_err_bytes(const struct request *rq) 1704 unsigned int blk_rq_err_bytes(const struct request *rq)
1705 { 1705 {
1706 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 1706 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1707 unsigned int bytes = 0; 1707 unsigned int bytes = 0;
1708 struct bio *bio; 1708 struct bio *bio;
1709 1709
1710 if (!(rq->cmd_flags & REQ_MIXED_MERGE)) 1710 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1711 return blk_rq_bytes(rq); 1711 return blk_rq_bytes(rq);
1712 1712
1713 /* 1713 /*
1714 * Currently the only 'mixing' which can happen is between 1714 * Currently the only 'mixing' which can happen is between
1715 * different failfast types. We can safely fail portions 1715 * different failfast types. We can safely fail portions
1716 * which have all the failfast bits that the first one has - 1716 * which have all the failfast bits that the first one has -
1717 * the ones which are at least as eager to fail as the first 1717 * the ones which are at least as eager to fail as the first
1718 * one. 1718 * one.
1719 */ 1719 */
1720 for (bio = rq->bio; bio; bio = bio->bi_next) { 1720 for (bio = rq->bio; bio; bio = bio->bi_next) {
1721 if ((bio->bi_rw & ff) != ff) 1721 if ((bio->bi_rw & ff) != ff)
1722 break; 1722 break;
1723 bytes += bio->bi_size; 1723 bytes += bio->bi_size;
1724 } 1724 }
1725 1725
1726 /* this could lead to infinite loop */ 1726 /* this could lead to infinite loop */
1727 BUG_ON(blk_rq_bytes(rq) && !bytes); 1727 BUG_ON(blk_rq_bytes(rq) && !bytes);
1728 return bytes; 1728 return bytes;
1729 } 1729 }
1730 EXPORT_SYMBOL_GPL(blk_rq_err_bytes); 1730 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
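
A hedged sketch of using the return value above: fail only the leading bytes whose bios share the first bio's failfast policy, and requeue whatever remains. This is illustrative only and assumes the queue lock is held, as the context note requires.

/* Illustrative only: partial failure of a mixed-merge request. */
static void my_fail_failfast_part(struct request_queue *q, struct request *rq)
{
	unsigned int nr_bytes = blk_rq_err_bytes(rq);

	/* __blk_end_request() returns true while bytes remain unfinished */
	if (__blk_end_request(rq, -EIO, nr_bytes))
		blk_requeue_request(q, rq);	/* retry the rest later */
}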
1731 1731
1732 static void blk_account_io_completion(struct request *req, unsigned int bytes) 1732 static void blk_account_io_completion(struct request *req, unsigned int bytes)
1733 { 1733 {
1734 if (blk_do_io_stat(req)) { 1734 if (blk_do_io_stat(req)) {
1735 const int rw = rq_data_dir(req); 1735 const int rw = rq_data_dir(req);
1736 struct hd_struct *part; 1736 struct hd_struct *part;
1737 int cpu; 1737 int cpu;
1738 1738
1739 cpu = part_stat_lock(); 1739 cpu = part_stat_lock();
1740 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1740 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1741 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1741 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1742 part_stat_unlock(); 1742 part_stat_unlock();
1743 } 1743 }
1744 } 1744 }
1745 1745
1746 static void blk_account_io_done(struct request *req) 1746 static void blk_account_io_done(struct request *req)
1747 { 1747 {
1748 /* 1748 /*
1749 * Account IO completion. bar_rq isn't accounted as a normal 1749 * Account IO completion. bar_rq isn't accounted as a normal
1750 * IO on queueing nor completion. Accounting the containing 1750 * IO on queueing nor completion. Accounting the containing
1751 * request is enough. 1751 * request is enough.
1752 */ 1752 */
1753 if (blk_do_io_stat(req) && req != &req->q->bar_rq) { 1753 if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
1754 unsigned long duration = jiffies - req->start_time; 1754 unsigned long duration = jiffies - req->start_time;
1755 const int rw = rq_data_dir(req); 1755 const int rw = rq_data_dir(req);
1756 struct hd_struct *part; 1756 struct hd_struct *part;
1757 int cpu; 1757 int cpu;
1758 1758
1759 cpu = part_stat_lock(); 1759 cpu = part_stat_lock();
1760 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1760 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1761 1761
1762 part_stat_inc(cpu, part, ios[rw]); 1762 part_stat_inc(cpu, part, ios[rw]);
1763 part_stat_add(cpu, part, ticks[rw], duration); 1763 part_stat_add(cpu, part, ticks[rw], duration);
1764 part_round_stats(cpu, part); 1764 part_round_stats(cpu, part);
1765 part_dec_in_flight(part, rw); 1765 part_dec_in_flight(part, rw);
1766 1766
1767 part_stat_unlock(); 1767 part_stat_unlock();
1768 } 1768 }
1769 } 1769 }
1770 1770
1771 /** 1771 /**
1772 * blk_peek_request - peek at the top of a request queue 1772 * blk_peek_request - peek at the top of a request queue
1773 * @q: request queue to peek at 1773 * @q: request queue to peek at
1774 * 1774 *
1775 * Description: 1775 * Description:
1776 * Return the request at the top of @q. The returned request 1776 * Return the request at the top of @q. The returned request
1777 * should be started using blk_start_request() before LLD starts 1777 * should be started using blk_start_request() before LLD starts
1778 * processing it. 1778 * processing it.
1779 * 1779 *
1780 * Return: 1780 * Return:
1781 * Pointer to the request at the top of @q if available. Null 1781 * Pointer to the request at the top of @q if available. Null
1782 * otherwise. 1782 * otherwise.
1783 * 1783 *
1784 * Context: 1784 * Context:
1785 * queue_lock must be held. 1785 * queue_lock must be held.
1786 */ 1786 */
1787 struct request *blk_peek_request(struct request_queue *q) 1787 struct request *blk_peek_request(struct request_queue *q)
1788 { 1788 {
1789 struct request *rq; 1789 struct request *rq;
1790 int ret; 1790 int ret;
1791 1791
1792 while ((rq = __elv_next_request(q)) != NULL) { 1792 while ((rq = __elv_next_request(q)) != NULL) {
1793 if (!(rq->cmd_flags & REQ_STARTED)) { 1793 if (!(rq->cmd_flags & REQ_STARTED)) {
1794 /* 1794 /*
1795 * This is the first time the device driver 1795 * This is the first time the device driver
1796 * sees this request (possibly after 1796 * sees this request (possibly after
1797 * requeueing). Notify IO scheduler. 1797 * requeueing). Notify IO scheduler.
1798 */ 1798 */
1799 if (blk_sorted_rq(rq)) 1799 if (blk_sorted_rq(rq))
1800 elv_activate_rq(q, rq); 1800 elv_activate_rq(q, rq);
1801 1801
1802 /* 1802 /*
1803 * just mark as started even if we don't start 1803 * just mark as started even if we don't start
1804 * it, a request that has been delayed should 1804 * it, a request that has been delayed should
1805 * not be passed by new incoming requests 1805 * not be passed by new incoming requests
1806 */ 1806 */
1807 rq->cmd_flags |= REQ_STARTED; 1807 rq->cmd_flags |= REQ_STARTED;
1808 trace_block_rq_issue(q, rq); 1808 trace_block_rq_issue(q, rq);
1809 } 1809 }
1810 1810
1811 if (!q->boundary_rq || q->boundary_rq == rq) { 1811 if (!q->boundary_rq || q->boundary_rq == rq) {
1812 q->end_sector = rq_end_sector(rq); 1812 q->end_sector = rq_end_sector(rq);
1813 q->boundary_rq = NULL; 1813 q->boundary_rq = NULL;
1814 } 1814 }
1815 1815
1816 if (rq->cmd_flags & REQ_DONTPREP) 1816 if (rq->cmd_flags & REQ_DONTPREP)
1817 break; 1817 break;
1818 1818
1819 if (q->dma_drain_size && blk_rq_bytes(rq)) { 1819 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1820 /* 1820 /*
1821 * make sure space for the drain appears; we 1821 * make sure space for the drain appears; we
1822 * know we can do this because max_hw_segments 1822 * know we can do this because max_hw_segments
1823 * has been adjusted to be one fewer than the 1823 * has been adjusted to be one fewer than the
1824 * device can handle 1824 * device can handle
1825 */ 1825 */
1826 rq->nr_phys_segments++; 1826 rq->nr_phys_segments++;
1827 } 1827 }
1828 1828
1829 if (!q->prep_rq_fn) 1829 if (!q->prep_rq_fn)
1830 break; 1830 break;
1831 1831
1832 ret = q->prep_rq_fn(q, rq); 1832 ret = q->prep_rq_fn(q, rq);
1833 if (ret == BLKPREP_OK) { 1833 if (ret == BLKPREP_OK) {
1834 break; 1834 break;
1835 } else if (ret == BLKPREP_DEFER) { 1835 } else if (ret == BLKPREP_DEFER) {
1836 /* 1836 /*
1837 * the request may have been (partially) prepped. 1837 * the request may have been (partially) prepped.
1838 * we need to keep this request in the front to 1838 * we need to keep this request in the front to
1839 * avoid resource deadlock. REQ_STARTED will 1839 * avoid resource deadlock. REQ_STARTED will
1840 * prevent other fs requests from passing this one. 1840 * prevent other fs requests from passing this one.
1841 */ 1841 */
1842 if (q->dma_drain_size && blk_rq_bytes(rq) && 1842 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1843 !(rq->cmd_flags & REQ_DONTPREP)) { 1843 !(rq->cmd_flags & REQ_DONTPREP)) {
1844 /* 1844 /*
1845 * remove the space for the drain we added 1845 * remove the space for the drain we added
1846 * so that we don't add it again 1846 * so that we don't add it again
1847 */ 1847 */
1848 --rq->nr_phys_segments; 1848 --rq->nr_phys_segments;
1849 } 1849 }
1850 1850
1851 rq = NULL; 1851 rq = NULL;
1852 break; 1852 break;
1853 } else if (ret == BLKPREP_KILL) { 1853 } else if (ret == BLKPREP_KILL) {
1854 rq->cmd_flags |= REQ_QUIET; 1854 rq->cmd_flags |= REQ_QUIET;
1855 /* 1855 /*
1856 * Mark this request as started so we don't trigger 1856 * Mark this request as started so we don't trigger
1857 * any debug logic in the end I/O path. 1857 * any debug logic in the end I/O path.
1858 */ 1858 */
1859 blk_start_request(rq); 1859 blk_start_request(rq);
1860 __blk_end_request_all(rq, -EIO); 1860 __blk_end_request_all(rq, -EIO);
1861 } else { 1861 } else {
1862 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 1862 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1863 break; 1863 break;
1864 } 1864 }
1865 } 1865 }
1866 1866
1867 return rq; 1867 return rq;
1868 } 1868 }
1869 EXPORT_SYMBOL(blk_peek_request); 1869 EXPORT_SYMBOL(blk_peek_request);
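
A hedged sketch of the peek-then-start pattern implied by the description: look at the head of the queue, and only dequeue with blk_start_request() once the hardware can actually take the request. The my_hw_* helpers are hypothetical.

/* Illustrative request_fn: start a request only when there is room for it. */
static int my_hw_can_take(struct request *rq);	/* hypothetical */
static void my_hw_queue(struct request *rq);	/* hypothetical */

static void my_peek_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_peek_request(q)) != NULL) {
		if (!my_hw_can_take(rq))
			break;			/* leave it on the queue */

		blk_start_request(rq);		/* dequeue + arm timeout */
		my_hw_queue(rq);		/* hand off to the hardware */
	}
}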
1870 1870
1871 void blk_dequeue_request(struct request *rq) 1871 void blk_dequeue_request(struct request *rq)
1872 { 1872 {
1873 struct request_queue *q = rq->q; 1873 struct request_queue *q = rq->q;
1874 1874
1875 BUG_ON(list_empty(&rq->queuelist)); 1875 BUG_ON(list_empty(&rq->queuelist));
1876 BUG_ON(ELV_ON_HASH(rq)); 1876 BUG_ON(ELV_ON_HASH(rq));
1877 1877
1878 list_del_init(&rq->queuelist); 1878 list_del_init(&rq->queuelist);
1879 1879
1880 /* 1880 /*
1881 * the time frame between a request being removed from the lists 1881 * the time frame between a request being removed from the lists
1882 * and when it is freed is accounted as io that is in progress at 1882 * and when it is freed is accounted as io that is in progress at
1883 * the driver side. 1883 * the driver side.
1884 */ 1884 */
1885 if (blk_account_rq(rq)) { 1885 if (blk_account_rq(rq)) {
1886 q->in_flight[rq_is_sync(rq)]++; 1886 q->in_flight[rq_is_sync(rq)]++;
1887 set_io_start_time_ns(rq); 1887 set_io_start_time_ns(rq);
1888 } 1888 }
1889 } 1889 }
1890 1890
1891 /** 1891 /**
1892 * blk_start_request - start request processing on the driver 1892 * blk_start_request - start request processing on the driver
1893 * @req: request to dequeue 1893 * @req: request to dequeue
1894 * 1894 *
1895 * Description: 1895 * Description:
1896 * Dequeue @req and start timeout timer on it. This hands off the 1896 * Dequeue @req and start timeout timer on it. This hands off the
1897 * request to the driver. 1897 * request to the driver.
1898 * 1898 *
1899 * Block internal functions which don't want to start timer should 1899 * Block internal functions which don't want to start timer should
1900 * call blk_dequeue_request(). 1900 * call blk_dequeue_request().
1901 * 1901 *
1902 * Context: 1902 * Context:
1903 * queue_lock must be held. 1903 * queue_lock must be held.
1904 */ 1904 */
1905 void blk_start_request(struct request *req) 1905 void blk_start_request(struct request *req)
1906 { 1906 {
1907 blk_dequeue_request(req); 1907 blk_dequeue_request(req);
1908 1908
1909 /* 1909 /*
1910 * We are now handing the request to the hardware, initialize 1910 * We are now handing the request to the hardware, initialize
1911 * resid_len to full count and add the timeout handler. 1911 * resid_len to full count and add the timeout handler.
1912 */ 1912 */
1913 req->resid_len = blk_rq_bytes(req); 1913 req->resid_len = blk_rq_bytes(req);
1914 if (unlikely(blk_bidi_rq(req))) 1914 if (unlikely(blk_bidi_rq(req)))
1915 req->next_rq->resid_len = blk_rq_bytes(req->next_rq); 1915 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1916 1916
1917 blk_add_timer(req); 1917 blk_add_timer(req);
1918 } 1918 }
1919 EXPORT_SYMBOL(blk_start_request); 1919 EXPORT_SYMBOL(blk_start_request);
1920 1920
1921 /** 1921 /**
1922 * blk_fetch_request - fetch a request from a request queue 1922 * blk_fetch_request - fetch a request from a request queue
1923 * @q: request queue to fetch a request from 1923 * @q: request queue to fetch a request from
1924 * 1924 *
1925 * Description: 1925 * Description:
1926 * Return the request at the top of @q. The request is started on 1926 * Return the request at the top of @q. The request is started on
1927 * return and LLD can start processing it immediately. 1927 * return and LLD can start processing it immediately.
1928 * 1928 *
1929 * Return: 1929 * Return:
1930 * Pointer to the request at the top of @q if available. Null 1930 * Pointer to the request at the top of @q if available. Null
1931 * otherwise. 1931 * otherwise.
1932 * 1932 *
1933 * Context: 1933 * Context:
1934 * queue_lock must be held. 1934 * queue_lock must be held.
1935 */ 1935 */
1936 struct request *blk_fetch_request(struct request_queue *q) 1936 struct request *blk_fetch_request(struct request_queue *q)
1937 { 1937 {
1938 struct request *rq; 1938 struct request *rq;
1939 1939
1940 rq = blk_peek_request(q); 1940 rq = blk_peek_request(q);
1941 if (rq) 1941 if (rq)
1942 blk_start_request(rq); 1942 blk_start_request(rq);
1943 return rq; 1943 return rq;
1944 } 1944 }
1945 EXPORT_SYMBOL(blk_fetch_request); 1945 EXPORT_SYMBOL(blk_fetch_request);
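blk_fetch_request() is the usual entry point for a simple, synchronous ->request_fn. A minimal sketch, assuming a hypothetical mydrv_xfer() that performs the whole transfer while the queue lock is held (drivers with asynchronous hardware would instead drop the lock and complete the request later):

	static void mydrv_request_fn(struct request_queue *q)
	{
		struct request *rq;

		/* ->request_fn is invoked with q->queue_lock already held */
		while ((rq = blk_fetch_request(q)) != NULL) {
			int error = mydrv_xfer(rq);	/* hypothetical: returns 0 or -EIO */

			/* queue lock is still held, so use the __ variant */
			__blk_end_request_all(rq, error);
		}
	}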
1946 1946
1947 /** 1947 /**
1948 * blk_update_request - Special helper function for request stacking drivers 1948 * blk_update_request - Special helper function for request stacking drivers
1949 * @req: the request being processed 1949 * @req: the request being processed
1950 * @error: %0 for success, < %0 for error 1950 * @error: %0 for success, < %0 for error
1951 * @nr_bytes: number of bytes to complete @req 1951 * @nr_bytes: number of bytes to complete @req
1952 * 1952 *
1953 * Description: 1953 * Description:
1954 * Ends I/O on a number of bytes attached to @req, but doesn't complete 1954 * Ends I/O on a number of bytes attached to @req, but doesn't complete
1955 * the request structure even if @req doesn't have leftover. 1955 * the request structure even if @req doesn't have leftover.
1956 * If @req has leftover, sets it up for the next range of segments. 1956 * If @req has leftover, sets it up for the next range of segments.
1957 * 1957 *
1958 * This special helper function is only for request stacking drivers 1958 * This special helper function is only for request stacking drivers
1959 * (e.g. request-based dm) so that they can handle partial completion. 1959 * (e.g. request-based dm) so that they can handle partial completion.
1960 * Actual device drivers should use blk_end_request instead. 1960 * Actual device drivers should use blk_end_request instead.
1961 * 1961 *
1962 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees 1962 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees
1963 * %false return from this function. 1963 * %false return from this function.
1964 * 1964 *
1965 * Return: 1965 * Return:
1966 * %false - this request doesn't have any more data 1966 * %false - this request doesn't have any more data
1967 * %true - this request has more data 1967 * %true - this request has more data
1968 **/ 1968 **/
1969 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 1969 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
1970 { 1970 {
1971 int total_bytes, bio_nbytes, next_idx = 0; 1971 int total_bytes, bio_nbytes, next_idx = 0;
1972 struct bio *bio; 1972 struct bio *bio;
1973 1973
1974 if (!req->bio) 1974 if (!req->bio)
1975 return false; 1975 return false;
1976 1976
1977 trace_block_rq_complete(req->q, req); 1977 trace_block_rq_complete(req->q, req);
1978 1978
1979 /* 1979 /*
1980 * For fs requests, rq is just a carrier of independent bios 1980 * For fs requests, rq is just a carrier of independent bios
1981 * and each partial completion should be handled separately. 1981 * and each partial completion should be handled separately.
1982 * Reset per-request error on each partial completion. 1982 * Reset per-request error on each partial completion.
1983 * 1983 *
1984 * TODO: tj: This is too subtle. It would be better to let 1984 * TODO: tj: This is too subtle. It would be better to let
1985 * low level drivers do what they see fit. 1985 * low level drivers do what they see fit.
1986 */ 1986 */
1987 if (blk_fs_request(req)) 1987 if (blk_fs_request(req))
1988 req->errors = 0; 1988 req->errors = 0;
1989 1989
1990 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { 1990 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
1991 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", 1991 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
1992 req->rq_disk ? req->rq_disk->disk_name : "?", 1992 req->rq_disk ? req->rq_disk->disk_name : "?",
1993 (unsigned long long)blk_rq_pos(req)); 1993 (unsigned long long)blk_rq_pos(req));
1994 } 1994 }
1995 1995
1996 blk_account_io_completion(req, nr_bytes); 1996 blk_account_io_completion(req, nr_bytes);
1997 1997
1998 total_bytes = bio_nbytes = 0; 1998 total_bytes = bio_nbytes = 0;
1999 while ((bio = req->bio) != NULL) { 1999 while ((bio = req->bio) != NULL) {
2000 int nbytes; 2000 int nbytes;
2001 2001
2002 if (nr_bytes >= bio->bi_size) { 2002 if (nr_bytes >= bio->bi_size) {
2003 req->bio = bio->bi_next; 2003 req->bio = bio->bi_next;
2004 nbytes = bio->bi_size; 2004 nbytes = bio->bi_size;
2005 req_bio_endio(req, bio, nbytes, error); 2005 req_bio_endio(req, bio, nbytes, error);
2006 next_idx = 0; 2006 next_idx = 0;
2007 bio_nbytes = 0; 2007 bio_nbytes = 0;
2008 } else { 2008 } else {
2009 int idx = bio->bi_idx + next_idx; 2009 int idx = bio->bi_idx + next_idx;
2010 2010
2011 if (unlikely(idx >= bio->bi_vcnt)) { 2011 if (unlikely(idx >= bio->bi_vcnt)) {
2012 blk_dump_rq_flags(req, "__end_that"); 2012 blk_dump_rq_flags(req, "__end_that");
2013 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", 2013 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2014 __func__, idx, bio->bi_vcnt); 2014 __func__, idx, bio->bi_vcnt);
2015 break; 2015 break;
2016 } 2016 }
2017 2017
2018 nbytes = bio_iovec_idx(bio, idx)->bv_len; 2018 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2019 BIO_BUG_ON(nbytes > bio->bi_size); 2019 BIO_BUG_ON(nbytes > bio->bi_size);
2020 2020
2021 /* 2021 /*
2022 * not a complete bvec done 2022 * not a complete bvec done
2023 */ 2023 */
2024 if (unlikely(nbytes > nr_bytes)) { 2024 if (unlikely(nbytes > nr_bytes)) {
2025 bio_nbytes += nr_bytes; 2025 bio_nbytes += nr_bytes;
2026 total_bytes += nr_bytes; 2026 total_bytes += nr_bytes;
2027 break; 2027 break;
2028 } 2028 }
2029 2029
2030 /* 2030 /*
2031 * advance to the next vector 2031 * advance to the next vector
2032 */ 2032 */
2033 next_idx++; 2033 next_idx++;
2034 bio_nbytes += nbytes; 2034 bio_nbytes += nbytes;
2035 } 2035 }
2036 2036
2037 total_bytes += nbytes; 2037 total_bytes += nbytes;
2038 nr_bytes -= nbytes; 2038 nr_bytes -= nbytes;
2039 2039
2040 bio = req->bio; 2040 bio = req->bio;
2041 if (bio) { 2041 if (bio) {
2042 /* 2042 /*
2043 * end more in this run, or just return 'not-done' 2043 * end more in this run, or just return 'not-done'
2044 */ 2044 */
2045 if (unlikely(nr_bytes <= 0)) 2045 if (unlikely(nr_bytes <= 0))
2046 break; 2046 break;
2047 } 2047 }
2048 } 2048 }
2049 2049
2050 /* 2050 /*
2051 * completely done 2051 * completely done
2052 */ 2052 */
2053 if (!req->bio) { 2053 if (!req->bio) {
2054 /* 2054 /*
2055 * Reset counters so that the request stacking driver 2055 * Reset counters so that the request stacking driver
2056 * can find how many bytes remain in the request 2056 * can find how many bytes remain in the request
2057 * later. 2057 * later.
2058 */ 2058 */
2059 req->__data_len = 0; 2059 req->__data_len = 0;
2060 return false; 2060 return false;
2061 } 2061 }
2062 2062
2063 /* 2063 /*
2064 * if the request wasn't completed, update state 2064 * if the request wasn't completed, update state
2065 */ 2065 */
2066 if (bio_nbytes) { 2066 if (bio_nbytes) {
2067 req_bio_endio(req, bio, bio_nbytes, error); 2067 req_bio_endio(req, bio, bio_nbytes, error);
2068 bio->bi_idx += next_idx; 2068 bio->bi_idx += next_idx;
2069 bio_iovec(bio)->bv_offset += nr_bytes; 2069 bio_iovec(bio)->bv_offset += nr_bytes;
2070 bio_iovec(bio)->bv_len -= nr_bytes; 2070 bio_iovec(bio)->bv_len -= nr_bytes;
2071 } 2071 }
2072 2072
2073 req->__data_len -= total_bytes; 2073 req->__data_len -= total_bytes;
2074 req->buffer = bio_data(req->bio); 2074 req->buffer = bio_data(req->bio);
2075 2075
2076 /* update sector only for requests with clear definition of sector */ 2076 /* update sector only for requests with clear definition of sector */
2077 if (blk_fs_request(req) || blk_discard_rq(req)) 2077 if (blk_fs_request(req) || blk_discard_rq(req))
2078 req->__sector += total_bytes >> 9; 2078 req->__sector += total_bytes >> 9;
2079 2079
2080 /* mixed attributes always follow the first bio */ 2080 /* mixed attributes always follow the first bio */
2081 if (req->cmd_flags & REQ_MIXED_MERGE) { 2081 if (req->cmd_flags & REQ_MIXED_MERGE) {
2082 req->cmd_flags &= ~REQ_FAILFAST_MASK; 2082 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2083 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; 2083 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2084 } 2084 }
2085 2085
2086 /* 2086 /*
2087 * If total number of sectors is less than the first segment 2087 * If total number of sectors is less than the first segment
2088 * size, something has gone terribly wrong. 2088 * size, something has gone terribly wrong.
2089 */ 2089 */
2090 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { 2090 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2091 printk(KERN_ERR "blk: request botched\n"); 2091 printk(KERN_ERR "blk: request botched\n");
2092 req->__data_len = blk_rq_cur_bytes(req); 2092 req->__data_len = blk_rq_cur_bytes(req);
2093 } 2093 }
2094 2094
2095 /* recalculate the number of segments */ 2095 /* recalculate the number of segments */
2096 blk_recalc_rq_segments(req); 2096 blk_recalc_rq_segments(req);
2097 2097
2098 return true; 2098 return true;
2099 } 2099 }
2100 EXPORT_SYMBOL_GPL(blk_update_request); 2100 EXPORT_SYMBOL_GPL(blk_update_request);
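For a request stacking driver, the usual pattern is to complete the original request piecewise as each cloned bio finishes, and only finish the request structure once nothing is left. A rough sketch under that assumption (not the exact request-based dm code):

	/* called when one cloned bio of @orig has completed @nr_bytes */
	static void stacking_bio_done(struct request *orig, int error,
				      unsigned int nr_bytes)
	{
		if (blk_update_request(orig, error, nr_bytes))
			return;			/* bios remain, keep @orig alive */

		/* nothing left: finish the request (takes the queue lock itself) */
		blk_end_request_all(orig, error);
	}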
2101 2101
2102 static bool blk_update_bidi_request(struct request *rq, int error, 2102 static bool blk_update_bidi_request(struct request *rq, int error,
2103 unsigned int nr_bytes, 2103 unsigned int nr_bytes,
2104 unsigned int bidi_bytes) 2104 unsigned int bidi_bytes)
2105 { 2105 {
2106 if (blk_update_request(rq, error, nr_bytes)) 2106 if (blk_update_request(rq, error, nr_bytes))
2107 return true; 2107 return true;
2108 2108
2109 /* Bidi request must be completed as a whole */ 2109 /* Bidi request must be completed as a whole */
2110 if (unlikely(blk_bidi_rq(rq)) && 2110 if (unlikely(blk_bidi_rq(rq)) &&
2111 blk_update_request(rq->next_rq, error, bidi_bytes)) 2111 blk_update_request(rq->next_rq, error, bidi_bytes))
2112 return true; 2112 return true;
2113 2113
2114 add_disk_randomness(rq->rq_disk); 2114 if (blk_queue_add_random(rq->q))
2115 add_disk_randomness(rq->rq_disk);
2115 2116
2116 return false; 2117 return false;
2117 } 2118 }
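The new blk_queue_add_random() test is what makes the entropy contribution optional per queue. It is presumably a flag helper added next to the other queue-flag accessors in blkdev.h, roughly along these lines (the exact bit number is illustrative, and the flag would have to be set by default so behaviour is unchanged until someone turns it off):

	#define QUEUE_FLAG_ADD_RANDOM	16	/* illustrative bit: contribute to the random pool */

	#define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)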
2118 2119
2119 /* 2120 /*
2120 * queue lock must be held 2121 * queue lock must be held
2121 */ 2122 */
2122 static void blk_finish_request(struct request *req, int error) 2123 static void blk_finish_request(struct request *req, int error)
2123 { 2124 {
2124 if (blk_rq_tagged(req)) 2125 if (blk_rq_tagged(req))
2125 blk_queue_end_tag(req->q, req); 2126 blk_queue_end_tag(req->q, req);
2126 2127
2127 BUG_ON(blk_queued_rq(req)); 2128 BUG_ON(blk_queued_rq(req));
2128 2129
2129 if (unlikely(laptop_mode) && blk_fs_request(req)) 2130 if (unlikely(laptop_mode) && blk_fs_request(req))
2130 laptop_io_completion(&req->q->backing_dev_info); 2131 laptop_io_completion(&req->q->backing_dev_info);
2131 2132
2132 blk_delete_timer(req); 2133 blk_delete_timer(req);
2133 2134
2134 blk_account_io_done(req); 2135 blk_account_io_done(req);
2135 2136
2136 if (req->end_io) 2137 if (req->end_io)
2137 req->end_io(req, error); 2138 req->end_io(req, error);
2138 else { 2139 else {
2139 if (blk_bidi_rq(req)) 2140 if (blk_bidi_rq(req))
2140 __blk_put_request(req->next_rq->q, req->next_rq); 2141 __blk_put_request(req->next_rq->q, req->next_rq);
2141 2142
2142 __blk_put_request(req->q, req); 2143 __blk_put_request(req->q, req);
2143 } 2144 }
2144 } 2145 }
2145 2146
2146 /** 2147 /**
2147 * blk_end_bidi_request - Complete a bidi request 2148 * blk_end_bidi_request - Complete a bidi request
2148 * @rq: the request to complete 2149 * @rq: the request to complete
2149 * @error: %0 for success, < %0 for error 2150 * @error: %0 for success, < %0 for error
2150 * @nr_bytes: number of bytes to complete @rq 2151 * @nr_bytes: number of bytes to complete @rq
2151 * @bidi_bytes: number of bytes to complete @rq->next_rq 2152 * @bidi_bytes: number of bytes to complete @rq->next_rq
2152 * 2153 *
2153 * Description: 2154 * Description:
2154 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2155 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
2155 * Drivers that support bidi can safely call this member for any 2156 * Drivers that support bidi can safely call this member for any
2156 * type of request, bidi or uni. In the latter case @bidi_bytes is 2157 * type of request, bidi or uni. In the latter case @bidi_bytes is
2157 * just ignored. 2158 * just ignored.
2158 * 2159 *
2159 * Return: 2160 * Return:
2160 * %false - we are done with this request 2161 * %false - we are done with this request
2161 * %true - still buffers pending for this request 2162 * %true - still buffers pending for this request
2162 **/ 2163 **/
2163 static bool blk_end_bidi_request(struct request *rq, int error, 2164 static bool blk_end_bidi_request(struct request *rq, int error,
2164 unsigned int nr_bytes, unsigned int bidi_bytes) 2165 unsigned int nr_bytes, unsigned int bidi_bytes)
2165 { 2166 {
2166 struct request_queue *q = rq->q; 2167 struct request_queue *q = rq->q;
2167 unsigned long flags; 2168 unsigned long flags;
2168 2169
2169 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2170 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2170 return true; 2171 return true;
2171 2172
2172 spin_lock_irqsave(q->queue_lock, flags); 2173 spin_lock_irqsave(q->queue_lock, flags);
2173 blk_finish_request(rq, error); 2174 blk_finish_request(rq, error);
2174 spin_unlock_irqrestore(q->queue_lock, flags); 2175 spin_unlock_irqrestore(q->queue_lock, flags);
2175 2176
2176 return false; 2177 return false;
2177 } 2178 }
2178 2179
2179 /** 2180 /**
2180 * __blk_end_bidi_request - Complete a bidi request with queue lock held 2181 * __blk_end_bidi_request - Complete a bidi request with queue lock held
2181 * @rq: the request to complete 2182 * @rq: the request to complete
2182 * @error: %0 for success, < %0 for error 2183 * @error: %0 for success, < %0 for error
2183 * @nr_bytes: number of bytes to complete @rq 2184 * @nr_bytes: number of bytes to complete @rq
2184 * @bidi_bytes: number of bytes to complete @rq->next_rq 2185 * @bidi_bytes: number of bytes to complete @rq->next_rq
2185 * 2186 *
2186 * Description: 2187 * Description:
2187 * Identical to blk_end_bidi_request() except that queue lock is 2188 * Identical to blk_end_bidi_request() except that queue lock is
2188 * assumed to be locked on entry and remains so on return. 2189 * assumed to be locked on entry and remains so on return.
2189 * 2190 *
2190 * Return: 2191 * Return:
2191 * %false - we are done with this request 2192 * %false - we are done with this request
2192 * %true - still buffers pending for this request 2193 * %true - still buffers pending for this request
2193 **/ 2194 **/
2194 static bool __blk_end_bidi_request(struct request *rq, int error, 2195 static bool __blk_end_bidi_request(struct request *rq, int error,
2195 unsigned int nr_bytes, unsigned int bidi_bytes) 2196 unsigned int nr_bytes, unsigned int bidi_bytes)
2196 { 2197 {
2197 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2198 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2198 return true; 2199 return true;
2199 2200
2200 blk_finish_request(rq, error); 2201 blk_finish_request(rq, error);
2201 2202
2202 return false; 2203 return false;
2203 } 2204 }
2204 2205
2205 /** 2206 /**
2206 * blk_end_request - Helper function for drivers to complete the request. 2207 * blk_end_request - Helper function for drivers to complete the request.
2207 * @rq: the request being processed 2208 * @rq: the request being processed
2208 * @error: %0 for success, < %0 for error 2209 * @error: %0 for success, < %0 for error
2209 * @nr_bytes: number of bytes to complete 2210 * @nr_bytes: number of bytes to complete
2210 * 2211 *
2211 * Description: 2212 * Description:
2212 * Ends I/O on a number of bytes attached to @rq. 2213 * Ends I/O on a number of bytes attached to @rq.
2213 * If @rq has leftover, sets it up for the next range of segments. 2214 * If @rq has leftover, sets it up for the next range of segments.
2214 * 2215 *
2215 * Return: 2216 * Return:
2216 * %false - we are done with this request 2217 * %false - we are done with this request
2217 * %true - still buffers pending for this request 2218 * %true - still buffers pending for this request
2218 **/ 2219 **/
2219 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2220 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2220 { 2221 {
2221 return blk_end_bidi_request(rq, error, nr_bytes, 0); 2222 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2222 } 2223 }
2223 EXPORT_SYMBOL(blk_end_request); 2224 EXPORT_SYMBOL(blk_end_request);
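In an ordinary (non-stacking) driver, blk_end_request() is typically called from the completion interrupt to retire however many bytes the hardware just finished. A hedged sketch (mydrv_port and the fixed 512-byte step are assumptions for illustration):

	/* runs from the hypothetical mydrv interrupt handler; queue lock not held */
	static void mydrv_sector_done(struct mydrv_port *port, int error)
	{
		/* returns true while buffers are still pending on the request */
		if (!blk_end_request(port->rq, error, 512))
			port->rq = NULL;	/* request fully completed */
	}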
2224 2225
2225 /** 2226 /**
2226 * blk_end_request_all - Helper function for drivers to finish the request. 2227 * blk_end_request_all - Helper function for drivers to finish the request.
2227 * @rq: the request to finish 2228 * @rq: the request to finish
2228 * @error: %0 for success, < %0 for error 2229 * @error: %0 for success, < %0 for error
2229 * 2230 *
2230 * Description: 2231 * Description:
2231 * Completely finish @rq. 2232 * Completely finish @rq.
2232 */ 2233 */
2233 void blk_end_request_all(struct request *rq, int error) 2234 void blk_end_request_all(struct request *rq, int error)
2234 { 2235 {
2235 bool pending; 2236 bool pending;
2236 unsigned int bidi_bytes = 0; 2237 unsigned int bidi_bytes = 0;
2237 2238
2238 if (unlikely(blk_bidi_rq(rq))) 2239 if (unlikely(blk_bidi_rq(rq)))
2239 bidi_bytes = blk_rq_bytes(rq->next_rq); 2240 bidi_bytes = blk_rq_bytes(rq->next_rq);
2240 2241
2241 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2242 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2242 BUG_ON(pending); 2243 BUG_ON(pending);
2243 } 2244 }
2244 EXPORT_SYMBOL(blk_end_request_all); 2245 EXPORT_SYMBOL(blk_end_request_all);
2245 2246
2246 /** 2247 /**
2247 * blk_end_request_cur - Helper function to finish the current request chunk. 2248 * blk_end_request_cur - Helper function to finish the current request chunk.
2248 * @rq: the request to finish the current chunk for 2249 * @rq: the request to finish the current chunk for
2249 * @error: %0 for success, < %0 for error 2250 * @error: %0 for success, < %0 for error
2250 * 2251 *
2251 * Description: 2252 * Description:
2252 * Complete the current consecutively mapped chunk from @rq. 2253 * Complete the current consecutively mapped chunk from @rq.
2253 * 2254 *
2254 * Return: 2255 * Return:
2255 * %false - we are done with this request 2256 * %false - we are done with this request
2256 * %true - still buffers pending for this request 2257 * %true - still buffers pending for this request
2257 */ 2258 */
2258 bool blk_end_request_cur(struct request *rq, int error) 2259 bool blk_end_request_cur(struct request *rq, int error)
2259 { 2260 {
2260 return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2261 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2261 } 2262 }
2262 EXPORT_SYMBOL(blk_end_request_cur); 2263 EXPORT_SYMBOL(blk_end_request_cur);
2263 2264
2264 /** 2265 /**
2265 * blk_end_request_err - Finish a request till the next failure boundary. 2266 * blk_end_request_err - Finish a request till the next failure boundary.
2266 * @rq: the request to finish till the next failure boundary for 2267 * @rq: the request to finish till the next failure boundary for
2267 * @error: must be negative errno 2268 * @error: must be negative errno
2268 * 2269 *
2269 * Description: 2270 * Description:
2270 * Complete @rq till the next failure boundary. 2271 * Complete @rq till the next failure boundary.
2271 * 2272 *
2272 * Return: 2273 * Return:
2273 * %false - we are done with this request 2274 * %false - we are done with this request
2274 * %true - still buffers pending for this request 2275 * %true - still buffers pending for this request
2275 */ 2276 */
2276 bool blk_end_request_err(struct request *rq, int error) 2277 bool blk_end_request_err(struct request *rq, int error)
2277 { 2278 {
2278 WARN_ON(error >= 0); 2279 WARN_ON(error >= 0);
2279 return blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2280 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2280 } 2281 }
2281 EXPORT_SYMBOL_GPL(blk_end_request_err); 2282 EXPORT_SYMBOL_GPL(blk_end_request_err);
2282 2283
2283 /** 2284 /**
2284 * __blk_end_request - Helper function for drivers to complete the request. 2285 * __blk_end_request - Helper function for drivers to complete the request.
2285 * @rq: the request being processed 2286 * @rq: the request being processed
2286 * @error: %0 for success, < %0 for error 2287 * @error: %0 for success, < %0 for error
2287 * @nr_bytes: number of bytes to complete 2288 * @nr_bytes: number of bytes to complete
2288 * 2289 *
2289 * Description: 2290 * Description:
2290 * Must be called with queue lock held unlike blk_end_request(). 2291 * Must be called with queue lock held unlike blk_end_request().
2291 * 2292 *
2292 * Return: 2293 * Return:
2293 * %false - we are done with this request 2294 * %false - we are done with this request
2294 * %true - still buffers pending for this request 2295 * %true - still buffers pending for this request
2295 **/ 2296 **/
2296 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2297 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2297 { 2298 {
2298 return __blk_end_bidi_request(rq, error, nr_bytes, 0); 2299 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2299 } 2300 }
2300 EXPORT_SYMBOL(__blk_end_request); 2301 EXPORT_SYMBOL(__blk_end_request);
2301 2302
2302 /** 2303 /**
2303 * __blk_end_request_all - Helper function for drivers to finish the request. 2304 * __blk_end_request_all - Helper function for drivers to finish the request.
2304 * @rq: the request to finish 2305 * @rq: the request to finish
2305 * @error: %0 for success, < %0 for error 2306 * @error: %0 for success, < %0 for error
2306 * 2307 *
2307 * Description: 2308 * Description:
2308 * Completely finish @rq. Must be called with queue lock held. 2309 * Completely finish @rq. Must be called with queue lock held.
2309 */ 2310 */
2310 void __blk_end_request_all(struct request *rq, int error) 2311 void __blk_end_request_all(struct request *rq, int error)
2311 { 2312 {
2312 bool pending; 2313 bool pending;
2313 unsigned int bidi_bytes = 0; 2314 unsigned int bidi_bytes = 0;
2314 2315
2315 if (unlikely(blk_bidi_rq(rq))) 2316 if (unlikely(blk_bidi_rq(rq)))
2316 bidi_bytes = blk_rq_bytes(rq->next_rq); 2317 bidi_bytes = blk_rq_bytes(rq->next_rq);
2317 2318
2318 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2319 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2319 BUG_ON(pending); 2320 BUG_ON(pending);
2320 } 2321 }
2321 EXPORT_SYMBOL(__blk_end_request_all); 2322 EXPORT_SYMBOL(__blk_end_request_all);
2322 2323
2323 /** 2324 /**
2324 * __blk_end_request_cur - Helper function to finish the current request chunk. 2325 * __blk_end_request_cur - Helper function to finish the current request chunk.
2325 * @rq: the request to finish the current chunk for 2326 * @rq: the request to finish the current chunk for
2326 * @error: %0 for success, < %0 for error 2327 * @error: %0 for success, < %0 for error
2327 * 2328 *
2328 * Description: 2329 * Description:
2329 * Complete the current consecutively mapped chunk from @rq. Must 2330 * Complete the current consecutively mapped chunk from @rq. Must
2330 * be called with queue lock held. 2331 * be called with queue lock held.
2331 * 2332 *
2332 * Return: 2333 * Return:
2333 * %false - we are done with this request 2334 * %false - we are done with this request
2334 * %true - still buffers pending for this request 2335 * %true - still buffers pending for this request
2335 */ 2336 */
2336 bool __blk_end_request_cur(struct request *rq, int error) 2337 bool __blk_end_request_cur(struct request *rq, int error)
2337 { 2338 {
2338 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2339 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2339 } 2340 }
2340 EXPORT_SYMBOL(__blk_end_request_cur); 2341 EXPORT_SYMBOL(__blk_end_request_cur);
2341 2342
2342 /** 2343 /**
2343 * __blk_end_request_err - Finish a request till the next failure boundary. 2344 * __blk_end_request_err - Finish a request till the next failure boundary.
2344 * @rq: the request to finish till the next failure boundary for 2345 * @rq: the request to finish till the next failure boundary for
2345 * @error: must be negative errno 2346 * @error: must be negative errno
2346 * 2347 *
2347 * Description: 2348 * Description:
2348 * Complete @rq till the next failure boundary. Must be called 2349 * Complete @rq till the next failure boundary. Must be called
2349 * with queue lock held. 2350 * with queue lock held.
2350 * 2351 *
2351 * Return: 2352 * Return:
2352 * %false - we are done with this request 2353 * %false - we are done with this request
2353 * %true - still buffers pending for this request 2354 * %true - still buffers pending for this request
2354 */ 2355 */
2355 bool __blk_end_request_err(struct request *rq, int error) 2356 bool __blk_end_request_err(struct request *rq, int error)
2356 { 2357 {
2357 WARN_ON(error >= 0); 2358 WARN_ON(error >= 0);
2358 return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2359 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2359 } 2360 }
2360 EXPORT_SYMBOL_GPL(__blk_end_request_err); 2361 EXPORT_SYMBOL_GPL(__blk_end_request_err);
2361 2362
2362 void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2363 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2363 struct bio *bio) 2364 struct bio *bio)
2364 { 2365 {
2365 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ 2366 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
2366 rq->cmd_flags |= bio->bi_rw & REQ_RW; 2367 rq->cmd_flags |= bio->bi_rw & REQ_RW;
2367 2368
2368 if (bio_has_data(bio)) { 2369 if (bio_has_data(bio)) {
2369 rq->nr_phys_segments = bio_phys_segments(q, bio); 2370 rq->nr_phys_segments = bio_phys_segments(q, bio);
2370 rq->buffer = bio_data(bio); 2371 rq->buffer = bio_data(bio);
2371 } 2372 }
2372 rq->__data_len = bio->bi_size; 2373 rq->__data_len = bio->bi_size;
2373 rq->bio = rq->biotail = bio; 2374 rq->bio = rq->biotail = bio;
2374 2375
2375 if (bio->bi_bdev) 2376 if (bio->bi_bdev)
2376 rq->rq_disk = bio->bi_bdev->bd_disk; 2377 rq->rq_disk = bio->bi_bdev->bd_disk;
2377 } 2378 }
2378 2379
2379 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 2380 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2380 /** 2381 /**
2381 * rq_flush_dcache_pages - Helper function to flush all pages in a request 2382 * rq_flush_dcache_pages - Helper function to flush all pages in a request
2382 * @rq: the request to be flushed 2383 * @rq: the request to be flushed
2383 * 2384 *
2384 * Description: 2385 * Description:
2385 * Flush all pages in @rq. 2386 * Flush all pages in @rq.
2386 */ 2387 */
2387 void rq_flush_dcache_pages(struct request *rq) 2388 void rq_flush_dcache_pages(struct request *rq)
2388 { 2389 {
2389 struct req_iterator iter; 2390 struct req_iterator iter;
2390 struct bio_vec *bvec; 2391 struct bio_vec *bvec;
2391 2392
2392 rq_for_each_segment(bvec, rq, iter) 2393 rq_for_each_segment(bvec, rq, iter)
2393 flush_dcache_page(bvec->bv_page); 2394 flush_dcache_page(bvec->bv_page);
2394 } 2395 }
2395 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); 2396 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2396 #endif 2397 #endif
2397 2398
2398 /** 2399 /**
2399 * blk_lld_busy - Check if underlying low-level drivers of a device are busy 2400 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
2400 * @q : the queue of the device being checked 2401 * @q : the queue of the device being checked
2401 * 2402 *
2402 * Description: 2403 * Description:
2403 * Check if underlying low-level drivers of a device are busy. 2404 * Check if underlying low-level drivers of a device are busy.
2404 * If the drivers want to export their busy state, they must set their own 2405 * If the drivers want to export their busy state, they must set their own
2405 * exporting function using blk_queue_lld_busy() first. 2406 * exporting function using blk_queue_lld_busy() first.
2406 * 2407 *
2407 * Basically, this function is used only by request stacking drivers 2408 * Basically, this function is used only by request stacking drivers
2408 * to stop dispatching requests to underlying devices when underlying 2409 * to stop dispatching requests to underlying devices when underlying
2409 * devices are busy. This behavior allows more I/O merging on the queue 2410 * devices are busy. This behavior allows more I/O merging on the queue
2410 * of the request stacking driver and prevents I/O throughput regression 2411 * of the request stacking driver and prevents I/O throughput regression
2411 * on burst I/O load. 2412 * on burst I/O load.
2412 * 2413 *
2413 * Return: 2414 * Return:
2414 * 0 - Not busy (The request stacking driver should dispatch request) 2415 * 0 - Not busy (The request stacking driver should dispatch request)
2415 * 1 - Busy (The request stacking driver should stop dispatching request) 2416 * 1 - Busy (The request stacking driver should stop dispatching request)
2416 */ 2417 */
2417 int blk_lld_busy(struct request_queue *q) 2418 int blk_lld_busy(struct request_queue *q)
2418 { 2419 {
2419 if (q->lld_busy_fn) 2420 if (q->lld_busy_fn)
2420 return q->lld_busy_fn(q); 2421 return q->lld_busy_fn(q);
2421 2422
2422 return 0; 2423 return 0;
2423 } 2424 }
2424 EXPORT_SYMBOL_GPL(blk_lld_busy); 2425 EXPORT_SYMBOL_GPL(blk_lld_busy);
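A request stacking driver would consult this from its own dispatch path before pushing work to the lower queue. A minimal hedged sketch (bottom_q, the clone, and the -EBUSY convention are assumptions; blk_insert_cloned_request() is the existing submission helper for cloned requests):

	static int stacking_dispatch(struct request_queue *bottom_q,
				     struct request *clone)
	{
		/* back off while the underlying LLD reports itself busy */
		if (blk_lld_busy(bottom_q))
			return -EBUSY;		/* caller requeues and retries later */

		return blk_insert_cloned_request(bottom_q, clone);
	}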
2425 2426
2426 /** 2427 /**
2427 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request 2428 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
2428 * @rq: the clone request to be cleaned up 2429 * @rq: the clone request to be cleaned up
2429 * 2430 *
2430 * Description: 2431 * Description:
2431 * Free all bios in @rq for a cloned request. 2432 * Free all bios in @rq for a cloned request.
2432 */ 2433 */
2433 void blk_rq_unprep_clone(struct request *rq) 2434 void blk_rq_unprep_clone(struct request *rq)
2434 { 2435 {
2435 struct bio *bio; 2436 struct bio *bio;
2436 2437
2437 while ((bio = rq->bio) != NULL) { 2438 while ((bio = rq->bio) != NULL) {
2438 rq->bio = bio->bi_next; 2439 rq->bio = bio->bi_next;
2439 2440
2440 bio_put(bio); 2441 bio_put(bio);
2441 } 2442 }
2442 } 2443 }
2443 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); 2444 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2444 2445
2445 /* 2446 /*
2446 * Copy attributes of the original request to the clone request. 2447 * Copy attributes of the original request to the clone request.
2447 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. 2448 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
2448 */ 2449 */
2449 static void __blk_rq_prep_clone(struct request *dst, struct request *src) 2450 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2450 { 2451 {
2451 dst->cpu = src->cpu; 2452 dst->cpu = src->cpu;
2452 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); 2453 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
2453 dst->cmd_type = src->cmd_type; 2454 dst->cmd_type = src->cmd_type;
2454 dst->__sector = blk_rq_pos(src); 2455 dst->__sector = blk_rq_pos(src);
2455 dst->__data_len = blk_rq_bytes(src); 2456 dst->__data_len = blk_rq_bytes(src);
2456 dst->nr_phys_segments = src->nr_phys_segments; 2457 dst->nr_phys_segments = src->nr_phys_segments;
2457 dst->ioprio = src->ioprio; 2458 dst->ioprio = src->ioprio;
2458 dst->extra_len = src->extra_len; 2459 dst->extra_len = src->extra_len;
2459 } 2460 }
2460 2461
2461 /** 2462 /**
2462 * blk_rq_prep_clone - Helper function to setup clone request 2463 * blk_rq_prep_clone - Helper function to setup clone request
2463 * @rq: the request to be setup 2464 * @rq: the request to be setup
2464 * @rq_src: original request to be cloned 2465 * @rq_src: original request to be cloned
2465 * @bs: bio_set that bios for clone are allocated from 2466 * @bs: bio_set that bios for clone are allocated from
2466 * @gfp_mask: memory allocation mask for bio 2467 * @gfp_mask: memory allocation mask for bio
2467 * @bio_ctr: setup function to be called for each clone bio. 2468 * @bio_ctr: setup function to be called for each clone bio.
2468 * Returns %0 for success, non %0 for failure. 2469 * Returns %0 for success, non %0 for failure.
2469 * @data: private data to be passed to @bio_ctr 2470 * @data: private data to be passed to @bio_ctr
2470 * 2471 *
2471 * Description: 2472 * Description:
2472 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. 2473 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
2473 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) 2474 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
2474 * are not copied, and copying such parts is the caller's responsibility. 2475 * are not copied, and copying such parts is the caller's responsibility.
2475 * Also, pages which the original bios are pointing to are not copied 2476 * Also, pages which the original bios are pointing to are not copied
2476 * and the cloned bios just point to the same pages. 2477 * and the cloned bios just point to the same pages.
2477 * So cloned bios must be completed before original bios, which means 2478 * So cloned bios must be completed before original bios, which means
2478 * the caller must complete @rq before @rq_src. 2479 * the caller must complete @rq before @rq_src.
2479 */ 2480 */
2480 int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 2481 int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2481 struct bio_set *bs, gfp_t gfp_mask, 2482 struct bio_set *bs, gfp_t gfp_mask,
2482 int (*bio_ctr)(struct bio *, struct bio *, void *), 2483 int (*bio_ctr)(struct bio *, struct bio *, void *),
2483 void *data) 2484 void *data)
2484 { 2485 {
2485 struct bio *bio, *bio_src; 2486 struct bio *bio, *bio_src;
2486 2487
2487 if (!bs) 2488 if (!bs)
2488 bs = fs_bio_set; 2489 bs = fs_bio_set;
2489 2490
2490 blk_rq_init(NULL, rq); 2491 blk_rq_init(NULL, rq);
2491 2492
2492 __rq_for_each_bio(bio_src, rq_src) { 2493 __rq_for_each_bio(bio_src, rq_src) {
2493 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); 2494 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2494 if (!bio) 2495 if (!bio)
2495 goto free_and_out; 2496 goto free_and_out;
2496 2497
2497 __bio_clone(bio, bio_src); 2498 __bio_clone(bio, bio_src);
2498 2499
2499 if (bio_integrity(bio_src) && 2500 if (bio_integrity(bio_src) &&
2500 bio_integrity_clone(bio, bio_src, gfp_mask, bs)) 2501 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2501 goto free_and_out; 2502 goto free_and_out;
2502 2503
2503 if (bio_ctr && bio_ctr(bio, bio_src, data)) 2504 if (bio_ctr && bio_ctr(bio, bio_src, data))
2504 goto free_and_out; 2505 goto free_and_out;
2505 2506
2506 if (rq->bio) { 2507 if (rq->bio) {
2507 rq->biotail->bi_next = bio; 2508 rq->biotail->bi_next = bio;
2508 rq->biotail = bio; 2509 rq->biotail = bio;
2509 } else 2510 } else
2510 rq->bio = rq->biotail = bio; 2511 rq->bio = rq->biotail = bio;
2511 } 2512 }
2512 2513
2513 __blk_rq_prep_clone(rq, rq_src); 2514 __blk_rq_prep_clone(rq, rq_src);
2514 2515
2515 return 0; 2516 return 0;
2516 2517
2517 free_and_out: 2518 free_and_out:
2518 if (bio) 2519 if (bio)
2519 bio_free(bio, bs); 2520 bio_free(bio, bs);
2520 blk_rq_unprep_clone(rq); 2521 blk_rq_unprep_clone(rq);
2521 2522
2522 return -ENOMEM; 2523 return -ENOMEM;
2523 } 2524 }
2524 EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 2525 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
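Usage from a stacking driver is roughly: allocate a clone request, let blk_rq_prep_clone() duplicate the bio chain, then hand the clone to the lower queue. A hedged sketch (the end_io callback name is made up; passing a NULL bio_set falls back to fs_bio_set, as the code above shows):

	static int mydrv_setup_clone(struct request *clone, struct request *orig)
	{
		int r;

		r = blk_rq_prep_clone(clone, orig, NULL, GFP_ATOMIC, NULL, NULL);
		if (r)
			return r;

		clone->end_io = mydrv_end_clone;	/* hypothetical completion callback */
		clone->end_io_data = orig;
		return 0;
	}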
2525 2526
2526 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2527 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2527 { 2528 {
2528 return queue_work(kblockd_workqueue, work); 2529 return queue_work(kblockd_workqueue, work);
2529 } 2530 }
2530 EXPORT_SYMBOL(kblockd_schedule_work); 2531 EXPORT_SYMBOL(kblockd_schedule_work);
2531 2532
2532 int __init blk_dev_init(void) 2533 int __init blk_dev_init(void)
2533 { 2534 {
2534 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2535 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2535 sizeof(((struct request *)0)->cmd_flags)); 2536 sizeof(((struct request *)0)->cmd_flags));
2536 2537
2537 kblockd_workqueue = create_workqueue("kblockd"); 2538 kblockd_workqueue = create_workqueue("kblockd");
2538 if (!kblockd_workqueue) 2539 if (!kblockd_workqueue)
2539 panic("Failed to create kblockd\n"); 2540 panic("Failed to create kblockd\n");
2540 2541
2541 request_cachep = kmem_cache_create("blkdev_requests", 2542 request_cachep = kmem_cache_create("blkdev_requests",
2542 sizeof(struct request), 0, SLAB_PANIC, NULL); 2543 sizeof(struct request), 0, SLAB_PANIC, NULL);
2543 2544
2544 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2545 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2545 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2546 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2546 2547
2547 return 0; 2548 return 0;
2548 } 2549 }
2549 2550
1 /* 1 /*
2 * Functions related to sysfs handling 2 * Functions related to sysfs handling
3 */ 3 */
4 #include <linux/kernel.h> 4 #include <linux/kernel.h>
5 #include <linux/slab.h> 5 #include <linux/slab.h>
6 #include <linux/module.h> 6 #include <linux/module.h>
7 #include <linux/bio.h> 7 #include <linux/bio.h>
8 #include <linux/blkdev.h> 8 #include <linux/blkdev.h>
9 #include <linux/blktrace_api.h> 9 #include <linux/blktrace_api.h>
10 10
11 #include "blk.h" 11 #include "blk.h"
12 12
13 struct queue_sysfs_entry { 13 struct queue_sysfs_entry {
14 struct attribute attr; 14 struct attribute attr;
15 ssize_t (*show)(struct request_queue *, char *); 15 ssize_t (*show)(struct request_queue *, char *);
16 ssize_t (*store)(struct request_queue *, const char *, size_t); 16 ssize_t (*store)(struct request_queue *, const char *, size_t);
17 }; 17 };
18 18
19 static ssize_t 19 static ssize_t
20 queue_var_show(unsigned long var, char *page) 20 queue_var_show(unsigned long var, char *page)
21 { 21 {
22 return sprintf(page, "%lu\n", var); 22 return sprintf(page, "%lu\n", var);
23 } 23 }
24 24
25 static ssize_t 25 static ssize_t
26 queue_var_store(unsigned long *var, const char *page, size_t count) 26 queue_var_store(unsigned long *var, const char *page, size_t count)
27 { 27 {
28 char *p = (char *) page; 28 char *p = (char *) page;
29 29
30 *var = simple_strtoul(p, &p, 10); 30 *var = simple_strtoul(p, &p, 10);
31 return count; 31 return count;
32 } 32 }
33 33
34 static ssize_t queue_requests_show(struct request_queue *q, char *page) 34 static ssize_t queue_requests_show(struct request_queue *q, char *page)
35 { 35 {
36 return queue_var_show(q->nr_requests, (page)); 36 return queue_var_show(q->nr_requests, (page));
37 } 37 }
38 38
39 static ssize_t 39 static ssize_t
40 queue_requests_store(struct request_queue *q, const char *page, size_t count) 40 queue_requests_store(struct request_queue *q, const char *page, size_t count)
41 { 41 {
42 struct request_list *rl = &q->rq; 42 struct request_list *rl = &q->rq;
43 unsigned long nr; 43 unsigned long nr;
44 int ret; 44 int ret;
45 45
46 if (!q->request_fn) 46 if (!q->request_fn)
47 return -EINVAL; 47 return -EINVAL;
48 48
49 ret = queue_var_store(&nr, page, count); 49 ret = queue_var_store(&nr, page, count);
50 if (nr < BLKDEV_MIN_RQ) 50 if (nr < BLKDEV_MIN_RQ)
51 nr = BLKDEV_MIN_RQ; 51 nr = BLKDEV_MIN_RQ;
52 52
53 spin_lock_irq(q->queue_lock); 53 spin_lock_irq(q->queue_lock);
54 q->nr_requests = nr; 54 q->nr_requests = nr;
55 blk_queue_congestion_threshold(q); 55 blk_queue_congestion_threshold(q);
56 56
57 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) 57 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
58 blk_set_queue_congested(q, BLK_RW_SYNC); 58 blk_set_queue_congested(q, BLK_RW_SYNC);
59 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) 59 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
60 blk_clear_queue_congested(q, BLK_RW_SYNC); 60 blk_clear_queue_congested(q, BLK_RW_SYNC);
61 61
62 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) 62 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
63 blk_set_queue_congested(q, BLK_RW_ASYNC); 63 blk_set_queue_congested(q, BLK_RW_ASYNC);
64 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) 64 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
65 blk_clear_queue_congested(q, BLK_RW_ASYNC); 65 blk_clear_queue_congested(q, BLK_RW_ASYNC);
66 66
67 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { 67 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
68 blk_set_queue_full(q, BLK_RW_SYNC); 68 blk_set_queue_full(q, BLK_RW_SYNC);
69 } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { 69 } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
70 blk_clear_queue_full(q, BLK_RW_SYNC); 70 blk_clear_queue_full(q, BLK_RW_SYNC);
71 wake_up(&rl->wait[BLK_RW_SYNC]); 71 wake_up(&rl->wait[BLK_RW_SYNC]);
72 } 72 }
73 73
74 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { 74 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
75 blk_set_queue_full(q, BLK_RW_ASYNC); 75 blk_set_queue_full(q, BLK_RW_ASYNC);
76 } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { 76 } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
77 blk_clear_queue_full(q, BLK_RW_ASYNC); 77 blk_clear_queue_full(q, BLK_RW_ASYNC);
78 wake_up(&rl->wait[BLK_RW_ASYNC]); 78 wake_up(&rl->wait[BLK_RW_ASYNC]);
79 } 79 }
80 spin_unlock_irq(q->queue_lock); 80 spin_unlock_irq(q->queue_lock);
81 return ret; 81 return ret;
82 } 82 }
83 83
84 static ssize_t queue_ra_show(struct request_queue *q, char *page) 84 static ssize_t queue_ra_show(struct request_queue *q, char *page)
85 { 85 {
86 unsigned long ra_kb = q->backing_dev_info.ra_pages << 86 unsigned long ra_kb = q->backing_dev_info.ra_pages <<
87 (PAGE_CACHE_SHIFT - 10); 87 (PAGE_CACHE_SHIFT - 10);
88 88
89 return queue_var_show(ra_kb, (page)); 89 return queue_var_show(ra_kb, (page));
90 } 90 }
91 91
92 static ssize_t 92 static ssize_t
93 queue_ra_store(struct request_queue *q, const char *page, size_t count) 93 queue_ra_store(struct request_queue *q, const char *page, size_t count)
94 { 94 {
95 unsigned long ra_kb; 95 unsigned long ra_kb;
96 ssize_t ret = queue_var_store(&ra_kb, page, count); 96 ssize_t ret = queue_var_store(&ra_kb, page, count);
97 97
98 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 98 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
99 99
100 return ret; 100 return ret;
101 } 101 }
102 102
103 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 103 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
104 { 104 {
105 int max_sectors_kb = queue_max_sectors(q) >> 1; 105 int max_sectors_kb = queue_max_sectors(q) >> 1;
106 106
107 return queue_var_show(max_sectors_kb, (page)); 107 return queue_var_show(max_sectors_kb, (page));
108 } 108 }
109 109
110 static ssize_t queue_max_segments_show(struct request_queue *q, char *page) 110 static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
111 { 111 {
112 return queue_var_show(queue_max_segments(q), (page)); 112 return queue_var_show(queue_max_segments(q), (page));
113 } 113 }
114 114
115 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) 115 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
116 { 116 {
117 if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) 117 if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
118 return queue_var_show(queue_max_segment_size(q), (page)); 118 return queue_var_show(queue_max_segment_size(q), (page));
119 119
120 return queue_var_show(PAGE_CACHE_SIZE, (page)); 120 return queue_var_show(PAGE_CACHE_SIZE, (page));
121 } 121 }
122 122
123 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) 123 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
124 { 124 {
125 return queue_var_show(queue_logical_block_size(q), page); 125 return queue_var_show(queue_logical_block_size(q), page);
126 } 126 }
127 127
128 static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page) 128 static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
129 { 129 {
130 return queue_var_show(queue_physical_block_size(q), page); 130 return queue_var_show(queue_physical_block_size(q), page);
131 } 131 }
132 132
133 static ssize_t queue_io_min_show(struct request_queue *q, char *page) 133 static ssize_t queue_io_min_show(struct request_queue *q, char *page)
134 { 134 {
135 return queue_var_show(queue_io_min(q), page); 135 return queue_var_show(queue_io_min(q), page);
136 } 136 }
137 137
138 static ssize_t queue_io_opt_show(struct request_queue *q, char *page) 138 static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
139 { 139 {
140 return queue_var_show(queue_io_opt(q), page); 140 return queue_var_show(queue_io_opt(q), page);
141 } 141 }
142 142
143 static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page) 143 static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
144 { 144 {
145 return queue_var_show(q->limits.discard_granularity, page); 145 return queue_var_show(q->limits.discard_granularity, page);
146 } 146 }
147 147
148 static ssize_t queue_discard_max_show(struct request_queue *q, char *page) 148 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
149 { 149 {
150 return queue_var_show(q->limits.max_discard_sectors << 9, page); 150 return queue_var_show(q->limits.max_discard_sectors << 9, page);
151 } 151 }
152 152
153 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) 153 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
154 { 154 {
155 return queue_var_show(queue_discard_zeroes_data(q), page); 155 return queue_var_show(queue_discard_zeroes_data(q), page);
156 } 156 }
157 157
158 static ssize_t 158 static ssize_t
159 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 159 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
160 { 160 {
161 unsigned long max_sectors_kb, 161 unsigned long max_sectors_kb,
162 max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1, 162 max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
163 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 163 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
164 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 164 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
165 165
166 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 166 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
167 return -EINVAL; 167 return -EINVAL;
168 168
169 spin_lock_irq(q->queue_lock); 169 spin_lock_irq(q->queue_lock);
170 q->limits.max_sectors = max_sectors_kb << 1; 170 q->limits.max_sectors = max_sectors_kb << 1;
171 spin_unlock_irq(q->queue_lock); 171 spin_unlock_irq(q->queue_lock);
172 172
173 return ret; 173 return ret;
174 } 174 }
175 175
176 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 176 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
177 { 177 {
178 int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1; 178 int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
179 179
180 return queue_var_show(max_hw_sectors_kb, (page)); 180 return queue_var_show(max_hw_sectors_kb, (page));
181 } 181 }
182 182
183 static ssize_t queue_nonrot_show(struct request_queue *q, char *page) 183 static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
184 { 184 {
185 return queue_var_show(!blk_queue_nonrot(q), page); 185 return queue_var_show(!blk_queue_nonrot(q), page);
186 } 186 }
187 187
188 static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, 188 static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
189 size_t count) 189 size_t count)
190 { 190 {
191 unsigned long nm; 191 unsigned long nm;
192 ssize_t ret = queue_var_store(&nm, page, count); 192 ssize_t ret = queue_var_store(&nm, page, count);
193 193
194 spin_lock_irq(q->queue_lock); 194 spin_lock_irq(q->queue_lock);
195 if (nm) 195 if (nm)
196 queue_flag_clear(QUEUE_FLAG_NONROT, q); 196 queue_flag_clear(QUEUE_FLAG_NONROT, q);
197 else 197 else
198 queue_flag_set(QUEUE_FLAG_NONROT, q); 198 queue_flag_set(QUEUE_FLAG_NONROT, q);
199 spin_unlock_irq(q->queue_lock); 199 spin_unlock_irq(q->queue_lock);
200 200
201 return ret; 201 return ret;
202 } 202 }
203 203
204 static ssize_t queue_nomerges_show(struct request_queue *q, char *page) 204 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
205 { 205 {
206 return queue_var_show((blk_queue_nomerges(q) << 1) | 206 return queue_var_show((blk_queue_nomerges(q) << 1) |
207 blk_queue_noxmerges(q), page); 207 blk_queue_noxmerges(q), page);
208 } 208 }
209 209
210 static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, 210 static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
211 size_t count) 211 size_t count)
212 { 212 {
213 unsigned long nm; 213 unsigned long nm;
214 ssize_t ret = queue_var_store(&nm, page, count); 214 ssize_t ret = queue_var_store(&nm, page, count);
215 215
216 spin_lock_irq(q->queue_lock); 216 spin_lock_irq(q->queue_lock);
217 queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 217 queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
218 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); 218 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
219 if (nm == 2) 219 if (nm == 2)
220 queue_flag_set(QUEUE_FLAG_NOMERGES, q); 220 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
221 else if (nm) 221 else if (nm)
222 queue_flag_set(QUEUE_FLAG_NOXMERGES, q); 222 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
223 spin_unlock_irq(q->queue_lock); 223 spin_unlock_irq(q->queue_lock);
224 224
225 return ret; 225 return ret;
226 } 226 }
227 227
228 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) 228 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
229 { 229 {
230 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); 230 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
231 231
232 return queue_var_show(set, page); 232 return queue_var_show(set, page);
233 } 233 }
234 234
235 static ssize_t 235 static ssize_t
236 queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) 236 queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
237 { 237 {
238 ssize_t ret = -EINVAL; 238 ssize_t ret = -EINVAL;
239 #if defined(CONFIG_USE_GENERIC_SMP_HELPERS) 239 #if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
240 unsigned long val; 240 unsigned long val;
241 241
242 ret = queue_var_store(&val, page, count); 242 ret = queue_var_store(&val, page, count);
243 spin_lock_irq(q->queue_lock); 243 spin_lock_irq(q->queue_lock);
244 if (val) 244 if (val)
245 queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 245 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
246 else 246 else
247 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 247 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
248 spin_unlock_irq(q->queue_lock); 248 spin_unlock_irq(q->queue_lock);
249 #endif 249 #endif
250 return ret; 250 return ret;
251 } 251 }
252 252
253 static ssize_t queue_random_show(struct request_queue *q, char *page)
254 {
255 return queue_var_show(blk_queue_add_random(q), page);
256 }
257
258 static ssize_t queue_random_store(struct request_queue *q, const char *page,
259 size_t count)
260 {
261 unsigned long val;
262 ssize_t ret = queue_var_store(&val, page, count);
263
264 spin_lock_irq(q->queue_lock);
265 if (val)
266 queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
267 else
268 queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
269 spin_unlock_irq(q->queue_lock);
270
271 return ret;
272 }
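For these handlers to be reachable as an add_random attribute in the queue's sysfs directory, they presumably get a queue_sysfs_entry wired into the queue's default attribute list, following the same pattern as the entries below (the entry name and mode here are an assumption based on that pattern):

	static struct queue_sysfs_entry queue_random_entry = {
		.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
		.show = queue_random_show,
		.store = queue_random_store,
	};

Writing 0 to the attribute clears QUEUE_FLAG_ADD_RANDOM, so blk_update_bidi_request() above skips add_disk_randomness() for that device; writing a non-zero value sets the flag again.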
273
253 static ssize_t queue_iostats_show(struct request_queue *q, char *page) 274 static ssize_t queue_iostats_show(struct request_queue *q, char *page)
254 { 275 {
255 return queue_var_show(blk_queue_io_stat(q), page); 276 return queue_var_show(blk_queue_io_stat(q), page);
256 } 277 }
257 278
258 static ssize_t queue_iostats_store(struct request_queue *q, const char *page, 279 static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
259 size_t count) 280 size_t count)
260 { 281 {
261 unsigned long stats; 282 unsigned long stats;
262 ssize_t ret = queue_var_store(&stats, page, count); 283 ssize_t ret = queue_var_store(&stats, page, count);
263 284
264 spin_lock_irq(q->queue_lock); 285 spin_lock_irq(q->queue_lock);
265 if (stats) 286 if (stats)
266 queue_flag_set(QUEUE_FLAG_IO_STAT, q); 287 queue_flag_set(QUEUE_FLAG_IO_STAT, q);
267 else 288 else
268 queue_flag_clear(QUEUE_FLAG_IO_STAT, q); 289 queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
269 spin_unlock_irq(q->queue_lock); 290 spin_unlock_irq(q->queue_lock);
270 291
271 return ret; 292 return ret;
272 } 293 }
273 294
274 static struct queue_sysfs_entry queue_requests_entry = { 295 static struct queue_sysfs_entry queue_requests_entry = {
275 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 296 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
276 .show = queue_requests_show, 297 .show = queue_requests_show,
277 .store = queue_requests_store, 298 .store = queue_requests_store,
278 }; 299 };
279 300
280 static struct queue_sysfs_entry queue_ra_entry = { 301 static struct queue_sysfs_entry queue_ra_entry = {
281 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, 302 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
282 .show = queue_ra_show, 303 .show = queue_ra_show,
283 .store = queue_ra_store, 304 .store = queue_ra_store,
284 }; 305 };
285 306
286 static struct queue_sysfs_entry queue_max_sectors_entry = { 307 static struct queue_sysfs_entry queue_max_sectors_entry = {
287 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, 308 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
288 .show = queue_max_sectors_show, 309 .show = queue_max_sectors_show,
289 .store = queue_max_sectors_store, 310 .store = queue_max_sectors_store,
290 }; 311 };
291 312
292 static struct queue_sysfs_entry queue_max_hw_sectors_entry = { 313 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
293 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, 314 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
294 .show = queue_max_hw_sectors_show, 315 .show = queue_max_hw_sectors_show,
295 }; 316 };
296 317
297 static struct queue_sysfs_entry queue_max_segments_entry = { 318 static struct queue_sysfs_entry queue_max_segments_entry = {
298 .attr = {.name = "max_segments", .mode = S_IRUGO }, 319 .attr = {.name = "max_segments", .mode = S_IRUGO },
299 .show = queue_max_segments_show, 320 .show = queue_max_segments_show,
300 }; 321 };
301 322
302 static struct queue_sysfs_entry queue_max_segment_size_entry = { 323 static struct queue_sysfs_entry queue_max_segment_size_entry = {
303 .attr = {.name = "max_segment_size", .mode = S_IRUGO }, 324 .attr = {.name = "max_segment_size", .mode = S_IRUGO },
304 .show = queue_max_segment_size_show, 325 .show = queue_max_segment_size_show,
305 }; 326 };
306 327
307 static struct queue_sysfs_entry queue_iosched_entry = { 328 static struct queue_sysfs_entry queue_iosched_entry = {
308 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 329 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
309 .show = elv_iosched_show, 330 .show = elv_iosched_show,
310 .store = elv_iosched_store, 331 .store = elv_iosched_store,
311 }; 332 };
312 333
313 static struct queue_sysfs_entry queue_hw_sector_size_entry = { 334 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
314 .attr = {.name = "hw_sector_size", .mode = S_IRUGO }, 335 .attr = {.name = "hw_sector_size", .mode = S_IRUGO },
315 .show = queue_logical_block_size_show, 336 .show = queue_logical_block_size_show,
316 }; 337 };
317 338
318 static struct queue_sysfs_entry queue_logical_block_size_entry = { 339 static struct queue_sysfs_entry queue_logical_block_size_entry = {
319 .attr = {.name = "logical_block_size", .mode = S_IRUGO }, 340 .attr = {.name = "logical_block_size", .mode = S_IRUGO },
320 .show = queue_logical_block_size_show, 341 .show = queue_logical_block_size_show,
321 }; 342 };
322 343
323 static struct queue_sysfs_entry queue_physical_block_size_entry = { 344 static struct queue_sysfs_entry queue_physical_block_size_entry = {
324 .attr = {.name = "physical_block_size", .mode = S_IRUGO }, 345 .attr = {.name = "physical_block_size", .mode = S_IRUGO },
325 .show = queue_physical_block_size_show, 346 .show = queue_physical_block_size_show,
326 }; 347 };
327 348
328 static struct queue_sysfs_entry queue_io_min_entry = { 349 static struct queue_sysfs_entry queue_io_min_entry = {
329 .attr = {.name = "minimum_io_size", .mode = S_IRUGO }, 350 .attr = {.name = "minimum_io_size", .mode = S_IRUGO },
330 .show = queue_io_min_show, 351 .show = queue_io_min_show,
331 }; 352 };
332 353
333 static struct queue_sysfs_entry queue_io_opt_entry = { 354 static struct queue_sysfs_entry queue_io_opt_entry = {
334 .attr = {.name = "optimal_io_size", .mode = S_IRUGO }, 355 .attr = {.name = "optimal_io_size", .mode = S_IRUGO },
335 .show = queue_io_opt_show, 356 .show = queue_io_opt_show,
336 }; 357 };
337 358
338 static struct queue_sysfs_entry queue_discard_granularity_entry = { 359 static struct queue_sysfs_entry queue_discard_granularity_entry = {
339 .attr = {.name = "discard_granularity", .mode = S_IRUGO }, 360 .attr = {.name = "discard_granularity", .mode = S_IRUGO },
340 .show = queue_discard_granularity_show, 361 .show = queue_discard_granularity_show,
341 }; 362 };
342 363
343 static struct queue_sysfs_entry queue_discard_max_entry = { 364 static struct queue_sysfs_entry queue_discard_max_entry = {
344 .attr = {.name = "discard_max_bytes", .mode = S_IRUGO }, 365 .attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
345 .show = queue_discard_max_show, 366 .show = queue_discard_max_show,
346 }; 367 };
347 368
348 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { 369 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
349 .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO }, 370 .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
350 .show = queue_discard_zeroes_data_show, 371 .show = queue_discard_zeroes_data_show,
351 }; 372 };
352 373
353 static struct queue_sysfs_entry queue_nonrot_entry = { 374 static struct queue_sysfs_entry queue_nonrot_entry = {
354 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, 375 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
355 .show = queue_nonrot_show, 376 .show = queue_nonrot_show,
356 .store = queue_nonrot_store, 377 .store = queue_nonrot_store,
357 }; 378 };
358 379
359 static struct queue_sysfs_entry queue_nomerges_entry = { 380 static struct queue_sysfs_entry queue_nomerges_entry = {
360 .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, 381 .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
361 .show = queue_nomerges_show, 382 .show = queue_nomerges_show,
362 .store = queue_nomerges_store, 383 .store = queue_nomerges_store,
363 }; 384 };
364 385
365 static struct queue_sysfs_entry queue_rq_affinity_entry = { 386 static struct queue_sysfs_entry queue_rq_affinity_entry = {
366 .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, 387 .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
367 .show = queue_rq_affinity_show, 388 .show = queue_rq_affinity_show,
368 .store = queue_rq_affinity_store, 389 .store = queue_rq_affinity_store,
369 }; 390 };
370 391
371 static struct queue_sysfs_entry queue_iostats_entry = { 392 static struct queue_sysfs_entry queue_iostats_entry = {
372 .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, 393 .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
373 .show = queue_iostats_show, 394 .show = queue_iostats_show,
374 .store = queue_iostats_store, 395 .store = queue_iostats_store,
375 }; 396 };
376 397
398 static struct queue_sysfs_entry queue_random_entry = {
399 .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
400 .show = queue_random_show,
401 .store = queue_random_store,
402 };
403
377 static struct attribute *default_attrs[] = { 404 static struct attribute *default_attrs[] = {
378 &queue_requests_entry.attr, 405 &queue_requests_entry.attr,
379 &queue_ra_entry.attr, 406 &queue_ra_entry.attr,
380 &queue_max_hw_sectors_entry.attr, 407 &queue_max_hw_sectors_entry.attr,
381 &queue_max_sectors_entry.attr, 408 &queue_max_sectors_entry.attr,
382 &queue_max_segments_entry.attr, 409 &queue_max_segments_entry.attr,
383 &queue_max_segment_size_entry.attr, 410 &queue_max_segment_size_entry.attr,
384 &queue_iosched_entry.attr, 411 &queue_iosched_entry.attr,
385 &queue_hw_sector_size_entry.attr, 412 &queue_hw_sector_size_entry.attr,
386 &queue_logical_block_size_entry.attr, 413 &queue_logical_block_size_entry.attr,
387 &queue_physical_block_size_entry.attr, 414 &queue_physical_block_size_entry.attr,
388 &queue_io_min_entry.attr, 415 &queue_io_min_entry.attr,
389 &queue_io_opt_entry.attr, 416 &queue_io_opt_entry.attr,
390 &queue_discard_granularity_entry.attr, 417 &queue_discard_granularity_entry.attr,
391 &queue_discard_max_entry.attr, 418 &queue_discard_max_entry.attr,
392 &queue_discard_zeroes_data_entry.attr, 419 &queue_discard_zeroes_data_entry.attr,
393 &queue_nonrot_entry.attr, 420 &queue_nonrot_entry.attr,
394 &queue_nomerges_entry.attr, 421 &queue_nomerges_entry.attr,
395 &queue_rq_affinity_entry.attr, 422 &queue_rq_affinity_entry.attr,
396 &queue_iostats_entry.attr, 423 &queue_iostats_entry.attr,
424 &queue_random_entry.attr,
397 NULL, 425 NULL,
398 }; 426 };
399 427
400 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 428 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
401 429
402 static ssize_t 430 static ssize_t
403 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 431 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
404 { 432 {
405 struct queue_sysfs_entry *entry = to_queue(attr); 433 struct queue_sysfs_entry *entry = to_queue(attr);
406 struct request_queue *q = 434 struct request_queue *q =
407 container_of(kobj, struct request_queue, kobj); 435 container_of(kobj, struct request_queue, kobj);
408 ssize_t res; 436 ssize_t res;
409 437
410 if (!entry->show) 438 if (!entry->show)
411 return -EIO; 439 return -EIO;
412 mutex_lock(&q->sysfs_lock); 440 mutex_lock(&q->sysfs_lock);
413 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 441 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
414 mutex_unlock(&q->sysfs_lock); 442 mutex_unlock(&q->sysfs_lock);
415 return -ENOENT; 443 return -ENOENT;
416 } 444 }
417 res = entry->show(q, page); 445 res = entry->show(q, page);
418 mutex_unlock(&q->sysfs_lock); 446 mutex_unlock(&q->sysfs_lock);
419 return res; 447 return res;
420 } 448 }
421 449
422 static ssize_t 450 static ssize_t
423 queue_attr_store(struct kobject *kobj, struct attribute *attr, 451 queue_attr_store(struct kobject *kobj, struct attribute *attr,
424 const char *page, size_t length) 452 const char *page, size_t length)
425 { 453 {
426 struct queue_sysfs_entry *entry = to_queue(attr); 454 struct queue_sysfs_entry *entry = to_queue(attr);
427 struct request_queue *q; 455 struct request_queue *q;
428 ssize_t res; 456 ssize_t res;
429 457
430 if (!entry->store) 458 if (!entry->store)
431 return -EIO; 459 return -EIO;
432 460
433 q = container_of(kobj, struct request_queue, kobj); 461 q = container_of(kobj, struct request_queue, kobj);
434 mutex_lock(&q->sysfs_lock); 462 mutex_lock(&q->sysfs_lock);
435 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 463 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
436 mutex_unlock(&q->sysfs_lock); 464 mutex_unlock(&q->sysfs_lock);
437 return -ENOENT; 465 return -ENOENT;
438 } 466 }
439 res = entry->store(q, page, length); 467 res = entry->store(q, page, length);
440 mutex_unlock(&q->sysfs_lock); 468 mutex_unlock(&q->sysfs_lock);
441 return res; 469 return res;
442 } 470 }
443 471
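queue_attr_show() and queue_attr_store() are the only paths sysfs takes into the queue attributes, so every file under queue/ follows the same recipe: a show callback, an optional store callback, and a queue_sysfs_entry tying them to a name. As an illustration only (the attribute below is hypothetical and not part of this patch), a read-only file would look like this, plus an entry in default_attrs[]:

static ssize_t queue_example_show(struct request_queue *q, char *page)
{
	/* queue_var_show() prints an unsigned long followed by a newline */
	return queue_var_show(q->nr_requests, page);
}

static struct queue_sysfs_entry queue_example_entry = {
	.attr	= {.name = "example", .mode = S_IRUGO },
	.show	= queue_example_show,
	/* no .store: writes are rejected with -EIO by queue_attr_store() */
};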
444 /** 472 /**
445 * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed 473 * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed
446 * @kobj: the kobj belonging to the request queue to be released 474 * @kobj: the kobj belonging to the request queue to be released
447 * 475 *
448 * Description: 476 * Description:
449 * blk_cleanup_queue is the pair to blk_init_queue() or 477 * blk_cleanup_queue is the pair to blk_init_queue() or
450 * blk_queue_make_request(). It should be called when a request queue is 478 * blk_queue_make_request(). It should be called when a request queue is
451 * being released; typically when a block device is being de-registered. 479 * being released; typically when a block device is being de-registered.
452 * Currently, its primary task is to free all the &struct request 480 * Currently, its primary task is to free all the &struct request
453 * structures that were allocated to the queue and the queue itself. 481 * structures that were allocated to the queue and the queue itself.
454 * 482 *
455 * Caveat: 483 * Caveat:
456 * Hopefully the low level driver will have finished any 484 * Hopefully the low level driver will have finished any
457 * outstanding requests first... 485 * outstanding requests first...
458 **/ 486 **/
459 static void blk_release_queue(struct kobject *kobj) 487 static void blk_release_queue(struct kobject *kobj)
460 { 488 {
461 struct request_queue *q = 489 struct request_queue *q =
462 container_of(kobj, struct request_queue, kobj); 490 container_of(kobj, struct request_queue, kobj);
463 struct request_list *rl = &q->rq; 491 struct request_list *rl = &q->rq;
464 492
465 blk_sync_queue(q); 493 blk_sync_queue(q);
466 494
467 if (rl->rq_pool) 495 if (rl->rq_pool)
468 mempool_destroy(rl->rq_pool); 496 mempool_destroy(rl->rq_pool);
469 497
470 if (q->queue_tags) 498 if (q->queue_tags)
471 __blk_queue_free_tags(q); 499 __blk_queue_free_tags(q);
472 500
473 blk_trace_shutdown(q); 501 blk_trace_shutdown(q);
474 502
475 bdi_destroy(&q->backing_dev_info); 503 bdi_destroy(&q->backing_dev_info);
476 kmem_cache_free(blk_requestq_cachep, q); 504 kmem_cache_free(blk_requestq_cachep, q);
477 } 505 }
478 506
479 static const struct sysfs_ops queue_sysfs_ops = { 507 static const struct sysfs_ops queue_sysfs_ops = {
480 .show = queue_attr_show, 508 .show = queue_attr_show,
481 .store = queue_attr_store, 509 .store = queue_attr_store,
482 }; 510 };
483 511
484 struct kobj_type blk_queue_ktype = { 512 struct kobj_type blk_queue_ktype = {
485 .sysfs_ops = &queue_sysfs_ops, 513 .sysfs_ops = &queue_sysfs_ops,
486 .default_attrs = default_attrs, 514 .default_attrs = default_attrs,
487 .release = blk_release_queue, 515 .release = blk_release_queue,
488 }; 516 };
489 517
490 int blk_register_queue(struct gendisk *disk) 518 int blk_register_queue(struct gendisk *disk)
491 { 519 {
492 int ret; 520 int ret;
493 struct device *dev = disk_to_dev(disk); 521 struct device *dev = disk_to_dev(disk);
494 522
495 struct request_queue *q = disk->queue; 523 struct request_queue *q = disk->queue;
496 524
497 if (WARN_ON(!q)) 525 if (WARN_ON(!q))
498 return -ENXIO; 526 return -ENXIO;
499 527
500 ret = blk_trace_init_sysfs(dev); 528 ret = blk_trace_init_sysfs(dev);
501 if (ret) 529 if (ret)
502 return ret; 530 return ret;
503 531
504 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); 532 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
505 if (ret < 0) 533 if (ret < 0)
506 return ret; 534 return ret;
507 535
508 kobject_uevent(&q->kobj, KOBJ_ADD); 536 kobject_uevent(&q->kobj, KOBJ_ADD);
509 537
510 if (!q->request_fn) 538 if (!q->request_fn)
511 return 0; 539 return 0;
512 540
513 ret = elv_register_queue(q); 541 ret = elv_register_queue(q);
514 if (ret) { 542 if (ret) {
515 kobject_uevent(&q->kobj, KOBJ_REMOVE); 543 kobject_uevent(&q->kobj, KOBJ_REMOVE);
516 kobject_del(&q->kobj); 544 kobject_del(&q->kobj);
517 blk_trace_remove_sysfs(disk_to_dev(disk)); 545 blk_trace_remove_sysfs(disk_to_dev(disk));
518 return ret; 546 return ret;
519 } 547 }
520 548
521 return 0; 549 return 0;
522 } 550 }
523 551
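Drivers rarely call blk_register_queue() themselves; it runs as part of add_disk() once the gendisk and its request queue have been wired up. A rough sketch of that driver-side sequence, with hypothetical names (example_*) and the error unwinding omitted:

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example_request_fn(struct request_queue *q)
{
	/* hypothetical: fetch requests from q and complete them */
}

static int example_attach_disk(void)
{
	struct request_queue *q = blk_init_queue(example_request_fn, &example_lock);
	struct gendisk *disk = alloc_disk(1);

	if (!q || !disk)
		return -ENOMEM;		/* real code would also free the half that did allocate */

	disk->queue = q;
	/* ... fill in major/first_minor, fops, disk_name, capacity ... */
	add_disk(disk);			/* ends up calling blk_register_queue() */
	return 0;
}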
524 void blk_unregister_queue(struct gendisk *disk) 552 void blk_unregister_queue(struct gendisk *disk)
525 { 553 {
526 struct request_queue *q = disk->queue; 554 struct request_queue *q = disk->queue;
527 555
528 if (WARN_ON(!q)) 556 if (WARN_ON(!q))
529 return; 557 return;
530 558
531 if (q->request_fn) 559 if (q->request_fn)
532 elv_unregister_queue(q); 560 elv_unregister_queue(q);
533 561
534 kobject_uevent(&q->kobj, KOBJ_REMOVE); 562 kobject_uevent(&q->kobj, KOBJ_REMOVE);
535 kobject_del(&q->kobj); 563 kobject_del(&q->kobj);
536 blk_trace_remove_sysfs(disk_to_dev(disk)); 564 blk_trace_remove_sysfs(disk_to_dev(disk));
537 kobject_put(&disk_to_dev(disk)->kobj); 565 kobject_put(&disk_to_dev(disk)->kobj);
538 } 566 }
539 567
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #ifdef CONFIG_BLOCK 4 #ifdef CONFIG_BLOCK
5 5
6 #include <linux/sched.h> 6 #include <linux/sched.h>
7 #include <linux/major.h> 7 #include <linux/major.h>
8 #include <linux/genhd.h> 8 #include <linux/genhd.h>
9 #include <linux/list.h> 9 #include <linux/list.h>
10 #include <linux/timer.h> 10 #include <linux/timer.h>
11 #include <linux/workqueue.h> 11 #include <linux/workqueue.h>
12 #include <linux/pagemap.h> 12 #include <linux/pagemap.h>
13 #include <linux/backing-dev.h> 13 #include <linux/backing-dev.h>
14 #include <linux/wait.h> 14 #include <linux/wait.h>
15 #include <linux/mempool.h> 15 #include <linux/mempool.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/stringify.h> 18 #include <linux/stringify.h>
19 #include <linux/gfp.h> 19 #include <linux/gfp.h>
20 #include <linux/bsg.h> 20 #include <linux/bsg.h>
21 #include <linux/smp.h> 21 #include <linux/smp.h>
22 22
23 #include <asm/scatterlist.h> 23 #include <asm/scatterlist.h>
24 24
25 struct scsi_ioctl_command; 25 struct scsi_ioctl_command;
26 26
27 struct request_queue; 27 struct request_queue;
28 struct elevator_queue; 28 struct elevator_queue;
29 struct request_pm_state; 29 struct request_pm_state;
30 struct blk_trace; 30 struct blk_trace;
31 struct request; 31 struct request;
32 struct sg_io_hdr; 32 struct sg_io_hdr;
33 33
34 #define BLKDEV_MIN_RQ 4 34 #define BLKDEV_MIN_RQ 4
35 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 35 #define BLKDEV_MAX_RQ 128 /* Default maximum */
36 36
37 struct request; 37 struct request;
38 typedef void (rq_end_io_fn)(struct request *, int); 38 typedef void (rq_end_io_fn)(struct request *, int);
39 39
40 struct request_list { 40 struct request_list {
41 /* 41 /*
42 * count[], starved[], and wait[] are indexed by 42 * count[], starved[], and wait[] are indexed by
43 * BLK_RW_SYNC/BLK_RW_ASYNC 43 * BLK_RW_SYNC/BLK_RW_ASYNC
44 */ 44 */
45 int count[2]; 45 int count[2];
46 int starved[2]; 46 int starved[2];
47 int elvpriv; 47 int elvpriv;
48 mempool_t *rq_pool; 48 mempool_t *rq_pool;
49 wait_queue_head_t wait[2]; 49 wait_queue_head_t wait[2];
50 }; 50 };
51 51
52 /* 52 /*
53 * request command types 53 * request command types
54 */ 54 */
55 enum rq_cmd_type_bits { 55 enum rq_cmd_type_bits {
56 REQ_TYPE_FS = 1, /* fs request */ 56 REQ_TYPE_FS = 1, /* fs request */
57 REQ_TYPE_BLOCK_PC, /* scsi command */ 57 REQ_TYPE_BLOCK_PC, /* scsi command */
58 REQ_TYPE_SENSE, /* sense request */ 58 REQ_TYPE_SENSE, /* sense request */
59 REQ_TYPE_PM_SUSPEND, /* suspend request */ 59 REQ_TYPE_PM_SUSPEND, /* suspend request */
60 REQ_TYPE_PM_RESUME, /* resume request */ 60 REQ_TYPE_PM_RESUME, /* resume request */
61 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 61 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
62 REQ_TYPE_SPECIAL, /* driver defined type */ 62 REQ_TYPE_SPECIAL, /* driver defined type */
63 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 63 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
64 /* 64 /*
65 * for ATA/ATAPI devices. this really doesn't belong here, ide should 65 * for ATA/ATAPI devices. this really doesn't belong here, ide should
66 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 66 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
67 * private REQ_LB opcodes to differentiate what type of request this is 67 * private REQ_LB opcodes to differentiate what type of request this is
68 */ 68 */
69 REQ_TYPE_ATA_TASKFILE, 69 REQ_TYPE_ATA_TASKFILE,
70 REQ_TYPE_ATA_PC, 70 REQ_TYPE_ATA_PC,
71 }; 71 };
72 72
73 /* 73 /*
74 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being 74 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
75 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a 75 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
76 * SCSI cdb). 76 * SCSI cdb).
77 * 77 *
78 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, 78 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
79 * typically to differentiate REQ_TYPE_SPECIAL requests. 79 * typically to differentiate REQ_TYPE_SPECIAL requests.
80 * 80 *
81 */ 81 */
82 enum { 82 enum {
83 REQ_LB_OP_EJECT = 0x40, /* eject request */ 83 REQ_LB_OP_EJECT = 0x40, /* eject request */
84 REQ_LB_OP_FLUSH = 0x41, /* flush request */ 84 REQ_LB_OP_FLUSH = 0x41, /* flush request */
85 }; 85 };
86 86
87 /* 87 /*
88 * request type modified bits. first four bits match BIO_RW* bits, important 88 * request type modified bits. first four bits match BIO_RW* bits, important
89 */ 89 */
90 enum rq_flag_bits { 90 enum rq_flag_bits {
91 __REQ_RW, /* not set, read. set, write */ 91 __REQ_RW, /* not set, read. set, write */
92 __REQ_FAILFAST_DEV, /* no driver retries of device errors */ 92 __REQ_FAILFAST_DEV, /* no driver retries of device errors */
93 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ 93 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
94 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ 94 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
95 /* above flags must match BIO_RW_* */ 95 /* above flags must match BIO_RW_* */
96 __REQ_DISCARD, /* request to discard sectors */ 96 __REQ_DISCARD, /* request to discard sectors */
97 __REQ_SORTED, /* elevator knows about this request */ 97 __REQ_SORTED, /* elevator knows about this request */
98 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 98 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
99 __REQ_HARDBARRIER, /* may not be passed by drive either */ 99 __REQ_HARDBARRIER, /* may not be passed by drive either */
100 __REQ_FUA, /* forced unit access */ 100 __REQ_FUA, /* forced unit access */
101 __REQ_NOMERGE, /* don't touch this for merging */ 101 __REQ_NOMERGE, /* don't touch this for merging */
102 __REQ_STARTED, /* drive already may have started this one */ 102 __REQ_STARTED, /* drive already may have started this one */
103 __REQ_DONTPREP, /* don't call prep for this one */ 103 __REQ_DONTPREP, /* don't call prep for this one */
104 __REQ_QUEUED, /* uses queueing */ 104 __REQ_QUEUED, /* uses queueing */
105 __REQ_ELVPRIV, /* elevator private data attached */ 105 __REQ_ELVPRIV, /* elevator private data attached */
106 __REQ_FAILED, /* set if the request failed */ 106 __REQ_FAILED, /* set if the request failed */
107 __REQ_QUIET, /* don't worry about errors */ 107 __REQ_QUIET, /* don't worry about errors */
108 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 108 __REQ_PREEMPT, /* set for "ide_preempt" requests */
109 __REQ_ORDERED_COLOR, /* is before or after barrier */ 109 __REQ_ORDERED_COLOR, /* is before or after barrier */
110 __REQ_RW_SYNC, /* request is sync (sync write or read) */ 110 __REQ_RW_SYNC, /* request is sync (sync write or read) */
111 __REQ_ALLOCED, /* request came from our alloc pool */ 111 __REQ_ALLOCED, /* request came from our alloc pool */
112 __REQ_RW_META, /* metadata io request */ 112 __REQ_RW_META, /* metadata io request */
113 __REQ_COPY_USER, /* contains copies of user pages */ 113 __REQ_COPY_USER, /* contains copies of user pages */
114 __REQ_INTEGRITY, /* integrity metadata has been remapped */ 114 __REQ_INTEGRITY, /* integrity metadata has been remapped */
115 __REQ_NOIDLE, /* Don't anticipate more IO after this one */ 115 __REQ_NOIDLE, /* Don't anticipate more IO after this one */
116 __REQ_IO_STAT, /* account I/O stat */ 116 __REQ_IO_STAT, /* account I/O stat */
117 __REQ_MIXED_MERGE, /* merge of different types, fail separately */ 117 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
118 __REQ_NR_BITS, /* stops here */ 118 __REQ_NR_BITS, /* stops here */
119 }; 119 };
120 120
121 #define REQ_RW (1 << __REQ_RW) 121 #define REQ_RW (1 << __REQ_RW)
122 #define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) 122 #define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV)
123 #define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) 123 #define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
124 #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) 124 #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER)
125 #define REQ_DISCARD (1 << __REQ_DISCARD) 125 #define REQ_DISCARD (1 << __REQ_DISCARD)
126 #define REQ_SORTED (1 << __REQ_SORTED) 126 #define REQ_SORTED (1 << __REQ_SORTED)
127 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 127 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
128 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 128 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
129 #define REQ_FUA (1 << __REQ_FUA) 129 #define REQ_FUA (1 << __REQ_FUA)
130 #define REQ_NOMERGE (1 << __REQ_NOMERGE) 130 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
131 #define REQ_STARTED (1 << __REQ_STARTED) 131 #define REQ_STARTED (1 << __REQ_STARTED)
132 #define REQ_DONTPREP (1 << __REQ_DONTPREP) 132 #define REQ_DONTPREP (1 << __REQ_DONTPREP)
133 #define REQ_QUEUED (1 << __REQ_QUEUED) 133 #define REQ_QUEUED (1 << __REQ_QUEUED)
134 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) 134 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
135 #define REQ_FAILED (1 << __REQ_FAILED) 135 #define REQ_FAILED (1 << __REQ_FAILED)
136 #define REQ_QUIET (1 << __REQ_QUIET) 136 #define REQ_QUIET (1 << __REQ_QUIET)
137 #define REQ_PREEMPT (1 << __REQ_PREEMPT) 137 #define REQ_PREEMPT (1 << __REQ_PREEMPT)
138 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) 138 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
139 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) 139 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
140 #define REQ_ALLOCED (1 << __REQ_ALLOCED) 140 #define REQ_ALLOCED (1 << __REQ_ALLOCED)
141 #define REQ_RW_META (1 << __REQ_RW_META) 141 #define REQ_RW_META (1 << __REQ_RW_META)
142 #define REQ_COPY_USER (1 << __REQ_COPY_USER) 142 #define REQ_COPY_USER (1 << __REQ_COPY_USER)
143 #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) 143 #define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
144 #define REQ_NOIDLE (1 << __REQ_NOIDLE) 144 #define REQ_NOIDLE (1 << __REQ_NOIDLE)
145 #define REQ_IO_STAT (1 << __REQ_IO_STAT) 145 #define REQ_IO_STAT (1 << __REQ_IO_STAT)
146 #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) 146 #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
147 147
148 #define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ 148 #define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
149 REQ_FAILFAST_DRIVER) 149 REQ_FAILFAST_DRIVER)
150 150
151 #define BLK_MAX_CDB 16 151 #define BLK_MAX_CDB 16
152 152
153 /* 153 /*
154 * try to put the fields that are referenced together in the same cacheline. 154 * try to put the fields that are referenced together in the same cacheline.
155 * if you modify this structure, be sure to check block/blk-core.c:rq_init() 155 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
156 * as well! 156 * as well!
157 */ 157 */
158 struct request { 158 struct request {
159 struct list_head queuelist; 159 struct list_head queuelist;
160 struct call_single_data csd; 160 struct call_single_data csd;
161 161
162 struct request_queue *q; 162 struct request_queue *q;
163 163
164 unsigned int cmd_flags; 164 unsigned int cmd_flags;
165 enum rq_cmd_type_bits cmd_type; 165 enum rq_cmd_type_bits cmd_type;
166 unsigned long atomic_flags; 166 unsigned long atomic_flags;
167 167
168 int cpu; 168 int cpu;
169 169
170 /* the following two fields are internal, NEVER access directly */ 170 /* the following two fields are internal, NEVER access directly */
171 unsigned int __data_len; /* total data len */ 171 unsigned int __data_len; /* total data len */
172 sector_t __sector; /* sector cursor */ 172 sector_t __sector; /* sector cursor */
173 173
174 struct bio *bio; 174 struct bio *bio;
175 struct bio *biotail; 175 struct bio *biotail;
176 176
177 struct hlist_node hash; /* merge hash */ 177 struct hlist_node hash; /* merge hash */
178 /* 178 /*
179 * The rb_node is only used inside the io scheduler, requests 179 * The rb_node is only used inside the io scheduler, requests
180 * are pruned when moved to the dispatch queue. So let the 180 * are pruned when moved to the dispatch queue. So let the
181 * completion_data share space with the rb_node. 181 * completion_data share space with the rb_node.
182 */ 182 */
183 union { 183 union {
184 struct rb_node rb_node; /* sort/lookup */ 184 struct rb_node rb_node; /* sort/lookup */
185 void *completion_data; 185 void *completion_data;
186 }; 186 };
187 187
188 /* 188 /*
189 * Three pointers are available for the IO schedulers, if they need 189 * Three pointers are available for the IO schedulers, if they need
190 * more they have to dynamically allocate it. 190 * more they have to dynamically allocate it.
191 */ 191 */
192 void *elevator_private; 192 void *elevator_private;
193 void *elevator_private2; 193 void *elevator_private2;
194 void *elevator_private3; 194 void *elevator_private3;
195 195
196 struct gendisk *rq_disk; 196 struct gendisk *rq_disk;
197 unsigned long start_time; 197 unsigned long start_time;
198 #ifdef CONFIG_BLK_CGROUP 198 #ifdef CONFIG_BLK_CGROUP
199 unsigned long long start_time_ns; 199 unsigned long long start_time_ns;
200 unsigned long long io_start_time_ns; /* when passed to hardware */ 200 unsigned long long io_start_time_ns; /* when passed to hardware */
201 #endif 201 #endif
202 /* Number of scatter-gather DMA addr+len pairs after 202 /* Number of scatter-gather DMA addr+len pairs after
203 * physical address coalescing is performed. 203 * physical address coalescing is performed.
204 */ 204 */
205 unsigned short nr_phys_segments; 205 unsigned short nr_phys_segments;
206 206
207 unsigned short ioprio; 207 unsigned short ioprio;
208 208
209 int ref_count; 209 int ref_count;
210 210
211 void *special; /* opaque pointer available for LLD use */ 211 void *special; /* opaque pointer available for LLD use */
212 char *buffer; /* kaddr of the current segment if available */ 212 char *buffer; /* kaddr of the current segment if available */
213 213
214 int tag; 214 int tag;
215 int errors; 215 int errors;
216 216
217 /* 217 /*
218 * when request is used as a packet command carrier 218 * when request is used as a packet command carrier
219 */ 219 */
220 unsigned char __cmd[BLK_MAX_CDB]; 220 unsigned char __cmd[BLK_MAX_CDB];
221 unsigned char *cmd; 221 unsigned char *cmd;
222 unsigned short cmd_len; 222 unsigned short cmd_len;
223 223
224 unsigned int extra_len; /* length of alignment and padding */ 224 unsigned int extra_len; /* length of alignment and padding */
225 unsigned int sense_len; 225 unsigned int sense_len;
226 unsigned int resid_len; /* residual count */ 226 unsigned int resid_len; /* residual count */
227 void *sense; 227 void *sense;
228 228
229 unsigned long deadline; 229 unsigned long deadline;
230 struct list_head timeout_list; 230 struct list_head timeout_list;
231 unsigned int timeout; 231 unsigned int timeout;
232 int retries; 232 int retries;
233 233
234 /* 234 /*
235 * completion callback. 235 * completion callback.
236 */ 236 */
237 rq_end_io_fn *end_io; 237 rq_end_io_fn *end_io;
238 void *end_io_data; 238 void *end_io_data;
239 239
240 /* for bidi */ 240 /* for bidi */
241 struct request *next_rq; 241 struct request *next_rq;
242 }; 242 };
243 243
244 static inline unsigned short req_get_ioprio(struct request *req) 244 static inline unsigned short req_get_ioprio(struct request *req)
245 { 245 {
246 return req->ioprio; 246 return req->ioprio;
247 } 247 }
248 248
249 /* 249 /*
250 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 250 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
251 * requests. Some step values could eventually be made generic. 251 * requests. Some step values could eventually be made generic.
252 */ 252 */
253 struct request_pm_state 253 struct request_pm_state
254 { 254 {
255 /* PM state machine step value, currently driver specific */ 255 /* PM state machine step value, currently driver specific */
256 int pm_step; 256 int pm_step;
257 /* requested PM state value (S1, S2, S3, S4, ...) */ 257 /* requested PM state value (S1, S2, S3, S4, ...) */
258 u32 pm_state; 258 u32 pm_state;
259 void* data; /* for driver use */ 259 void* data; /* for driver use */
260 }; 260 };
261 261
262 #include <linux/elevator.h> 262 #include <linux/elevator.h>
263 263
264 typedef void (request_fn_proc) (struct request_queue *q); 264 typedef void (request_fn_proc) (struct request_queue *q);
265 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 265 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
266 typedef int (prep_rq_fn) (struct request_queue *, struct request *); 266 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
267 typedef void (unplug_fn) (struct request_queue *); 267 typedef void (unplug_fn) (struct request_queue *);
268 268
269 struct bio_vec; 269 struct bio_vec;
270 struct bvec_merge_data { 270 struct bvec_merge_data {
271 struct block_device *bi_bdev; 271 struct block_device *bi_bdev;
272 sector_t bi_sector; 272 sector_t bi_sector;
273 unsigned bi_size; 273 unsigned bi_size;
274 unsigned long bi_rw; 274 unsigned long bi_rw;
275 }; 275 };
276 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, 276 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
277 struct bio_vec *); 277 struct bio_vec *);
278 typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 278 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
279 typedef void (softirq_done_fn)(struct request *); 279 typedef void (softirq_done_fn)(struct request *);
280 typedef int (dma_drain_needed_fn)(struct request *); 280 typedef int (dma_drain_needed_fn)(struct request *);
281 typedef int (lld_busy_fn) (struct request_queue *q); 281 typedef int (lld_busy_fn) (struct request_queue *q);
282 282
283 enum blk_eh_timer_return { 283 enum blk_eh_timer_return {
284 BLK_EH_NOT_HANDLED, 284 BLK_EH_NOT_HANDLED,
285 BLK_EH_HANDLED, 285 BLK_EH_HANDLED,
286 BLK_EH_RESET_TIMER, 286 BLK_EH_RESET_TIMER,
287 }; 287 };
288 288
289 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); 289 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
290 290
291 enum blk_queue_state { 291 enum blk_queue_state {
292 Queue_down, 292 Queue_down,
293 Queue_up, 293 Queue_up,
294 }; 294 };
295 295
296 struct blk_queue_tag { 296 struct blk_queue_tag {
297 struct request **tag_index; /* map of busy tags */ 297 struct request **tag_index; /* map of busy tags */
298 unsigned long *tag_map; /* bit map of free/busy tags */ 298 unsigned long *tag_map; /* bit map of free/busy tags */
299 int busy; /* current depth */ 299 int busy; /* current depth */
300 int max_depth; /* what we will send to device */ 300 int max_depth; /* what we will send to device */
301 int real_max_depth; /* what the array can hold */ 301 int real_max_depth; /* what the array can hold */
302 atomic_t refcnt; /* map can be shared */ 302 atomic_t refcnt; /* map can be shared */
303 }; 303 };
304 304
305 #define BLK_SCSI_MAX_CMDS (256) 305 #define BLK_SCSI_MAX_CMDS (256)
306 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 306 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
307 307
308 struct queue_limits { 308 struct queue_limits {
309 unsigned long bounce_pfn; 309 unsigned long bounce_pfn;
310 unsigned long seg_boundary_mask; 310 unsigned long seg_boundary_mask;
311 311
312 unsigned int max_hw_sectors; 312 unsigned int max_hw_sectors;
313 unsigned int max_sectors; 313 unsigned int max_sectors;
314 unsigned int max_segment_size; 314 unsigned int max_segment_size;
315 unsigned int physical_block_size; 315 unsigned int physical_block_size;
316 unsigned int alignment_offset; 316 unsigned int alignment_offset;
317 unsigned int io_min; 317 unsigned int io_min;
318 unsigned int io_opt; 318 unsigned int io_opt;
319 unsigned int max_discard_sectors; 319 unsigned int max_discard_sectors;
320 unsigned int discard_granularity; 320 unsigned int discard_granularity;
321 unsigned int discard_alignment; 321 unsigned int discard_alignment;
322 322
323 unsigned short logical_block_size; 323 unsigned short logical_block_size;
324 unsigned short max_segments; 324 unsigned short max_segments;
325 325
326 unsigned char misaligned; 326 unsigned char misaligned;
327 unsigned char discard_misaligned; 327 unsigned char discard_misaligned;
328 unsigned char no_cluster; 328 unsigned char no_cluster;
329 signed char discard_zeroes_data; 329 signed char discard_zeroes_data;
330 }; 330 };
331 331
332 struct request_queue 332 struct request_queue
333 { 333 {
334 /* 334 /*
335 * Together with queue_head for cacheline sharing 335 * Together with queue_head for cacheline sharing
336 */ 336 */
337 struct list_head queue_head; 337 struct list_head queue_head;
338 struct request *last_merge; 338 struct request *last_merge;
339 struct elevator_queue *elevator; 339 struct elevator_queue *elevator;
340 340
341 /* 341 /*
342 * the queue request freelist, one for reads and one for writes 342 * the queue request freelist, one for reads and one for writes
343 */ 343 */
344 struct request_list rq; 344 struct request_list rq;
345 345
346 request_fn_proc *request_fn; 346 request_fn_proc *request_fn;
347 make_request_fn *make_request_fn; 347 make_request_fn *make_request_fn;
348 prep_rq_fn *prep_rq_fn; 348 prep_rq_fn *prep_rq_fn;
349 unplug_fn *unplug_fn; 349 unplug_fn *unplug_fn;
350 merge_bvec_fn *merge_bvec_fn; 350 merge_bvec_fn *merge_bvec_fn;
351 prepare_flush_fn *prepare_flush_fn; 351 prepare_flush_fn *prepare_flush_fn;
352 softirq_done_fn *softirq_done_fn; 352 softirq_done_fn *softirq_done_fn;
353 rq_timed_out_fn *rq_timed_out_fn; 353 rq_timed_out_fn *rq_timed_out_fn;
354 dma_drain_needed_fn *dma_drain_needed; 354 dma_drain_needed_fn *dma_drain_needed;
355 lld_busy_fn *lld_busy_fn; 355 lld_busy_fn *lld_busy_fn;
356 356
357 /* 357 /*
358 * Dispatch queue sorting 358 * Dispatch queue sorting
359 */ 359 */
360 sector_t end_sector; 360 sector_t end_sector;
361 struct request *boundary_rq; 361 struct request *boundary_rq;
362 362
363 /* 363 /*
364 * Auto-unplugging state 364 * Auto-unplugging state
365 */ 365 */
366 struct timer_list unplug_timer; 366 struct timer_list unplug_timer;
367 int unplug_thresh; /* After this many requests */ 367 int unplug_thresh; /* After this many requests */
368 unsigned long unplug_delay; /* After this many jiffies */ 368 unsigned long unplug_delay; /* After this many jiffies */
369 struct work_struct unplug_work; 369 struct work_struct unplug_work;
370 370
371 struct backing_dev_info backing_dev_info; 371 struct backing_dev_info backing_dev_info;
372 372
373 /* 373 /*
374 * The queue owner gets to use this for whatever they like. 374 * The queue owner gets to use this for whatever they like.
375 * ll_rw_blk doesn't touch it. 375 * ll_rw_blk doesn't touch it.
376 */ 376 */
377 void *queuedata; 377 void *queuedata;
378 378
379 /* 379 /*
380 * queue needs bounce pages for pages above this limit 380 * queue needs bounce pages for pages above this limit
381 */ 381 */
382 gfp_t bounce_gfp; 382 gfp_t bounce_gfp;
383 383
384 /* 384 /*
385 * various queue flags, see QUEUE_* below 385 * various queue flags, see QUEUE_* below
386 */ 386 */
387 unsigned long queue_flags; 387 unsigned long queue_flags;
388 388
389 /* 389 /*
390 * protects queue structures from reentrancy. ->__queue_lock should 390 * protects queue structures from reentrancy. ->__queue_lock should
391 * _never_ be used directly, it is queue private. always use 391 * _never_ be used directly, it is queue private. always use
392 * ->queue_lock. 392 * ->queue_lock.
393 */ 393 */
394 spinlock_t __queue_lock; 394 spinlock_t __queue_lock;
395 spinlock_t *queue_lock; 395 spinlock_t *queue_lock;
396 396
397 /* 397 /*
398 * queue kobject 398 * queue kobject
399 */ 399 */
400 struct kobject kobj; 400 struct kobject kobj;
401 401
402 /* 402 /*
403 * queue settings 403 * queue settings
404 */ 404 */
405 unsigned long nr_requests; /* Max # of requests */ 405 unsigned long nr_requests; /* Max # of requests */
406 unsigned int nr_congestion_on; 406 unsigned int nr_congestion_on;
407 unsigned int nr_congestion_off; 407 unsigned int nr_congestion_off;
408 unsigned int nr_batching; 408 unsigned int nr_batching;
409 409
410 void *dma_drain_buffer; 410 void *dma_drain_buffer;
411 unsigned int dma_drain_size; 411 unsigned int dma_drain_size;
412 unsigned int dma_pad_mask; 412 unsigned int dma_pad_mask;
413 unsigned int dma_alignment; 413 unsigned int dma_alignment;
414 414
415 struct blk_queue_tag *queue_tags; 415 struct blk_queue_tag *queue_tags;
416 struct list_head tag_busy_list; 416 struct list_head tag_busy_list;
417 417
418 unsigned int nr_sorted; 418 unsigned int nr_sorted;
419 unsigned int in_flight[2]; 419 unsigned int in_flight[2];
420 420
421 unsigned int rq_timeout; 421 unsigned int rq_timeout;
422 struct timer_list timeout; 422 struct timer_list timeout;
423 struct list_head timeout_list; 423 struct list_head timeout_list;
424 424
425 struct queue_limits limits; 425 struct queue_limits limits;
426 426
427 /* 427 /*
428 * sg stuff 428 * sg stuff
429 */ 429 */
430 unsigned int sg_timeout; 430 unsigned int sg_timeout;
431 unsigned int sg_reserved_size; 431 unsigned int sg_reserved_size;
432 int node; 432 int node;
433 #ifdef CONFIG_BLK_DEV_IO_TRACE 433 #ifdef CONFIG_BLK_DEV_IO_TRACE
434 struct blk_trace *blk_trace; 434 struct blk_trace *blk_trace;
435 #endif 435 #endif
436 /* 436 /*
437 * reserved for flush operations 437 * reserved for flush operations
438 */ 438 */
439 unsigned int ordered, next_ordered, ordseq; 439 unsigned int ordered, next_ordered, ordseq;
440 int orderr, ordcolor; 440 int orderr, ordcolor;
441 struct request pre_flush_rq, bar_rq, post_flush_rq; 441 struct request pre_flush_rq, bar_rq, post_flush_rq;
442 struct request *orig_bar_rq; 442 struct request *orig_bar_rq;
443 443
444 struct mutex sysfs_lock; 444 struct mutex sysfs_lock;
445 445
446 #if defined(CONFIG_BLK_DEV_BSG) 446 #if defined(CONFIG_BLK_DEV_BSG)
447 struct bsg_class_device bsg_dev; 447 struct bsg_class_device bsg_dev;
448 #endif 448 #endif
449 }; 449 };
450 450
451 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 451 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
452 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 452 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
453 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 453 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
454 #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ 454 #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
455 #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ 455 #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
456 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 456 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
457 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 457 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
458 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 458 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
459 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 459 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
460 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 460 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
461 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 461 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
462 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ 462 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
463 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 463 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
464 #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ 464 #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */
465 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ 465 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
466 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ 466 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
467 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ 467 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
468 #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ 468 #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */
469 #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ 469 #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */
470 #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */
470 471
471 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 472 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
472 (1 << QUEUE_FLAG_CLUSTER) | \ 473 (1 << QUEUE_FLAG_CLUSTER) | \
473 (1 << QUEUE_FLAG_STACKABLE) | \ 474 (1 << QUEUE_FLAG_STACKABLE) | \
474 (1 << QUEUE_FLAG_SAME_COMP)) 475 (1 << QUEUE_FLAG_SAME_COMP) | \
476 (1 << QUEUE_FLAG_ADD_RANDOM))
475 477
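Because QUEUE_FLAG_ADD_RANDOM is folded into QUEUE_FLAG_DEFAULT, every newly allocated queue keeps contributing to the entropy pool, so existing behaviour is unchanged. A driver that considers its completion timings a poor entropy source (an SSD, for instance) could opt out while setting the queue up, before it is visible to anyone else; this is an illustrative use of the existing helpers, not something this patch adds:

/* hypothetical setup path of an SSD-style driver */
static void example_setup_queue(struct request_queue *q)
{
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);		/* no rotational latency */
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);	/* skip the entropy pool */
}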
476 static inline int queue_is_locked(struct request_queue *q) 478 static inline int queue_is_locked(struct request_queue *q)
477 { 479 {
478 #ifdef CONFIG_SMP 480 #ifdef CONFIG_SMP
479 spinlock_t *lock = q->queue_lock; 481 spinlock_t *lock = q->queue_lock;
480 return lock && spin_is_locked(lock); 482 return lock && spin_is_locked(lock);
481 #else 483 #else
482 return 1; 484 return 1;
483 #endif 485 #endif
484 } 486 }
485 487
486 static inline void queue_flag_set_unlocked(unsigned int flag, 488 static inline void queue_flag_set_unlocked(unsigned int flag,
487 struct request_queue *q) 489 struct request_queue *q)
488 { 490 {
489 __set_bit(flag, &q->queue_flags); 491 __set_bit(flag, &q->queue_flags);
490 } 492 }
491 493
492 static inline int queue_flag_test_and_clear(unsigned int flag, 494 static inline int queue_flag_test_and_clear(unsigned int flag,
493 struct request_queue *q) 495 struct request_queue *q)
494 { 496 {
495 WARN_ON_ONCE(!queue_is_locked(q)); 497 WARN_ON_ONCE(!queue_is_locked(q));
496 498
497 if (test_bit(flag, &q->queue_flags)) { 499 if (test_bit(flag, &q->queue_flags)) {
498 __clear_bit(flag, &q->queue_flags); 500 __clear_bit(flag, &q->queue_flags);
499 return 1; 501 return 1;
500 } 502 }
501 503
502 return 0; 504 return 0;
503 } 505 }
504 506
505 static inline int queue_flag_test_and_set(unsigned int flag, 507 static inline int queue_flag_test_and_set(unsigned int flag,
506 struct request_queue *q) 508 struct request_queue *q)
507 { 509 {
508 WARN_ON_ONCE(!queue_is_locked(q)); 510 WARN_ON_ONCE(!queue_is_locked(q));
509 511
510 if (!test_bit(flag, &q->queue_flags)) { 512 if (!test_bit(flag, &q->queue_flags)) {
511 __set_bit(flag, &q->queue_flags); 513 __set_bit(flag, &q->queue_flags);
512 return 0; 514 return 0;
513 } 515 }
514 516
515 return 1; 517 return 1;
516 } 518 }
517 519
518 static inline void queue_flag_set(unsigned int flag, struct request_queue *q) 520 static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
519 { 521 {
520 WARN_ON_ONCE(!queue_is_locked(q)); 522 WARN_ON_ONCE(!queue_is_locked(q));
521 __set_bit(flag, &q->queue_flags); 523 __set_bit(flag, &q->queue_flags);
522 } 524 }
523 525
524 static inline void queue_flag_clear_unlocked(unsigned int flag, 526 static inline void queue_flag_clear_unlocked(unsigned int flag,
525 struct request_queue *q) 527 struct request_queue *q)
526 { 528 {
527 __clear_bit(flag, &q->queue_flags); 529 __clear_bit(flag, &q->queue_flags);
528 } 530 }
529 531
530 static inline int queue_in_flight(struct request_queue *q) 532 static inline int queue_in_flight(struct request_queue *q)
531 { 533 {
532 return q->in_flight[0] + q->in_flight[1]; 534 return q->in_flight[0] + q->in_flight[1];
533 } 535 }
534 536
535 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) 537 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
536 { 538 {
537 WARN_ON_ONCE(!queue_is_locked(q)); 539 WARN_ON_ONCE(!queue_is_locked(q));
538 __clear_bit(flag, &q->queue_flags); 540 __clear_bit(flag, &q->queue_flags);
539 } 541 }
540 542
541 enum { 543 enum {
542 /* 544 /*
543 * Hardbarrier is supported with one of the following methods. 545 * Hardbarrier is supported with one of the following methods.
544 * 546 *
545 * NONE : hardbarrier unsupported 547 * NONE : hardbarrier unsupported
546 * DRAIN : ordering by draining is enough 548 * DRAIN : ordering by draining is enough
547 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes 549 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
548 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write 550 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
549 * TAG : ordering by tag is enough 551 * TAG : ordering by tag is enough
550 * TAG_FLUSH : ordering by tag w/ pre and post flushes 552 * TAG_FLUSH : ordering by tag w/ pre and post flushes
551 * TAG_FUA : ordering by tag w/ pre flush and FUA write 553 * TAG_FUA : ordering by tag w/ pre flush and FUA write
552 */ 554 */
553 QUEUE_ORDERED_BY_DRAIN = 0x01, 555 QUEUE_ORDERED_BY_DRAIN = 0x01,
554 QUEUE_ORDERED_BY_TAG = 0x02, 556 QUEUE_ORDERED_BY_TAG = 0x02,
555 QUEUE_ORDERED_DO_PREFLUSH = 0x10, 557 QUEUE_ORDERED_DO_PREFLUSH = 0x10,
556 QUEUE_ORDERED_DO_BAR = 0x20, 558 QUEUE_ORDERED_DO_BAR = 0x20,
557 QUEUE_ORDERED_DO_POSTFLUSH = 0x40, 559 QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
558 QUEUE_ORDERED_DO_FUA = 0x80, 560 QUEUE_ORDERED_DO_FUA = 0x80,
559 561
560 QUEUE_ORDERED_NONE = 0x00, 562 QUEUE_ORDERED_NONE = 0x00,
561 563
562 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | 564 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN |
563 QUEUE_ORDERED_DO_BAR, 565 QUEUE_ORDERED_DO_BAR,
564 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 566 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
565 QUEUE_ORDERED_DO_PREFLUSH | 567 QUEUE_ORDERED_DO_PREFLUSH |
566 QUEUE_ORDERED_DO_POSTFLUSH, 568 QUEUE_ORDERED_DO_POSTFLUSH,
567 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 569 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
568 QUEUE_ORDERED_DO_PREFLUSH | 570 QUEUE_ORDERED_DO_PREFLUSH |
569 QUEUE_ORDERED_DO_FUA, 571 QUEUE_ORDERED_DO_FUA,
570 572
571 QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | 573 QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG |
572 QUEUE_ORDERED_DO_BAR, 574 QUEUE_ORDERED_DO_BAR,
573 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | 575 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
574 QUEUE_ORDERED_DO_PREFLUSH | 576 QUEUE_ORDERED_DO_PREFLUSH |
575 QUEUE_ORDERED_DO_POSTFLUSH, 577 QUEUE_ORDERED_DO_POSTFLUSH,
576 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | 578 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
577 QUEUE_ORDERED_DO_PREFLUSH | 579 QUEUE_ORDERED_DO_PREFLUSH |
578 QUEUE_ORDERED_DO_FUA, 580 QUEUE_ORDERED_DO_FUA,
579 581
580 /* 582 /*
581 * Ordered operation sequence 583 * Ordered operation sequence
582 */ 584 */
583 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ 585 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
584 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ 586 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
585 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ 587 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
586 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ 588 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
587 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ 589 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
588 QUEUE_ORDSEQ_DONE = 0x20, 590 QUEUE_ORDSEQ_DONE = 0x20,
589 }; 591 };
590 592
591 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 593 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
592 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 594 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
593 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 595 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
594 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 596 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
595 #define blk_queue_noxmerges(q) \ 597 #define blk_queue_noxmerges(q) \
596 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) 598 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
597 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 599 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
598 #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) 600 #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
601 #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
599 #define blk_queue_flushing(q) ((q)->ordseq) 602 #define blk_queue_flushing(q) ((q)->ordseq)
600 #define blk_queue_stackable(q) \ 603 #define blk_queue_stackable(q) \
601 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) 604 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
602 #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) 605 #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
603 606
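blk_queue_add_random() is the test the request completion path can use to decide whether a finished request should still feed the entropy pool; the actual check belongs in the completion code in blk-core.c, and the wrapper function below is only an illustration:

static void example_io_done(struct request *req)
{
	struct request_queue *q = req->q;

	/* only sample completion timing for queues that still opt in */
	if (blk_queue_add_random(q))
		add_disk_randomness(req->rq_disk);
}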
604 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 607 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
605 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) 608 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
606 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) 609 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
607 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) 610 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
608 611
609 #define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) 612 #define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV)
610 #define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) 613 #define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT)
611 #define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) 614 #define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER)
612 #define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ 615 #define blk_noretry_request(rq) (blk_failfast_dev(rq) || \
613 blk_failfast_transport(rq) || \ 616 blk_failfast_transport(rq) || \
614 blk_failfast_driver(rq)) 617 blk_failfast_driver(rq))
615 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 618 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
616 #define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) 619 #define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)
617 #define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) 620 #define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET)
618 621
619 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 622 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
620 623
621 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 624 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
622 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 625 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
623 #define blk_pm_request(rq) \ 626 #define blk_pm_request(rq) \
624 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 627 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
625 628
626 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 629 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
627 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 630 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
628 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 631 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
629 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 632 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
630 #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) 633 #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
631 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 634 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
632 /* rq->queuelist of dequeued request must be list_empty() */ 635 /* rq->queuelist of dequeued request must be list_empty() */
633 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) 636 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
634 637
635 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 638 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
636 639
637 #define rq_data_dir(rq) ((rq)->cmd_flags & 1) 640 #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
638 641
639 /* 642 /*
640 * We regard a request as sync if it is either a read or a sync write 643 * We regard a request as sync if it is either a read or a sync write
641 */ 644 */
642 static inline bool rw_is_sync(unsigned int rw_flags) 645 static inline bool rw_is_sync(unsigned int rw_flags)
643 { 646 {
644 return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); 647 return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
645 } 648 }
646 649
647 static inline bool rq_is_sync(struct request *rq) 650 static inline bool rq_is_sync(struct request *rq)
648 { 651 {
649 return rw_is_sync(rq->cmd_flags); 652 return rw_is_sync(rq->cmd_flags);
650 } 653 }
651 654
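
As an illustrative aside, a minimal sketch of how the classification above falls out for the common flag combinations; READ, WRITE and REQ_RW_SYNC are the existing block-layer flags, the local variables are hypothetical:

        bool r  = rw_is_sync(READ);                     /* true: reads always count as sync */
        bool w  = rw_is_sync(WRITE);                    /* false: a plain write is async */
        bool ws = rw_is_sync(WRITE | REQ_RW_SYNC);      /* true: write explicitly marked sync */
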
652 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) 655 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
653 #define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE) 656 #define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE)
654 657
655 static inline int blk_queue_full(struct request_queue *q, int sync) 658 static inline int blk_queue_full(struct request_queue *q, int sync)
656 { 659 {
657 if (sync) 660 if (sync)
658 return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); 661 return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
659 return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); 662 return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
660 } 663 }
661 664
662 static inline void blk_set_queue_full(struct request_queue *q, int sync) 665 static inline void blk_set_queue_full(struct request_queue *q, int sync)
663 { 666 {
664 if (sync) 667 if (sync)
665 queue_flag_set(QUEUE_FLAG_SYNCFULL, q); 668 queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
666 else 669 else
667 queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); 670 queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
668 } 671 }
669 672
670 static inline void blk_clear_queue_full(struct request_queue *q, int sync) 673 static inline void blk_clear_queue_full(struct request_queue *q, int sync)
671 { 674 {
672 if (sync) 675 if (sync)
673 queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); 676 queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
674 else 677 else
675 queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); 678 queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
676 } 679 }
677 680
678 681
679 /* 682 /*
680 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 683 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
681 * it already have been started by the driver. 684 * it already have been started by the driver.
682 */ 685 */
683 #define RQ_NOMERGE_FLAGS \ 686 #define RQ_NOMERGE_FLAGS \
684 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 687 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
685 #define rq_mergeable(rq) \ 688 #define rq_mergeable(rq) \
686 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 689 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
687 (blk_discard_rq(rq) || blk_fs_request((rq)))) 690 (blk_discard_rq(rq) || blk_fs_request((rq))))
688 691
689 /* 692 /*
690 * q->prep_rq_fn return values 693 * q->prep_rq_fn return values
691 */ 694 */
692 #define BLKPREP_OK 0 /* serve it */ 695 #define BLKPREP_OK 0 /* serve it */
693 #define BLKPREP_KILL 1 /* fatal error, kill */ 696 #define BLKPREP_KILL 1 /* fatal error, kill */
694 #define BLKPREP_DEFER 2 /* leave on queue */ 697 #define BLKPREP_DEFER 2 /* leave on queue */
695 698
696 extern unsigned long blk_max_low_pfn, blk_max_pfn; 699 extern unsigned long blk_max_low_pfn, blk_max_pfn;
697 700
698 /* 701 /*
699 * standard bounce addresses: 702 * standard bounce addresses:
700 * 703 *
701 * BLK_BOUNCE_HIGH : bounce all highmem pages 704 * BLK_BOUNCE_HIGH : bounce all highmem pages
702 * BLK_BOUNCE_ANY : don't bounce anything 705 * BLK_BOUNCE_ANY : don't bounce anything
703 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 706 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
704 */ 707 */
705 708
706 #if BITS_PER_LONG == 32 709 #if BITS_PER_LONG == 32
707 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 710 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
708 #else 711 #else
709 #define BLK_BOUNCE_HIGH -1ULL 712 #define BLK_BOUNCE_HIGH -1ULL
710 #endif 713 #endif
711 #define BLK_BOUNCE_ANY (-1ULL) 714 #define BLK_BOUNCE_ANY (-1ULL)
712 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) 715 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
713 716
714 /* 717 /*
715 * default timeout for SG_IO if none specified 718 * default timeout for SG_IO if none specified
716 */ 719 */
717 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 720 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
718 #define BLK_MIN_SG_TIMEOUT (7 * HZ) 721 #define BLK_MIN_SG_TIMEOUT (7 * HZ)
719 722
720 #ifdef CONFIG_BOUNCE 723 #ifdef CONFIG_BOUNCE
721 extern int init_emergency_isa_pool(void); 724 extern int init_emergency_isa_pool(void);
722 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); 725 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
723 #else 726 #else
724 static inline int init_emergency_isa_pool(void) 727 static inline int init_emergency_isa_pool(void)
725 { 728 {
726 return 0; 729 return 0;
727 } 730 }
728 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) 731 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
729 { 732 {
730 } 733 }
731 #endif /* CONFIG_BOUNCE */ 734 #endif /* CONFIG_BOUNCE */
732 735
733 struct rq_map_data { 736 struct rq_map_data {
734 struct page **pages; 737 struct page **pages;
735 int page_order; 738 int page_order;
736 int nr_entries; 739 int nr_entries;
737 unsigned long offset; 740 unsigned long offset;
738 int null_mapped; 741 int null_mapped;
739 int from_user; 742 int from_user;
740 }; 743 };
741 744
742 struct req_iterator { 745 struct req_iterator {
743 int i; 746 int i;
744 struct bio *bio; 747 struct bio *bio;
745 }; 748 };
746 749
747 /* This should not be used directly - use rq_for_each_segment */ 750 /* This should not be used directly - use rq_for_each_segment */
748 #define for_each_bio(_bio) \ 751 #define for_each_bio(_bio) \
749 for (; _bio; _bio = _bio->bi_next) 752 for (; _bio; _bio = _bio->bi_next)
750 #define __rq_for_each_bio(_bio, rq) \ 753 #define __rq_for_each_bio(_bio, rq) \
751 if ((rq->bio)) \ 754 if ((rq->bio)) \
752 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 755 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
753 756
754 #define rq_for_each_segment(bvl, _rq, _iter) \ 757 #define rq_for_each_segment(bvl, _rq, _iter) \
755 __rq_for_each_bio(_iter.bio, _rq) \ 758 __rq_for_each_bio(_iter.bio, _rq) \
756 bio_for_each_segment(bvl, _iter.bio, _iter.i) 759 bio_for_each_segment(bvl, _iter.bio, _iter.i)
757 760
758 #define rq_iter_last(rq, _iter) \ 761 #define rq_iter_last(rq, _iter) \
759 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) 762 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
760 763
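
As an illustrative aside, a minimal sketch of a driver walking the data segments of a request with rq_for_each_segment(); 'rq' is a hypothetical struct request *, and the sketch assumes the pages are not in highmem (otherwise kmap_atomic() would be needed before touching them):

        struct req_iterator iter;
        struct bio_vec *bvec;
        unsigned int done = 0;

        rq_for_each_segment(bvec, rq, iter) {
                char *buf = page_address(bvec->bv_page) + bvec->bv_offset;

                /* transfer bvec->bv_len bytes at 'buf' to or from the device */
                done += bvec->bv_len;
        }
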
761 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 764 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
762 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" 765 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
763 #endif 766 #endif
764 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 767 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
765 extern void rq_flush_dcache_pages(struct request *rq); 768 extern void rq_flush_dcache_pages(struct request *rq);
766 #else 769 #else
767 static inline void rq_flush_dcache_pages(struct request *rq) 770 static inline void rq_flush_dcache_pages(struct request *rq)
768 { 771 {
769 } 772 }
770 #endif 773 #endif
771 774
772 extern int blk_register_queue(struct gendisk *disk); 775 extern int blk_register_queue(struct gendisk *disk);
773 extern void blk_unregister_queue(struct gendisk *disk); 776 extern void blk_unregister_queue(struct gendisk *disk);
774 extern void register_disk(struct gendisk *dev); 777 extern void register_disk(struct gendisk *dev);
775 extern void generic_make_request(struct bio *bio); 778 extern void generic_make_request(struct bio *bio);
776 extern void blk_rq_init(struct request_queue *q, struct request *rq); 779 extern void blk_rq_init(struct request_queue *q, struct request *rq);
777 extern void blk_put_request(struct request *); 780 extern void blk_put_request(struct request *);
778 extern void __blk_put_request(struct request_queue *, struct request *); 781 extern void __blk_put_request(struct request_queue *, struct request *);
779 extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 782 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
780 extern struct request *blk_make_request(struct request_queue *, struct bio *, 783 extern struct request *blk_make_request(struct request_queue *, struct bio *,
781 gfp_t); 784 gfp_t);
782 extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 785 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
783 extern void blk_requeue_request(struct request_queue *, struct request *); 786 extern void blk_requeue_request(struct request_queue *, struct request *);
784 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); 787 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
785 extern int blk_lld_busy(struct request_queue *q); 788 extern int blk_lld_busy(struct request_queue *q);
786 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 789 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
787 struct bio_set *bs, gfp_t gfp_mask, 790 struct bio_set *bs, gfp_t gfp_mask,
788 int (*bio_ctr)(struct bio *, struct bio *, void *), 791 int (*bio_ctr)(struct bio *, struct bio *, void *),
789 void *data); 792 void *data);
790 extern void blk_rq_unprep_clone(struct request *rq); 793 extern void blk_rq_unprep_clone(struct request *rq);
791 extern int blk_insert_cloned_request(struct request_queue *q, 794 extern int blk_insert_cloned_request(struct request_queue *q,
792 struct request *rq); 795 struct request *rq);
793 extern void blk_plug_device(struct request_queue *); 796 extern void blk_plug_device(struct request_queue *);
794 extern void blk_plug_device_unlocked(struct request_queue *); 797 extern void blk_plug_device_unlocked(struct request_queue *);
795 extern int blk_remove_plug(struct request_queue *); 798 extern int blk_remove_plug(struct request_queue *);
796 extern void blk_recount_segments(struct request_queue *, struct bio *); 799 extern void blk_recount_segments(struct request_queue *, struct bio *);
797 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, 800 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
798 unsigned int, void __user *); 801 unsigned int, void __user *);
799 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, 802 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
800 struct scsi_ioctl_command __user *); 803 struct scsi_ioctl_command __user *);
801 804
802 /* 805 /*
803 * A queue has just exited congestion. Note this in the global counter of 806 * A queue has just exited congestion. Note this in the global counter of
804 * congested queues, and wake up anyone who was waiting for requests to be 807 * congested queues, and wake up anyone who was waiting for requests to be
805 * put back. 808 * put back.
806 */ 809 */
807 static inline void blk_clear_queue_congested(struct request_queue *q, int sync) 810 static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
808 { 811 {
809 clear_bdi_congested(&q->backing_dev_info, sync); 812 clear_bdi_congested(&q->backing_dev_info, sync);
810 } 813 }
811 814
812 /* 815 /*
813 * A queue has just entered congestion. Flag that in the queue's VM-visible 816 * A queue has just entered congestion. Flag that in the queue's VM-visible
814 * state flags and increment the global counter of congested queues. 817 * state flags and increment the global counter of congested queues.
815 */ 818 */
816 static inline void blk_set_queue_congested(struct request_queue *q, int sync) 819 static inline void blk_set_queue_congested(struct request_queue *q, int sync)
817 { 820 {
818 set_bdi_congested(&q->backing_dev_info, sync); 821 set_bdi_congested(&q->backing_dev_info, sync);
819 } 822 }
820 823
821 extern void blk_start_queue(struct request_queue *q); 824 extern void blk_start_queue(struct request_queue *q);
822 extern void blk_stop_queue(struct request_queue *q); 825 extern void blk_stop_queue(struct request_queue *q);
823 extern void blk_sync_queue(struct request_queue *q); 826 extern void blk_sync_queue(struct request_queue *q);
824 extern void __blk_stop_queue(struct request_queue *q); 827 extern void __blk_stop_queue(struct request_queue *q);
825 extern void __blk_run_queue(struct request_queue *); 828 extern void __blk_run_queue(struct request_queue *);
826 extern void blk_run_queue(struct request_queue *); 829 extern void blk_run_queue(struct request_queue *);
827 extern int blk_rq_map_user(struct request_queue *, struct request *, 830 extern int blk_rq_map_user(struct request_queue *, struct request *,
828 struct rq_map_data *, void __user *, unsigned long, 831 struct rq_map_data *, void __user *, unsigned long,
829 gfp_t); 832 gfp_t);
830 extern int blk_rq_unmap_user(struct bio *); 833 extern int blk_rq_unmap_user(struct bio *);
831 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 834 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
832 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 835 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
833 struct rq_map_data *, struct sg_iovec *, int, 836 struct rq_map_data *, struct sg_iovec *, int,
834 unsigned int, gfp_t); 837 unsigned int, gfp_t);
835 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 838 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
836 struct request *, int); 839 struct request *, int);
837 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 840 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
838 struct request *, int, rq_end_io_fn *); 841 struct request *, int, rq_end_io_fn *);
839 extern void blk_unplug(struct request_queue *q); 842 extern void blk_unplug(struct request_queue *q);
840 843
841 static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 844 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
842 { 845 {
843 return bdev->bd_disk->queue; 846 return bdev->bd_disk->queue;
844 } 847 }
845 848
846 /* 849 /*
847 * blk_rq_pos() : the current sector 850 * blk_rq_pos() : the current sector
848 * blk_rq_bytes() : bytes left in the entire request 851 * blk_rq_bytes() : bytes left in the entire request
849 * blk_rq_cur_bytes() : bytes left in the current segment 852 * blk_rq_cur_bytes() : bytes left in the current segment
850 * blk_rq_err_bytes() : bytes left till the next error boundary 853 * blk_rq_err_bytes() : bytes left till the next error boundary
851 * blk_rq_sectors() : sectors left in the entire request 854 * blk_rq_sectors() : sectors left in the entire request
852 * blk_rq_cur_sectors() : sectors left in the current segment 855 * blk_rq_cur_sectors() : sectors left in the current segment
853 */ 856 */
854 static inline sector_t blk_rq_pos(const struct request *rq) 857 static inline sector_t blk_rq_pos(const struct request *rq)
855 { 858 {
856 return rq->__sector; 859 return rq->__sector;
857 } 860 }
858 861
859 static inline unsigned int blk_rq_bytes(const struct request *rq) 862 static inline unsigned int blk_rq_bytes(const struct request *rq)
860 { 863 {
861 return rq->__data_len; 864 return rq->__data_len;
862 } 865 }
863 866
864 static inline int blk_rq_cur_bytes(const struct request *rq) 867 static inline int blk_rq_cur_bytes(const struct request *rq)
865 { 868 {
866 return rq->bio ? bio_cur_bytes(rq->bio) : 0; 869 return rq->bio ? bio_cur_bytes(rq->bio) : 0;
867 } 870 }
868 871
869 extern unsigned int blk_rq_err_bytes(const struct request *rq); 872 extern unsigned int blk_rq_err_bytes(const struct request *rq);
870 873
871 static inline unsigned int blk_rq_sectors(const struct request *rq) 874 static inline unsigned int blk_rq_sectors(const struct request *rq)
872 { 875 {
873 return blk_rq_bytes(rq) >> 9; 876 return blk_rq_bytes(rq) >> 9;
874 } 877 }
875 878
876 static inline unsigned int blk_rq_cur_sectors(const struct request *rq) 879 static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
877 { 880 {
878 return blk_rq_cur_bytes(rq) >> 9; 881 return blk_rq_cur_bytes(rq) >> 9;
879 } 882 }
880 883
881 /* 884 /*
882 * Request issue related functions. 885 * Request issue related functions.
883 */ 886 */
884 extern struct request *blk_peek_request(struct request_queue *q); 887 extern struct request *blk_peek_request(struct request_queue *q);
885 extern void blk_start_request(struct request *rq); 888 extern void blk_start_request(struct request *rq);
886 extern struct request *blk_fetch_request(struct request_queue *q); 889 extern struct request *blk_fetch_request(struct request_queue *q);
887 890
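
As an illustrative aside, a hedged sketch of how a request-based driver typically combines blk_fetch_request() with the position/size accessors above when it starts the next request; 'q' and the hardware programming step are hypothetical, and blk_fetch_request() must be called with the queue lock held:

        struct request *rq = blk_fetch_request(q);

        if (rq) {
                sector_t start = blk_rq_pos(rq);          /* first sector of the request */
                unsigned int nsect = blk_rq_sectors(rq);  /* sectors left in the request */

                /* program the hardware to transfer nsect sectors starting at 'start' */
        }
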
888 /* 891 /*
889 * Request completion related functions. 892 * Request completion related functions.
890 * 893 *
891 * blk_update_request() completes given number of bytes and updates 894 * blk_update_request() completes given number of bytes and updates
892 * the request without completing it. 895 * the request without completing it.
893 * 896 *
894 * blk_end_request() and friends. __blk_end_request() must be called 897 * blk_end_request() and friends. __blk_end_request() must be called
895 * with the request queue spinlock acquired. 898 * with the request queue spinlock acquired.
896 * 899 *
897 * Several drivers define their own end_request and call 900 * Several drivers define their own end_request and call
898 * blk_end_request() for parts of the original function. 901 * blk_end_request() for parts of the original function.
899 * This prevents code duplication in drivers. 902 * This prevents code duplication in drivers.
900 */ 903 */
901 extern bool blk_update_request(struct request *rq, int error, 904 extern bool blk_update_request(struct request *rq, int error,
902 unsigned int nr_bytes); 905 unsigned int nr_bytes);
903 extern bool blk_end_request(struct request *rq, int error, 906 extern bool blk_end_request(struct request *rq, int error,
904 unsigned int nr_bytes); 907 unsigned int nr_bytes);
905 extern void blk_end_request_all(struct request *rq, int error); 908 extern void blk_end_request_all(struct request *rq, int error);
906 extern bool blk_end_request_cur(struct request *rq, int error); 909 extern bool blk_end_request_cur(struct request *rq, int error);
907 extern bool blk_end_request_err(struct request *rq, int error); 910 extern bool blk_end_request_err(struct request *rq, int error);
908 extern bool __blk_end_request(struct request *rq, int error, 911 extern bool __blk_end_request(struct request *rq, int error,
909 unsigned int nr_bytes); 912 unsigned int nr_bytes);
910 extern void __blk_end_request_all(struct request *rq, int error); 913 extern void __blk_end_request_all(struct request *rq, int error);
911 extern bool __blk_end_request_cur(struct request *rq, int error); 914 extern bool __blk_end_request_cur(struct request *rq, int error);
912 extern bool __blk_end_request_err(struct request *rq, int error); 915 extern bool __blk_end_request_err(struct request *rq, int error);
913 916
914 extern void blk_complete_request(struct request *); 917 extern void blk_complete_request(struct request *);
915 extern void __blk_complete_request(struct request *); 918 extern void __blk_complete_request(struct request *);
916 extern void blk_abort_request(struct request *); 919 extern void blk_abort_request(struct request *);
917 extern void blk_abort_queue(struct request_queue *); 920 extern void blk_abort_queue(struct request_queue *);
918 921
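
As an illustrative aside, a hedged sketch of a driver completion path built on blk_end_request(); the function name and the 'bytes'/'error' parameters are hypothetical:

        static void my_end_io(struct request *rq, unsigned int bytes, int error)
        {
                /* blk_end_request() returns true while buffers are still pending */
                if (blk_end_request(rq, error, bytes))
                        return;         /* partial completion, the remainder stays queued */

                /* rq is now fully completed and released by the block layer */
        }
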
919 /* 922 /*
920 * Access functions for manipulating queue properties 923 * Access functions for manipulating queue properties
921 */ 924 */
922 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 925 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
923 spinlock_t *lock, int node_id); 926 spinlock_t *lock, int node_id);
924 extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, 927 extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *,
925 request_fn_proc *, 928 request_fn_proc *,
926 spinlock_t *, int node_id); 929 spinlock_t *, int node_id);
927 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 930 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
928 extern struct request_queue *blk_init_allocated_queue(struct request_queue *, 931 extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
929 request_fn_proc *, spinlock_t *); 932 request_fn_proc *, spinlock_t *);
930 extern void blk_cleanup_queue(struct request_queue *); 933 extern void blk_cleanup_queue(struct request_queue *);
931 extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 934 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
932 extern void blk_queue_bounce_limit(struct request_queue *, u64); 935 extern void blk_queue_bounce_limit(struct request_queue *, u64);
933 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); 936 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
934 extern void blk_queue_max_segments(struct request_queue *, unsigned short); 937 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
935 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 938 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
936 extern void blk_queue_max_discard_sectors(struct request_queue *q, 939 extern void blk_queue_max_discard_sectors(struct request_queue *q,
937 unsigned int max_discard_sectors); 940 unsigned int max_discard_sectors);
938 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); 941 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
939 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); 942 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
940 extern void blk_queue_alignment_offset(struct request_queue *q, 943 extern void blk_queue_alignment_offset(struct request_queue *q,
941 unsigned int alignment); 944 unsigned int alignment);
942 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); 945 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
943 extern void blk_queue_io_min(struct request_queue *q, unsigned int min); 946 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
944 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); 947 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
945 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); 948 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
946 extern void blk_set_default_limits(struct queue_limits *lim); 949 extern void blk_set_default_limits(struct queue_limits *lim);
947 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 950 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
948 sector_t offset); 951 sector_t offset);
949 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, 952 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
950 sector_t offset); 953 sector_t offset);
951 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, 954 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
952 sector_t offset); 955 sector_t offset);
953 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); 956 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
954 extern void blk_queue_dma_pad(struct request_queue *, unsigned int); 957 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
955 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); 958 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
956 extern int blk_queue_dma_drain(struct request_queue *q, 959 extern int blk_queue_dma_drain(struct request_queue *q,
957 dma_drain_needed_fn *dma_drain_needed, 960 dma_drain_needed_fn *dma_drain_needed,
958 void *buf, unsigned int size); 961 void *buf, unsigned int size);
959 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); 962 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
960 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 963 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
961 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 964 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
962 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 965 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
963 extern void blk_queue_dma_alignment(struct request_queue *, int); 966 extern void blk_queue_dma_alignment(struct request_queue *, int);
964 extern void blk_queue_update_dma_alignment(struct request_queue *, int); 967 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
965 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 968 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
966 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); 969 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
967 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 970 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
968 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 971 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
969 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 972 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
970 extern bool blk_do_ordered(struct request_queue *, struct request **); 973 extern bool blk_do_ordered(struct request_queue *, struct request **);
971 extern unsigned blk_ordered_cur_seq(struct request_queue *); 974 extern unsigned blk_ordered_cur_seq(struct request_queue *);
972 extern unsigned blk_ordered_req_seq(struct request *); 975 extern unsigned blk_ordered_req_seq(struct request *);
973 extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); 976 extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
974 977
975 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 978 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
976 extern void blk_dump_rq_flags(struct request *, char *); 979 extern void blk_dump_rq_flags(struct request *, char *);
977 extern void generic_unplug_device(struct request_queue *); 980 extern void generic_unplug_device(struct request_queue *);
978 extern long nr_blockdev_pages(void); 981 extern long nr_blockdev_pages(void);
979 982
980 int blk_get_queue(struct request_queue *); 983 int blk_get_queue(struct request_queue *);
981 struct request_queue *blk_alloc_queue(gfp_t); 984 struct request_queue *blk_alloc_queue(gfp_t);
982 struct request_queue *blk_alloc_queue_node(gfp_t, int); 985 struct request_queue *blk_alloc_queue_node(gfp_t, int);
983 extern void blk_put_queue(struct request_queue *); 986 extern void blk_put_queue(struct request_queue *);
984 987
985 /* 988 /*
986 * tag stuff 989 * tag stuff
987 */ 990 */
988 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) 991 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
989 extern int blk_queue_start_tag(struct request_queue *, struct request *); 992 extern int blk_queue_start_tag(struct request_queue *, struct request *);
990 extern struct request *blk_queue_find_tag(struct request_queue *, int); 993 extern struct request *blk_queue_find_tag(struct request_queue *, int);
991 extern void blk_queue_end_tag(struct request_queue *, struct request *); 994 extern void blk_queue_end_tag(struct request_queue *, struct request *);
992 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); 995 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
993 extern void blk_queue_free_tags(struct request_queue *); 996 extern void blk_queue_free_tags(struct request_queue *);
994 extern int blk_queue_resize_tags(struct request_queue *, int); 997 extern int blk_queue_resize_tags(struct request_queue *, int);
995 extern void blk_queue_invalidate_tags(struct request_queue *); 998 extern void blk_queue_invalidate_tags(struct request_queue *);
996 extern struct blk_queue_tag *blk_init_tags(int); 999 extern struct blk_queue_tag *blk_init_tags(int);
997 extern void blk_free_tags(struct blk_queue_tag *); 1000 extern void blk_free_tags(struct blk_queue_tag *);
998 1001
999 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, 1002 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
1000 int tag) 1003 int tag)
1001 { 1004 {
1002 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 1005 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
1003 return NULL; 1006 return NULL;
1004 return bqt->tag_index[tag]; 1007 return bqt->tag_index[tag];
1005 } 1008 }
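
As an illustrative aside, a hedged sketch of a completion interrupt resolving a hardware tag back to its request via blk_map_queue_find_tag(); it assumes the queue uses block-layer tagging, that 'hw_tag' came from the hardware, and that the queue lock is already held (required by __blk_end_request_all()):

        struct request *rq = blk_map_queue_find_tag(q->queue_tags, hw_tag);

        if (rq) {
                blk_queue_end_tag(q, rq);       /* give the tag back */
                __blk_end_request_all(rq, 0);   /* complete the request, no error */
        }
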
1006 enum { 1009 enum {
1007 BLKDEV_WAIT, /* wait for completion */ 1010 BLKDEV_WAIT, /* wait for completion */
1008 BLKDEV_BARRIER, /* issue request with barrier */ 1011 BLKDEV_BARRIER, /* issue request with barrier */
1009 }; 1012 };
1010 #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) 1013 #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT)
1011 #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) 1014 #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER)
1012 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, 1015 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
1013 unsigned long); 1016 unsigned long);
1014 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1017 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1015 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 1018 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1016 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1019 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1017 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 1020 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1018 static inline int sb_issue_discard(struct super_block *sb, 1021 static inline int sb_issue_discard(struct super_block *sb,
1019 sector_t block, sector_t nr_blocks) 1022 sector_t block, sector_t nr_blocks)
1020 { 1023 {
1021 block <<= (sb->s_blocksize_bits - 9); 1024 block <<= (sb->s_blocksize_bits - 9);
1022 nr_blocks <<= (sb->s_blocksize_bits - 9); 1025 nr_blocks <<= (sb->s_blocksize_bits - 9);
1023 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, 1026 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
1024 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); 1027 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
1025 } 1028 }
1026 1029
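
As an illustrative aside, a hedged sketch of a filesystem discarding a freed extent through sb_issue_discard(); 'sb', 'block' and 'count' are assumed to be supplied by the caller, and -EOPNOTSUPP just means the device does not support discard:

        int err = sb_issue_discard(sb, block, count);

        if (err && err != -EOPNOTSUPP)
                printk(KERN_WARNING "discard of %llu+%llu blocks failed: %d\n",
                       (unsigned long long)block, (unsigned long long)count, err);
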
1027 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 1030 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
1028 1031
1029 enum blk_default_limits { 1032 enum blk_default_limits {
1030 BLK_MAX_SEGMENTS = 128, 1033 BLK_MAX_SEGMENTS = 128,
1031 BLK_SAFE_MAX_SECTORS = 255, 1034 BLK_SAFE_MAX_SECTORS = 255,
1032 BLK_DEF_MAX_SECTORS = 1024, 1035 BLK_DEF_MAX_SECTORS = 1024,
1033 BLK_MAX_SEGMENT_SIZE = 65536, 1036 BLK_MAX_SEGMENT_SIZE = 65536,
1034 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, 1037 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
1035 }; 1038 };
1036 1039
1037 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 1040 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
1038 1041
1039 static inline unsigned long queue_bounce_pfn(struct request_queue *q) 1042 static inline unsigned long queue_bounce_pfn(struct request_queue *q)
1040 { 1043 {
1041 return q->limits.bounce_pfn; 1044 return q->limits.bounce_pfn;
1042 } 1045 }
1043 1046
1044 static inline unsigned long queue_segment_boundary(struct request_queue *q) 1047 static inline unsigned long queue_segment_boundary(struct request_queue *q)
1045 { 1048 {
1046 return q->limits.seg_boundary_mask; 1049 return q->limits.seg_boundary_mask;
1047 } 1050 }
1048 1051
1049 static inline unsigned int queue_max_sectors(struct request_queue *q) 1052 static inline unsigned int queue_max_sectors(struct request_queue *q)
1050 { 1053 {
1051 return q->limits.max_sectors; 1054 return q->limits.max_sectors;
1052 } 1055 }
1053 1056
1054 static inline unsigned int queue_max_hw_sectors(struct request_queue *q) 1057 static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
1055 { 1058 {
1056 return q->limits.max_hw_sectors; 1059 return q->limits.max_hw_sectors;
1057 } 1060 }
1058 1061
1059 static inline unsigned short queue_max_segments(struct request_queue *q) 1062 static inline unsigned short queue_max_segments(struct request_queue *q)
1060 { 1063 {
1061 return q->limits.max_segments; 1064 return q->limits.max_segments;
1062 } 1065 }
1063 1066
1064 static inline unsigned int queue_max_segment_size(struct request_queue *q) 1067 static inline unsigned int queue_max_segment_size(struct request_queue *q)
1065 { 1068 {
1066 return q->limits.max_segment_size; 1069 return q->limits.max_segment_size;
1067 } 1070 }
1068 1071
1069 static inline unsigned short queue_logical_block_size(struct request_queue *q) 1072 static inline unsigned short queue_logical_block_size(struct request_queue *q)
1070 { 1073 {
1071 int retval = 512; 1074 int retval = 512;
1072 1075
1073 if (q && q->limits.logical_block_size) 1076 if (q && q->limits.logical_block_size)
1074 retval = q->limits.logical_block_size; 1077 retval = q->limits.logical_block_size;
1075 1078
1076 return retval; 1079 return retval;
1077 } 1080 }
1078 1081
1079 static inline unsigned short bdev_logical_block_size(struct block_device *bdev) 1082 static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
1080 { 1083 {
1081 return queue_logical_block_size(bdev_get_queue(bdev)); 1084 return queue_logical_block_size(bdev_get_queue(bdev));
1082 } 1085 }
1083 1086
1084 static inline unsigned int queue_physical_block_size(struct request_queue *q) 1087 static inline unsigned int queue_physical_block_size(struct request_queue *q)
1085 { 1088 {
1086 return q->limits.physical_block_size; 1089 return q->limits.physical_block_size;
1087 } 1090 }
1088 1091
1089 static inline int bdev_physical_block_size(struct block_device *bdev) 1092 static inline int bdev_physical_block_size(struct block_device *bdev)
1090 { 1093 {
1091 return queue_physical_block_size(bdev_get_queue(bdev)); 1094 return queue_physical_block_size(bdev_get_queue(bdev));
1092 } 1095 }
1093 1096
1094 static inline unsigned int queue_io_min(struct request_queue *q) 1097 static inline unsigned int queue_io_min(struct request_queue *q)
1095 { 1098 {
1096 return q->limits.io_min; 1099 return q->limits.io_min;
1097 } 1100 }
1098 1101
1099 static inline int bdev_io_min(struct block_device *bdev) 1102 static inline int bdev_io_min(struct block_device *bdev)
1100 { 1103 {
1101 return queue_io_min(bdev_get_queue(bdev)); 1104 return queue_io_min(bdev_get_queue(bdev));
1102 } 1105 }
1103 1106
1104 static inline unsigned int queue_io_opt(struct request_queue *q) 1107 static inline unsigned int queue_io_opt(struct request_queue *q)
1105 { 1108 {
1106 return q->limits.io_opt; 1109 return q->limits.io_opt;
1107 } 1110 }
1108 1111
1109 static inline int bdev_io_opt(struct block_device *bdev) 1112 static inline int bdev_io_opt(struct block_device *bdev)
1110 { 1113 {
1111 return queue_io_opt(bdev_get_queue(bdev)); 1114 return queue_io_opt(bdev_get_queue(bdev));
1112 } 1115 }
1113 1116
1114 static inline int queue_alignment_offset(struct request_queue *q) 1117 static inline int queue_alignment_offset(struct request_queue *q)
1115 { 1118 {
1116 if (q->limits.misaligned) 1119 if (q->limits.misaligned)
1117 return -1; 1120 return -1;
1118 1121
1119 return q->limits.alignment_offset; 1122 return q->limits.alignment_offset;
1120 } 1123 }
1121 1124
1122 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) 1125 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1123 { 1126 {
1124 unsigned int granularity = max(lim->physical_block_size, lim->io_min); 1127 unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1125 unsigned int alignment = (sector << 9) & (granularity - 1); 1128 unsigned int alignment = (sector << 9) & (granularity - 1);
1126 1129
1127 return (granularity + lim->alignment_offset - alignment) 1130 return (granularity + lim->alignment_offset - alignment)
1128 & (granularity - 1); 1131 & (granularity - 1);
1129 } 1132 }
1130 1133
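
Worked example (illustrative): for a device with a 4096-byte physical block size, io_min of 0 and no device alignment_offset, a partition starting at sector 7 sits 7 * 512 = 3584 bytes into a physical block, so queue_limit_alignment_offset() reports (4096 + 0 - 3584) & 4095 = 512, i.e. the first physically aligned boundary lies 512 bytes into the partition.
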
1131 static inline int bdev_alignment_offset(struct block_device *bdev) 1134 static inline int bdev_alignment_offset(struct block_device *bdev)
1132 { 1135 {
1133 struct request_queue *q = bdev_get_queue(bdev); 1136 struct request_queue *q = bdev_get_queue(bdev);
1134 1137
1135 if (q->limits.misaligned) 1138 if (q->limits.misaligned)
1136 return -1; 1139 return -1;
1137 1140
1138 if (bdev != bdev->bd_contains) 1141 if (bdev != bdev->bd_contains)
1139 return bdev->bd_part->alignment_offset; 1142 return bdev->bd_part->alignment_offset;
1140 1143
1141 return q->limits.alignment_offset; 1144 return q->limits.alignment_offset;
1142 } 1145 }
1143 1146
1144 static inline int queue_discard_alignment(struct request_queue *q) 1147 static inline int queue_discard_alignment(struct request_queue *q)
1145 { 1148 {
1146 if (q->limits.discard_misaligned) 1149 if (q->limits.discard_misaligned)
1147 return -1; 1150 return -1;
1148 1151
1149 return q->limits.discard_alignment; 1152 return q->limits.discard_alignment;
1150 } 1153 }
1151 1154
1152 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) 1155 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
1153 { 1156 {
1154 unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); 1157 unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
1155 1158
1156 return (lim->discard_granularity + lim->discard_alignment - alignment) 1159 return (lim->discard_granularity + lim->discard_alignment - alignment)
1157 & (lim->discard_granularity - 1); 1160 & (lim->discard_granularity - 1);
1158 } 1161 }
1159 1162
1160 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) 1163 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
1161 { 1164 {
1162 if (q->limits.discard_zeroes_data == 1) 1165 if (q->limits.discard_zeroes_data == 1)
1163 return 1; 1166 return 1;
1164 1167
1165 return 0; 1168 return 0;
1166 } 1169 }
1167 1170
1168 static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) 1171 static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
1169 { 1172 {
1170 return queue_discard_zeroes_data(bdev_get_queue(bdev)); 1173 return queue_discard_zeroes_data(bdev_get_queue(bdev));
1171 } 1174 }
1172 1175
1173 static inline int queue_dma_alignment(struct request_queue *q) 1176 static inline int queue_dma_alignment(struct request_queue *q)
1174 { 1177 {
1175 return q ? q->dma_alignment : 511; 1178 return q ? q->dma_alignment : 511;
1176 } 1179 }
1177 1180
1178 static inline int blk_rq_aligned(struct request_queue *q, void *addr, 1181 static inline int blk_rq_aligned(struct request_queue *q, void *addr,
1179 unsigned int len) 1182 unsigned int len)
1180 { 1183 {
1181 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; 1184 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
1182 return !((unsigned long)addr & alignment) && !(len & alignment); 1185 return !((unsigned long)addr & alignment) && !(len & alignment);
1183 } 1186 }
1184 1187
1185 /* assumes size > 256 */ 1188 /* assumes size > 256 */
1186 static inline unsigned int blksize_bits(unsigned int size) 1189 static inline unsigned int blksize_bits(unsigned int size)
1187 { 1190 {
1188 unsigned int bits = 8; 1191 unsigned int bits = 8;
1189 do { 1192 do {
1190 bits++; 1193 bits++;
1191 size >>= 1; 1194 size >>= 1;
1192 } while (size > 256); 1195 } while (size > 256);
1193 return bits; 1196 return bits;
1194 } 1197 }
1195 1198
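
Worked example (illustrative): blksize_bits(4096) halves the size 4096 -> 2048 -> 1024 -> 512 -> 256 while bumping bits from 8 to 12 and returns 12 (2^12 = 4096); for the minimum block size of 512 it returns 9. The "assumes size > 256" comment matters because any size of 256 or less would also return 9.
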
1196 static inline unsigned int block_size(struct block_device *bdev) 1199 static inline unsigned int block_size(struct block_device *bdev)
1197 { 1200 {
1198 return bdev->bd_block_size; 1201 return bdev->bd_block_size;
1199 } 1202 }
1200 1203
1201 typedef struct {struct page *v;} Sector; 1204 typedef struct {struct page *v;} Sector;
1202 1205
1203 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 1206 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
1204 1207
1205 static inline void put_dev_sector(Sector p) 1208 static inline void put_dev_sector(Sector p)
1206 { 1209 {
1207 page_cache_release(p.v); 1210 page_cache_release(p.v);
1208 } 1211 }
1209 1212
1210 struct work_struct; 1213 struct work_struct;
1211 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 1214 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1212 1215
1213 #ifdef CONFIG_BLK_CGROUP 1216 #ifdef CONFIG_BLK_CGROUP
1214 /* 1217 /*
1215 * This should not be using sched_clock(). A real patch is in progress 1218 * This should not be using sched_clock(). A real patch is in progress
1216 * to fix this up; until that is in place we need to disable preemption 1219 * to fix this up; until that is in place we need to disable preemption
1217 * around sched_clock() in this function and set_io_start_time_ns(). 1220 * around sched_clock() in this function and set_io_start_time_ns().
1218 */ 1221 */
1219 static inline void set_start_time_ns(struct request *req) 1222 static inline void set_start_time_ns(struct request *req)
1220 { 1223 {
1221 preempt_disable(); 1224 preempt_disable();
1222 req->start_time_ns = sched_clock(); 1225 req->start_time_ns = sched_clock();
1223 preempt_enable(); 1226 preempt_enable();
1224 } 1227 }
1225 1228
1226 static inline void set_io_start_time_ns(struct request *req) 1229 static inline void set_io_start_time_ns(struct request *req)
1227 { 1230 {
1228 preempt_disable(); 1231 preempt_disable();
1229 req->io_start_time_ns = sched_clock(); 1232 req->io_start_time_ns = sched_clock();
1230 preempt_enable(); 1233 preempt_enable();
1231 } 1234 }
1232 1235
1233 static inline uint64_t rq_start_time_ns(struct request *req) 1236 static inline uint64_t rq_start_time_ns(struct request *req)
1234 { 1237 {
1235 return req->start_time_ns; 1238 return req->start_time_ns;
1236 } 1239 }
1237 1240
1238 static inline uint64_t rq_io_start_time_ns(struct request *req) 1241 static inline uint64_t rq_io_start_time_ns(struct request *req)
1239 { 1242 {
1240 return req->io_start_time_ns; 1243 return req->io_start_time_ns;
1241 } 1244 }
1242 #else 1245 #else
1243 static inline void set_start_time_ns(struct request *req) {} 1246 static inline void set_start_time_ns(struct request *req) {}
1244 static inline void set_io_start_time_ns(struct request *req) {} 1247 static inline void set_io_start_time_ns(struct request *req) {}
1245 static inline uint64_t rq_start_time_ns(struct request *req) 1248 static inline uint64_t rq_start_time_ns(struct request *req)
1246 { 1249 {
1247 return 0; 1250 return 0;
1248 } 1251 }
1249 static inline uint64_t rq_io_start_time_ns(struct request *req) 1252 static inline uint64_t rq_io_start_time_ns(struct request *req)
1250 { 1253 {
1251 return 0; 1254 return 0;
1252 } 1255 }
1253 #endif 1256 #endif
1254 1257
1255 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 1258 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
1256 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 1259 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
1257 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 1260 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
1258 MODULE_ALIAS("block-major-" __stringify(major) "-*") 1261 MODULE_ALIAS("block-major-" __stringify(major) "-*")
1259 1262
1260 #if defined(CONFIG_BLK_DEV_INTEGRITY) 1263 #if defined(CONFIG_BLK_DEV_INTEGRITY)
1261 1264
1262 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ 1265 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */
1263 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ 1266 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */
1264 1267
1265 struct blk_integrity_exchg { 1268 struct blk_integrity_exchg {
1266 void *prot_buf; 1269 void *prot_buf;
1267 void *data_buf; 1270 void *data_buf;
1268 sector_t sector; 1271 sector_t sector;
1269 unsigned int data_size; 1272 unsigned int data_size;
1270 unsigned short sector_size; 1273 unsigned short sector_size;
1271 const char *disk_name; 1274 const char *disk_name;
1272 }; 1275 };
1273 1276
1274 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); 1277 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
1275 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); 1278 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
1276 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); 1279 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
1277 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); 1280 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
1278 1281
1279 struct blk_integrity { 1282 struct blk_integrity {
1280 integrity_gen_fn *generate_fn; 1283 integrity_gen_fn *generate_fn;
1281 integrity_vrfy_fn *verify_fn; 1284 integrity_vrfy_fn *verify_fn;
1282 integrity_set_tag_fn *set_tag_fn; 1285 integrity_set_tag_fn *set_tag_fn;
1283 integrity_get_tag_fn *get_tag_fn; 1286 integrity_get_tag_fn *get_tag_fn;
1284 1287
1285 unsigned short flags; 1288 unsigned short flags;
1286 unsigned short tuple_size; 1289 unsigned short tuple_size;
1287 unsigned short sector_size; 1290 unsigned short sector_size;
1288 unsigned short tag_size; 1291 unsigned short tag_size;
1289 1292
1290 const char *name; 1293 const char *name;
1291 1294
1292 struct kobject kobj; 1295 struct kobject kobj;
1293 }; 1296 };
1294 1297
1295 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); 1298 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
1296 extern void blk_integrity_unregister(struct gendisk *); 1299 extern void blk_integrity_unregister(struct gendisk *);
1297 extern int blk_integrity_compare(struct gendisk *, struct gendisk *); 1300 extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
1298 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); 1301 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
1299 extern int blk_rq_count_integrity_sg(struct request *); 1302 extern int blk_rq_count_integrity_sg(struct request *);
1300 1303
1301 static inline 1304 static inline
1302 struct blk_integrity *bdev_get_integrity(struct block_device *bdev) 1305 struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
1303 { 1306 {
1304 return bdev->bd_disk->integrity; 1307 return bdev->bd_disk->integrity;
1305 } 1308 }
1306 1309
1307 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) 1310 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
1308 { 1311 {
1309 return disk->integrity; 1312 return disk->integrity;
1310 } 1313 }
1311 1314
1312 static inline int blk_integrity_rq(struct request *rq) 1315 static inline int blk_integrity_rq(struct request *rq)
1313 { 1316 {
1314 if (rq->bio == NULL) 1317 if (rq->bio == NULL)
1315 return 0; 1318 return 0;
1316 1319
1317 return bio_integrity(rq->bio); 1320 return bio_integrity(rq->bio);
1318 } 1321 }
1319 1322
1320 #else /* CONFIG_BLK_DEV_INTEGRITY */ 1323 #else /* CONFIG_BLK_DEV_INTEGRITY */
1321 1324
1322 #define blk_integrity_rq(rq) (0) 1325 #define blk_integrity_rq(rq) (0)
1323 #define blk_rq_count_integrity_sg(a) (0) 1326 #define blk_rq_count_integrity_sg(a) (0)
1324 #define blk_rq_map_integrity_sg(a, b) (0) 1327 #define blk_rq_map_integrity_sg(a, b) (0)
1325 #define bdev_get_integrity(a) (0) 1328 #define bdev_get_integrity(a) (0)
1326 #define blk_get_integrity(a) (0) 1329 #define blk_get_integrity(a) (0)
1327 #define blk_integrity_compare(a, b) (0) 1330 #define blk_integrity_compare(a, b) (0)
1328 #define blk_integrity_register(a, b) (0) 1331 #define blk_integrity_register(a, b) (0)
1329 #define blk_integrity_unregister(a) do { } while (0) 1332 #define blk_integrity_unregister(a) do { } while (0)
1330 1333
1331 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 1334 #endif /* CONFIG_BLK_DEV_INTEGRITY */
1332 1335
1333 struct block_device_operations { 1336 struct block_device_operations {
1334 int (*open) (struct block_device *, fmode_t); 1337 int (*open) (struct block_device *, fmode_t);
1335 int (*release) (struct gendisk *, fmode_t); 1338 int (*release) (struct gendisk *, fmode_t);
1336 int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1339 int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1337 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1340 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1338 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1341 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1339 int (*direct_access) (struct block_device *, sector_t, 1342 int (*direct_access) (struct block_device *, sector_t,
1340 void **, unsigned long *); 1343 void **, unsigned long *);
1341 int (*media_changed) (struct gendisk *); 1344 int (*media_changed) (struct gendisk *);
1342 void (*unlock_native_capacity) (struct gendisk *); 1345 void (*unlock_native_capacity) (struct gendisk *);
1343 int (*revalidate_disk) (struct gendisk *); 1346 int (*revalidate_disk) (struct gendisk *);
1344 int (*getgeo)(struct block_device *, struct hd_geometry *); 1347 int (*getgeo)(struct block_device *, struct hd_geometry *);
1345 /* this callback is with swap_lock and sometimes page table lock held */ 1348 /* this callback is with swap_lock and sometimes page table lock held */
1346 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1349 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
1347 struct module *owner; 1350 struct module *owner;
1348 }; 1351 };
1349 1352
1350 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, 1353 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
1351 unsigned long); 1354 unsigned long);
1352 #else /* CONFIG_BLOCK */ 1355 #else /* CONFIG_BLOCK */
1353 /* 1356 /*
1354 * stubs for when the block layer is configured out 1357 * stubs for when the block layer is configured out
1355 */ 1358 */
1356 #define buffer_heads_over_limit 0 1359 #define buffer_heads_over_limit 0
1357 1360
1358 static inline long nr_blockdev_pages(void) 1361 static inline long nr_blockdev_pages(void)
1359 { 1362 {
1360 return 0; 1363 return 0;
1361 } 1364 }
1362 1365
1363 #endif /* CONFIG_BLOCK */ 1366 #endif /* CONFIG_BLOCK */
1364 1367
1365 #endif 1368 #endif
1366 1369