Commit 32fab448e5e86694beade415e750363538ea5f49
Committed by: Jens Axboe
1 parent: e3335de940
block: add request update interface
This patch adds blk_update_request(), which updates a struct request by completing its data part without completing the struct request itself. Although it resembles end_that_request_first() from older kernels, blk_update_request() should be used only by request stacking drivers.

Request-based dm will use it in the bio->bi_end_io callback to update the original request when a data part of a cloned request completes.

The following is additional background on why request-based dm needs this interface:

  - Request stacking drivers can't use blk_end_request() directly from the
    lower driver's completion context (bio->bi_end_io or rq->end_io),
    because some device drivers (e.g. ide) may try to complete their
    requests with the queue lock held, which can deadlock. See the
    following for a detailed description of the possible deadlock:
    <http://marc.info/?l=linux-kernel&m=120311479108569&w=2>

  - To solve that, request-based dm offloads the completion of the cloned
    struct request to softirq context (i.e. using blk_complete_request()
    from rq->end_io).

  - Though it is possible to use the same solution from bio->bi_end_io, it
    would delay the notification of bio completion to the original
    submitter. It would also cause inefficient partial completion, because
    the lower driver can no longer work on the cloned request, and
    request-based dm would need to requeue and redispatch it to the lower
    driver again later. That's not good.

  - So request-based dm needs blk_update_request() to perform the bio
    completion in the lower driver's completion context, which is more
    efficient.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
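
As an illustration of the scheme described above, here is a minimal sketch of how a request stacking driver might wire up its completion path. The struct clone_info and the clone_bio_end_io()/clone_rq_end_io() helper names are hypothetical, not part of this patch; the blk_update_request(rq, error, nr_bytes) signature is assumed from the interface added here, and blk_complete_request() assumes the stacking driver has set up softirq completion (a softirq done handler) for the original request's queue.

/*
 * Hypothetical sketch of a request stacking driver's completion path.
 * All names except blk_update_request() and blk_complete_request()
 * are illustrative; error recording is omitted for brevity.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

struct clone_info {
        struct request *orig;   /* original request being stacked */
        unsigned int nr_bytes;  /* bytes covered by this cloned bio */
};

/* Runs in the lower driver's completion context (clone's bio->bi_end_io). */
static void clone_bio_end_io(struct bio *clone, int error)
{
        struct clone_info *info = clone->bi_private;
        struct request *orig = info->orig;
        unsigned int nr_bytes = info->nr_bytes;

        bio_put(clone);
        kfree(info);

        /*
         * Complete only the data part of the original request, right
         * here in the lower driver's context.  blk_update_request()
         * ends the original bios but never completes the struct
         * request itself, so the request completion path (which needs
         * the queue lock) is not entered.
         */
        blk_update_request(orig, error, nr_bytes);
}

/* Runs when the whole cloned request finishes (clone's rq->end_io). */
static void clone_rq_end_io(struct request *clone, int error)
{
        struct request *orig = clone->end_io_data;

        /*
         * Completing the original struct request may require its queue
         * lock, which could deadlock against the lock the lower driver
         * holds here, so defer it to softirq context instead.
         */
        blk_complete_request(orig);
}

The split is the point: the data part of the original request is completed immediately in the lower driver's context via blk_update_request(), while the original struct request itself is completed later from softirq context, sidestepping the queue-lock deadlock described above.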
Showing 2 changed files with 50 additions and 9 deletions
block/blk-core.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> |
7 | * - July2000 | 7 | * - July2000 |
8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * This handles all read/write requests to block devices | 12 | * This handles all read/write requests to block devices |
13 | */ | 13 | */ |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/kernel_stat.h> | 21 | #include <linux/kernel_stat.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/blktrace_api.h> | 29 | #include <linux/blktrace_api.h> |
30 | #include <linux/fault-inject.h> | 30 | #include <linux/fault-inject.h> |
31 | 31 | ||
32 | #include "blk.h" | 32 | #include "blk.h" |
33 | 33 | ||
34 | static int __make_request(struct request_queue *q, struct bio *bio); | 34 | static int __make_request(struct request_queue *q, struct bio *bio); |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * For the allocated request tables | 37 | * For the allocated request tables |
38 | */ | 38 | */ |
39 | static struct kmem_cache *request_cachep; | 39 | static struct kmem_cache *request_cachep; |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * For queue allocation | 42 | * For queue allocation |
43 | */ | 43 | */ |
44 | struct kmem_cache *blk_requestq_cachep; | 44 | struct kmem_cache *blk_requestq_cachep; |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * Controlling structure to kblockd | 47 | * Controlling structure to kblockd |
48 | */ | 48 | */ |
49 | static struct workqueue_struct *kblockd_workqueue; | 49 | static struct workqueue_struct *kblockd_workqueue; |
50 | 50 | ||
51 | static void drive_stat_acct(struct request *rq, int new_io) | 51 | static void drive_stat_acct(struct request *rq, int new_io) |
52 | { | 52 | { |
53 | struct hd_struct *part; | 53 | struct hd_struct *part; |
54 | int rw = rq_data_dir(rq); | 54 | int rw = rq_data_dir(rq); |
55 | int cpu; | 55 | int cpu; |
56 | 56 | ||
57 | if (!blk_fs_request(rq) || !rq->rq_disk) | 57 | if (!blk_fs_request(rq) || !rq->rq_disk) |
58 | return; | 58 | return; |
59 | 59 | ||
60 | cpu = part_stat_lock(); | 60 | cpu = part_stat_lock(); |
61 | part = disk_map_sector_rcu(rq->rq_disk, rq->sector); | 61 | part = disk_map_sector_rcu(rq->rq_disk, rq->sector); |
62 | 62 | ||
63 | if (!new_io) | 63 | if (!new_io) |
64 | part_stat_inc(cpu, part, merges[rw]); | 64 | part_stat_inc(cpu, part, merges[rw]); |
65 | else { | 65 | else { |
66 | part_round_stats(cpu, part); | 66 | part_round_stats(cpu, part); |
67 | part_inc_in_flight(part); | 67 | part_inc_in_flight(part); |
68 | } | 68 | } |
69 | 69 | ||
70 | part_stat_unlock(); | 70 | part_stat_unlock(); |
71 | } | 71 | } |
72 | 72 | ||
73 | void blk_queue_congestion_threshold(struct request_queue *q) | 73 | void blk_queue_congestion_threshold(struct request_queue *q) |
74 | { | 74 | { |
75 | int nr; | 75 | int nr; |
76 | 76 | ||
77 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 77 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
78 | if (nr > q->nr_requests) | 78 | if (nr > q->nr_requests) |
79 | nr = q->nr_requests; | 79 | nr = q->nr_requests; |
80 | q->nr_congestion_on = nr; | 80 | q->nr_congestion_on = nr; |
81 | 81 | ||
82 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 82 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
83 | if (nr < 1) | 83 | if (nr < 1) |
84 | nr = 1; | 84 | nr = 1; |
85 | q->nr_congestion_off = nr; | 85 | q->nr_congestion_off = nr; |
86 | } | 86 | } |
87 | 87 | ||
88 | /** | 88 | /** |
89 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 89 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
90 | * @bdev: device | 90 | * @bdev: device |
91 | * | 91 | * |
92 | * Locates the passed device's request queue and returns the address of its | 92 | * Locates the passed device's request queue and returns the address of its |
93 | * backing_dev_info | 93 | * backing_dev_info |
94 | * | 94 | * |
95 | * Will return NULL if the request queue cannot be located. | 95 | * Will return NULL if the request queue cannot be located. |
96 | */ | 96 | */ |
97 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 97 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
98 | { | 98 | { |
99 | struct backing_dev_info *ret = NULL; | 99 | struct backing_dev_info *ret = NULL; |
100 | struct request_queue *q = bdev_get_queue(bdev); | 100 | struct request_queue *q = bdev_get_queue(bdev); |
101 | 101 | ||
102 | if (q) | 102 | if (q) |
103 | ret = &q->backing_dev_info; | 103 | ret = &q->backing_dev_info; |
104 | return ret; | 104 | return ret; |
105 | } | 105 | } |
106 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 106 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
107 | 107 | ||
108 | void blk_rq_init(struct request_queue *q, struct request *rq) | 108 | void blk_rq_init(struct request_queue *q, struct request *rq) |
109 | { | 109 | { |
110 | memset(rq, 0, sizeof(*rq)); | 110 | memset(rq, 0, sizeof(*rq)); |
111 | 111 | ||
112 | INIT_LIST_HEAD(&rq->queuelist); | 112 | INIT_LIST_HEAD(&rq->queuelist); |
113 | INIT_LIST_HEAD(&rq->timeout_list); | 113 | INIT_LIST_HEAD(&rq->timeout_list); |
114 | rq->cpu = -1; | 114 | rq->cpu = -1; |
115 | rq->q = q; | 115 | rq->q = q; |
116 | rq->sector = rq->hard_sector = (sector_t) -1; | 116 | rq->sector = rq->hard_sector = (sector_t) -1; |
117 | INIT_HLIST_NODE(&rq->hash); | 117 | INIT_HLIST_NODE(&rq->hash); |
118 | RB_CLEAR_NODE(&rq->rb_node); | 118 | RB_CLEAR_NODE(&rq->rb_node); |
119 | rq->cmd = rq->__cmd; | 119 | rq->cmd = rq->__cmd; |
120 | rq->tag = -1; | 120 | rq->tag = -1; |
121 | rq->ref_count = 1; | 121 | rq->ref_count = 1; |
122 | } | 122 | } |
123 | EXPORT_SYMBOL(blk_rq_init); | 123 | EXPORT_SYMBOL(blk_rq_init); |
124 | 124 | ||
125 | static void req_bio_endio(struct request *rq, struct bio *bio, | 125 | static void req_bio_endio(struct request *rq, struct bio *bio, |
126 | unsigned int nbytes, int error) | 126 | unsigned int nbytes, int error) |
127 | { | 127 | { |
128 | struct request_queue *q = rq->q; | 128 | struct request_queue *q = rq->q; |
129 | 129 | ||
130 | if (&q->bar_rq != rq) { | 130 | if (&q->bar_rq != rq) { |
131 | if (error) | 131 | if (error) |
132 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 132 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
133 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 133 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
134 | error = -EIO; | 134 | error = -EIO; |
135 | 135 | ||
136 | if (unlikely(nbytes > bio->bi_size)) { | 136 | if (unlikely(nbytes > bio->bi_size)) { |
137 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", | 137 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", |
138 | __func__, nbytes, bio->bi_size); | 138 | __func__, nbytes, bio->bi_size); |
139 | nbytes = bio->bi_size; | 139 | nbytes = bio->bi_size; |
140 | } | 140 | } |
141 | 141 | ||
142 | bio->bi_size -= nbytes; | 142 | bio->bi_size -= nbytes; |
143 | bio->bi_sector += (nbytes >> 9); | 143 | bio->bi_sector += (nbytes >> 9); |
144 | 144 | ||
145 | if (bio_integrity(bio)) | 145 | if (bio_integrity(bio)) |
146 | bio_integrity_advance(bio, nbytes); | 146 | bio_integrity_advance(bio, nbytes); |
147 | 147 | ||
148 | if (bio->bi_size == 0) | 148 | if (bio->bi_size == 0) |
149 | bio_endio(bio, error); | 149 | bio_endio(bio, error); |
150 | } else { | 150 | } else { |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * Okay, this is the barrier request in progress, just | 153 | * Okay, this is the barrier request in progress, just |
154 | * record the error; | 154 | * record the error; |
155 | */ | 155 | */ |
156 | if (error && !q->orderr) | 156 | if (error && !q->orderr) |
157 | q->orderr = error; | 157 | q->orderr = error; |
158 | } | 158 | } |
159 | } | 159 | } |
160 | 160 | ||
161 | void blk_dump_rq_flags(struct request *rq, char *msg) | 161 | void blk_dump_rq_flags(struct request *rq, char *msg) |
162 | { | 162 | { |
163 | int bit; | 163 | int bit; |
164 | 164 | ||
165 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, | 165 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, |
166 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 166 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
167 | rq->cmd_flags); | 167 | rq->cmd_flags); |
168 | 168 | ||
169 | printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n", | 169 | printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n", |
170 | (unsigned long long)rq->sector, | 170 | (unsigned long long)rq->sector, |
171 | rq->nr_sectors, | 171 | rq->nr_sectors, |
172 | rq->current_nr_sectors); | 172 | rq->current_nr_sectors); |
173 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", | 173 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", |
174 | rq->bio, rq->biotail, | 174 | rq->bio, rq->biotail, |
175 | rq->buffer, rq->data, | 175 | rq->buffer, rq->data, |
176 | rq->data_len); | 176 | rq->data_len); |
177 | 177 | ||
178 | if (blk_pc_request(rq)) { | 178 | if (blk_pc_request(rq)) { |
179 | printk(KERN_INFO " cdb: "); | 179 | printk(KERN_INFO " cdb: "); |
180 | for (bit = 0; bit < BLK_MAX_CDB; bit++) | 180 | for (bit = 0; bit < BLK_MAX_CDB; bit++) |
181 | printk("%02x ", rq->cmd[bit]); | 181 | printk("%02x ", rq->cmd[bit]); |
182 | printk("\n"); | 182 | printk("\n"); |
183 | } | 183 | } |
184 | } | 184 | } |
185 | EXPORT_SYMBOL(blk_dump_rq_flags); | 185 | EXPORT_SYMBOL(blk_dump_rq_flags); |
186 | 186 | ||
187 | /* | 187 | /* |
188 | * "plug" the device if there are no outstanding requests: this will | 188 | * "plug" the device if there are no outstanding requests: this will |
189 | * force the transfer to start only after we have put all the requests | 189 | * force the transfer to start only after we have put all the requests |
190 | * on the list. | 190 | * on the list. |
191 | * | 191 | * |
192 | * This is called with interrupts off and no requests on the queue and | 192 | * This is called with interrupts off and no requests on the queue and |
193 | * with the queue lock held. | 193 | * with the queue lock held. |
194 | */ | 194 | */ |
195 | void blk_plug_device(struct request_queue *q) | 195 | void blk_plug_device(struct request_queue *q) |
196 | { | 196 | { |
197 | WARN_ON(!irqs_disabled()); | 197 | WARN_ON(!irqs_disabled()); |
198 | 198 | ||
199 | /* | 199 | /* |
200 | * don't plug a stopped queue, it must be paired with blk_start_queue() | 200 | * don't plug a stopped queue, it must be paired with blk_start_queue() |
201 | * which will restart the queueing | 201 | * which will restart the queueing |
202 | */ | 202 | */ |
203 | if (blk_queue_stopped(q)) | 203 | if (blk_queue_stopped(q)) |
204 | return; | 204 | return; |
205 | 205 | ||
206 | if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { | 206 | if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { |
207 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | 207 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); |
208 | blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); | 208 | blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); |
209 | } | 209 | } |
210 | } | 210 | } |
211 | EXPORT_SYMBOL(blk_plug_device); | 211 | EXPORT_SYMBOL(blk_plug_device); |
212 | 212 | ||
213 | /** | 213 | /** |
214 | * blk_plug_device_unlocked - plug a device without queue lock held | 214 | * blk_plug_device_unlocked - plug a device without queue lock held |
215 | * @q: The &struct request_queue to plug | 215 | * @q: The &struct request_queue to plug |
216 | * | 216 | * |
217 | * Description: | 217 | * Description: |
218 | * Like @blk_plug_device(), but grabs the queue lock and disables | 218 | * Like @blk_plug_device(), but grabs the queue lock and disables |
219 | * interrupts. | 219 | * interrupts. |
220 | **/ | 220 | **/ |
221 | void blk_plug_device_unlocked(struct request_queue *q) | 221 | void blk_plug_device_unlocked(struct request_queue *q) |
222 | { | 222 | { |
223 | unsigned long flags; | 223 | unsigned long flags; |
224 | 224 | ||
225 | spin_lock_irqsave(q->queue_lock, flags); | 225 | spin_lock_irqsave(q->queue_lock, flags); |
226 | blk_plug_device(q); | 226 | blk_plug_device(q); |
227 | spin_unlock_irqrestore(q->queue_lock, flags); | 227 | spin_unlock_irqrestore(q->queue_lock, flags); |
228 | } | 228 | } |
229 | EXPORT_SYMBOL(blk_plug_device_unlocked); | 229 | EXPORT_SYMBOL(blk_plug_device_unlocked); |
230 | 230 | ||
231 | /* | 231 | /* |
232 | * remove the queue from the plugged list, if present. called with | 232 | * remove the queue from the plugged list, if present. called with |
233 | * queue lock held and interrupts disabled. | 233 | * queue lock held and interrupts disabled. |
234 | */ | 234 | */ |
235 | int blk_remove_plug(struct request_queue *q) | 235 | int blk_remove_plug(struct request_queue *q) |
236 | { | 236 | { |
237 | WARN_ON(!irqs_disabled()); | 237 | WARN_ON(!irqs_disabled()); |
238 | 238 | ||
239 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) | 239 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) |
240 | return 0; | 240 | return 0; |
241 | 241 | ||
242 | del_timer(&q->unplug_timer); | 242 | del_timer(&q->unplug_timer); |
243 | return 1; | 243 | return 1; |
244 | } | 244 | } |
245 | EXPORT_SYMBOL(blk_remove_plug); | 245 | EXPORT_SYMBOL(blk_remove_plug); |
246 | 246 | ||
247 | /* | 247 | /* |
248 | * remove the plug and let it rip.. | 248 | * remove the plug and let it rip.. |
249 | */ | 249 | */ |
250 | void __generic_unplug_device(struct request_queue *q) | 250 | void __generic_unplug_device(struct request_queue *q) |
251 | { | 251 | { |
252 | if (unlikely(blk_queue_stopped(q))) | 252 | if (unlikely(blk_queue_stopped(q))) |
253 | return; | 253 | return; |
254 | 254 | ||
255 | if (!blk_remove_plug(q)) | 255 | if (!blk_remove_plug(q)) |
256 | return; | 256 | return; |
257 | 257 | ||
258 | q->request_fn(q); | 258 | q->request_fn(q); |
259 | } | 259 | } |
260 | EXPORT_SYMBOL(__generic_unplug_device); | 260 | EXPORT_SYMBOL(__generic_unplug_device); |
261 | 261 | ||
262 | /** | 262 | /** |
263 | * generic_unplug_device - fire a request queue | 263 | * generic_unplug_device - fire a request queue |
264 | * @q: The &struct request_queue in question | 264 | * @q: The &struct request_queue in question |
265 | * | 265 | * |
266 | * Description: | 266 | * Description: |
267 | * Linux uses plugging to build bigger requests queues before letting | 267 | * Linux uses plugging to build bigger requests queues before letting |
268 | * the device have at them. If a queue is plugged, the I/O scheduler | 268 | * the device have at them. If a queue is plugged, the I/O scheduler |
269 | * is still adding and merging requests on the queue. Once the queue | 269 | * is still adding and merging requests on the queue. Once the queue |
270 | * gets unplugged, the request_fn defined for the queue is invoked and | 270 | * gets unplugged, the request_fn defined for the queue is invoked and |
271 | * transfers started. | 271 | * transfers started. |
272 | **/ | 272 | **/ |
273 | void generic_unplug_device(struct request_queue *q) | 273 | void generic_unplug_device(struct request_queue *q) |
274 | { | 274 | { |
275 | if (blk_queue_plugged(q)) { | 275 | if (blk_queue_plugged(q)) { |
276 | spin_lock_irq(q->queue_lock); | 276 | spin_lock_irq(q->queue_lock); |
277 | __generic_unplug_device(q); | 277 | __generic_unplug_device(q); |
278 | spin_unlock_irq(q->queue_lock); | 278 | spin_unlock_irq(q->queue_lock); |
279 | } | 279 | } |
280 | } | 280 | } |
281 | EXPORT_SYMBOL(generic_unplug_device); | 281 | EXPORT_SYMBOL(generic_unplug_device); |
282 | 282 | ||
283 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | 283 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, |
284 | struct page *page) | 284 | struct page *page) |
285 | { | 285 | { |
286 | struct request_queue *q = bdi->unplug_io_data; | 286 | struct request_queue *q = bdi->unplug_io_data; |
287 | 287 | ||
288 | blk_unplug(q); | 288 | blk_unplug(q); |
289 | } | 289 | } |
290 | 290 | ||
291 | void blk_unplug_work(struct work_struct *work) | 291 | void blk_unplug_work(struct work_struct *work) |
292 | { | 292 | { |
293 | struct request_queue *q = | 293 | struct request_queue *q = |
294 | container_of(work, struct request_queue, unplug_work); | 294 | container_of(work, struct request_queue, unplug_work); |
295 | 295 | ||
296 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, | 296 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, |
297 | q->rq.count[READ] + q->rq.count[WRITE]); | 297 | q->rq.count[READ] + q->rq.count[WRITE]); |
298 | 298 | ||
299 | q->unplug_fn(q); | 299 | q->unplug_fn(q); |
300 | } | 300 | } |
301 | 301 | ||
302 | void blk_unplug_timeout(unsigned long data) | 302 | void blk_unplug_timeout(unsigned long data) |
303 | { | 303 | { |
304 | struct request_queue *q = (struct request_queue *)data; | 304 | struct request_queue *q = (struct request_queue *)data; |
305 | 305 | ||
306 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, | 306 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, |
307 | q->rq.count[READ] + q->rq.count[WRITE]); | 307 | q->rq.count[READ] + q->rq.count[WRITE]); |
308 | 308 | ||
309 | kblockd_schedule_work(q, &q->unplug_work); | 309 | kblockd_schedule_work(q, &q->unplug_work); |
310 | } | 310 | } |
311 | 311 | ||
312 | void blk_unplug(struct request_queue *q) | 312 | void blk_unplug(struct request_queue *q) |
313 | { | 313 | { |
314 | /* | 314 | /* |
315 | * devices don't necessarily have an ->unplug_fn defined | 315 | * devices don't necessarily have an ->unplug_fn defined |
316 | */ | 316 | */ |
317 | if (q->unplug_fn) { | 317 | if (q->unplug_fn) { |
318 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, | 318 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, |
319 | q->rq.count[READ] + q->rq.count[WRITE]); | 319 | q->rq.count[READ] + q->rq.count[WRITE]); |
320 | 320 | ||
321 | q->unplug_fn(q); | 321 | q->unplug_fn(q); |
322 | } | 322 | } |
323 | } | 323 | } |
324 | EXPORT_SYMBOL(blk_unplug); | 324 | EXPORT_SYMBOL(blk_unplug); |
325 | 325 | ||
326 | static void blk_invoke_request_fn(struct request_queue *q) | 326 | static void blk_invoke_request_fn(struct request_queue *q) |
327 | { | 327 | { |
328 | /* | 328 | /* |
329 | * one level of recursion is ok and is much faster than kicking | 329 | * one level of recursion is ok and is much faster than kicking |
330 | * the unplug handling | 330 | * the unplug handling |
331 | */ | 331 | */ |
332 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | 332 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { |
333 | q->request_fn(q); | 333 | q->request_fn(q); |
334 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | 334 | queue_flag_clear(QUEUE_FLAG_REENTER, q); |
335 | } else { | 335 | } else { |
336 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); | 336 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); |
337 | kblockd_schedule_work(q, &q->unplug_work); | 337 | kblockd_schedule_work(q, &q->unplug_work); |
338 | } | 338 | } |
339 | } | 339 | } |
340 | 340 | ||
341 | /** | 341 | /** |
342 | * blk_start_queue - restart a previously stopped queue | 342 | * blk_start_queue - restart a previously stopped queue |
343 | * @q: The &struct request_queue in question | 343 | * @q: The &struct request_queue in question |
344 | * | 344 | * |
345 | * Description: | 345 | * Description: |
346 | * blk_start_queue() will clear the stop flag on the queue, and call | 346 | * blk_start_queue() will clear the stop flag on the queue, and call |
347 | * the request_fn for the queue if it was in a stopped state when | 347 | * the request_fn for the queue if it was in a stopped state when |
348 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 348 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
349 | **/ | 349 | **/ |
350 | void blk_start_queue(struct request_queue *q) | 350 | void blk_start_queue(struct request_queue *q) |
351 | { | 351 | { |
352 | WARN_ON(!irqs_disabled()); | 352 | WARN_ON(!irqs_disabled()); |
353 | 353 | ||
354 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | 354 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
355 | blk_invoke_request_fn(q); | 355 | blk_invoke_request_fn(q); |
356 | } | 356 | } |
357 | EXPORT_SYMBOL(blk_start_queue); | 357 | EXPORT_SYMBOL(blk_start_queue); |
358 | 358 | ||
359 | /** | 359 | /** |
360 | * blk_stop_queue - stop a queue | 360 | * blk_stop_queue - stop a queue |
361 | * @q: The &struct request_queue in question | 361 | * @q: The &struct request_queue in question |
362 | * | 362 | * |
363 | * Description: | 363 | * Description: |
364 | * The Linux block layer assumes that a block driver will consume all | 364 | * The Linux block layer assumes that a block driver will consume all |
365 | * entries on the request queue when the request_fn strategy is called. | 365 | * entries on the request queue when the request_fn strategy is called. |
366 | * Often this will not happen, because of hardware limitations (queue | 366 | * Often this will not happen, because of hardware limitations (queue |
367 | * depth settings). If a device driver gets a 'queue full' response, | 367 | * depth settings). If a device driver gets a 'queue full' response, |
368 | * or if it simply chooses not to queue more I/O at one point, it can | 368 | * or if it simply chooses not to queue more I/O at one point, it can |
369 | * call this function to prevent the request_fn from being called until | 369 | * call this function to prevent the request_fn from being called until |
370 | * the driver has signalled it's ready to go again. This happens by calling | 370 | * the driver has signalled it's ready to go again. This happens by calling |
371 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 371 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
372 | **/ | 372 | **/ |
373 | void blk_stop_queue(struct request_queue *q) | 373 | void blk_stop_queue(struct request_queue *q) |
374 | { | 374 | { |
375 | blk_remove_plug(q); | 375 | blk_remove_plug(q); |
376 | queue_flag_set(QUEUE_FLAG_STOPPED, q); | 376 | queue_flag_set(QUEUE_FLAG_STOPPED, q); |
377 | } | 377 | } |
378 | EXPORT_SYMBOL(blk_stop_queue); | 378 | EXPORT_SYMBOL(blk_stop_queue); |
379 | 379 | ||
380 | /** | 380 | /** |
381 | * blk_sync_queue - cancel any pending callbacks on a queue | 381 | * blk_sync_queue - cancel any pending callbacks on a queue |
382 | * @q: the queue | 382 | * @q: the queue |
383 | * | 383 | * |
384 | * Description: | 384 | * Description: |
385 | * The block layer may perform asynchronous callback activity | 385 | * The block layer may perform asynchronous callback activity |
386 | * on a queue, such as calling the unplug function after a timeout. | 386 | * on a queue, such as calling the unplug function after a timeout. |
387 | * A block device may call blk_sync_queue to ensure that any | 387 | * A block device may call blk_sync_queue to ensure that any |
388 | * such activity is cancelled, thus allowing it to release resources | 388 | * such activity is cancelled, thus allowing it to release resources |
389 | * that the callbacks might use. The caller must already have made sure | 389 | * that the callbacks might use. The caller must already have made sure |
390 | * that its ->make_request_fn will not re-add plugging prior to calling | 390 | * that its ->make_request_fn will not re-add plugging prior to calling |
391 | * this function. | 391 | * this function. |
392 | * | 392 | * |
393 | */ | 393 | */ |
394 | void blk_sync_queue(struct request_queue *q) | 394 | void blk_sync_queue(struct request_queue *q) |
395 | { | 395 | { |
396 | del_timer_sync(&q->unplug_timer); | 396 | del_timer_sync(&q->unplug_timer); |
397 | kblockd_flush_work(&q->unplug_work); | 397 | kblockd_flush_work(&q->unplug_work); |
398 | } | 398 | } |
399 | EXPORT_SYMBOL(blk_sync_queue); | 399 | EXPORT_SYMBOL(blk_sync_queue); |
400 | 400 | ||
401 | /** | 401 | /** |
402 | * blk_run_queue - run a single device queue | 402 | * blk_run_queue - run a single device queue |
403 | * @q: The queue to run | 403 | * @q: The queue to run |
404 | */ | 404 | */ |
405 | void __blk_run_queue(struct request_queue *q) | 405 | void __blk_run_queue(struct request_queue *q) |
406 | { | 406 | { |
407 | blk_remove_plug(q); | 407 | blk_remove_plug(q); |
408 | 408 | ||
409 | /* | 409 | /* |
410 | * Only recurse once to avoid overrunning the stack, let the unplug | 410 | * Only recurse once to avoid overrunning the stack, let the unplug |
411 | * handling reinvoke the handler shortly if we already got there. | 411 | * handling reinvoke the handler shortly if we already got there. |
412 | */ | 412 | */ |
413 | if (!elv_queue_empty(q)) | 413 | if (!elv_queue_empty(q)) |
414 | blk_invoke_request_fn(q); | 414 | blk_invoke_request_fn(q); |
415 | } | 415 | } |
416 | EXPORT_SYMBOL(__blk_run_queue); | 416 | EXPORT_SYMBOL(__blk_run_queue); |
417 | 417 | ||
418 | /** | 418 | /** |
419 | * blk_run_queue - run a single device queue | 419 | * blk_run_queue - run a single device queue |
420 | * @q: The queue to run | 420 | * @q: The queue to run |
421 | */ | 421 | */ |
422 | void blk_run_queue(struct request_queue *q) | 422 | void blk_run_queue(struct request_queue *q) |
423 | { | 423 | { |
424 | unsigned long flags; | 424 | unsigned long flags; |
425 | 425 | ||
426 | spin_lock_irqsave(q->queue_lock, flags); | 426 | spin_lock_irqsave(q->queue_lock, flags); |
427 | __blk_run_queue(q); | 427 | __blk_run_queue(q); |
428 | spin_unlock_irqrestore(q->queue_lock, flags); | 428 | spin_unlock_irqrestore(q->queue_lock, flags); |
429 | } | 429 | } |
430 | EXPORT_SYMBOL(blk_run_queue); | 430 | EXPORT_SYMBOL(blk_run_queue); |
431 | 431 | ||
432 | void blk_put_queue(struct request_queue *q) | 432 | void blk_put_queue(struct request_queue *q) |
433 | { | 433 | { |
434 | kobject_put(&q->kobj); | 434 | kobject_put(&q->kobj); |
435 | } | 435 | } |
436 | 436 | ||
437 | void blk_cleanup_queue(struct request_queue *q) | 437 | void blk_cleanup_queue(struct request_queue *q) |
438 | { | 438 | { |
439 | /* | 439 | /* |
440 | * We know we have process context here, so we can be a little | 440 | * We know we have process context here, so we can be a little |
441 | * cautious and ensure that pending block actions on this device | 441 | * cautious and ensure that pending block actions on this device |
442 | * are done before moving on. Going into this function, we should | 442 | * are done before moving on. Going into this function, we should |
443 | * not have processes doing IO to this device. | 443 | * not have processes doing IO to this device. |
444 | */ | 444 | */ |
445 | blk_sync_queue(q); | 445 | blk_sync_queue(q); |
446 | 446 | ||
447 | mutex_lock(&q->sysfs_lock); | 447 | mutex_lock(&q->sysfs_lock); |
448 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 448 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
449 | mutex_unlock(&q->sysfs_lock); | 449 | mutex_unlock(&q->sysfs_lock); |
450 | 450 | ||
451 | if (q->elevator) | 451 | if (q->elevator) |
452 | elevator_exit(q->elevator); | 452 | elevator_exit(q->elevator); |
453 | 453 | ||
454 | blk_put_queue(q); | 454 | blk_put_queue(q); |
455 | } | 455 | } |
456 | EXPORT_SYMBOL(blk_cleanup_queue); | 456 | EXPORT_SYMBOL(blk_cleanup_queue); |
457 | 457 | ||
458 | static int blk_init_free_list(struct request_queue *q) | 458 | static int blk_init_free_list(struct request_queue *q) |
459 | { | 459 | { |
460 | struct request_list *rl = &q->rq; | 460 | struct request_list *rl = &q->rq; |
461 | 461 | ||
462 | rl->count[READ] = rl->count[WRITE] = 0; | 462 | rl->count[READ] = rl->count[WRITE] = 0; |
463 | rl->starved[READ] = rl->starved[WRITE] = 0; | 463 | rl->starved[READ] = rl->starved[WRITE] = 0; |
464 | rl->elvpriv = 0; | 464 | rl->elvpriv = 0; |
465 | init_waitqueue_head(&rl->wait[READ]); | 465 | init_waitqueue_head(&rl->wait[READ]); |
466 | init_waitqueue_head(&rl->wait[WRITE]); | 466 | init_waitqueue_head(&rl->wait[WRITE]); |
467 | 467 | ||
468 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 468 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
469 | mempool_free_slab, request_cachep, q->node); | 469 | mempool_free_slab, request_cachep, q->node); |
470 | 470 | ||
471 | if (!rl->rq_pool) | 471 | if (!rl->rq_pool) |
472 | return -ENOMEM; | 472 | return -ENOMEM; |
473 | 473 | ||
474 | return 0; | 474 | return 0; |
475 | } | 475 | } |
476 | 476 | ||
477 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 477 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
478 | { | 478 | { |
479 | return blk_alloc_queue_node(gfp_mask, -1); | 479 | return blk_alloc_queue_node(gfp_mask, -1); |
480 | } | 480 | } |
481 | EXPORT_SYMBOL(blk_alloc_queue); | 481 | EXPORT_SYMBOL(blk_alloc_queue); |
482 | 482 | ||
483 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 483 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
484 | { | 484 | { |
485 | struct request_queue *q; | 485 | struct request_queue *q; |
486 | int err; | 486 | int err; |
487 | 487 | ||
488 | q = kmem_cache_alloc_node(blk_requestq_cachep, | 488 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
489 | gfp_mask | __GFP_ZERO, node_id); | 489 | gfp_mask | __GFP_ZERO, node_id); |
490 | if (!q) | 490 | if (!q) |
491 | return NULL; | 491 | return NULL; |
492 | 492 | ||
493 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | 493 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; |
494 | q->backing_dev_info.unplug_io_data = q; | 494 | q->backing_dev_info.unplug_io_data = q; |
495 | err = bdi_init(&q->backing_dev_info); | 495 | err = bdi_init(&q->backing_dev_info); |
496 | if (err) { | 496 | if (err) { |
497 | kmem_cache_free(blk_requestq_cachep, q); | 497 | kmem_cache_free(blk_requestq_cachep, q); |
498 | return NULL; | 498 | return NULL; |
499 | } | 499 | } |
500 | 500 | ||
501 | init_timer(&q->unplug_timer); | 501 | init_timer(&q->unplug_timer); |
502 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 502 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
503 | INIT_LIST_HEAD(&q->timeout_list); | 503 | INIT_LIST_HEAD(&q->timeout_list); |
504 | 504 | ||
505 | kobject_init(&q->kobj, &blk_queue_ktype); | 505 | kobject_init(&q->kobj, &blk_queue_ktype); |
506 | 506 | ||
507 | mutex_init(&q->sysfs_lock); | 507 | mutex_init(&q->sysfs_lock); |
508 | spin_lock_init(&q->__queue_lock); | 508 | spin_lock_init(&q->__queue_lock); |
509 | 509 | ||
510 | return q; | 510 | return q; |
511 | } | 511 | } |
512 | EXPORT_SYMBOL(blk_alloc_queue_node); | 512 | EXPORT_SYMBOL(blk_alloc_queue_node); |
513 | 513 | ||
514 | /** | 514 | /** |
515 | * blk_init_queue - prepare a request queue for use with a block device | 515 | * blk_init_queue - prepare a request queue for use with a block device |
516 | * @rfn: The function to be called to process requests that have been | 516 | * @rfn: The function to be called to process requests that have been |
517 | * placed on the queue. | 517 | * placed on the queue. |
518 | * @lock: Request queue spin lock | 518 | * @lock: Request queue spin lock |
519 | * | 519 | * |
520 | * Description: | 520 | * Description: |
521 | * If a block device wishes to use the standard request handling procedures, | 521 | * If a block device wishes to use the standard request handling procedures, |
522 | * which sorts requests and coalesces adjacent requests, then it must | 522 | * which sorts requests and coalesces adjacent requests, then it must |
523 | * call blk_init_queue(). The function @rfn will be called when there | 523 | * call blk_init_queue(). The function @rfn will be called when there |
524 | * are requests on the queue that need to be processed. If the device | 524 | * are requests on the queue that need to be processed. If the device |
525 | * supports plugging, then @rfn may not be called immediately when requests | 525 | * supports plugging, then @rfn may not be called immediately when requests |
526 | * are available on the queue, but may be called at some time later instead. | 526 | * are available on the queue, but may be called at some time later instead. |
527 | * Plugged queues are generally unplugged when a buffer belonging to one | 527 | * Plugged queues are generally unplugged when a buffer belonging to one |
528 | * of the requests on the queue is needed, or due to memory pressure. | 528 | * of the requests on the queue is needed, or due to memory pressure. |
529 | * | 529 | * |
530 | * @rfn is not required, or even expected, to remove all requests off the | 530 | * @rfn is not required, or even expected, to remove all requests off the |
531 | * queue, but only as many as it can handle at a time. If it does leave | 531 | * queue, but only as many as it can handle at a time. If it does leave |
532 | * requests on the queue, it is responsible for arranging that the requests | 532 | * requests on the queue, it is responsible for arranging that the requests |
533 | * get dealt with eventually. | 533 | * get dealt with eventually. |
534 | * | 534 | * |
535 | * The queue spin lock must be held while manipulating the requests on the | 535 | * The queue spin lock must be held while manipulating the requests on the |
536 | * request queue; this lock will be taken also from interrupt context, so irq | 536 | * request queue; this lock will be taken also from interrupt context, so irq |
537 | * disabling is needed for it. | 537 | * disabling is needed for it. |
538 | * | 538 | * |
539 | * Function returns a pointer to the initialized request queue, or %NULL if | 539 | * Function returns a pointer to the initialized request queue, or %NULL if |
540 | * it didn't succeed. | 540 | * it didn't succeed. |
541 | * | 541 | * |
542 | * Note: | 542 | * Note: |
543 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 543 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
544 | * when the block device is deactivated (such as at module unload). | 544 | * when the block device is deactivated (such as at module unload). |
545 | **/ | 545 | **/ |
546 | 546 | ||
547 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 547 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
548 | { | 548 | { |
549 | return blk_init_queue_node(rfn, lock, -1); | 549 | return blk_init_queue_node(rfn, lock, -1); |
550 | } | 550 | } |
551 | EXPORT_SYMBOL(blk_init_queue); | 551 | EXPORT_SYMBOL(blk_init_queue); |
552 | 552 | ||
553 | struct request_queue * | 553 | struct request_queue * |
554 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 554 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
555 | { | 555 | { |
556 | struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 556 | struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
557 | 557 | ||
558 | if (!q) | 558 | if (!q) |
559 | return NULL; | 559 | return NULL; |
560 | 560 | ||
561 | q->node = node_id; | 561 | q->node = node_id; |
562 | if (blk_init_free_list(q)) { | 562 | if (blk_init_free_list(q)) { |
563 | kmem_cache_free(blk_requestq_cachep, q); | 563 | kmem_cache_free(blk_requestq_cachep, q); |
564 | return NULL; | 564 | return NULL; |
565 | } | 565 | } |
566 | 566 | ||
567 | /* | 567 | /* |
568 | * if caller didn't supply a lock, they get per-queue locking with | 568 | * if caller didn't supply a lock, they get per-queue locking with |
569 | * our embedded lock | 569 | * our embedded lock |
570 | */ | 570 | */ |
571 | if (!lock) | 571 | if (!lock) |
572 | lock = &q->__queue_lock; | 572 | lock = &q->__queue_lock; |
573 | 573 | ||
574 | q->request_fn = rfn; | 574 | q->request_fn = rfn; |
575 | q->prep_rq_fn = NULL; | 575 | q->prep_rq_fn = NULL; |
576 | q->unplug_fn = generic_unplug_device; | 576 | q->unplug_fn = generic_unplug_device; |
577 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); | 577 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); |
578 | q->queue_lock = lock; | 578 | q->queue_lock = lock; |
579 | 579 | ||
580 | blk_queue_segment_boundary(q, 0xffffffff); | 580 | blk_queue_segment_boundary(q, 0xffffffff); |
581 | 581 | ||
582 | blk_queue_make_request(q, __make_request); | 582 | blk_queue_make_request(q, __make_request); |
583 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); | 583 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); |
584 | 584 | ||
585 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 585 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
586 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 586 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
587 | 587 | ||
588 | q->sg_reserved_size = INT_MAX; | 588 | q->sg_reserved_size = INT_MAX; |
589 | 589 | ||
590 | blk_set_cmd_filter_defaults(&q->cmd_filter); | 590 | blk_set_cmd_filter_defaults(&q->cmd_filter); |
591 | 591 | ||
592 | /* | 592 | /* |
593 | * all done | 593 | * all done |
594 | */ | 594 | */ |
595 | if (!elevator_init(q, NULL)) { | 595 | if (!elevator_init(q, NULL)) { |
596 | blk_queue_congestion_threshold(q); | 596 | blk_queue_congestion_threshold(q); |
597 | return q; | 597 | return q; |
598 | } | 598 | } |
599 | 599 | ||
600 | blk_put_queue(q); | 600 | blk_put_queue(q); |
601 | return NULL; | 601 | return NULL; |
602 | } | 602 | } |
603 | EXPORT_SYMBOL(blk_init_queue_node); | 603 | EXPORT_SYMBOL(blk_init_queue_node); |
604 | 604 | ||
605 | int blk_get_queue(struct request_queue *q) | 605 | int blk_get_queue(struct request_queue *q) |
606 | { | 606 | { |
607 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 607 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
608 | kobject_get(&q->kobj); | 608 | kobject_get(&q->kobj); |
609 | return 0; | 609 | return 0; |
610 | } | 610 | } |
611 | 611 | ||
612 | return 1; | 612 | return 1; |
613 | } | 613 | } |
614 | 614 | ||
615 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 615 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
616 | { | 616 | { |
617 | if (rq->cmd_flags & REQ_ELVPRIV) | 617 | if (rq->cmd_flags & REQ_ELVPRIV) |
618 | elv_put_request(q, rq); | 618 | elv_put_request(q, rq); |
619 | mempool_free(rq, q->rq.rq_pool); | 619 | mempool_free(rq, q->rq.rq_pool); |
620 | } | 620 | } |
621 | 621 | ||
622 | static struct request * | 622 | static struct request * |
623 | blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) | 623 | blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) |
624 | { | 624 | { |
625 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 625 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
626 | 626 | ||
627 | if (!rq) | 627 | if (!rq) |
628 | return NULL; | 628 | return NULL; |
629 | 629 | ||
630 | blk_rq_init(q, rq); | 630 | blk_rq_init(q, rq); |
631 | 631 | ||
632 | rq->cmd_flags = rw | REQ_ALLOCED; | 632 | rq->cmd_flags = rw | REQ_ALLOCED; |
633 | 633 | ||
634 | if (priv) { | 634 | if (priv) { |
635 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { | 635 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { |
636 | mempool_free(rq, q->rq.rq_pool); | 636 | mempool_free(rq, q->rq.rq_pool); |
637 | return NULL; | 637 | return NULL; |
638 | } | 638 | } |
639 | rq->cmd_flags |= REQ_ELVPRIV; | 639 | rq->cmd_flags |= REQ_ELVPRIV; |
640 | } | 640 | } |
641 | 641 | ||
642 | return rq; | 642 | return rq; |
643 | } | 643 | } |
644 | 644 | ||
645 | /* | 645 | /* |
646 | * ioc_batching returns true if the ioc is a valid batching request and | 646 | * ioc_batching returns true if the ioc is a valid batching request and |
647 | * should be given priority access to a request. | 647 | * should be given priority access to a request. |
648 | */ | 648 | */ |
649 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) | 649 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
650 | { | 650 | { |
651 | if (!ioc) | 651 | if (!ioc) |
652 | return 0; | 652 | return 0; |
653 | 653 | ||
654 | /* | 654 | /* |
655 | * Make sure the process is able to allocate at least 1 request | 655 | * Make sure the process is able to allocate at least 1 request |
656 | * even if the batch times out, otherwise we could theoretically | 656 | * even if the batch times out, otherwise we could theoretically |
657 | * lose wakeups. | 657 | * lose wakeups. |
658 | */ | 658 | */ |
659 | return ioc->nr_batch_requests == q->nr_batching || | 659 | return ioc->nr_batch_requests == q->nr_batching || |
660 | (ioc->nr_batch_requests > 0 | 660 | (ioc->nr_batch_requests > 0 |
661 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 661 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
662 | } | 662 | } |
663 | 663 | ||
664 | /* | 664 | /* |
665 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 665 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
666 | * will cause the process to be a "batcher" on all queues in the system. This | 666 | * will cause the process to be a "batcher" on all queues in the system. This |
667 | * is the behaviour we want though - once it gets a wakeup it should be given | 667 | * is the behaviour we want though - once it gets a wakeup it should be given |
668 | * a nice run. | 668 | * a nice run. |
669 | */ | 669 | */ |
670 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | 670 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
671 | { | 671 | { |
672 | if (!ioc || ioc_batching(q, ioc)) | 672 | if (!ioc || ioc_batching(q, ioc)) |
673 | return; | 673 | return; |
674 | 674 | ||
675 | ioc->nr_batch_requests = q->nr_batching; | 675 | ioc->nr_batch_requests = q->nr_batching; |
676 | ioc->last_waited = jiffies; | 676 | ioc->last_waited = jiffies; |
677 | } | 677 | } |
678 | 678 | ||
679 | static void __freed_request(struct request_queue *q, int rw) | 679 | static void __freed_request(struct request_queue *q, int rw) |
680 | { | 680 | { |
681 | struct request_list *rl = &q->rq; | 681 | struct request_list *rl = &q->rq; |
682 | 682 | ||
683 | if (rl->count[rw] < queue_congestion_off_threshold(q)) | 683 | if (rl->count[rw] < queue_congestion_off_threshold(q)) |
684 | blk_clear_queue_congested(q, rw); | 684 | blk_clear_queue_congested(q, rw); |
685 | 685 | ||
686 | if (rl->count[rw] + 1 <= q->nr_requests) { | 686 | if (rl->count[rw] + 1 <= q->nr_requests) { |
687 | if (waitqueue_active(&rl->wait[rw])) | 687 | if (waitqueue_active(&rl->wait[rw])) |
688 | wake_up(&rl->wait[rw]); | 688 | wake_up(&rl->wait[rw]); |
689 | 689 | ||
690 | blk_clear_queue_full(q, rw); | 690 | blk_clear_queue_full(q, rw); |
691 | } | 691 | } |
692 | } | 692 | } |
693 | 693 | ||
694 | /* | 694 | /* |
695 | * A request has just been released. Account for it, update the full and | 695 | * A request has just been released. Account for it, update the full and |
696 | * congestion status, wake up any waiters. Called under q->queue_lock. | 696 | * congestion status, wake up any waiters. Called under q->queue_lock. |
697 | */ | 697 | */ |
698 | static void freed_request(struct request_queue *q, int rw, int priv) | 698 | static void freed_request(struct request_queue *q, int rw, int priv) |
699 | { | 699 | { |
700 | struct request_list *rl = &q->rq; | 700 | struct request_list *rl = &q->rq; |
701 | 701 | ||
702 | rl->count[rw]--; | 702 | rl->count[rw]--; |
703 | if (priv) | 703 | if (priv) |
704 | rl->elvpriv--; | 704 | rl->elvpriv--; |
705 | 705 | ||
706 | __freed_request(q, rw); | 706 | __freed_request(q, rw); |
707 | 707 | ||
708 | if (unlikely(rl->starved[rw ^ 1])) | 708 | if (unlikely(rl->starved[rw ^ 1])) |
709 | __freed_request(q, rw ^ 1); | 709 | __freed_request(q, rw ^ 1); |
710 | } | 710 | } |
711 | 711 | ||
712 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) | 712 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) |
713 | /* | 713 | /* |
714 | * Get a free request, queue_lock must be held. | 714 | * Get a free request, queue_lock must be held. |
715 | * Returns NULL on failure, with queue_lock held. | 715 | * Returns NULL on failure, with queue_lock held. |
716 | * Returns !NULL on success, with queue_lock *not held*. | 716 | * Returns !NULL on success, with queue_lock *not held*. |
717 | */ | 717 | */ |
718 | static struct request *get_request(struct request_queue *q, int rw_flags, | 718 | static struct request *get_request(struct request_queue *q, int rw_flags, |
719 | struct bio *bio, gfp_t gfp_mask) | 719 | struct bio *bio, gfp_t gfp_mask) |
720 | { | 720 | { |
721 | struct request *rq = NULL; | 721 | struct request *rq = NULL; |
722 | struct request_list *rl = &q->rq; | 722 | struct request_list *rl = &q->rq; |
723 | struct io_context *ioc = NULL; | 723 | struct io_context *ioc = NULL; |
724 | const int rw = rw_flags & 0x01; | 724 | const int rw = rw_flags & 0x01; |
725 | int may_queue, priv; | 725 | int may_queue, priv; |
726 | 726 | ||
727 | may_queue = elv_may_queue(q, rw_flags); | 727 | may_queue = elv_may_queue(q, rw_flags); |
728 | if (may_queue == ELV_MQUEUE_NO) | 728 | if (may_queue == ELV_MQUEUE_NO) |
729 | goto rq_starved; | 729 | goto rq_starved; |
730 | 730 | ||
731 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { | 731 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { |
732 | if (rl->count[rw]+1 >= q->nr_requests) { | 732 | if (rl->count[rw]+1 >= q->nr_requests) { |
733 | ioc = current_io_context(GFP_ATOMIC, q->node); | 733 | ioc = current_io_context(GFP_ATOMIC, q->node); |
734 | /* | 734 | /* |
735 | * The queue will fill after this allocation, so set | 735 | * The queue will fill after this allocation, so set |
736 | * it as full, and mark this process as "batching". | 736 | * it as full, and mark this process as "batching". |
737 | * This process will be allowed to complete a batch of | 737 | * This process will be allowed to complete a batch of |
738 | * requests, others will be blocked. | 738 | * requests, others will be blocked. |
739 | */ | 739 | */ |
740 | if (!blk_queue_full(q, rw)) { | 740 | if (!blk_queue_full(q, rw)) { |
741 | ioc_set_batching(q, ioc); | 741 | ioc_set_batching(q, ioc); |
742 | blk_set_queue_full(q, rw); | 742 | blk_set_queue_full(q, rw); |
743 | } else { | 743 | } else { |
744 | if (may_queue != ELV_MQUEUE_MUST | 744 | if (may_queue != ELV_MQUEUE_MUST |
745 | && !ioc_batching(q, ioc)) { | 745 | && !ioc_batching(q, ioc)) { |
746 | /* | 746 | /* |
747 | * The queue is full and the allocating | 747 | * The queue is full and the allocating |
748 | * process is not a "batcher", and not | 748 | * process is not a "batcher", and not |
749 | * exempted by the IO scheduler | 749 | * exempted by the IO scheduler |
750 | */ | 750 | */ |
751 | goto out; | 751 | goto out; |
752 | } | 752 | } |
753 | } | 753 | } |
754 | } | 754 | } |
755 | blk_set_queue_congested(q, rw); | 755 | blk_set_queue_congested(q, rw); |
756 | } | 756 | } |
757 | 757 | ||
758 | /* | 758 | /* |
759 | * Only allow batching queuers to allocate up to 50% over the defined | 759 | * Only allow batching queuers to allocate up to 50% over the defined |
760 | * limit of requests, otherwise we could have thousands of requests | 760 | * limit of requests, otherwise we could have thousands of requests |
761 | * allocated with any setting of ->nr_requests | 761 | * allocated with any setting of ->nr_requests |
762 | */ | 762 | */ |
763 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) | 763 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) |
764 | goto out; | 764 | goto out; |
765 | 765 | ||
766 | rl->count[rw]++; | 766 | rl->count[rw]++; |
767 | rl->starved[rw] = 0; | 767 | rl->starved[rw] = 0; |
768 | 768 | ||
769 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 769 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
770 | if (priv) | 770 | if (priv) |
771 | rl->elvpriv++; | 771 | rl->elvpriv++; |
772 | 772 | ||
773 | spin_unlock_irq(q->queue_lock); | 773 | spin_unlock_irq(q->queue_lock); |
774 | 774 | ||
775 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); | 775 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); |
776 | if (unlikely(!rq)) { | 776 | if (unlikely(!rq)) { |
777 | /* | 777 | /* |
778 | * Allocation failed presumably due to memory. Undo anything | 778 | * Allocation failed presumably due to memory. Undo anything |
779 | * we might have messed up. | 779 | * we might have messed up. |
780 | * | 780 | * |
781 | * Allocating task should really be put onto the front of the | 781 | * Allocating task should really be put onto the front of the |
782 | * wait queue, but this is pretty rare. | 782 | * wait queue, but this is pretty rare. |
783 | */ | 783 | */ |
784 | spin_lock_irq(q->queue_lock); | 784 | spin_lock_irq(q->queue_lock); |
785 | freed_request(q, rw, priv); | 785 | freed_request(q, rw, priv); |
786 | 786 | ||
787 | /* | 787 | /* |
788 | * in the very unlikely event that allocation failed and no | 788 | * in the very unlikely event that allocation failed and no |
789 | * requests for this direction was pending, mark us starved | 789 | * requests for this direction was pending, mark us starved |
790 | * so that freeing of a request in the other direction will | 790 | * so that freeing of a request in the other direction will |
791 | * notice us. another possible fix would be to split the | 791 | * notice us. another possible fix would be to split the |
792 | * rq mempool into READ and WRITE | 792 | * rq mempool into READ and WRITE |
793 | */ | 793 | */ |
794 | rq_starved: | 794 | rq_starved: |
795 | if (unlikely(rl->count[rw] == 0)) | 795 | if (unlikely(rl->count[rw] == 0)) |
796 | rl->starved[rw] = 1; | 796 | rl->starved[rw] = 1; |
797 | 797 | ||
798 | goto out; | 798 | goto out; |
799 | } | 799 | } |
800 | 800 | ||
801 | /* | 801 | /* |
802 | * ioc may be NULL here, and ioc_batching will be false. That's | 802 | * ioc may be NULL here, and ioc_batching will be false. That's |
803 | * OK, if the queue is under the request limit then requests need | 803 | * OK, if the queue is under the request limit then requests need |
804 | * not count toward the nr_batch_requests limit. There will always | 804 | * not count toward the nr_batch_requests limit. There will always |
805 | * be some limit enforced by BLK_BATCH_TIME. | 805 | * be some limit enforced by BLK_BATCH_TIME. |
806 | */ | 806 | */ |
807 | if (ioc_batching(q, ioc)) | 807 | if (ioc_batching(q, ioc)) |
808 | ioc->nr_batch_requests--; | 808 | ioc->nr_batch_requests--; |
809 | 809 | ||
810 | blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); | 810 | blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); |
811 | out: | 811 | out: |
812 | return rq; | 812 | return rq; |
813 | } | 813 | } |
814 | 814 | ||
815 | /* | 815 | /* |
816 | * No available requests for this queue, unplug the device and wait for some | 816 | * No available requests for this queue, unplug the device and wait for some |
817 | * requests to become available. | 817 | * requests to become available. |
818 | * | 818 | * |
819 | * Called with q->queue_lock held, and returns with it unlocked. | 819 | * Called with q->queue_lock held, and returns with it unlocked. |
820 | */ | 820 | */ |
821 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 821 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, |
822 | struct bio *bio) | 822 | struct bio *bio) |
823 | { | 823 | { |
824 | const int rw = rw_flags & 0x01; | 824 | const int rw = rw_flags & 0x01; |
825 | struct request *rq; | 825 | struct request *rq; |
826 | 826 | ||
827 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 827 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
828 | while (!rq) { | 828 | while (!rq) { |
829 | DEFINE_WAIT(wait); | 829 | DEFINE_WAIT(wait); |
830 | struct io_context *ioc; | 830 | struct io_context *ioc; |
831 | struct request_list *rl = &q->rq; | 831 | struct request_list *rl = &q->rq; |
832 | 832 | ||
833 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, | 833 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, |
834 | TASK_UNINTERRUPTIBLE); | 834 | TASK_UNINTERRUPTIBLE); |
835 | 835 | ||
836 | blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); | 836 | blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); |
837 | 837 | ||
838 | __generic_unplug_device(q); | 838 | __generic_unplug_device(q); |
839 | spin_unlock_irq(q->queue_lock); | 839 | spin_unlock_irq(q->queue_lock); |
840 | io_schedule(); | 840 | io_schedule(); |
841 | 841 | ||
842 | /* | 842 | /* |
843 | * After sleeping, we become a "batching" process and | 843 | * After sleeping, we become a "batching" process and |
844 | * will be able to allocate at least one request, and | 844 | * will be able to allocate at least one request, and |
845 | * up to a big batch of them for a small period time. | 845 | * up to a big batch of them for a small period time. |
846 | * See ioc_batching, ioc_set_batching | 846 | * See ioc_batching, ioc_set_batching |
847 | */ | 847 | */ |
848 | ioc = current_io_context(GFP_NOIO, q->node); | 848 | ioc = current_io_context(GFP_NOIO, q->node); |
849 | ioc_set_batching(q, ioc); | 849 | ioc_set_batching(q, ioc); |
850 | 850 | ||
851 | spin_lock_irq(q->queue_lock); | 851 | spin_lock_irq(q->queue_lock); |
852 | finish_wait(&rl->wait[rw], &wait); | 852 | finish_wait(&rl->wait[rw], &wait); |
853 | 853 | ||
854 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 854 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
855 | } | 855 | } |
856 | 856 | ||
857 | return rq; | 857 | return rq; |
858 | } | 858 | } |
859 | 859 | ||
860 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 860 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
861 | { | 861 | { |
862 | struct request *rq; | 862 | struct request *rq; |
863 | 863 | ||
864 | BUG_ON(rw != READ && rw != WRITE); | 864 | BUG_ON(rw != READ && rw != WRITE); |
865 | 865 | ||
866 | spin_lock_irq(q->queue_lock); | 866 | spin_lock_irq(q->queue_lock); |
867 | if (gfp_mask & __GFP_WAIT) { | 867 | if (gfp_mask & __GFP_WAIT) { |
868 | rq = get_request_wait(q, rw, NULL); | 868 | rq = get_request_wait(q, rw, NULL); |
869 | } else { | 869 | } else { |
870 | rq = get_request(q, rw, NULL, gfp_mask); | 870 | rq = get_request(q, rw, NULL, gfp_mask); |
871 | if (!rq) | 871 | if (!rq) |
872 | spin_unlock_irq(q->queue_lock); | 872 | spin_unlock_irq(q->queue_lock); |
873 | } | 873 | } |
874 | /* q->queue_lock is unlocked at this point */ | 874 | /* q->queue_lock is unlocked at this point */ |
875 | 875 | ||
876 | return rq; | 876 | return rq; |
877 | } | 877 | } |
878 | EXPORT_SYMBOL(blk_get_request); | 878 | EXPORT_SYMBOL(blk_get_request); |
879 | 879 | ||
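A minimal sketch of the allocate/use/release pattern this interface implies (not part of the patch; my_prep_command() is a hypothetical driver helper, and a real caller would issue the request, e.g. via blk_execute_rq(), before dropping it):

	struct request *rq;

	rq = blk_get_request(q, WRITE, GFP_KERNEL);	/* __GFP_WAIT set: may sleep, never NULL */
	if (!rq)					/* only possible for !__GFP_WAIT masks */
		return;
	my_prep_command(rq);				/* hypothetical: fill in cmd/buffer */
	blk_put_request(rq);				/* drop the reference taken above */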
880 | /** | 880 | /** |
881 | * blk_start_queueing - initiate dispatch of requests to device | 881 | * blk_start_queueing - initiate dispatch of requests to device |
882 | * @q: request queue to kick into gear | 882 | * @q: request queue to kick into gear |
883 | * | 883 | * |
884 | * This is basically a helper to remove the need to know whether a queue | 884 | * This is basically a helper to remove the need to know whether a queue |
885 | * is plugged or not if someone just wants to initiate dispatch of requests | 885 | * is plugged or not if someone just wants to initiate dispatch of requests |
886 | * for this queue. | 886 | * for this queue. |
887 | * | 887 | * |
888 | * The queue lock must be held with interrupts disabled. | 888 | * The queue lock must be held with interrupts disabled. |
889 | */ | 889 | */ |
890 | void blk_start_queueing(struct request_queue *q) | 890 | void blk_start_queueing(struct request_queue *q) |
891 | { | 891 | { |
892 | if (!blk_queue_plugged(q)) | 892 | if (!blk_queue_plugged(q)) |
893 | q->request_fn(q); | 893 | q->request_fn(q); |
894 | else | 894 | else |
895 | __generic_unplug_device(q); | 895 | __generic_unplug_device(q); |
896 | } | 896 | } |
897 | EXPORT_SYMBOL(blk_start_queueing); | 897 | EXPORT_SYMBOL(blk_start_queueing); |
898 | 898 | ||
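blk_insert_request() further down follows exactly this pattern: queue a request under the lock, then kick dispatch. A condensed sketch, assuming rq and flags are set up by the caller:

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
	blk_start_queueing(q);		/* run request_fn now, or unplug if plugged */
	spin_unlock_irqrestore(q->queue_lock, flags);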
899 | /** | 899 | /** |
900 | * blk_requeue_request - put a request back on queue | 900 | * blk_requeue_request - put a request back on queue |
901 | * @q: request queue where request should be inserted | 901 | * @q: request queue where request should be inserted |
902 | * @rq: request to be inserted | 902 | * @rq: request to be inserted |
903 | * | 903 | * |
904 | * Description: | 904 | * Description: |
905 | * Drivers often keep queueing requests until the hardware cannot accept | 905 | * Drivers often keep queueing requests until the hardware cannot accept |
906 | * more. When that condition happens we need to put the request back | 906 | * more. When that condition happens we need to put the request back |
907 | * on the queue. Must be called with queue lock held. | 907 | * on the queue. Must be called with queue lock held. |
908 | */ | 908 | */ |
909 | void blk_requeue_request(struct request_queue *q, struct request *rq) | 909 | void blk_requeue_request(struct request_queue *q, struct request *rq) |
910 | { | 910 | { |
911 | blk_delete_timer(rq); | 911 | blk_delete_timer(rq); |
912 | blk_clear_rq_complete(rq); | 912 | blk_clear_rq_complete(rq); |
913 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); | 913 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); |
914 | 914 | ||
915 | if (blk_rq_tagged(rq)) | 915 | if (blk_rq_tagged(rq)) |
916 | blk_queue_end_tag(q, rq); | 916 | blk_queue_end_tag(q, rq); |
917 | 917 | ||
918 | elv_requeue_request(q, rq); | 918 | elv_requeue_request(q, rq); |
919 | } | 919 | } |
920 | EXPORT_SYMBOL(blk_requeue_request); | 920 | EXPORT_SYMBOL(blk_requeue_request); |
921 | 921 | ||
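A sketch of the usual caller: a request_fn that backs off when the hardware is full. elv_next_request() hands back the next request with the queue lock held; my_hw_queue_full() is a hypothetical HBA-full test:

	static void my_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = elv_next_request(q)) != NULL) {
			blkdev_dequeue_request(rq);
			if (my_hw_queue_full()) {	/* hypothetical */
				blk_requeue_request(q, rq);
				break;
			}
			/* ... hand rq to the hardware ... */
		}
	}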
922 | /** | 922 | /** |
923 | * blk_insert_request - insert a special request into a request queue | 923 | * blk_insert_request - insert a special request into a request queue |
924 | * @q: request queue where request should be inserted | 924 | * @q: request queue where request should be inserted |
925 | * @rq: request to be inserted | 925 | * @rq: request to be inserted |
926 | * @at_head: insert request at head or tail of queue | 926 | * @at_head: insert request at head or tail of queue |
927 | * @data: private data | 927 | * @data: private data |
928 | * | 928 | * |
929 | * Description: | 929 | * Description: |
930 | * Many block devices need to execute commands asynchronously, so they don't | 930 | * Many block devices need to execute commands asynchronously, so they don't |
931 | * block the whole kernel from preemption during request execution. This is | 931 | * block the whole kernel from preemption during request execution. This is |
932 | * accomplished normally by inserting artificial requests tagged as | 932 | * accomplished normally by inserting artificial requests tagged as |
933 | * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them | 933 | * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them |
934 | * be scheduled for actual execution by the request queue. | 934 | * be scheduled for actual execution by the request queue. |
935 | * | 935 | * |
936 | * We have the option of inserting at the head or the tail of the queue. | 936 | * We have the option of inserting at the head or the tail of the queue. |
937 | * Typically we use the tail for new ioctls and so forth. We use the head | 937 | * Typically we use the tail for new ioctls and so forth. We use the head |
938 | * of the queue for things like a QUEUE_FULL message from a device, or a | 938 | * of the queue for things like a QUEUE_FULL message from a device, or a |
939 | * host that is unable to accept a particular command. | 939 | * host that is unable to accept a particular command. |
940 | */ | 940 | */ |
941 | void blk_insert_request(struct request_queue *q, struct request *rq, | 941 | void blk_insert_request(struct request_queue *q, struct request *rq, |
942 | int at_head, void *data) | 942 | int at_head, void *data) |
943 | { | 943 | { |
944 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 944 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
945 | unsigned long flags; | 945 | unsigned long flags; |
946 | 946 | ||
947 | /* | 947 | /* |
948 | * tell I/O scheduler that this isn't a regular read/write (ie it | 948 | * tell I/O scheduler that this isn't a regular read/write (ie it |
949 | * must not attempt merges on this) and that it acts as a soft | 949 | * must not attempt merges on this) and that it acts as a soft |
950 | * barrier | 950 | * barrier |
951 | */ | 951 | */ |
952 | rq->cmd_type = REQ_TYPE_SPECIAL; | 952 | rq->cmd_type = REQ_TYPE_SPECIAL; |
953 | rq->cmd_flags |= REQ_SOFTBARRIER; | 953 | rq->cmd_flags |= REQ_SOFTBARRIER; |
954 | 954 | ||
955 | rq->special = data; | 955 | rq->special = data; |
956 | 956 | ||
957 | spin_lock_irqsave(q->queue_lock, flags); | 957 | spin_lock_irqsave(q->queue_lock, flags); |
958 | 958 | ||
959 | /* | 959 | /* |
960 | * If command is tagged, release the tag | 960 | * If command is tagged, release the tag |
961 | */ | 961 | */ |
962 | if (blk_rq_tagged(rq)) | 962 | if (blk_rq_tagged(rq)) |
963 | blk_queue_end_tag(q, rq); | 963 | blk_queue_end_tag(q, rq); |
964 | 964 | ||
965 | drive_stat_acct(rq, 1); | 965 | drive_stat_acct(rq, 1); |
966 | __elv_add_request(q, rq, where, 0); | 966 | __elv_add_request(q, rq, where, 0); |
967 | blk_start_queueing(q); | 967 | blk_start_queueing(q); |
968 | spin_unlock_irqrestore(q->queue_lock, flags); | 968 | spin_unlock_irqrestore(q->queue_lock, flags); |
969 | } | 969 | } |
970 | EXPORT_SYMBOL(blk_insert_request); | 970 | EXPORT_SYMBOL(blk_insert_request); |
971 | 971 | ||
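A hedged usage sketch: pushing a driver-private command in at the head after, say, a QUEUE_FULL message from the device (MY_OPCODE and my_data are hypothetical):

	struct request *rq = blk_get_request(q, READ, __GFP_WAIT);

	rq->cmd[0] = MY_OPCODE;			/* hypothetical command byte */
	blk_insert_request(q, rq, 1, my_data);	/* at_head = 1 */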
972 | /* | 972 | /* |
973 | * add-request adds a request to the linked list. | 973 | * add-request adds a request to the linked list. |
974 | * queue lock is held and interrupts disabled, as we muck with the | 974 | * queue lock is held and interrupts disabled, as we muck with the |
975 | * request queue list. | 975 | * request queue list. |
976 | */ | 976 | */ |
977 | static inline void add_request(struct request_queue *q, struct request *req) | 977 | static inline void add_request(struct request_queue *q, struct request *req) |
978 | { | 978 | { |
979 | drive_stat_acct(req, 1); | 979 | drive_stat_acct(req, 1); |
980 | 980 | ||
981 | /* | 981 | /* |
982 | * elevator indicated where it wants this request to be | 982 | * elevator indicated where it wants this request to be |
983 | * inserted at elevator_merge time | 983 | * inserted at elevator_merge time |
984 | */ | 984 | */ |
985 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | 985 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); |
986 | } | 986 | } |
987 | 987 | ||
988 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 988 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
989 | unsigned long now) | 989 | unsigned long now) |
990 | { | 990 | { |
991 | if (now == part->stamp) | 991 | if (now == part->stamp) |
992 | return; | 992 | return; |
993 | 993 | ||
994 | if (part->in_flight) { | 994 | if (part->in_flight) { |
995 | __part_stat_add(cpu, part, time_in_queue, | 995 | __part_stat_add(cpu, part, time_in_queue, |
996 | part->in_flight * (now - part->stamp)); | 996 | part->in_flight * (now - part->stamp)); |
997 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | 997 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
998 | } | 998 | } |
999 | part->stamp = now; | 999 | part->stamp = now; |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | /** | 1002 | /** |
1003 | * part_round_stats() - Round off the performance stats on a struct | 1003 | * part_round_stats() - Round off the performance stats on a struct |
1004 | * disk_stats. | 1004 | * disk_stats. |
1005 | * | 1005 | * |
1006 | * The average IO queue length and utilisation statistics are maintained | 1006 | * The average IO queue length and utilisation statistics are maintained |
1007 | * by observing the current state of the queue length and the amount of | 1007 | * by observing the current state of the queue length and the amount of |
1008 | * time it has been in this state for. | 1008 | * time it has been in this state for. |
1009 | * | 1009 | * |
1010 | * Normally, that accounting is done on IO completion, but that can result | 1010 | * Normally, that accounting is done on IO completion, but that can result |
1011 | * in more than a second's worth of IO being accounted for within any one | 1011 | * in more than a second's worth of IO being accounted for within any one |
1012 | * second, leading to >100% utilisation. To deal with that, we call this | 1012 | * second, leading to >100% utilisation. To deal with that, we call this |
1013 | * function to do a round-off before returning the results when reading | 1013 | * function to do a round-off before returning the results when reading |
1014 | * /proc/diskstats. This accounts immediately for all queue usage up to | 1014 | * /proc/diskstats. This accounts immediately for all queue usage up to |
1015 | * the current jiffies and restarts the counters again. | 1015 | * the current jiffies and restarts the counters again. |
1016 | */ | 1016 | */ |
1017 | void part_round_stats(int cpu, struct hd_struct *part) | 1017 | void part_round_stats(int cpu, struct hd_struct *part) |
1018 | { | 1018 | { |
1019 | unsigned long now = jiffies; | 1019 | unsigned long now = jiffies; |
1020 | 1020 | ||
1021 | if (part->partno) | 1021 | if (part->partno) |
1022 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); | 1022 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); |
1023 | part_round_stats_single(cpu, part, now); | 1023 | part_round_stats_single(cpu, part, now); |
1024 | } | 1024 | } |
1025 | EXPORT_SYMBOL_GPL(part_round_stats); | 1025 | EXPORT_SYMBOL_GPL(part_round_stats); |
1026 | 1026 | ||
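Callers bracket this with the per-cpu stat lock, exactly as the completion accounting later in this file does:

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(disk, sector);
	part_round_stats(cpu, part);
	part_stat_unlock();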
1027 | /* | 1027 | /* |
1028 | * queue lock must be held | 1028 | * queue lock must be held |
1029 | */ | 1029 | */ |
1030 | void __blk_put_request(struct request_queue *q, struct request *req) | 1030 | void __blk_put_request(struct request_queue *q, struct request *req) |
1031 | { | 1031 | { |
1032 | if (unlikely(!q)) | 1032 | if (unlikely(!q)) |
1033 | return; | 1033 | return; |
1034 | if (unlikely(--req->ref_count)) | 1034 | if (unlikely(--req->ref_count)) |
1035 | return; | 1035 | return; |
1036 | 1036 | ||
1037 | elv_completed_request(q, req); | 1037 | elv_completed_request(q, req); |
1038 | 1038 | ||
1039 | /* | 1039 | /* |
1040 | * Request may not have originated from ll_rw_blk. If not, | 1040 | * Request may not have originated from ll_rw_blk. If not, |
1041 | * it didn't come out of our reserved rq pools | 1041 | * it didn't come out of our reserved rq pools |
1042 | */ | 1042 | */ |
1043 | if (req->cmd_flags & REQ_ALLOCED) { | 1043 | if (req->cmd_flags & REQ_ALLOCED) { |
1044 | int rw = rq_data_dir(req); | 1044 | int rw = rq_data_dir(req); |
1045 | int priv = req->cmd_flags & REQ_ELVPRIV; | 1045 | int priv = req->cmd_flags & REQ_ELVPRIV; |
1046 | 1046 | ||
1047 | BUG_ON(!list_empty(&req->queuelist)); | 1047 | BUG_ON(!list_empty(&req->queuelist)); |
1048 | BUG_ON(!hlist_unhashed(&req->hash)); | 1048 | BUG_ON(!hlist_unhashed(&req->hash)); |
1049 | 1049 | ||
1050 | blk_free_request(q, req); | 1050 | blk_free_request(q, req); |
1051 | freed_request(q, rw, priv); | 1051 | freed_request(q, rw, priv); |
1052 | } | 1052 | } |
1053 | } | 1053 | } |
1054 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1054 | EXPORT_SYMBOL_GPL(__blk_put_request); |
1055 | 1055 | ||
1056 | void blk_put_request(struct request *req) | 1056 | void blk_put_request(struct request *req) |
1057 | { | 1057 | { |
1058 | unsigned long flags; | 1058 | unsigned long flags; |
1059 | struct request_queue *q = req->q; | 1059 | struct request_queue *q = req->q; |
1060 | 1060 | ||
1061 | spin_lock_irqsave(q->queue_lock, flags); | 1061 | spin_lock_irqsave(q->queue_lock, flags); |
1062 | __blk_put_request(q, req); | 1062 | __blk_put_request(q, req); |
1063 | spin_unlock_irqrestore(q->queue_lock, flags); | 1063 | spin_unlock_irqrestore(q->queue_lock, flags); |
1064 | } | 1064 | } |
1065 | EXPORT_SYMBOL(blk_put_request); | 1065 | EXPORT_SYMBOL(blk_put_request); |
1066 | 1066 | ||
1067 | void init_request_from_bio(struct request *req, struct bio *bio) | 1067 | void init_request_from_bio(struct request *req, struct bio *bio) |
1068 | { | 1068 | { |
1069 | req->cpu = bio->bi_comp_cpu; | 1069 | req->cpu = bio->bi_comp_cpu; |
1070 | req->cmd_type = REQ_TYPE_FS; | 1070 | req->cmd_type = REQ_TYPE_FS; |
1071 | 1071 | ||
1072 | /* | 1072 | /* |
1073 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | 1073 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
1074 | */ | 1074 | */ |
1075 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | 1075 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
1076 | req->cmd_flags |= REQ_FAILFAST; | 1076 | req->cmd_flags |= REQ_FAILFAST; |
1077 | 1077 | ||
1078 | /* | 1078 | /* |
1079 | * REQ_BARRIER implies no merging, but let's make it explicit | 1079 | * REQ_BARRIER implies no merging, but let's make it explicit |
1080 | */ | 1080 | */ |
1081 | if (unlikely(bio_discard(bio))) { | 1081 | if (unlikely(bio_discard(bio))) { |
1082 | req->cmd_flags |= REQ_DISCARD; | 1082 | req->cmd_flags |= REQ_DISCARD; |
1083 | if (bio_barrier(bio)) | 1083 | if (bio_barrier(bio)) |
1084 | req->cmd_flags |= REQ_SOFTBARRIER; | 1084 | req->cmd_flags |= REQ_SOFTBARRIER; |
1085 | req->q->prepare_discard_fn(req->q, req); | 1085 | req->q->prepare_discard_fn(req->q, req); |
1086 | } else if (unlikely(bio_barrier(bio))) | 1086 | } else if (unlikely(bio_barrier(bio))) |
1087 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 1087 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
1088 | 1088 | ||
1089 | if (bio_sync(bio)) | 1089 | if (bio_sync(bio)) |
1090 | req->cmd_flags |= REQ_RW_SYNC; | 1090 | req->cmd_flags |= REQ_RW_SYNC; |
1091 | if (bio_rw_meta(bio)) | 1091 | if (bio_rw_meta(bio)) |
1092 | req->cmd_flags |= REQ_RW_META; | 1092 | req->cmd_flags |= REQ_RW_META; |
1093 | 1093 | ||
1094 | req->errors = 0; | 1094 | req->errors = 0; |
1095 | req->hard_sector = req->sector = bio->bi_sector; | 1095 | req->hard_sector = req->sector = bio->bi_sector; |
1096 | req->ioprio = bio_prio(bio); | 1096 | req->ioprio = bio_prio(bio); |
1097 | req->start_time = jiffies; | 1097 | req->start_time = jiffies; |
1098 | blk_rq_bio_prep(req->q, req, bio); | 1098 | blk_rq_bio_prep(req->q, req, bio); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | static int __make_request(struct request_queue *q, struct bio *bio) | 1101 | static int __make_request(struct request_queue *q, struct bio *bio) |
1102 | { | 1102 | { |
1103 | struct request *req; | 1103 | struct request *req; |
1104 | int el_ret, nr_sectors, barrier, discard, err; | 1104 | int el_ret, nr_sectors, barrier, discard, err; |
1105 | const unsigned short prio = bio_prio(bio); | 1105 | const unsigned short prio = bio_prio(bio); |
1106 | const int sync = bio_sync(bio); | 1106 | const int sync = bio_sync(bio); |
1107 | int rw_flags; | 1107 | int rw_flags; |
1108 | 1108 | ||
1109 | nr_sectors = bio_sectors(bio); | 1109 | nr_sectors = bio_sectors(bio); |
1110 | 1110 | ||
1111 | /* | 1111 | /* |
1112 | * low level driver can indicate that it wants pages above a | 1112 | * low level driver can indicate that it wants pages above a |
1113 | * certain limit bounced to low memory (ie for highmem, or even | 1113 | * certain limit bounced to low memory (ie for highmem, or even |
1114 | * ISA dma in theory) | 1114 | * ISA dma in theory) |
1115 | */ | 1115 | */ |
1116 | blk_queue_bounce(q, &bio); | 1116 | blk_queue_bounce(q, &bio); |
1117 | 1117 | ||
1118 | barrier = bio_barrier(bio); | 1118 | barrier = bio_barrier(bio); |
1119 | if (unlikely(barrier) && bio_has_data(bio) && | 1119 | if (unlikely(barrier) && bio_has_data(bio) && |
1120 | (q->next_ordered == QUEUE_ORDERED_NONE)) { | 1120 | (q->next_ordered == QUEUE_ORDERED_NONE)) { |
1121 | err = -EOPNOTSUPP; | 1121 | err = -EOPNOTSUPP; |
1122 | goto end_io; | 1122 | goto end_io; |
1123 | } | 1123 | } |
1124 | 1124 | ||
1125 | discard = bio_discard(bio); | 1125 | discard = bio_discard(bio); |
1126 | if (unlikely(discard) && !q->prepare_discard_fn) { | 1126 | if (unlikely(discard) && !q->prepare_discard_fn) { |
1127 | err = -EOPNOTSUPP; | 1127 | err = -EOPNOTSUPP; |
1128 | goto end_io; | 1128 | goto end_io; |
1129 | } | 1129 | } |
1130 | 1130 | ||
1131 | spin_lock_irq(q->queue_lock); | 1131 | spin_lock_irq(q->queue_lock); |
1132 | 1132 | ||
1133 | if (unlikely(barrier) || elv_queue_empty(q)) | 1133 | if (unlikely(barrier) || elv_queue_empty(q)) |
1134 | goto get_rq; | 1134 | goto get_rq; |
1135 | 1135 | ||
1136 | el_ret = elv_merge(q, &req, bio); | 1136 | el_ret = elv_merge(q, &req, bio); |
1137 | switch (el_ret) { | 1137 | switch (el_ret) { |
1138 | case ELEVATOR_BACK_MERGE: | 1138 | case ELEVATOR_BACK_MERGE: |
1139 | BUG_ON(!rq_mergeable(req)); | 1139 | BUG_ON(!rq_mergeable(req)); |
1140 | 1140 | ||
1141 | if (!ll_back_merge_fn(q, req, bio)) | 1141 | if (!ll_back_merge_fn(q, req, bio)) |
1142 | break; | 1142 | break; |
1143 | 1143 | ||
1144 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | 1144 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); |
1145 | 1145 | ||
1146 | req->biotail->bi_next = bio; | 1146 | req->biotail->bi_next = bio; |
1147 | req->biotail = bio; | 1147 | req->biotail = bio; |
1148 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 1148 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
1149 | req->ioprio = ioprio_best(req->ioprio, prio); | 1149 | req->ioprio = ioprio_best(req->ioprio, prio); |
1150 | if (!blk_rq_cpu_valid(req)) | 1150 | if (!blk_rq_cpu_valid(req)) |
1151 | req->cpu = bio->bi_comp_cpu; | 1151 | req->cpu = bio->bi_comp_cpu; |
1152 | drive_stat_acct(req, 0); | 1152 | drive_stat_acct(req, 0); |
1153 | if (!attempt_back_merge(q, req)) | 1153 | if (!attempt_back_merge(q, req)) |
1154 | elv_merged_request(q, req, el_ret); | 1154 | elv_merged_request(q, req, el_ret); |
1155 | goto out; | 1155 | goto out; |
1156 | 1156 | ||
1157 | case ELEVATOR_FRONT_MERGE: | 1157 | case ELEVATOR_FRONT_MERGE: |
1158 | BUG_ON(!rq_mergeable(req)); | 1158 | BUG_ON(!rq_mergeable(req)); |
1159 | 1159 | ||
1160 | if (!ll_front_merge_fn(q, req, bio)) | 1160 | if (!ll_front_merge_fn(q, req, bio)) |
1161 | break; | 1161 | break; |
1162 | 1162 | ||
1163 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | 1163 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); |
1164 | 1164 | ||
1165 | bio->bi_next = req->bio; | 1165 | bio->bi_next = req->bio; |
1166 | req->bio = bio; | 1166 | req->bio = bio; |
1167 | 1167 | ||
1168 | /* | 1168 | /* |
1169 | * May not be valid. If the low level driver said | 1169 | * May not be valid. If the low level driver said |
1170 | * it didn't need a bounce buffer then it had better | 1170 | * it didn't need a bounce buffer then it had better |
1171 | * not touch req->buffer either... | 1171 | * not touch req->buffer either... |
1172 | */ | 1172 | */ |
1173 | req->buffer = bio_data(bio); | 1173 | req->buffer = bio_data(bio); |
1174 | req->current_nr_sectors = bio_cur_sectors(bio); | 1174 | req->current_nr_sectors = bio_cur_sectors(bio); |
1175 | req->hard_cur_sectors = req->current_nr_sectors; | 1175 | req->hard_cur_sectors = req->current_nr_sectors; |
1176 | req->sector = req->hard_sector = bio->bi_sector; | 1176 | req->sector = req->hard_sector = bio->bi_sector; |
1177 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 1177 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
1178 | req->ioprio = ioprio_best(req->ioprio, prio); | 1178 | req->ioprio = ioprio_best(req->ioprio, prio); |
1179 | if (!blk_rq_cpu_valid(req)) | 1179 | if (!blk_rq_cpu_valid(req)) |
1180 | req->cpu = bio->bi_comp_cpu; | 1180 | req->cpu = bio->bi_comp_cpu; |
1181 | drive_stat_acct(req, 0); | 1181 | drive_stat_acct(req, 0); |
1182 | if (!attempt_front_merge(q, req)) | 1182 | if (!attempt_front_merge(q, req)) |
1183 | elv_merged_request(q, req, el_ret); | 1183 | elv_merged_request(q, req, el_ret); |
1184 | goto out; | 1184 | goto out; |
1185 | 1185 | ||
1186 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | 1186 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ |
1187 | default: | 1187 | default: |
1188 | ; | 1188 | ; |
1189 | } | 1189 | } |
1190 | 1190 | ||
1191 | get_rq: | 1191 | get_rq: |
1192 | /* | 1192 | /* |
1193 | * This sync check and mask will be re-done in init_request_from_bio(), | 1193 | * This sync check and mask will be re-done in init_request_from_bio(), |
1194 | * but we need to set it earlier to expose the sync flag to the | 1194 | * but we need to set it earlier to expose the sync flag to the |
1195 | * rq allocator and io schedulers. | 1195 | * rq allocator and io schedulers. |
1196 | */ | 1196 | */ |
1197 | rw_flags = bio_data_dir(bio); | 1197 | rw_flags = bio_data_dir(bio); |
1198 | if (sync) | 1198 | if (sync) |
1199 | rw_flags |= REQ_RW_SYNC; | 1199 | rw_flags |= REQ_RW_SYNC; |
1200 | 1200 | ||
1201 | /* | 1201 | /* |
1202 | * Grab a free request. This might sleep but cannot fail. | 1202 | * Grab a free request. This might sleep but cannot fail. |
1203 | * Returns with the queue unlocked. | 1203 | * Returns with the queue unlocked. |
1204 | */ | 1204 | */ |
1205 | req = get_request_wait(q, rw_flags, bio); | 1205 | req = get_request_wait(q, rw_flags, bio); |
1206 | 1206 | ||
1207 | /* | 1207 | /* |
1208 | * After dropping the lock and possibly sleeping here, our request | 1208 | * After dropping the lock and possibly sleeping here, our request |
1209 | * may now be mergeable after it had proven unmergeable (above). | 1209 | * may now be mergeable after it had proven unmergeable (above). |
1210 | * We don't worry about that case for efficiency. It won't happen | 1210 | * We don't worry about that case for efficiency. It won't happen |
1211 | * often, and the elevators are able to handle it. | 1211 | * often, and the elevators are able to handle it. |
1212 | */ | 1212 | */ |
1213 | init_request_from_bio(req, bio); | 1213 | init_request_from_bio(req, bio); |
1214 | 1214 | ||
1215 | spin_lock_irq(q->queue_lock); | 1215 | spin_lock_irq(q->queue_lock); |
1216 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1216 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || |
1217 | bio_flagged(bio, BIO_CPU_AFFINE)) | 1217 | bio_flagged(bio, BIO_CPU_AFFINE)) |
1218 | req->cpu = blk_cpu_to_group(smp_processor_id()); | 1218 | req->cpu = blk_cpu_to_group(smp_processor_id()); |
1219 | if (elv_queue_empty(q)) | 1219 | if (elv_queue_empty(q)) |
1220 | blk_plug_device(q); | 1220 | blk_plug_device(q); |
1221 | add_request(q, req); | 1221 | add_request(q, req); |
1222 | out: | 1222 | out: |
1223 | if (sync) | 1223 | if (sync) |
1224 | __generic_unplug_device(q); | 1224 | __generic_unplug_device(q); |
1225 | spin_unlock_irq(q->queue_lock); | 1225 | spin_unlock_irq(q->queue_lock); |
1226 | return 0; | 1226 | return 0; |
1227 | 1227 | ||
1228 | end_io: | 1228 | end_io: |
1229 | bio_endio(bio, err); | 1229 | bio_endio(bio, err); |
1230 | return 0; | 1230 | return 0; |
1231 | } | 1231 | } |
1232 | 1232 | ||
1233 | /* | 1233 | /* |
1234 | * If bio->bi_bdev is a partition, remap the location | 1234 | * If bio->bi_bdev is a partition, remap the location |
1235 | */ | 1235 | */ |
1236 | static inline void blk_partition_remap(struct bio *bio) | 1236 | static inline void blk_partition_remap(struct bio *bio) |
1237 | { | 1237 | { |
1238 | struct block_device *bdev = bio->bi_bdev; | 1238 | struct block_device *bdev = bio->bi_bdev; |
1239 | 1239 | ||
1240 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { | 1240 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { |
1241 | struct hd_struct *p = bdev->bd_part; | 1241 | struct hd_struct *p = bdev->bd_part; |
1242 | 1242 | ||
1243 | bio->bi_sector += p->start_sect; | 1243 | bio->bi_sector += p->start_sect; |
1244 | bio->bi_bdev = bdev->bd_contains; | 1244 | bio->bi_bdev = bdev->bd_contains; |
1245 | 1245 | ||
1246 | blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, | 1246 | blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, |
1247 | bdev->bd_dev, bio->bi_sector, | 1247 | bdev->bd_dev, bio->bi_sector, |
1248 | bio->bi_sector - p->start_sect); | 1248 | bio->bi_sector - p->start_sect); |
1249 | } | 1249 | } |
1250 | } | 1250 | } |
1251 | 1251 | ||
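As a concrete illustration: a bio aimed at sector 100 of a partition whose start_sect is 2048 leaves here retargeted at sector 2148 of the whole disk, with bi_bdev pointing at bd_contains.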
1252 | static void handle_bad_sector(struct bio *bio) | 1252 | static void handle_bad_sector(struct bio *bio) |
1253 | { | 1253 | { |
1254 | char b[BDEVNAME_SIZE]; | 1254 | char b[BDEVNAME_SIZE]; |
1255 | 1255 | ||
1256 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 1256 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
1257 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 1257 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
1258 | bdevname(bio->bi_bdev, b), | 1258 | bdevname(bio->bi_bdev, b), |
1259 | bio->bi_rw, | 1259 | bio->bi_rw, |
1260 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 1260 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
1261 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); | 1261 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); |
1262 | 1262 | ||
1263 | set_bit(BIO_EOF, &bio->bi_flags); | 1263 | set_bit(BIO_EOF, &bio->bi_flags); |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1266 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1267 | 1267 | ||
1268 | static DECLARE_FAULT_ATTR(fail_make_request); | 1268 | static DECLARE_FAULT_ATTR(fail_make_request); |
1269 | 1269 | ||
1270 | static int __init setup_fail_make_request(char *str) | 1270 | static int __init setup_fail_make_request(char *str) |
1271 | { | 1271 | { |
1272 | return setup_fault_attr(&fail_make_request, str); | 1272 | return setup_fault_attr(&fail_make_request, str); |
1273 | } | 1273 | } |
1274 | __setup("fail_make_request=", setup_fail_make_request); | 1274 | __setup("fail_make_request=", setup_fail_make_request); |
1275 | 1275 | ||
1276 | static int should_fail_request(struct bio *bio) | 1276 | static int should_fail_request(struct bio *bio) |
1277 | { | 1277 | { |
1278 | struct hd_struct *part = bio->bi_bdev->bd_part; | 1278 | struct hd_struct *part = bio->bi_bdev->bd_part; |
1279 | 1279 | ||
1280 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) | 1280 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) |
1281 | return should_fail(&fail_make_request, bio->bi_size); | 1281 | return should_fail(&fail_make_request, bio->bi_size); |
1282 | 1282 | ||
1283 | return 0; | 1283 | return 0; |
1284 | } | 1284 | } |
1285 | 1285 | ||
1286 | static int __init fail_make_request_debugfs(void) | 1286 | static int __init fail_make_request_debugfs(void) |
1287 | { | 1287 | { |
1288 | return init_fault_attr_dentries(&fail_make_request, | 1288 | return init_fault_attr_dentries(&fail_make_request, |
1289 | "fail_make_request"); | 1289 | "fail_make_request"); |
1290 | } | 1290 | } |
1291 | 1291 | ||
1292 | late_initcall(fail_make_request_debugfs); | 1292 | late_initcall(fail_make_request_debugfs); |
1293 | 1293 | ||
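For reference, setup_fault_attr() parses the standard fault-injection tuple, and the make_it_fail test above is armed per disk or partition through sysfs; paths and semantics follow Documentation/fault-injection/ (a usage sketch, not part of this patch):

	fail_make_request=<interval>,<probability>,<space>,<times>
	fail_make_request=1,100,0,-1		# e.g. fail every request, forever
	echo 1 > /sys/block/sda/sda1/make_it_fail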
1294 | #else /* CONFIG_FAIL_MAKE_REQUEST */ | 1294 | #else /* CONFIG_FAIL_MAKE_REQUEST */ |
1295 | 1295 | ||
1296 | static inline int should_fail_request(struct bio *bio) | 1296 | static inline int should_fail_request(struct bio *bio) |
1297 | { | 1297 | { |
1298 | return 0; | 1298 | return 0; |
1299 | } | 1299 | } |
1300 | 1300 | ||
1301 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ | 1301 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ |
1302 | 1302 | ||
1303 | /* | 1303 | /* |
1304 | * Check whether this bio extends beyond the end of the device. | 1304 | * Check whether this bio extends beyond the end of the device. |
1305 | */ | 1305 | */ |
1306 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | 1306 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) |
1307 | { | 1307 | { |
1308 | sector_t maxsector; | 1308 | sector_t maxsector; |
1309 | 1309 | ||
1310 | if (!nr_sectors) | 1310 | if (!nr_sectors) |
1311 | return 0; | 1311 | return 0; |
1312 | 1312 | ||
1313 | /* Test device or partition size, when known. */ | 1313 | /* Test device or partition size, when known. */ |
1314 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; | 1314 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; |
1315 | if (maxsector) { | 1315 | if (maxsector) { |
1316 | sector_t sector = bio->bi_sector; | 1316 | sector_t sector = bio->bi_sector; |
1317 | 1317 | ||
1318 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 1318 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
1319 | /* | 1319 | /* |
1320 | * This may well happen - the kernel calls bread() | 1320 | * This may well happen - the kernel calls bread() |
1321 | * without checking the size of the device, e.g., when | 1321 | * without checking the size of the device, e.g., when |
1322 | * mounting a device. | 1322 | * mounting a device. |
1323 | */ | 1323 | */ |
1324 | handle_bad_sector(bio); | 1324 | handle_bad_sector(bio); |
1325 | return 1; | 1325 | return 1; |
1326 | } | 1326 | } |
1327 | } | 1327 | } |
1328 | 1328 | ||
1329 | return 0; | 1329 | return 0; |
1330 | } | 1330 | } |
1331 | 1331 | ||
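To make the arithmetic concrete: on a 1 MiB device maxsector is 2048, so an 8-sector bio starting at sector 2044 is rejected (2048 - 8 = 2040 < 2044; it would run through sector 2051), while the same bio at sector 2040 ends exactly at sector 2047 and passes.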
1332 | /** | 1332 | /** |
1333 | * generic_make_request - hand a buffer to its device driver for I/O | 1333 | * generic_make_request - hand a buffer to its device driver for I/O |
1334 | * @bio: The bio describing the location in memory and on the device. | 1334 | * @bio: The bio describing the location in memory and on the device. |
1335 | * | 1335 | * |
1336 | * generic_make_request() is used to make I/O requests of block | 1336 | * generic_make_request() is used to make I/O requests of block |
1337 | * devices. It is passed a &struct bio, which describes the I/O that needs | 1337 | * devices. It is passed a &struct bio, which describes the I/O that needs |
1338 | * to be done. | 1338 | * to be done. |
1339 | * | 1339 | * |
1340 | * generic_make_request() does not return any status. The | 1340 | * generic_make_request() does not return any status. The |
1341 | * success/failure status of the request, along with notification of | 1341 | * success/failure status of the request, along with notification of |
1342 | * completion, is delivered asynchronously through the bio->bi_end_io | 1342 | * completion, is delivered asynchronously through the bio->bi_end_io |
1343 | * function described (one day) elsewhere. | 1343 | * function described (one day) elsewhere. |
1344 | * | 1344 | * |
1345 | * The caller of generic_make_request must make sure that bi_io_vec | 1345 | * The caller of generic_make_request must make sure that bi_io_vec |
1346 | * is set to describe the memory buffer, and that bi_bdev and bi_sector are | 1346 | * is set to describe the memory buffer, and that bi_bdev and bi_sector are |
1347 | * set to describe the device address, and the | 1347 | * set to describe the device address, and the |
1348 | * bi_end_io and optionally bi_private are set to describe how | 1348 | * bi_end_io and optionally bi_private are set to describe how |
1349 | * completion notification should be signaled. | 1349 | * completion notification should be signaled. |
1350 | * | 1350 | * |
1351 | * generic_make_request and the drivers it calls may use bi_next if this | 1351 | * generic_make_request and the drivers it calls may use bi_next if this |
1352 | * bio happens to be merged with someone else, and may change bi_bdev and | 1352 | * bio happens to be merged with someone else, and may change bi_bdev and |
1353 | * bi_sector for remaps as it sees fit. So the values of these fields | 1353 | * bi_sector for remaps as it sees fit. So the values of these fields |
1354 | * should NOT be depended on after the call to generic_make_request. | 1354 | * should NOT be depended on after the call to generic_make_request. |
1355 | */ | 1355 | */ |
1356 | static inline void __generic_make_request(struct bio *bio) | 1356 | static inline void __generic_make_request(struct bio *bio) |
1357 | { | 1357 | { |
1358 | struct request_queue *q; | 1358 | struct request_queue *q; |
1359 | sector_t old_sector; | 1359 | sector_t old_sector; |
1360 | int ret, nr_sectors = bio_sectors(bio); | 1360 | int ret, nr_sectors = bio_sectors(bio); |
1361 | dev_t old_dev; | 1361 | dev_t old_dev; |
1362 | int err = -EIO; | 1362 | int err = -EIO; |
1363 | 1363 | ||
1364 | might_sleep(); | 1364 | might_sleep(); |
1365 | 1365 | ||
1366 | if (bio_check_eod(bio, nr_sectors)) | 1366 | if (bio_check_eod(bio, nr_sectors)) |
1367 | goto end_io; | 1367 | goto end_io; |
1368 | 1368 | ||
1369 | /* | 1369 | /* |
1370 | * Resolve the mapping until finished. (drivers are | 1370 | * Resolve the mapping until finished. (drivers are |
1371 | * still free to implement/resolve their own stacking | 1371 | * still free to implement/resolve their own stacking |
1372 | * by explicitly returning 0) | 1372 | * by explicitly returning 0) |
1373 | * | 1373 | * |
1374 | * NOTE: we don't repeat the blk_size check for each new device. | 1374 | * NOTE: we don't repeat the blk_size check for each new device. |
1375 | * Stacking drivers are expected to know what they are doing. | 1375 | * Stacking drivers are expected to know what they are doing. |
1376 | */ | 1376 | */ |
1377 | old_sector = -1; | 1377 | old_sector = -1; |
1378 | old_dev = 0; | 1378 | old_dev = 0; |
1379 | do { | 1379 | do { |
1380 | char b[BDEVNAME_SIZE]; | 1380 | char b[BDEVNAME_SIZE]; |
1381 | 1381 | ||
1382 | q = bdev_get_queue(bio->bi_bdev); | 1382 | q = bdev_get_queue(bio->bi_bdev); |
1383 | if (!q) { | 1383 | if (!q) { |
1384 | printk(KERN_ERR | 1384 | printk(KERN_ERR |
1385 | "generic_make_request: Trying to access " | 1385 | "generic_make_request: Trying to access " |
1386 | "nonexistent block-device %s (%Lu)\n", | 1386 | "nonexistent block-device %s (%Lu)\n", |
1387 | bdevname(bio->bi_bdev, b), | 1387 | bdevname(bio->bi_bdev, b), |
1388 | (long long) bio->bi_sector); | 1388 | (long long) bio->bi_sector); |
1389 | end_io: | 1389 | end_io: |
1390 | bio_endio(bio, err); | 1390 | bio_endio(bio, err); |
1391 | break; | 1391 | break; |
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | if (unlikely(nr_sectors > q->max_hw_sectors)) { | 1394 | if (unlikely(nr_sectors > q->max_hw_sectors)) { |
1395 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1395 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
1396 | bdevname(bio->bi_bdev, b), | 1396 | bdevname(bio->bi_bdev, b), |
1397 | bio_sectors(bio), | 1397 | bio_sectors(bio), |
1398 | q->max_hw_sectors); | 1398 | q->max_hw_sectors); |
1399 | goto end_io; | 1399 | goto end_io; |
1400 | } | 1400 | } |
1401 | 1401 | ||
1402 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 1402 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
1403 | goto end_io; | 1403 | goto end_io; |
1404 | 1404 | ||
1405 | if (should_fail_request(bio)) | 1405 | if (should_fail_request(bio)) |
1406 | goto end_io; | 1406 | goto end_io; |
1407 | 1407 | ||
1408 | /* | 1408 | /* |
1409 | * If this device has partitions, remap block n | 1409 | * If this device has partitions, remap block n |
1410 | * of partition p to block n+start(p) of the disk. | 1410 | * of partition p to block n+start(p) of the disk. |
1411 | */ | 1411 | */ |
1412 | blk_partition_remap(bio); | 1412 | blk_partition_remap(bio); |
1413 | 1413 | ||
1414 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) | 1414 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) |
1415 | goto end_io; | 1415 | goto end_io; |
1416 | 1416 | ||
1417 | if (old_sector != -1) | 1417 | if (old_sector != -1) |
1418 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, | 1418 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, |
1419 | old_sector); | 1419 | old_sector); |
1420 | 1420 | ||
1421 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | 1421 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); |
1422 | 1422 | ||
1423 | old_sector = bio->bi_sector; | 1423 | old_sector = bio->bi_sector; |
1424 | old_dev = bio->bi_bdev->bd_dev; | 1424 | old_dev = bio->bi_bdev->bd_dev; |
1425 | 1425 | ||
1426 | if (bio_check_eod(bio, nr_sectors)) | 1426 | if (bio_check_eod(bio, nr_sectors)) |
1427 | goto end_io; | 1427 | goto end_io; |
1428 | if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || | 1428 | if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || |
1429 | (bio_discard(bio) && !q->prepare_discard_fn)) { | 1429 | (bio_discard(bio) && !q->prepare_discard_fn)) { |
1430 | err = -EOPNOTSUPP; | 1430 | err = -EOPNOTSUPP; |
1431 | goto end_io; | 1431 | goto end_io; |
1432 | } | 1432 | } |
1433 | 1433 | ||
1434 | ret = q->make_request_fn(q, bio); | 1434 | ret = q->make_request_fn(q, bio); |
1435 | } while (ret); | 1435 | } while (ret); |
1436 | } | 1436 | } |
1437 | 1437 | ||
1438 | /* | 1438 | /* |
1439 | * We only want one ->make_request_fn to be active at a time, | 1439 | * We only want one ->make_request_fn to be active at a time, |
1440 | * else stack usage with stacked devices could be a problem. | 1440 | * else stack usage with stacked devices could be a problem. |
1441 | * So use current->bio_{list,tail} to keep a list of requests | 1441 | * So use current->bio_{list,tail} to keep a list of requests |
1442 | * submitted by a make_request_fn function. | 1442 | * submitted by a make_request_fn function. |
1443 | * current->bio_tail is also used as a flag to say if | 1443 | * current->bio_tail is also used as a flag to say if |
1444 | * generic_make_request is currently active in this task or not. | 1444 | * generic_make_request is currently active in this task or not. |
1445 | * If it is NULL, then no make_request is active. If it is non-NULL, | 1445 | * If it is NULL, then no make_request is active. If it is non-NULL, |
1446 | * then a make_request is active, and new requests should be added | 1446 | * then a make_request is active, and new requests should be added |
1447 | * at the tail | 1447 | * at the tail |
1448 | */ | 1448 | */ |
1449 | void generic_make_request(struct bio *bio) | 1449 | void generic_make_request(struct bio *bio) |
1450 | { | 1450 | { |
1451 | if (current->bio_tail) { | 1451 | if (current->bio_tail) { |
1452 | /* make_request is active */ | 1452 | /* make_request is active */ |
1453 | *(current->bio_tail) = bio; | 1453 | *(current->bio_tail) = bio; |
1454 | bio->bi_next = NULL; | 1454 | bio->bi_next = NULL; |
1455 | current->bio_tail = &bio->bi_next; | 1455 | current->bio_tail = &bio->bi_next; |
1456 | return; | 1456 | return; |
1457 | } | 1457 | } |
1458 | /* following loop may be a bit non-obvious, and so deserves some | 1458 | /* following loop may be a bit non-obvious, and so deserves some |
1459 | * explanation. | 1459 | * explanation. |
1460 | * Before entering the loop, bio->bi_next is NULL (as all callers | 1460 | * Before entering the loop, bio->bi_next is NULL (as all callers |
1461 | * ensure that) so we have a list with a single bio. | 1461 | * ensure that) so we have a list with a single bio. |
1462 | * We pretend that we have just taken it off a longer list, so | 1462 | * We pretend that we have just taken it off a longer list, so |
1463 | * we assign bio_list to the next (which is NULL) and bio_tail | 1463 | * we assign bio_list to the next (which is NULL) and bio_tail |
1464 | * to &bio_list, thus initialising the bio_list of new bios to be | 1464 | * to &bio_list, thus initialising the bio_list of new bios to be |
1465 | * added. __generic_make_request may indeed add some more bios | 1465 | * added. __generic_make_request may indeed add some more bios |
1466 | * through a recursive call to generic_make_request. If it | 1466 | * through a recursive call to generic_make_request. If it |
1467 | * did, we find a non-NULL value in bio_list and re-enter the loop | 1467 | * did, we find a non-NULL value in bio_list and re-enter the loop |
1468 | * from the top. In this case we really did just take the bio | 1468 | * from the top. In this case we really did just take the bio |
1469 | * of the top of the list (no pretending) and so fixup bio_list and | 1469 | * of the top of the list (no pretending) and so fixup bio_list and |
1470 | * bio_tail or bi_next, and call into __generic_make_request again. | 1470 | * bio_tail or bi_next, and call into __generic_make_request again. |
1471 | * | 1471 | * |
1472 | * The loop was structured like this to make only one call to | 1472 | * The loop was structured like this to make only one call to |
1473 | * __generic_make_request (which is important as it is large and | 1473 | * __generic_make_request (which is important as it is large and |
1474 | * inlined) and to keep the structure simple. | 1474 | * inlined) and to keep the structure simple. |
1475 | */ | 1475 | */ |
1476 | BUG_ON(bio->bi_next); | 1476 | BUG_ON(bio->bi_next); |
1477 | do { | 1477 | do { |
1478 | current->bio_list = bio->bi_next; | 1478 | current->bio_list = bio->bi_next; |
1479 | if (bio->bi_next == NULL) | 1479 | if (bio->bi_next == NULL) |
1480 | current->bio_tail = ¤t->bio_list; | 1480 | current->bio_tail = ¤t->bio_list; |
1481 | else | 1481 | else |
1482 | bio->bi_next = NULL; | 1482 | bio->bi_next = NULL; |
1483 | __generic_make_request(bio); | 1483 | __generic_make_request(bio); |
1484 | bio = current->bio_list; | 1484 | bio = current->bio_list; |
1485 | } while (bio); | 1485 | } while (bio); |
1486 | current->bio_tail = NULL; /* deactivate */ | 1486 | current->bio_tail = NULL; /* deactivate */ |
1487 | } | 1487 | } |
1488 | EXPORT_SYMBOL(generic_make_request); | 1488 | EXPORT_SYMBOL(generic_make_request); |
1489 | 1489 | ||
1490 | /** | 1490 | /** |
1491 | * submit_bio - submit a bio to the block device layer for I/O | 1491 | * submit_bio - submit a bio to the block device layer for I/O |
1492 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 1492 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
1493 | * @bio: The &struct bio which describes the I/O | 1493 | * @bio: The &struct bio which describes the I/O |
1494 | * | 1494 | * |
1495 | * submit_bio() is very similar in purpose to generic_make_request(), and | 1495 | * submit_bio() is very similar in purpose to generic_make_request(), and |
1496 | * uses that function to do most of the work. Both are fairly rough | 1496 | * uses that function to do most of the work. Both are fairly rough |
1497 | * interfaces; @bio must be presetup and ready for I/O. | 1497 | * interfaces; @bio must be presetup and ready for I/O. |
1498 | * | 1498 | * |
1499 | */ | 1499 | */ |
1500 | void submit_bio(int rw, struct bio *bio) | 1500 | void submit_bio(int rw, struct bio *bio) |
1501 | { | 1501 | { |
1502 | int count = bio_sectors(bio); | 1502 | int count = bio_sectors(bio); |
1503 | 1503 | ||
1504 | bio->bi_rw |= rw; | 1504 | bio->bi_rw |= rw; |
1505 | 1505 | ||
1506 | /* | 1506 | /* |
1507 | * If it's a regular read/write or a barrier with data attached, | 1507 | * If it's a regular read/write or a barrier with data attached, |
1508 | * go through the normal accounting stuff before submission. | 1508 | * go through the normal accounting stuff before submission. |
1509 | */ | 1509 | */ |
1510 | if (bio_has_data(bio)) { | 1510 | if (bio_has_data(bio)) { |
1511 | if (rw & WRITE) { | 1511 | if (rw & WRITE) { |
1512 | count_vm_events(PGPGOUT, count); | 1512 | count_vm_events(PGPGOUT, count); |
1513 | } else { | 1513 | } else { |
1514 | task_io_account_read(bio->bi_size); | 1514 | task_io_account_read(bio->bi_size); |
1515 | count_vm_events(PGPGIN, count); | 1515 | count_vm_events(PGPGIN, count); |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | if (unlikely(block_dump)) { | 1518 | if (unlikely(block_dump)) { |
1519 | char b[BDEVNAME_SIZE]; | 1519 | char b[BDEVNAME_SIZE]; |
1520 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | 1520 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", |
1521 | current->comm, task_pid_nr(current), | 1521 | current->comm, task_pid_nr(current), |
1522 | (rw & WRITE) ? "WRITE" : "READ", | 1522 | (rw & WRITE) ? "WRITE" : "READ", |
1523 | (unsigned long long)bio->bi_sector, | 1523 | (unsigned long long)bio->bi_sector, |
1524 | bdevname(bio->bi_bdev, b)); | 1524 | bdevname(bio->bi_bdev, b)); |
1525 | } | 1525 | } |
1526 | } | 1526 | } |
1527 | 1527 | ||
1528 | generic_make_request(bio); | 1528 | generic_make_request(bio); |
1529 | } | 1529 | } |
1530 | EXPORT_SYMBOL(submit_bio); | 1530 | EXPORT_SYMBOL(submit_bio); |
1531 | 1531 | ||
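A minimal end-to-end sketch of feeding this interface: build a bio against an open block device and hand it off (my_end_io(), my_ctx, bdev, sector and page are all assumptions of the sketch):

	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = my_end_io;		/* hypothetical completion callback */
	bio->bi_private = my_ctx;		/* hypothetical per-I/O context */
	bio_add_page(bio, page, PAGE_SIZE, 0);
	submit_bio(WRITE, bio);			/* accounts, then generic_make_request() */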
1532 | /** | 1532 | /** |
1533 | * __end_that_request_first - end I/O on a request | 1533 | * __end_that_request_first - end I/O on a request |
1534 | * @req: the request being processed | 1534 | * @req: the request being processed |
1535 | * @error: %0 for success, < %0 for error | 1535 | * @error: %0 for success, < %0 for error |
1536 | * @nr_bytes: number of bytes to complete | 1536 | * @nr_bytes: number of bytes to complete |
1537 | * | 1537 | * |
1538 | * Description: | 1538 | * Description: |
1539 | * Ends I/O on a number of bytes attached to @req, and sets it up | 1539 | * Ends I/O on a number of bytes attached to @req, and sets it up |
1540 | * for the next range of segments (if any) in the cluster. | 1540 | * for the next range of segments (if any) in the cluster. |
1541 | * | 1541 | * |
1542 | * Return: | 1542 | * Return: |
1543 | * %0 - we are done with this request, call end_that_request_last() | 1543 | * %0 - we are done with this request, call end_that_request_last() |
1544 | * %1 - still buffers pending for this request | 1544 | * %1 - still buffers pending for this request |
1545 | **/ | 1545 | **/ |
1546 | static int __end_that_request_first(struct request *req, int error, | 1546 | static int __end_that_request_first(struct request *req, int error, |
1547 | int nr_bytes) | 1547 | int nr_bytes) |
1548 | { | 1548 | { |
1549 | int total_bytes, bio_nbytes, next_idx = 0; | 1549 | int total_bytes, bio_nbytes, next_idx = 0; |
1550 | struct bio *bio; | 1550 | struct bio *bio; |
1551 | 1551 | ||
1552 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); | 1552 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); |
1553 | 1553 | ||
1554 | /* | 1554 | /* |
1555 | * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual | 1555 | * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual |
1556 | * sense key with us all the way through | 1556 | * sense key with us all the way through |
1557 | */ | 1557 | */ |
1558 | if (!blk_pc_request(req)) | 1558 | if (!blk_pc_request(req)) |
1559 | req->errors = 0; | 1559 | req->errors = 0; |
1560 | 1560 | ||
1561 | if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { | 1561 | if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { |
1562 | printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", | 1562 | printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", |
1563 | req->rq_disk ? req->rq_disk->disk_name : "?", | 1563 | req->rq_disk ? req->rq_disk->disk_name : "?", |
1564 | (unsigned long long)req->sector); | 1564 | (unsigned long long)req->sector); |
1565 | } | 1565 | } |
1566 | 1566 | ||
1567 | if (blk_fs_request(req) && req->rq_disk) { | 1567 | if (blk_fs_request(req) && req->rq_disk) { |
1568 | const int rw = rq_data_dir(req); | 1568 | const int rw = rq_data_dir(req); |
1569 | struct hd_struct *part; | 1569 | struct hd_struct *part; |
1570 | int cpu; | 1570 | int cpu; |
1571 | 1571 | ||
1572 | cpu = part_stat_lock(); | 1572 | cpu = part_stat_lock(); |
1573 | part = disk_map_sector_rcu(req->rq_disk, req->sector); | 1573 | part = disk_map_sector_rcu(req->rq_disk, req->sector); |
1574 | part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); | 1574 | part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); |
1575 | part_stat_unlock(); | 1575 | part_stat_unlock(); |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | total_bytes = bio_nbytes = 0; | 1578 | total_bytes = bio_nbytes = 0; |
1579 | while ((bio = req->bio) != NULL) { | 1579 | while ((bio = req->bio) != NULL) { |
1580 | int nbytes; | 1580 | int nbytes; |
1581 | 1581 | ||
1582 | /* | 1582 | /* |
1583 | * For an empty barrier request, the low level driver must | 1583 | * For an empty barrier request, the low level driver must |
1584 | * store a potential error location in ->sector. We pass | 1584 | * store a potential error location in ->sector. We pass |
1585 | * that back up in ->bi_sector. | 1585 | * that back up in ->bi_sector. |
1586 | */ | 1586 | */ |
1587 | if (blk_empty_barrier(req)) | 1587 | if (blk_empty_barrier(req)) |
1588 | bio->bi_sector = req->sector; | 1588 | bio->bi_sector = req->sector; |
1589 | 1589 | ||
1590 | if (nr_bytes >= bio->bi_size) { | 1590 | if (nr_bytes >= bio->bi_size) { |
1591 | req->bio = bio->bi_next; | 1591 | req->bio = bio->bi_next; |
1592 | nbytes = bio->bi_size; | 1592 | nbytes = bio->bi_size; |
1593 | req_bio_endio(req, bio, nbytes, error); | 1593 | req_bio_endio(req, bio, nbytes, error); |
1594 | next_idx = 0; | 1594 | next_idx = 0; |
1595 | bio_nbytes = 0; | 1595 | bio_nbytes = 0; |
1596 | } else { | 1596 | } else { |
1597 | int idx = bio->bi_idx + next_idx; | 1597 | int idx = bio->bi_idx + next_idx; |
1598 | 1598 | ||
1599 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { | 1599 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { |
1600 | blk_dump_rq_flags(req, "__end_that"); | 1600 | blk_dump_rq_flags(req, "__end_that"); |
1601 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", | 1601 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", |
1602 | __func__, bio->bi_idx, bio->bi_vcnt); | 1602 | __func__, bio->bi_idx, bio->bi_vcnt); |
1603 | break; | 1603 | break; |
1604 | } | 1604 | } |
1605 | 1605 | ||
1606 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 1606 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
1607 | BIO_BUG_ON(nbytes > bio->bi_size); | 1607 | BIO_BUG_ON(nbytes > bio->bi_size); |
1608 | 1608 | ||
1609 | /* | 1609 | /* |
1610 | * not a complete bvec done | 1610 | * not a complete bvec done |
1611 | */ | 1611 | */ |
1612 | if (unlikely(nbytes > nr_bytes)) { | 1612 | if (unlikely(nbytes > nr_bytes)) { |
1613 | bio_nbytes += nr_bytes; | 1613 | bio_nbytes += nr_bytes; |
1614 | total_bytes += nr_bytes; | 1614 | total_bytes += nr_bytes; |
1615 | break; | 1615 | break; |
1616 | } | 1616 | } |
1617 | 1617 | ||
1618 | /* | 1618 | /* |
1619 | * advance to the next vector | 1619 | * advance to the next vector |
1620 | */ | 1620 | */ |
1621 | next_idx++; | 1621 | next_idx++; |
1622 | bio_nbytes += nbytes; | 1622 | bio_nbytes += nbytes; |
1623 | } | 1623 | } |
1624 | 1624 | ||
1625 | total_bytes += nbytes; | 1625 | total_bytes += nbytes; |
1626 | nr_bytes -= nbytes; | 1626 | nr_bytes -= nbytes; |
1627 | 1627 | ||
1628 | bio = req->bio; | 1628 | bio = req->bio; |
1629 | if (bio) { | 1629 | if (bio) { |
1630 | /* | 1630 | /* |
1631 | * end more in this run, or just return 'not-done' | 1631 | * end more in this run, or just return 'not-done' |
1632 | */ | 1632 | */ |
1633 | if (unlikely(nr_bytes <= 0)) | 1633 | if (unlikely(nr_bytes <= 0)) |
1634 | break; | 1634 | break; |
1635 | } | 1635 | } |
1636 | } | 1636 | } |
1637 | 1637 | ||
1638 | /* | 1638 | /* |
1639 | * completely done | 1639 | * completely done |
1640 | */ | 1640 | */ |
1641 | if (!req->bio) | 1641 | if (!req->bio) |
1642 | return 0; | 1642 | return 0; |
1643 | 1643 | ||
1644 | /* | 1644 | /* |
1645 | * if the request wasn't completed, update state | 1645 | * if the request wasn't completed, update state |
1646 | */ | 1646 | */ |
1647 | if (bio_nbytes) { | 1647 | if (bio_nbytes) { |
1648 | req_bio_endio(req, bio, bio_nbytes, error); | 1648 | req_bio_endio(req, bio, bio_nbytes, error); |
1649 | bio->bi_idx += next_idx; | 1649 | bio->bi_idx += next_idx; |
1650 | bio_iovec(bio)->bv_offset += nr_bytes; | 1650 | bio_iovec(bio)->bv_offset += nr_bytes; |
1651 | bio_iovec(bio)->bv_len -= nr_bytes; | 1651 | bio_iovec(bio)->bv_len -= nr_bytes; |
1652 | } | 1652 | } |
1653 | 1653 | ||
1654 | blk_recalc_rq_sectors(req, total_bytes >> 9); | 1654 | blk_recalc_rq_sectors(req, total_bytes >> 9); |
1655 | blk_recalc_rq_segments(req); | 1655 | blk_recalc_rq_segments(req); |
1656 | return 1; | 1656 | return 1; |
1657 | } | 1657 | } |
1658 | 1658 | ||
1659 | /* | 1659 | /* |
1660 | * queue lock must be held | 1660 | * queue lock must be held |
1661 | */ | 1661 | */ |
1662 | static void end_that_request_last(struct request *req, int error) | 1662 | static void end_that_request_last(struct request *req, int error) |
1663 | { | 1663 | { |
1664 | struct gendisk *disk = req->rq_disk; | 1664 | struct gendisk *disk = req->rq_disk; |
1665 | 1665 | ||
1666 | blk_delete_timer(req); | 1666 | blk_delete_timer(req); |
1667 | 1667 | ||
1668 | if (blk_rq_tagged(req)) | 1668 | if (blk_rq_tagged(req)) |
1669 | blk_queue_end_tag(req->q, req); | 1669 | blk_queue_end_tag(req->q, req); |
1670 | 1670 | ||
1671 | if (blk_queued_rq(req)) | 1671 | if (blk_queued_rq(req)) |
1672 | blkdev_dequeue_request(req); | 1672 | blkdev_dequeue_request(req); |
1673 | 1673 | ||
1674 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 1674 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
1675 | laptop_io_completion(); | 1675 | laptop_io_completion(); |
1676 | 1676 | ||
1677 | /* | 1677 | /* |
1678 | * Account IO completion. bar_rq isn't accounted as a normal | 1678 | * Account IO completion. bar_rq isn't accounted as a normal |
1679 | * IO on queueing or completion. Accounting the containing | 1679 | * IO on queueing or completion. Accounting the containing |
1680 | * request is enough. | 1680 | * request is enough. |
1681 | */ | 1681 | */ |
1682 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { | 1682 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { |
1683 | unsigned long duration = jiffies - req->start_time; | 1683 | unsigned long duration = jiffies - req->start_time; |
1684 | const int rw = rq_data_dir(req); | 1684 | const int rw = rq_data_dir(req); |
1685 | struct hd_struct *part; | 1685 | struct hd_struct *part; |
1686 | int cpu; | 1686 | int cpu; |
1687 | 1687 | ||
1688 | cpu = part_stat_lock(); | 1688 | cpu = part_stat_lock(); |
1689 | part = disk_map_sector_rcu(disk, req->sector); | 1689 | part = disk_map_sector_rcu(disk, req->sector); |
1690 | 1690 | ||
1691 | part_stat_inc(cpu, part, ios[rw]); | 1691 | part_stat_inc(cpu, part, ios[rw]); |
1692 | part_stat_add(cpu, part, ticks[rw], duration); | 1692 | part_stat_add(cpu, part, ticks[rw], duration); |
1693 | part_round_stats(cpu, part); | 1693 | part_round_stats(cpu, part); |
1694 | part_dec_in_flight(part); | 1694 | part_dec_in_flight(part); |
1695 | 1695 | ||
1696 | part_stat_unlock(); | 1696 | part_stat_unlock(); |
1697 | } | 1697 | } |
1698 | 1698 | ||
1699 | if (req->end_io) | 1699 | if (req->end_io) |
1700 | req->end_io(req, error); | 1700 | req->end_io(req, error); |
1701 | else { | 1701 | else { |
1702 | if (blk_bidi_rq(req)) | 1702 | if (blk_bidi_rq(req)) |
1703 | __blk_put_request(req->next_rq->q, req->next_rq); | 1703 | __blk_put_request(req->next_rq->q, req->next_rq); |
1704 | 1704 | ||
1705 | __blk_put_request(req->q, req); | 1705 | __blk_put_request(req->q, req); |
1706 | } | 1706 | } |
1707 | } | 1707 | } |
1708 | 1708 | ||
1709 | static inline void __end_request(struct request *rq, int uptodate, | 1709 | static inline void __end_request(struct request *rq, int uptodate, |
1710 | unsigned int nr_bytes) | 1710 | unsigned int nr_bytes) |
1711 | { | 1711 | { |
1712 | int error = 0; | 1712 | int error = 0; |
1713 | 1713 | ||
1714 | if (uptodate <= 0) | 1714 | if (uptodate <= 0) |
1715 | error = uptodate ? uptodate : -EIO; | 1715 | error = uptodate ? uptodate : -EIO; |
1716 | 1716 | ||
1717 | __blk_end_request(rq, error, nr_bytes); | 1717 | __blk_end_request(rq, error, nr_bytes); |
1718 | } | 1718 | } |
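For reference, the legacy @uptodate convention that __end_request() folds into an errno-style @error value works out as follows; this is just a summary of the branch above, not part of the patch:

	/*
	 * uptodate  > 0  ->  success          (error = 0)
	 * uptodate == 0  ->  generic failure  (error = -EIO)
	 * uptodate  < 0  ->  specific errno   (error = uptodate)
	 */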
1719 | 1719 | ||
1720 | /** | 1720 | /** |
1721 | * blk_rq_bytes - Returns bytes left to complete in the entire request | 1721 | * blk_rq_bytes - Returns bytes left to complete in the entire request |
1722 | * @rq: the request being processed | 1722 | * @rq: the request being processed |
1723 | **/ | 1723 | **/ |
1724 | unsigned int blk_rq_bytes(struct request *rq) | 1724 | unsigned int blk_rq_bytes(struct request *rq) |
1725 | { | 1725 | { |
1726 | if (blk_fs_request(rq)) | 1726 | if (blk_fs_request(rq)) |
1727 | return rq->hard_nr_sectors << 9; | 1727 | return rq->hard_nr_sectors << 9; |
1728 | 1728 | ||
1729 | return rq->data_len; | 1729 | return rq->data_len; |
1730 | } | 1730 | } |
1731 | EXPORT_SYMBOL_GPL(blk_rq_bytes); | 1731 | EXPORT_SYMBOL_GPL(blk_rq_bytes); |
1732 | 1732 | ||
1733 | /** | 1733 | /** |
1734 | * blk_rq_cur_bytes - Returns bytes left to complete in the current segment | 1734 | * blk_rq_cur_bytes - Returns bytes left to complete in the current segment |
1735 | * @rq: the request being processed | 1735 | * @rq: the request being processed |
1736 | **/ | 1736 | **/ |
1737 | unsigned int blk_rq_cur_bytes(struct request *rq) | 1737 | unsigned int blk_rq_cur_bytes(struct request *rq) |
1738 | { | 1738 | { |
1739 | if (blk_fs_request(rq)) | 1739 | if (blk_fs_request(rq)) |
1740 | return rq->current_nr_sectors << 9; | 1740 | return rq->current_nr_sectors << 9; |
1741 | 1741 | ||
1742 | if (rq->bio) | 1742 | if (rq->bio) |
1743 | return rq->bio->bi_size; | 1743 | return rq->bio->bi_size; |
1744 | 1744 | ||
1745 | return rq->data_len; | 1745 | return rq->data_len; |
1746 | } | 1746 | } |
1747 | EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); | 1747 | EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); |
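A minimal sketch of how a driver could combine these accessors to report a residual byte count after a partial transfer; resid_bytes and xferred are illustrative names, not part of this patch:

	/* Bytes of @rq still outstanding after the hardware moved
	 * 'xferred' bytes; purely illustrative. */
	static unsigned int resid_bytes(struct request *rq,
					unsigned int xferred)
	{
		unsigned int total = blk_rq_bytes(rq);

		return total > xferred ? total - xferred : 0;
	}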
1748 | 1748 | ||
1749 | /** | 1749 | /** |
1750 | * end_queued_request - end all I/O on a queued request | 1750 | * end_queued_request - end all I/O on a queued request |
1751 | * @rq: the request being processed | 1751 | * @rq: the request being processed |
1752 | * @uptodate: error value or %0/%1 uptodate flag | 1752 | * @uptodate: error value or %0/%1 uptodate flag |
1753 | * | 1753 | * |
1754 | * Description: | 1754 | * Description: |
1755 | * Ends all I/O on a request, and removes it from the block layer queues. | 1755 | * Ends all I/O on a request, and removes it from the block layer queues. |
1756 | * Not suitable for normal I/O completion, unless the driver still has | 1756 | * Not suitable for normal I/O completion, unless the driver still has |
1757 | * the request attached to the block layer. | 1757 | * the request attached to the block layer. |
1758 | * | 1758 | * |
1759 | **/ | 1759 | **/ |
1760 | void end_queued_request(struct request *rq, int uptodate) | 1760 | void end_queued_request(struct request *rq, int uptodate) |
1761 | { | 1761 | { |
1762 | __end_request(rq, uptodate, blk_rq_bytes(rq)); | 1762 | __end_request(rq, uptodate, blk_rq_bytes(rq)); |
1763 | } | 1763 | } |
1764 | EXPORT_SYMBOL(end_queued_request); | 1764 | EXPORT_SYMBOL(end_queued_request); |
1765 | 1765 | ||
1766 | /** | 1766 | /** |
1767 | * end_dequeued_request - end all I/O on a dequeued request | 1767 | * end_dequeued_request - end all I/O on a dequeued request |
1768 | * @rq: the request being processed | 1768 | * @rq: the request being processed |
1769 | * @uptodate: error value or %0/%1 uptodate flag | 1769 | * @uptodate: error value or %0/%1 uptodate flag |
1770 | * | 1770 | * |
1771 | * Description: | 1771 | * Description: |
1772 | * Ends all I/O on a request. The request must already have been | 1772 | * Ends all I/O on a request. The request must already have been |
1773 | * dequeued using blkdev_dequeue_request(), as is normally the case | 1773 | * dequeued using blkdev_dequeue_request(), as is normally the case |
1774 | * for most drivers. | 1774 | * for most drivers. |
1775 | * | 1775 | * |
1776 | **/ | 1776 | **/ |
1777 | void end_dequeued_request(struct request *rq, int uptodate) | 1777 | void end_dequeued_request(struct request *rq, int uptodate) |
1778 | { | 1778 | { |
1779 | __end_request(rq, uptodate, blk_rq_bytes(rq)); | 1779 | __end_request(rq, uptodate, blk_rq_bytes(rq)); |
1780 | } | 1780 | } |
1781 | EXPORT_SYMBOL(end_dequeued_request); | 1781 | EXPORT_SYMBOL(end_dequeued_request); |
1782 | 1782 | ||
1783 | 1783 | ||
1784 | /** | 1784 | /** |
1785 | * end_request - end I/O on the current segment of the request | 1785 | * end_request - end I/O on the current segment of the request |
1786 | * @req: the request being processed | 1786 | * @req: the request being processed |
1787 | * @uptodate: error value or %0/%1 uptodate flag | 1787 | * @uptodate: error value or %0/%1 uptodate flag |
1788 | * | 1788 | * |
1789 | * Description: | 1789 | * Description: |
1790 | * Ends I/O on the current segment of a request. If that is the only | 1790 | * Ends I/O on the current segment of a request. If that is the only |
1791 | * remaining segment, the request is also completed and freed. | 1791 | * remaining segment, the request is also completed and freed. |
1792 | * | 1792 | * |
1793 | * This is a remnant of how older block drivers handled I/O completions. | 1793 | * This is a remnant of how older block drivers handled I/O completions. |
1794 | * Modern drivers typically end I/O on the full request in one go, unless | 1794 | * Modern drivers typically end I/O on the full request in one go, unless |
1795 | * they have a residual value to account for. For that case this function | 1795 | * they have a residual value to account for. For that case this function |
1796 | * isn't really useful, unless the residual just happens to be the | 1796 | * isn't really useful, unless the residual just happens to be the |
1797 | * full current segment. In other words, don't use this function in new | 1797 | * full current segment. In other words, don't use this function in new |
1798 | * code. Use blk_end_request() or __blk_end_request() to end partial parts | 1798 | * code. Use blk_end_request() or __blk_end_request() to end partial parts |
1799 | * of a request, or end_dequeued_request() and end_queued_request() to | 1799 | * of a request, or end_dequeued_request() and end_queued_request() to |
1800 | * completely end IO on a dequeued/queued request. | 1800 | * completely end IO on a dequeued/queued request. |
1801 | * | 1801 | * |
1802 | **/ | 1802 | **/ |
1803 | void end_request(struct request *req, int uptodate) | 1803 | void end_request(struct request *req, int uptodate) |
1804 | { | 1804 | { |
1805 | __end_request(req, uptodate, req->hard_cur_sectors << 9); | 1805 | __end_request(req, uptodate, req->hard_cur_sectors << 9); |
1806 | } | 1806 | } |
1807 | EXPORT_SYMBOL(end_request); | 1807 | EXPORT_SYMBOL(end_request); |
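To make the kernel-doc's warning concrete, this is roughly the historical per-segment pattern that end_request() served, sketched with the hypothetical helper my_xfer_one_segment(); new code should use blk_end_request() instead, as stated above:

	/* Historical pattern only. Assumes a driver-private helper
	 * my_xfer_one_segment() that moves the current segment and
	 * returns 1 on success, 0 on failure. */
	static void my_segment_intr(struct request *req)
	{
		int ok = my_xfer_one_segment(req);

		/* Ends hard_cur_sectors worth of I/O; completes and
		 * frees the request when the last segment finishes. */
		end_request(req, ok);
	}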
1808 | 1808 | ||
1809 | static int end_that_request_data(struct request *rq, int error, | ||
1810 | unsigned int nr_bytes, unsigned int bidi_bytes) | ||
1811 | { | ||
1812 | if (rq->bio) { | ||
1813 | if (__end_that_request_first(rq, error, nr_bytes)) | ||
1814 | return 1; | ||
1815 | |||
1816 | /* Bidi request must be completed as a whole */ | ||
1817 | if (blk_bidi_rq(rq) && | ||
1818 | __end_that_request_first(rq->next_rq, error, bidi_bytes)) | ||
1819 | return 1; | ||
1820 | } | ||
1821 | |||
1822 | return 0; | ||
1823 | } | ||
1824 | |||
1809 | /** | 1825 | /** |
1810 | * blk_end_io - Generic end_io function to complete a request. | 1826 | * blk_end_io - Generic end_io function to complete a request. |
1811 | * @rq: the request being processed | 1827 | * @rq: the request being processed |
1812 | * @error: %0 for success, < %0 for error | 1828 | * @error: %0 for success, < %0 for error |
1813 | * @nr_bytes: number of bytes to complete @rq | 1829 | * @nr_bytes: number of bytes to complete @rq |
1814 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 1830 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
1815 | * @drv_callback: function called between completion of bios in the request | 1831 | * @drv_callback: function called between completion of bios in the request |
1816 | * and completion of the request. | 1832 | * and completion of the request. |
1817 | * If the callback returns non %0, this helper returns without | 1833 | * If the callback returns non %0, this helper returns without |
1818 | * completion of the request. | 1834 | * completion of the request. |
1819 | * | 1835 | * |
1820 | * Description: | 1836 | * Description: |
1821 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | 1837 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
1822 | * If @rq has leftover, sets it up for the next range of segments. | 1838 | * If @rq has leftover, sets it up for the next range of segments. |
1823 | * | 1839 | * |
1824 | * Return: | 1840 | * Return: |
1825 | * %0 - we are done with this request | 1841 | * %0 - we are done with this request |
1826 | * %1 - this request is not freed yet, it still has pending buffers. | 1842 | * %1 - this request is not freed yet, it still has pending buffers. |
1827 | **/ | 1843 | **/ |
1828 | static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, | 1844 | static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, |
1829 | unsigned int bidi_bytes, | 1845 | unsigned int bidi_bytes, |
1830 | int (drv_callback)(struct request *)) | 1846 | int (drv_callback)(struct request *)) |
1831 | { | 1847 | { |
1832 | struct request_queue *q = rq->q; | 1848 | struct request_queue *q = rq->q; |
1833 | unsigned long flags = 0UL; | 1849 | unsigned long flags = 0UL; |
1834 | 1850 | ||
1835 | if (rq->bio) { | 1851 | if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) |
1836 | if (__end_that_request_first(rq, error, nr_bytes)) | 1852 | return 1; |
1837 | return 1; | ||
1838 | 1853 | ||
1839 | /* Bidi request must be completed as a whole */ | ||
1840 | if (blk_bidi_rq(rq) && | ||
1841 | __end_that_request_first(rq->next_rq, error, bidi_bytes)) | ||
1842 | return 1; | ||
1843 | } | ||
1844 | |||
1845 | /* Special feature for tricky drivers */ | 1854 | /* Special feature for tricky drivers */ |
1846 | if (drv_callback && drv_callback(rq)) | 1855 | if (drv_callback && drv_callback(rq)) |
1847 | return 1; | 1856 | return 1; |
1848 | 1857 | ||
1849 | add_disk_randomness(rq->rq_disk); | 1858 | add_disk_randomness(rq->rq_disk); |
1850 | 1859 | ||
1851 | spin_lock_irqsave(q->queue_lock, flags); | 1860 | spin_lock_irqsave(q->queue_lock, flags); |
1852 | end_that_request_last(rq, error); | 1861 | end_that_request_last(rq, error); |
1853 | spin_unlock_irqrestore(q->queue_lock, flags); | 1862 | spin_unlock_irqrestore(q->queue_lock, flags); |
1854 | 1863 | ||
1855 | return 0; | 1864 | return 0; |
1856 | } | 1865 | } |
1857 | 1866 | ||
1858 | /** | 1867 | /** |
1859 | * blk_end_request - Helper function for drivers to complete the request. | 1868 | * blk_end_request - Helper function for drivers to complete the request. |
1860 | * @rq: the request being processed | 1869 | * @rq: the request being processed |
1861 | * @error: %0 for success, < %0 for error | 1870 | * @error: %0 for success, < %0 for error |
1862 | * @nr_bytes: number of bytes to complete | 1871 | * @nr_bytes: number of bytes to complete |
1863 | * | 1872 | * |
1864 | * Description: | 1873 | * Description: |
1865 | * Ends I/O on a number of bytes attached to @rq. | 1874 | * Ends I/O on a number of bytes attached to @rq. |
1866 | * If @rq has leftover, sets it up for the next range of segments. | 1875 | * If @rq has leftover, sets it up for the next range of segments. |
1867 | * | 1876 | * |
1868 | * Return: | 1877 | * Return: |
1869 | * %0 - we are done with this request | 1878 | * %0 - we are done with this request |
1870 | * %1 - still buffers pending for this request | 1879 | * %1 - still buffers pending for this request |
1871 | **/ | 1880 | **/ |
1872 | int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 1881 | int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
1873 | { | 1882 | { |
1874 | return blk_end_io(rq, error, nr_bytes, 0, NULL); | 1883 | return blk_end_io(rq, error, nr_bytes, 0, NULL); |
1875 | } | 1884 | } |
1876 | EXPORT_SYMBOL_GPL(blk_end_request); | 1885 | EXPORT_SYMBOL_GPL(blk_end_request); |
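A minimal sketch of the common case: completing an entire request from a driver's completion path with the queue lock not held. Here 'err' is 0 or a negative errno, and the surrounding driver context is assumed:

	/* Whole-request completion; with nr_bytes == blk_rq_bytes(rq)
	 * there should be no leftover, so a return of 1 would point
	 * at a driver bug. */
	static void my_complete_rq(struct request *rq, int err)
	{
		WARN_ON(blk_end_request(rq, err, blk_rq_bytes(rq)));
	}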
1877 | 1886 | ||
1878 | /** | 1887 | /** |
1879 | * __blk_end_request - Helper function for drivers to complete the request. | 1888 | * __blk_end_request - Helper function for drivers to complete the request. |
1880 | * @rq: the request being processed | 1889 | * @rq: the request being processed |
1881 | * @error: %0 for success, < %0 for error | 1890 | * @error: %0 for success, < %0 for error |
1882 | * @nr_bytes: number of bytes to complete | 1891 | * @nr_bytes: number of bytes to complete |
1883 | * | 1892 | * |
1884 | * Description: | 1893 | * Description: |
1885 | * Must be called with queue lock held unlike blk_end_request(). | 1894 | * Must be called with queue lock held unlike blk_end_request(). |
1886 | * | 1895 | * |
1887 | * Return: | 1896 | * Return: |
1888 | * %0 - we are done with this request | 1897 | * %0 - we are done with this request |
1889 | * %1 - still buffers pending for this request | 1898 | * %1 - still buffers pending for this request |
1890 | **/ | 1899 | **/ |
1891 | int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 1900 | int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
1892 | { | 1901 | { |
1893 | if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) | 1902 | if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) |
1894 | return 1; | 1903 | return 1; |
1895 | 1904 | ||
1896 | add_disk_randomness(rq->rq_disk); | 1905 | add_disk_randomness(rq->rq_disk); |
1897 | 1906 | ||
1898 | end_that_request_last(rq, error); | 1907 | end_that_request_last(rq, error); |
1899 | 1908 | ||
1900 | return 0; | 1909 | return 0; |
1901 | } | 1910 | } |
1902 | EXPORT_SYMBOL_GPL(__blk_end_request); | 1911 | EXPORT_SYMBOL_GPL(__blk_end_request); |
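And the locked variant in context: a sketch of failing requests straight from a ->request_fn(), which the block layer invokes with the queue lock held. elv_next_request() and blkdev_dequeue_request() are the existing interfaces of this kernel; the unconditional -EIO is just for illustration:

	static void my_request_fn(struct request_queue *q)
	{
		struct request *rq;

		/* queue_lock is already held here, so use the
		 * lock-held completion helper. */
		while ((rq = elv_next_request(q)) != NULL) {
			blkdev_dequeue_request(rq);
			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
		}
	}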
1903 | 1912 | ||
1904 | /** | 1913 | /** |
1905 | * blk_end_bidi_request - Helper function for drivers to complete bidi request. | 1914 | * blk_end_bidi_request - Helper function for drivers to complete bidi request. |
1906 | * @rq: the bidi request being processed | 1915 | * @rq: the bidi request being processed |
1907 | * @error: %0 for success, < %0 for error | 1916 | * @error: %0 for success, < %0 for error |
1908 | * @nr_bytes: number of bytes to complete @rq | 1917 | * @nr_bytes: number of bytes to complete @rq |
1909 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 1918 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
1910 | * | 1919 | * |
1911 | * Description: | 1920 | * Description: |
1912 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | 1921 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
1913 | * | 1922 | * |
1914 | * Return: | 1923 | * Return: |
1915 | * %0 - we are done with this request | 1924 | * %0 - we are done with this request |
1916 | * %1 - still buffers pending for this request | 1925 | * %1 - still buffers pending for this request |
1917 | **/ | 1926 | **/ |
1918 | int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, | 1927 | int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, |
1919 | unsigned int bidi_bytes) | 1928 | unsigned int bidi_bytes) |
1920 | { | 1929 | { |
1921 | return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); | 1930 | return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); |
1922 | } | 1931 | } |
1923 | EXPORT_SYMBOL_GPL(blk_end_bidi_request); | 1932 | EXPORT_SYMBOL_GPL(blk_end_bidi_request); |
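Since a bidi pair must be completed as a whole (see the comment in end_that_request_data() above), a caller hands both byte counts over in one call; this fragment assumes 'rq' and 'error' from the surrounding driver context:

	/* Complete @rq and its paired @rq->next_rq together. */
	blk_end_bidi_request(rq, error,
			     blk_rq_bytes(rq),
			     blk_rq_bytes(rq->next_rq));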
1933 | |||
1934 | /** | ||
1935 | * blk_update_request - Special helper function for request stacking drivers | ||
1936 | * @rq: the request being processed | ||
1937 | * @error: %0 for success, < %0 for error | ||
1938 | * @nr_bytes: number of bytes to complete @rq | ||
1939 | * | ||
1940 | * Description: | ||
1941 | * Ends I/O on a number of bytes attached to @rq, but doesn't complete | ||
1942 | * the request structure even if @rq doesn't have leftover. | ||
1943 | * If @rq has leftover, sets it up for the next range of segments. | ||
1944 | * | ||
1945 | * This special helper function is only for request stacking drivers | ||
1946 | * (e.g. request-based dm) so that they can handle partial completion. | ||
1947 | * Actual device drivers should use blk_end_request instead. | ||
1948 | */ | ||
1949 | void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) | ||
1950 | { | ||
1951 | if (!end_that_request_data(rq, error, nr_bytes, 0)) { | ||
1952 | /* | ||
1953 | * These members are not updated in end_that_request_data() | ||
1954 | * when all bios are completed. | ||
1955 | * Update them so that the request stacking driver can find | ||
1956 | * how many bytes remain in the request later. | ||
1957 | */ | ||
1958 | rq->nr_sectors = rq->hard_nr_sectors = 0; | ||
1959 | rq->current_nr_sectors = rq->hard_cur_sectors = 0; | ||
1960 | } | ||
1961 | } | ||
1962 | EXPORT_SYMBOL_GPL(blk_update_request); | ||
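A hypothetical sketch of the usage the commit message describes: request-based dm calling blk_update_request() from the cloned request's bio->bi_end_io, so the data part of the original request is completed in the lower driver's completion context. The bi_private mapping and the byte count taken from the bio are illustrative bookkeeping, not part of this patch:

	static void clone_bio_end_io(struct bio *bio, int error)
	{
		/* Assumed: the stacking driver stashed a pointer to
		 * the original request when it cloned the bio. */
		struct request *orig = bio->bi_private;
		unsigned int nr_bytes = bio->bi_size;

		/* Completes bios of the original request, but never
		 * frees the struct request itself, even when nothing
		 * is left over. */
		blk_update_request(orig, error, nr_bytes);
	}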
1924 | 1963 | ||
1925 | /** | 1964 | /** |
1926 | * blk_end_request_callback - Special helper function for tricky drivers | 1965 | * blk_end_request_callback - Special helper function for tricky drivers |
1927 | * @rq: the request being processed | 1966 | * @rq: the request being processed |
1928 | * @error: %0 for success, < %0 for error | 1967 | * @error: %0 for success, < %0 for error |
1929 | * @nr_bytes: number of bytes to complete | 1968 | * @nr_bytes: number of bytes to complete |
1930 | * @drv_callback: function called between completion of bios in the request | 1969 | * @drv_callback: function called between completion of bios in the request |
1931 | * and completion of the request. | 1970 | * and completion of the request. |
1932 | * If the callback returns non %0, this helper returns without | 1971 | * If the callback returns non %0, this helper returns without |
1933 | * completion of the request. | 1972 | * completion of the request. |
1934 | * | 1973 | * |
1935 | * Description: | 1974 | * Description: |
1936 | * Ends I/O on a number of bytes attached to @rq. | 1975 | * Ends I/O on a number of bytes attached to @rq. |
1937 | * If @rq has leftover, sets it up for the next range of segments. | 1976 | * If @rq has leftover, sets it up for the next range of segments. |
1938 | * | 1977 | * |
1939 | * This special helper function is used only for existing tricky drivers. | 1978 | * This special helper function is used only for existing tricky drivers. |
1940 | * (e.g. cdrom_newpc_intr() of ide-cd) | 1979 | * (e.g. cdrom_newpc_intr() of ide-cd) |
1941 | * This interface will be removed when such drivers are rewritten. | 1980 | * This interface will be removed when such drivers are rewritten. |
1942 | * Don't use this interface in other places anymore. | 1981 | * Don't use this interface in other places anymore. |
1943 | * | 1982 | * |
1944 | * Return: | 1983 | * Return: |
1945 | * %0 - we are done with this request | 1984 | * %0 - we are done with this request |
1946 | * %1 - this request is not freed yet. | 1985 | * %1 - this request is not freed yet. |
1947 | * this request still has pending buffers or | 1986 | * this request still has pending buffers or |
1948 | * the driver doesn't want to finish this request yet. | 1987 | * the driver doesn't want to finish this request yet. |
1949 | **/ | 1988 | **/ |
1950 | int blk_end_request_callback(struct request *rq, int error, | 1989 | int blk_end_request_callback(struct request *rq, int error, |
1951 | unsigned int nr_bytes, | 1990 | unsigned int nr_bytes, |
1952 | int (drv_callback)(struct request *)) | 1991 | int (drv_callback)(struct request *)) |
1953 | { | 1992 | { |
1954 | return blk_end_io(rq, error, nr_bytes, 0, drv_callback); | 1993 | return blk_end_io(rq, error, nr_bytes, 0, drv_callback); |
1955 | } | 1994 | } |
1956 | EXPORT_SYMBOL_GPL(blk_end_request_callback); | 1995 | EXPORT_SYMBOL_GPL(blk_end_request_callback); |
1957 | 1996 | ||
1958 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 1997 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
1959 | struct bio *bio) | 1998 | struct bio *bio) |
1960 | { | 1999 | { |
1961 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and | 2000 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and |
1962 | we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ | 2001 | we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ |
1963 | rq->cmd_flags |= (bio->bi_rw & 3); | 2002 | rq->cmd_flags |= (bio->bi_rw & 3); |
1964 | 2003 | ||
1965 | if (bio_has_data(bio)) { | 2004 | if (bio_has_data(bio)) { |
1966 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 2005 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
1967 | rq->buffer = bio_data(bio); | 2006 | rq->buffer = bio_data(bio); |
1968 | } | 2007 | } |
1969 | rq->current_nr_sectors = bio_cur_sectors(bio); | 2008 | rq->current_nr_sectors = bio_cur_sectors(bio); |
1970 | rq->hard_cur_sectors = rq->current_nr_sectors; | 2009 | rq->hard_cur_sectors = rq->current_nr_sectors; |
1971 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); | 2010 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
1972 | rq->data_len = bio->bi_size; | 2011 | rq->data_len = bio->bi_size; |
1973 | 2012 | ||
1974 | rq->bio = rq->biotail = bio; | 2013 | rq->bio = rq->biotail = bio; |
1975 | 2014 | ||
1976 | if (bio->bi_bdev) | 2015 | if (bio->bi_bdev) |
1977 | rq->rq_disk = bio->bi_bdev->bd_disk; | 2016 | rq->rq_disk = bio->bi_bdev->bd_disk; |
1978 | } | 2017 | } |
1979 | 2018 | ||
1980 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | 2019 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) |
1981 | { | 2020 | { |
1982 | return queue_work(kblockd_workqueue, work); | 2021 | return queue_work(kblockd_workqueue, work); |
1983 | } | 2022 | } |
1984 | EXPORT_SYMBOL(kblockd_schedule_work); | 2023 | EXPORT_SYMBOL(kblockd_schedule_work); |
1985 | 2024 | ||
1986 | void kblockd_flush_work(struct work_struct *work) | 2025 | void kblockd_flush_work(struct work_struct *work) |
1987 | { | 2026 | { |
1988 | cancel_work_sync(work); | 2027 | cancel_work_sync(work); |
1989 | } | 2028 | } |
1990 | EXPORT_SYMBOL(kblockd_flush_work); | 2029 | EXPORT_SYMBOL(kblockd_flush_work); |
1991 | 2030 | ||
1992 | int __init blk_dev_init(void) | 2031 | int __init blk_dev_init(void) |
1993 | { | 2032 | { |
1994 | kblockd_workqueue = create_workqueue("kblockd"); | 2033 | kblockd_workqueue = create_workqueue("kblockd"); |
1995 | if (!kblockd_workqueue) | 2034 | if (!kblockd_workqueue) |
1996 | panic("Failed to create kblockd\n"); | 2035 | panic("Failed to create kblockd\n"); |
1997 | 2036 | ||
1998 | request_cachep = kmem_cache_create("blkdev_requests", | 2037 | request_cachep = kmem_cache_create("blkdev_requests", |
1999 | sizeof(struct request), 0, SLAB_PANIC, NULL); | 2038 | sizeof(struct request), 0, SLAB_PANIC, NULL); |
2000 | 2039 |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #ifdef CONFIG_BLOCK | 4 | #ifdef CONFIG_BLOCK |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/major.h> | 7 | #include <linux/major.h> |
8 | #include <linux/genhd.h> | 8 | #include <linux/genhd.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/timer.h> | 10 | #include <linux/timer.h> |
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
15 | #include <linux/mempool.h> | 15 | #include <linux/mempool.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/stringify.h> | 18 | #include <linux/stringify.h> |
19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
20 | #include <linux/bsg.h> | 20 | #include <linux/bsg.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | 22 | ||
23 | #include <asm/scatterlist.h> | 23 | #include <asm/scatterlist.h> |
24 | 24 | ||
25 | struct scsi_ioctl_command; | 25 | struct scsi_ioctl_command; |
26 | 26 | ||
27 | struct request_queue; | 27 | struct request_queue; |
28 | struct elevator_queue; | 28 | struct elevator_queue; |
29 | typedef struct elevator_queue elevator_t; | 29 | typedef struct elevator_queue elevator_t; |
30 | struct request_pm_state; | 30 | struct request_pm_state; |
31 | struct blk_trace; | 31 | struct blk_trace; |
32 | struct request; | 32 | struct request; |
33 | struct sg_io_hdr; | 33 | struct sg_io_hdr; |
34 | 34 | ||
35 | #define BLKDEV_MIN_RQ 4 | 35 | #define BLKDEV_MIN_RQ 4 |
36 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 36 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
37 | 37 | ||
38 | struct request; | 38 | struct request; |
39 | typedef void (rq_end_io_fn)(struct request *, int); | 39 | typedef void (rq_end_io_fn)(struct request *, int); |
40 | 40 | ||
41 | struct request_list { | 41 | struct request_list { |
42 | int count[2]; | 42 | int count[2]; |
43 | int starved[2]; | 43 | int starved[2]; |
44 | int elvpriv; | 44 | int elvpriv; |
45 | mempool_t *rq_pool; | 45 | mempool_t *rq_pool; |
46 | wait_queue_head_t wait[2]; | 46 | wait_queue_head_t wait[2]; |
47 | }; | 47 | }; |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * request command types | 50 | * request command types |
51 | */ | 51 | */ |
52 | enum rq_cmd_type_bits { | 52 | enum rq_cmd_type_bits { |
53 | REQ_TYPE_FS = 1, /* fs request */ | 53 | REQ_TYPE_FS = 1, /* fs request */ |
54 | REQ_TYPE_BLOCK_PC, /* scsi command */ | 54 | REQ_TYPE_BLOCK_PC, /* scsi command */ |
55 | REQ_TYPE_SENSE, /* sense request */ | 55 | REQ_TYPE_SENSE, /* sense request */ |
56 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | 56 | REQ_TYPE_PM_SUSPEND, /* suspend request */ |
57 | REQ_TYPE_PM_RESUME, /* resume request */ | 57 | REQ_TYPE_PM_RESUME, /* resume request */ |
58 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | 58 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ |
59 | REQ_TYPE_SPECIAL, /* driver defined type */ | 59 | REQ_TYPE_SPECIAL, /* driver defined type */ |
60 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ | 60 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ |
61 | /* | 61 | /* |
62 | * for ATA/ATAPI devices. this really doesn't belong here, ide should | 62 | * for ATA/ATAPI devices. this really doesn't belong here, ide should |
63 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver | 63 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver |
64 | * private REQ_LB opcodes to differentiate what type of request this is | 64 | * private REQ_LB opcodes to differentiate what type of request this is |
65 | */ | 65 | */ |
66 | REQ_TYPE_ATA_TASKFILE, | 66 | REQ_TYPE_ATA_TASKFILE, |
67 | REQ_TYPE_ATA_PC, | 67 | REQ_TYPE_ATA_PC, |
68 | }; | 68 | }; |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being | 71 | * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being |
72 | * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a | 72 | * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a |
73 | * SCSI cdb. | 73 | * SCSI cdb. |
74 | * | 74 | * |
75 | * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, | 75 | * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, |
76 | * typically to differentiate REQ_TYPE_SPECIAL requests. | 76 | * typically to differentiate REQ_TYPE_SPECIAL requests. |
77 | * | 77 | * |
78 | */ | 78 | */ |
79 | enum { | 79 | enum { |
80 | REQ_LB_OP_EJECT = 0x40, /* eject request */ | 80 | REQ_LB_OP_EJECT = 0x40, /* eject request */ |
81 | REQ_LB_OP_FLUSH = 0x41, /* flush request */ | 81 | REQ_LB_OP_FLUSH = 0x41, /* flush request */ |
82 | REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ | 82 | REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ |
83 | }; | 83 | }; |
84 | 84 | ||
85 | /* | 85 | /* |
86 | * request type modified bits. first two bits match BIO_RW* bits, important | 86 | * request type modified bits. first two bits match BIO_RW* bits, important |
87 | */ | 87 | */ |
88 | enum rq_flag_bits { | 88 | enum rq_flag_bits { |
89 | __REQ_RW, /* not set, read. set, write */ | 89 | __REQ_RW, /* not set, read. set, write */ |
90 | __REQ_FAILFAST, /* no low level driver retries */ | 90 | __REQ_FAILFAST, /* no low level driver retries */ |
91 | __REQ_DISCARD, /* request to discard sectors */ | 91 | __REQ_DISCARD, /* request to discard sectors */ |
92 | __REQ_SORTED, /* elevator knows about this request */ | 92 | __REQ_SORTED, /* elevator knows about this request */ |
93 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ | 93 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ |
94 | __REQ_HARDBARRIER, /* may not be passed by drive either */ | 94 | __REQ_HARDBARRIER, /* may not be passed by drive either */ |
95 | __REQ_FUA, /* forced unit access */ | 95 | __REQ_FUA, /* forced unit access */ |
96 | __REQ_NOMERGE, /* don't touch this for merging */ | 96 | __REQ_NOMERGE, /* don't touch this for merging */ |
97 | __REQ_STARTED, /* drive already may have started this one */ | 97 | __REQ_STARTED, /* drive already may have started this one */ |
98 | __REQ_DONTPREP, /* don't call prep for this one */ | 98 | __REQ_DONTPREP, /* don't call prep for this one */ |
99 | __REQ_QUEUED, /* uses queueing */ | 99 | __REQ_QUEUED, /* uses queueing */ |
100 | __REQ_ELVPRIV, /* elevator private data attached */ | 100 | __REQ_ELVPRIV, /* elevator private data attached */ |
101 | __REQ_FAILED, /* set if the request failed */ | 101 | __REQ_FAILED, /* set if the request failed */ |
102 | __REQ_QUIET, /* don't worry about errors */ | 102 | __REQ_QUIET, /* don't worry about errors */ |
103 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ | 103 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ |
104 | __REQ_ORDERED_COLOR, /* is before or after barrier */ | 104 | __REQ_ORDERED_COLOR, /* is before or after barrier */ |
105 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ | 105 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ |
106 | __REQ_ALLOCED, /* request came from our alloc pool */ | 106 | __REQ_ALLOCED, /* request came from our alloc pool */ |
107 | __REQ_RW_META, /* metadata io request */ | 107 | __REQ_RW_META, /* metadata io request */ |
108 | __REQ_COPY_USER, /* contains copies of user pages */ | 108 | __REQ_COPY_USER, /* contains copies of user pages */ |
109 | __REQ_INTEGRITY, /* integrity metadata has been remapped */ | 109 | __REQ_INTEGRITY, /* integrity metadata has been remapped */ |
110 | __REQ_NR_BITS, /* stops here */ | 110 | __REQ_NR_BITS, /* stops here */ |
111 | }; | 111 | }; |
112 | 112 | ||
113 | #define REQ_RW (1 << __REQ_RW) | 113 | #define REQ_RW (1 << __REQ_RW) |
114 | #define REQ_DISCARD (1 << __REQ_DISCARD) | 114 | #define REQ_DISCARD (1 << __REQ_DISCARD) |
115 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) | 115 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) |
116 | #define REQ_SORTED (1 << __REQ_SORTED) | 116 | #define REQ_SORTED (1 << __REQ_SORTED) |
117 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) | 117 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) |
118 | #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) | 118 | #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) |
119 | #define REQ_FUA (1 << __REQ_FUA) | 119 | #define REQ_FUA (1 << __REQ_FUA) |
120 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) | 120 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) |
121 | #define REQ_STARTED (1 << __REQ_STARTED) | 121 | #define REQ_STARTED (1 << __REQ_STARTED) |
122 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) | 122 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) |
123 | #define REQ_QUEUED (1 << __REQ_QUEUED) | 123 | #define REQ_QUEUED (1 << __REQ_QUEUED) |
124 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) | 124 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) |
125 | #define REQ_FAILED (1 << __REQ_FAILED) | 125 | #define REQ_FAILED (1 << __REQ_FAILED) |
126 | #define REQ_QUIET (1 << __REQ_QUIET) | 126 | #define REQ_QUIET (1 << __REQ_QUIET) |
127 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) | 127 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) |
128 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) | 128 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) |
129 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) | 129 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) |
130 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) | 130 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) |
131 | #define REQ_RW_META (1 << __REQ_RW_META) | 131 | #define REQ_RW_META (1 << __REQ_RW_META) |
132 | #define REQ_COPY_USER (1 << __REQ_COPY_USER) | 132 | #define REQ_COPY_USER (1 << __REQ_COPY_USER) |
133 | #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) | 133 | #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) |
134 | 134 | ||
135 | #define BLK_MAX_CDB 16 | 135 | #define BLK_MAX_CDB 16 |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * try to put the fields that are referenced together in the same cacheline. | 138 | * try to put the fields that are referenced together in the same cacheline. |
139 | * if you modify this structure, be sure to check block/blk-core.c:rq_init() | 139 | * if you modify this structure, be sure to check block/blk-core.c:rq_init() |
140 | * as well! | 140 | * as well! |
141 | */ | 141 | */ |
142 | struct request { | 142 | struct request { |
143 | struct list_head queuelist; | 143 | struct list_head queuelist; |
144 | struct call_single_data csd; | 144 | struct call_single_data csd; |
145 | int cpu; | 145 | int cpu; |
146 | 146 | ||
147 | struct request_queue *q; | 147 | struct request_queue *q; |
148 | 148 | ||
149 | unsigned int cmd_flags; | 149 | unsigned int cmd_flags; |
150 | enum rq_cmd_type_bits cmd_type; | 150 | enum rq_cmd_type_bits cmd_type; |
151 | unsigned long atomic_flags; | 151 | unsigned long atomic_flags; |
152 | 152 | ||
153 | /* Maintain bio traversal state for part by part I/O submission. | 153 | /* Maintain bio traversal state for part by part I/O submission. |
154 | * hard_* are block layer internals, no driver should touch them! | 154 | * hard_* are block layer internals, no driver should touch them! |
155 | */ | 155 | */ |
156 | 156 | ||
157 | sector_t sector; /* next sector to submit */ | 157 | sector_t sector; /* next sector to submit */ |
158 | sector_t hard_sector; /* next sector to complete */ | 158 | sector_t hard_sector; /* next sector to complete */ |
159 | unsigned long nr_sectors; /* no. of sectors left to submit */ | 159 | unsigned long nr_sectors; /* no. of sectors left to submit */ |
160 | unsigned long hard_nr_sectors; /* no. of sectors left to complete */ | 160 | unsigned long hard_nr_sectors; /* no. of sectors left to complete */ |
161 | /* no. of sectors left to submit in the current segment */ | 161 | /* no. of sectors left to submit in the current segment */ |
162 | unsigned int current_nr_sectors; | 162 | unsigned int current_nr_sectors; |
163 | 163 | ||
164 | /* no. of sectors left to complete in the current segment */ | 164 | /* no. of sectors left to complete in the current segment */ |
165 | unsigned int hard_cur_sectors; | 165 | unsigned int hard_cur_sectors; |
166 | 166 | ||
167 | struct bio *bio; | 167 | struct bio *bio; |
168 | struct bio *biotail; | 168 | struct bio *biotail; |
169 | 169 | ||
170 | struct hlist_node hash; /* merge hash */ | 170 | struct hlist_node hash; /* merge hash */ |
171 | /* | 171 | /* |
172 | * The rb_node is only used inside the io scheduler, requests | 172 | * The rb_node is only used inside the io scheduler, requests |
173 | * are pruned when moved to the dispatch queue. So let the | 173 | * are pruned when moved to the dispatch queue. So let the |
174 | * completion_data share space with the rb_node. | 174 | * completion_data share space with the rb_node. |
175 | */ | 175 | */ |
176 | union { | 176 | union { |
177 | struct rb_node rb_node; /* sort/lookup */ | 177 | struct rb_node rb_node; /* sort/lookup */ |
178 | void *completion_data; | 178 | void *completion_data; |
179 | }; | 179 | }; |
180 | 180 | ||
181 | /* | 181 | /* |
182 | * two pointers are available for the IO schedulers, if they need | 182 | * two pointers are available for the IO schedulers, if they need |
183 | * more they have to dynamically allocate it. | 183 | * more they have to dynamically allocate it. |
184 | */ | 184 | */ |
185 | void *elevator_private; | 185 | void *elevator_private; |
186 | void *elevator_private2; | 186 | void *elevator_private2; |
187 | 187 | ||
188 | struct gendisk *rq_disk; | 188 | struct gendisk *rq_disk; |
189 | unsigned long start_time; | 189 | unsigned long start_time; |
190 | 190 | ||
191 | /* Number of scatter-gather DMA addr+len pairs after | 191 | /* Number of scatter-gather DMA addr+len pairs after |
192 | * physical address coalescing is performed. | 192 | * physical address coalescing is performed. |
193 | */ | 193 | */ |
194 | unsigned short nr_phys_segments; | 194 | unsigned short nr_phys_segments; |
195 | 195 | ||
196 | unsigned short ioprio; | 196 | unsigned short ioprio; |
197 | 197 | ||
198 | void *special; | 198 | void *special; |
199 | char *buffer; | 199 | char *buffer; |
200 | 200 | ||
201 | int tag; | 201 | int tag; |
202 | int errors; | 202 | int errors; |
203 | 203 | ||
204 | int ref_count; | 204 | int ref_count; |
205 | 205 | ||
206 | /* | 206 | /* |
207 | * when request is used as a packet command carrier | 207 | * when request is used as a packet command carrier |
208 | */ | 208 | */ |
209 | unsigned short cmd_len; | 209 | unsigned short cmd_len; |
210 | unsigned char __cmd[BLK_MAX_CDB]; | 210 | unsigned char __cmd[BLK_MAX_CDB]; |
211 | unsigned char *cmd; | 211 | unsigned char *cmd; |
212 | 212 | ||
213 | unsigned int data_len; | 213 | unsigned int data_len; |
214 | unsigned int extra_len; /* length of alignment and padding */ | 214 | unsigned int extra_len; /* length of alignment and padding */ |
215 | unsigned int sense_len; | 215 | unsigned int sense_len; |
216 | void *data; | 216 | void *data; |
217 | void *sense; | 217 | void *sense; |
218 | 218 | ||
219 | unsigned long deadline; | 219 | unsigned long deadline; |
220 | struct list_head timeout_list; | 220 | struct list_head timeout_list; |
221 | unsigned int timeout; | 221 | unsigned int timeout; |
222 | int retries; | 222 | int retries; |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * completion callback. | 225 | * completion callback. |
226 | */ | 226 | */ |
227 | rq_end_io_fn *end_io; | 227 | rq_end_io_fn *end_io; |
228 | void *end_io_data; | 228 | void *end_io_data; |
229 | 229 | ||
230 | /* for bidi */ | 230 | /* for bidi */ |
231 | struct request *next_rq; | 231 | struct request *next_rq; |
232 | }; | 232 | }; |
233 | 233 | ||
234 | static inline unsigned short req_get_ioprio(struct request *req) | 234 | static inline unsigned short req_get_ioprio(struct request *req) |
235 | { | 235 | { |
236 | return req->ioprio; | 236 | return req->ioprio; |
237 | } | 237 | } |
238 | 238 | ||
239 | /* | 239 | /* |
240 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | 240 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME |
241 | * requests. Some step values could eventually be made generic. | 241 | * requests. Some step values could eventually be made generic. |
242 | */ | 242 | */ |
243 | struct request_pm_state | 243 | struct request_pm_state |
244 | { | 244 | { |
245 | /* PM state machine step value, currently driver specific */ | 245 | /* PM state machine step value, currently driver specific */ |
246 | int pm_step; | 246 | int pm_step; |
247 | /* requested PM state value (S1, S2, S3, S4, ...) */ | 247 | /* requested PM state value (S1, S2, S3, S4, ...) */ |
248 | u32 pm_state; | 248 | u32 pm_state; |
249 | void* data; /* for driver use */ | 249 | void* data; /* for driver use */ |
250 | }; | 250 | }; |
251 | 251 | ||
252 | #include <linux/elevator.h> | 252 | #include <linux/elevator.h> |
253 | 253 | ||
254 | typedef void (request_fn_proc) (struct request_queue *q); | 254 | typedef void (request_fn_proc) (struct request_queue *q); |
255 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); | 255 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); |
256 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 256 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
257 | typedef void (unplug_fn) (struct request_queue *); | 257 | typedef void (unplug_fn) (struct request_queue *); |
258 | typedef int (prepare_discard_fn) (struct request_queue *, struct request *); | 258 | typedef int (prepare_discard_fn) (struct request_queue *, struct request *); |
259 | 259 | ||
260 | struct bio_vec; | 260 | struct bio_vec; |
261 | struct bvec_merge_data { | 261 | struct bvec_merge_data { |
262 | struct block_device *bi_bdev; | 262 | struct block_device *bi_bdev; |
263 | sector_t bi_sector; | 263 | sector_t bi_sector; |
264 | unsigned bi_size; | 264 | unsigned bi_size; |
265 | unsigned long bi_rw; | 265 | unsigned long bi_rw; |
266 | }; | 266 | }; |
267 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, | 267 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, |
268 | struct bio_vec *); | 268 | struct bio_vec *); |
269 | typedef void (prepare_flush_fn) (struct request_queue *, struct request *); | 269 | typedef void (prepare_flush_fn) (struct request_queue *, struct request *); |
270 | typedef void (softirq_done_fn)(struct request *); | 270 | typedef void (softirq_done_fn)(struct request *); |
271 | typedef int (dma_drain_needed_fn)(struct request *); | 271 | typedef int (dma_drain_needed_fn)(struct request *); |
272 | 272 | ||
273 | enum blk_eh_timer_return { | 273 | enum blk_eh_timer_return { |
274 | BLK_EH_NOT_HANDLED, | 274 | BLK_EH_NOT_HANDLED, |
275 | BLK_EH_HANDLED, | 275 | BLK_EH_HANDLED, |
276 | BLK_EH_RESET_TIMER, | 276 | BLK_EH_RESET_TIMER, |
277 | }; | 277 | }; |
278 | 278 | ||
279 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); | 279 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); |
280 | 280 | ||
281 | enum blk_queue_state { | 281 | enum blk_queue_state { |
282 | Queue_down, | 282 | Queue_down, |
283 | Queue_up, | 283 | Queue_up, |
284 | }; | 284 | }; |
285 | 285 | ||
286 | struct blk_queue_tag { | 286 | struct blk_queue_tag { |
287 | struct request **tag_index; /* map of busy tags */ | 287 | struct request **tag_index; /* map of busy tags */ |
288 | unsigned long *tag_map; /* bit map of free/busy tags */ | 288 | unsigned long *tag_map; /* bit map of free/busy tags */ |
289 | int busy; /* current depth */ | 289 | int busy; /* current depth */ |
290 | int max_depth; /* what we will send to device */ | 290 | int max_depth; /* what we will send to device */ |
291 | int real_max_depth; /* what the array can hold */ | 291 | int real_max_depth; /* what the array can hold */ |
292 | atomic_t refcnt; /* map can be shared */ | 292 | atomic_t refcnt; /* map can be shared */ |
293 | }; | 293 | }; |
294 | 294 | ||
295 | #define BLK_SCSI_MAX_CMDS (256) | 295 | #define BLK_SCSI_MAX_CMDS (256) |
296 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | 296 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
297 | 297 | ||
298 | struct blk_cmd_filter { | 298 | struct blk_cmd_filter { |
299 | unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; | 299 | unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; |
300 | unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; | 300 | unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; |
301 | struct kobject kobj; | 301 | struct kobject kobj; |
302 | }; | 302 | }; |
303 | 303 | ||
304 | struct request_queue | 304 | struct request_queue |
305 | { | 305 | { |
306 | /* | 306 | /* |
307 | * Together with queue_head for cacheline sharing | 307 | * Together with queue_head for cacheline sharing |
308 | */ | 308 | */ |
309 | struct list_head queue_head; | 309 | struct list_head queue_head; |
310 | struct request *last_merge; | 310 | struct request *last_merge; |
311 | elevator_t *elevator; | 311 | elevator_t *elevator; |
312 | 312 | ||
313 | /* | 313 | /* |
314 | * the queue request freelist, one for reads and one for writes | 314 | * the queue request freelist, one for reads and one for writes |
315 | */ | 315 | */ |
316 | struct request_list rq; | 316 | struct request_list rq; |
317 | 317 | ||
318 | request_fn_proc *request_fn; | 318 | request_fn_proc *request_fn; |
319 | make_request_fn *make_request_fn; | 319 | make_request_fn *make_request_fn; |
320 | prep_rq_fn *prep_rq_fn; | 320 | prep_rq_fn *prep_rq_fn; |
321 | unplug_fn *unplug_fn; | 321 | unplug_fn *unplug_fn; |
322 | prepare_discard_fn *prepare_discard_fn; | 322 | prepare_discard_fn *prepare_discard_fn; |
323 | merge_bvec_fn *merge_bvec_fn; | 323 | merge_bvec_fn *merge_bvec_fn; |
324 | prepare_flush_fn *prepare_flush_fn; | 324 | prepare_flush_fn *prepare_flush_fn; |
325 | softirq_done_fn *softirq_done_fn; | 325 | softirq_done_fn *softirq_done_fn; |
326 | rq_timed_out_fn *rq_timed_out_fn; | 326 | rq_timed_out_fn *rq_timed_out_fn; |
327 | dma_drain_needed_fn *dma_drain_needed; | 327 | dma_drain_needed_fn *dma_drain_needed; |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * Dispatch queue sorting | 330 | * Dispatch queue sorting |
331 | */ | 331 | */ |
332 | sector_t end_sector; | 332 | sector_t end_sector; |
333 | struct request *boundary_rq; | 333 | struct request *boundary_rq; |
334 | 334 | ||
335 | /* | 335 | /* |
336 | * Auto-unplugging state | 336 | * Auto-unplugging state |
337 | */ | 337 | */ |
338 | struct timer_list unplug_timer; | 338 | struct timer_list unplug_timer; |
339 | int unplug_thresh; /* After this many requests */ | 339 | int unplug_thresh; /* After this many requests */ |
340 | unsigned long unplug_delay; /* After this many jiffies */ | 340 | unsigned long unplug_delay; /* After this many jiffies */ |
341 | struct work_struct unplug_work; | 341 | struct work_struct unplug_work; |
342 | 342 | ||
343 | struct backing_dev_info backing_dev_info; | 343 | struct backing_dev_info backing_dev_info; |
344 | 344 | ||
345 | /* | 345 | /* |
346 | * The queue owner gets to use this for whatever they like. | 346 | * The queue owner gets to use this for whatever they like. |
347 | * ll_rw_blk doesn't touch it. | 347 | * ll_rw_blk doesn't touch it. |
348 | */ | 348 | */ |
349 | void *queuedata; | 349 | void *queuedata; |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * queue needs bounce pages for pages above this limit | 352 | * queue needs bounce pages for pages above this limit |
353 | */ | 353 | */ |
354 | unsigned long bounce_pfn; | 354 | unsigned long bounce_pfn; |
355 | gfp_t bounce_gfp; | 355 | gfp_t bounce_gfp; |
356 | 356 | ||
357 | /* | 357 | /* |
358 | * various queue flags, see QUEUE_* below | 358 | * various queue flags, see QUEUE_* below |
359 | */ | 359 | */ |
360 | unsigned long queue_flags; | 360 | unsigned long queue_flags; |
361 | 361 | ||
362 | /* | 362 | /* |
363 | * protects queue structures from reentrancy. ->__queue_lock should | 363 | * protects queue structures from reentrancy. ->__queue_lock should |
364 | * _never_ be used directly, it is queue private. always use | 364 | * _never_ be used directly, it is queue private. always use |
365 | * ->queue_lock. | 365 | * ->queue_lock. |
366 | */ | 366 | */ |
367 | spinlock_t __queue_lock; | 367 | spinlock_t __queue_lock; |
368 | spinlock_t *queue_lock; | 368 | spinlock_t *queue_lock; |
369 | 369 | ||
370 | /* | 370 | /* |
371 | * queue kobject | 371 | * queue kobject |
372 | */ | 372 | */ |
373 | struct kobject kobj; | 373 | struct kobject kobj; |
374 | 374 | ||
375 | /* | 375 | /* |
376 | * queue settings | 376 | * queue settings |
377 | */ | 377 | */ |
378 | unsigned long nr_requests; /* Max # of requests */ | 378 | unsigned long nr_requests; /* Max # of requests */ |
379 | unsigned int nr_congestion_on; | 379 | unsigned int nr_congestion_on; |
380 | unsigned int nr_congestion_off; | 380 | unsigned int nr_congestion_off; |
381 | unsigned int nr_batching; | 381 | unsigned int nr_batching; |
382 | 382 | ||
383 | unsigned int max_sectors; | 383 | unsigned int max_sectors; |
384 | unsigned int max_hw_sectors; | 384 | unsigned int max_hw_sectors; |
385 | unsigned short max_phys_segments; | 385 | unsigned short max_phys_segments; |
386 | unsigned short max_hw_segments; | 386 | unsigned short max_hw_segments; |
387 | unsigned short hardsect_size; | 387 | unsigned short hardsect_size; |
388 | unsigned int max_segment_size; | 388 | unsigned int max_segment_size; |
389 | 389 | ||
390 | unsigned long seg_boundary_mask; | 390 | unsigned long seg_boundary_mask; |
391 | void *dma_drain_buffer; | 391 | void *dma_drain_buffer; |
392 | unsigned int dma_drain_size; | 392 | unsigned int dma_drain_size; |
393 | unsigned int dma_pad_mask; | 393 | unsigned int dma_pad_mask; |
394 | unsigned int dma_alignment; | 394 | unsigned int dma_alignment; |
395 | 395 | ||
396 | struct blk_queue_tag *queue_tags; | 396 | struct blk_queue_tag *queue_tags; |
397 | struct list_head tag_busy_list; | 397 | struct list_head tag_busy_list; |
398 | 398 | ||
399 | unsigned int nr_sorted; | 399 | unsigned int nr_sorted; |
400 | unsigned int in_flight; | 400 | unsigned int in_flight; |
401 | 401 | ||
402 | unsigned int rq_timeout; | 402 | unsigned int rq_timeout; |
403 | struct timer_list timeout; | 403 | struct timer_list timeout; |
404 | struct list_head timeout_list; | 404 | struct list_head timeout_list; |
405 | 405 | ||
406 | /* | 406 | /* |
407 | * sg stuff | 407 | * sg stuff |
408 | */ | 408 | */ |
409 | unsigned int sg_timeout; | 409 | unsigned int sg_timeout; |
410 | unsigned int sg_reserved_size; | 410 | unsigned int sg_reserved_size; |
411 | int node; | 411 | int node; |
412 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 412 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
413 | struct blk_trace *blk_trace; | 413 | struct blk_trace *blk_trace; |
414 | #endif | 414 | #endif |
415 | /* | 415 | /* |
416 | * reserved for flush operations | 416 | * reserved for flush operations |
417 | */ | 417 | */ |
418 | unsigned int ordered, next_ordered, ordseq; | 418 | unsigned int ordered, next_ordered, ordseq; |
419 | int orderr, ordcolor; | 419 | int orderr, ordcolor; |
420 | struct request pre_flush_rq, bar_rq, post_flush_rq; | 420 | struct request pre_flush_rq, bar_rq, post_flush_rq; |
421 | struct request *orig_bar_rq; | 421 | struct request *orig_bar_rq; |
422 | 422 | ||
423 | struct mutex sysfs_lock; | 423 | struct mutex sysfs_lock; |
424 | 424 | ||
425 | #if defined(CONFIG_BLK_DEV_BSG) | 425 | #if defined(CONFIG_BLK_DEV_BSG) |
426 | struct bsg_class_device bsg_dev; | 426 | struct bsg_class_device bsg_dev; |
427 | #endif | 427 | #endif |
428 | struct blk_cmd_filter cmd_filter; | 428 | struct blk_cmd_filter cmd_filter; |
429 | }; | 429 | }; |
430 | 430 | ||
431 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ | 431 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ |
432 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 432 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
433 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ | 433 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ |
434 | #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ | 434 | #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ |
435 | #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ | 435 | #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ |
436 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 436 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
437 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ | 437 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ |
438 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ | 438 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ |
439 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ | 439 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ |
440 | #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ | 440 | #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ |
441 | #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ | 441 | #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ |
442 | #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ | 442 | #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ |
443 | #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ | 443 | #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ |
444 | 444 | ||
445 | static inline int queue_is_locked(struct request_queue *q) | 445 | static inline int queue_is_locked(struct request_queue *q) |
446 | { | 446 | { |
447 | #ifdef CONFIG_SMP | 447 | #ifdef CONFIG_SMP |
448 | spinlock_t *lock = q->queue_lock; | 448 | spinlock_t *lock = q->queue_lock; |
449 | return lock && spin_is_locked(lock); | 449 | return lock && spin_is_locked(lock); |
450 | #else | 450 | #else |
451 | return 1; | 451 | return 1; |
452 | #endif | 452 | #endif |
453 | } | 453 | } |
454 | 454 | ||
455 | static inline void queue_flag_set_unlocked(unsigned int flag, | 455 | static inline void queue_flag_set_unlocked(unsigned int flag, |
456 | struct request_queue *q) | 456 | struct request_queue *q) |
457 | { | 457 | { |
458 | __set_bit(flag, &q->queue_flags); | 458 | __set_bit(flag, &q->queue_flags); |
459 | } | 459 | } |
460 | 460 | ||
461 | static inline int queue_flag_test_and_clear(unsigned int flag, | 461 | static inline int queue_flag_test_and_clear(unsigned int flag, |
462 | struct request_queue *q) | 462 | struct request_queue *q) |
463 | { | 463 | { |
464 | WARN_ON_ONCE(!queue_is_locked(q)); | 464 | WARN_ON_ONCE(!queue_is_locked(q)); |
465 | 465 | ||
466 | if (test_bit(flag, &q->queue_flags)) { | 466 | if (test_bit(flag, &q->queue_flags)) { |
467 | __clear_bit(flag, &q->queue_flags); | 467 | __clear_bit(flag, &q->queue_flags); |
468 | return 1; | 468 | return 1; |
469 | } | 469 | } |
470 | 470 | ||
471 | return 0; | 471 | return 0; |
472 | } | 472 | } |
473 | 473 | ||
474 | static inline int queue_flag_test_and_set(unsigned int flag, | 474 | static inline int queue_flag_test_and_set(unsigned int flag, |
475 | struct request_queue *q) | 475 | struct request_queue *q) |
476 | { | 476 | { |
477 | WARN_ON_ONCE(!queue_is_locked(q)); | 477 | WARN_ON_ONCE(!queue_is_locked(q)); |
478 | 478 | ||
479 | if (!test_bit(flag, &q->queue_flags)) { | 479 | if (!test_bit(flag, &q->queue_flags)) { |
480 | __set_bit(flag, &q->queue_flags); | 480 | __set_bit(flag, &q->queue_flags); |
481 | return 0; | 481 | return 0; |
482 | } | 482 | } |
483 | 483 | ||
484 | return 1; | 484 | return 1; |
485 | } | 485 | } |
486 | 486 | ||
487 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) | 487 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) |
488 | { | 488 | { |
489 | WARN_ON_ONCE(!queue_is_locked(q)); | 489 | WARN_ON_ONCE(!queue_is_locked(q)); |
490 | __set_bit(flag, &q->queue_flags); | 490 | __set_bit(flag, &q->queue_flags); |
491 | } | 491 | } |
492 | 492 | ||
493 | static inline void queue_flag_clear_unlocked(unsigned int flag, | 493 | static inline void queue_flag_clear_unlocked(unsigned int flag, |
494 | struct request_queue *q) | 494 | struct request_queue *q) |
495 | { | 495 | { |
496 | __clear_bit(flag, &q->queue_flags); | 496 | __clear_bit(flag, &q->queue_flags); |
497 | } | 497 | } |
498 | 498 | ||
499 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | 499 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) |
500 | { | 500 | { |
501 | WARN_ON_ONCE(!queue_is_locked(q)); | 501 | WARN_ON_ONCE(!queue_is_locked(q)); |
502 | __clear_bit(flag, &q->queue_flags); | 502 | __clear_bit(flag, &q->queue_flags); |
503 | } | 503 | } |
504 | 504 | ||
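The unlocked variants above exist for contexts where the queue is not yet visible to anyone else (e.g. during initialization); the locked variants WARN if the queue lock is not held. A minimal sketch, with my_init_queue() and my_disable_merges() as hypothetical callers:

static void my_init_queue(struct request_queue *q)
{
        /* queue not yet published: the unlocked variant is safe here */
        queue_flag_set_unlocked(QUEUE_FLAG_SAME_COMP, q);
}

static void my_disable_merges(struct request_queue *q)
{
        unsigned long flags;

        /* at runtime the queue lock must be held, or WARN_ON_ONCE fires */
        spin_lock_irqsave(q->queue_lock, flags);
        queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        spin_unlock_irqrestore(q->queue_lock, flags);
}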
505 | enum { | 505 | enum { |
506 | /* | 506 | /* |
507 | * Hardbarrier is supported with one of the following methods. | 507 | * Hardbarrier is supported with one of the following methods. |
508 | * | 508 | * |
509 | * NONE : hardbarrier unsupported | 509 | * NONE : hardbarrier unsupported |
510 | * DRAIN : ordering by draining is enough | 510 | * DRAIN : ordering by draining is enough |
511 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes | 511 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes |
512 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write | 512 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write |
513 | * TAG : ordering by tag is enough | 513 | * TAG : ordering by tag is enough |
514 | * TAG_FLUSH : ordering by tag w/ pre and post flushes | 514 | * TAG_FLUSH : ordering by tag w/ pre and post flushes |
515 | * TAG_FUA : ordering by tag w/ pre flush and FUA write | 515 | * TAG_FUA : ordering by tag w/ pre flush and FUA write |
516 | */ | 516 | */ |
517 | QUEUE_ORDERED_NONE = 0x00, | 517 | QUEUE_ORDERED_NONE = 0x00, |
518 | QUEUE_ORDERED_DRAIN = 0x01, | 518 | QUEUE_ORDERED_DRAIN = 0x01, |
519 | QUEUE_ORDERED_TAG = 0x02, | 519 | QUEUE_ORDERED_TAG = 0x02, |
520 | 520 | ||
521 | QUEUE_ORDERED_PREFLUSH = 0x10, | 521 | QUEUE_ORDERED_PREFLUSH = 0x10, |
522 | QUEUE_ORDERED_POSTFLUSH = 0x20, | 522 | QUEUE_ORDERED_POSTFLUSH = 0x20, |
523 | QUEUE_ORDERED_FUA = 0x40, | 523 | QUEUE_ORDERED_FUA = 0x40, |
524 | 524 | ||
525 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | | 525 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | |
526 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, | 526 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, |
527 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | | 527 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | |
528 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, | 528 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, |
529 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | | 529 | QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | |
530 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, | 530 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, |
531 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | | 531 | QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | |
532 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, | 532 | QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, |
533 | 533 | ||
534 | /* | 534 | /* |
535 | * Ordered operation sequence | 535 | * Ordered operation sequence |
536 | */ | 536 | */ |
537 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ | 537 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ |
538 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ | 538 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ |
539 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ | 539 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ |
540 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ | 540 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ |
541 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ | 541 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ |
542 | QUEUE_ORDSEQ_DONE = 0x20, | 542 | QUEUE_ORDSEQ_DONE = 0x20, |
543 | }; | 543 | }; |
544 | 544 | ||
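For example, a driver for a disk with a volatile write cache would typically pick the drain-plus-flush variant and supply a prepare_flush callback; a sketch, with my_prepare_flush() and my_setup_barriers() as assumed driver functions:

static void my_prepare_flush(struct request_queue *q, struct request *rq)
{
        /* turn rq into the device's cache-flush command here */
}

static int my_setup_barriers(struct request_queue *q)
{
        /* drain the queue, flush before and after the barrier write */
        return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                 my_prepare_flush);
}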
545 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 545 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
546 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 546 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
547 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 547 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
548 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) | 548 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) |
549 | #define blk_queue_flushing(q) ((q)->ordseq) | 549 | #define blk_queue_flushing(q) ((q)->ordseq) |
550 | 550 | ||
551 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) | 551 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) |
552 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) | 552 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) |
553 | #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) | 553 | #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) |
554 | #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) | 554 | #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) |
555 | 555 | ||
556 | #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) | 556 | #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) |
557 | #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) | 557 | #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) |
558 | 558 | ||
559 | #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) | 559 | #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) |
560 | 560 | ||
561 | #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) | 561 | #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) |
562 | #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) | 562 | #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) |
563 | #define blk_pm_request(rq) \ | 563 | #define blk_pm_request(rq) \ |
564 | (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) | 564 | (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) |
565 | 565 | ||
566 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) | 566 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) |
567 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) | 567 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) |
568 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) | 568 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) |
569 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) | 569 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) |
570 | #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) | 570 | #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) |
571 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) | 571 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
572 | #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) | 572 | #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) |
573 | /* rq->queuelist of a dequeued request must be list_empty() */ | 573 | /* rq->queuelist of a dequeued request must be list_empty() */ |
574 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) | 574 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) |
575 | 575 | ||
576 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 576 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
577 | 577 | ||
578 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 578 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) |
579 | 579 | ||
580 | /* | 580 | /* |
581 | * We regard a request as sync if it's a READ or a SYNC write. | 581 | * We regard a request as sync if it's a READ or a SYNC write. |
582 | */ | 582 | */ |
583 | #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) | 583 | #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) |
584 | #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) | 584 | #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) |
585 | 585 | ||
586 | static inline int blk_queue_full(struct request_queue *q, int rw) | 586 | static inline int blk_queue_full(struct request_queue *q, int rw) |
587 | { | 587 | { |
588 | if (rw == READ) | 588 | if (rw == READ) |
589 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); | 589 | return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); |
590 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); | 590 | return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); |
591 | } | 591 | } |
592 | 592 | ||
593 | static inline void blk_set_queue_full(struct request_queue *q, int rw) | 593 | static inline void blk_set_queue_full(struct request_queue *q, int rw) |
594 | { | 594 | { |
595 | if (rw == READ) | 595 | if (rw == READ) |
596 | queue_flag_set(QUEUE_FLAG_READFULL, q); | 596 | queue_flag_set(QUEUE_FLAG_READFULL, q); |
597 | else | 597 | else |
598 | queue_flag_set(QUEUE_FLAG_WRITEFULL, q); | 598 | queue_flag_set(QUEUE_FLAG_WRITEFULL, q); |
599 | } | 599 | } |
600 | 600 | ||
601 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) | 601 | static inline void blk_clear_queue_full(struct request_queue *q, int rw) |
602 | { | 602 | { |
603 | if (rw == READ) | 603 | if (rw == READ) |
604 | queue_flag_clear(QUEUE_FLAG_READFULL, q); | 604 | queue_flag_clear(QUEUE_FLAG_READFULL, q); |
605 | else | 605 | else |
606 | queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); | 606 | queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); |
607 | } | 607 | } |
608 | 608 | ||
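A sketch of how an allocator-side caller might pair these with the congestion helpers defined further down in this header (the function name is illustrative):

static void my_update_congestion(struct request_queue *q, int rw)
{
        /* mirror the queue-full state into the bdi congestion flags */
        if (blk_queue_full(q, rw))
                blk_set_queue_congested(q, rw);
        else
                blk_clear_queue_congested(q, rw);
}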
609 | 609 | ||
610 | /* | 610 | /* |
611 | * A mergeable request must not have the _NOMERGE or _BARRIER bit set, nor | 611 | * A mergeable request must not have the _NOMERGE or _BARRIER bit set, nor |
612 | * may it already have been started by the driver. | 612 | * may it already have been started by the driver. |
613 | */ | 613 | */ |
614 | #define RQ_NOMERGE_FLAGS \ | 614 | #define RQ_NOMERGE_FLAGS \ |
615 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) | 615 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) |
616 | #define rq_mergeable(rq) \ | 616 | #define rq_mergeable(rq) \ |
617 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 617 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ |
618 | (blk_discard_rq(rq) || blk_fs_request((rq)))) | 618 | (blk_discard_rq(rq) || blk_fs_request((rq)))) |
619 | 619 | ||
620 | /* | 620 | /* |
621 | * q->prep_rq_fn return values | 621 | * q->prep_rq_fn return values |
622 | */ | 622 | */ |
623 | #define BLKPREP_OK 0 /* serve it */ | 623 | #define BLKPREP_OK 0 /* serve it */ |
624 | #define BLKPREP_KILL 1 /* fatal error, kill */ | 624 | #define BLKPREP_KILL 1 /* fatal error, kill */ |
625 | #define BLKPREP_DEFER 2 /* leave on queue */ | 625 | #define BLKPREP_DEFER 2 /* leave on queue */ |
626 | 626 | ||
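A sketch of a q->prep_rq_fn using these return codes (my_prep_rq() and my_reserve_resources() are illustrative names; the callback is installed with blk_queue_prep_rq(), declared further down):

static int my_prep_rq(struct request_queue *q, struct request *rq)
{
        if (!blk_fs_request(rq))
                return BLKPREP_KILL;    /* can't serve it: fail the request */

        if (!my_reserve_resources(rq))
                return BLKPREP_DEFER;   /* short on resources: leave it queued */

        /* build the device-specific command for rq here */
        return BLKPREP_OK;
}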
627 | extern unsigned long blk_max_low_pfn, blk_max_pfn; | 627 | extern unsigned long blk_max_low_pfn, blk_max_pfn; |
628 | 628 | ||
629 | /* | 629 | /* |
630 | * standard bounce addresses: | 630 | * standard bounce addresses: |
631 | * | 631 | * |
632 | * BLK_BOUNCE_HIGH : bounce all highmem pages | 632 | * BLK_BOUNCE_HIGH : bounce all highmem pages |
633 | * BLK_BOUNCE_ANY : don't bounce anything | 633 | * BLK_BOUNCE_ANY : don't bounce anything |
634 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary | 634 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary |
635 | */ | 635 | */ |
636 | 636 | ||
637 | #if BITS_PER_LONG == 32 | 637 | #if BITS_PER_LONG == 32 |
638 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) | 638 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) |
639 | #else | 639 | #else |
640 | #define BLK_BOUNCE_HIGH -1ULL | 640 | #define BLK_BOUNCE_HIGH -1ULL |
641 | #endif | 641 | #endif |
642 | #define BLK_BOUNCE_ANY (-1ULL) | 642 | #define BLK_BOUNCE_ANY (-1ULL) |
643 | #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) | 643 | #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) |
644 | 644 | ||
645 | /* | 645 | /* |
646 | * default timeout for SG_IO if none specified | 646 | * default timeout for SG_IO if none specified |
647 | */ | 647 | */ |
648 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) | 648 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) |
649 | 649 | ||
650 | #ifdef CONFIG_BOUNCE | 650 | #ifdef CONFIG_BOUNCE |
651 | extern int init_emergency_isa_pool(void); | 651 | extern int init_emergency_isa_pool(void); |
652 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); | 652 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); |
653 | #else | 653 | #else |
654 | static inline int init_emergency_isa_pool(void) | 654 | static inline int init_emergency_isa_pool(void) |
655 | { | 655 | { |
656 | return 0; | 656 | return 0; |
657 | } | 657 | } |
658 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) | 658 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) |
659 | { | 659 | { |
660 | } | 660 | } |
661 | #endif /* CONFIG_BOUNCE */ | 661 | #endif /* CONFIG_BOUNCE */ |
662 | 662 | ||
663 | struct rq_map_data { | 663 | struct rq_map_data { |
664 | struct page **pages; | 664 | struct page **pages; |
665 | int page_order; | 665 | int page_order; |
666 | int nr_entries; | 666 | int nr_entries; |
667 | }; | 667 | }; |
668 | 668 | ||
669 | struct req_iterator { | 669 | struct req_iterator { |
670 | int i; | 670 | int i; |
671 | struct bio *bio; | 671 | struct bio *bio; |
672 | }; | 672 | }; |
673 | 673 | ||
674 | /* This should not be used directly - use rq_for_each_segment */ | 674 | /* This should not be used directly - use rq_for_each_segment */ |
675 | #define __rq_for_each_bio(_bio, rq) \ | 675 | #define __rq_for_each_bio(_bio, rq) \ |
676 | if ((rq->bio)) \ | 676 | if ((rq->bio)) \ |
677 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 677 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
678 | 678 | ||
679 | #define rq_for_each_segment(bvl, _rq, _iter) \ | 679 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
680 | __rq_for_each_bio(_iter.bio, _rq) \ | 680 | __rq_for_each_bio(_iter.bio, _rq) \ |
681 | bio_for_each_segment(bvl, _iter.bio, _iter.i) | 681 | bio_for_each_segment(bvl, _iter.bio, _iter.i) |
682 | 682 | ||
683 | #define rq_iter_last(rq, _iter) \ | 683 | #define rq_iter_last(rq, _iter) \ |
684 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) | 684 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) |
685 | 685 | ||
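A sketch of the iterator in use, summing the data bytes of a request (my_count_bytes() is an illustrative name):

static unsigned int my_count_bytes(struct request *rq)
{
        struct req_iterator iter;
        struct bio_vec *bvec;
        unsigned int bytes = 0;

        /* visits every bio_vec of every bio chained on the request */
        rq_for_each_segment(bvec, rq, iter)
                bytes += bvec->bv_len;

        return bytes;
}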
686 | extern int blk_register_queue(struct gendisk *disk); | 686 | extern int blk_register_queue(struct gendisk *disk); |
687 | extern void blk_unregister_queue(struct gendisk *disk); | 687 | extern void blk_unregister_queue(struct gendisk *disk); |
688 | extern void register_disk(struct gendisk *dev); | 688 | extern void register_disk(struct gendisk *dev); |
689 | extern void generic_make_request(struct bio *bio); | 689 | extern void generic_make_request(struct bio *bio); |
690 | extern void blk_rq_init(struct request_queue *q, struct request *rq); | 690 | extern void blk_rq_init(struct request_queue *q, struct request *rq); |
691 | extern void blk_put_request(struct request *); | 691 | extern void blk_put_request(struct request *); |
692 | extern void __blk_put_request(struct request_queue *, struct request *); | 692 | extern void __blk_put_request(struct request_queue *, struct request *); |
693 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); | 693 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
694 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | 694 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); |
695 | extern void blk_requeue_request(struct request_queue *, struct request *); | 695 | extern void blk_requeue_request(struct request_queue *, struct request *); |
696 | extern void blk_plug_device(struct request_queue *); | 696 | extern void blk_plug_device(struct request_queue *); |
697 | extern void blk_plug_device_unlocked(struct request_queue *); | 697 | extern void blk_plug_device_unlocked(struct request_queue *); |
698 | extern int blk_remove_plug(struct request_queue *); | 698 | extern int blk_remove_plug(struct request_queue *); |
699 | extern void blk_recount_segments(struct request_queue *, struct bio *); | 699 | extern void blk_recount_segments(struct request_queue *, struct bio *); |
700 | extern int scsi_cmd_ioctl(struct file *, struct request_queue *, | 700 | extern int scsi_cmd_ioctl(struct file *, struct request_queue *, |
701 | struct gendisk *, unsigned int, void __user *); | 701 | struct gendisk *, unsigned int, void __user *); |
702 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, | 702 | extern int sg_scsi_ioctl(struct file *, struct request_queue *, |
703 | struct gendisk *, struct scsi_ioctl_command __user *); | 703 | struct gendisk *, struct scsi_ioctl_command __user *); |
704 | 704 | ||
705 | /* | 705 | /* |
706 | * Temporary export, until SCSI gets fixed up. | 706 | * Temporary export, until SCSI gets fixed up. |
707 | */ | 707 | */ |
708 | extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, | 708 | extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, |
709 | struct bio *bio); | 709 | struct bio *bio); |
710 | 710 | ||
711 | /* | 711 | /* |
712 | * A queue has just exited congestion. Note this in the global counter of | 712 | * A queue has just exited congestion. Note this in the global counter of |
713 | * congested queues, and wake up anyone who was waiting for requests to be | 713 | * congested queues, and wake up anyone who was waiting for requests to be |
714 | * put back. | 714 | * put back. |
715 | */ | 715 | */ |
716 | static inline void blk_clear_queue_congested(struct request_queue *q, int rw) | 716 | static inline void blk_clear_queue_congested(struct request_queue *q, int rw) |
717 | { | 717 | { |
718 | clear_bdi_congested(&q->backing_dev_info, rw); | 718 | clear_bdi_congested(&q->backing_dev_info, rw); |
719 | } | 719 | } |
720 | 720 | ||
721 | /* | 721 | /* |
722 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 722 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
723 | * state flags and increment the global counter of congested queues. | 723 | * state flags and increment the global counter of congested queues. |
724 | */ | 724 | */ |
725 | static inline void blk_set_queue_congested(struct request_queue *q, int rw) | 725 | static inline void blk_set_queue_congested(struct request_queue *q, int rw) |
726 | { | 726 | { |
727 | set_bdi_congested(&q->backing_dev_info, rw); | 727 | set_bdi_congested(&q->backing_dev_info, rw); |
728 | } | 728 | } |
729 | 729 | ||
730 | extern void blk_start_queue(struct request_queue *q); | 730 | extern void blk_start_queue(struct request_queue *q); |
731 | extern void blk_stop_queue(struct request_queue *q); | 731 | extern void blk_stop_queue(struct request_queue *q); |
732 | extern void blk_sync_queue(struct request_queue *q); | 732 | extern void blk_sync_queue(struct request_queue *q); |
733 | extern void __blk_stop_queue(struct request_queue *q); | 733 | extern void __blk_stop_queue(struct request_queue *q); |
734 | extern void __blk_run_queue(struct request_queue *); | 734 | extern void __blk_run_queue(struct request_queue *); |
735 | extern void blk_run_queue(struct request_queue *); | 735 | extern void blk_run_queue(struct request_queue *); |
736 | extern void blk_start_queueing(struct request_queue *); | 736 | extern void blk_start_queueing(struct request_queue *); |
737 | extern int blk_rq_map_user(struct request_queue *, struct request *, | 737 | extern int blk_rq_map_user(struct request_queue *, struct request *, |
738 | struct rq_map_data *, void __user *, unsigned long, | 738 | struct rq_map_data *, void __user *, unsigned long, |
739 | gfp_t); | 739 | gfp_t); |
740 | extern int blk_rq_unmap_user(struct bio *); | 740 | extern int blk_rq_unmap_user(struct bio *); |
741 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); | 741 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); |
742 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, | 742 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, |
743 | struct rq_map_data *, struct sg_iovec *, int, | 743 | struct rq_map_data *, struct sg_iovec *, int, |
744 | unsigned int, gfp_t); | 744 | unsigned int, gfp_t); |
745 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, | 745 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, |
746 | struct request *, int); | 746 | struct request *, int); |
747 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, | 747 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, |
748 | struct request *, int, rq_end_io_fn *); | 748 | struct request *, int, rq_end_io_fn *); |
749 | extern void blk_unplug(struct request_queue *q); | 749 | extern void blk_unplug(struct request_queue *q); |
750 | 750 | ||
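A sketch of the usual pattern for issuing a driver-internal command synchronously with the mapping and execute interfaces above (my_send_cmd() is an assumed name):

static int my_send_cmd(struct request_queue *q, struct gendisk *disk,
                       void *buf, unsigned int len)
{
        struct request *rq;
        int err;

        rq = blk_get_request(q, READ, __GFP_WAIT);
        if (!rq)
                return -ENOMEM;
        rq->cmd_type = REQ_TYPE_SPECIAL;

        err = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
        if (!err)
                err = blk_execute_rq(q, disk, rq, 0);   /* waits for completion */

        blk_put_request(rq);
        return err;
}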
751 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) | 751 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) |
752 | { | 752 | { |
753 | return bdev->bd_disk->queue; | 753 | return bdev->bd_disk->queue; |
754 | } | 754 | } |
755 | 755 | ||
756 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, | 756 | static inline void blk_run_backing_dev(struct backing_dev_info *bdi, |
757 | struct page *page) | 757 | struct page *page) |
758 | { | 758 | { |
759 | if (bdi && bdi->unplug_io_fn) | 759 | if (bdi && bdi->unplug_io_fn) |
760 | bdi->unplug_io_fn(bdi, page); | 760 | bdi->unplug_io_fn(bdi, page); |
761 | } | 761 | } |
762 | 762 | ||
763 | static inline void blk_run_address_space(struct address_space *mapping) | 763 | static inline void blk_run_address_space(struct address_space *mapping) |
764 | { | 764 | { |
765 | if (mapping) | 765 | if (mapping) |
766 | blk_run_backing_dev(mapping->backing_dev_info, NULL); | 766 | blk_run_backing_dev(mapping->backing_dev_info, NULL); |
767 | } | 767 | } |
768 | 768 | ||
769 | /* | 769 | /* |
770 | * blk_end_request() and friends. | 770 | * blk_end_request() and friends. |
771 | * __blk_end_request() and end_request() must be called with | 771 | * __blk_end_request() and end_request() must be called with |
772 | * the request queue spinlock acquired. | 772 | * the request queue spinlock acquired. |
773 | * | 773 | * |
774 | * Several drivers define their own end_request and call | 774 | * Several drivers define their own end_request and call |
775 | * blk_end_request() for parts of the original function. | 775 | * blk_end_request() for parts of the original function. |
776 | * This prevents code duplication in drivers. | 776 | * This prevents code duplication in drivers. |
777 | */ | 777 | */ |
778 | extern int blk_end_request(struct request *rq, int error, | 778 | extern int blk_end_request(struct request *rq, int error, |
779 | unsigned int nr_bytes); | 779 | unsigned int nr_bytes); |
780 | extern int __blk_end_request(struct request *rq, int error, | 780 | extern int __blk_end_request(struct request *rq, int error, |
781 | unsigned int nr_bytes); | 781 | unsigned int nr_bytes); |
782 | extern int blk_end_bidi_request(struct request *rq, int error, | 782 | extern int blk_end_bidi_request(struct request *rq, int error, |
783 | unsigned int nr_bytes, unsigned int bidi_bytes); | 783 | unsigned int nr_bytes, unsigned int bidi_bytes); |
784 | extern void end_request(struct request *, int); | 784 | extern void end_request(struct request *, int); |
785 | extern void end_queued_request(struct request *, int); | 785 | extern void end_queued_request(struct request *, int); |
786 | extern void end_dequeued_request(struct request *, int); | 786 | extern void end_dequeued_request(struct request *, int); |
787 | extern int blk_end_request_callback(struct request *rq, int error, | 787 | extern int blk_end_request_callback(struct request *rq, int error, |
788 | unsigned int nr_bytes, | 788 | unsigned int nr_bytes, |
789 | int (drv_callback)(struct request *)); | 789 | int (drv_callback)(struct request *)); |
790 | extern void blk_complete_request(struct request *); | 790 | extern void blk_complete_request(struct request *); |
791 | extern void __blk_complete_request(struct request *); | 791 | extern void __blk_complete_request(struct request *); |
792 | extern void blk_abort_request(struct request *); | 792 | extern void blk_abort_request(struct request *); |
793 | extern void blk_abort_queue(struct request_queue *); | 793 | extern void blk_abort_queue(struct request_queue *); |
794 | extern void blk_update_request(struct request *rq, int error, | ||
795 | unsigned int nr_bytes); | ||
794 | 796 | ||
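A sketch of the intended use of blk_update_request() described in the commit message: a request stacking driver completes the matching part of the original request from the clone's bio completion context, without completing the struct request itself. struct clone_info and its fields are assumptions for illustration:

static void my_clone_bio_end_io(struct bio *clone, int error)
{
        struct clone_info *info = clone->bi_private;

        /*
         * Finish the corresponding part of the original request right
         * here, in the lower driver's completion context; the struct
         * request itself is still completed later via rq->end_io.
         */
        blk_update_request(info->orig_rq, error, info->len);

        bio_put(clone);
}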
795 | /* | 797 | /* |
796 | * blk_end_request() takes bytes instead of sectors as the completion size. | 798 | * blk_end_request() takes bytes instead of sectors as the completion size. |
797 | * blk_rq_bytes() returns bytes left to complete in the entire request. | 799 | * blk_rq_bytes() returns bytes left to complete in the entire request. |
798 | * blk_rq_cur_bytes() returns bytes left to complete in the current segment. | 800 | * blk_rq_cur_bytes() returns bytes left to complete in the current segment. |
799 | */ | 801 | */ |
800 | extern unsigned int blk_rq_bytes(struct request *rq); | 802 | extern unsigned int blk_rq_bytes(struct request *rq); |
801 | extern unsigned int blk_rq_cur_bytes(struct request *rq); | 803 | extern unsigned int blk_rq_cur_bytes(struct request *rq); |
802 | 804 | ||
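For instance, a driver that learns from its hardware how many bytes actually transferred can complete exactly that much (nr_done and the function name are illustrative):

static void my_finish_io(struct request *rq, int error, unsigned int nr_done)
{
        /* clamp, then complete; blk_end_request() returns nonzero while
         * bytes remain on the request (partial completion) */
        if (nr_done > blk_rq_bytes(rq))
                nr_done = blk_rq_bytes(rq);

        blk_end_request(rq, error, nr_done);
}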
803 | static inline void blkdev_dequeue_request(struct request *req) | 805 | static inline void blkdev_dequeue_request(struct request *req) |
804 | { | 806 | { |
805 | elv_dequeue_request(req->q, req); | 807 | elv_dequeue_request(req->q, req); |
806 | } | 808 | } |
807 | 809 | ||
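A sketch of the classic fetch-and-dequeue loop in a driver's request_fn (elv_next_request() was the fetch interface of this era; my_hw_submit() is an assumed driver function):

static void my_do_requests(struct request_queue *q)
{
        struct request *rq;

        /* called with the queue lock held */
        while ((rq = elv_next_request(q)) != NULL) {
                blkdev_dequeue_request(rq);     /* off the queue; complete later */
                my_hw_submit(rq);
        }
}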
808 | /* | 810 | /* |
809 | * Access functions for manipulating queue properties | 811 | * Access functions for manipulating queue properties |
810 | */ | 812 | */ |
811 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, | 813 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, |
812 | spinlock_t *lock, int node_id); | 814 | spinlock_t *lock, int node_id); |
813 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); | 815 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); |
814 | extern void blk_cleanup_queue(struct request_queue *); | 816 | extern void blk_cleanup_queue(struct request_queue *); |
815 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); | 817 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); |
816 | extern void blk_queue_bounce_limit(struct request_queue *, u64); | 818 | extern void blk_queue_bounce_limit(struct request_queue *, u64); |
817 | extern void blk_queue_max_sectors(struct request_queue *, unsigned int); | 819 | extern void blk_queue_max_sectors(struct request_queue *, unsigned int); |
818 | extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); | 820 | extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); |
819 | extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); | 821 | extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); |
820 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 822 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
821 | extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); | 823 | extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); |
822 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 824 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
823 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); | 825 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); |
824 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); | 826 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); |
825 | extern int blk_queue_dma_drain(struct request_queue *q, | 827 | extern int blk_queue_dma_drain(struct request_queue *q, |
826 | dma_drain_needed_fn *dma_drain_needed, | 828 | dma_drain_needed_fn *dma_drain_needed, |
827 | void *buf, unsigned int size); | 829 | void *buf, unsigned int size); |
828 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); | 830 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); |
829 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); | 831 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); |
830 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | 832 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); |
831 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 833 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
832 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); | 834 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); |
833 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 835 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
834 | extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); | 836 | extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); |
835 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); | 837 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); |
836 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); | 838 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); |
837 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 839 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
838 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); | 840 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); |
839 | extern int blk_do_ordered(struct request_queue *, struct request **); | 841 | extern int blk_do_ordered(struct request_queue *, struct request **); |
840 | extern unsigned blk_ordered_cur_seq(struct request_queue *); | 842 | extern unsigned blk_ordered_cur_seq(struct request_queue *); |
841 | extern unsigned blk_ordered_req_seq(struct request *); | 843 | extern unsigned blk_ordered_req_seq(struct request *); |
842 | extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); | 844 | extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); |
843 | 845 | ||
844 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 846 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
845 | extern void blk_dump_rq_flags(struct request *, char *); | 847 | extern void blk_dump_rq_flags(struct request *, char *); |
846 | extern void generic_unplug_device(struct request_queue *); | 848 | extern void generic_unplug_device(struct request_queue *); |
847 | extern void __generic_unplug_device(struct request_queue *); | 849 | extern void __generic_unplug_device(struct request_queue *); |
848 | extern long nr_blockdev_pages(void); | 850 | extern long nr_blockdev_pages(void); |
849 | 851 | ||
850 | int blk_get_queue(struct request_queue *); | 852 | int blk_get_queue(struct request_queue *); |
851 | struct request_queue *blk_alloc_queue(gfp_t); | 853 | struct request_queue *blk_alloc_queue(gfp_t); |
852 | struct request_queue *blk_alloc_queue_node(gfp_t, int); | 854 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
853 | extern void blk_put_queue(struct request_queue *); | 855 | extern void blk_put_queue(struct request_queue *); |
854 | 856 | ||
855 | /* | 857 | /* |
856 | * tag stuff | 858 | * tag stuff |
857 | */ | 859 | */ |
858 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 860 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
859 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 861 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
860 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 862 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
861 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 863 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
862 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); | 864 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); |
863 | extern void blk_queue_free_tags(struct request_queue *); | 865 | extern void blk_queue_free_tags(struct request_queue *); |
864 | extern int blk_queue_resize_tags(struct request_queue *, int); | 866 | extern int blk_queue_resize_tags(struct request_queue *, int); |
865 | extern void blk_queue_invalidate_tags(struct request_queue *); | 867 | extern void blk_queue_invalidate_tags(struct request_queue *); |
866 | extern struct blk_queue_tag *blk_init_tags(int); | 868 | extern struct blk_queue_tag *blk_init_tags(int); |
867 | extern void blk_free_tags(struct blk_queue_tag *); | 869 | extern void blk_free_tags(struct blk_queue_tag *); |
868 | 870 | ||
869 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | 871 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, |
870 | int tag) | 872 | int tag) |
871 | { | 873 | { |
872 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 874 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
873 | return NULL; | 875 | return NULL; |
874 | return bqt->tag_index[tag]; | 876 | return bqt->tag_index[tag]; |
875 | } | 877 | } |
876 | 878 | ||
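A sketch of a tagged-completion path: the interrupt handler maps the hardware tag back to its request, releases the tag, and completes the I/O (my_irq_tag() is an assumed device accessor; the queue lock is held here, hence __blk_end_request()):

static void my_complete_tagged(struct request_queue *q)
{
        int tag = my_irq_tag();
        struct request *rq = blk_queue_find_tag(q, tag);

        if (rq == NULL)
                return;                 /* spurious or stale tag */

        blk_queue_end_tag(q, rq);       /* give the tag back first */
        __blk_end_request(rq, 0, blk_rq_bytes(rq));
}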
877 | extern int blkdev_issue_flush(struct block_device *, sector_t *); | 879 | extern int blkdev_issue_flush(struct block_device *, sector_t *); |
878 | extern int blkdev_issue_discard(struct block_device *, | 880 | extern int blkdev_issue_discard(struct block_device *, |
879 | sector_t sector, sector_t nr_sects, gfp_t); | 881 | sector_t sector, sector_t nr_sects, gfp_t); |
880 | 882 | ||
881 | static inline int sb_issue_discard(struct super_block *sb, | 883 | static inline int sb_issue_discard(struct super_block *sb, |
882 | sector_t block, sector_t nr_blocks) | 884 | sector_t block, sector_t nr_blocks) |
883 | { | 885 | { |
884 | block <<= (sb->s_blocksize_bits - 9); | 886 | block <<= (sb->s_blocksize_bits - 9); |
885 | nr_blocks <<= (sb->s_blocksize_bits - 9); | 887 | nr_blocks <<= (sb->s_blocksize_bits - 9); |
886 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); | 888 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); |
887 | } | 889 | } |
888 | 890 | ||
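For example, a filesystem might discard a just-freed extent and tolerate devices without discard support (the function and extent variables are illustrative):

static void my_free_extent(struct super_block *sb, sector_t block,
                           sector_t nr_blocks)
{
        int err = sb_issue_discard(sb, block, nr_blocks);

        /* -EOPNOTSUPP just means the device can't discard; not an error */
        if (err && err != -EOPNOTSUPP)
                printk(KERN_WARNING "discard of freed blocks failed: %d\n", err);
}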
889 | /* | 891 | /* |
890 | * command filter functions | 892 | * command filter functions |
891 | */ | 893 | */ |
892 | extern int blk_verify_command(struct blk_cmd_filter *filter, | 894 | extern int blk_verify_command(struct blk_cmd_filter *filter, |
893 | unsigned char *cmd, int has_write_perm); | 895 | unsigned char *cmd, int has_write_perm); |
894 | extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); | 896 | extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); |
895 | 897 | ||
896 | #define MAX_PHYS_SEGMENTS 128 | 898 | #define MAX_PHYS_SEGMENTS 128 |
897 | #define MAX_HW_SEGMENTS 128 | 899 | #define MAX_HW_SEGMENTS 128 |
898 | #define SAFE_MAX_SECTORS 255 | 900 | #define SAFE_MAX_SECTORS 255 |
899 | #define BLK_DEF_MAX_SECTORS 1024 | 901 | #define BLK_DEF_MAX_SECTORS 1024 |
900 | 902 | ||
901 | #define MAX_SEGMENT_SIZE 65536 | 903 | #define MAX_SEGMENT_SIZE 65536 |
902 | 904 | ||
903 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 905 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
904 | 906 | ||
905 | static inline int queue_hardsect_size(struct request_queue *q) | 907 | static inline int queue_hardsect_size(struct request_queue *q) |
906 | { | 908 | { |
907 | int retval = 512; | 909 | int retval = 512; |
908 | 910 | ||
909 | if (q && q->hardsect_size) | 911 | if (q && q->hardsect_size) |
910 | retval = q->hardsect_size; | 912 | retval = q->hardsect_size; |
911 | 913 | ||
912 | return retval; | 914 | return retval; |
913 | } | 915 | } |
914 | 916 | ||
915 | static inline int bdev_hardsect_size(struct block_device *bdev) | 917 | static inline int bdev_hardsect_size(struct block_device *bdev) |
916 | { | 918 | { |
917 | return queue_hardsect_size(bdev_get_queue(bdev)); | 919 | return queue_hardsect_size(bdev_get_queue(bdev)); |
918 | } | 920 | } |
919 | 921 | ||
920 | static inline int queue_dma_alignment(struct request_queue *q) | 922 | static inline int queue_dma_alignment(struct request_queue *q) |
921 | { | 923 | { |
922 | return q ? q->dma_alignment : 511; | 924 | return q ? q->dma_alignment : 511; |
923 | } | 925 | } |
924 | 926 | ||
925 | static inline int blk_rq_aligned(struct request_queue *q, void *addr, | 927 | static inline int blk_rq_aligned(struct request_queue *q, void *addr, |
926 | unsigned int len) | 928 | unsigned int len) |
927 | { | 929 | { |
928 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; | 930 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; |
929 | return !((unsigned long)addr & alignment) && !(len & alignment); | 931 | return !((unsigned long)addr & alignment) && !(len & alignment); |
930 | } | 932 | } |
931 | 933 | ||
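A sketch of the decision this helper enables when mapping kernel buffers: map the buffer directly if it satisfies the combined mask, otherwise fall back to a bounce copy (the function name is illustrative):

static int my_needs_copy(struct request_queue *q, void *buf, unsigned int len)
{
        /* both the address and the length must honor the device's mask */
        return !blk_rq_aligned(q, buf, len);
}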
932 | /* assumes size > 256 */ | 934 | /* assumes size > 256 */ |
933 | static inline unsigned int blksize_bits(unsigned int size) | 935 | static inline unsigned int blksize_bits(unsigned int size) |
934 | { | 936 | { |
935 | unsigned int bits = 8; | 937 | unsigned int bits = 8; |
936 | do { | 938 | do { |
937 | bits++; | 939 | bits++; |
938 | size >>= 1; | 940 | size >>= 1; |
939 | } while (size > 256); | 941 | } while (size > 256); |
940 | return bits; | 942 | return bits; |
941 | } | 943 | } |
942 | 944 | ||
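Two worked examples of the loop above (each iteration halves size and bumps the bit count until size drops to 256):

blksize_bits(512);      /* one shift:   512 -> 256, returns 9  */
blksize_bits(4096);     /* four shifts: 4096 -> 256, returns 12 */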
943 | static inline unsigned int block_size(struct block_device *bdev) | 945 | static inline unsigned int block_size(struct block_device *bdev) |
944 | { | 946 | { |
945 | return bdev->bd_block_size; | 947 | return bdev->bd_block_size; |
946 | } | 948 | } |
947 | 949 | ||
948 | typedef struct {struct page *v;} Sector; | 950 | typedef struct {struct page *v;} Sector; |
949 | 951 | ||
950 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 952 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
951 | 953 | ||
952 | static inline void put_dev_sector(Sector p) | 954 | static inline void put_dev_sector(Sector p) |
953 | { | 955 | { |
954 | page_cache_release(p.v); | 956 | page_cache_release(p.v); |
955 | } | 957 | } |
956 | 958 | ||
957 | struct work_struct; | 959 | struct work_struct; |
958 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 960 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
959 | void kblockd_flush_work(struct work_struct *work); | 961 | void kblockd_flush_work(struct work_struct *work); |
960 | 962 | ||
961 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 963 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
962 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 964 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
963 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 965 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
964 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 966 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
965 | 967 | ||
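For instance, a hypothetical driver owning all minors of major 240 would declare:

MODULE_ALIAS_BLOCKDEV_MAJOR(240);       /* expands to "block-major-240-*" */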
966 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 968 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
967 | 969 | ||
968 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ | 970 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ |
969 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ | 971 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ |
970 | 972 | ||
971 | struct blk_integrity_exchg { | 973 | struct blk_integrity_exchg { |
972 | void *prot_buf; | 974 | void *prot_buf; |
973 | void *data_buf; | 975 | void *data_buf; |
974 | sector_t sector; | 976 | sector_t sector; |
975 | unsigned int data_size; | 977 | unsigned int data_size; |
976 | unsigned short sector_size; | 978 | unsigned short sector_size; |
977 | const char *disk_name; | 979 | const char *disk_name; |
978 | }; | 980 | }; |
979 | 981 | ||
980 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); | 982 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); |
981 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); | 983 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); |
982 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); | 984 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); |
983 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); | 985 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); |
984 | 986 | ||
985 | struct blk_integrity { | 987 | struct blk_integrity { |
986 | integrity_gen_fn *generate_fn; | 988 | integrity_gen_fn *generate_fn; |
987 | integrity_vrfy_fn *verify_fn; | 989 | integrity_vrfy_fn *verify_fn; |
988 | integrity_set_tag_fn *set_tag_fn; | 990 | integrity_set_tag_fn *set_tag_fn; |
989 | integrity_get_tag_fn *get_tag_fn; | 991 | integrity_get_tag_fn *get_tag_fn; |
990 | 992 | ||
991 | unsigned short flags; | 993 | unsigned short flags; |
992 | unsigned short tuple_size; | 994 | unsigned short tuple_size; |
993 | unsigned short sector_size; | 995 | unsigned short sector_size; |
994 | unsigned short tag_size; | 996 | unsigned short tag_size; |
995 | 997 | ||
996 | const char *name; | 998 | const char *name; |
997 | 999 | ||
998 | struct kobject kobj; | 1000 | struct kobject kobj; |
999 | }; | 1001 | }; |
1000 | 1002 | ||
1001 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); | 1003 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); |
1002 | extern void blk_integrity_unregister(struct gendisk *); | 1004 | extern void blk_integrity_unregister(struct gendisk *); |
1003 | extern int blk_integrity_compare(struct block_device *, struct block_device *); | 1005 | extern int blk_integrity_compare(struct block_device *, struct block_device *); |
1004 | extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); | 1006 | extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); |
1005 | extern int blk_rq_count_integrity_sg(struct request *); | 1007 | extern int blk_rq_count_integrity_sg(struct request *); |
1006 | 1008 | ||
1007 | static inline int blk_integrity_rq(struct request *rq) | 1009 | static inline int blk_integrity_rq(struct request *rq) |
1008 | { | 1010 | { |
1009 | if (rq->bio == NULL) | 1011 | if (rq->bio == NULL) |
1010 | return 0; | 1012 | return 0; |
1011 | 1013 | ||
1012 | return bio_integrity(rq->bio); | 1014 | return bio_integrity(rq->bio); |
1013 | } | 1015 | } |
1014 | 1016 | ||
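A sketch of a driver gating its protection-data setup on these helpers (my_map_integrity() is an illustrative name):

static int my_map_integrity(struct request *rq, struct scatterlist *sg)
{
        if (!blk_integrity_rq(rq))
                return 0;                       /* no protection data attached */

        /* caller sized sg via blk_rq_count_integrity_sg(rq) */
        return blk_rq_map_integrity_sg(rq, sg);
}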
1015 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | 1017 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
1016 | 1018 | ||
1017 | #define blk_integrity_rq(rq) (0) | 1019 | #define blk_integrity_rq(rq) (0) |
1018 | #define blk_rq_count_integrity_sg(a) (0) | 1020 | #define blk_rq_count_integrity_sg(a) (0) |
1019 | #define blk_rq_map_integrity_sg(a, b) (0) | 1021 | #define blk_rq_map_integrity_sg(a, b) (0) |
1020 | #define blk_integrity_compare(a, b) (0) | 1022 | #define blk_integrity_compare(a, b) (0) |
1021 | #define blk_integrity_register(a, b) (0) | 1023 | #define blk_integrity_register(a, b) (0) |
1022 | #define blk_integrity_unregister(a) do { } while (0) | 1024 | #define blk_integrity_unregister(a) do { } while (0) |
1023 | 1025 | ||
1024 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 1026 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1025 | 1027 | ||
1026 | #else /* CONFIG_BLOCK */ | 1028 | #else /* CONFIG_BLOCK */ |
1027 | /* | 1029 | /* |
1028 | * stubs for when the block layer is configured out | 1030 | * stubs for when the block layer is configured out |
1029 | */ | 1031 | */ |
1030 | #define buffer_heads_over_limit 0 | 1032 | #define buffer_heads_over_limit 0 |
1031 | 1033 | ||
1032 | static inline long nr_blockdev_pages(void) | 1034 | static inline long nr_blockdev_pages(void) |
1033 | { | 1035 | { |
1034 | return 0; | 1036 | return 0; |
1035 | } | 1037 | } |
1036 | 1038 | ||
1037 | #endif /* CONFIG_BLOCK */ | 1039 | #endif /* CONFIG_BLOCK */ |
1038 | 1040 | ||
1039 | #endif | 1041 | #endif |
1040 | 1042 |