Doug / smarc-fsl-linux-kernel

1

/*

1

/*

2

3

4

5

* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>

5

* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>

6

* kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>

6

* kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>

7

* - July2000

7

* - July2000

8

* bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001

8

* bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001

9

*/

9

*/

10

11

/*

11

/*

12

* This handles all read/write requests to block devices

12

* This handles all read/write requests to block devices

13

*/

13

*/

14

#include <linux/kernel.h>

14

#include <linux/kernel.h>

15

#include <linux/module.h>

15

#include <linux/module.h>

16

#include <linux/backing-dev.h>

16

#include <linux/backing-dev.h>

17

#include <linux/bio.h>

17

#include <linux/bio.h>

18

#include <linux/blkdev.h>

18

#include <linux/blkdev.h>

19

#include <linux/highmem.h>

19

#include <linux/highmem.h>

20

#include <linux/mm.h>

20

#include <linux/mm.h>

21

#include <linux/kernel_stat.h>

21

#include <linux/kernel_stat.h>

22

#include <linux/string.h>

22

#include <linux/string.h>

23

#include <linux/init.h>

23

#include <linux/init.h>

24

#include <linux/completion.h>

24

#include <linux/completion.h>

25

#include <linux/slab.h>

25

#include <linux/slab.h>

26

#include <linux/swap.h>

26

#include <linux/swap.h>

27

#include <linux/writeback.h>

27

#include <linux/writeback.h>

28

#include <linux/task_io_accounting_ops.h>

28

#include <linux/task_io_accounting_ops.h>

29

#include <linux/fault-inject.h>

29

#include <linux/fault-inject.h>

30

#include <linux/list_sort.h>

30

#include <linux/list_sort.h>

31

32

#define CREATE_TRACE_POINTS

32

#define CREATE_TRACE_POINTS

33

#include <trace/events/block.h>

33

#include <trace/events/block.h>

34

35

#include "blk.h"

35

#include "blk.h"

36

37

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);

37

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);

38

EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);

38

EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);

39

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

39

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

40

41

static int __make_request(struct request_queue *q, struct bio *bio);

41

static int __make_request(struct request_queue *q, struct bio *bio);

42

43

/*

43

/*

44

* For the allocated request tables

44

* For the allocated request tables

45

*/

45

*/

46

static struct kmem_cache *request_cachep;

46

static struct kmem_cache *request_cachep;

47

48

/*

48

/*

49

* For queue allocation

49

* For queue allocation

50

*/

50

*/

51

struct kmem_cache *blk_requestq_cachep;

51

struct kmem_cache *blk_requestq_cachep;

52

53

/*

53

/*

54

* Controlling structure to kblockd

54

* Controlling structure to kblockd

55

*/

55

*/

56

static struct workqueue_struct *kblockd_workqueue;

56

static struct workqueue_struct *kblockd_workqueue;

57

58

static void drive_stat_acct(struct request *rq, int new_io)

58

static void drive_stat_acct(struct request *rq, int new_io)

59

{

59

{

60

struct hd_struct *part;

60

struct hd_struct *part;

61

int rw = rq_data_dir(rq);

61

int rw = rq_data_dir(rq);

62

int cpu;

62

int cpu;

63

64

if (!blk_do_io_stat(rq))

64

if (!blk_do_io_stat(rq))

65

return;

65

return;

66

67

cpu = part_stat_lock();

67

cpu = part_stat_lock();

68

69

if (!new_io) {

69

if (!new_io) {

70

part = rq->part;

70

part = rq->part;

71

part_stat_inc(cpu, part, merges[rw]);

71

part_stat_inc(cpu, part, merges[rw]);

72

} else {

72

} else {

73

part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

73

part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

74

if (!hd_struct_try_get(part)) {

74

if (!hd_struct_try_get(part)) {

75

/*

75

/*

76

* The partition is already being removed,

76

* The partition is already being removed,

77

* the request will be accounted on the disk only

77

* the request will be accounted on the disk only

78

*

78

*

79

* We take a reference on disk->part0 although that

79

* We take a reference on disk->part0 although that

80

* partition will never be deleted, so we can treat

80

* partition will never be deleted, so we can treat

81

* it as any other partition.

81

* it as any other partition.

82

*/

82

*/

83

part = &rq->rq_disk->part0;

83

part = &rq->rq_disk->part0;

84

hd_struct_get(part);

84

hd_struct_get(part);

85

}

85

}

86

part_round_stats(cpu, part);

86

part_round_stats(cpu, part);

87

part_inc_in_flight(part, rw);

87

part_inc_in_flight(part, rw);

88

rq->part = part;

88

rq->part = part;

89

}

89

}

90

91

part_stat_unlock();

91

part_stat_unlock();

92

}

92

}

93

94

void blk_queue_congestion_threshold(struct request_queue *q)

94

void blk_queue_congestion_threshold(struct request_queue *q)

95

{

95

{

96

int nr;

96

int nr;

97

98

nr = q->nr_requests - (q->nr_requests / 8) + 1;

98

nr = q->nr_requests - (q->nr_requests / 8) + 1;

99

if (nr > q->nr_requests)

99

if (nr > q->nr_requests)

100

nr = q->nr_requests;

100

nr = q->nr_requests;

101

q->nr_congestion_on = nr;

101

q->nr_congestion_on = nr;

102

103

nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;

103

nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;

104

if (nr < 1)

104

if (nr < 1)

105

nr = 1;

105

nr = 1;

106

q->nr_congestion_off = nr;

106

q->nr_congestion_off = nr;

107

}

107

}

108

109

/**

109

/**

110

* blk_get_backing_dev_info - get the address of a queue's backing_dev_info

110

* blk_get_backing_dev_info - get the address of a queue's backing_dev_info

111

* @bdev: device

111

* @bdev: device

112

*

112

*

113

* Locates the passed device's request queue and returns the address of its

113

* Locates the passed device's request queue and returns the address of its

114

* backing_dev_info

114

* backing_dev_info

115

*

115

*

116

* Will return NULL if the request queue cannot be located.

116

* Will return NULL if the request queue cannot be located.

117

*/

117

*/

118

struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)

118

struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)

119

{

119

{

120

struct backing_dev_info *ret = NULL;

120

struct backing_dev_info *ret = NULL;

121

struct request_queue *q = bdev_get_queue(bdev);

121

struct request_queue *q = bdev_get_queue(bdev);

122

123

if (q)

123

if (q)

124

ret = &q->backing_dev_info;

124

ret = &q->backing_dev_info;

125

return ret;

125

return ret;

126

}

126

}

127

EXPORT_SYMBOL(blk_get_backing_dev_info);

127

EXPORT_SYMBOL(blk_get_backing_dev_info);

128

129

void blk_rq_init(struct request_queue *q, struct request *rq)

129

void blk_rq_init(struct request_queue *q, struct request *rq)

130

{

130

{

131

memset(rq, 0, sizeof(*rq));

131

memset(rq, 0, sizeof(*rq));

132

133

INIT_LIST_HEAD(&rq->queuelist);

133

INIT_LIST_HEAD(&rq->queuelist);

134

INIT_LIST_HEAD(&rq->timeout_list);

134

INIT_LIST_HEAD(&rq->timeout_list);

135

rq->cpu = -1;

135

rq->cpu = -1;

136

rq->q = q;

136

rq->q = q;

137

rq->__sector = (sector_t) -1;

137

rq->__sector = (sector_t) -1;

138

INIT_HLIST_NODE(&rq->hash);

138

INIT_HLIST_NODE(&rq->hash);

139

RB_CLEAR_NODE(&rq->rb_node);

139

RB_CLEAR_NODE(&rq->rb_node);

140

rq->cmd = rq->__cmd;

140

rq->cmd = rq->__cmd;

141

rq->cmd_len = BLK_MAX_CDB;

141

rq->cmd_len = BLK_MAX_CDB;

142

rq->tag = -1;

142

rq->tag = -1;

143

rq->ref_count = 1;

143

rq->ref_count = 1;

144

rq->start_time = jiffies;

144

rq->start_time = jiffies;

145

set_start_time_ns(rq);

145

set_start_time_ns(rq);

146

rq->part = NULL;

146

rq->part = NULL;

147

}

147

}

148

EXPORT_SYMBOL(blk_rq_init);

148

EXPORT_SYMBOL(blk_rq_init);

149

150

static void req_bio_endio(struct request *rq, struct bio *bio,

150

static void req_bio_endio(struct request *rq, struct bio *bio,

151

unsigned int nbytes, int error)

151

unsigned int nbytes, int error)

152

{

152

{

153

if (error)

153

if (error)

154

clear_bit(BIO_UPTODATE, &bio->bi_flags);

154

clear_bit(BIO_UPTODATE, &bio->bi_flags);

155

else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))

155

else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))

156

error = -EIO;

156

error = -EIO;

157

158

if (unlikely(nbytes > bio->bi_size)) {

158

if (unlikely(nbytes > bio->bi_size)) {

159

printk(KERN_ERR "%s: want %u bytes done, %u left\n",

159

printk(KERN_ERR "%s: want %u bytes done, %u left\n",

160

__func__, nbytes, bio->bi_size);

160

__func__, nbytes, bio->bi_size);

161

nbytes = bio->bi_size;

161

nbytes = bio->bi_size;

162

}

162

}

163

164

if (unlikely(rq->cmd_flags & REQ_QUIET))

164

if (unlikely(rq->cmd_flags & REQ_QUIET))

165

set_bit(BIO_QUIET, &bio->bi_flags);

165

set_bit(BIO_QUIET, &bio->bi_flags);

166

167

bio->bi_size -= nbytes;

167

bio->bi_size -= nbytes;

168

bio->bi_sector += (nbytes >> 9);

168

bio->bi_sector += (nbytes >> 9);

169

170

if (bio_integrity(bio))

170

if (bio_integrity(bio))

171

bio_integrity_advance(bio, nbytes);

171

bio_integrity_advance(bio, nbytes);

172

173

/* don't actually finish bio if it's part of flush sequence */

173

/* don't actually finish bio if it's part of flush sequence */

174

if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))

174

if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))

175

bio_endio(bio, error);

175

bio_endio(bio, error);

176

}

176

}

177

178

void blk_dump_rq_flags(struct request *rq, char *msg)

178

void blk_dump_rq_flags(struct request *rq, char *msg)

179

{

179

{

180

int bit;

180

int bit;

181

182

printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,

182

printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,

183

rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,

183

rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,

184

rq->cmd_flags);

184

rq->cmd_flags);

185

186

printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",

186

printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",

187

(unsigned long long)blk_rq_pos(rq),

187

(unsigned long long)blk_rq_pos(rq),

188

blk_rq_sectors(rq), blk_rq_cur_sectors(rq));

188

blk_rq_sectors(rq), blk_rq_cur_sectors(rq));

189

printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",

189

printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",

190

rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

190

rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

191

192

if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {

192

if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {

193

printk(KERN_INFO " cdb: ");

193

printk(KERN_INFO " cdb: ");

194

for (bit = 0; bit < BLK_MAX_CDB; bit++)

194

for (bit = 0; bit < BLK_MAX_CDB; bit++)

195

printk("%02x ", rq->cmd[bit]);

195

printk("%02x ", rq->cmd[bit]);

196

printk("\n");

196

printk("\n");

197

}

197

}

198

}

198

}

199

EXPORT_SYMBOL(blk_dump_rq_flags);

199

EXPORT_SYMBOL(blk_dump_rq_flags);

200

201

/*

201

/*

202

* Make sure that plugs that were pending when this function was entered,

202

* Make sure that plugs that were pending when this function was entered,

203

* are now complete and requests pushed to the queue.

203

* are now complete and requests pushed to the queue.

204

*/

204

*/

205

static inline void queue_sync_plugs(struct request_queue *q)

205

static inline void queue_sync_plugs(struct request_queue *q)

206

{

206

{

207

/*

207

/*

208

* If the current process is plugged and has barriers submitted,

208

* If the current process is plugged and has barriers submitted,

209

* we will livelock if we don't unplug first.

209

* we will livelock if we don't unplug first.

210

*/

210

*/

211

blk_flush_plug(current);

211

blk_flush_plug(current);

212

}

212

}

213

214

static void blk_delay_work(struct work_struct *work)

214

static void blk_delay_work(struct work_struct *work)

215

{

215

{

216

struct request_queue *q;

216

struct request_queue *q;

217

218

q = container_of(work, struct request_queue, delay_work.work);

218

q = container_of(work, struct request_queue, delay_work.work);

219

spin_lock_irq(q->queue_lock);

219

spin_lock_irq(q->queue_lock);

220

__blk_run_queue(q, false);

220

__blk_run_queue(q, false);

221

spin_unlock_irq(q->queue_lock);

221

spin_unlock_irq(q->queue_lock);

222

}

222

}

223

224

/**

224

/**

225

* blk_delay_queue - restart queueing after defined interval

225

* blk_delay_queue - restart queueing after defined interval

226

* @q: The &struct request_queue in question

226

* @q: The &struct request_queue in question

227

* @msecs: Delay in msecs

227

* @msecs: Delay in msecs

228

*

228

*

229

* Description:

229

* Description:

230

* Sometimes queueing needs to be postponed for a little while, to allow

230

* Sometimes queueing needs to be postponed for a little while, to allow

231

* resources to come back. This function will make sure that queueing is

231

* resources to come back. This function will make sure that queueing is

232

* restarted around the specified time.

232

* restarted around the specified time.

233

*/

233

*/

234

void blk_delay_queue(struct request_queue *q, unsigned long msecs)

234

void blk_delay_queue(struct request_queue *q, unsigned long msecs)

235

{

235

{

236

schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));

236

schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));

237

}

237

}

238

EXPORT_SYMBOL(blk_delay_queue);

238

EXPORT_SYMBOL(blk_delay_queue);

239

240

/**

240

/**

241

* blk_start_queue - restart a previously stopped queue

241

* blk_start_queue - restart a previously stopped queue

242

* @q: The &struct request_queue in question

242

* @q: The &struct request_queue in question

243

*

243

*

244

* Description:

244

* Description:

245

* blk_start_queue() will clear the stop flag on the queue, and call

245

* blk_start_queue() will clear the stop flag on the queue, and call

246

* the request_fn for the queue if it was in a stopped state when

246

* the request_fn for the queue if it was in a stopped state when

247

* entered. Also see blk_stop_queue(). Queue lock must be held.

247

* entered. Also see blk_stop_queue(). Queue lock must be held.

248

**/

248

**/

249

void blk_start_queue(struct request_queue *q)

249

void blk_start_queue(struct request_queue *q)

250

{

250

{

251

WARN_ON(!irqs_disabled());

251

WARN_ON(!irqs_disabled());

252

253

queue_flag_clear(QUEUE_FLAG_STOPPED, q);

253

queue_flag_clear(QUEUE_FLAG_STOPPED, q);

254

__blk_run_queue(q, false);

254

__blk_run_queue(q, false);

255

}

255

}

256

EXPORT_SYMBOL(blk_start_queue);

256

EXPORT_SYMBOL(blk_start_queue);

257

258

/**

258

/**

259

* blk_stop_queue - stop a queue

259

* blk_stop_queue - stop a queue

260

* @q: The &struct request_queue in question

260

* @q: The &struct request_queue in question

261

*

261

*

262

* Description:

262

* Description:

263

* The Linux block layer assumes that a block driver will consume all

263

* The Linux block layer assumes that a block driver will consume all

264

* entries on the request queue when the request_fn strategy is called.

264

* entries on the request queue when the request_fn strategy is called.

265

* Often this will not happen, because of hardware limitations (queue

265

* Often this will not happen, because of hardware limitations (queue

266

* depth settings). If a device driver gets a 'queue full' response,

266

* depth settings). If a device driver gets a 'queue full' response,

267

* or if it simply chooses not to queue more I/O at one point, it can

267

* or if it simply chooses not to queue more I/O at one point, it can

268

* call this function to prevent the request_fn from being called until

268

* call this function to prevent the request_fn from being called until

269

* the driver has signalled it's ready to go again. This happens by calling

269

* the driver has signalled it's ready to go again. This happens by calling

270

* blk_start_queue() to restart queue operations. Queue lock must be held.

270

* blk_start_queue() to restart queue operations. Queue lock must be held.

271

**/

271

**/

272

void blk_stop_queue(struct request_queue *q)

272

void blk_stop_queue(struct request_queue *q)

273

{

273

{

274

cancel_delayed_work(&q->delay_work);

274

__cancel_delayed_work(&q->delay_work);

275

queue_flag_set(QUEUE_FLAG_STOPPED, q);

275

queue_flag_set(QUEUE_FLAG_STOPPED, q);

276

}

276

}

277

EXPORT_SYMBOL(blk_stop_queue);

277

EXPORT_SYMBOL(blk_stop_queue);

278

279

/**

279

/**

280

* blk_sync_queue - cancel any pending callbacks on a queue

280

* blk_sync_queue - cancel any pending callbacks on a queue

281

* @q: the queue

281

* @q: the queue

282

*

282

*

283

* Description:

283

* Description:

284

* The block layer may perform asynchronous callback activity

284

* The block layer may perform asynchronous callback activity

285

* on a queue, such as calling the unplug function after a timeout.

285

* on a queue, such as calling the unplug function after a timeout.

286

* A block device may call blk_sync_queue to ensure that any

286

* A block device may call blk_sync_queue to ensure that any

287

* such activity is cancelled, thus allowing it to release resources

287

* such activity is cancelled, thus allowing it to release resources

288

* that the callbacks might use. The caller must already have made sure

288

* that the callbacks might use. The caller must already have made sure

289

* that its ->make_request_fn will not re-add plugging prior to calling

289

* that its ->make_request_fn will not re-add plugging prior to calling

290

* this function.

290

* this function.

291

*

291

*

292

* This function does not cancel any asynchronous activity arising

292

* This function does not cancel any asynchronous activity arising

293

* out of elevator or throttling code. That would require elevator_exit()

293

* out of elevator or throttling code. That would require elevator_exit()

294

* and blk_throtl_exit() to be called with queue lock initialized.

294

* and blk_throtl_exit() to be called with queue lock initialized.

295

*

295

*

296

*/

296

*/

297

void blk_sync_queue(struct request_queue *q)

297

void blk_sync_queue(struct request_queue *q)

298

{

298

{

299

del_timer_sync(&q->timeout);

299

del_timer_sync(&q->timeout);

300

cancel_delayed_work_sync(&q->delay_work);

300

cancel_delayed_work_sync(&q->delay_work);

301

queue_sync_plugs(q);

301

queue_sync_plugs(q);

302

}

302

}

303

EXPORT_SYMBOL(blk_sync_queue);

303

EXPORT_SYMBOL(blk_sync_queue);

304

305

/**

305

/**

306

* __blk_run_queue - run a single device queue

306

* __blk_run_queue - run a single device queue

307

* @q: The queue to run

307

* @q: The queue to run

308

* @force_kblockd: Don't run @q->request_fn directly. Use kblockd.

308

* @force_kblockd: Don't run @q->request_fn directly. Use kblockd.

309

*

309

*

310

* Description:

310

* Description:

311

* See @blk_run_queue. This variant must be called with the queue lock

311

* See @blk_run_queue. This variant must be called with the queue lock

312

* held and interrupts disabled.

312

* held and interrupts disabled.

313

*

313

*

314

*/

314

*/

315

void __blk_run_queue(struct request_queue *q, bool force_kblockd)

315

void __blk_run_queue(struct request_queue *q, bool force_kblockd)

316

{

316

{

317

if (unlikely(blk_queue_stopped(q)))

317

if (unlikely(blk_queue_stopped(q)))

318

return;

318

return;

319

320

/*

320

/*

321

* Only recurse once to avoid overrunning the stack, let the unplug

321

* Only recurse once to avoid overrunning the stack, let the unplug

322

* handling reinvoke the handler shortly if we already got there.

322

* handling reinvoke the handler shortly if we already got there.

323

*/

323

*/

324

if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {

324

if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {

325

q->request_fn(q);

325

q->request_fn(q);

326

queue_flag_clear(QUEUE_FLAG_REENTER, q);

326

queue_flag_clear(QUEUE_FLAG_REENTER, q);

327

} else

327

} else

328

queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);

328

queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);

329

}

329

}

330

EXPORT_SYMBOL(__blk_run_queue);

330

EXPORT_SYMBOL(__blk_run_queue);

331

332

/**

332

/**

333

* blk_run_queue - run a single device queue

333

* blk_run_queue - run a single device queue

334

* @q: The queue to run

334

* @q: The queue to run

335

*

335

*

336

* Description:

336

* Description:

337

* Invoke request handling on this queue, if it has pending work to do.

337

* Invoke request handling on this queue, if it has pending work to do.

338

* May be used to restart queueing when a request has completed.

338

* May be used to restart queueing when a request has completed.

339

*/

339

*/

340

void blk_run_queue(struct request_queue *q)

340

void blk_run_queue(struct request_queue *q)

341

{

341

{

342

unsigned long flags;

342

unsigned long flags;

343

344

spin_lock_irqsave(q->queue_lock, flags);

344

spin_lock_irqsave(q->queue_lock, flags);

345

__blk_run_queue(q, false);

345

__blk_run_queue(q, false);

346

spin_unlock_irqrestore(q->queue_lock, flags);

346

spin_unlock_irqrestore(q->queue_lock, flags);

347

}

347

}

348

EXPORT_SYMBOL(blk_run_queue);

348

EXPORT_SYMBOL(blk_run_queue);

349

350

void blk_put_queue(struct request_queue *q)

350

void blk_put_queue(struct request_queue *q)

351

{

351

{

352

kobject_put(&q->kobj);

352

kobject_put(&q->kobj);

353

}

353

}

354

355

/*

355

/*

356

* Note: If a driver supplied the queue lock, it should not zap that lock

356

* Note: If a driver supplied the queue lock, it should not zap that lock

357

* unexpectedly as some queue cleanup components like elevator_exit() and

357

* unexpectedly as some queue cleanup components like elevator_exit() and

358

* blk_throtl_exit() need queue lock.

358

* blk_throtl_exit() need queue lock.

359

*/

359

*/

360

void blk_cleanup_queue(struct request_queue *q)

360

void blk_cleanup_queue(struct request_queue *q)

361

{

361

{

362

/*

362

/*

363

* We know we have process context here, so we can be a little

363

* We know we have process context here, so we can be a little

364

* cautious and ensure that pending block actions on this device

364

* cautious and ensure that pending block actions on this device

365

* are done before moving on. Going into this function, we should

365

* are done before moving on. Going into this function, we should

366

* not have processes doing IO to this device.

366

* not have processes doing IO to this device.

367

*/

367

*/

368

blk_sync_queue(q);

368

blk_sync_queue(q);

369

370

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);

370

del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);

371

mutex_lock(&q->sysfs_lock);

371

mutex_lock(&q->sysfs_lock);

372

queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);

372

queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);

373

mutex_unlock(&q->sysfs_lock);

373

mutex_unlock(&q->sysfs_lock);

374

375

if (q->elevator)

375

if (q->elevator)

376

elevator_exit(q->elevator);

376

elevator_exit(q->elevator);

377

378

blk_throtl_exit(q);

378

blk_throtl_exit(q);

379

380

blk_put_queue(q);

380

blk_put_queue(q);

381

}

381

}

382

EXPORT_SYMBOL(blk_cleanup_queue);

382

EXPORT_SYMBOL(blk_cleanup_queue);

383

384

static int blk_init_free_list(struct request_queue *q)

384

static int blk_init_free_list(struct request_queue *q)

385

{

385

{

386

struct request_list *rl = &q->rq;

386

struct request_list *rl = &q->rq;

387

388

if (unlikely(rl->rq_pool))

388

if (unlikely(rl->rq_pool))

389

return 0;

389

return 0;

390

391

rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;

391

rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;

392

rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;

392

rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;

393

rl->elvpriv = 0;

393

rl->elvpriv = 0;

394

init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);

394

init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);

395

init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

395

init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

396

397

rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,

397

rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,

398

mempool_free_slab, request_cachep, q->node);

398

mempool_free_slab, request_cachep, q->node);

399

400

if (!rl->rq_pool)

400

if (!rl->rq_pool)

401

return -ENOMEM;

401

return -ENOMEM;

402

403

return 0;

403

return 0;

404

}

404

}

405

406

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)

406

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)

407

{

407

{

408

return blk_alloc_queue_node(gfp_mask, -1);

408

return blk_alloc_queue_node(gfp_mask, -1);

409

}

409

}

410

EXPORT_SYMBOL(blk_alloc_queue);

410

EXPORT_SYMBOL(blk_alloc_queue);

411

412

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)

412

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)

413

{

413

{

414

struct request_queue *q;

414

struct request_queue *q;

415

int err;

415

int err;

416

417

q = kmem_cache_alloc_node(blk_requestq_cachep,

417

q = kmem_cache_alloc_node(blk_requestq_cachep,

418

gfp_mask | __GFP_ZERO, node_id);

418

gfp_mask | __GFP_ZERO, node_id);

419

if (!q)

419

if (!q)

420

return NULL;

420

return NULL;

421

422

q->backing_dev_info.ra_pages =

422

q->backing_dev_info.ra_pages =

423

(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;

423

(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;

424

q->backing_dev_info.state = 0;

424

q->backing_dev_info.state = 0;

425

q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;

425

q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;

426

q->backing_dev_info.name = "block";

426

q->backing_dev_info.name = "block";

427

428

err = bdi_init(&q->backing_dev_info);

428

err = bdi_init(&q->backing_dev_info);

429

if (err) {

429

if (err) {

430

kmem_cache_free(blk_requestq_cachep, q);

430

kmem_cache_free(blk_requestq_cachep, q);

431

return NULL;

431

return NULL;

432

}

432

}

433

434

if (blk_throtl_init(q)) {

434

if (blk_throtl_init(q)) {

435

kmem_cache_free(blk_requestq_cachep, q);

435

kmem_cache_free(blk_requestq_cachep, q);

436

return NULL;

436

return NULL;

437

}

437

}

438

439

setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,

439

setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,

440

laptop_mode_timer_fn, (unsigned long) q);

440

laptop_mode_timer_fn, (unsigned long) q);

441

setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);

441

setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);

442

INIT_LIST_HEAD(&q->timeout_list);

442

INIT_LIST_HEAD(&q->timeout_list);

443

INIT_LIST_HEAD(&q->flush_queue[0]);

443

INIT_LIST_HEAD(&q->flush_queue[0]);

444

INIT_LIST_HEAD(&q->flush_queue[1]);

444

INIT_LIST_HEAD(&q->flush_queue[1]);

445

INIT_LIST_HEAD(&q->flush_data_in_flight);

445

INIT_LIST_HEAD(&q->flush_data_in_flight);

446

INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);

446

INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);

447

448

kobject_init(&q->kobj, &blk_queue_ktype);

448

kobject_init(&q->kobj, &blk_queue_ktype);

449

450

mutex_init(&q->sysfs_lock);

450

mutex_init(&q->sysfs_lock);

451

spin_lock_init(&q->__queue_lock);

451

spin_lock_init(&q->__queue_lock);

452

453

/*

453

/*

454

* By default initialize queue_lock to internal lock and driver can

454

* By default initialize queue_lock to internal lock and driver can

455

* override it later if need be.

455

* override it later if need be.

456

*/

456

*/

457

q->queue_lock = &q->__queue_lock;

457

q->queue_lock = &q->__queue_lock;

458

459

return q;

459

return q;

460

}

460

}

461

EXPORT_SYMBOL(blk_alloc_queue_node);

461

EXPORT_SYMBOL(blk_alloc_queue_node);

462

463

/**

463

/**

464

* blk_init_queue - prepare a request queue for use with a block device

464

* blk_init_queue - prepare a request queue for use with a block device

465

* @rfn: The function to be called to process requests that have been

465

* @rfn: The function to be called to process requests that have been

466

* placed on the queue.

466

* placed on the queue.

467

* @lock: Request queue spin lock

467

* @lock: Request queue spin lock

468

*

468

*

469

* Description:

469

* Description:

470

* If a block device wishes to use the standard request handling procedures,

470

* If a block device wishes to use the standard request handling procedures,

471

* which sorts requests and coalesces adjacent requests, then it must

471

* which sorts requests and coalesces adjacent requests, then it must

472

* call blk_init_queue(). The function @rfn will be called when there

472

* call blk_init_queue(). The function @rfn will be called when there

473

* are requests on the queue that need to be processed. If the device

473

* are requests on the queue that need to be processed. If the device

474

* supports plugging, then @rfn may not be called immediately when requests

474

* supports plugging, then @rfn may not be called immediately when requests

475

* are available on the queue, but may be called at some time later instead.

475

* are available on the queue, but may be called at some time later instead.

476

* Plugged queues are generally unplugged when a buffer belonging to one

476

* Plugged queues are generally unplugged when a buffer belonging to one

477

* of the requests on the queue is needed, or due to memory pressure.

477

* of the requests on the queue is needed, or due to memory pressure.

478

*

478

*

479

* @rfn is not required, or even expected, to remove all requests off the

479

* @rfn is not required, or even expected, to remove all requests off the

480

* queue, but only as many as it can handle at a time. If it does leave

480

* queue, but only as many as it can handle at a time. If it does leave

481

* requests on the queue, it is responsible for arranging that the requests

481

* requests on the queue, it is responsible for arranging that the requests

482

* get dealt with eventually.

482

* get dealt with eventually.

483

*

483

*

484

* The queue spin lock must be held while manipulating the requests on the

484

* The queue spin lock must be held while manipulating the requests on the

485

* request queue; this lock will be taken also from interrupt context, so irq

485

* request queue; this lock will be taken also from interrupt context, so irq

486

* disabling is needed for it.

486

* disabling is needed for it.

487

*

487

*

488

* Function returns a pointer to the initialized request queue, or %NULL if

488

* Function returns a pointer to the initialized request queue, or %NULL if

489

* it didn't succeed.

489

* it didn't succeed.

490

*

490

*

491

* Note:

491

* Note:

492

* blk_init_queue() must be paired with a blk_cleanup_queue() call

492

* blk_init_queue() must be paired with a blk_cleanup_queue() call

493

* when the block device is deactivated (such as at module unload).

493

* when the block device is deactivated (such as at module unload).

494

**/

494

**/

495

496

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	struct request_queue *q;

	/* Same as blk_init_queue_node() with -1 passed as the node id. */
	q = blk_init_queue_node(rfn, lock, -1);

	return q;
}
EXPORT_SYMBOL(blk_init_queue);

501

502

struct request_queue *

502

struct request_queue *

503

blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

503

blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

504

{

504

{

505

struct request_queue *uninit_q, *q;

505

struct request_queue *uninit_q, *q;

506

507

uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);

507

uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);

508

if (!uninit_q)

508

if (!uninit_q)

509

return NULL;

509

return NULL;

510

511

q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);

511

q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);

512

if (!q)

512

if (!q)

513

blk_cleanup_queue(uninit_q);

513

blk_cleanup_queue(uninit_q);

514

515

return q;

515

return q;

516

}

516

}

517

EXPORT_SYMBOL(blk_init_queue_node);

517

EXPORT_SYMBOL(blk_init_queue_node);

518

519

struct request_queue *

519

struct request_queue *

520

blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,

520

blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,

521

spinlock_t *lock)

521

spinlock_t *lock)

522

{

522

{

523

return blk_init_allocated_queue_node(q, rfn, lock, -1);

523

return blk_init_allocated_queue_node(q, rfn, lock, -1);

524

}

524

}

525

EXPORT_SYMBOL(blk_init_allocated_queue);

525

EXPORT_SYMBOL(blk_init_allocated_queue);

526

527

struct request_queue *

527

struct request_queue *

528

blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,

528

blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,

529

spinlock_t *lock, int node_id)

529

spinlock_t *lock, int node_id)

530

{

530

{

531

if (!q)

531

if (!q)

532

return NULL;

532

return NULL;

533

534

q->node = node_id;

534

q->node = node_id;

535

if (blk_init_free_list(q))

535

if (blk_init_free_list(q))

536

return NULL;

536

return NULL;

537

538

q->request_fn = rfn;

538

q->request_fn = rfn;

539

q->prep_rq_fn = NULL;

539

q->prep_rq_fn = NULL;

540

q->unprep_rq_fn = NULL;

540

q->unprep_rq_fn = NULL;

541

q->queue_flags = QUEUE_FLAG_DEFAULT;

541

q->queue_flags = QUEUE_FLAG_DEFAULT;

542

543

/* Override internal queue lock with supplied lock pointer */

543

/* Override internal queue lock with supplied lock pointer */

544

if (lock)

544

if (lock)

545

q->queue_lock = lock;

545

q->queue_lock = lock;

546

547

/*

547

/*

548

* This also sets hw/phys segments, boundary and size

548

* This also sets hw/phys segments, boundary and size

549

*/

549

*/

550

blk_queue_make_request(q, __make_request);

550

blk_queue_make_request(q, __make_request);

551

552

q->sg_reserved_size = INT_MAX;

552

q->sg_reserved_size = INT_MAX;

553

554

/*

554

/*

555

* all done

555

* all done

556

*/

556

*/

557

if (!elevator_init(q, NULL)) {

557

if (!elevator_init(q, NULL)) {

558

blk_queue_congestion_threshold(q);

558

blk_queue_congestion_threshold(q);

559

return q;

559

return q;

560

}

560

}

561

562

return NULL;

562

return NULL;

563

}

563

}

564

EXPORT_SYMBOL(blk_init_allocated_queue_node);

564

EXPORT_SYMBOL(blk_init_allocated_queue_node);

565

566

int blk_get_queue(struct request_queue *q)

566

int blk_get_queue(struct request_queue *q)

567

{

567

{

568

if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {

568

if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {

569

kobject_get(&q->kobj);

569

kobject_get(&q->kobj);

570

return 0;

570

return 0;

571

}

571

}

572

573

return 1;

573

return 1;

574

}

574

}

575

576

static inline void blk_free_request(struct request_queue *q, struct request *rq)

576

static inline void blk_free_request(struct request_queue *q, struct request *rq)

577

{

577

{

578

BUG_ON(rq->cmd_flags & REQ_ON_PLUG);

578

BUG_ON(rq->cmd_flags & REQ_ON_PLUG);

579

580

if (rq->cmd_flags & REQ_ELVPRIV)

580

if (rq->cmd_flags & REQ_ELVPRIV)

581

elv_put_request(q, rq);

581

elv_put_request(q, rq);

582

mempool_free(rq, q->rq.rq_pool);

582

mempool_free(rq, q->rq.rq_pool);

583

}

583

}

584

585

static struct request *

585

static struct request *

586

blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)

586

blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)

587

{

587

{

588

struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

588

struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

589

590

if (!rq)

590

if (!rq)

591

return NULL;

591

return NULL;

592

593

blk_rq_init(q, rq);

593

blk_rq_init(q, rq);

594

595

rq->cmd_flags = flags | REQ_ALLOCED;

595

rq->cmd_flags = flags | REQ_ALLOCED;

596

597

if (priv) {

597

if (priv) {

598

if (unlikely(elv_set_request(q, rq, gfp_mask))) {

598

if (unlikely(elv_set_request(q, rq, gfp_mask))) {

599

mempool_free(rq, q->rq.rq_pool);

599

mempool_free(rq, q->rq.rq_pool);

600

return NULL;

600

return NULL;

601

}

601

}

602

rq->cmd_flags |= REQ_ELVPRIV;

602

rq->cmd_flags |= REQ_ELVPRIV;

603

}

603

}

604

605

return rq;

605

return rq;

606

}

606

}

607

608

/*

608

/*

609

* ioc_batching returns true if the ioc is a valid batching request and

609

* ioc_batching returns true if the ioc is a valid batching request and

610

* should be given priority access to a request.

610

* should be given priority access to a request.

611

*/

611

*/

612

static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)

612

static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)

613

{

613

{

614

if (!ioc)

614

if (!ioc)

615

return 0;

615

return 0;

616

617

/*

617

/*

618

* Make sure the process is able to allocate at least 1 request

618

* Make sure the process is able to allocate at least 1 request

619

* even if the batch times out, otherwise we could theoretically

619

* even if the batch times out, otherwise we could theoretically

620

* lose wakeups.

620

* lose wakeups.

621

*/

621

*/

622

return ioc->nr_batch_requests == q->nr_batching ||

622

return ioc->nr_batch_requests == q->nr_batching ||

623

(ioc->nr_batch_requests > 0

623

(ioc->nr_batch_requests > 0

624

&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));

624

&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));

625

}

625

}

626

627

/*

627

/*

628

* ioc_set_batching sets ioc to be a new "batcher" if it is not one. This

628

* ioc_set_batching sets ioc to be a new "batcher" if it is not one. This

629

* will cause the process to be a "batcher" on all queues in the system. This

629

* will cause the process to be a "batcher" on all queues in the system. This

630

* is the behaviour we want though - once it gets a wakeup it should be given

630

* is the behaviour we want though - once it gets a wakeup it should be given

631

* a nice run.

631

* a nice run.

632

*/

632

*/

633

static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)

633

static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)

634

{

634

{

635

if (!ioc || ioc_batching(q, ioc))

635

if (!ioc || ioc_batching(q, ioc))

636

return;

636

return;

637

638

ioc->nr_batch_requests = q->nr_batching;

638

ioc->nr_batch_requests = q->nr_batching;

639

ioc->last_waited = jiffies;

639

ioc->last_waited = jiffies;

640

}

640

}

641

642

static void __freed_request(struct request_queue *q, int sync)

642

static void __freed_request(struct request_queue *q, int sync)

643

{

643

{

644

struct request_list *rl = &q->rq;

644

struct request_list *rl = &q->rq;

645

646

if (rl->count[sync] < queue_congestion_off_threshold(q))

646

if (rl->count[sync] < queue_congestion_off_threshold(q))

647

blk_clear_queue_congested(q, sync);

647

blk_clear_queue_congested(q, sync);

648

649

if (rl->count[sync] + 1 <= q->nr_requests) {

649

if (rl->count[sync] + 1 <= q->nr_requests) {

650

if (waitqueue_active(&rl->wait[sync]))

650

if (waitqueue_active(&rl->wait[sync]))

651

wake_up(&rl->wait[sync]);

651

wake_up(&rl->wait[sync]);

652

653

blk_clear_queue_full(q, sync);

653

blk_clear_queue_full(q, sync);

654

}

654

}

655

}

655

}

656

657

/*

657

/*

658

* A request has just been released. Account for it, update the full and

658

* A request has just been released. Account for it, update the full and

659

* congestion status, wake up any waiters. Called under q->queue_lock.

659

* congestion status, wake up any waiters. Called under q->queue_lock.

660

*/

660

*/

661

static void freed_request(struct request_queue *q, int sync, int priv)

661

static void freed_request(struct request_queue *q, int sync, int priv)

662

{

662

{

663

struct request_list *rl = &q->rq;

663

struct request_list *rl = &q->rq;

664

665

rl->count[sync]--;

665

rl->count[sync]--;

666

if (priv)

666

if (priv)

667

rl->elvpriv--;

667

rl->elvpriv--;

668

669

__freed_request(q, sync);

669

__freed_request(q, sync);

670

671

if (unlikely(rl->starved[sync ^ 1]))

671

if (unlikely(rl->starved[sync ^ 1]))

672

__freed_request(q, sync ^ 1);

672

__freed_request(q, sync ^ 1);

673

}

673

}

674

675

/*

675

/*

676

* Determine if elevator data should be initialized when allocating the

676

* Determine if elevator data should be initialized when allocating the

677

* request associated with @bio.

677

* request associated with @bio.

678

*/

678

*/

679

static bool blk_rq_should_init_elevator(struct bio *bio)

679

static bool blk_rq_should_init_elevator(struct bio *bio)

680

{

680

{

681

if (!bio)

681

if (!bio)

682

return true;

682

return true;

683

684

/*

684

/*

685

* Flush requests do not use the elevator so skip initialization.

685

* Flush requests do not use the elevator so skip initialization.

686

* This allows a request to share the flush and elevator data.

686

* This allows a request to share the flush and elevator data.

687

*/

687

*/

688

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))

688

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))

689

return false;

689

return false;

690

691

return true;

691

return true;

692

}

692

}

693

694

/*

694

/*

695

* Get a free request, queue_lock must be held.

695

* Get a free request, queue_lock must be held.

696

* Returns NULL on failure, with queue_lock held.

696

* Returns NULL on failure, with queue_lock held.

697

* Returns !NULL on success, with queue_lock *not held*.

697

* Returns !NULL on success, with queue_lock *not held*.

698

*/

698

*/

699

static struct request *get_request(struct request_queue *q, int rw_flags,

699

static struct request *get_request(struct request_queue *q, int rw_flags,

700

struct bio *bio, gfp_t gfp_mask)

700

struct bio *bio, gfp_t gfp_mask)

701

{

701

{

702

struct request *rq = NULL;

702

struct request *rq = NULL;

703

struct request_list *rl = &q->rq;

703

struct request_list *rl = &q->rq;

704

struct io_context *ioc = NULL;

704

struct io_context *ioc = NULL;

705

const bool is_sync = rw_is_sync(rw_flags) != 0;

705

const bool is_sync = rw_is_sync(rw_flags) != 0;

706

int may_queue, priv = 0;

706

int may_queue, priv = 0;

707

708

may_queue = elv_may_queue(q, rw_flags);

708

may_queue = elv_may_queue(q, rw_flags);

709

if (may_queue == ELV_MQUEUE_NO)

709

if (may_queue == ELV_MQUEUE_NO)

710

goto rq_starved;

710

goto rq_starved;

711

712

if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {

712

if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {

713

if (rl->count[is_sync]+1 >= q->nr_requests) {

713

if (rl->count[is_sync]+1 >= q->nr_requests) {

714

ioc = current_io_context(GFP_ATOMIC, q->node);

714

ioc = current_io_context(GFP_ATOMIC, q->node);

715

/*

715

/*

716

* The queue will fill after this allocation, so set

716

* The queue will fill after this allocation, so set

717

* it as full, and mark this process as "batching".

717

* it as full, and mark this process as "batching".

718

* This process will be allowed to complete a batch of

718

* This process will be allowed to complete a batch of

719

* requests, others will be blocked.

719

* requests, others will be blocked.

720

*/

720

*/

721

if (!blk_queue_full(q, is_sync)) {

721

if (!blk_queue_full(q, is_sync)) {

722

ioc_set_batching(q, ioc);

722

ioc_set_batching(q, ioc);

723

blk_set_queue_full(q, is_sync);

723

blk_set_queue_full(q, is_sync);

724

} else {

724

} else {

725

if (may_queue != ELV_MQUEUE_MUST

725

if (may_queue != ELV_MQUEUE_MUST

726

&& !ioc_batching(q, ioc)) {

726

&& !ioc_batching(q, ioc)) {

727

/*

727

/*

728

* The queue is full and the allocating

728

* The queue is full and the allocating

729

* process is not a "batcher", and not

729

* process is not a "batcher", and not

730

* exempted by the IO scheduler

730

* exempted by the IO scheduler

731

*/

731

*/

732

goto out;

732

goto out;

733

}

733

}

734

}

734

}

735

}

735

}

736

blk_set_queue_congested(q, is_sync);

736

blk_set_queue_congested(q, is_sync);

737

}

737

}

738

739

/*

739

/*

740

* Only allow batching queuers to allocate up to 50% over the defined

740

* Only allow batching queuers to allocate up to 50% over the defined

741

* limit of requests, otherwise we could have thousands of requests

741

* limit of requests, otherwise we could have thousands of requests

742

* allocated with any setting of ->nr_requests

742

* allocated with any setting of ->nr_requests

743

*/

743

*/

744

if (rl->count[is_sync] >= (3 * q->nr_requests / 2))

744

if (rl->count[is_sync] >= (3 * q->nr_requests / 2))

745

goto out;

745

goto out;

746

747

rl->count[is_sync]++;

747

rl->count[is_sync]++;

748

rl->starved[is_sync] = 0;

748

rl->starved[is_sync] = 0;

749

750

if (blk_rq_should_init_elevator(bio)) {

750

if (blk_rq_should_init_elevator(bio)) {

751

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

751

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

752

if (priv)

752

if (priv)

753

rl->elvpriv++;

753

rl->elvpriv++;

754

}

754

}

755

756

if (blk_queue_io_stat(q))

756

if (blk_queue_io_stat(q))

757

rw_flags |= REQ_IO_STAT;

757

rw_flags |= REQ_IO_STAT;

758

spin_unlock_irq(q->queue_lock);

758

spin_unlock_irq(q->queue_lock);

759

760

rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

760

rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

761

if (unlikely(!rq)) {

761

if (unlikely(!rq)) {

762

/*

762

/*

763

* Allocation failed presumably due to memory. Undo anything

763

* Allocation failed presumably due to memory. Undo anything

764

* we might have messed up.

764

* we might have messed up.

765

*

765

*

766

* Allocating task should really be put onto the front of the

766

* Allocating task should really be put onto the front of the

767

* wait queue, but this is pretty rare.

767

* wait queue, but this is pretty rare.

768

*/

768

*/

769

spin_lock_irq(q->queue_lock);

769

spin_lock_irq(q->queue_lock);

770

freed_request(q, is_sync, priv);

770

freed_request(q, is_sync, priv);

771

772

/*

772

/*

773

* in the very unlikely event that allocation failed and no

773

* in the very unlikely event that allocation failed and no

774

* requests for this direction was pending, mark us starved

774

* requests for this direction was pending, mark us starved

775

* so that freeing of a request in the other direction will

775

* so that freeing of a request in the other direction will

776

* notice us. another possible fix would be to split the

776

* notice us. another possible fix would be to split the

777

* rq mempool into READ and WRITE

777

* rq mempool into READ and WRITE

778

*/

778

*/

779

rq_starved:

779

rq_starved:

780

if (unlikely(rl->count[is_sync] == 0))

780

if (unlikely(rl->count[is_sync] == 0))

781

rl->starved[is_sync] = 1;

781

rl->starved[is_sync] = 1;

782

783

goto out;

783

goto out;

784

}

784

}

785

786

/*

786

/*

787

* ioc may be NULL here, and ioc_batching will be false. That's

787

* ioc may be NULL here, and ioc_batching will be false. That's

788

* OK, if the queue is under the request limit then requests need

788

* OK, if the queue is under the request limit then requests need

789

* not count toward the nr_batch_requests limit. There will always

789

* not count toward the nr_batch_requests limit. There will always

790

* be some limit enforced by BLK_BATCH_TIME.

790

* be some limit enforced by BLK_BATCH_TIME.

791

*/

791

*/

792

if (ioc_batching(q, ioc))

792

if (ioc_batching(q, ioc))

793

ioc->nr_batch_requests--;

793

ioc->nr_batch_requests--;

794

795

trace_block_getrq(q, bio, rw_flags & 1);

795

trace_block_getrq(q, bio, rw_flags & 1);

796

out:

796

out:

797

return rq;

797

return rq;

798

}

798

}

799

800

/*

800

/*

801

* No available requests for this queue, wait for some requests to become

801

* No available requests for this queue, wait for some requests to become

802

* available.

802

* available.

803

*

803

*

804

* Called with q->queue_lock held, and returns with it unlocked.

804

* Called with q->queue_lock held, and returns with it unlocked.

805

*/

805

*/

806

static struct request *get_request_wait(struct request_queue *q, int rw_flags,

806

static struct request *get_request_wait(struct request_queue *q, int rw_flags,

807

struct bio *bio)

807

struct bio *bio)

808

{

808

{

809

const bool is_sync = rw_is_sync(rw_flags) != 0;

809

const bool is_sync = rw_is_sync(rw_flags) != 0;

810

struct request *rq;

810

struct request *rq;

811

812

rq = get_request(q, rw_flags, bio, GFP_NOIO);

812

rq = get_request(q, rw_flags, bio, GFP_NOIO);

813

while (!rq) {

813

while (!rq) {

814

DEFINE_WAIT(wait);

814

DEFINE_WAIT(wait);

815

struct io_context *ioc;

815

struct io_context *ioc;

816

struct request_list *rl = &q->rq;

816

struct request_list *rl = &q->rq;

817

818

prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,

818

prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,

819

TASK_UNINTERRUPTIBLE);

819

TASK_UNINTERRUPTIBLE);

820

821

trace_block_sleeprq(q, bio, rw_flags & 1);

821

trace_block_sleeprq(q, bio, rw_flags & 1);

822

823

spin_unlock_irq(q->queue_lock);

823

spin_unlock_irq(q->queue_lock);

824

io_schedule();

824

io_schedule();

825

826

/*

826

/*

827

* After sleeping, we become a "batching" process and

827

* After sleeping, we become a "batching" process and

828

* will be able to allocate at least one request, and

828

* will be able to allocate at least one request, and

829

* up to a big batch of them for a small period time.

829

* up to a big batch of them for a small period time.

830

* See ioc_batching, ioc_set_batching

830

* See ioc_batching, ioc_set_batching

831

*/

831

*/

832

ioc = current_io_context(GFP_NOIO, q->node);

832

ioc = current_io_context(GFP_NOIO, q->node);

833

ioc_set_batching(q, ioc);

833

ioc_set_batching(q, ioc);

834

835

spin_lock_irq(q->queue_lock);

835

spin_lock_irq(q->queue_lock);

836

finish_wait(&rl->wait[is_sync], &wait);

836

finish_wait(&rl->wait[is_sync], &wait);

837

838

rq = get_request(q, rw_flags, bio, GFP_NOIO);

838

rq = get_request(q, rw_flags, bio, GFP_NOIO);

839

};

839

};

840

841

return rq;

841

return rq;

842

}

842

}

843

844

/*
 * Allocate a request on @q for direction @rw, which must be READ or WRITE.
 *
 * With __GFP_WAIT in @gfp_mask the allocation goes through
 * get_request_wait(); otherwise a single get_request() attempt is made and
 * NULL is returned if it fails. q->queue_lock is taken here and is unlocked
 * again on every return path.
 */
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(!(rw == READ || rw == WRITE));

	spin_lock_irq(q->queue_lock);

	/* get_request_wait() returns with the queue lock unlocked. */
	if (gfp_mask & __GFP_WAIT)
		return get_request_wait(q, rw, NULL);

	/* get_request() keeps the lock held only when it fails. */
	rq = get_request(q, rw, NULL, gfp_mask);
	if (rq == NULL)
		spin_unlock_irq(q->queue_lock);

	return rq;
}
EXPORT_SYMBOL(blk_get_request);

863

864

/**

864

/**

865

* blk_make_request - given a bio, allocate a corresponding struct request.

865

* blk_make_request - given a bio, allocate a corresponding struct request.

866

* @q: target request queue

866

* @q: target request queue

867

* @bio: The bio describing the memory mappings that will be submitted for IO.

867

* @bio: The bio describing the memory mappings that will be submitted for IO.

868

* It may be a chained-bio properly constructed by block/bio layer.

868

* It may be a chained-bio properly constructed by block/bio layer.

869

* @gfp_mask: gfp flags to be used for memory allocation

869

* @gfp_mask: gfp flags to be used for memory allocation

870

*

870

*

871

* blk_make_request is the parallel of generic_make_request for BLOCK_PC

871

* blk_make_request is the parallel of generic_make_request for BLOCK_PC

872

* type commands. Where the struct request needs to be further initialized by

872

* type commands. Where the struct request needs to be further initialized by

873

* the caller. It is passed a &struct bio, which describes the memory info of

873

* the caller. It is passed a &struct bio, which describes the memory info of

874

* the I/O transfer.

874

* the I/O transfer.

875

*

875

*

876

* The caller of blk_make_request must make sure that bi_io_vec

876

* The caller of blk_make_request must make sure that bi_io_vec

877

* are set to describe the memory buffers. That bio_data_dir() will return

877

* are set to describe the memory buffers. That bio_data_dir() will return

878

* the needed direction of the request. (And all bio's in the passed bio-chain

878

* the needed direction of the request. (And all bio's in the passed bio-chain

879

* are properly set accordingly)

879

* are properly set accordingly)

880

*

880

*

881

* If called under non-sleepable conditions, mapped bio buffers must not

881

* If called under non-sleepable conditions, mapped bio buffers must not

882

* need bouncing, by calling the appropriate masked or flagged allocator,

882

* need bouncing, by calling the appropriate masked or flagged allocator,

883

* suitable for the target device. Otherwise the call to blk_queue_bounce will

883

* suitable for the target device. Otherwise the call to blk_queue_bounce will

884

* BUG.

884

* BUG.

885

*

885

*

886

* WARNING: When allocating/cloning a bio-chain, careful consideration should be

886

* WARNING: When allocating/cloning a bio-chain, careful consideration should be

887

* given to how you allocate bios. In particular, you cannot use __GFP_WAIT for

887

* given to how you allocate bios. In particular, you cannot use __GFP_WAIT for

888

* anything but the first bio in the chain. Otherwise you risk waiting for IO

888

* anything but the first bio in the chain. Otherwise you risk waiting for IO

889

* completion of a bio that hasn't been submitted yet, thus resulting in a

889

* completion of a bio that hasn't been submitted yet, thus resulting in a

890

* deadlock. Alternatively bios should be allocated using bio_kmalloc() instead

890

* deadlock. Alternatively bios should be allocated using bio_kmalloc() instead

891

* of bio_alloc(), as that avoids the mempool deadlock.

891

* of bio_alloc(), as that avoids the mempool deadlock.

892

* If possible a big IO should be split into smaller parts when allocation

892

* If possible a big IO should be split into smaller parts when allocation

893

* fails. Partial allocation should not be an error, or you risk a live-lock.

893

* fails. Partial allocation should not be an error, or you risk a live-lock.

894

*/

894

*/

895

struct request *blk_make_request(struct request_queue *q, struct bio *bio,

895

struct request *blk_make_request(struct request_queue *q, struct bio *bio,

896

gfp_t gfp_mask)

896

gfp_t gfp_mask)

897

{

897

{

898

struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);

898

struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);

899

900

if (unlikely(!rq))

900

if (unlikely(!rq))

901

return ERR_PTR(-ENOMEM);

901

return ERR_PTR(-ENOMEM);

902

903

for_each_bio(bio) {

903

for_each_bio(bio) {

904

struct bio *bounce_bio = bio;

904

struct bio *bounce_bio = bio;

905

int ret;

905

int ret;

906

907

blk_queue_bounce(q, &bounce_bio);

907

blk_queue_bounce(q, &bounce_bio);

908

ret = blk_rq_append_bio(q, rq, bounce_bio);

908

ret = blk_rq_append_bio(q, rq, bounce_bio);

909

if (unlikely(ret)) {

909

if (unlikely(ret)) {

910

blk_put_request(rq);

910

blk_put_request(rq);

911

return ERR_PTR(ret);

911

return ERR_PTR(ret);

912

}

912

}

913

}

913

}

914

915

return rq;

915

return rq;

916

}

916

}

917

EXPORT_SYMBOL(blk_make_request);

917

EXPORT_SYMBOL(blk_make_request);

918

919

/**

919

/**

920

* blk_requeue_request - put a request back on queue

920

* blk_requeue_request - put a request back on queue

921

* @q: request queue where request should be inserted

921

* @q: request queue where request should be inserted

922

* @rq: request to be inserted

922

* @rq: request to be inserted

923

*

923

*

924

* Description:

924

* Description:

925

* Drivers often keep queueing requests until the hardware cannot accept

925

* Drivers often keep queueing requests until the hardware cannot accept

926

* more, when that condition happens we need to put the request back

926

* more, when that condition happens we need to put the request back

927

* on the queue. Must be called with queue lock held.

927

* on the queue. Must be called with queue lock held.

928

*/

928

*/

929

void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	/* Stop the timer and clear the completion mark before requeueing. */
	blk_delete_timer(rq);
	blk_clear_rq_complete(rq);
	trace_block_rq_requeue(q, rq);

	/* A tagged request gives its tag back first. */
	if (blk_rq_tagged(rq)) {
		blk_queue_end_tag(q, rq);
	}

	/* The request must not be on a queue list at this point. */
	BUG_ON(blk_queued_rq(rq));

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);

943

944

/*
 * Account @rq through drive_stat_acct() and then add it to @q at position
 * @where with __elv_add_request().
 */
static void add_acct_request(struct request_queue *q, struct request *rq,
			     int where)
{
	/* Accounting comes first, insertion second. */
	drive_stat_acct(rq, 1);

	__elv_add_request(q, rq, where);
}

950

951

/**

951

/**

952

* blk_insert_request - insert a special request into a request queue

952

* blk_insert_request - insert a special request into a request queue

953

* @q: request queue where request should be inserted

953

* @q: request queue where request should be inserted

954

* @rq: request to be inserted

954

* @rq: request to be inserted

955

* @at_head: insert request at head or tail of queue

955

* @at_head: insert request at head or tail of queue

956

* @data: private data

956

* @data: private data

957

*

957

*

958

* Description:

958

* Description:

959

* Many block devices need to execute commands asynchronously, so they don't

959

* Many block devices need to execute commands asynchronously, so they don't

960

* block the whole kernel from preemption during request execution. This is

960

* block the whole kernel from preemption during request execution. This is

961

* accomplished normally by inserting artificial requests tagged as

961

* accomplished normally by inserting artificial requests tagged as

962

* REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them

962

* REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them

963

* be scheduled for actual execution by the request queue.

963

* be scheduled for actual execution by the request queue.

964

*

964

*

965

* We have the option of inserting the head or the tail of the queue.

965

* We have the option of inserting the head or the tail of the queue.

966

* Typically we use the tail for new ioctls and so forth. We use the head

966

* Typically we use the tail for new ioctls and so forth. We use the head

967

* of the queue for things like a QUEUE_FULL message from a device, or a

967

* of the queue for things like a QUEUE_FULL message from a device, or a

968

* host that is unable to accept a particular command.

968

* host that is unable to accept a particular command.

969

*/

969

*/

970

void blk_insert_request(struct request_queue *q, struct request *rq,

970

void blk_insert_request(struct request_queue *q, struct request *rq,

971

int at_head, void *data)

971

int at_head, void *data)

972

{

972

{

973

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

973

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

974

unsigned long flags;

974

unsigned long flags;

975

976

/*

976

/*

977

* tell I/O scheduler that this isn't a regular read/write (ie it

977

* tell I/O scheduler that this isn't a regular read/write (ie it

978

* must not attempt merges on this) and that it acts as a soft

978

* must not attempt merges on this) and that it acts as a soft

979

* barrier

979

* barrier

980

*/

980

*/

981

rq->cmd_type = REQ_TYPE_SPECIAL;

981

rq->cmd_type = REQ_TYPE_SPECIAL;

982

983

rq->special = data;

983

rq->special = data;

984

985

spin_lock_irqsave(q->queue_lock, flags);

985

spin_lock_irqsave(q->queue_lock, flags);

986

987

/*

987

/*

988

* If command is tagged, release the tag

988

* If command is tagged, release the tag

989

*/

989

*/

990

if (blk_rq_tagged(rq))

990

if (blk_rq_tagged(rq))

991

blk_queue_end_tag(q, rq);

991

blk_queue_end_tag(q, rq);

992

993

add_acct_request(q, rq, where);

993

add_acct_request(q, rq, where);

994

__blk_run_queue(q, false);

994

__blk_run_queue(q, false);

995

spin_unlock_irqrestore(q->queue_lock, flags);

995

spin_unlock_irqrestore(q->queue_lock, flags);

996

}

996

}

997

EXPORT_SYMBOL(blk_insert_request);

997

EXPORT_SYMBOL(blk_insert_request);

998

999

static void part_round_stats_single(int cpu, struct hd_struct *part,

999

static void part_round_stats_single(int cpu, struct hd_struct *part,

1000

unsigned long now)

1000

unsigned long now)

1001

{

1001

{

1002

if (now == part->stamp)

1002

if (now == part->stamp)

1003

return;

1003

return;

1004

1005

if (part_in_flight(part)) {

1005

if (part_in_flight(part)) {

1006

__part_stat_add(cpu, part, time_in_queue,

1006

__part_stat_add(cpu, part, time_in_queue,

1007

part_in_flight(part) * (now - part->stamp));

1007

part_in_flight(part) * (now - part->stamp));

1008

__part_stat_add(cpu, part, io_ticks, (now - part->stamp));

1008

__part_stat_add(cpu, part, io_ticks, (now - part->stamp));

1009

}

1009

}

1010

part->stamp = now;

1010

part->stamp = now;

1011

}

1011

}

1012

1013

/**

1013

/**

1014

* part_round_stats() - Round off the performance stats on a struct disk_stats.

1014

* part_round_stats() - Round off the performance stats on a struct disk_stats.

1015

* @cpu: cpu number for stats access

1015

* @cpu: cpu number for stats access

1016

* @part: target partition

1016

* @part: target partition

1017

*

1017

*

1018

* The average IO queue length and utilisation statistics are maintained

1018

* The average IO queue length and utilisation statistics are maintained

1019

* by observing the current state of the queue length and the amount of

1019

* by observing the current state of the queue length and the amount of

1020

* time it has been in this state for.

1020

* time it has been in this state for.

1021

*

1021

*

1022

* Normally, that accounting is done on IO completion, but that can result

1022

* Normally, that accounting is done on IO completion, but that can result

1023

* in more than a second's worth of IO being accounted for within any one

1023

* in more than a second's worth of IO being accounted for within any one

1024

* second, leading to >100% utilisation. To deal with that, we call this

1024

* second, leading to >100% utilisation. To deal with that, we call this

1025

* function to do a round-off before returning the results when reading

1025

* function to do a round-off before returning the results when reading

1026

* /proc/diskstats. This accounts immediately for all queue usage up to

1026

* /proc/diskstats. This accounts immediately for all queue usage up to

1027

* the current jiffies and restarts the counters again.

1027

* the current jiffies and restarts the counters again.

1028

*/

1028

*/

1029

void part_round_stats(int cpu, struct hd_struct *part)

1029

void part_round_stats(int cpu, struct hd_struct *part)

1030

{

1030

{

1031

unsigned long now = jiffies;

1031

unsigned long now = jiffies;

1032

1033

if (part->partno)

1033

if (part->partno)

1034

part_round_stats_single(cpu, &part_to_disk(part)->part0, now);

1034

part_round_stats_single(cpu, &part_to_disk(part)->part0, now);

1035

part_round_stats_single(cpu, part, now);

1035

part_round_stats_single(cpu, part, now);

1036

}

1036

}

1037

EXPORT_SYMBOL_GPL(part_round_stats);

1037

EXPORT_SYMBOL_GPL(part_round_stats);

1038

1039

/*

1039

/*

1040

* queue lock must be held

1040

* queue lock must be held

1041

*/

1041

*/

1042

void __blk_put_request(struct request_queue *q, struct request *req)

1042

void __blk_put_request(struct request_queue *q, struct request *req)

1043

{

1043

{

1044

if (unlikely(!q))

1044

if (unlikely(!q))

1045

return;

1045

return;

1046

if (unlikely(--req->ref_count))

1046

if (unlikely(--req->ref_count))

1047

return;

1047

return;

1048

1049

elv_completed_request(q, req);

1049

elv_completed_request(q, req);

1050

1051

/* this is a bio leak */

1051

/* this is a bio leak */

1052

WARN_ON(req->bio != NULL);

1052

WARN_ON(req->bio != NULL);

1053

1054

/*

1054

/*

1055

* Request may not have originated from ll_rw_blk. if not,

1055

* Request may not have originated from ll_rw_blk. if not,

1056

* it didn't come out of our reserved rq pools

1056

* it didn't come out of our reserved rq pools

1057

*/

1057

*/

1058

if (req->cmd_flags & REQ_ALLOCED) {

1058

if (req->cmd_flags & REQ_ALLOCED) {

1059

int is_sync = rq_is_sync(req) != 0;

1059

int is_sync = rq_is_sync(req) != 0;

1060

int priv = req->cmd_flags & REQ_ELVPRIV;

1060

int priv = req->cmd_flags & REQ_ELVPRIV;

1061

1062

BUG_ON(!list_empty(&req->queuelist));

1062

BUG_ON(!list_empty(&req->queuelist));

1063

BUG_ON(!hlist_unhashed(&req->hash));

1063

BUG_ON(!hlist_unhashed(&req->hash));

1064

1065

blk_free_request(q, req);

1065

blk_free_request(q, req);

1066

freed_request(q, is_sync, priv);

1066

freed_request(q, is_sync, priv);

1067

}

1067

}

1068

}

1068

}

1069

EXPORT_SYMBOL_GPL(__blk_put_request);

1069

EXPORT_SYMBOL_GPL(__blk_put_request);

1070

1071

void blk_put_request(struct request *req)

1071

void blk_put_request(struct request *req)

1072

{

1072

{

1073

unsigned long flags;

1073

unsigned long flags;

1074

struct request_queue *q = req->q;

1074

struct request_queue *q = req->q;

1075

1076

spin_lock_irqsave(q->queue_lock, flags);

1076

spin_lock_irqsave(q->queue_lock, flags);

1077

__blk_put_request(q, req);

1077

__blk_put_request(q, req);

1078

spin_unlock_irqrestore(q->queue_lock, flags);

1078

spin_unlock_irqrestore(q->queue_lock, flags);

1079

}

1079

}

1080

EXPORT_SYMBOL(blk_put_request);

1080

EXPORT_SYMBOL(blk_put_request);

1081

1082

/**

1082

/**

1083

* blk_add_request_payload - add a payload to a request

1083

* blk_add_request_payload - add a payload to a request

1084

* @rq: request to update

1084

* @rq: request to update

1085

* @page: page backing the payload

1085

* @page: page backing the payload

1086

* @len: length of the payload.

1086

* @len: length of the payload.

1087

*

1087

*

1088

* This allows to later add a payload to an already submitted request by

1088

* This allows to later add a payload to an already submitted request by

1089

* a block driver. The driver needs to take care of freeing the payload

1089

* a block driver. The driver needs to take care of freeing the payload

1090

* itself.

1090

* itself.

1091

*

1091

*

1092

* Note that this is a quite horrible hack and nothing but handling of

1092

* Note that this is a quite horrible hack and nothing but handling of

1093

* discard requests should ever use it.

1093

* discard requests should ever use it.

1094

*/

1094

*/

1095

void blk_add_request_payload(struct request *rq, struct page *page,

1095

void blk_add_request_payload(struct request *rq, struct page *page,

1096

unsigned int len)

1096

unsigned int len)

1097

{

1097

{

1098

struct bio *bio = rq->bio;

1098

struct bio *bio = rq->bio;

1099

1100

bio->bi_io_vec->bv_page = page;

1100

bio->bi_io_vec->bv_page = page;

1101

bio->bi_io_vec->bv_offset = 0;

1101

bio->bi_io_vec->bv_offset = 0;

1102

bio->bi_io_vec->bv_len = len;

1102

bio->bi_io_vec->bv_len = len;

1103

1104

bio->bi_size = len;

1104

bio->bi_size = len;

1105

bio->bi_vcnt = 1;

1105

bio->bi_vcnt = 1;

1106

bio->bi_phys_segments = 1;

1106

bio->bi_phys_segments = 1;

1107

1108

rq->__data_len = rq->resid_len = len;

1108

rq->__data_len = rq->resid_len = len;

1109

rq->nr_phys_segments = 1;

1109

rq->nr_phys_segments = 1;

1110

rq->buffer = bio_data(bio);

1110

rq->buffer = bio_data(bio);

1111

}

1111

}

1112

EXPORT_SYMBOL_GPL(blk_add_request_payload);

1112

EXPORT_SYMBOL_GPL(blk_add_request_payload);

1113

1114

static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,

1114

static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,

1115

struct bio *bio)

1115

struct bio *bio)

1116

{

1116

{

1117

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1117

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1118

1119

/*

1119

/*

1120

* Debug stuff, kill later

1120

* Debug stuff, kill later

1121

*/

1121

*/

1122

if (!rq_mergeable(req)) {

1122

if (!rq_mergeable(req)) {

1123

blk_dump_rq_flags(req, "back");

1123

blk_dump_rq_flags(req, "back");

1124

return false;

1124

return false;

1125

}

1125

}

1126

1127

if (!ll_back_merge_fn(q, req, bio))

1127

if (!ll_back_merge_fn(q, req, bio))

1128

return false;

1128

return false;

1129

1130

trace_block_bio_backmerge(q, bio);

1130

trace_block_bio_backmerge(q, bio);

1131

1132

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1132

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1133

blk_rq_set_mixed_merge(req);

1133

blk_rq_set_mixed_merge(req);

1134

1135

req->biotail->bi_next = bio;

1135

req->biotail->bi_next = bio;

1136

req->biotail = bio;

1136

req->biotail = bio;

1137

req->__data_len += bio->bi_size;

1137

req->__data_len += bio->bi_size;

1138

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1138

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1139

1140

drive_stat_acct(req, 0);

1140

drive_stat_acct(req, 0);

1141

return true;

1141

return true;

1142

}

1142

}

1143

1144

static bool bio_attempt_front_merge(struct request_queue *q,

1144

static bool bio_attempt_front_merge(struct request_queue *q,

1145

struct request *req, struct bio *bio)

1145

struct request *req, struct bio *bio)

1146

{

1146

{

1147

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1147

const int ff = bio->bi_rw & REQ_FAILFAST_MASK;

1148

sector_t sector;

1148

sector_t sector;

1149

1150

/*

1150

/*

1151

* Debug stuff, kill later

1151

* Debug stuff, kill later

1152

*/

1152

*/

1153

if (!rq_mergeable(req)) {

1153

if (!rq_mergeable(req)) {

1154

blk_dump_rq_flags(req, "front");

1154

blk_dump_rq_flags(req, "front");

1155

return false;

1155

return false;

1156

}

1156

}

1157

1158

if (!ll_front_merge_fn(q, req, bio))

1158

if (!ll_front_merge_fn(q, req, bio))

1159

return false;

1159

return false;

1160

1161

trace_block_bio_frontmerge(q, bio);

1161

trace_block_bio_frontmerge(q, bio);

1162

1163

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1163

if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)

1164

blk_rq_set_mixed_merge(req);

1164

blk_rq_set_mixed_merge(req);

1165

1166

sector = bio->bi_sector;

1166

sector = bio->bi_sector;

1167

1168

bio->bi_next = req->bio;

1168

bio->bi_next = req->bio;

1169

req->bio = bio;

1169

req->bio = bio;

1170

1171

/*

1171

/*

1172

* may not be valid. if the low level driver said

1172

* may not be valid. if the low level driver said

1173

* it didn't need a bounce buffer then it better

1173

* it didn't need a bounce buffer then it better

1174

* not touch req->buffer either...

1174

* not touch req->buffer either...

1175

*/

1175

*/

1176

req->buffer = bio_data(bio);

1176

req->buffer = bio_data(bio);

1177

req->__sector = bio->bi_sector;

1177

req->__sector = bio->bi_sector;

1178

req->__data_len += bio->bi_size;

1178

req->__data_len += bio->bi_size;

1179

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1179

req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

1180

1181

drive_stat_acct(req, 0);

1181

drive_stat_acct(req, 0);

1182

return true;

1182

return true;

1183

}

1183

}

1184

1185

/*

1185

/*

1186

* Attempts to merge with the plugged list in the current process. Returns

1186

* Attempts to merge with the plugged list in the current process. Returns

1187

* true if merge was succesful, otherwise false.

1187

* true if merge was succesful, otherwise false.

1188

*/

1188

*/

1189

static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,

1189

static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,

1190

struct bio *bio)

1190

struct bio *bio)

1191

{

1191

{

1192

struct blk_plug *plug;

1192

struct blk_plug *plug;

1193

struct request *rq;

1193

struct request *rq;

1194

bool ret = false;

1194

bool ret = false;

1195

1196

plug = tsk->plug;

1196

plug = tsk->plug;

1197

if (!plug)

1197

if (!plug)

1198

goto out;

1198

goto out;

1199

1200

list_for_each_entry_reverse(rq, &plug->list, queuelist) {

1200

list_for_each_entry_reverse(rq, &plug->list, queuelist) {

1201

int el_ret;

1201

int el_ret;

1202

1203

if (rq->q != q)

1203

if (rq->q != q)

1204

continue;

1204

continue;

1205

1206

el_ret = elv_try_merge(rq, bio);

1206

el_ret = elv_try_merge(rq, bio);

1207

if (el_ret == ELEVATOR_BACK_MERGE) {

1207

if (el_ret == ELEVATOR_BACK_MERGE) {

1208

ret = bio_attempt_back_merge(q, rq, bio);

1208

ret = bio_attempt_back_merge(q, rq, bio);

1209

if (ret)

1209

if (ret)

1210

break;

1210

break;

1211

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1211

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1212

ret = bio_attempt_front_merge(q, rq, bio);

1212

ret = bio_attempt_front_merge(q, rq, bio);

1213

if (ret)

1213

if (ret)

1214

break;

1214

break;

1215

}

1215

}

1216

}

1216

}

1217

out:

1217

out:

1218

return ret;

1218

return ret;

1219

}

1219

}

1220

1221

void init_request_from_bio(struct request *req, struct bio *bio)

1221

void init_request_from_bio(struct request *req, struct bio *bio)

1222

{

1222

{

1223

req->cpu = bio->bi_comp_cpu;

1223

req->cpu = bio->bi_comp_cpu;

1224

req->cmd_type = REQ_TYPE_FS;

1224

req->cmd_type = REQ_TYPE_FS;

1225

1226

req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;

1226

req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;

1227

if (bio->bi_rw & REQ_RAHEAD)

1227

if (bio->bi_rw & REQ_RAHEAD)

1228

req->cmd_flags |= REQ_FAILFAST_MASK;

1228

req->cmd_flags |= REQ_FAILFAST_MASK;

1229

1230

req->errors = 0;

1230

req->errors = 0;

1231

req->__sector = bio->bi_sector;

1231

req->__sector = bio->bi_sector;

1232

req->ioprio = bio_prio(bio);

1232

req->ioprio = bio_prio(bio);

1233

blk_rq_bio_prep(req->q, req, bio);

1233

blk_rq_bio_prep(req->q, req, bio);

1234

}

1234

}

1235

1236

static int __make_request(struct request_queue *q, struct bio *bio)

1236

static int __make_request(struct request_queue *q, struct bio *bio)

1237

{

1237

{

1238

const bool sync = !!(bio->bi_rw & REQ_SYNC);

1238

const bool sync = !!(bio->bi_rw & REQ_SYNC);

1239

struct blk_plug *plug;

1239

struct blk_plug *plug;

1240

int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;

1240

int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;

1241

struct request *req;

1241

struct request *req;

1242

1243

/*

1243

/*

1244

* low level driver can indicate that it wants pages above a

1244

* low level driver can indicate that it wants pages above a

1245

* certain limit bounced to low memory (ie for highmem, or even

1245

* certain limit bounced to low memory (ie for highmem, or even

1246

* ISA dma in theory)

1246

* ISA dma in theory)

1247

*/

1247

*/

1248

blk_queue_bounce(q, &bio);

1248

blk_queue_bounce(q, &bio);

1249

1250

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {

1250

if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {

1251

spin_lock_irq(q->queue_lock);

1251

spin_lock_irq(q->queue_lock);

1252

where = ELEVATOR_INSERT_FLUSH;

1252

where = ELEVATOR_INSERT_FLUSH;

1253

goto get_rq;

1253

goto get_rq;

1254

}

1254

}

1255

1256

/*

1256

/*

1257

* Check if we can merge with the plugged list before grabbing

1257

* Check if we can merge with the plugged list before grabbing

1258

* any locks.

1258

* any locks.

1259

*/

1259

*/

1260

if (attempt_plug_merge(current, q, bio))

1260

if (attempt_plug_merge(current, q, bio))

1261

goto out;

1261

goto out;

1262

1263

spin_lock_irq(q->queue_lock);

1263

spin_lock_irq(q->queue_lock);

1264

1265

el_ret = elv_merge(q, &req, bio);

1265

el_ret = elv_merge(q, &req, bio);

1266

if (el_ret == ELEVATOR_BACK_MERGE) {

1266

if (el_ret == ELEVATOR_BACK_MERGE) {

1267

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1267

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1268

if (bio_attempt_back_merge(q, req, bio)) {

1268

if (bio_attempt_back_merge(q, req, bio)) {

1269

if (!attempt_back_merge(q, req))

1269

if (!attempt_back_merge(q, req))

1270

elv_merged_request(q, req, el_ret);

1270

elv_merged_request(q, req, el_ret);

1271

goto out_unlock;

1271

goto out_unlock;

1272

}

1272

}

1273

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1273

} else if (el_ret == ELEVATOR_FRONT_MERGE) {

1274

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1274

BUG_ON(req->cmd_flags & REQ_ON_PLUG);

1275

if (bio_attempt_front_merge(q, req, bio)) {

1275

if (bio_attempt_front_merge(q, req, bio)) {

1276

if (!attempt_front_merge(q, req))

1276

if (!attempt_front_merge(q, req))

1277

elv_merged_request(q, req, el_ret);

1277

elv_merged_request(q, req, el_ret);

1278

goto out_unlock;

1278

goto out_unlock;

1279

}

1279

}

1280

}

1280

}

1281

1282

get_rq:

1282

get_rq:

1283

/*

1283

/*

1284

* This sync check and mask will be re-done in init_request_from_bio(),

1284

* This sync check and mask will be re-done in init_request_from_bio(),

1285

* but we need to set it earlier to expose the sync flag to the

1285

* but we need to set it earlier to expose the sync flag to the

1286

* rq allocator and io schedulers.

1286

* rq allocator and io schedulers.

1287

*/

1287

*/

1288

rw_flags = bio_data_dir(bio);

1288

rw_flags = bio_data_dir(bio);

1289

if (sync)

1289

if (sync)

1290

rw_flags |= REQ_SYNC;

1290

rw_flags |= REQ_SYNC;

1291

1292

/*

1292

/*

1293

* Grab a free request. This is might sleep but can not fail.

1293

* Grab a free request. This is might sleep but can not fail.

1294

* Returns with the queue unlocked.

1294

* Returns with the queue unlocked.

1295

*/

1295

*/

1296

req = get_request_wait(q, rw_flags, bio);

1296

req = get_request_wait(q, rw_flags, bio);

1297

1298

/*

1298

/*

1299

* After dropping the lock and possibly sleeping here, our request

1299

* After dropping the lock and possibly sleeping here, our request

1300

* may now be mergeable after it had proven unmergeable (above).

1300

* may now be mergeable after it had proven unmergeable (above).

1301

* We don't worry about that case for efficiency. It won't happen

1301

* We don't worry about that case for efficiency. It won't happen

1302

* often, and the elevators are able to handle it.

1302

* often, and the elevators are able to handle it.

1303

*/

1303

*/

1304

init_request_from_bio(req, bio);

1304

init_request_from_bio(req, bio);

1305

1306

if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||

1306

if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||

1307

bio_flagged(bio, BIO_CPU_AFFINE)) {

1307

bio_flagged(bio, BIO_CPU_AFFINE)) {

1308

req->cpu = blk_cpu_to_group(get_cpu());

1308

req->cpu = blk_cpu_to_group(get_cpu());

1309

put_cpu();

1309

put_cpu();

1310

}

1310

}

1311

1312

plug = current->plug;

1312

plug = current->plug;

1313

if (plug) {

1313

if (plug) {

1314

if (!plug->should_sort && !list_empty(&plug->list)) {

1314

if (!plug->should_sort && !list_empty(&plug->list)) {

1315

struct request *__rq;

1315

struct request *__rq;

1316

1317

__rq = list_entry_rq(plug->list.prev);

1317

__rq = list_entry_rq(plug->list.prev);

1318

if (__rq->q != q)

1318

if (__rq->q != q)

1319

plug->should_sort = 1;

1319

plug->should_sort = 1;

1320

}

1320

}

1321

/*

1321

/*

1322

* Debug flag, kill later

1322

* Debug flag, kill later

1323

*/

1323

*/

1324

req->cmd_flags |= REQ_ON_PLUG;

1324

req->cmd_flags |= REQ_ON_PLUG;

1325

list_add_tail(&req->queuelist, &plug->list);

1325

list_add_tail(&req->queuelist, &plug->list);

1326

drive_stat_acct(req, 1);

1326

drive_stat_acct(req, 1);

1327

} else {

1327

} else {

1328

spin_lock_irq(q->queue_lock);

1328

spin_lock_irq(q->queue_lock);

1329

add_acct_request(q, req, where);

1329

add_acct_request(q, req, where);

1330

__blk_run_queue(q, false);

1330

__blk_run_queue(q, false);

1331

out_unlock:

1331

out_unlock:

1332

spin_unlock_irq(q->queue_lock);

1332

spin_unlock_irq(q->queue_lock);

1333

}

1333

}

1334

out:

1334

out:

1335

return 0;

1335

return 0;

1336

}

1336

}

1337

1338

/*

1338

/*

1339

* If bio->bi_dev is a partition, remap the location

1339

* If bio->bi_dev is a partition, remap the location

1340

*/

1340

*/

1341

static inline void blk_partition_remap(struct bio *bio)

1341

static inline void blk_partition_remap(struct bio *bio)

1342

{

1342

{

1343

struct block_device *bdev = bio->bi_bdev;

1343

struct block_device *bdev = bio->bi_bdev;

1344

1345

if (bio_sectors(bio) && bdev != bdev->bd_contains) {

1345

if (bio_sectors(bio) && bdev != bdev->bd_contains) {

1346

struct hd_struct *p = bdev->bd_part;

1346

struct hd_struct *p = bdev->bd_part;

1347

1348

bio->bi_sector += p->start_sect;

1348

bio->bi_sector += p->start_sect;

1349

bio->bi_bdev = bdev->bd_contains;

1349

bio->bi_bdev = bdev->bd_contains;

1350

1351

trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,

1351

trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,

1352

bdev->bd_dev,

1352

bdev->bd_dev,

1353

bio->bi_sector - p->start_sect);

1353

bio->bi_sector - p->start_sect);

1354

}

1354

}

1355

}

1355

}

1356

1357

static void handle_bad_sector(struct bio *bio)

1357

static void handle_bad_sector(struct bio *bio)

1358

{

1358

{

1359

char b[BDEVNAME_SIZE];

1359

char b[BDEVNAME_SIZE];

1360

1361

printk(KERN_INFO "attempt to access beyond end of device\n");

1361

printk(KERN_INFO "attempt to access beyond end of device\n");

1362

printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",

1362

printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",

1363

bdevname(bio->bi_bdev, b),

1363

bdevname(bio->bi_bdev, b),

1364

bio->bi_rw,

1364

bio->bi_rw,

1365

(unsigned long long)bio->bi_sector + bio_sectors(bio),

1365

(unsigned long long)bio->bi_sector + bio_sectors(bio),

1366

(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));

1366

(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));

1367

1368

set_bit(BIO_EOF, &bio->bi_flags);

1368

set_bit(BIO_EOF, &bio->bi_flags);

1369

}

1369

}

1370

1371

#ifdef CONFIG_FAIL_MAKE_REQUEST

1371

#ifdef CONFIG_FAIL_MAKE_REQUEST

1372

1373

static DECLARE_FAULT_ATTR(fail_make_request);

1373

static DECLARE_FAULT_ATTR(fail_make_request);

1374

1375

static int __init setup_fail_make_request(char *str)

1375

static int __init setup_fail_make_request(char *str)

1376

{

1376

{

1377

return setup_fault_attr(&fail_make_request, str);

1377

return setup_fault_attr(&fail_make_request, str);

1378

}

1378

}

1379

__setup("fail_make_request=", setup_fail_make_request);

1379

__setup("fail_make_request=", setup_fail_make_request);

1380

1381

static int should_fail_request(struct bio *bio)

1381

static int should_fail_request(struct bio *bio)

1382

{

1382

{

1383

struct hd_struct *part = bio->bi_bdev->bd_part;

1383

struct hd_struct *part = bio->bi_bdev->bd_part;

1384

1385

if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)

1385

if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)

1386

return should_fail(&fail_make_request, bio->bi_size);

1386

return should_fail(&fail_make_request, bio->bi_size);

1387

1388

return 0;

1388

return 0;

1389

}

1389

}

1390

1391

/*
 * Late initcall that creates the "fail_make_request" fault-attribute
 * dentries via init_fault_attr_dentries() and returns its result.
 */
static int __init fail_make_request_debugfs(void)
{
	int ret;

	ret = init_fault_attr_dentries(&fail_make_request,
				       "fail_make_request");

	return ret;
}

late_initcall(fail_make_request_debugfs);

1398

1399

#else /* CONFIG_FAIL_MAKE_REQUEST */

1399

#else /* CONFIG_FAIL_MAKE_REQUEST */

1400

1401

/* Stub used when CONFIG_FAIL_MAKE_REQUEST is off: never fail a bio. */
static inline int should_fail_request(struct bio *bio)
{
	return 0;
}

1405

1406

#endif /* CONFIG_FAIL_MAKE_REQUEST */

1406

#endif /* CONFIG_FAIL_MAKE_REQUEST */

1407

1408

/*

1408

/*

1409

* Check whether this bio extends beyond the end of the device.

1409

* Check whether this bio extends beyond the end of the device.

1410

*/

1410

*/

1411

static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)

1411

static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)

1412

{

1412

{

1413

sector_t maxsector;

1413

sector_t maxsector;

1414

1415

if (!nr_sectors)

1415

if (!nr_sectors)

1416

return 0;

1416

return 0;

1417

1418

/* Test device or partition size, when known. */

1418

/* Test device or partition size, when known. */

1419

maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;

1419

maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;

1420

if (maxsector) {

1420

if (maxsector) {

1421

sector_t sector = bio->bi_sector;

1421

sector_t sector = bio->bi_sector;

1422

1423

if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {

1423

if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {

1424

/*

1424

/*

1425

* This may well happen - the kernel calls bread()

1425

* This may well happen - the kernel calls bread()

1426

* without checking the size of the device, e.g., when

1426

* without checking the size of the device, e.g., when

1427

* mounting a device.

1427

* mounting a device.

1428

*/

1428

*/

1429

handle_bad_sector(bio);

1429

handle_bad_sector(bio);

1430

return 1;

1430

return 1;

1431

}

1431

}

1432

}

1432

}

1433

1434

return 0;

1434

return 0;

1435

}

1435

}

1436

1437

/**

1437

/**

1438

* generic_make_request - hand a buffer to its device driver for I/O

1438

* generic_make_request - hand a buffer to its device driver for I/O

1439

* @bio: The bio describing the location in memory and on the device.

1439

* @bio: The bio describing the location in memory and on the device.

1440

*

1440

*

1441

* generic_make_request() is used to make I/O requests of block

1441

* generic_make_request() is used to make I/O requests of block

1442

* devices. It is passed a &struct bio, which describes the I/O that needs

1442

* devices. It is passed a &struct bio, which describes the I/O that needs

1443

* to be done.

1443

* to be done.

1444

*

1444

*

1445

* generic_make_request() does not return any status. The

1445

* generic_make_request() does not return any status. The

1446

* success/failure status of the request, along with notification of

1446

* success/failure status of the request, along with notification of

1447

* completion, is delivered asynchronously through the bio->bi_end_io

1447

* completion, is delivered asynchronously through the bio->bi_end_io

1448

* function described (one day) else where.

1448

* function described (one day) else where.

1449

*

1449

*

1450

* The caller of generic_make_request must make sure that bi_io_vec

1450

* The caller of generic_make_request must make sure that bi_io_vec

1451

* are set to describe the memory buffer, and that bi_dev and bi_sector are

1451

* are set to describe the memory buffer, and that bi_dev and bi_sector are

1452

* set to describe the device address, and the

1452

* set to describe the device address, and the

1453

* bi_end_io and optionally bi_private are set to describe how

1453

* bi_end_io and optionally bi_private are set to describe how

1454

* completion notification should be signaled.

1454

* completion notification should be signaled.

1455

*

1455

*

1456

* generic_make_request and the drivers it calls may use bi_next if this

1456

* generic_make_request and the drivers it calls may use bi_next if this

1457

* bio happens to be merged with someone else, and may change bi_dev and

1457

* bio happens to be merged with someone else, and may change bi_dev and

1458

* bi_sector for remaps as it sees fit. So the values of these fields

1458

* bi_sector for remaps as it sees fit. So the values of these fields

1459

* should NOT be depended on after the call to generic_make_request.

1459

* should NOT be depended on after the call to generic_make_request.

1460

*/

1460

*/

1461

static inline void __generic_make_request(struct bio *bio)

1461

static inline void __generic_make_request(struct bio *bio)

1462

{

1462

{

1463

struct request_queue *q;

1463

struct request_queue *q;

1464

sector_t old_sector;

1464

sector_t old_sector;

1465

int ret, nr_sectors = bio_sectors(bio);

1465

int ret, nr_sectors = bio_sectors(bio);

1466

dev_t old_dev;

1466

dev_t old_dev;

1467

int err = -EIO;

1467

int err = -EIO;

1468

1469

might_sleep();

1469

might_sleep();

1470

1471

if (bio_check_eod(bio, nr_sectors))

1471

if (bio_check_eod(bio, nr_sectors))

1472

goto end_io;

1472

goto end_io;

1473

1474

/*

1474

/*

1475

* Resolve the mapping until finished. (drivers are

1475

* Resolve the mapping until finished. (drivers are

1476

* still free to implement/resolve their own stacking

1476

* still free to implement/resolve their own stacking

1477

* by explicitly returning 0)

1477

* by explicitly returning 0)

1478

*

1478

*

1479

* NOTE: we don't repeat the blk_size check for each new device.

1479

* NOTE: we don't repeat the blk_size check for each new device.

1480

* Stacking drivers are expected to know what they are doing.

1480

* Stacking drivers are expected to know what they are doing.

1481

*/

1481

*/

1482

old_sector = -1;

1482

old_sector = -1;

1483

old_dev = 0;

1483

old_dev = 0;

1484

do {

1484

do {

1485

char b[BDEVNAME_SIZE];

1485

char b[BDEVNAME_SIZE];

1486

1487

q = bdev_get_queue(bio->bi_bdev);

1487

q = bdev_get_queue(bio->bi_bdev);

1488

if (unlikely(!q)) {

1488

if (unlikely(!q)) {

1489

printk(KERN_ERR

1489

printk(KERN_ERR

1490

"generic_make_request: Trying to access "

1490

"generic_make_request: Trying to access "

1491

"nonexistent block-device %s (%Lu)\n",

1491

"nonexistent block-device %s (%Lu)\n",

1492

bdevname(bio->bi_bdev, b),

1492

bdevname(bio->bi_bdev, b),

1493

(long long) bio->bi_sector);

1493

(long long) bio->bi_sector);

1494

goto end_io;

1494

goto end_io;

1495

}

1495

}

1496

1497

if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&

1497

if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&

1498

nr_sectors > queue_max_hw_sectors(q))) {

1498

nr_sectors > queue_max_hw_sectors(q))) {

1499

printk(KERN_ERR "bio too big device %s (%u > %u)\n",

1499

printk(KERN_ERR "bio too big device %s (%u > %u)\n",

1500

bdevname(bio->bi_bdev, b),

1500

bdevname(bio->bi_bdev, b),

1501

bio_sectors(bio),

1501

bio_sectors(bio),

1502

queue_max_hw_sectors(q));

1502

queue_max_hw_sectors(q));

1503

goto end_io;

1503

goto end_io;

1504

}

1504

}

1505

1506

if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))

1506

if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))

1507

goto end_io;

1507

goto end_io;

1508

1509

if (should_fail_request(bio))

1509

if (should_fail_request(bio))

1510

goto end_io;

1510

goto end_io;

1511

1512

/*

1512

/*

1513

* If this device has partitions, remap block n

1513

* If this device has partitions, remap block n

1514

* of partition p to block n+start(p) of the disk.

1514

* of partition p to block n+start(p) of the disk.

1515

*/

1515

*/

1516

blk_partition_remap(bio);

1516

blk_partition_remap(bio);

1517

1518

if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))

1518

if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))

1519

goto end_io;

1519

goto end_io;

1520

1521

if (old_sector != -1)

1521

if (old_sector != -1)

1522

trace_block_bio_remap(q, bio, old_dev, old_sector);

1522

trace_block_bio_remap(q, bio, old_dev, old_sector);

1523

1524

old_sector = bio->bi_sector;

1524

old_sector = bio->bi_sector;

1525

old_dev = bio->bi_bdev->bd_dev;

1525

old_dev = bio->bi_bdev->bd_dev;

1526

1527

if (bio_check_eod(bio, nr_sectors))

1527

if (bio_check_eod(bio, nr_sectors))

1528

goto end_io;

1528

goto end_io;

1529

1530

/*

1530

/*

1531

* Filter flush bio's early so that make_request based

1531

* Filter flush bio's early so that make_request based

1532

* drivers without flush support don't have to worry

1532

* drivers without flush support don't have to worry

1533

* about them.

1533

* about them.

1534

*/

1534

*/

1535

if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {

1535

if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {

1536

bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);

1536

bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);

1537

if (!nr_sectors) {

1537

if (!nr_sectors) {

1538

err = 0;

1538

err = 0;

1539

goto end_io;

1539

goto end_io;

1540

}

1540

}

1541

}

1541

}

1542

1543

if ((bio->bi_rw & REQ_DISCARD) &&

1543

if ((bio->bi_rw & REQ_DISCARD) &&

1544

(!blk_queue_discard(q) ||

1544

(!blk_queue_discard(q) ||

1545

((bio->bi_rw & REQ_SECURE) &&

1545

((bio->bi_rw & REQ_SECURE) &&

1546

!blk_queue_secdiscard(q)))) {

1546

!blk_queue_secdiscard(q)))) {

1547

err = -EOPNOTSUPP;

1547

err = -EOPNOTSUPP;

1548

goto end_io;

1548

goto end_io;

1549

}

1549

}

1550

1551

blk_throtl_bio(q, &bio);

1551

blk_throtl_bio(q, &bio);

1552

1553

/*

1553

/*

1554

* If bio = NULL, bio has been throttled and will be submitted

1554

* If bio = NULL, bio has been throttled and will be submitted

1555

* later.

1555

* later.

1556

*/

1556

*/

1557

if (!bio)

1557

if (!bio)

1558

break;

1558

break;

1559

1560

trace_block_bio_queue(q, bio);

1560

trace_block_bio_queue(q, bio);

1561

1562

ret = q->make_request_fn(q, bio);

1562

ret = q->make_request_fn(q, bio);

1563

} while (ret);

1563

} while (ret);

1564

1565

return;

1565

return;

1566

1567

end_io:

1567

end_io:

1568

bio_endio(bio, err);

1568

bio_endio(bio, err);

1569

}

1569

}

1570

1571

/*

1571

/*

1572

* We only want one ->make_request_fn to be active at a time,

1572

* We only want one ->make_request_fn to be active at a time,

1573

* else stack usage with stacked devices could be a problem.

1573

* else stack usage with stacked devices could be a problem.

1574

* So use current->bio_list to keep a list of requests

1574

* So use current->bio_list to keep a list of requests

1575

* submited by a make_request_fn function.

1575

* submited by a make_request_fn function.

1576

* current->bio_list is also used as a flag to say if

1576

* current->bio_list is also used as a flag to say if

1577

* generic_make_request is currently active in this task or not.

1577

* generic_make_request is currently active in this task or not.

1578

* If it is NULL, then no make_request is active. If it is non-NULL,

1578

* If it is NULL, then no make_request is active. If it is non-NULL,

1579

* then a make_request is active, and new requests should be added

1579

* then a make_request is active, and new requests should be added

1580

* at the tail

1580

* at the tail

1581

*/

1581

*/

1582

void generic_make_request(struct bio *bio)

1582

void generic_make_request(struct bio *bio)

1583

{

1583

{

1584

struct bio_list bio_list_on_stack;

1584

struct bio_list bio_list_on_stack;

1585

1586

if (current->bio_list) {

1586

if (current->bio_list) {

1587

/* make_request is active */

1587

/* make_request is active */

1588

bio_list_add(current->bio_list, bio);

1588

bio_list_add(current->bio_list, bio);

1589

return;

1589

return;

1590

}

1590

}

1591

/* following loop may be a bit non-obvious, and so deserves some

1591

/* following loop may be a bit non-obvious, and so deserves some

1592

* explanation.

1592

* explanation.

1593

* Before entering the loop, bio->bi_next is NULL (as all callers

1593

* Before entering the loop, bio->bi_next is NULL (as all callers

1594

* ensure that) so we have a list with a single bio.

1594

* ensure that) so we have a list with a single bio.

1595

* We pretend that we have just taken it off a longer list, so

1595

* We pretend that we have just taken it off a longer list, so

1596

* we assign bio_list to a pointer to the bio_list_on_stack,

1596

* we assign bio_list to a pointer to the bio_list_on_stack,

1597

* thus initialising the bio_list of new bios to be

1597

* thus initialising the bio_list of new bios to be

1598

* added. __generic_make_request may indeed add some more bios

1598

* added. __generic_make_request may indeed add some more bios

1599

* through a recursive call to generic_make_request. If it

1599

* through a recursive call to generic_make_request. If it

1600

* did, we find a non-NULL value in bio_list and re-enter the loop

1600

* did, we find a non-NULL value in bio_list and re-enter the loop

1601

* from the top. In this case we really did just take the bio

1601

* from the top. In this case we really did just take the bio

1602

* of the top of the list (no pretending) and so remove it from

1602

* of the top of the list (no pretending) and so remove it from

1603

* bio_list, and call into __generic_make_request again.

1603

* bio_list, and call into __generic_make_request again.

1604

*

1604

*

1605

* The loop was structured like this to make only one call to

1605

* The loop was structured like this to make only one call to

1606

* __generic_make_request (which is important as it is large and

1606

* __generic_make_request (which is important as it is large and

1607

* inlined) and to keep the structure simple.

1607

* inlined) and to keep the structure simple.

1608

*/

1608

*/

1609

BUG_ON(bio->bi_next);

1609

BUG_ON(bio->bi_next);

1610

bio_list_init(&bio_list_on_stack);

1610

bio_list_init(&bio_list_on_stack);

1611

current->bio_list = &bio_list_on_stack;

1611

current->bio_list = &bio_list_on_stack;

1612

do {

1612

do {

1613

__generic_make_request(bio);

1613

__generic_make_request(bio);

1614

bio = bio_list_pop(current->bio_list);

1614

bio = bio_list_pop(current->bio_list);

1615

} while (bio);

1615

} while (bio);

1616

current->bio_list = NULL; /* deactivate */

1616

current->bio_list = NULL; /* deactivate */

1617

}

1617

}

1618

EXPORT_SYMBOL(generic_make_request);

1618

EXPORT_SYMBOL(generic_make_request);

1619

1620

/**

1620

/**

1621

* submit_bio - submit a bio to the block device layer for I/O

1621

* submit_bio - submit a bio to the block device layer for I/O

1622

* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)

1622

* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)

1623

* @bio: The &struct bio which describes the I/O

1623

* @bio: The &struct bio which describes the I/O

1624

*

1624

*

1625

* submit_bio() is very similar in purpose to generic_make_request(), and

1625

* submit_bio() is very similar in purpose to generic_make_request(), and

1626

* uses that function to do most of the work. Both are fairly rough

1626

* uses that function to do most of the work. Both are fairly rough

1627

* interfaces; @bio must be presetup and ready for I/O.

1627

* interfaces; @bio must be presetup and ready for I/O.

1628

*

1628

*

1629

*/

1629

*/

1630

void submit_bio(int rw, struct bio *bio)

1630

void submit_bio(int rw, struct bio *bio)

1631

{

1631

{

1632

int count = bio_sectors(bio);

1632

int count = bio_sectors(bio);

1633

1634

bio->bi_rw |= rw;

1634

bio->bi_rw |= rw;

1635

1636

/*

1636

/*

1637

* If it's a regular read/write or a barrier with data attached,

1637

* If it's a regular read/write or a barrier with data attached,

1638

* go through the normal accounting stuff before submission.

1638

* go through the normal accounting stuff before submission.

1639

*/

1639

*/

1640

if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {

1640

if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {

1641

if (rw & WRITE) {

1641

if (rw & WRITE) {

1642

count_vm_events(PGPGOUT, count);

1642

count_vm_events(PGPGOUT, count);

1643

} else {

1643

} else {

1644

task_io_account_read(bio->bi_size);

1644

task_io_account_read(bio->bi_size);

1645

count_vm_events(PGPGIN, count);

1645

count_vm_events(PGPGIN, count);

1646

}

1646

}

1647

1648

if (unlikely(block_dump)) {

1648

if (unlikely(block_dump)) {

1649

char b[BDEVNAME_SIZE];

1649

char b[BDEVNAME_SIZE];

1650

printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",

1650

printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",

1651

current->comm, task_pid_nr(current),

1651

current->comm, task_pid_nr(current),

1652

(rw & WRITE) ? "WRITE" : "READ",

1652

(rw & WRITE) ? "WRITE" : "READ",

1653

(unsigned long long)bio->bi_sector,

1653

(unsigned long long)bio->bi_sector,

1654

bdevname(bio->bi_bdev, b),

1654

bdevname(bio->bi_bdev, b),

1655

count);

1655

count);

1656

}

1656

}

1657

}

1657

}

1658

1659

generic_make_request(bio);

1659

generic_make_request(bio);

1660

}

1660

}

1661

EXPORT_SYMBOL(submit_bio);

1661

EXPORT_SYMBOL(submit_bio);

1662

1663

/**

1663

/**

1664

* blk_rq_check_limits - Helper function to check a request for the queue limit

1664

* blk_rq_check_limits - Helper function to check a request for the queue limit

1665

* @q: the queue

1665

* @q: the queue

1666

* @rq: the request being checked

1666

* @rq: the request being checked

1667

*

1667

*

1668

* Description:

1668

* Description:

1669

* @rq may have been made based on weaker limitations of upper-level queues

1669

* @rq may have been made based on weaker limitations of upper-level queues

1670

* in request stacking drivers, and it may violate the limitation of @q.

1670

* in request stacking drivers, and it may violate the limitation of @q.

1671

* Since the block layer and the underlying device driver trust @rq

1671

* Since the block layer and the underlying device driver trust @rq

1672

* after it is inserted to @q, it should be checked against @q before

1672

* after it is inserted to @q, it should be checked against @q before

1673

* the insertion using this generic function.

1673

* the insertion using this generic function.

1674

*

1674

*

1675

* This function should also be useful for request stacking drivers

1675

* This function should also be useful for request stacking drivers

1676

* in some cases below, so export this function.

1676

* in some cases below, so export this function.

1677

* Request stacking drivers like request-based dm may change the queue

1677

* Request stacking drivers like request-based dm may change the queue

1678

* limits while requests are in the queue (e.g. dm's table swapping).

1678

* limits while requests are in the queue (e.g. dm's table swapping).

1679

* Such request stacking drivers should check those requests agaist

1679

* Such request stacking drivers should check those requests agaist

1680

* the new queue limits again when they dispatch those requests,

1680

* the new queue limits again when they dispatch those requests,

1681

* although such checkings are also done against the old queue limits

1681

* although such checkings are also done against the old queue limits

1682

* when submitting requests.

1682

* when submitting requests.

1683

*/

1683

*/

1684

int blk_rq_check_limits(struct request_queue *q, struct request *rq)

1684

int blk_rq_check_limits(struct request_queue *q, struct request *rq)

1685

{

1685

{

1686

if (rq->cmd_flags & REQ_DISCARD)

1686

if (rq->cmd_flags & REQ_DISCARD)

1687

return 0;

1687

return 0;

1688

1689

if (blk_rq_sectors(rq) > queue_max_sectors(q) ||

1689

if (blk_rq_sectors(rq) > queue_max_sectors(q) ||

1690

blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {

1690

blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {

1691

printk(KERN_ERR "%s: over max size limit.\n", __func__);

1691

printk(KERN_ERR "%s: over max size limit.\n", __func__);

1692

return -EIO;

1692

return -EIO;

1693

}

1693

}

1694

1695

/*

1695

/*

1696

* queue's settings related to segment counting like q->bounce_pfn

1696

* queue's settings related to segment counting like q->bounce_pfn

1697

* may differ from that of other stacking queues.

1697

* may differ from that of other stacking queues.

1698

* Recalculate it to check the request correctly on this queue's

1698

* Recalculate it to check the request correctly on this queue's

1699

* limitation.

1699

* limitation.

1700

*/

1700

*/

1701

blk_recalc_rq_segments(rq);

1701

blk_recalc_rq_segments(rq);

1702

if (rq->nr_phys_segments > queue_max_segments(q)) {

1702

if (rq->nr_phys_segments > queue_max_segments(q)) {

1703

printk(KERN_ERR "%s: over max segments limit.\n", __func__);

1703

printk(KERN_ERR "%s: over max segments limit.\n", __func__);

1704

return -EIO;

1704

return -EIO;

1705

}

1705

}

1706

1707

return 0;

1707

return 0;

1708

}

1708

}

1709

EXPORT_SYMBOL_GPL(blk_rq_check_limits);

1709

EXPORT_SYMBOL_GPL(blk_rq_check_limits);

1710

1711

/**

1711

/**

1712

* blk_insert_cloned_request - Helper for stacking drivers to submit a request

1712

* blk_insert_cloned_request - Helper for stacking drivers to submit a request

1713

* @q: the queue to submit the request

1713

* @q: the queue to submit the request

1714

* @rq: the request being queued

1714

* @rq: the request being queued

1715

*/

1715

*/

1716

int blk_insert_cloned_request(struct request_queue *q, struct request *rq)

1716

int blk_insert_cloned_request(struct request_queue *q, struct request *rq)

1717

{

1717

{

1718

unsigned long flags;

1718

unsigned long flags;

1719

1720

if (blk_rq_check_limits(q, rq))

1720

if (blk_rq_check_limits(q, rq))

1721

return -EIO;

1721

return -EIO;

1722

1723

#ifdef CONFIG_FAIL_MAKE_REQUEST

1723

#ifdef CONFIG_FAIL_MAKE_REQUEST

1724

if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&

1724

if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&

1725

should_fail(&fail_make_request, blk_rq_bytes(rq)))

1725

should_fail(&fail_make_request, blk_rq_bytes(rq)))

1726

return -EIO;

1726

return -EIO;

1727

#endif

1727

#endif

1728

1729

spin_lock_irqsave(q->queue_lock, flags);

1729

spin_lock_irqsave(q->queue_lock, flags);

1730

1731

/*

1731

/*

1732

* Submitting request must be dequeued before calling this function

1732

* Submitting request must be dequeued before calling this function

1733

* because it will be linked to another request_queue

1733

* because it will be linked to another request_queue

1734

*/

1734

*/

1735

BUG_ON(blk_queued_rq(rq));

1735

BUG_ON(blk_queued_rq(rq));

1736

1737

add_acct_request(q, rq, ELEVATOR_INSERT_BACK);

1737

add_acct_request(q, rq, ELEVATOR_INSERT_BACK);

1738

spin_unlock_irqrestore(q->queue_lock, flags);

1738

spin_unlock_irqrestore(q->queue_lock, flags);

1739

1740

return 0;

1740

return 0;

1741

}

1741

}

1742

EXPORT_SYMBOL_GPL(blk_insert_cloned_request);

1742

EXPORT_SYMBOL_GPL(blk_insert_cloned_request);

1743

1744

/**

1744

/**

1745

* blk_rq_err_bytes - determine number of bytes till the next failure boundary

1745

* blk_rq_err_bytes - determine number of bytes till the next failure boundary

1746

* @rq: request to examine

1746

* @rq: request to examine

1747

*

1747

*

1748

* Description:

1748

* Description:

1749

* A request could be merge of IOs which require different failure

1749

* A request could be merge of IOs which require different failure

1750

* handling. This function determines the number of bytes which

1750

* handling. This function determines the number of bytes which

1751

* can be failed from the beginning of the request without

1751

* can be failed from the beginning of the request without

1752

* crossing into area which need to be retried further.

1752

* crossing into area which need to be retried further.

1753

*

1753

*

1754

* Return:

1754

* Return:

1755

* The number of bytes to fail.

1755

* The number of bytes to fail.

1756

*

1756

*

1757

* Context:

1757

* Context:

1758

* queue_lock must be held.

1758

* queue_lock must be held.

1759

*/

1759

*/

1760

unsigned int blk_rq_err_bytes(const struct request *rq)

1760

unsigned int blk_rq_err_bytes(const struct request *rq)

1761

{

1761

{

1762

unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;

1762

unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;

1763

unsigned int bytes = 0;

1763

unsigned int bytes = 0;

1764

struct bio *bio;

1764

struct bio *bio;

1765

1766

if (!(rq->cmd_flags & REQ_MIXED_MERGE))

1766

if (!(rq->cmd_flags & REQ_MIXED_MERGE))

1767

return blk_rq_bytes(rq);

1767

return blk_rq_bytes(rq);

1768

1769

/*

1769

/*

1770

* Currently the only 'mixing' which can happen is between

1770

* Currently the only 'mixing' which can happen is between

1771

* different fastfail types. We can safely fail portions

1771

* different fastfail types. We can safely fail portions

1772

* which have all the failfast bits that the first one has -

1772

* which have all the failfast bits that the first one has -

1773

* the ones which are at least as eager to fail as the first

1773

* the ones which are at least as eager to fail as the first

1774

* one.

1774

* one.

1775

*/

1775

*/

1776

for (bio = rq->bio; bio; bio = bio->bi_next) {

1776

for (bio = rq->bio; bio; bio = bio->bi_next) {

1777

if ((bio->bi_rw & ff) != ff)

1777

if ((bio->bi_rw & ff) != ff)

1778

break;

1778

break;

1779

bytes += bio->bi_size;

1779

bytes += bio->bi_size;

1780

}

1780

}

1781

1782

/* this could lead to infinite loop */

1782

/* this could lead to infinite loop */

1783

BUG_ON(blk_rq_bytes(rq) && !bytes);

1783

BUG_ON(blk_rq_bytes(rq) && !bytes);

1784

return bytes;

1784

return bytes;

1785

}

1785

}

1786

EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

1786

EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

1787

1788

static void blk_account_io_completion(struct request *req, unsigned int bytes)

1788

static void blk_account_io_completion(struct request *req, unsigned int bytes)

1789

{

1789

{

1790

if (blk_do_io_stat(req)) {

1790

if (blk_do_io_stat(req)) {

1791

const int rw = rq_data_dir(req);

1791

const int rw = rq_data_dir(req);

1792

struct hd_struct *part;

1792

struct hd_struct *part;

1793

int cpu;

1793

int cpu;

1794

1795

cpu = part_stat_lock();

1795

cpu = part_stat_lock();

1796

part = req->part;

1796

part = req->part;

1797

part_stat_add(cpu, part, sectors[rw], bytes >> 9);

1797

part_stat_add(cpu, part, sectors[rw], bytes >> 9);

1798

part_stat_unlock();

1798

part_stat_unlock();

1799

}

1799

}

1800

}

1800

}

1801

1802

static void blk_account_io_done(struct request *req)

1802

static void blk_account_io_done(struct request *req)

1803

{

1803

{

1804

/*

1804

/*

1805

* Account IO completion. flush_rq isn't accounted as a

1805

* Account IO completion. flush_rq isn't accounted as a

1806

* normal IO on queueing nor completion. Accounting the

1806

* normal IO on queueing nor completion. Accounting the

1807

* containing request is enough.

1807

* containing request is enough.

1808

*/

1808

*/

1809

if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {

1809

if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {

1810

unsigned long duration = jiffies - req->start_time;

1810

unsigned long duration = jiffies - req->start_time;

1811

const int rw = rq_data_dir(req);

1811

const int rw = rq_data_dir(req);

1812

struct hd_struct *part;

1812

struct hd_struct *part;

1813

int cpu;

1813

int cpu;

1814

1815

cpu = part_stat_lock();

1815

cpu = part_stat_lock();

1816

part = req->part;

1816

part = req->part;

1817

1818

part_stat_inc(cpu, part, ios[rw]);

1818

part_stat_inc(cpu, part, ios[rw]);

1819

part_stat_add(cpu, part, ticks[rw], duration);

1819

part_stat_add(cpu, part, ticks[rw], duration);

1820

part_round_stats(cpu, part);

1820

part_round_stats(cpu, part);

1821

part_dec_in_flight(part, rw);

1821

part_dec_in_flight(part, rw);

1822

1823

hd_struct_put(part);

1823

hd_struct_put(part);

1824

part_stat_unlock();

1824

part_stat_unlock();

1825

}

1825

}

1826

}

1826

}

1827

1828

/**

1828

/**

1829

* blk_peek_request - peek at the top of a request queue

1829

* blk_peek_request - peek at the top of a request queue

1830

* @q: request queue to peek at

1830

* @q: request queue to peek at

1831

*

1831

*

1832

* Description:

1832

* Description:

1833

* Return the request at the top of @q. The returned request

1833

* Return the request at the top of @q. The returned request

1834

* should be started using blk_start_request() before LLD starts

1834

* should be started using blk_start_request() before LLD starts

1835

* processing it.

1835

* processing it.

1836

*

1836

*

1837

* Return:

1837

* Return:

1838

* Pointer to the request at the top of @q if available. Null

1838

* Pointer to the request at the top of @q if available. Null

1839

* otherwise.

1839

* otherwise.

1840

*

1840

*

1841

* Context:

1841

* Context:

1842

* queue_lock must be held.

1842

* queue_lock must be held.

1843

*/

1843

*/

1844

struct request *blk_peek_request(struct request_queue *q)

1844

struct request *blk_peek_request(struct request_queue *q)

1845

{

1845

{

1846

struct request *rq;

1846

struct request *rq;

1847

int ret;

1847

int ret;

1848

1849

while ((rq = __elv_next_request(q)) != NULL) {

1849

while ((rq = __elv_next_request(q)) != NULL) {

1850

if (!(rq->cmd_flags & REQ_STARTED)) {

1850

if (!(rq->cmd_flags & REQ_STARTED)) {

1851

/*

1851

/*

1852

* This is the first time the device driver

1852

* This is the first time the device driver

1853

* sees this request (possibly after

1853

* sees this request (possibly after

1854

* requeueing). Notify IO scheduler.

1854

* requeueing). Notify IO scheduler.

1855

*/

1855

*/

1856

if (rq->cmd_flags & REQ_SORTED)

1856

if (rq->cmd_flags & REQ_SORTED)

1857

elv_activate_rq(q, rq);

1857

elv_activate_rq(q, rq);

1858

1859

/*

1859

/*

1860

* just mark as started even if we don't start

1860

* just mark as started even if we don't start

1861

* it, a request that has been delayed should

1861

* it, a request that has been delayed should

1862

* not be passed by new incoming requests

1862

* not be passed by new incoming requests

1863

*/

1863

*/

1864

rq->cmd_flags |= REQ_STARTED;

1864

rq->cmd_flags |= REQ_STARTED;

1865

trace_block_rq_issue(q, rq);

1865

trace_block_rq_issue(q, rq);

1866

}

1866

}

1867

1868

if (!q->boundary_rq || q->boundary_rq == rq) {

1868

if (!q->boundary_rq || q->boundary_rq == rq) {

1869

q->end_sector = rq_end_sector(rq);

1869

q->end_sector = rq_end_sector(rq);

1870

q->boundary_rq = NULL;

1870

q->boundary_rq = NULL;

1871

}

1871

}

1872

1873

if (rq->cmd_flags & REQ_DONTPREP)

1873

if (rq->cmd_flags & REQ_DONTPREP)

1874

break;

1874

break;

1875

1876

if (q->dma_drain_size && blk_rq_bytes(rq)) {

1876

if (q->dma_drain_size && blk_rq_bytes(rq)) {

1877

/*

1877

/*

1878

* make sure space for the drain appears we

1878

* make sure space for the drain appears we

1879

* know we can do this because max_hw_segments

1879

* know we can do this because max_hw_segments

1880

* has been adjusted to be one fewer than the

1880

* has been adjusted to be one fewer than the

1881

* device can handle

1881

* device can handle

1882

*/

1882

*/

1883

rq->nr_phys_segments++;

1883

rq->nr_phys_segments++;

1884

}

1884

}

1885

1886

if (!q->prep_rq_fn)

1886

if (!q->prep_rq_fn)

1887

break;

1887

break;

1888

1889

ret = q->prep_rq_fn(q, rq);

1889

ret = q->prep_rq_fn(q, rq);

1890

if (ret == BLKPREP_OK) {

1890

if (ret == BLKPREP_OK) {

1891

break;

1891

break;

1892

} else if (ret == BLKPREP_DEFER) {

1892

} else if (ret == BLKPREP_DEFER) {

1893

/*

1893

/*

1894

* the request may have been (partially) prepped.

1894

* the request may have been (partially) prepped.

1895

* we need to keep this request in the front to

1895

* we need to keep this request in the front to

1896

* avoid resource deadlock. REQ_STARTED will

1896

* avoid resource deadlock. REQ_STARTED will

1897

* prevent other fs requests from passing this one.

1897

* prevent other fs requests from passing this one.

1898

*/

1898

*/

1899

if (q->dma_drain_size && blk_rq_bytes(rq) &&

1899

if (q->dma_drain_size && blk_rq_bytes(rq) &&

1900

!(rq->cmd_flags & REQ_DONTPREP)) {

1900

!(rq->cmd_flags & REQ_DONTPREP)) {

1901

/*

1901

/*

1902

* remove the space for the drain we added

1902

* remove the space for the drain we added

1903

* so that we don't add it again

1903

* so that we don't add it again

1904

*/

1904

*/

1905

--rq->nr_phys_segments;

1905

--rq->nr_phys_segments;

1906

}

1906

}

1907

1908

rq = NULL;

1908

rq = NULL;

1909

break;

1909

break;

1910

} else if (ret == BLKPREP_KILL) {

1910

} else if (ret == BLKPREP_KILL) {

1911

rq->cmd_flags |= REQ_QUIET;

1911

rq->cmd_flags |= REQ_QUIET;

1912

/*

1912

/*

1913

* Mark this request as started so we don't trigger

1913

* Mark this request as started so we don't trigger

1914

* any debug logic in the end I/O path.

1914

* any debug logic in the end I/O path.

1915

*/

1915

*/

1916

blk_start_request(rq);

1916

blk_start_request(rq);

1917

__blk_end_request_all(rq, -EIO);

1917

__blk_end_request_all(rq, -EIO);

1918

} else {

1918

} else {

1919

printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);

1919

printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);

1920

break;

1920

break;

1921

}

1921

}

1922

}

1922

}

1923

1924

return rq;

1924

return rq;

1925

}

1925

}

1926

EXPORT_SYMBOL(blk_peek_request);

1926

EXPORT_SYMBOL(blk_peek_request);

1927

1928

void blk_dequeue_request(struct request *rq)

1928

void blk_dequeue_request(struct request *rq)

1929

{

1929

{

1930

struct request_queue *q = rq->q;

1930

struct request_queue *q = rq->q;

1931

1932

BUG_ON(list_empty(&rq->queuelist));

1932

BUG_ON(list_empty(&rq->queuelist));

1933

BUG_ON(ELV_ON_HASH(rq));

1933

BUG_ON(ELV_ON_HASH(rq));

1934

1935

list_del_init(&rq->queuelist);

1935

list_del_init(&rq->queuelist);

1936

1937

/*

1937

/*

1938

* the time frame between a request being removed from the lists

1938

* the time frame between a request being removed from the lists

1939

* and to it is freed is accounted as io that is in progress at

1939

* and to it is freed is accounted as io that is in progress at

1940

* the driver side.

1940

* the driver side.

1941

*/

1941

*/

1942

if (blk_account_rq(rq)) {

1942

if (blk_account_rq(rq)) {

1943

q->in_flight[rq_is_sync(rq)]++;

1943

q->in_flight[rq_is_sync(rq)]++;

1944

set_io_start_time_ns(rq);

1944

set_io_start_time_ns(rq);

1945

}

1945

}

1946

}

1946

}

1947

1948

/**

1948

/**

1949

* blk_start_request - start request processing on the driver

1949

* blk_start_request - start request processing on the driver

1950

* @req: request to dequeue

1950

* @req: request to dequeue

1951

*

1951

*

1952

* Description:

1952

* Description:

1953

* Dequeue @req and start timeout timer on it. This hands off the

1953

* Dequeue @req and start timeout timer on it. This hands off the

1954

* request to the driver.

1954

* request to the driver.

1955

*

1955

*

1956

* Block internal functions which don't want to start timer should

1956

* Block internal functions which don't want to start timer should

1957

* call blk_dequeue_request().

1957

* call blk_dequeue_request().

1958

*

1958

*

1959

* Context:

1959

* Context:

1960

* queue_lock must be held.

1960

* queue_lock must be held.

1961

*/

1961

*/

1962

void blk_start_request(struct request *req)

1962

void blk_start_request(struct request *req)

1963

{

1963

{

1964

blk_dequeue_request(req);

1964

blk_dequeue_request(req);

1965

1966

/*

1966

/*

1967

* We are now handing the request to the hardware, initialize

1967

* We are now handing the request to the hardware, initialize

1968

* resid_len to full count and add the timeout handler.

1968

* resid_len to full count and add the timeout handler.

1969

*/

1969

*/

1970

req->resid_len = blk_rq_bytes(req);

1970

req->resid_len = blk_rq_bytes(req);

1971

if (unlikely(blk_bidi_rq(req)))

1971

if (unlikely(blk_bidi_rq(req)))

1972

req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

1972

req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

1973

1974

blk_add_timer(req);

1974

blk_add_timer(req);

1975

}

1975

}

1976

EXPORT_SYMBOL(blk_start_request);

1976

EXPORT_SYMBOL(blk_start_request);

1977

1978

/**

1978

/**

1979

* blk_fetch_request - fetch a request from a request queue

1979

* blk_fetch_request - fetch a request from a request queue

1980

* @q: request queue to fetch a request from

1980

* @q: request queue to fetch a request from

1981

*

1981

*

1982

* Description:

1982

* Description:

1983

* Return the request at the top of @q. The request is started on

1983

* Return the request at the top of @q. The request is started on

1984

* return and LLD can start processing it immediately.

1984

* return and LLD can start processing it immediately.

1985

*

1985

*

1986

* Return:

1986

* Return:

1987

* Pointer to the request at the top of @q if available. Null

1987

* Pointer to the request at the top of @q if available. Null

1988

* otherwise.

1988

* otherwise.

1989

*

1989

*

1990

* Context:

1990

* Context:

1991

* queue_lock must be held.

1991

* queue_lock must be held.

1992

*/

1992

*/

1993

struct request *blk_fetch_request(struct request_queue *q)

1993

struct request *blk_fetch_request(struct request_queue *q)

1994

{

1994

{

1995

struct request *rq;

1995

struct request *rq;

1996

1997

rq = blk_peek_request(q);

1997

rq = blk_peek_request(q);

1998

if (rq)

1998

if (rq)

1999

blk_start_request(rq);

1999

blk_start_request(rq);

2000

return rq;

2000

return rq;

2001

}

2001

}

2002

EXPORT_SYMBOL(blk_fetch_request);

2002

EXPORT_SYMBOL(blk_fetch_request);

2003

2004

/**

2004

/**

2005

* blk_update_request - Special helper function for request stacking drivers

2005

* blk_update_request - Special helper function for request stacking drivers

2006

* @req: the request being processed

2006

* @req: the request being processed

2007

* @error: %0 for success, < %0 for error

2007

* @error: %0 for success, < %0 for error

2008

* @nr_bytes: number of bytes to complete @req

2008

* @nr_bytes: number of bytes to complete @req

2009

*

2009

*

2010

* Description:

2010

* Description:

2011

* Ends I/O on a number of bytes attached to @req, but doesn't complete

2011

* Ends I/O on a number of bytes attached to @req, but doesn't complete

2012

* the request structure even if @req doesn't have leftover.

2012

* the request structure even if @req doesn't have leftover.

2013

* If @req has leftover, sets it up for the next range of segments.

2013

* If @req has leftover, sets it up for the next range of segments.

2014

*

2014

*

2015

* This special helper function is only for request stacking drivers

2015

* This special helper function is only for request stacking drivers

2016

* (e.g. request-based dm) so that they can handle partial completion.

2016

* (e.g. request-based dm) so that they can handle partial completion.

2017

* Actual device drivers should use blk_end_request instead.

2017

* Actual device drivers should use blk_end_request instead.

2018

*

2018

*

2019

* Passing the result of blk_rq_bytes() as @nr_bytes guarantees

2019

* Passing the result of blk_rq_bytes() as @nr_bytes guarantees

2020

* %false return from this function.

2020

* %false return from this function.

2021

*

2021

*

2022

* Return:

2022

* Return:

2023

* %false - this request doesn't have any more data

2023

* %false - this request doesn't have any more data

2024

* %true - this request has more data

2024

* %true - this request has more data

2025

**/

2025

**/

2026

bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)

2026

bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)

2027

{

2027

{

2028

int total_bytes, bio_nbytes, next_idx = 0;

2028

int total_bytes, bio_nbytes, next_idx = 0;

2029

struct bio *bio;

2029

struct bio *bio;

2030

2031

if (!req->bio)

2031

if (!req->bio)

2032

return false;

2032

return false;

2033

2034

trace_block_rq_complete(req->q, req);

2034

trace_block_rq_complete(req->q, req);

2035

2036

/*

2036

/*

2037

* For fs requests, rq is just carrier of independent bio's

2037

* For fs requests, rq is just carrier of independent bio's

2038

* and each partial completion should be handled separately.

2038

* and each partial completion should be handled separately.

2039

* Reset per-request error on each partial completion.

2039

* Reset per-request error on each partial completion.

2040

*

2040

*

2041

* TODO: tj: This is too subtle. It would be better to let

2041

* TODO: tj: This is too subtle. It would be better to let

2042

* low level drivers do what they see fit.

2042

* low level drivers do what they see fit.

2043

*/

2043

*/

2044

if (req->cmd_type == REQ_TYPE_FS)

2044

if (req->cmd_type == REQ_TYPE_FS)

2045

req->errors = 0;

2045

req->errors = 0;

2046

2047

if (error && req->cmd_type == REQ_TYPE_FS &&

2047

if (error && req->cmd_type == REQ_TYPE_FS &&

2048

!(req->cmd_flags & REQ_QUIET)) {

2048

!(req->cmd_flags & REQ_QUIET)) {

2049

char *error_type;

2049

char *error_type;

2050

2051

switch (error) {

2051

switch (error) {

2052

case -ENOLINK:

2052

case -ENOLINK:

2053

error_type = "recoverable transport";

2053

error_type = "recoverable transport";

2054

break;

2054

break;

2055

case -EREMOTEIO:

2055

case -EREMOTEIO:

2056

error_type = "critical target";

2056

error_type = "critical target";

2057

break;

2057

break;

2058

case -EBADE:

2058

case -EBADE:

2059

error_type = "critical nexus";

2059

error_type = "critical nexus";

2060

break;

2060

break;

2061

case -EIO:

2061

case -EIO:

2062

default:

2062

default:

2063

error_type = "I/O";

2063

error_type = "I/O";

2064

break;

2064

break;

2065

}

2065

}

2066

printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",

2066

printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",

2067

error_type, req->rq_disk ? req->rq_disk->disk_name : "?",

2067

error_type, req->rq_disk ? req->rq_disk->disk_name : "?",

2068

(unsigned long long)blk_rq_pos(req));

2068

(unsigned long long)blk_rq_pos(req));

2069

}

2069

}

2070

2071

blk_account_io_completion(req, nr_bytes);

2071

blk_account_io_completion(req, nr_bytes);

2072

2073

total_bytes = bio_nbytes = 0;

2073

total_bytes = bio_nbytes = 0;

2074

while ((bio = req->bio) != NULL) {

2074

while ((bio = req->bio) != NULL) {

2075

int nbytes;

2075

int nbytes;

2076

2077

if (nr_bytes >= bio->bi_size) {

2077

if (nr_bytes >= bio->bi_size) {

2078

req->bio = bio->bi_next;

2078

req->bio = bio->bi_next;

2079

nbytes = bio->bi_size;

2079

nbytes = bio->bi_size;

2080

req_bio_endio(req, bio, nbytes, error);

2080

req_bio_endio(req, bio, nbytes, error);

2081

next_idx = 0;

2081

next_idx = 0;

2082

bio_nbytes = 0;

2082

bio_nbytes = 0;

2083

} else {

2083

} else {

2084

int idx = bio->bi_idx + next_idx;

2084

int idx = bio->bi_idx + next_idx;

2085

2086

if (unlikely(idx >= bio->bi_vcnt)) {

2086

if (unlikely(idx >= bio->bi_vcnt)) {

2087

blk_dump_rq_flags(req, "__end_that");

2087

blk_dump_rq_flags(req, "__end_that");

2088

printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",

2088

printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",

2089

__func__, idx, bio->bi_vcnt);

2089

__func__, idx, bio->bi_vcnt);

2090

break;

2090

break;

2091

}

2091

}

2092

2093

nbytes = bio_iovec_idx(bio, idx)->bv_len;

2093

nbytes = bio_iovec_idx(bio, idx)->bv_len;

2094

BIO_BUG_ON(nbytes > bio->bi_size);

2094

BIO_BUG_ON(nbytes > bio->bi_size);

2095

2096

/*

2096

/*

2097

* not a complete bvec done

2097

* not a complete bvec done

2098

*/

2098

*/

2099

if (unlikely(nbytes > nr_bytes)) {

2099

if (unlikely(nbytes > nr_bytes)) {

2100

bio_nbytes += nr_bytes;

2100

bio_nbytes += nr_bytes;

2101

total_bytes += nr_bytes;

2101

total_bytes += nr_bytes;

2102

break;

2102

break;

2103

}

2103

}

2104

2105

/*

2105

/*

2106

* advance to the next vector

2106

* advance to the next vector

2107

*/

2107

*/

2108

next_idx++;

2108

next_idx++;

2109

bio_nbytes += nbytes;

2109

bio_nbytes += nbytes;

2110

}

2110

}

2111

2112

total_bytes += nbytes;

2112

total_bytes += nbytes;

2113

nr_bytes -= nbytes;

2113

nr_bytes -= nbytes;

2114

2115

bio = req->bio;

2115

bio = req->bio;

2116

if (bio) {

2116

if (bio) {

2117

/*

2117

/*

2118

* end more in this run, or just return 'not-done'

2118

* end more in this run, or just return 'not-done'

2119

*/

2119

*/

2120

if (unlikely(nr_bytes <= 0))

2120

if (unlikely(nr_bytes <= 0))

2121

break;

2121

break;

2122

}

2122

}

2123

}

2123

}

2124

2125

/*

2125

/*

2126

* completely done

2126

* completely done

2127

*/

2127

*/

2128

if (!req->bio) {

2128

if (!req->bio) {

2129

/*

2129

/*

2130

* Reset counters so that the request stacking driver

2130

* Reset counters so that the request stacking driver

2131

* can find how many bytes remain in the request

2131

* can find how many bytes remain in the request

2132

* later.

2132

* later.

2133

*/

2133

*/

2134

req->__data_len = 0;

2134

req->__data_len = 0;

2135

return false;

2135

return false;

2136

}

2136

}

2137

2138

/*

2138

/*

2139

* if the request wasn't completed, update state

2139

* if the request wasn't completed, update state

2140

*/

2140

*/

2141

if (bio_nbytes) {

2141

if (bio_nbytes) {

2142

req_bio_endio(req, bio, bio_nbytes, error);

2142

req_bio_endio(req, bio, bio_nbytes, error);

2143

bio->bi_idx += next_idx;

2143

bio->bi_idx += next_idx;

2144

bio_iovec(bio)->bv_offset += nr_bytes;

2144

bio_iovec(bio)->bv_offset += nr_bytes;

2145

bio_iovec(bio)->bv_len -= nr_bytes;

2145

bio_iovec(bio)->bv_len -= nr_bytes;

2146

}

2146

}

2147

2148

req->__data_len -= total_bytes;

2148

req->__data_len -= total_bytes;

2149

req->buffer = bio_data(req->bio);

2149

req->buffer = bio_data(req->bio);

2150

2151

/* update sector only for requests with clear definition of sector */

2151

/* update sector only for requests with clear definition of sector */

2152

if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))

2152

if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))

2153

req->__sector += total_bytes >> 9;

2153

req->__sector += total_bytes >> 9;

2154

2155

/* mixed attributes always follow the first bio */

2155

/* mixed attributes always follow the first bio */

2156

if (req->cmd_flags & REQ_MIXED_MERGE) {

2156

if (req->cmd_flags & REQ_MIXED_MERGE) {

2157

req->cmd_flags &= ~REQ_FAILFAST_MASK;

2157

req->cmd_flags &= ~REQ_FAILFAST_MASK;

2158

req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;

2158

req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;

2159

}

2159

}

2160

2161

/*

2161

/*

2162

* If total number of sectors is less than the first segment

2162

* If total number of sectors is less than the first segment

2163

* size, something has gone terribly wrong.

2163

* size, something has gone terribly wrong.

2164

*/

2164

*/

2165

if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {

2165

if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {

2166

printk(KERN_ERR "blk: request botched\n");

2166

printk(KERN_ERR "blk: request botched\n");

2167

req->__data_len = blk_rq_cur_bytes(req);

2167

req->__data_len = blk_rq_cur_bytes(req);

2168

}

2168

}

2169

2170

/* recalculate the number of segments */

2170

/* recalculate the number of segments */

2171

blk_recalc_rq_segments(req);

2171

blk_recalc_rq_segments(req);

2172

2173

return true;

2173

return true;

2174

}

2174

}

2175

EXPORT_SYMBOL_GPL(blk_update_request);

2175

EXPORT_SYMBOL_GPL(blk_update_request);

2176

2177

static bool blk_update_bidi_request(struct request *rq, int error,

2177

static bool blk_update_bidi_request(struct request *rq, int error,

2178

unsigned int nr_bytes,

2178

unsigned int nr_bytes,

2179

unsigned int bidi_bytes)

2179

unsigned int bidi_bytes)

2180

{

2180

{

2181

if (blk_update_request(rq, error, nr_bytes))

2181

if (blk_update_request(rq, error, nr_bytes))

2182

return true;

2182

return true;

2183

2184

/* Bidi request must be completed as a whole */

2184

/* Bidi request must be completed as a whole */

2185

if (unlikely(blk_bidi_rq(rq)) &&

2185

if (unlikely(blk_bidi_rq(rq)) &&

2186

blk_update_request(rq->next_rq, error, bidi_bytes))

2186

blk_update_request(rq->next_rq, error, bidi_bytes))

2187

return true;

2187

return true;

2188

2189

if (blk_queue_add_random(rq->q))

2189

if (blk_queue_add_random(rq->q))

2190

add_disk_randomness(rq->rq_disk);

2190

add_disk_randomness(rq->rq_disk);

2191

2192

return false;

2192

return false;

2193

}

2193

}

2194

2195

/**

2195

/**

2196

* blk_unprep_request - unprepare a request

2196

* blk_unprep_request - unprepare a request

2197

* @req: the request

2197

* @req: the request

2198

*

2198

*

2199

* This function makes a request ready for complete resubmission (or

2199

* This function makes a request ready for complete resubmission (or

2200

* completion). It happens only after all error handling is complete,

2200

* completion). It happens only after all error handling is complete,

2201

* so represents the appropriate moment to deallocate any resources

2201

* so represents the appropriate moment to deallocate any resources

2202

* that were allocated to the request in the prep_rq_fn. The queue

2202

* that were allocated to the request in the prep_rq_fn. The queue

2203

* lock is held when calling this.

2203

* lock is held when calling this.

2204

*/

2204

*/

2205

void blk_unprep_request(struct request *req)

2205

void blk_unprep_request(struct request *req)

2206

{

2206

{

2207

struct request_queue *q = req->q;

2207

struct request_queue *q = req->q;

2208

2209

req->cmd_flags &= ~REQ_DONTPREP;

2209

req->cmd_flags &= ~REQ_DONTPREP;

2210

if (q->unprep_rq_fn)

2210

if (q->unprep_rq_fn)

2211

q->unprep_rq_fn(q, req);

2211

q->unprep_rq_fn(q, req);

2212

}

2212

}

2213

EXPORT_SYMBOL_GPL(blk_unprep_request);

2213

EXPORT_SYMBOL_GPL(blk_unprep_request);

2214

2215

/*

2215

/*

2216

* queue lock must be held

2216

* queue lock must be held

2217

*/

2217

*/

2218

static void blk_finish_request(struct request *req, int error)

2218

static void blk_finish_request(struct request *req, int error)

2219

{

2219

{

2220

if (blk_rq_tagged(req))

2220

if (blk_rq_tagged(req))

2221

blk_queue_end_tag(req->q, req);

2221

blk_queue_end_tag(req->q, req);

2222

2223

BUG_ON(blk_queued_rq(req));

2223

BUG_ON(blk_queued_rq(req));

2224

2225

if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)

2225

if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)

2226

laptop_io_completion(&req->q->backing_dev_info);

2226

laptop_io_completion(&req->q->backing_dev_info);

2227

2228

blk_delete_timer(req);

2228

blk_delete_timer(req);

2229

2230

if (req->cmd_flags & REQ_DONTPREP)

2230

if (req->cmd_flags & REQ_DONTPREP)

2231

blk_unprep_request(req);

2231

blk_unprep_request(req);

2232

2233

2234

blk_account_io_done(req);

2234

blk_account_io_done(req);

2235

2236

if (req->end_io)

2236

if (req->end_io)

2237

req->end_io(req, error);

2237

req->end_io(req, error);

2238

else {

2238

else {

2239

if (blk_bidi_rq(req))

2239

if (blk_bidi_rq(req))

2240

__blk_put_request(req->next_rq->q, req->next_rq);

2240

__blk_put_request(req->next_rq->q, req->next_rq);

2241

2242

__blk_put_request(req->q, req);

2242

__blk_put_request(req->q, req);

2243

}

2243

}

2244

}

2244

}

2245

2246

/**

2246

/**

2247

* blk_end_bidi_request - Complete a bidi request

2247

* blk_end_bidi_request - Complete a bidi request

2248

* @rq: the request to complete

2248

* @rq: the request to complete

2249

* @error: %0 for success, < %0 for error

2249

* @error: %0 for success, < %0 for error

2250

* @nr_bytes: number of bytes to complete @rq

2250

* @nr_bytes: number of bytes to complete @rq

2251

* @bidi_bytes: number of bytes to complete @rq->next_rq

2251

* @bidi_bytes: number of bytes to complete @rq->next_rq

2252

*

2252

*

2253

* Description:

2253

* Description:

2254

* Ends I/O on a number of bytes attached to @rq and @rq->next_rq.

2254

* Ends I/O on a number of bytes attached to @rq and @rq->next_rq.

2255

* Drivers that supports bidi can safely call this member for any

2255

* Drivers that supports bidi can safely call this member for any

2256

* type of request, bidi or uni. In the later case @bidi_bytes is

2256

* type of request, bidi or uni. In the later case @bidi_bytes is

2257

* just ignored.

2257

* just ignored.

2258

*

2258

*

2259

* Return:

2259

* Return:

2260

* %false - we are done with this request

2260

* %false - we are done with this request

2261

* %true - still buffers pending for this request

2261

* %true - still buffers pending for this request

2262

**/

2262

**/

2263

static bool blk_end_bidi_request(struct request *rq, int error,

2263

static bool blk_end_bidi_request(struct request *rq, int error,

2264

unsigned int nr_bytes, unsigned int bidi_bytes)

2264

unsigned int nr_bytes, unsigned int bidi_bytes)

2265

{

2265

{

2266

struct request_queue *q = rq->q;

2266

struct request_queue *q = rq->q;

2267

unsigned long flags;

2267

unsigned long flags;

2268

2269

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2269

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2270

return true;

2270

return true;

2271

2272

spin_lock_irqsave(q->queue_lock, flags);

2272

spin_lock_irqsave(q->queue_lock, flags);

2273

blk_finish_request(rq, error);

2273

blk_finish_request(rq, error);

2274

spin_unlock_irqrestore(q->queue_lock, flags);

2274

spin_unlock_irqrestore(q->queue_lock, flags);

2275

2276

return false;

2276

return false;

2277

}

2277

}

2278

2279

/**

2279

/**

2280

* __blk_end_bidi_request - Complete a bidi request with queue lock held

2280

* __blk_end_bidi_request - Complete a bidi request with queue lock held

2281

* @rq: the request to complete

2281

* @rq: the request to complete

2282

* @error: %0 for success, < %0 for error

2282

* @error: %0 for success, < %0 for error

2283

* @nr_bytes: number of bytes to complete @rq

2283

* @nr_bytes: number of bytes to complete @rq

2284

* @bidi_bytes: number of bytes to complete @rq->next_rq

2284

* @bidi_bytes: number of bytes to complete @rq->next_rq

2285

*

2285

*

2286

* Description:

2286

* Description:

2287

* Identical to blk_end_bidi_request() except that queue lock is

2287

* Identical to blk_end_bidi_request() except that queue lock is

2288

* assumed to be locked on entry and remains so on return.

2288

* assumed to be locked on entry and remains so on return.

2289

*

2289

*

2290

* Return:

2290

* Return:

2291

* %false - we are done with this request

2291

* %false - we are done with this request

2292

* %true - still buffers pending for this request

2292

* %true - still buffers pending for this request

2293

**/

2293

**/

2294

static bool __blk_end_bidi_request(struct request *rq, int error,

2294

static bool __blk_end_bidi_request(struct request *rq, int error,

2295

unsigned int nr_bytes, unsigned int bidi_bytes)

2295

unsigned int nr_bytes, unsigned int bidi_bytes)

2296

{

2296

{

2297

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2297

if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

2298

return true;

2298

return true;

2299

2300

blk_finish_request(rq, error);

2300

blk_finish_request(rq, error);

2301

2302

return false;

2302

return false;

2303

}

2303

}

2304

2305

/**

2305

/**

2306

* blk_end_request - Helper function for drivers to complete the request.

2306

* blk_end_request - Helper function for drivers to complete the request.

2307

* @rq: the request being processed

2307

* @rq: the request being processed

2308

* @error: %0 for success, < %0 for error

2308

* @error: %0 for success, < %0 for error

2309

* @nr_bytes: number of bytes to complete

2309

* @nr_bytes: number of bytes to complete

2310

*

2310

*

2311

* Description:

2311

* Description:

2312

* Ends I/O on a number of bytes attached to @rq.

2312

* Ends I/O on a number of bytes attached to @rq.

2313

* If @rq has leftover, sets it up for the next range of segments.

2313

* If @rq has leftover, sets it up for the next range of segments.

2314

*

2314

*

2315

* Return:

2315

* Return:

2316

* %false - we are done with this request

2316

* %false - we are done with this request

2317

* %true - still buffers pending for this request

2317

* %true - still buffers pending for this request

2318

**/

2318

**/

2319

bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2319

bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2320

{

2320

{

2321

return blk_end_bidi_request(rq, error, nr_bytes, 0);

2321

return blk_end_bidi_request(rq, error, nr_bytes, 0);

2322

}

2322

}

2323

EXPORT_SYMBOL(blk_end_request);

2323

EXPORT_SYMBOL(blk_end_request);

2324

2325

/**

2325

/**

2326

* blk_end_request_all - Helper function for drives to finish the request.

2326

* blk_end_request_all - Helper function for drives to finish the request.

2327

* @rq: the request to finish

2327

* @rq: the request to finish

2328

* @error: %0 for success, < %0 for error

2328

* @error: %0 for success, < %0 for error

2329

*

2329

*

2330

* Description:

2330

* Description:

2331

* Completely finish @rq.

2331

* Completely finish @rq.

2332

*/

2332

*/

2333

void blk_end_request_all(struct request *rq, int error)

2333

void blk_end_request_all(struct request *rq, int error)

2334

{

2334

{

2335

bool pending;

2335

bool pending;

2336

unsigned int bidi_bytes = 0;

2336

unsigned int bidi_bytes = 0;

2337

2338

if (unlikely(blk_bidi_rq(rq)))

2338

if (unlikely(blk_bidi_rq(rq)))

2339

bidi_bytes = blk_rq_bytes(rq->next_rq);

2339

bidi_bytes = blk_rq_bytes(rq->next_rq);

2340

2341

pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2341

pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2342

BUG_ON(pending);

2342

BUG_ON(pending);

2343

}

2343

}

2344

EXPORT_SYMBOL(blk_end_request_all);

2344

EXPORT_SYMBOL(blk_end_request_all);

2345

2346

/**

2346

/**

2347

* blk_end_request_cur - Helper function to finish the current request chunk.

2347

* blk_end_request_cur - Helper function to finish the current request chunk.

2348

* @rq: the request to finish the current chunk for

2348

* @rq: the request to finish the current chunk for

2349

* @error: %0 for success, < %0 for error

2349

* @error: %0 for success, < %0 for error

2350

*

2350

*

2351

* Description:

2351

* Description:

2352

* Complete the current consecutively mapped chunk from @rq.

2352

* Complete the current consecutively mapped chunk from @rq.

2353

*

2353

*

2354

* Return:

2354

* Return:

2355

* %false - we are done with this request

2355

* %false - we are done with this request

2356

* %true - still buffers pending for this request

2356

* %true - still buffers pending for this request

2357

*/

2357

*/

2358

bool blk_end_request_cur(struct request *rq, int error)

2358

bool blk_end_request_cur(struct request *rq, int error)

2359

{

2359

{

2360

return blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2360

return blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2361

}

2361

}

2362

EXPORT_SYMBOL(blk_end_request_cur);

2362

EXPORT_SYMBOL(blk_end_request_cur);

2363

2364

/**

2364

/**

2365

* blk_end_request_err - Finish a request till the next failure boundary.

2365

* blk_end_request_err - Finish a request till the next failure boundary.

2366

* @rq: the request to finish till the next failure boundary for

2366

* @rq: the request to finish till the next failure boundary for

2367

* @error: must be negative errno

2367

* @error: must be negative errno

2368

*

2368

*

2369

* Description:

2369

* Description:

2370

* Complete @rq till the next failure boundary.

2370

* Complete @rq till the next failure boundary.

2371

*

2371

*

2372

* Return:

2372

* Return:

2373

* %false - we are done with this request

2373

* %false - we are done with this request

2374

* %true - still buffers pending for this request

2374

* %true - still buffers pending for this request

2375

*/

2375

*/

2376

bool blk_end_request_err(struct request *rq, int error)

2376

bool blk_end_request_err(struct request *rq, int error)

2377

{

2377

{

2378

WARN_ON(error >= 0);

2378

WARN_ON(error >= 0);

2379

return blk_end_request(rq, error, blk_rq_err_bytes(rq));

2379

return blk_end_request(rq, error, blk_rq_err_bytes(rq));

2380

}

2380

}

2381

EXPORT_SYMBOL_GPL(blk_end_request_err);

2381

EXPORT_SYMBOL_GPL(blk_end_request_err);

2382

2383

/**

2383

/**

2384

* __blk_end_request - Helper function for drivers to complete the request.

2384

* __blk_end_request - Helper function for drivers to complete the request.

2385

* @rq: the request being processed

2385

* @rq: the request being processed

2386

* @error: %0 for success, < %0 for error

2386

* @error: %0 for success, < %0 for error

2387

* @nr_bytes: number of bytes to complete

2387

* @nr_bytes: number of bytes to complete

2388

*

2388

*

2389

* Description:

2389

* Description:

2390

* Must be called with queue lock held unlike blk_end_request().

2390

* Must be called with queue lock held unlike blk_end_request().

2391

*

2391

*

2392

* Return:

2392

* Return:

2393

* %false - we are done with this request

2393

* %false - we are done with this request

2394

* %true - still buffers pending for this request

2394

* %true - still buffers pending for this request

2395

**/

2395

**/

2396

bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2396

bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)

2397

{

2397

{

2398

return __blk_end_bidi_request(rq, error, nr_bytes, 0);

2398

return __blk_end_bidi_request(rq, error, nr_bytes, 0);

2399

}

2399

}

2400

EXPORT_SYMBOL(__blk_end_request);

2400

EXPORT_SYMBOL(__blk_end_request);

2401

2402

/**

2402

/**

2403

* __blk_end_request_all - Helper function for drives to finish the request.

2403

* __blk_end_request_all - Helper function for drives to finish the request.

2404

* @rq: the request to finish

2404

* @rq: the request to finish

2405

* @error: %0 for success, < %0 for error

2405

* @error: %0 for success, < %0 for error

2406

*

2406

*

2407

* Description:

2407

* Description:

2408

* Completely finish @rq. Must be called with queue lock held.

2408

* Completely finish @rq. Must be called with queue lock held.

2409

*/

2409

*/

2410

void __blk_end_request_all(struct request *rq, int error)

2410

void __blk_end_request_all(struct request *rq, int error)

2411

{

2411

{

2412

bool pending;

2412

bool pending;

2413

unsigned int bidi_bytes = 0;

2413

unsigned int bidi_bytes = 0;

2414

2415

if (unlikely(blk_bidi_rq(rq)))

2415

if (unlikely(blk_bidi_rq(rq)))

2416

bidi_bytes = blk_rq_bytes(rq->next_rq);

2416

bidi_bytes = blk_rq_bytes(rq->next_rq);

2417

2418

pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2418

pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);

2419

BUG_ON(pending);

2419

BUG_ON(pending);

2420

}

2420

}

2421

EXPORT_SYMBOL(__blk_end_request_all);

2421

EXPORT_SYMBOL(__blk_end_request_all);

2422

2423

/**

2423

/**

2424

* __blk_end_request_cur - Helper function to finish the current request chunk.

2424

* __blk_end_request_cur - Helper function to finish the current request chunk.

2425

* @rq: the request to finish the current chunk for

2425

* @rq: the request to finish the current chunk for

2426

* @error: %0 for success, < %0 for error

2426

* @error: %0 for success, < %0 for error

2427

*

2427

*

2428

* Description:

2428

* Description:

2429

* Complete the current consecutively mapped chunk from @rq. Must

2429

* Complete the current consecutively mapped chunk from @rq. Must

2430

* be called with queue lock held.

2430

* be called with queue lock held.

2431

*

2431

*

2432

* Return:

2432

* Return:

2433

* %false - we are done with this request

2433

* %false - we are done with this request

2434

* %true - still buffers pending for this request

2434

* %true - still buffers pending for this request

2435

*/

2435

*/

2436

bool __blk_end_request_cur(struct request *rq, int error)

2436

bool __blk_end_request_cur(struct request *rq, int error)

2437

{

2437

{

2438

return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2438

return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));

2439

}

2439

}

2440

EXPORT_SYMBOL(__blk_end_request_cur);

2440

EXPORT_SYMBOL(__blk_end_request_cur);

2441

2442

/**

2442

/**

2443

* __blk_end_request_err - Finish a request till the next failure boundary.

2443

* __blk_end_request_err - Finish a request till the next failure boundary.

2444

* @rq: the request to finish till the next failure boundary for

2444

* @rq: the request to finish till the next failure boundary for

2445

* @error: must be negative errno

2445

* @error: must be negative errno

2446

*

2446

*

2447

* Description:

2447

* Description:

2448

* Complete @rq till the next failure boundary. Must be called

2448

* Complete @rq till the next failure boundary. Must be called

2449

* with queue lock held.

2449

* with queue lock held.

2450

*

2450

*

2451

* Return:

2451

* Return:

2452

* %false - we are done with this request

2452

* %false - we are done with this request

2453

* %true - still buffers pending for this request

2453

* %true - still buffers pending for this request

2454

*/

2454

*/

2455

bool __blk_end_request_err(struct request *rq, int error)

2455

bool __blk_end_request_err(struct request *rq, int error)

2456

{

2456

{

2457

WARN_ON(error >= 0);

2457

WARN_ON(error >= 0);

2458

return __blk_end_request(rq, error, blk_rq_err_bytes(rq));

2458

return __blk_end_request(rq, error, blk_rq_err_bytes(rq));

2459

}

2459

}

2460

EXPORT_SYMBOL_GPL(__blk_end_request_err);

2460

EXPORT_SYMBOL_GPL(__blk_end_request_err);

2461

2462

void blk_rq_bio_prep(struct request_queue *q, struct request *rq,

2462

void blk_rq_bio_prep(struct request_queue *q, struct request *rq,

2463

struct bio *bio)

2463

struct bio *bio)

2464

{

2464

{

2465

/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */

2465

/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */

2466

rq->cmd_flags |= bio->bi_rw & REQ_WRITE;

2466

rq->cmd_flags |= bio->bi_rw & REQ_WRITE;

2467

2468

if (bio_has_data(bio)) {

2468

if (bio_has_data(bio)) {

2469

rq->nr_phys_segments = bio_phys_segments(q, bio);

2469

rq->nr_phys_segments = bio_phys_segments(q, bio);

2470

rq->buffer = bio_data(bio);

2470

rq->buffer = bio_data(bio);

2471

}

2471

}

2472

rq->__data_len = bio->bi_size;

2472

rq->__data_len = bio->bi_size;

2473

rq->bio = rq->biotail = bio;

2473

rq->bio = rq->biotail = bio;

2474

2475

if (bio->bi_bdev)

2475

if (bio->bi_bdev)

2476

rq->rq_disk = bio->bi_bdev->bd_disk;

2476

rq->rq_disk = bio->bi_bdev->bd_disk;

2477

}

2477

}

2478

2479

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - Helper function to flush all pages in a request
 * @rq: the request to be flushed
 *
 * Description:
 *     Walk every segment of @rq and call flush_dcache_page() on the
 *     page backing it.
 */
void rq_flush_dcache_pages(struct request *rq)
{
	struct req_iterator it;
	struct bio_vec *vec;

	rq_for_each_segment(vec, rq, it) {
		flush_dcache_page(vec->bv_page);
	}
}
EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
#endif

2497

2498

/**

2498

/**

2499

* blk_lld_busy - Check if underlying low-level drivers of a device are busy

2499

* blk_lld_busy - Check if underlying low-level drivers of a device are busy

2500

* @q : the queue of the device being checked

2500

* @q : the queue of the device being checked

2501

*

2501

*

2502

* Description:

2502

* Description:

2503

* Check if underlying low-level drivers of a device are busy.

2503

* Check if underlying low-level drivers of a device are busy.

2504

* If the drivers want to export their busy state, they must set own

2504

* If the drivers want to export their busy state, they must set own

2505

* exporting function using blk_queue_lld_busy() first.

2505

* exporting function using blk_queue_lld_busy() first.

2506

*

2506

*

2507

* Basically, this function is used only by request stacking drivers

2507

* Basically, this function is used only by request stacking drivers

2508

* to stop dispatching requests to underlying devices when underlying

2508

* to stop dispatching requests to underlying devices when underlying

2509

* devices are busy. This behavior helps more I/O merging on the queue

2509

* devices are busy. This behavior helps more I/O merging on the queue

2510

* of the request stacking driver and prevents I/O throughput regression

2510

* of the request stacking driver and prevents I/O throughput regression

2511

* on burst I/O load.

2511

* on burst I/O load.

2512

*

2512

*

2513

* Return:

2513

* Return:

2514

* 0 - Not busy (The request stacking driver should dispatch request)

2514

* 0 - Not busy (The request stacking driver should dispatch request)

2515

* 1 - Busy (The request stacking driver should stop dispatching request)

2515

* 1 - Busy (The request stacking driver should stop dispatching request)

2516

*/

2516

*/

2517

int blk_lld_busy(struct request_queue *q)

2517

int blk_lld_busy(struct request_queue *q)

2518

{

2518

{

2519

if (q->lld_busy_fn)

2519

if (q->lld_busy_fn)

2520

return q->lld_busy_fn(q);

2520

return q->lld_busy_fn(q);

2521

2522

return 0;

2522

return 0;

2523

}

2523

}

2524

EXPORT_SYMBOL_GPL(blk_lld_busy);

2524

EXPORT_SYMBOL_GPL(blk_lld_busy);

2525

2526

/**

2526

/**

2527

* blk_rq_unprep_clone - Helper function to free all bios in a cloned request

2527

* blk_rq_unprep_clone - Helper function to free all bios in a cloned request

2528

* @rq: the clone request to be cleaned up

2528

* @rq: the clone request to be cleaned up

2529

*

2529

*

2530

* Description:

2530

* Description:

2531

* Free all bios in @rq for a cloned request.

2531

* Free all bios in @rq for a cloned request.

2532

*/

2532

*/

2533

void blk_rq_unprep_clone(struct request *rq)

2533

void blk_rq_unprep_clone(struct request *rq)

2534

{

2534

{

2535

struct bio *bio;

2535

struct bio *bio;

2536

2537

while ((bio = rq->bio) != NULL) {

2537

while ((bio = rq->bio) != NULL) {

2538

rq->bio = bio->bi_next;

2538

rq->bio = bio->bi_next;

2539

2540

bio_put(bio);

2540

bio_put(bio);

2541

}

2541

}

2542

}

2542

}

2543

EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);

2543

EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);

2544

2545

/*

2545

/*

2546

* Copy attributes of the original request to the clone request.

2546

* Copy attributes of the original request to the clone request.

2547

* The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.

2547

* The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.

2548

*/

2548

*/

2549

static void __blk_rq_prep_clone(struct request *dst, struct request *src)

2549

static void __blk_rq_prep_clone(struct request *dst, struct request *src)

2550

{

2550

{

2551

dst->cpu = src->cpu;

2551

dst->cpu = src->cpu;

2552

dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;

2552

dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;

2553

dst->cmd_type = src->cmd_type;

2553

dst->cmd_type = src->cmd_type;

2554

dst->__sector = blk_rq_pos(src);

2554

dst->__sector = blk_rq_pos(src);

2555

dst->__data_len = blk_rq_bytes(src);

2555

dst->__data_len = blk_rq_bytes(src);

2556

dst->nr_phys_segments = src->nr_phys_segments;

2556

dst->nr_phys_segments = src->nr_phys_segments;

2557

dst->ioprio = src->ioprio;

2557

dst->ioprio = src->ioprio;

2558

dst->extra_len = src->extra_len;

2558

dst->extra_len = src->extra_len;

2559

}

2559

}

2560

2561

/**

2561

/**

2562

* blk_rq_prep_clone - Helper function to setup clone request

2562

* blk_rq_prep_clone - Helper function to setup clone request

2563

* @rq: the request to be setup

2563

* @rq: the request to be setup

2564

* @rq_src: original request to be cloned

2564

* @rq_src: original request to be cloned

2565

* @bs: bio_set that bios for clone are allocated from

2565

* @bs: bio_set that bios for clone are allocated from

2566

* @gfp_mask: memory allocation mask for bio

2566

* @gfp_mask: memory allocation mask for bio

2567

* @bio_ctr: setup function to be called for each clone bio.

2567

* @bio_ctr: setup function to be called for each clone bio.

2568

* Returns %0 for success, non %0 for failure.

2568

* Returns %0 for success, non %0 for failure.

2569

* @data: private data to be passed to @bio_ctr

2569

* @data: private data to be passed to @bio_ctr

2570

*

2570

*

2571

* Description:

2571

* Description:

2572

* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.

2572

* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.

2573

* The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)

2573

* The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)

2574

* are not copied, and copying such parts is the caller's responsibility.

2574

* are not copied, and copying such parts is the caller's responsibility.

2575

* Also, pages which the original bios are pointing to are not copied

2575

* Also, pages which the original bios are pointing to are not copied

2576

* and the cloned bios just point same pages.

2576

* and the cloned bios just point same pages.

2577

* So cloned bios must be completed before original bios, which means

2577

* So cloned bios must be completed before original bios, which means

2578

* the caller must complete @rq before @rq_src.

2578

* the caller must complete @rq before @rq_src.

2579

*/

2579

*/

2580

int blk_rq_prep_clone(struct request *rq, struct request *rq_src,

2580

int blk_rq_prep_clone(struct request *rq, struct request *rq_src,

2581

struct bio_set *bs, gfp_t gfp_mask,

2581

struct bio_set *bs, gfp_t gfp_mask,

2582

int (*bio_ctr)(struct bio *, struct bio *, void *),

2582

int (*bio_ctr)(struct bio *, struct bio *, void *),

2583

void *data)

2583

void *data)

2584

{

2584

{

2585

struct bio *bio, *bio_src;

2585

struct bio *bio, *bio_src;

2586

2587

if (!bs)

2587

if (!bs)

2588

bs = fs_bio_set;

2588

bs = fs_bio_set;

2589

2590

blk_rq_init(NULL, rq);

2590

blk_rq_init(NULL, rq);

2591

2592

__rq_for_each_bio(bio_src, rq_src) {

2592

__rq_for_each_bio(bio_src, rq_src) {

2593

bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);

2593

bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);

2594

if (!bio)

2594

if (!bio)

2595

goto free_and_out;

2595

goto free_and_out;

2596

2597

__bio_clone(bio, bio_src);

2597

__bio_clone(bio, bio_src);

2598

2599

if (bio_integrity(bio_src) &&

2599

if (bio_integrity(bio_src) &&

2600

bio_integrity_clone(bio, bio_src, gfp_mask, bs))

2600

bio_integrity_clone(bio, bio_src, gfp_mask, bs))

2601

goto free_and_out;

2601

goto free_and_out;

2602

2603

if (bio_ctr && bio_ctr(bio, bio_src, data))

2603

if (bio_ctr && bio_ctr(bio, bio_src, data))

2604

goto free_and_out;

2604

goto free_and_out;

2605

2606

if (rq->bio) {

2606

if (rq->bio) {

2607

rq->biotail->bi_next = bio;

2607

rq->biotail->bi_next = bio;

2608

rq->biotail = bio;

2608

rq->biotail = bio;

2609

} else

2609

} else

2610

rq->bio = rq->biotail = bio;

2610

rq->bio = rq->biotail = bio;

2611

}

2611

}

2612

2613

__blk_rq_prep_clone(rq, rq_src);

2613

__blk_rq_prep_clone(rq, rq_src);

2614

2615

return 0;

2615

return 0;

2616

2617

free_and_out:

2617

free_and_out:

2618

if (bio)

2618

if (bio)

2619

bio_free(bio, bs);

2619

bio_free(bio, bs);

2620

blk_rq_unprep_clone(rq);

2620

blk_rq_unprep_clone(rq);

2621

2622

return -ENOMEM;

2622

return -ENOMEM;

2623

}

2623

}

2624

EXPORT_SYMBOL_GPL(blk_rq_prep_clone);

2624

EXPORT_SYMBOL_GPL(blk_rq_prep_clone);

2625

2626

int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)

2626

int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)

2627

{

2627

{

2628

return queue_work(kblockd_workqueue, work);

2628

return queue_work(kblockd_workqueue, work);

2629

}

2629

}

2630

EXPORT_SYMBOL(kblockd_schedule_work);

2630

EXPORT_SYMBOL(kblockd_schedule_work);

2631

2632

int kblockd_schedule_delayed_work(struct request_queue *q,

2632

int kblockd_schedule_delayed_work(struct request_queue *q,

2633

struct delayed_work *dwork, unsigned long delay)

2633

struct delayed_work *dwork, unsigned long delay)

2634

{

2634

{

2635

return queue_delayed_work(kblockd_workqueue, dwork, delay);

2635

return queue_delayed_work(kblockd_workqueue, dwork, delay);

2636

}

2636

}

2637

EXPORT_SYMBOL(kblockd_schedule_delayed_work);

2637

EXPORT_SYMBOL(kblockd_schedule_delayed_work);

2638

2639

#define PLUG_MAGIC 0x91827364

2639

#define PLUG_MAGIC 0x91827364

2640

2641

void blk_start_plug(struct blk_plug *plug)

2641

void blk_start_plug(struct blk_plug *plug)

2642

{

2642

{

2643

struct task_struct *tsk = current;

2643

struct task_struct *tsk = current;

2644

2645

plug->magic = PLUG_MAGIC;

2645

plug->magic = PLUG_MAGIC;

2646

INIT_LIST_HEAD(&plug->list);

2646

INIT_LIST_HEAD(&plug->list);

2647

plug->should_sort = 0;

2647

plug->should_sort = 0;

2648

2649

/*

2649

/*

2650

* If this is a nested plug, don't actually assign it. It will be

2650

* If this is a nested plug, don't actually assign it. It will be

2651

* flushed on its own.

2651

* flushed on its own.

2652

*/

2652

*/

2653

if (!tsk->plug) {

2653

if (!tsk->plug) {

2654

/*

2654

/*

2655

* Store ordering should not be needed here, since a potential

2655

* Store ordering should not be needed here, since a potential

2656

* preempt will imply a full memory barrier

2656

* preempt will imply a full memory barrier

2657

*/

2657

*/

2658

tsk->plug = plug;

2658

tsk->plug = plug;

2659

}

2659

}

2660

}

2660

}

2661

EXPORT_SYMBOL(blk_start_plug);

2661

EXPORT_SYMBOL(blk_start_plug);

2662

2663

static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)

2663

static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)

2664

{

2664

{

2665

struct request *rqa = container_of(a, struct request, queuelist);

2665

struct request *rqa = container_of(a, struct request, queuelist);

2666

struct request *rqb = container_of(b, struct request, queuelist);

2666

struct request *rqb = container_of(b, struct request, queuelist);

2667

2668

return !(rqa->q == rqb->q);

2668

return !(rqa->q == rqb->q);

2669

}

2669

}

2670

2671

static void flush_plug_list(struct blk_plug *plug)

2671

static void flush_plug_list(struct blk_plug *plug)

2672

{

2672

{

2673

struct request_queue *q;

2673

struct request_queue *q;

2674

unsigned long flags;

2674

unsigned long flags;

2675

struct request *rq;

2675

struct request *rq;

2676

2677

BUG_ON(plug->magic != PLUG_MAGIC);

2677

BUG_ON(plug->magic != PLUG_MAGIC);

2678

2679

if (list_empty(&plug->list))

2679

if (list_empty(&plug->list))

2680

return;

2680

return;

2681

2682

if (plug->should_sort)

2682

if (plug->should_sort)

2683

list_sort(NULL, &plug->list, plug_rq_cmp);

2683

list_sort(NULL, &plug->list, plug_rq_cmp);

2684

2685

q = NULL;

2685

q = NULL;

2686

local_irq_save(flags);

2686

local_irq_save(flags);

2687

while (!list_empty(&plug->list)) {

2687

while (!list_empty(&plug->list)) {

2688

rq = list_entry_rq(plug->list.next);

2688

rq = list_entry_rq(plug->list.next);

2689

list_del_init(&rq->queuelist);

2689

list_del_init(&rq->queuelist);

2690

BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));

2690

BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));

2691

BUG_ON(!rq->q);

2691

BUG_ON(!rq->q);

2692

if (rq->q != q) {

2692

if (rq->q != q) {

2693

if (q) {

2693

if (q) {

2694

__blk_run_queue(q, false);

2694

__blk_run_queue(q, false);

2695

spin_unlock(q->queue_lock);

2695

spin_unlock(q->queue_lock);

2696

}

2696

}

2697

q = rq->q;

2697

q = rq->q;

2698

spin_lock(q->queue_lock);

2698

spin_lock(q->queue_lock);

2699

}

2699

}

2700

rq->cmd_flags &= ~REQ_ON_PLUG;

2700

rq->cmd_flags &= ~REQ_ON_PLUG;

2701

2702

/*

2702

/*

2703

* rq is already accounted, so use raw insert

2703

* rq is already accounted, so use raw insert

2704

*/

2704

*/

2705

if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))

2705

if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))

2706

__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);

2706

__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);

2707

else

2707

else

2708

__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

2708

__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

2709

}

2709

}

2710

2711

if (q) {

2711

if (q) {

2712

__blk_run_queue(q, false);

2712

__blk_run_queue(q, false);

2713

spin_unlock(q->queue_lock);

2713

spin_unlock(q->queue_lock);

2714

}

2714

}

2715

2716

BUG_ON(!list_empty(&plug->list));

2716

BUG_ON(!list_empty(&plug->list));

2717

local_irq_restore(flags);

2717

local_irq_restore(flags);

2718

}

2718

}

2719

2720

static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)

2720

static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)

2721

{

2721

{

2722

flush_plug_list(plug);

2722

flush_plug_list(plug);

2723

2724

if (plug == tsk->plug)

2724

if (plug == tsk->plug)

2725

tsk->plug = NULL;

2725

tsk->plug = NULL;

2726

}

2726

}

2727

2728

void blk_finish_plug(struct blk_plug *plug)

2728

void blk_finish_plug(struct blk_plug *plug)

2729

{

2729

{

2730

if (plug)

2730

if (plug)

2731

__blk_finish_plug(current, plug);

2731

__blk_finish_plug(current, plug);

2732

}

2732

}

2733

EXPORT_SYMBOL(blk_finish_plug);

2733

EXPORT_SYMBOL(blk_finish_plug);

2734

2735

void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)

2735

void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)

2736

{

2736

{

2737

__blk_finish_plug(tsk, plug);

2737

__blk_finish_plug(tsk, plug);

2738

tsk->plug = plug;

2738

tsk->plug = plug;

2739

}

2739

}

2740

EXPORT_SYMBOL(__blk_flush_plug);

2740

EXPORT_SYMBOL(__blk_flush_plug);

2741

2742

/*
 * Boot-time setup: create the kblockd workqueue and the slab caches for
 * struct request and struct request_queue.  Always returns 0; failure to
 * create the workqueue panics.
 */
int __init blk_dev_init(void)
{
	/* Every request flag bit must fit in rq->cmd_flags. */
	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
			sizeof(((struct request *)0)->cmd_flags));

	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
	kblockd_workqueue = alloc_workqueue("kblockd",
					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");

	request_cachep = kmem_cache_create("blkdev_requests",
			sizeof(struct request), 0, SLAB_PANIC, NULL);

	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

	return 0;
}

2761

GITLAB

block: fix issue with calling blk_stop_queue() from the request_fn handler