Doug / smarc-fsl-linux-kernel

1

/*

1

/*

2

3

4

5

* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>

5

* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>

6

* kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000

6

* kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000

7

* bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001

7

* bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001

8

*/

8

*/

9

10

/*

10

/*

11

* This handles all read/write requests to block devices

11

* This handles all read/write requests to block devices

12

*/

12

*/

13

#include <linux/config.h>

13

#include <linux/config.h>

14

#include <linux/kernel.h>

14

#include <linux/kernel.h>

15

#include <linux/module.h>

15

#include <linux/module.h>

16

#include <linux/backing-dev.h>

16

#include <linux/backing-dev.h>

17

#include <linux/bio.h>

17

#include <linux/bio.h>

18

#include <linux/blkdev.h>

18

#include <linux/blkdev.h>

19

#include <linux/highmem.h>

19

#include <linux/highmem.h>

20

#include <linux/mm.h>

20

#include <linux/mm.h>

21

#include <linux/kernel_stat.h>

21

#include <linux/kernel_stat.h>

22

#include <linux/string.h>

22

#include <linux/string.h>

23

#include <linux/init.h>

23

#include <linux/init.h>

24

#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */

24

#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */

25

#include <linux/completion.h>

25

#include <linux/completion.h>

26

#include <linux/slab.h>

26

#include <linux/slab.h>

27

#include <linux/swap.h>

27

#include <linux/swap.h>

28

#include <linux/writeback.h>

28

#include <linux/writeback.h>

29

#include <linux/blkdev.h>

29

#include <linux/blkdev.h>

30

31

/*

31

/*

32

* for max sense size

32

* for max sense size

33

*/

33

*/

34

#include <scsi/scsi_cmnd.h>

34

#include <scsi/scsi_cmnd.h>

35

36

static void blk_unplug_work(void *data);

36

static void blk_unplug_work(void *data);

37

static void blk_unplug_timeout(unsigned long data);

37

static void blk_unplug_timeout(unsigned long data);

38

static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);

38

static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);

39

40

/*

40

/*

41

* For the allocated request tables

41

* For the allocated request tables

42

*/

42

*/

43

static kmem_cache_t *request_cachep;

43

static kmem_cache_t *request_cachep;

44

45

/*

45

/*

46

* For queue allocation

46

* For queue allocation

47

*/

47

*/

48

static kmem_cache_t *requestq_cachep;

48

static kmem_cache_t *requestq_cachep;

49

50

/*

50

/*

51

* For io context allocations

51

* For io context allocations

52

*/

52

*/

53

static kmem_cache_t *iocontext_cachep;

53

static kmem_cache_t *iocontext_cachep;

54

55

static wait_queue_head_t congestion_wqh[2] = {

55

static wait_queue_head_t congestion_wqh[2] = {

56

__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),

56

__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),

57

__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])

57

__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])

58

};

58

};

59

60

/*

60

/*

61

* Controlling structure to kblockd

61

* Controlling structure to kblockd

62

*/

62

*/

63

static struct workqueue_struct *kblockd_workqueue;

63

static struct workqueue_struct *kblockd_workqueue;

64

65

unsigned long blk_max_low_pfn, blk_max_pfn;

65

unsigned long blk_max_low_pfn, blk_max_pfn;

66

67

EXPORT_SYMBOL(blk_max_low_pfn);

67

EXPORT_SYMBOL(blk_max_low_pfn);

68

EXPORT_SYMBOL(blk_max_pfn);

68

EXPORT_SYMBOL(blk_max_pfn);

69

70

/* Amount of time in which a process may batch requests */

70

/* Amount of time in which a process may batch requests */

71

#define BLK_BATCH_TIME (HZ/50UL)

71

#define BLK_BATCH_TIME (HZ/50UL)

72

73

/* Number of requests a "batching" process may submit */

73

/* Number of requests a "batching" process may submit */

74

#define BLK_BATCH_REQ 32

74

#define BLK_BATCH_REQ 32

75

76

/*

76

/*

77

* Return the threshold (number of used requests) at which the queue is

77

* Return the threshold (number of used requests) at which the queue is

78

* considered to be congested. It include a little hysteresis to keep the

78

* considered to be congested. It include a little hysteresis to keep the

79

* context switch rate down.

79

* context switch rate down.

80

*/

80

*/

81

static inline int queue_congestion_on_threshold(struct request_queue *q)

81

static inline int queue_congestion_on_threshold(struct request_queue *q)

82

{

82

{

83

return q->nr_congestion_on;

83

return q->nr_congestion_on;

84

}

84

}

85

86

/*

86

/*

87

* The threshold at which a queue is considered to be uncongested

87

* The threshold at which a queue is considered to be uncongested

88

*/

88

*/

89

static inline int queue_congestion_off_threshold(struct request_queue *q)

89

static inline int queue_congestion_off_threshold(struct request_queue *q)

90

{

90

{

91

return q->nr_congestion_off;

91

return q->nr_congestion_off;

92

}

92

}

93

94

static void blk_queue_congestion_threshold(struct request_queue *q)

94

static void blk_queue_congestion_threshold(struct request_queue *q)

95

{

95

{

96

int nr;

96

int nr;

97

98

nr = q->nr_requests - (q->nr_requests / 8) + 1;

98

nr = q->nr_requests - (q->nr_requests / 8) + 1;

99

if (nr > q->nr_requests)

99

if (nr > q->nr_requests)

100

nr = q->nr_requests;

100

nr = q->nr_requests;

101

q->nr_congestion_on = nr;

101

q->nr_congestion_on = nr;

102

103

nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;

103

nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;

104

if (nr < 1)

104

if (nr < 1)

105

nr = 1;

105

nr = 1;

106

q->nr_congestion_off = nr;

106

q->nr_congestion_off = nr;

107

}

107

}

108

109

/*

109

/*

110

* A queue has just exitted congestion. Note this in the global counter of

110

* A queue has just exitted congestion. Note this in the global counter of

111

* congested queues, and wake up anyone who was waiting for requests to be

111

* congested queues, and wake up anyone who was waiting for requests to be

112

* put back.

112

* put back.

113

*/

113

*/

114

static void clear_queue_congested(request_queue_t *q, int rw)

114

static void clear_queue_congested(request_queue_t *q, int rw)

115

{

115

{

116

enum bdi_state bit;

116

enum bdi_state bit;

117

wait_queue_head_t *wqh = &congestion_wqh[rw];

117

wait_queue_head_t *wqh = &congestion_wqh[rw];

118

119

bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;

119

bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;

120

clear_bit(bit, &q->backing_dev_info.state);

120

clear_bit(bit, &q->backing_dev_info.state);

121

smp_mb__after_clear_bit();

121

smp_mb__after_clear_bit();

122

if (waitqueue_active(wqh))

122

if (waitqueue_active(wqh))

123

wake_up(wqh);

123

wake_up(wqh);

124

}

124

}

125

126

/*

126

/*

127

* A queue has just entered congestion. Flag that in the queue's VM-visible

127

* A queue has just entered congestion. Flag that in the queue's VM-visible

128

* state flags and increment the global gounter of congested queues.

128

* state flags and increment the global gounter of congested queues.

129

*/

129

*/

130

static void set_queue_congested(request_queue_t *q, int rw)

130

static void set_queue_congested(request_queue_t *q, int rw)

131

{

131

{

132

enum bdi_state bit;

132

enum bdi_state bit;

133

134

bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;

134

bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;

135

set_bit(bit, &q->backing_dev_info.state);

135

set_bit(bit, &q->backing_dev_info.state);

136

}

136

}

137

138

/**

138

/**

139

* blk_get_backing_dev_info - get the address of a queue's backing_dev_info

139

* blk_get_backing_dev_info - get the address of a queue's backing_dev_info

140

* @bdev: device

140

* @bdev: device

141

*

141

*

142

* Locates the passed device's request queue and returns the address of its

142

* Locates the passed device's request queue and returns the address of its

143

* backing_dev_info

143

* backing_dev_info

144

*

144

*

145

* Will return NULL if the request queue cannot be located.

145

* Will return NULL if the request queue cannot be located.

146

*/

146

*/

147

struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)

147

struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)

148

{

148

{

149

struct backing_dev_info *ret = NULL;

149

struct backing_dev_info *ret = NULL;

150

request_queue_t *q = bdev_get_queue(bdev);

150

request_queue_t *q = bdev_get_queue(bdev);

151

152

if (q)

152

if (q)

153

ret = &q->backing_dev_info;

153

ret = &q->backing_dev_info;

154

return ret;

154

return ret;

155

}

155

}

156

157

EXPORT_SYMBOL(blk_get_backing_dev_info);

157

EXPORT_SYMBOL(blk_get_backing_dev_info);

158

159

/**
 * blk_queue_activity_fn - set the activity callback for a queue
 * @q:		queue
 * @fn:		activity callback, stored in q->activity_fn
 * @data:	opaque pointer stored in q->activity_data
 *
 * Only records the two values in the queue; blk_queue_make_request()
 * uses it with NULL arguments to reset both fields.
 */
void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
{
	q->activity_data = data;
	q->activity_fn = fn;
}

EXPORT_SYMBOL(blk_queue_activity_fn);

166

167

/**

167

/**

168

* blk_queue_prep_rq - set a prepare_request function for queue

168

* blk_queue_prep_rq - set a prepare_request function for queue

169

* @q: queue

169

* @q: queue

170

* @pfn: prepare_request function

170

* @pfn: prepare_request function

171

*

171

*

172

* It's possible for a queue to register a prepare_request callback which

172

* It's possible for a queue to register a prepare_request callback which

173

* is invoked before the request is handed to the request_fn. The goal of

173

* is invoked before the request is handed to the request_fn. The goal of

174

* the function is to prepare a request for I/O, it can be used to build a

174

* the function is to prepare a request for I/O, it can be used to build a

175

* cdb from the request data for instance.

175

* cdb from the request data for instance.

176

*

176

*

177

*/

177

*/

178

void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)

178

void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)

179

{

179

{

180

q->prep_rq_fn = pfn;

180

q->prep_rq_fn = pfn;

181

}

181

}

182

183

EXPORT_SYMBOL(blk_queue_prep_rq);

183

EXPORT_SYMBOL(blk_queue_prep_rq);

184

185

/**

185

/**

186

* blk_queue_merge_bvec - set a merge_bvec function for queue

186

* blk_queue_merge_bvec - set a merge_bvec function for queue

187

* @q: queue

187

* @q: queue

188

* @mbfn: merge_bvec_fn

188

* @mbfn: merge_bvec_fn

189

*

189

*

190

* Usually queues have static limitations on the max sectors or segments that

190

* Usually queues have static limitations on the max sectors or segments that

191

* we can put in a request. Stacking drivers may have some settings that

191

* we can put in a request. Stacking drivers may have some settings that

192

* are dynamic, and thus we have to query the queue whether it is ok to

192

* are dynamic, and thus we have to query the queue whether it is ok to

193

* add a new bio_vec to a bio at a given offset or not. If the block device

193

* add a new bio_vec to a bio at a given offset or not. If the block device

194

* has such limitations, it needs to register a merge_bvec_fn to control

194

* has such limitations, it needs to register a merge_bvec_fn to control

195

* the size of bio's sent to it. Note that a block device *must* allow a

195

* the size of bio's sent to it. Note that a block device *must* allow a

196

* single page to be added to an empty bio. The block device driver may want

196

* single page to be added to an empty bio. The block device driver may want

197

* to use the bio_split() function to deal with these bio's. By default

197

* to use the bio_split() function to deal with these bio's. By default

198

* no merge_bvec_fn is defined for a queue, and only the fixed limits are

198

* no merge_bvec_fn is defined for a queue, and only the fixed limits are

199

* honored.

199

* honored.

200

*/

200

*/

201

void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)

201

void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)

202

{

202

{

203

q->merge_bvec_fn = mbfn;

203

q->merge_bvec_fn = mbfn;

204

}

204

}

205

206

EXPORT_SYMBOL(blk_queue_merge_bvec);

206

EXPORT_SYMBOL(blk_queue_merge_bvec);

207

208

/**

208

/**

209

* blk_queue_make_request - define an alternate make_request function for a device

209

* blk_queue_make_request - define an alternate make_request function for a device

210

* @q: the request queue for the device to be affected

210

* @q: the request queue for the device to be affected

211

* @mfn: the alternate make_request function

211

* @mfn: the alternate make_request function

212

*

212

*

213

* Description:

213

* Description:

214

* The normal way for &struct bios to be passed to a device

214

* The normal way for &struct bios to be passed to a device

215

* driver is for them to be collected into requests on a request

215

* driver is for them to be collected into requests on a request

216

* queue, and then to allow the device driver to select requests

216

* queue, and then to allow the device driver to select requests

217

* off that queue when it is ready. This works well for many block

217

* off that queue when it is ready. This works well for many block

218

* devices. However some block devices (typically virtual devices

218

* devices. However some block devices (typically virtual devices

219

* such as md or lvm) do not benefit from the processing on the

219

* such as md or lvm) do not benefit from the processing on the

220

* request queue, and are served best by having the requests passed

220

* request queue, and are served best by having the requests passed

221

* directly to them. This can be achieved by providing a function

221

* directly to them. This can be achieved by providing a function

222

* to blk_queue_make_request().

222

* to blk_queue_make_request().

223

*

223

*

224

* Caveat:

224

* Caveat:

225

* The driver that does this *must* be able to deal appropriately

225

* The driver that does this *must* be able to deal appropriately

226

* with buffers in "highmemory". This can be accomplished by either calling

226

* with buffers in "highmemory". This can be accomplished by either calling

227

* __bio_kmap_atomic() to get a temporary kernel mapping, or by calling

227

* __bio_kmap_atomic() to get a temporary kernel mapping, or by calling

228

* blk_queue_bounce() to create a buffer in normal memory.

228

* blk_queue_bounce() to create a buffer in normal memory.

229

**/

229

**/

230

void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)

230

void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)

231

{

231

{

232

/*

232

/*

233

* set defaults

233

* set defaults

234

*/

234

*/

235

q->nr_requests = BLKDEV_MAX_RQ;

235

q->nr_requests = BLKDEV_MAX_RQ;

236

blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

236

blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

237

blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

237

blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

238

q->make_request_fn = mfn;

238

q->make_request_fn = mfn;

239

q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;

239

q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;

240

q->backing_dev_info.state = 0;

240

q->backing_dev_info.state = 0;

241

q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;

241

q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;

242

blk_queue_max_sectors(q, SAFE_MAX_SECTORS);

242

blk_queue_max_sectors(q, SAFE_MAX_SECTORS);

243

blk_queue_hardsect_size(q, 512);

243

blk_queue_hardsect_size(q, 512);

244

blk_queue_dma_alignment(q, 511);

244

blk_queue_dma_alignment(q, 511);

245

blk_queue_congestion_threshold(q);

245

blk_queue_congestion_threshold(q);

246

q->nr_batching = BLK_BATCH_REQ;

246

q->nr_batching = BLK_BATCH_REQ;

247

248

q->unplug_thresh = 4; /* hmm */

248

q->unplug_thresh = 4; /* hmm */

249

q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */

249

q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */

250

if (q->unplug_delay == 0)

250

if (q->unplug_delay == 0)

251

q->unplug_delay = 1;

251

q->unplug_delay = 1;

252

253

INIT_WORK(&q->unplug_work, blk_unplug_work, q);

253

INIT_WORK(&q->unplug_work, blk_unplug_work, q);

254

255

q->unplug_timer.function = blk_unplug_timeout;

255

q->unplug_timer.function = blk_unplug_timeout;

256

q->unplug_timer.data = (unsigned long)q;

256

q->unplug_timer.data = (unsigned long)q;

257

258

/*

258

/*

259

* by default assume old behaviour and bounce for any highmem page

259

* by default assume old behaviour and bounce for any highmem page

260

*/

260

*/

261

blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

261

blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

262

263

blk_queue_activity_fn(q, NULL, NULL);

263

blk_queue_activity_fn(q, NULL, NULL);

264

}

264

}

265

266

EXPORT_SYMBOL(blk_queue_make_request);

266

EXPORT_SYMBOL(blk_queue_make_request);

267

268

/*
 * Put the fields of @rq listed below into their initial state and attach
 * the request to queue @q.  Fields that are not listed here (for example
 * flags, rq_disk and rl) are left untouched and are set by the callers
 * in this file.
 */
static inline void rq_init(request_queue_t *q, struct request *rq)
{
	INIT_LIST_HEAD(&rq->queuelist);

	/* ownership and status */
	rq->q = q;
	rq->ref_count = 1;
	rq->rq_status = RQ_ACTIVE;
	rq->errors = 0;
	rq->ioprio = 0;

	/* no bios or data attached yet */
	rq->bio = NULL;
	rq->biotail = NULL;
	rq->buffer = NULL;
	rq->data = NULL;
	rq->data_len = 0;
	rq->nr_phys_segments = 0;
	rq->sense = NULL;

	/* no waiter, private data or completion hook */
	rq->waiting = NULL;
	rq->special = NULL;
	rq->end_io = NULL;
	rq->end_io_data = NULL;
}

288

289

/**

289

/**

290

* blk_queue_ordered - does this queue support ordered writes

290

* blk_queue_ordered - does this queue support ordered writes

291

* @q: the request queue

291

* @q: the request queue

292

* @flag: see below

292

* @flag: see below

293

*

293

*

294

* Description:

294

* Description:

295

* For journalled file systems, doing ordered writes on a commit

295

* For journalled file systems, doing ordered writes on a commit

296

* block instead of explicitly doing wait_on_buffer (which is bad

296

* block instead of explicitly doing wait_on_buffer (which is bad

297

* for performance) can be a big win. Block drivers supporting this

297

* for performance) can be a big win. Block drivers supporting this

298

* feature should call this function and indicate so.

298

* feature should call this function and indicate so.

299

*

299

*

300

**/

300

**/

301

void blk_queue_ordered(request_queue_t *q, int flag)

301

void blk_queue_ordered(request_queue_t *q, int flag)

302

{

302

{

303

switch (flag) {

303

switch (flag) {

304

case QUEUE_ORDERED_NONE:

304

case QUEUE_ORDERED_NONE:

305

if (q->flush_rq)

305

if (q->flush_rq)

306

kmem_cache_free(request_cachep, q->flush_rq);

306

kmem_cache_free(request_cachep, q->flush_rq);

307

q->flush_rq = NULL;

307

q->flush_rq = NULL;

308

q->ordered = flag;

308

q->ordered = flag;

309

break;

309

break;

310

case QUEUE_ORDERED_TAG:

310

case QUEUE_ORDERED_TAG:

311

q->ordered = flag;

311

q->ordered = flag;

312

break;

312

break;

313

case QUEUE_ORDERED_FLUSH:

313

case QUEUE_ORDERED_FLUSH:

314

q->ordered = flag;

314

q->ordered = flag;

315

if (!q->flush_rq)

315

if (!q->flush_rq)

316

q->flush_rq = kmem_cache_alloc(request_cachep,

316

q->flush_rq = kmem_cache_alloc(request_cachep,

317

GFP_KERNEL);

317

GFP_KERNEL);

318

break;

318

break;

319

default:

319

default:

320

printk("blk_queue_ordered: bad value %d\n", flag);

320

printk("blk_queue_ordered: bad value %d\n", flag);

321

break;

321

break;

322

}

322

}

323

}

323

}

324

325

EXPORT_SYMBOL(blk_queue_ordered);

325

EXPORT_SYMBOL(blk_queue_ordered);

326

327

/**

327

/**

328

* blk_queue_issue_flush_fn - set function for issuing a flush

328

* blk_queue_issue_flush_fn - set function for issuing a flush

329

* @q: the request queue

329

* @q: the request queue

330

* @iff: the function to be called issuing the flush

330

* @iff: the function to be called issuing the flush

331

*

331

*

332

* Description:

332

* Description:

333

* If a driver supports issuing a flush command, the support is notified

333

* If a driver supports issuing a flush command, the support is notified

334

* to the block layer by defining it through this call.

334

* to the block layer by defining it through this call.

335

*

335

*

336

**/

336

**/

337

void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)

337

void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)

338

{

338

{

339

q->issue_flush_fn = iff;

339

q->issue_flush_fn = iff;

340

}

340

}

341

342

EXPORT_SYMBOL(blk_queue_issue_flush_fn);

342

EXPORT_SYMBOL(blk_queue_issue_flush_fn);

343

344

/*

344

/*

345

* Cache flushing for ordered writes handling

345

* Cache flushing for ordered writes handling

346

*/

346

*/

347

static void blk_pre_flush_end_io(struct request *flush_rq)

347

static void blk_pre_flush_end_io(struct request *flush_rq)

348

{

348

{

349

struct request *rq = flush_rq->end_io_data;

349

struct request *rq = flush_rq->end_io_data;

350

request_queue_t *q = rq->q;

350

request_queue_t *q = rq->q;

351

352

elv_completed_request(q, flush_rq);

352

elv_completed_request(q, flush_rq);

353

354

rq->flags |= REQ_BAR_PREFLUSH;

354

rq->flags |= REQ_BAR_PREFLUSH;

355

356

if (!flush_rq->errors)

356

if (!flush_rq->errors)

357

elv_requeue_request(q, rq);

357

elv_requeue_request(q, rq);

358

else {

358

else {

359

q->end_flush_fn(q, flush_rq);

359

q->end_flush_fn(q, flush_rq);

360

clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);

360

clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);

361

q->request_fn(q);

361

q->request_fn(q);

362

}

362

}

363

}

363

}

364

365

static void blk_post_flush_end_io(struct request *flush_rq)

365

static void blk_post_flush_end_io(struct request *flush_rq)

366

{

366

{

367

struct request *rq = flush_rq->end_io_data;

367

struct request *rq = flush_rq->end_io_data;

368

request_queue_t *q = rq->q;

368

request_queue_t *q = rq->q;

369

370

elv_completed_request(q, flush_rq);

370

elv_completed_request(q, flush_rq);

371

372

rq->flags |= REQ_BAR_POSTFLUSH;

372

rq->flags |= REQ_BAR_POSTFLUSH;

373

374

q->end_flush_fn(q, flush_rq);

374

q->end_flush_fn(q, flush_rq);

375

clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);

375

clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);

376

q->request_fn(q);

376

q->request_fn(q);

377

}

377

}

378

379

/*
 * Start the pre-flush for barrier request @rq on queue @q, using the
 * queue's preallocated q->flush_rq.
 *
 * Returns NULL if QUEUE_FLAG_FLUSH was already set, @rq itself if
 * q->prepare_flush_fn reports that no flush is needed, and otherwise the
 * flush request, which has been inserted at the front of the queue.
 * BUGs if @rq is not a barrier request.
 */
struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
{
	struct request *flush = q->flush_rq;

	BUG_ON(!blk_barrier_rq(rq));

	/* a set bit means a flush sequence is already in progress */
	if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
		return NULL;

	rq_init(q, flush);
	flush->elevator_private = NULL;
	flush->flags = REQ_BAR_FLUSH;
	flush->rq_disk = rq->rq_disk;
	flush->rl = NULL;

	if (q->prepare_flush_fn(q, flush)) {
		/*
		 * some drivers dequeue requests right away, some only after
		 * io completion. make sure the request is dequeued.
		 */
		if (!list_empty(&rq->queuelist))
			blkdev_dequeue_request(rq);

		flush->end_io_data = rq;
		flush->end_io = blk_pre_flush_end_io;

		__elv_add_request(q, flush, ELEVATOR_INSERT_FRONT, 0);
		return flush;
	}

	/*
	 * prepare_flush returned 0: no flush is needed, so just mark both
	 * pre and post flush as done
	 */
	rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
	return rq;
}

417

418

/*
 * Start the post-flush for barrier request @rq on queue @q, reusing
 * q->flush_rq.  If q->prepare_flush_fn returns non-zero the flush request
 * is inserted at the front of the queue and the queue's request_fn is
 * run; if it returns zero nothing further is done.
 * BUGs if @rq is not a barrier request.
 */
static void blk_start_post_flush(request_queue_t *q, struct request *rq)
{
	struct request *flush = q->flush_rq;

	BUG_ON(!blk_barrier_rq(rq));

	rq_init(q, flush);
	flush->elevator_private = NULL;
	flush->flags = REQ_BAR_FLUSH;
	flush->rq_disk = rq->rq_disk;
	flush->rl = NULL;

	if (!q->prepare_flush_fn(q, flush))
		return;

	flush->end_io_data = rq;
	flush->end_io = blk_post_flush_end_io;

	__elv_add_request(q, flush, ELEVATOR_INSERT_FRONT, 0);
	q->request_fn(q);
}

438

439

/*
 * Account @sectors completed sectors against @rq.  @sectors is clamped to
 * rq->nr_sectors before it is subtracted.  Returns the number of sectors
 * still outstanding, i.e. zero once the request is fully completed.
 * @q is not used.
 */
static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
					int sectors)
{
	if (sectors > rq->nr_sectors) {
		/* never complete more than what is left */
		sectors = rq->nr_sectors;
	}

	rq->nr_sectors -= sectors;

	return rq->nr_sectors;
}

448

449

/*
 * Common body of blk_complete_barrier_rq() and
 * blk_complete_barrier_rq_locked().
 *
 * Returns 0 when the request needs no barrier handling here: the queue is
 * not in QUEUE_ORDERED_FLUSH mode, @rq is not a filesystem barrier
 * request, or its post flush has already been done.  Otherwise @sectors
 * are accounted against @rq and 1 is returned; once no sectors remain the
 * post flush is started.  When @queue_locked is zero the queue lock is
 * taken around the start of the post flush.
 */
static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
				     int sectors, int queue_locked)
{
	unsigned long flags = 0;

	if (q->ordered != QUEUE_ORDERED_FLUSH ||
	    !blk_fs_request(rq) || !blk_barrier_rq(rq) ||
	    blk_barrier_postflush(rq))
		return 0;

	/* sectors still outstanding: nothing more to do yet */
	if (blk_check_end_barrier(q, rq, sectors))
		return 1;

	if (queue_locked) {
		/* caller already holds the queue lock */
		blk_start_post_flush(q, rq);
		return 1;
	}

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_post_flush(q, rq);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 1;
}

473

474

/**

474

/**

475

* blk_complete_barrier_rq - complete possible barrier request

475

* blk_complete_barrier_rq - complete possible barrier request

476

* @q: the request queue for the device

476

* @q: the request queue for the device

477

* @rq: the request

477

* @rq: the request

478

* @sectors: number of sectors to complete

478

* @sectors: number of sectors to complete

479

*

479

*

480

* Description:

480

* Description:

481

* Used in driver end_io handling to determine whether to postpone

481

* Used in driver end_io handling to determine whether to postpone

482

* completion of a barrier request until a post flush has been done. This

482

* completion of a barrier request until a post flush has been done. This

483

* is the unlocked variant, used if the caller doesn't already hold the

483

* is the unlocked variant, used if the caller doesn't already hold the

484

* queue lock.

484

* queue lock.

485

**/

485

**/

486

int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)

486

int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)

487

{

487

{

488

return __blk_complete_barrier_rq(q, rq, sectors, 0);

488

return __blk_complete_barrier_rq(q, rq, sectors, 0);

489

}

489

}

490

EXPORT_SYMBOL(blk_complete_barrier_rq);

490

EXPORT_SYMBOL(blk_complete_barrier_rq);

491

492

/**

492

/**

493

* blk_complete_barrier_rq_locked - complete possible barrier request

493

* blk_complete_barrier_rq_locked - complete possible barrier request

494

* @q: the request queue for the device

494

* @q: the request queue for the device

495

* @rq: the request

495

* @rq: the request

496

* @sectors: number of sectors to complete

496

* @sectors: number of sectors to complete

497

*

497

*

498

* Description:

498

* Description:

499

* See blk_complete_barrier_rq(). This variant must be used if the caller

499

* See blk_complete_barrier_rq(). This variant must be used if the caller

500

* holds the queue lock.

500

* holds the queue lock.

501

**/

501

**/

502

int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,

502

int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,

503

int sectors)

503

int sectors)

504

{

504

{

505

return __blk_complete_barrier_rq(q, rq, sectors, 1);

505

return __blk_complete_barrier_rq(q, rq, sectors, 1);

506

}

506

}

507

EXPORT_SYMBOL(blk_complete_barrier_rq_locked);

507

EXPORT_SYMBOL(blk_complete_barrier_rq_locked);

508

509

/**

509

/**

510

* blk_queue_bounce_limit - set bounce buffer limit for queue

510

* blk_queue_bounce_limit - set bounce buffer limit for queue

511

* @q: the request queue for the device

511

* @q: the request queue for the device

512

* @dma_addr: bus address limit

512

* @dma_addr: bus address limit

513

*

513

*

514

* Description:

514

* Description:

515

* Different hardware can have different requirements as to what pages

515

* Different hardware can have different requirements as to what pages

516

* it can do I/O directly to. A low level driver can call

516

* it can do I/O directly to. A low level driver can call

517

* blk_queue_bounce_limit to have lower memory pages allocated as bounce

517

* blk_queue_bounce_limit to have lower memory pages allocated as bounce

518

* buffers for doing I/O to pages residing above @page. By default

518

* buffers for doing I/O to pages residing above @page. By default

519

* the block layer sets this to the highest numbered "low" memory page.

519

* the block layer sets this to the highest numbered "low" memory page.

520

**/

520

**/

521

void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)

521

void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)

522

{

522

{

523

unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;

523

unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;

524

525

/*

525

/*

526

* set appropriate bounce gfp mask -- unfortunately we don't have a

526

* set appropriate bounce gfp mask -- unfortunately we don't have a

527

* full 4GB zone, so we have to resort to low memory for any bounces.

527

* full 4GB zone, so we have to resort to low memory for any bounces.

528

* ISA has its own < 16MB zone.

528

* ISA has its own < 16MB zone.

529

*/

529

*/

530

if (bounce_pfn < blk_max_low_pfn) {

530

if (bounce_pfn < blk_max_low_pfn) {

531

BUG_ON(dma_addr < BLK_BOUNCE_ISA);

531

BUG_ON(dma_addr < BLK_BOUNCE_ISA);

532

init_emergency_isa_pool();

532

init_emergency_isa_pool();

533

q->bounce_gfp = GFP_NOIO | GFP_DMA;

533

q->bounce_gfp = GFP_NOIO | GFP_DMA;

534

} else

534

} else

535

q->bounce_gfp = GFP_NOIO;

535

q->bounce_gfp = GFP_NOIO;

536

537

q->bounce_pfn = bounce_pfn;

537

q->bounce_pfn = bounce_pfn;

538

}

538

}

539

540

EXPORT_SYMBOL(blk_queue_bounce_limit);

540

EXPORT_SYMBOL(blk_queue_bounce_limit);

541

542

/**

542

/**

543

* blk_queue_max_sectors - set max sectors for a request for this queue

543

* blk_queue_max_sectors - set max sectors for a request for this queue

544

* @q: the request queue for the device

544

* @q: the request queue for the device

545

* @max_sectors: max sectors in the usual 512b unit

545

* @max_sectors: max sectors in the usual 512b unit

546

*

546

*

547

* Description:

547

* Description:

548

* Enables a low level driver to set an upper limit on the size of

548

* Enables a low level driver to set an upper limit on the size of

549

* received requests.

549

* received requests.

550

**/

550

**/

551

void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)

551

void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)

552

{

552

{

553

if ((max_sectors << 9) < PAGE_CACHE_SIZE) {

553

if ((max_sectors << 9) < PAGE_CACHE_SIZE) {

554

max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);

554

max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);

555

printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);

555

printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);

556

}

556

}

557

558

if (BLK_DEF_MAX_SECTORS > max_sectors)

558

if (BLK_DEF_MAX_SECTORS > max_sectors)

559

q->max_hw_sectors = q->max_sectors = max_sectors;

559

q->max_hw_sectors = q->max_sectors = max_sectors;

560

else {

560

else {

561

q->max_sectors = BLK_DEF_MAX_SECTORS;

561

q->max_sectors = BLK_DEF_MAX_SECTORS;

562

q->max_hw_sectors = max_sectors;

562

q->max_hw_sectors = max_sectors;

563

}

563

}

564

}

564

}

565

566

EXPORT_SYMBOL(blk_queue_max_sectors);

566

EXPORT_SYMBOL(blk_queue_max_sectors);

567

568

/**

568

/**

569

* blk_queue_max_phys_segments - set max phys segments for a request for this queue

569

* blk_queue_max_phys_segments - set max phys segments for a request for this queue

570

* @q: the request queue for the device

570

* @q: the request queue for the device

571

* @max_segments: max number of segments

571

* @max_segments: max number of segments

572

*

572

*

573

* Description:

573

* Description:

574

* Enables a low level driver to set an upper limit on the number of

574

* Enables a low level driver to set an upper limit on the number of

575

* physical data segments in a request. This would be the largest sized

575

* physical data segments in a request. This would be the largest sized

576

* scatter list the driver could handle.

576

* scatter list the driver could handle.

577

**/

577

**/

578

void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)

578

void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)

579

{

579

{

580

if (!max_segments) {

580

if (!max_segments) {

581

max_segments = 1;

581

max_segments = 1;

582

printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);

582

printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);

583

}

583

}

584

585

q->max_phys_segments = max_segments;

585

q->max_phys_segments = max_segments;

586

}

586

}

587

588

EXPORT_SYMBOL(blk_queue_max_phys_segments);

588

EXPORT_SYMBOL(blk_queue_max_phys_segments);

589

590

/**

590

/**

591

* blk_queue_max_hw_segments - set max hw segments for a request for this queue

591

* blk_queue_max_hw_segments - set max hw segments for a request for this queue

592

* @q: the request queue for the device

592

* @q: the request queue for the device

593

* @max_segments: max number of segments

593

* @max_segments: max number of segments

594

*

594

*

595

* Description:

595

* Description:

596

* Enables a low level driver to set an upper limit on the number of

596

* Enables a low level driver to set an upper limit on the number of

597

* hw data segments in a request. This would be the largest number of

597

* hw data segments in a request. This would be the largest number of

598

* address/length pairs the host adapter can actually give as once

598

* address/length pairs the host adapter can actually give as once

599

* to the device.

599

* to the device.

600

**/

600

**/

601

void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)

601

void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)

602

{

602

{

603

if (!max_segments) {

603

if (!max_segments) {

604

max_segments = 1;

604

max_segments = 1;

605

printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);

605

printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);

606

}

606

}

607

608

q->max_hw_segments = max_segments;

608

q->max_hw_segments = max_segments;

609

}

609

}

610

611

EXPORT_SYMBOL(blk_queue_max_hw_segments);

611

EXPORT_SYMBOL(blk_queue_max_hw_segments);

612

613

/**

613

/**

614

* blk_queue_max_segment_size - set max segment size for blk_rq_map_sg

614

* blk_queue_max_segment_size - set max segment size for blk_rq_map_sg

615

* @q: the request queue for the device

615

* @q: the request queue for the device

616

* @max_size: max size of segment in bytes

616

* @max_size: max size of segment in bytes

617

*

617

*

618

* Description:

618

* Description:

619

* Enables a low level driver to set an upper limit on the size of a

619

* Enables a low level driver to set an upper limit on the size of a

620

* coalesced segment

620

* coalesced segment

621

**/

621

**/

622

void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)

622

void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)

623

{

623

{

624

if (max_size < PAGE_CACHE_SIZE) {

624

if (max_size < PAGE_CACHE_SIZE) {

625

max_size = PAGE_CACHE_SIZE;

625

max_size = PAGE_CACHE_SIZE;

626

printk("%s: set to minimum %d\n", __FUNCTION__, max_size);

626

printk("%s: set to minimum %d\n", __FUNCTION__, max_size);

627

}

627

}

628

629

q->max_segment_size = max_size;

629

q->max_segment_size = max_size;

630

}

630

}

631

632

EXPORT_SYMBOL(blk_queue_max_segment_size);

632

EXPORT_SYMBOL(blk_queue_max_segment_size);

633

634

/**

634

/**

635

* blk_queue_hardsect_size - set hardware sector size for the queue

635

* blk_queue_hardsect_size - set hardware sector size for the queue

636

* @q: the request queue for the device

636

* @q: the request queue for the device

637

* @size: the hardware sector size, in bytes

637

* @size: the hardware sector size, in bytes

638

*

638

*

639

* Description:

639

* Description:

640

* This should typically be set to the lowest possible sector size

640

* This should typically be set to the lowest possible sector size

641

* that the hardware can operate on (possible without reverting to

641

* that the hardware can operate on (possible without reverting to

642

* even internal read-modify-write operations). Usually the default

642

* even internal read-modify-write operations). Usually the default

643

* of 512 covers most hardware.

643

* of 512 covers most hardware.

644

**/

644

**/

645

void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)

645

void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)

646

{

646

{

647

q->hardsect_size = size;

647

q->hardsect_size = size;

648

}

648

}

649

650

EXPORT_SYMBOL(blk_queue_hardsect_size);

650

EXPORT_SYMBOL(blk_queue_hardsect_size);

651

/*
 * Returns the minimum that is _not_ zero, unless both are zero.
 *
 * Every argument and the expansion as a whole are parenthesized so the
 * macro stays correct inside a larger expression and when an argument is
 * itself an operator expression.  Arguments may be evaluated more than
 * once, so they must not have side effects.
 */
#define min_not_zero(l, r) \
	((l) == 0 ? (r) : ((r) == 0 ? (l) : min((l), (r))))

656

657

/**

657

/**

658

* blk_queue_stack_limits - inherit underlying queue limits for stacked drivers

658

* blk_queue_stack_limits - inherit underlying queue limits for stacked drivers

659

* @t: the stacking driver (top)

659

* @t: the stacking driver (top)

660

* @b: the underlying device (bottom)

660

* @b: the underlying device (bottom)

661

**/

661

**/

662

void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)

662

void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)

663

{

663

{

664

/* zero is "infinity" */

664

/* zero is "infinity" */

665

t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);

665

t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);

666

t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);

666

t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);

667

668

t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);

668

t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);

669

t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);

669

t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);

670

t->max_segment_size = min(t->max_segment_size,b->max_segment_size);

670

t->max_segment_size = min(t->max_segment_size,b->max_segment_size);

671

t->hardsect_size = max(t->hardsect_size,b->hardsect_size);

671

t->hardsect_size = max(t->hardsect_size,b->hardsect_size);

672

}

672

}

673

674

EXPORT_SYMBOL(blk_queue_stack_limits);

674

EXPORT_SYMBOL(blk_queue_stack_limits);

675

676

/**

676

/**

677

* blk_queue_segment_boundary - set boundary rules for segment merging

677

* blk_queue_segment_boundary - set boundary rules for segment merging

678

* @q: the request queue for the device

678

* @q: the request queue for the device

679

* @mask: the memory boundary mask

679

* @mask: the memory boundary mask

680

**/

680

**/

681

void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)

681

void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)

682

{

682

{

683

if (mask < PAGE_CACHE_SIZE - 1) {

683

if (mask < PAGE_CACHE_SIZE - 1) {

684

mask = PAGE_CACHE_SIZE - 1;

684

mask = PAGE_CACHE_SIZE - 1;

685

printk("%s: set to minimum %lx\n", __FUNCTION__, mask);

685

printk("%s: set to minimum %lx\n", __FUNCTION__, mask);

686

}

686

}

687

688

q->seg_boundary_mask = mask;

688

q->seg_boundary_mask = mask;

689

}

689

}

690

691

EXPORT_SYMBOL(blk_queue_segment_boundary);

691

EXPORT_SYMBOL(blk_queue_segment_boundary);

692

693

/**

693

/**

694

* blk_queue_dma_alignment - set dma length and memory alignment

694

* blk_queue_dma_alignment - set dma length and memory alignment

695

* @q: the request queue for the device

695

* @q: the request queue for the device

696

* @mask: alignment mask

696

* @mask: alignment mask

697

*

697

*

698

* description:

698

* description:

699

* set required memory and length aligment for direct dma transactions.

699

* set required memory and length aligment for direct dma transactions.

700

* this is used when buiding direct io requests for the queue.

700

* this is used when buiding direct io requests for the queue.

701

*

701

*

702

**/

702

**/

703

void blk_queue_dma_alignment(request_queue_t *q, int mask)

703

void blk_queue_dma_alignment(request_queue_t *q, int mask)

704

{

704

{

705

q->dma_alignment = mask;

705

q->dma_alignment = mask;

706

}

706

}

707

708

EXPORT_SYMBOL(blk_queue_dma_alignment);

708

EXPORT_SYMBOL(blk_queue_dma_alignment);

709

710

/**

710

/**

711

* blk_queue_find_tag - find a request by its tag and queue

711

* blk_queue_find_tag - find a request by its tag and queue

712

* @q: The request queue for the device

712

* @q: The request queue for the device

713

* @tag: The tag of the request

713

* @tag: The tag of the request

714

*

714

*

715

* Notes:

715

* Notes:

716

* Should be used when a device returns a tag and you want to match

716

* Should be used when a device returns a tag and you want to match

717

* it with a request.

717

* it with a request.

718

*

718

*

719

* no locks need be held.

719

* no locks need be held.

720

**/

720

**/

721

struct request *blk_queue_find_tag(request_queue_t *q, int tag)

721

struct request *blk_queue_find_tag(request_queue_t *q, int tag)

722

{

722

{

723

struct blk_queue_tag *bqt = q->queue_tags;

723

struct blk_queue_tag *bqt = q->queue_tags;

724

725

if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))

725

if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))

726

return NULL;

726

return NULL;

727

728

return bqt->tag_index[tag];

728

return bqt->tag_index[tag];

729

}

729

}

730

731

EXPORT_SYMBOL(blk_queue_find_tag);

731

EXPORT_SYMBOL(blk_queue_find_tag);

732

733

/**

733

/**

734

* __blk_queue_free_tags - release tag maintenance info

734

* __blk_queue_free_tags - release tag maintenance info

735

* @q: the request queue for the device

735

* @q: the request queue for the device

736

*

736

*

737

* Notes:

737

* Notes:

738

* blk_cleanup_queue() will take care of calling this function, if tagging

738

* blk_cleanup_queue() will take care of calling this function, if tagging

739

* has been used. So there's no need to call this directly.

739

* has been used. So there's no need to call this directly.

740

**/

740

**/

741

static void __blk_queue_free_tags(request_queue_t *q)

741

static void __blk_queue_free_tags(request_queue_t *q)

742

{

742

{

743

struct blk_queue_tag *bqt = q->queue_tags;

743

struct blk_queue_tag *bqt = q->queue_tags;

744

745

if (!bqt)

745

if (!bqt)

746

return;

746

return;

747

748

if (atomic_dec_and_test(&bqt->refcnt)) {

748

if (atomic_dec_and_test(&bqt->refcnt)) {

749

BUG_ON(bqt->busy);

749

BUG_ON(bqt->busy);

750

BUG_ON(!list_empty(&bqt->busy_list));

750

BUG_ON(!list_empty(&bqt->busy_list));

751

752

kfree(bqt->tag_index);

752

kfree(bqt->tag_index);

753

bqt->tag_index = NULL;

753

bqt->tag_index = NULL;

754

755

kfree(bqt->tag_map);

755

kfree(bqt->tag_map);

756

bqt->tag_map = NULL;

756

bqt->tag_map = NULL;

757

758

kfree(bqt);

758

kfree(bqt);

759

}

759

}

760

761

q->queue_tags = NULL;

761

q->queue_tags = NULL;

762

q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);

762

q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);

763

}

763

}

764

765

/**

765

/**

766

* blk_queue_free_tags - release tag maintenance info

766

* blk_queue_free_tags - release tag maintenance info

767

* @q: the request queue for the device

767

* @q: the request queue for the device

768

*

768

*

769

* Notes:

769

* Notes:

770

* This is used to disabled tagged queuing to a device, yet leave

770

* This is used to disabled tagged queuing to a device, yet leave

771

* queue in function.

771

* queue in function.

772

**/

772

**/

773

void blk_queue_free_tags(request_queue_t *q)

773

void blk_queue_free_tags(request_queue_t *q)

774

{

774

{

775

clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);

775

clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);

776

}

776

}

777

778

EXPORT_SYMBOL(blk_queue_free_tags);

778

EXPORT_SYMBOL(blk_queue_free_tags);

779

780

static int

780

static int

781

init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)

781

init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)

782

{

782

{

783

struct request **tag_index;

783

struct request **tag_index;

784

unsigned long *tag_map;

784

unsigned long *tag_map;

785

int nr_ulongs;

785

int nr_ulongs;

786

787

if (depth > q->nr_requests * 2) {

787

if (depth > q->nr_requests * 2) {

788

depth = q->nr_requests * 2;

788

depth = q->nr_requests * 2;

789

printk(KERN_ERR "%s: adjusted depth to %d\n",

789

printk(KERN_ERR "%s: adjusted depth to %d\n",

790

__FUNCTION__, depth);

790

__FUNCTION__, depth);

791

}

791

}

792

793

tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);

793

tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);

794

if (!tag_index)

794

if (!tag_index)

795

goto fail;

795

goto fail;

796

797

nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;

797

nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;

798

tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);

798

tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);

799

if (!tag_map)

799

if (!tag_map)

800

goto fail;

800

goto fail;

801

802

memset(tag_index, 0, depth * sizeof(struct request *));

802

memset(tag_index, 0, depth * sizeof(struct request *));

803

memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));

803

memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));

804

tags->real_max_depth = depth;

804

tags->real_max_depth = depth;

805

tags->max_depth = depth;

805

tags->max_depth = depth;

806

tags->tag_index = tag_index;

806

tags->tag_index = tag_index;

807

tags->tag_map = tag_map;

807

tags->tag_map = tag_map;

808

809

return 0;

809

return 0;

810

fail:

810

fail:

811

kfree(tag_index);

811

kfree(tag_index);

812

return -ENOMEM;

812

return -ENOMEM;

813

}

813

}

814

815

/**

815

/**

816

* blk_queue_init_tags - initialize the queue tag info

816

* blk_queue_init_tags - initialize the queue tag info

817

* @q: the request queue for the device

817

* @q: the request queue for the device

818

* @depth: the maximum queue depth supported

818

* @depth: the maximum queue depth supported

819

* @tags: the tag to use

819

* @tags: the tag to use

820

**/

820

**/

821

int blk_queue_init_tags(request_queue_t *q, int depth,

821

int blk_queue_init_tags(request_queue_t *q, int depth,

822

struct blk_queue_tag *tags)

822

struct blk_queue_tag *tags)

823

{

823

{

824

int rc;

824

int rc;

825

826

BUG_ON(tags && q->queue_tags && tags != q->queue_tags);

826

BUG_ON(tags && q->queue_tags && tags != q->queue_tags);

827

828

if (!tags && !q->queue_tags) {

828

if (!tags && !q->queue_tags) {

829

tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);

829

tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);

830

if (!tags)

830

if (!tags)

831

goto fail;

831

goto fail;

832

833

if (init_tag_map(q, tags, depth))

833

if (init_tag_map(q, tags, depth))

834

goto fail;

834

goto fail;

835

836

INIT_LIST_HEAD(&tags->busy_list);

836

INIT_LIST_HEAD(&tags->busy_list);

837

tags->busy = 0;

837

tags->busy = 0;

838

atomic_set(&tags->refcnt, 1);

838

atomic_set(&tags->refcnt, 1);

839

} else if (q->queue_tags) {

839

} else if (q->queue_tags) {

840

if ((rc = blk_queue_resize_tags(q, depth)))

840

if ((rc = blk_queue_resize_tags(q, depth)))

841

return rc;

841

return rc;

842

set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);

842

set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);

843

return 0;

843

return 0;

844

} else

844

} else

845

atomic_inc(&tags->refcnt);

845

atomic_inc(&tags->refcnt);

846

847

/*

847

/*

848

* assign it, all done

848

* assign it, all done

849

*/

849

*/

850

q->queue_tags = tags;

850

q->queue_tags = tags;

851

q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);

851

q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);

852

return 0;

852

return 0;

853

fail:

853

fail:

854

kfree(tags);

854

kfree(tags);

855

return -ENOMEM;

855

return -ENOMEM;

856

}

856

}

857

858

EXPORT_SYMBOL(blk_queue_init_tags);

858

EXPORT_SYMBOL(blk_queue_init_tags);

859

860

/**

860

/**

861

* blk_queue_resize_tags - change the queueing depth

861

* blk_queue_resize_tags - change the queueing depth

862

* @q: the request queue for the device

862

* @q: the request queue for the device

863

* @new_depth: the new max command queueing depth

863

* @new_depth: the new max command queueing depth

864

*

864

*

865

* Notes:

865

* Notes:

866

* Must be called with the queue lock held.

866

* Must be called with the queue lock held.

867

**/

867

**/

868

int blk_queue_resize_tags(request_queue_t *q, int new_depth)

868

int blk_queue_resize_tags(request_queue_t *q, int new_depth)

869

{

869

{

870

struct blk_queue_tag *bqt = q->queue_tags;

870

struct blk_queue_tag *bqt = q->queue_tags;

871

struct request **tag_index;

871

struct request **tag_index;

872

unsigned long *tag_map;

872

unsigned long *tag_map;

873

int max_depth, nr_ulongs;

873

int max_depth, nr_ulongs;

874

875

if (!bqt)

875

if (!bqt)

876

return -ENXIO;

876

return -ENXIO;

877

878

/*

878

/*

879

* if we already have large enough real_max_depth. just

879

* if we already have large enough real_max_depth. just

880

* adjust max_depth. *NOTE* as requests with tag value

880

* adjust max_depth. *NOTE* as requests with tag value

881

* between new_depth and real_max_depth can be in-flight, tag

881

* between new_depth and real_max_depth can be in-flight, tag

882

* map can not be shrunk blindly here.

882

* map can not be shrunk blindly here.

883

*/

883

*/

884

if (new_depth <= bqt->real_max_depth) {

884

if (new_depth <= bqt->real_max_depth) {

885

bqt->max_depth = new_depth;

885

bqt->max_depth = new_depth;

886

return 0;

886

return 0;

887

}

887

}

888

889

/*

889

/*

890

* save the old state info, so we can copy it back

890

* save the old state info, so we can copy it back

891

*/

891

*/

892

tag_index = bqt->tag_index;

892

tag_index = bqt->tag_index;

893

tag_map = bqt->tag_map;

893

tag_map = bqt->tag_map;

894

max_depth = bqt->real_max_depth;

894

max_depth = bqt->real_max_depth;

895

896

if (init_tag_map(q, bqt, new_depth))

896

if (init_tag_map(q, bqt, new_depth))

897

return -ENOMEM;

897

return -ENOMEM;

898

899

memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));

899

memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));

900

nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;

900

nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;

901

memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));

901

memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));

902

903

kfree(tag_index);

903

kfree(tag_index);

904

kfree(tag_map);

904

kfree(tag_map);

905

return 0;

905

return 0;

906

}

906

}

907

908

EXPORT_SYMBOL(blk_queue_resize_tags);

908

EXPORT_SYMBOL(blk_queue_resize_tags);

909

910

/**

910

/**

911

* blk_queue_end_tag - end tag operations for a request

911

* blk_queue_end_tag - end tag operations for a request

912

* @q: the request queue for the device

912

* @q: the request queue for the device

913

* @rq: the request that has completed

913

* @rq: the request that has completed

914

*

914

*

915

* Description:

915

* Description:

916

* Typically called when end_that_request_first() returns 0, meaning

916

* Typically called when end_that_request_first() returns 0, meaning

917

* all transfers have been done for a request. It's important to call

917

* all transfers have been done for a request. It's important to call

918

* this function before end_that_request_last(), as that will put the

918

* this function before end_that_request_last(), as that will put the

919

* request back on the free list thus corrupting the internal tag list.

919

* request back on the free list thus corrupting the internal tag list.

920

*

920

*

921

* Notes:

921

* Notes:

922

* queue lock must be held.

922

* queue lock must be held.

923

**/

923

**/

924

void blk_queue_end_tag(request_queue_t *q, struct request *rq)

924

void blk_queue_end_tag(request_queue_t *q, struct request *rq)

925

{

925

{

926

struct blk_queue_tag *bqt = q->queue_tags;

926

struct blk_queue_tag *bqt = q->queue_tags;

927

int tag = rq->tag;

927

int tag = rq->tag;

928

929

BUG_ON(tag == -1);

929

BUG_ON(tag == -1);

930

931

if (unlikely(tag >= bqt->real_max_depth))

931

if (unlikely(tag >= bqt->real_max_depth))

932

/*

932

/*

933

* This can happen after tag depth has been reduced.

933

* This can happen after tag depth has been reduced.

934

* FIXME: how about a warning or info message here?

934

* FIXME: how about a warning or info message here?

935

*/

935

*/

936

return;

936

return;

937

938

if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {

938

if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {

939

printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",

939

printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",

940

__FUNCTION__, tag);

940

__FUNCTION__, tag);

941

return;

941

return;

942

}

942

}

943

944

list_del_init(&rq->queuelist);

944

list_del_init(&rq->queuelist);

945

rq->flags &= ~REQ_QUEUED;

945

rq->flags &= ~REQ_QUEUED;

946

rq->tag = -1;

946

rq->tag = -1;

947

948

if (unlikely(bqt->tag_index[tag] == NULL))

948

if (unlikely(bqt->tag_index[tag] == NULL))

949

printk(KERN_ERR "%s: tag %d is missing\n",

949

printk(KERN_ERR "%s: tag %d is missing\n",

950

__FUNCTION__, tag);

950

__FUNCTION__, tag);

951

952

bqt->tag_index[tag] = NULL;

952

bqt->tag_index[tag] = NULL;

953

bqt->busy--;

953

bqt->busy--;

954

}

954

}

955

956

EXPORT_SYMBOL(blk_queue_end_tag);

956

EXPORT_SYMBOL(blk_queue_end_tag);

957

958

/**

958

/**

959

* blk_queue_start_tag - find a free tag and assign it

959

* blk_queue_start_tag - find a free tag and assign it

960

* @q: the request queue for the device

960

* @q: the request queue for the device

961

* @rq: the block request that needs tagging

961

* @rq: the block request that needs tagging

962

*

962

*

963

* Description:

963

* Description:

964

* This can either be used as a stand-alone helper, or possibly be

964

* This can either be used as a stand-alone helper, or possibly be

965

* assigned as the queue &prep_rq_fn (in which case &struct request

965

* assigned as the queue &prep_rq_fn (in which case &struct request

966

* automagically gets a tag assigned). Note that this function

966

* automagically gets a tag assigned). Note that this function

967

* assumes that any type of request can be queued! if this is not

967

* assumes that any type of request can be queued! if this is not

968

* true for your device, you must check the request type before

968

* true for your device, you must check the request type before

969

* calling this function. The request will also be removed from

969

* calling this function. The request will also be removed from

970

* the request queue, so it's the drivers responsibility to readd

970

* the request queue, so it's the drivers responsibility to readd

971

* it if it should need to be restarted for some reason.

971

* it if it should need to be restarted for some reason.

972

*

972

*

973

* Notes:

973

* Notes:

974

* queue lock must be held.

974

* queue lock must be held.

975

**/

975

**/

976

int blk_queue_start_tag(request_queue_t *q, struct request *rq)

976

int blk_queue_start_tag(request_queue_t *q, struct request *rq)

977

{

977

{

978

struct blk_queue_tag *bqt = q->queue_tags;

978

struct blk_queue_tag *bqt = q->queue_tags;

979

int tag;

979

int tag;

980

981

if (unlikely((rq->flags & REQ_QUEUED))) {

981

if (unlikely((rq->flags & REQ_QUEUED))) {

982

printk(KERN_ERR

982

printk(KERN_ERR

983

"%s: request %p for device [%s] already tagged %d",

983

"%s: request %p for device [%s] already tagged %d",

984

__FUNCTION__, rq,

984

__FUNCTION__, rq,

985

rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);

985

rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);

986

BUG();

986

BUG();

987

}

987

}

988

989

tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);

989

tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);

990

if (tag >= bqt->max_depth)

990

if (tag >= bqt->max_depth)

991

return 1;

991

return 1;

992

993

__set_bit(tag, bqt->tag_map);

993

__set_bit(tag, bqt->tag_map);

994

995

rq->flags |= REQ_QUEUED;

995

rq->flags |= REQ_QUEUED;

996

rq->tag = tag;

996

rq->tag = tag;

997

bqt->tag_index[tag] = rq;

997

bqt->tag_index[tag] = rq;

998

blkdev_dequeue_request(rq);

998

blkdev_dequeue_request(rq);

999

list_add(&rq->queuelist, &bqt->busy_list);

999

list_add(&rq->queuelist, &bqt->busy_list);

1000

bqt->busy++;

1000

bqt->busy++;

1001

return 0;

1001

return 0;

1002

}

1002

}

1003

1004

EXPORT_SYMBOL(blk_queue_start_tag);

1004

EXPORT_SYMBOL(blk_queue_start_tag);

1005

1006

/**

1006

/**

1007

* blk_queue_invalidate_tags - invalidate all pending tags

1007

* blk_queue_invalidate_tags - invalidate all pending tags

1008

* @q: the request queue for the device

1008

* @q: the request queue for the device

1009

*

1009

*

1010

* Description:

1010

* Description:

1011

* Hardware conditions may dictate a need to stop all pending requests.

1011

* Hardware conditions may dictate a need to stop all pending requests.

1012

* In this case, we will safely clear the block side of the tag queue and

1012

* In this case, we will safely clear the block side of the tag queue and

1013

* readd all requests to the request queue in the right order.

1013

* readd all requests to the request queue in the right order.

1014

*

1014

*

1015

* Notes:

1015

* Notes:

1016

* queue lock must be held.

1016

* queue lock must be held.

1017

**/

1017

**/

1018

void blk_queue_invalidate_tags(request_queue_t *q)

1018

void blk_queue_invalidate_tags(request_queue_t *q)

1019

{

1019

{

1020

struct blk_queue_tag *bqt = q->queue_tags;

1020

struct blk_queue_tag *bqt = q->queue_tags;

1021

struct list_head *tmp, *n;

1021

struct list_head *tmp, *n;

1022

struct request *rq;

1022

struct request *rq;

1023

1024

list_for_each_safe(tmp, n, &bqt->busy_list) {

1024

list_for_each_safe(tmp, n, &bqt->busy_list) {

1025

rq = list_entry_rq(tmp);

1025

rq = list_entry_rq(tmp);

1026

1027

if (rq->tag == -1) {

1027

if (rq->tag == -1) {

1028

printk(KERN_ERR

1028

printk(KERN_ERR

1029

"%s: bad tag found on list\n", __FUNCTION__);

1029

"%s: bad tag found on list\n", __FUNCTION__);

1030

list_del_init(&rq->queuelist);

1030

list_del_init(&rq->queuelist);

1031

rq->flags &= ~REQ_QUEUED;

1031

rq->flags &= ~REQ_QUEUED;

1032

} else

1032

} else

1033

blk_queue_end_tag(q, rq);

1033

blk_queue_end_tag(q, rq);

1034

1035

rq->flags &= ~REQ_STARTED;

1035

rq->flags &= ~REQ_STARTED;

1036

__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);

1036

__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);

1037

}

1037

}

1038

}

1038

}

1039

1040

EXPORT_SYMBOL(blk_queue_invalidate_tags);

1040

EXPORT_SYMBOL(blk_queue_invalidate_tags);

1041

/*
 * Printable names for the request flag bits.  blk_dump_rq_flags() indexes
 * this table by bit number, so entry N must name flag bit N; keep the
 * order in sync with the flag bit definitions.  The table is read-only.
 */
static const char *const rq_flags[] = {
	"REQ_RW",
	"REQ_FAILFAST",
	"REQ_SORTED",
	"REQ_SOFTBARRIER",
	"REQ_HARDBARRIER",
	"REQ_CMD",
	"REQ_NOMERGE",
	"REQ_STARTED",
	"REQ_DONTPREP",
	"REQ_QUEUED",
	"REQ_ELVPRIV",
	"REQ_PC",
	"REQ_BLOCK_PC",
	"REQ_SENSE",
	"REQ_FAILED",
	"REQ_QUIET",
	"REQ_SPECIAL",
	"REQ_DRIVE_CMD",
	"REQ_DRIVE_TASK",
	"REQ_DRIVE_TASKFILE",
	"REQ_PREEMPT",
	"REQ_PM_SUSPEND",
	"REQ_PM_RESUME",
	"REQ_PM_SHUTDOWN",
};

1068

1069

void blk_dump_rq_flags(struct request *rq, char *msg)

1069

void blk_dump_rq_flags(struct request *rq, char *msg)

1070

{

1070

{

1071

int bit;

1071

int bit;

1072

1073

printk("%s: dev %s: flags = ", msg,

1073

printk("%s: dev %s: flags = ", msg,

1074

rq->rq_disk ? rq->rq_disk->disk_name : "?");

1074

rq->rq_disk ? rq->rq_disk->disk_name : "?");

1075

bit = 0;

1075

bit = 0;

1076

do {

1076

do {

1077

if (rq->flags & (1 << bit))

1077

if (rq->flags & (1 << bit))

1078

printk("%s ", rq_flags[bit]);

1078

printk("%s ", rq_flags[bit]);

1079

bit++;

1079

bit++;

1080

} while (bit < __REQ_NR_BITS);

1080

} while (bit < __REQ_NR_BITS);

1081

1082

printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,

1082

printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,

1083

rq->nr_sectors,

1083

rq->nr_sectors,

1084

rq->current_nr_sectors);

1084

rq->current_nr_sectors);

1085

printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);

1085

printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);

1086

1087

if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {

1087

if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {

1088

printk("cdb: ");

1088

printk("cdb: ");

1089

for (bit = 0; bit < sizeof(rq->cmd); bit++)

1089

for (bit = 0; bit < sizeof(rq->cmd); bit++)

1090

printk("%02x ", rq->cmd[bit]);

1090

printk("%02x ", rq->cmd[bit]);

1091

printk("\n");

1091

printk("\n");

1092

}

1092

}

1093

}

1093

}

1094

1095

EXPORT_SYMBOL(blk_dump_rq_flags);

1095

EXPORT_SYMBOL(blk_dump_rq_flags);

1096

1097

/*
 * Recompute the physical and hardware segment counts of @bio against the
 * limits of queue @q, update the bio's front/back hw segment sizes, and
 * mark the bio BIO_SEG_VALID.  Does nothing if the bio has no io vector.
 */
void blk_recount_segments(request_queue_t *q, struct bio *bio)
{
	struct bio_vec *bv, *bvprv = NULL;
	int nr_phys_segs = 0, nr_hw_segs = 0;
	int seg_size = 0, hw_seg_size = 0;
	int i, cluster;
	/*
	 * highprv starts out set so that the very first vector always opens
	 * a new hw segment and bvprv is never examined while still NULL.
	 */
	int high, highprv = 1;

	if (unlikely(!bio->bi_io_vec))
		return;

	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);

	bio_for_each_segment(bv, bio, i) {
		/*
		 * the trick here is making sure that a high page is never
		 * considered part of another segment, since that might
		 * change with the bounce page.
		 */
		high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
		if (high || highprv)
			goto new_hw_segment;

		if (cluster) {
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
				goto new_hw_segment;

			/* vector extends the current phys and hw segment */
			seg_size += bv->bv_len;
			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
			/* new phys segment, but the hw segment keeps growing */
			hw_seg_size += bv->bv_len;
		} else {
new_hw_segment:
			/* close the current hw segment and open a new one */
			if (hw_seg_size > bio->bi_hw_front_size)
				bio->bi_hw_front_size = hw_seg_size;
			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
			nr_hw_segs++;
		}

		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}

	/* hw_seg_size now holds the size of the last hw segment */
	if (hw_seg_size > bio->bi_hw_back_size)
		bio->bi_hw_back_size = hw_seg_size;
	/* with a single hw segment, front and back are the same segment */
	if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
		bio->bi_hw_front_size = hw_seg_size;

	bio->bi_phys_segments = nr_phys_segs;
	bio->bi_hw_segments = nr_hw_segs;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}

1157

1158

1159

/*
 * Return 1 if the last vector of @bio and the first vector of @nxt may be
 * counted as one physical segment on queue @q, 0 otherwise.
 */
static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
				   struct bio *nxt)
{
	/* nothing is merged across bios unless the queue clusters */
	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
		return 0;

	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
		return 0;
	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
		return 0;

	/*
	 * bio and nxt are contiguous in memory; the remaining question is
	 * whether the queue's segment boundary lets them become one segment
	 */
	return BIO_SEG_BOUNDARY(q, bio, nxt) ? 1 : 0;
}

1179

1180

/*
 * Return 1 if the last hw segment of @bio and the first hw segment of @nxt
 * may be counted as a single hardware segment on queue @q, 0 otherwise.
 * Segment information is recomputed first for either bio that is not
 * flagged BIO_SEG_VALID.
 */
static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
				 struct bio *nxt)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);
	if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
		blk_recount_segments(q, nxt);

	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
		return 0;

	/*
	 * The hw segment that would span the boundary is the back segment
	 * of @bio joined to the front segment of @nxt, so that is the sum
	 * to check.  It is also the length the merge functions in this file
	 * record once the merge has been accepted.
	 */
	if (BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
		return 0;

	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
		return 0;

	return 1;
}

1195

1196

/*

1196

/*

1197

* map a request to scatterlist, return number of sg entries setup. Caller

1197

* map a request to scatterlist, return number of sg entries setup. Caller

1198

* must make sure sg can hold rq->nr_phys_segments entries

1198

* must make sure sg can hold rq->nr_phys_segments entries

1199

*/

1199

*/

1200

int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
{
	struct bio_vec *bvec, *prev = NULL;
	struct bio *bio;
	int i, nsegs = 0;
	int cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);

	/* walk every vector of every bio attached to the request */
	rq_for_each_bio(bio, rq) {
		bio_for_each_segment(bvec, bio, i) {
			int nbytes = bvec->bv_len;
			int merge = 0;

			/*
			 * On a clustering queue, try to grow the previous
			 * sg entry.  prev is only non-NULL once an entry
			 * has been set up, so sg[nsegs - 1] is valid here.
			 */
			if (prev && cluster) {
				struct scatterlist *last = &sg[nsegs - 1];

				merge = last->length + nbytes <= q->max_segment_size &&
					BIOVEC_PHYS_MERGEABLE(prev, bvec) &&
					BIOVEC_SEG_BOUNDARY(q, prev, bvec);
			}

			if (merge) {
				sg[nsegs - 1].length += nbytes;
			} else {
				/* start a fresh, zeroed sg entry */
				memset(&sg[nsegs], 0, sizeof(struct scatterlist));
				sg[nsegs].page = bvec->bv_page;
				sg[nsegs].length = nbytes;
				sg[nsegs].offset = bvec->bv_offset;

				nsegs++;
			}
			prev = bvec;
		} /* segments in bio */
	} /* bios in rq */

	return nsegs;
}

1245

1246

EXPORT_SYMBOL(blk_rq_map_sg);

1246

EXPORT_SYMBOL(blk_rq_map_sg);

1247

1248

/*

1248

/*

1249

* the standard queue merge functions, can be overridden with device

1249

* the standard queue merge functions, can be overridden with device

1250

* specific ones if so desired

1250

* specific ones if so desired

1251

*/

1251

*/

1252

1253

/*
 * Account for @bio being merged into an existing hw segment of @req.
 * Returns 1 and adds the bio's physical segments to the request if the
 * queue's physical segment limit allows it; otherwise marks the request
 * REQ_NOMERGE, drops it as the queue's last_merge hint and returns 0.
 */
static inline int ll_new_mergeable(request_queue_t *q,
				   struct request *req,
				   struct bio *bio)
{
	int bio_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + bio_segs <= q->max_phys_segments) {
		/*
		 * A hw segment is just getting larger, so only the phys
		 * counter needs bumping.
		 */
		req->nr_phys_segments += bio_segs;
		return 1;
	}

	req->flags |= REQ_NOMERGE;
	if (q->last_merge == req)
		q->last_merge = NULL;
	return 0;
}

1273

1274

/*
 * Account for @bio being added to @req as a new hw segment.  Returns 1 and
 * adds the bio's hw and physical segment counts to the request if both
 * queue limits allow it; otherwise marks the request REQ_NOMERGE, drops it
 * as the queue's last_merge hint and returns 0.
 */
static inline int ll_new_hw_segment(request_queue_t *q,
				    struct request *req,
				    struct bio *bio)
{
	int hw_segs = bio_hw_segments(q, bio);
	int phys_segs = bio_phys_segments(q, bio);

	if (req->nr_hw_segments + hw_segs <= q->max_hw_segments &&
	    req->nr_phys_segments + phys_segs <= q->max_phys_segments) {
		/*
		 * This bio forms the start of a new hw segment, so both
		 * counters grow.
		 */
		req->nr_hw_segments += hw_segs;
		req->nr_phys_segments += phys_segs;
		return 1;
	}

	req->flags |= REQ_NOMERGE;
	if (q->last_merge == req)
		q->last_merge = NULL;
	return 0;
}

1297

1298

/*
 * Default back-merge check: may @bio be appended to the tail of @req?
 * Returns non-zero if the merge is allowed (segment accounting on @req is
 * updated as a side effect), 0 if not.
 */
static int ll_back_merge_fn(request_queue_t *q, struct request *req,
			    struct bio *bio)
{
	unsigned short max_sectors;
	int mergeable;
	int len;

	/* pc requests are checked against max_hw_sectors, others against max_sectors */
	max_sectors = unlikely(blk_pc_request(req)) ? q->max_hw_sectors
						    : q->max_sectors;

	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
		req->flags |= REQ_NOMERGE;
		if (q->last_merge == req)
			q->last_merge = NULL;
		return 0;
	}

	/* the hw sizes read below are only meaningful after a recount */
	if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
		blk_recount_segments(q, req->biotail);
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	/* size of the hw segment that would form across the join */
	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;

	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) ||
	    BIOVEC_VIRT_OVERSIZE(len))
		return ll_new_hw_segment(q, req, bio);

	mergeable = ll_new_mergeable(q, req, bio);
	if (mergeable) {
		/* record the joined size at whichever ends it reaches */
		if (req->nr_hw_segments == 1)
			req->bio->bi_hw_front_size = len;
		if (bio->bi_hw_segments == 1)
			bio->bi_hw_back_size = len;
	}
	return mergeable;
}

1335

1336

/*
 * Default front-merge check: may @bio be prepended to the head of @req?
 * Returns non-zero if the merge is allowed (segment accounting on @req is
 * updated as a side effect), 0 if not.
 */
static int ll_front_merge_fn(request_queue_t *q, struct request *req,
			     struct bio *bio)
{
	unsigned short max_sectors;
	int len;

	/* pc requests are checked against max_hw_sectors, others against max_sectors */
	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
	else
		max_sectors = q->max_sectors;

	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
		req->flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}

	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);
	if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
		blk_recount_segments(q, req->bio);

	/*
	 * Size of the hw segment that would form across the join.  This
	 * must be read only after the recounts above, since they are what
	 * bring bi_hw_back_size / bi_hw_front_size up to date (the same
	 * order ll_back_merge_fn() uses).
	 */
	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;

	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
	    !BIOVEC_VIRT_OVERSIZE(len)) {
		int mergeable = ll_new_mergeable(q, req, bio);

		if (mergeable) {
			/* record the joined size at whichever ends it reaches */
			if (bio->bi_hw_segments == 1)
				bio->bi_hw_front_size = len;
			if (req->nr_hw_segments == 1)
				req->biotail->bi_hw_back_size = len;
		}
		return mergeable;
	}

	return ll_new_hw_segment(q, req, bio);
}

1374

1375

/*
 * Default request-merge check: may @next be merged onto the end of @req?
 * Returns 1 and stores the combined segment counts in @req if so,
 * 0 otherwise.
 */
static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
				struct request *next)
{
	int phys_segs;
	int hw_segs;

	/*
	 * First check whether either of the requests is a re-queued
	 * request. Can't merge them if so.
	 */
	if (req->special || next->special)
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
		return 0;

	/* physical segments: the two touching segments may collapse into one */
	phys_segs = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio))
		phys_segs--;

	if (phys_segs > q->max_phys_segments)
		return 0;

	/* hardware segments: same idea */
	hw_segs = req->nr_hw_segments + next->nr_hw_segments;
	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
		int len = req->biotail->bi_hw_back_size +
			  next->bio->bi_hw_front_size;

		/*
		 * propagate the combined length to the end of the requests
		 */
		if (req->nr_hw_segments == 1)
			req->bio->bi_hw_front_size = len;
		if (next->nr_hw_segments == 1)
			next->biotail->bi_hw_back_size = len;
		hw_segs--;
	}

	if (hw_segs > q->max_hw_segments)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = phys_segs;
	req->nr_hw_segments = hw_segs;
	return 1;
}

1422

1423

/*

1423

/*

1424

* "plug" the device if there are no outstanding requests: this will

1424

* "plug" the device if there are no outstanding requests: this will

1425

* force the transfer to start only after we have put all the requests

1425

* force the transfer to start only after we have put all the requests

1426

* on the list.

1426

* on the list.

1427

*

1427

*

1428

* This is called with interrupts off and no requests on the queue and

1428

* This is called with interrupts off and no requests on the queue and

1429

* with the queue lock held.

1429

* with the queue lock held.

1430

*/

1430

*/

1431

void blk_plug_device(request_queue_t *q)
{
	WARN_ON(!irqs_disabled());

	/*
	 * A stopped queue is never plugged; blk_start_queue() is what
	 * restarts queueing for it.
	 */
	if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
		return;

	/* already plugged: leave the unplug timer as it is */
	if (test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
		return;

	/* newly plugged: arm the unplug timer */
	mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
}

1445

1446

EXPORT_SYMBOL(blk_plug_device);

1446

EXPORT_SYMBOL(blk_plug_device);

1447

1448

/*

1448

/*

1449

* remove the queue from the plugged list, if present. called with

1449

* remove the queue from the plugged list, if present. called with

1450

* queue lock held and interrupts disabled.

1450

* queue lock held and interrupts disabled.

1451

*/

1451

*/

1452

int blk_remove_plug(request_queue_t *q)
{
	WARN_ON(!irqs_disabled());

	/* returns 1 if the queue was plugged (and is now unplugged), else 0 */
	if (test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
		del_timer(&q->unplug_timer);
		return 1;
	}

	return 0;
}

1462

1463

EXPORT_SYMBOL(blk_remove_plug);

1463

EXPORT_SYMBOL(blk_remove_plug);

1464

1465

/*

1465

/*

1466

* remove the plug and let it rip..

1466

* remove the plug and let it rip..

1467

*/

1467

*/

1468

void __generic_unplug_device(request_queue_t *q)
{
	/* a stopped queue is left untouched */
	if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)))
		return;

	/* run the request function only if there really was a plug to remove */
	if (blk_remove_plug(q))
		q->request_fn(q);
}

1478

EXPORT_SYMBOL(__generic_unplug_device);

1478

EXPORT_SYMBOL(__generic_unplug_device);

1479

1480

/**

1480

/**

1481

* generic_unplug_device - fire a request queue

1481

* generic_unplug_device - fire a request queue

1482

* @q: The &request_queue_t in question

1482

* @q: The &request_queue_t in question

1483

*

1483

*

1484

* Description:

1484

* Description:

1485

* Linux uses plugging to build bigger requests queues before letting

1485

* Linux uses plugging to build bigger requests queues before letting

1486

* the device have at them. If a queue is plugged, the I/O scheduler

1486

* the device have at them. If a queue is plugged, the I/O scheduler

1487

* is still adding and merging requests on the queue. Once the queue

1487

* is still adding and merging requests on the queue. Once the queue

1488

* gets unplugged, the request_fn defined for the queue is invoked and

1488

* gets unplugged, the request_fn defined for the queue is invoked and

1489

* transfers started.

1489

* transfers started.

1490

**/

1490

**/

1491

void generic_unplug_device(request_queue_t *q)
{
	/* locked wrapper: take the queue lock with interrupts disabled */
	spin_lock_irq(q->queue_lock);

	__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
}

1497

EXPORT_SYMBOL(generic_unplug_device);

1497

EXPORT_SYMBOL(generic_unplug_device);

1498

1499

static void blk_backing_dev_unplug(struct backing_dev_info *bdi,

1499

static void blk_backing_dev_unplug(struct backing_dev_info *bdi,

1500

struct page *page)

1500

struct page *page)

1501

{

1501

{

1502

request_queue_t *q = bdi->unplug_io_data;

1502

request_queue_t *q = bdi->unplug_io_data;

1503

1504

/*

1504

/*

1505

* devices don't necessarily have an ->unplug_fn defined

1505

* devices don't necessarily have an ->unplug_fn defined

1506

*/

1506

*/

1507

if (q->unplug_fn)

1507

if (q->unplug_fn)

1508

q->unplug_fn(q);

1508

q->unplug_fn(q);

1509

}

1509

}

1510

1511

static void blk_unplug_work(void *data)

1511

static void blk_unplug_work(void *data)

1512

{

1512

{

1513

request_queue_t *q = data;

1513

request_queue_t *q = data;

1514

1515

q->unplug_fn(q);

1515

q->unplug_fn(q);

1516

}

1516

}

1517

1518

static void blk_unplug_timeout(unsigned long data)

1518

static void blk_unplug_timeout(unsigned long data)

1519

{

1519

{

1520

request_queue_t *q = (request_queue_t *)data;

1520

request_queue_t *q = (request_queue_t *)data;

1521

1522

kblockd_schedule_work(&q->unplug_work);

1522

kblockd_schedule_work(&q->unplug_work);

1523

}

1523

}

1524

1525

/**

1525

/**

1526

* blk_start_queue - restart a previously stopped queue

1526

* blk_start_queue - restart a previously stopped queue

1527

* @q: The &request_queue_t in question

1527

* @q: The &request_queue_t in question

1528

*

1528

*

1529

* Description:

1529

* Description:

1530

* blk_start_queue() will clear the stop flag on the queue, and call

1530

* blk_start_queue() will clear the stop flag on the queue, and call

1531

* the request_fn for the queue if it was in a stopped state when

1531

* the request_fn for the queue if it was in a stopped state when

1532

* entered. Also see blk_stop_queue(). Queue lock must be held.

1532

* entered. Also see blk_stop_queue(). Queue lock must be held.

1533

**/

1533

**/

1534

void blk_start_queue(request_queue_t *q)
{
	clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);

	if (test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
		/*
		 * REENTER was already set: do not recurse any deeper,
		 * go through the plug/unplug machinery instead.
		 */
		blk_plug_device(q);
		kblockd_schedule_work(&q->unplug_work);
		return;
	}

	/*
	 * one level of recursion is ok and is much faster than kicking
	 * the unplug handling
	 */
	q->request_fn(q);
	clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
}

1550

1551

EXPORT_SYMBOL(blk_start_queue);

1551

EXPORT_SYMBOL(blk_start_queue);

1552

1553

/**

1553

/**

1554

* blk_stop_queue - stop a queue

1554

* blk_stop_queue - stop a queue

1555

* @q: The &request_queue_t in question

1555

* @q: The &request_queue_t in question

1556

*

1556

*

1557

* Description:

1557

* Description:

1558

* The Linux block layer assumes that a block driver will consume all

1558

* The Linux block layer assumes that a block driver will consume all

1559

* entries on the request queue when the request_fn strategy is called.

1559

* entries on the request queue when the request_fn strategy is called.

1560

* Often this will not happen, because of hardware limitations (queue

1560

* Often this will not happen, because of hardware limitations (queue

1561

* depth settings). If a device driver gets a 'queue full' response,

1561

* depth settings). If a device driver gets a 'queue full' response,

1562

* or if it simply chooses not to queue more I/O at one point, it can

1562

* or if it simply chooses not to queue more I/O at one point, it can

1563

* call this function to prevent the request_fn from being called until

1563

* call this function to prevent the request_fn from being called until

1564

* the driver has signalled it's ready to go again. This happens by calling

1564

* the driver has signalled it's ready to go again. This happens by calling

1565

* blk_start_queue() to restart queue operations. Queue lock must be held.

1565

* blk_start_queue() to restart queue operations. Queue lock must be held.

1566

**/

1566

**/

1567

void blk_stop_queue(request_queue_t *q)
{
	/* drop any plug (and its timer) before flagging the queue stopped */
	blk_remove_plug(q);

	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
}

1572

EXPORT_SYMBOL(blk_stop_queue);

1572

EXPORT_SYMBOL(blk_stop_queue);

1573

1574

/**

1574

/**

1575

* blk_sync_queue - cancel any pending callbacks on a queue

1575

* blk_sync_queue - cancel any pending callbacks on a queue

1576

* @q: the queue

1576

* @q: the queue

1577

*

1577

*

1578

* Description:

1578

* Description:

1579

* The block layer may perform asynchronous callback activity

1579

* The block layer may perform asynchronous callback activity

1580

* on a queue, such as calling the unplug function after a timeout.

1580

* on a queue, such as calling the unplug function after a timeout.

1581

* A block device may call blk_sync_queue to ensure that any

1581

* A block device may call blk_sync_queue to ensure that any

1582

* such activity is cancelled, thus allowing it to release resources

1582

* such activity is cancelled, thus allowing it to release resources

1583

* the the callbacks might use. The caller must already have made sure

1583

* the the callbacks might use. The caller must already have made sure

1584

* that its ->make_request_fn will not re-add plugging prior to calling

1584

* that its ->make_request_fn will not re-add plugging prior to calling

1585

* this function.

1585

* this function.

1586

*

1586

*

1587

*/

1587

*/

1588

void blk_sync_queue(struct request_queue *q)

1588

void blk_sync_queue(struct request_queue *q)

1589

{

1589

{

1590

del_timer_sync(&q->unplug_timer);

1590

del_timer_sync(&q->unplug_timer);

1591

kblockd_flush();

1591

kblockd_flush();

1592

}

1592

}

1593

EXPORT_SYMBOL(blk_sync_queue);

1593

EXPORT_SYMBOL(blk_sync_queue);

1594

1595

/**

1595

/**

1596

* blk_run_queue - run a single device queue

1596

* blk_run_queue - run a single device queue

1597

* @q: The queue to run

1597

* @q: The queue to run

1598

*/

1598

*/

1599

void blk_run_queue(struct request_queue *q)

1599

void blk_run_queue(struct request_queue *q)

1600

{

1600

{

1601

unsigned long flags;

1601

unsigned long flags;

1602

1603

spin_lock_irqsave(q->queue_lock, flags);

1603

spin_lock_irqsave(q->queue_lock, flags);

1604

blk_remove_plug(q);

1604

blk_remove_plug(q);

1605

if (!elv_queue_empty(q))

1605

if (!elv_queue_empty(q))

1606

q->request_fn(q);

1606

q->request_fn(q);

1607

spin_unlock_irqrestore(q->queue_lock, flags);

1607

spin_unlock_irqrestore(q->queue_lock, flags);

1608

}

1608

}

1609

EXPORT_SYMBOL(blk_run_queue);

1609

EXPORT_SYMBOL(blk_run_queue);

1610

1611

/**

1611

/**

1612

* blk_cleanup_queue: - release a &request_queue_t when it is no longer needed

1612

* blk_cleanup_queue: - release a &request_queue_t when it is no longer needed

1613

* @q: the request queue to be released

1613

* @q: the request queue to be released

1614

*

1614

*

1615

* Description:

1615

* Description:

1616

* blk_cleanup_queue is the pair to blk_init_queue() or

1616

* blk_cleanup_queue is the pair to blk_init_queue() or

1617

* blk_queue_make_request(). It should be called when a request queue is

1617

* blk_queue_make_request(). It should be called when a request queue is

1618

* being released; typically when a block device is being de-registered.

1618

* being released; typically when a block device is being de-registered.

1619

* Currently, its primary task it to free all the &struct request

1619

* Currently, its primary task it to free all the &struct request

1620

* structures that were allocated to the queue and the queue itself.

1620

* structures that were allocated to the queue and the queue itself.

1621

*

1621

*

1622

* Caveat:

1622

* Caveat:

1623

* Hopefully the low level driver will have finished any

1623

* Hopefully the low level driver will have finished any

1624

* outstanding requests first...

1624

* outstanding requests first...

1625

**/

1625

**/

1626

void blk_cleanup_queue(request_queue_t * q)

1626

void blk_cleanup_queue(request_queue_t * q)

1627

{

1627

{

1628

struct request_list *rl = &q->rq;

1628

struct request_list *rl = &q->rq;

1629

1630

if (!atomic_dec_and_test(&q->refcnt))

1630

if (!atomic_dec_and_test(&q->refcnt))

1631

return;

1631

return;

1632

1633

if (q->elevator)

1633

if (q->elevator)

1634

elevator_exit(q->elevator);

1634

elevator_exit(q->elevator);

1635

1636

blk_sync_queue(q);

1636

blk_sync_queue(q);

1637

1638

if (rl->rq_pool)

1638

if (rl->rq_pool)

1639

mempool_destroy(rl->rq_pool);

1639

mempool_destroy(rl->rq_pool);

1640

1641

if (q->queue_tags)

1641

if (q->queue_tags)

1642

__blk_queue_free_tags(q);

1642

__blk_queue_free_tags(q);

1643

1644

blk_queue_ordered(q, QUEUE_ORDERED_NONE);

1644

blk_queue_ordered(q, QUEUE_ORDERED_NONE);

1645

1646

kmem_cache_free(requestq_cachep, q);

1646

kmem_cache_free(requestq_cachep, q);

1647

}

1647

}

1648

1649

EXPORT_SYMBOL(blk_cleanup_queue);

1649

EXPORT_SYMBOL(blk_cleanup_queue);

1650

1651

/*
 * Reset the per-queue request list accounting and create the mempool that
 * backs request allocation for @q.
 *
 * Returns 0 on success, or -ENOMEM if the mempool could not be created.
 */
static int blk_init_free_list(request_queue_t *q)
{
	struct request_list *rl = &q->rq;

	rl->count[WRITE] = 0;
	rl->count[READ] = 0;
	rl->starved[WRITE] = 0;
	rl->starved[READ] = 0;
	rl->elvpriv = 0;

	init_waitqueue_head(&rl->wait[READ]);
	init_waitqueue_head(&rl->wait[WRITE]);

	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);

	return rl->rq_pool ? 0 : -ENOMEM;
}

1669

1670

static int __make_request(request_queue_t *, struct bio *);

1670

static int __make_request(request_queue_t *, struct bio *);

1671

1672

/*
 * Allocate a request queue without a specific node: thin wrapper around
 * blk_alloc_queue_node() that passes -1 as the node id.
 */
request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
{
	const int any_node = -1;

	return blk_alloc_queue_node(gfp_mask, any_node);
}
EXPORT_SYMBOL(blk_alloc_queue);

1677

1678

/*
 * Allocate a zeroed request queue from requestq_cachep on @node_id.
 *
 * The returned queue has its unplug timer initialised, a reference count
 * of one, and its backing_dev_info unplug hook pointing back at the queue.
 * Returns NULL if the slab allocation fails.
 */
request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	request_queue_t *q = kmem_cache_alloc_node(requestq_cachep, gfp_mask,
						   node_id);

	if (!q)
		return NULL;

	memset(q, 0, sizeof(*q));
	init_timer(&q->unplug_timer);

	/* The caller owns the initial (and so far only) reference. */
	atomic_set(&q->refcnt, 1);

	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
	q->backing_dev_info.unplug_io_data = q;

	return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

1696

1697

/**

1697

/**

1698

* blk_init_queue - prepare a request queue for use with a block device

1698

* blk_init_queue - prepare a request queue for use with a block device

1699

* @rfn: The function to be called to process requests that have been

1699

* @rfn: The function to be called to process requests that have been

1700

* placed on the queue.

1700

* placed on the queue.

1701

* @lock: Request queue spin lock

1701

* @lock: Request queue spin lock

1702

*

1702

*

1703

* Description:

1703

* Description:

1704

* If a block device wishes to use the standard request handling procedures,

1704

* If a block device wishes to use the standard request handling procedures,

1705

* which sorts requests and coalesces adjacent requests, then it must

1705

* which sorts requests and coalesces adjacent requests, then it must

1706

* call blk_init_queue(). The function @rfn will be called when there

1706

* call blk_init_queue(). The function @rfn will be called when there

1707

* are requests on the queue that need to be processed. If the device

1707

* are requests on the queue that need to be processed. If the device

1708

* supports plugging, then @rfn may not be called immediately when requests

1708

* supports plugging, then @rfn may not be called immediately when requests

1709

* are available on the queue, but may be called at some time later instead.

1709

* are available on the queue, but may be called at some time later instead.

1710

* Plugged queues are generally unplugged when a buffer belonging to one

1710

* Plugged queues are generally unplugged when a buffer belonging to one

1711

* of the requests on the queue is needed, or due to memory pressure.

1711

* of the requests on the queue is needed, or due to memory pressure.

1712

*

1712

*

1713

* @rfn is not required, or even expected, to remove all requests off the

1713

* @rfn is not required, or even expected, to remove all requests off the

1714

* queue, but only as many as it can handle at a time. If it does leave

1714

* queue, but only as many as it can handle at a time. If it does leave

1715

* requests on the queue, it is responsible for arranging that the requests

1715

* requests on the queue, it is responsible for arranging that the requests

1716

* get dealt with eventually.

1716

* get dealt with eventually.

1717

*

1717

*

1718

* The queue spin lock must be held while manipulating the requests on the

1718

* The queue spin lock must be held while manipulating the requests on the

1719

* request queue.

1719

* request queue.

1720

*

1720

*

1721

* Function returns a pointer to the initialized request queue, or NULL if

1721

* Function returns a pointer to the initialized request queue, or NULL if

1722

* it didn't succeed.

1722

* it didn't succeed.

1723

*

1723

*

1724

* Note:

1724

* Note:

1725

* blk_init_queue() must be paired with a blk_cleanup_queue() call

1725

* blk_init_queue() must be paired with a blk_cleanup_queue() call

1726

* when the block device is deactivated (such as at module unload).

1726

* when the block device is deactivated (such as at module unload).

1727

**/

1727

**/

1728

1729

request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)

1729

request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)

1730

{

1730

{

1731

return blk_init_queue_node(rfn, lock, -1);

1731

return blk_init_queue_node(rfn, lock, -1);

1732

}

1732

}

1733

EXPORT_SYMBOL(blk_init_queue);

1733

EXPORT_SYMBOL(blk_init_queue);

1734

1735

request_queue_t *

1735

request_queue_t *

1736

blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

1736

blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

1737

{

1737

{

1738

request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

1738

request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

1739

1740

if (!q)

1740

if (!q)

1741

return NULL;

1741

return NULL;

1742

1743

q->node = node_id;

1743

q->node = node_id;

1744

if (blk_init_free_list(q))

1744

if (blk_init_free_list(q))

1745

goto out_init;

1745

goto out_init;

1746

1747

/*

1747

/*

1748

* if caller didn't supply a lock, they get per-queue locking with

1748

* if caller didn't supply a lock, they get per-queue locking with

1749

* our embedded lock

1749

* our embedded lock

1750

*/

1750

*/

1751

if (!lock) {

1751

if (!lock) {

1752

spin_lock_init(&q->__queue_lock);

1752

spin_lock_init(&q->__queue_lock);

1753

lock = &q->__queue_lock;

1753

lock = &q->__queue_lock;

1754

}

1754

}

1755

1756

q->request_fn = rfn;

1756

q->request_fn = rfn;

1757

q->back_merge_fn = ll_back_merge_fn;

1757

q->back_merge_fn = ll_back_merge_fn;

1758

q->front_merge_fn = ll_front_merge_fn;

1758

q->front_merge_fn = ll_front_merge_fn;

1759

q->merge_requests_fn = ll_merge_requests_fn;

1759

q->merge_requests_fn = ll_merge_requests_fn;

1760

q->prep_rq_fn = NULL;

1760

q->prep_rq_fn = NULL;

1761

q->unplug_fn = generic_unplug_device;

1761

q->unplug_fn = generic_unplug_device;

1762

q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);

1762

q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);

1763

q->queue_lock = lock;

1763

q->queue_lock = lock;

1764

1765

blk_queue_segment_boundary(q, 0xffffffff);

1765

blk_queue_segment_boundary(q, 0xffffffff);

1766

1767

blk_queue_make_request(q, __make_request);

1767

blk_queue_make_request(q, __make_request);

1768

blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);

1768

blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);

1769

1770

blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

1770

blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

1771

blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

1771

blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

1772

1773

/*

1773

/*

1774

* all done

1774

* all done

1775

*/

1775

*/

1776

if (!elevator_init(q, NULL)) {

1776

if (!elevator_init(q, NULL)) {

1777

blk_queue_congestion_threshold(q);

1777

blk_queue_congestion_threshold(q);

1778

return q;

1778

return q;

1779

}

1779

}

1780

1781

blk_cleanup_queue(q);

1781

blk_cleanup_queue(q);

1782

out_init:

1782

out_init:

1783

kmem_cache_free(requestq_cachep, q);

1783

kmem_cache_free(requestq_cachep, q);

1784

return NULL;

1784

return NULL;

1785

}

1785

}

1786

EXPORT_SYMBOL(blk_init_queue_node);

1786

EXPORT_SYMBOL(blk_init_queue_node);

1787

1788

/*
 * Take an additional reference on @q unless it has been marked dead.
 *
 * Returns 0 if the reference was taken, 1 if QUEUE_FLAG_DEAD is set and
 * no reference was taken.
 */
int blk_get_queue(request_queue_t *q)
{
	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
		return 1;

	atomic_inc(&q->refcnt);
	return 0;
}

EXPORT_SYMBOL(blk_get_queue);

1799

1800

/*
 * Return @rq to the queue's request mempool, first releasing its elevator
 * private data when the request is flagged REQ_ELVPRIV.
 */
static inline void blk_free_request(request_queue_t *q, struct request *rq)
{
	const int has_elv_priv = (rq->flags & REQ_ELVPRIV) != 0;

	if (has_elv_priv)
		elv_put_request(q, rq);

	mempool_free(rq, q->rq.rq_pool);
}

1806

1807

static inline struct request *

1807

static inline struct request *

1808

blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,

1808

blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,

1809

int priv, gfp_t gfp_mask)

1809

int priv, gfp_t gfp_mask)

1810

{

1810

{

1811

struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

1811

struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

1812

1813

if (!rq)

1813

if (!rq)

1814

return NULL;

1814

return NULL;

1815

1816

/*

1816

/*

1817

* first three bits are identical in rq->flags and bio->bi_rw,

1817

* first three bits are identical in rq->flags and bio->bi_rw,

1818

* see bio.h and blkdev.h

1818

* see bio.h and blkdev.h

1819

*/

1819

*/

1820

rq->flags = rw;

1820

rq->flags = rw;

1821

1822

if (priv) {

1822

if (priv) {

1823

if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {

1823

if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {

1824

mempool_free(rq, q->rq.rq_pool);

1824

mempool_free(rq, q->rq.rq_pool);

1825

return NULL;

1825

return NULL;

1826

}

1826

}

1827

rq->flags |= REQ_ELVPRIV;

1827

rq->flags |= REQ_ELVPRIV;

1828

}

1828

}

1829

1830

return rq;

1830

return rq;

1831

}

1831

}

1832

1833

/*

1833

/*

1834

* ioc_batching returns true if the ioc is a valid batching request and

1834

* ioc_batching returns true if the ioc is a valid batching request and

1835

* should be given priority access to a request.

1835

* should be given priority access to a request.

1836

*/

1836

*/

1837

static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)

1837

static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)

1838

{

1838

{

1839

if (!ioc)

1839

if (!ioc)

1840

return 0;

1840

return 0;

1841

1842

/*

1842

/*

1843

* Make sure the process is able to allocate at least 1 request

1843

* Make sure the process is able to allocate at least 1 request

1844

* even if the batch times out, otherwise we could theoretically

1844

* even if the batch times out, otherwise we could theoretically

1845

* lose wakeups.

1845

* lose wakeups.

1846

*/

1846

*/

1847

return ioc->nr_batch_requests == q->nr_batching ||

1847

return ioc->nr_batch_requests == q->nr_batching ||

1848

(ioc->nr_batch_requests > 0

1848

(ioc->nr_batch_requests > 0

1849

&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));

1849

&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));

1850

}

1850

}

1851

1852

/*

1852

/*

1853

* ioc_set_batching sets ioc to be a new "batcher" if it is not one. This

1853

* ioc_set_batching sets ioc to be a new "batcher" if it is not one. This

1854

* will cause the process to be a "batcher" on all queues in the system. This

1854

* will cause the process to be a "batcher" on all queues in the system. This

1855

* is the behaviour we want though - once it gets a wakeup it should be given

1855

* is the behaviour we want though - once it gets a wakeup it should be given

1856

* a nice run.

1856

* a nice run.

1857

*/

1857

*/

1858

static void ioc_set_batching(request_queue_t *q, struct io_context *ioc)

1858

static void ioc_set_batching(request_queue_t *q, struct io_context *ioc)

1859

{

1859

{

1860

if (!ioc || ioc_batching(q, ioc))

1860

if (!ioc || ioc_batching(q, ioc))

1861

return;

1861

return;

1862

1863

ioc->nr_batch_requests = q->nr_batching;

1863

ioc->nr_batch_requests = q->nr_batching;

1864

ioc->last_waited = jiffies;

1864

ioc->last_waited = jiffies;

1865

}

1865

}

1866

1867

/*
 * Update the congestion and "full" state of @q for direction @rw after the
 * request count has changed, and wake a waiter if there is room again.
 */
static void __freed_request(request_queue_t *q, int rw)
{
	struct request_list *rl = &q->rq;

	if (rl->count[rw] < queue_congestion_off_threshold(q))
		clear_queue_congested(q, rw);

	/* Still at or above the request limit: nobody can be admitted. */
	if (!(rl->count[rw] + 1 <= q->nr_requests))
		return;

	if (waitqueue_active(&rl->wait[rw]))
		wake_up(&rl->wait[rw]);

	blk_clear_queue_full(q, rw);
}

1881

1882

/*

1882

/*

1883

* A request has just been released. Account for it, update the full and

1883

* A request has just been released. Account for it, update the full and

1884

* congestion status, wake up any waiters. Called under q->queue_lock.

1884

* congestion status, wake up any waiters. Called under q->queue_lock.

1885

*/

1885

*/

1886

static void freed_request(request_queue_t *q, int rw, int priv)

1886

static void freed_request(request_queue_t *q, int rw, int priv)

1887

{

1887

{

1888

struct request_list *rl = &q->rq;

1888

struct request_list *rl = &q->rq;

1889

1890

rl->count[rw]--;

1890

rl->count[rw]--;

1891

if (priv)

1891

if (priv)

1892

rl->elvpriv--;

1892

rl->elvpriv--;

1893

1894

__freed_request(q, rw);

1894

__freed_request(q, rw);

1895

1896

if (unlikely(rl->starved[rw ^ 1]))

1896

if (unlikely(rl->starved[rw ^ 1]))

1897

__freed_request(q, rw ^ 1);

1897

__freed_request(q, rw ^ 1);

1898

}

1898

}

1899

1900

/* Map the entry following @list to its struct request via ->queuelist. */
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)

1901

/*

1901

/*

1902

* Get a free request, queue_lock must be held.

1902

* Get a free request, queue_lock must be held.

1903

* Returns NULL on failure, with queue_lock held.

1903

* Returns NULL on failure, with queue_lock held.

1904

* Returns !NULL on success, with queue_lock *not held*.

1904

* Returns !NULL on success, with queue_lock *not held*.

1905

*/

1905

*/

1906

static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,

1906

static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,

1907

gfp_t gfp_mask)

1907

gfp_t gfp_mask)

1908

{

1908

{

1909

struct request *rq = NULL;

1909

struct request *rq = NULL;

1910

struct request_list *rl = &q->rq;

1910

struct request_list *rl = &q->rq;

1911

struct io_context *ioc = current_io_context(GFP_ATOMIC);

1911

struct io_context *ioc = NULL;

1912

int priv;

1912

int may_queue, priv;

1913

1914

if (rl->count[rw]+1 >= q->nr_requests) {

1914

may_queue = elv_may_queue(q, rw, bio);

1915

/*

1915

if (may_queue == ELV_MQUEUE_NO)

1916

* The queue will fill after this allocation, so set it as

1916

goto rq_starved;

1917

* full, and mark this process as "batching". This process

1917

1918

* will be allowed to complete a batch of requests, others

1918

if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {

1919

* will be blocked.

1919

if (rl->count[rw]+1 >= q->nr_requests) {

1920

*/

1920

ioc = current_io_context(GFP_ATOMIC);

1921

if (!blk_queue_full(q, rw)) {

1921

/*

1922

ioc_set_batching(q, ioc);

1922

* The queue will fill after this allocation, so set

1923

blk_set_queue_full(q, rw);

1923

* it as full, and mark this process as "batching".

1924

* This process will be allowed to complete a batch of

1925

* requests, others will be blocked.

1926

*/

1927

if (!blk_queue_full(q, rw)) {

1928

ioc_set_batching(q, ioc);

1929

blk_set_queue_full(q, rw);

1930

} else {

1931

if (may_queue != ELV_MQUEUE_MUST

1932

&& !ioc_batching(q, ioc)) {

1933

/*

1934

* The queue is full and the allocating

1935

* process is not a "batcher", and not

1936

* exempted by the IO scheduler

1937

*/

1938

goto out;

1939

}

1940

}

1924

}

1941

}

1942

set_queue_congested(q, rw);

1925

}

1943

}

1926

1944

1927

switch (elv_may_queue(q, rw, bio)) {

1928

case ELV_MQUEUE_NO:

1929

goto rq_starved;

1930

case ELV_MQUEUE_MAY:

1931

break;

1932

case ELV_MQUEUE_MUST:

1933

goto get_rq;

1934

}

1935

1936

if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {

1937

/*

1938

* The queue is full and the allocating process is not a

1939

* "batcher", and not exempted by the IO scheduler

1940

*/

1941

goto out;

1942

}

1943

1944

get_rq:

1945

/*

1945

/*

1946

* Only allow batching queuers to allocate up to 50% over the defined

1946

* Only allow batching queuers to allocate up to 50% over the defined

1947

* limit of requests, otherwise we could have thousands of requests

1947

* limit of requests, otherwise we could have thousands of requests

1948

* allocated with any setting of ->nr_requests

1948

* allocated with any setting of ->nr_requests

1949

*/

1949

*/

1950

if (rl->count[rw] >= (3 * q->nr_requests / 2))

1950

if (rl->count[rw] >= (3 * q->nr_requests / 2))

1951

goto out;

1951

goto out;

1952

1953

rl->count[rw]++;

1953

rl->count[rw]++;

1954

rl->starved[rw] = 0;

1954

rl->starved[rw] = 0;

1955

if (rl->count[rw] >= queue_congestion_on_threshold(q))

1956

set_queue_congested(q, rw);

1957

1955

1958

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

1956

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

1959

if (priv)

1957

if (priv)

1960

rl->elvpriv++;

1958

rl->elvpriv++;

1961

1959

1962

spin_unlock_irq(q->queue_lock);

1960

spin_unlock_irq(q->queue_lock);

1963

1961

1964

rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);

1962

rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);

1965

if (!rq) {

1963

if (unlikely(!rq)) {

1966

/*

1964

/*

1967

* Allocation failed presumably due to memory. Undo anything

1965

* Allocation failed presumably due to memory. Undo anything

1968

* we might have messed up.

1966

* we might have messed up.

1969

*

1967

*

1970

* Allocating task should really be put onto the front of the

1968

* Allocating task should really be put onto the front of the

1971

* wait queue, but this is pretty rare.

1969

* wait queue, but this is pretty rare.

1972

*/

1970

*/

1973

spin_lock_irq(q->queue_lock);

1971

spin_lock_irq(q->queue_lock);

1974

freed_request(q, rw, priv);

1972

freed_request(q, rw, priv);

1975

1973

1976

/*

1974

/*

1977

* in the very unlikely event that allocation failed and no

1975

* in the very unlikely event that allocation failed and no

1978

* requests for this direction was pending, mark us starved

1976

* requests for this direction was pending, mark us starved

1979

* so that freeing of a request in the other direction will

1977

* so that freeing of a request in the other direction will

1980

* notice us. another possible fix would be to split the

1978

* notice us. another possible fix would be to split the

1981

* rq mempool into READ and WRITE

1979

* rq mempool into READ and WRITE

1982

*/

1980

*/

1983

rq_starved:

1981

rq_starved:

1984

if (unlikely(rl->count[rw] == 0))

1982

if (unlikely(rl->count[rw] == 0))

1985

rl->starved[rw] = 1;

1983

rl->starved[rw] = 1;

1986

1984

1987

goto out;

1985

goto out;

1988

}

1986

}

1989

1987

1988

/*

1989

* ioc may be NULL here, and ioc_batching will be false. That's

1990

* OK, if the queue is under the request limit then requests need

1991

* not count toward the nr_batch_requests limit. There will always

1992

* be some limit enforced by BLK_BATCH_TIME.

1993

*/

1990

if (ioc_batching(q, ioc))

1994

if (ioc_batching(q, ioc))

1991

ioc->nr_batch_requests--;

1995

ioc->nr_batch_requests--;

1992

1996

1993

rq_init(q, rq);

1997

rq_init(q, rq);

1994

rq->rl = rl;

1998

rq->rl = rl;

1995

out:

1999

out:

1996

return rq;

2000

return rq;

1997

}

2001

}

1998

2002

1999

/*

2003

/*

2000

* No available requests for this queue, unplug the device and wait for some

2004

* No available requests for this queue, unplug the device and wait for some

2001

* requests to become available.

2005

* requests to become available.

2002

*

2006

*

2003

* Called with q->queue_lock held, and returns with it unlocked.

2007

* Called with q->queue_lock held, and returns with it unlocked.

2004

*/

2008

*/

2005

static struct request *get_request_wait(request_queue_t *q, int rw,

2009

static struct request *get_request_wait(request_queue_t *q, int rw,

2006

struct bio *bio)

2010

struct bio *bio)

2007

{

2011

{

2008

struct request *rq;

2012

struct request *rq;

2009

2013

2010

rq = get_request(q, rw, bio, GFP_NOIO);

2014

rq = get_request(q, rw, bio, GFP_NOIO);

2011

while (!rq) {

2015

while (!rq) {

2012

DEFINE_WAIT(wait);

2016

DEFINE_WAIT(wait);

2013

struct request_list *rl = &q->rq;

2017

struct request_list *rl = &q->rq;

2014

2018

2015

prepare_to_wait_exclusive(&rl->wait[rw], &wait,

2019

prepare_to_wait_exclusive(&rl->wait[rw], &wait,

2016

TASK_UNINTERRUPTIBLE);

2020

TASK_UNINTERRUPTIBLE);

2017

2021

2018

rq = get_request(q, rw, bio, GFP_NOIO);

2022

rq = get_request(q, rw, bio, GFP_NOIO);

2019

2023

2020

if (!rq) {

2024

if (!rq) {

2021

struct io_context *ioc;

2025

struct io_context *ioc;

2022

2026

2023

__generic_unplug_device(q);

2027

__generic_unplug_device(q);

2024

spin_unlock_irq(q->queue_lock);

2028

spin_unlock_irq(q->queue_lock);

2025

io_schedule();

2029

io_schedule();

2026

2030

2027

/*

2031

/*

2028

* After sleeping, we become a "batching" process and

2032

* After sleeping, we become a "batching" process and

2029

* will be able to allocate at least one request, and

2033

* will be able to allocate at least one request, and

2030

* up to a big batch of them for a small period time.

2034

* up to a big batch of them for a small period time.

2031

* See ioc_batching, ioc_set_batching

2035

* See ioc_batching, ioc_set_batching

2032

*/

2036

*/

2033

ioc = current_io_context(GFP_NOIO);

2037

ioc = current_io_context(GFP_NOIO);

2034

ioc_set_batching(q, ioc);

2038

ioc_set_batching(q, ioc);

2035

2039

2036

spin_lock_irq(q->queue_lock);

2040

spin_lock_irq(q->queue_lock);

2037

}

2041

}

2038

finish_wait(&rl->wait[rw], &wait);

2042

finish_wait(&rl->wait[rw], &wait);

2039

}

2043

}

2040

2044

2041

return rq;

2045

return rq;

2042

}

2046

}

2043

2047

2044

/*
 * Allocate a request for direction @rw (READ or WRITE) on @q.
 *
 * With __GFP_WAIT set in @gfp_mask the call goes through
 * get_request_wait(), which sleeps until a request is available.
 * Otherwise a single get_request() attempt is made and NULL is returned
 * if it fails.  q->queue_lock is taken here and is no longer held when
 * the function returns.
 */
struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	spin_lock_irq(q->queue_lock);

	/* Blocking path: get_request_wait() returns with the lock dropped. */
	if (gfp_mask & __GFP_WAIT)
		return get_request_wait(q, rw, NULL);

	/* get_request() keeps the lock held only when it fails. */
	rq = get_request(q, rw, NULL, gfp_mask);
	if (!rq)
		spin_unlock_irq(q->queue_lock);

	/* q->queue_lock is unlocked at this point */
	return rq;
}
EXPORT_SYMBOL(blk_get_request);

2063

2067

2064

/**

2068

/**

2065

* blk_requeue_request - put a request back on queue

2069

* blk_requeue_request - put a request back on queue

2066

* @q: request queue where request should be inserted

2070

* @q: request queue where request should be inserted

2067

* @rq: request to be inserted

2071

* @rq: request to be inserted

2068

*

2072

*

2069

* Description:

2073

* Description:

2070

* Drivers often keep queueing requests until the hardware cannot accept

2074

* Drivers often keep queueing requests until the hardware cannot accept

2071

* more, when that condition happens we need to put the request back

2075

* more, when that condition happens we need to put the request back

2072

* on the queue. Must be called with queue lock held.

2076

* on the queue. Must be called with queue lock held.

2073

*/

2077

*/

2074

void blk_requeue_request(request_queue_t *q, struct request *rq)

2078

void blk_requeue_request(request_queue_t *q, struct request *rq)

2075

{

2079

{

2076

if (blk_rq_tagged(rq))

2080

if (blk_rq_tagged(rq))

2077

blk_queue_end_tag(q, rq);

2081

blk_queue_end_tag(q, rq);

2078

2082

2079

elv_requeue_request(q, rq);

2083

elv_requeue_request(q, rq);

2080

}

2084

}

2081

2085

2082

EXPORT_SYMBOL(blk_requeue_request);

2086

EXPORT_SYMBOL(blk_requeue_request);

2083

2087

2084

/**

2088

/**

2085

* blk_insert_request - insert a special request in to a request queue

2089

* blk_insert_request - insert a special request in to a request queue

2086

* @q: request queue where request should be inserted

2090

* @q: request queue where request should be inserted

2087

* @rq: request to be inserted

2091

* @rq: request to be inserted

2088

* @at_head: insert request at head or tail of queue

2092

* @at_head: insert request at head or tail of queue

2089

* @data: private data

2093

* @data: private data

2090

*

2094

*

2091

* Description:

2095

* Description:

2092

* Many block devices need to execute commands asynchronously, so they don't

2096

* Many block devices need to execute commands asynchronously, so they don't

2093

* block the whole kernel from preemption during request execution. This is

2097

* block the whole kernel from preemption during request execution. This is

2094

* accomplished normally by inserting aritficial requests tagged as

2098

* accomplished normally by inserting aritficial requests tagged as

2095

* REQ_SPECIAL in to the corresponding request queue, and letting them be

2099

* REQ_SPECIAL in to the corresponding request queue, and letting them be

2096

* scheduled for actual execution by the request queue.

2100

* scheduled for actual execution by the request queue.

2097

*

2101

*

2098

* We have the option of inserting the head or the tail of the queue.

2102

* We have the option of inserting the head or the tail of the queue.

2099

* Typically we use the tail for new ioctls and so forth. We use the head

2103

* Typically we use the tail for new ioctls and so forth. We use the head

2100

* of the queue for things like a QUEUE_FULL message from a device, or a

2104

* of the queue for things like a QUEUE_FULL message from a device, or a

2101

* host that is unable to accept a particular command.

2105

* host that is unable to accept a particular command.

2102

*/

2106

*/

2103

void blk_insert_request(request_queue_t *q, struct request *rq,

2107

void blk_insert_request(request_queue_t *q, struct request *rq,

2104

int at_head, void *data)

2108

int at_head, void *data)

2105

{

2109

{

2106

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

2110

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

2107

unsigned long flags;

2111

unsigned long flags;

2108

2112

2109

/*

2113

/*

2110

* tell I/O scheduler that this isn't a regular read/write (ie it

2114

* tell I/O scheduler that this isn't a regular read/write (ie it

2111

* must not attempt merges on this) and that it acts as a soft

2115

* must not attempt merges on this) and that it acts as a soft

2112

* barrier

2116

* barrier

2113

*/

2117

*/

2114

rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;

2118

rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;

2115

2119

2116

rq->special = data;

2120

rq->special = data;

2117

2121

2118

spin_lock_irqsave(q->queue_lock, flags);

2122

spin_lock_irqsave(q->queue_lock, flags);

2119

2123

2120

/*

2124

/*

2121

* If command is tagged, release the tag

2125

* If command is tagged, release the tag

2122

*/

2126

*/

2123

if (blk_rq_tagged(rq))

2127

if (blk_rq_tagged(rq))

2124

blk_queue_end_tag(q, rq);

2128

blk_queue_end_tag(q, rq);

2125

2129

2126

drive_stat_acct(rq, rq->nr_sectors, 1);

2130

drive_stat_acct(rq, rq->nr_sectors, 1);

2127

__elv_add_request(q, rq, where, 0);

2131

__elv_add_request(q, rq, where, 0);

2128

2132

2129

if (blk_queue_plugged(q))

2133

if (blk_queue_plugged(q))

2130

__generic_unplug_device(q);

2134

__generic_unplug_device(q);

2131

else

2135

else

2132

q->request_fn(q);

2136

q->request_fn(q);

2133

spin_unlock_irqrestore(q->queue_lock, flags);

2137

spin_unlock_irqrestore(q->queue_lock, flags);

2134

}

2138

}

2135

2139

2136

EXPORT_SYMBOL(blk_insert_request);

2140

EXPORT_SYMBOL(blk_insert_request);

2137

2141

2138

/**

2142

/**

2139

* blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage

2143

* blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage

2140

* @q: request queue where request should be inserted

2144

* @q: request queue where request should be inserted

2141

* @rq: request structure to fill

2145

* @rq: request structure to fill

2142

* @ubuf: the user buffer

2146

* @ubuf: the user buffer

2143

* @len: length of user data

2147

* @len: length of user data

2144

*

2148

*

2145

* Description:

2149

* Description:

2146

* Data will be mapped directly for zero copy io, if possible. Otherwise

2150

* Data will be mapped directly for zero copy io, if possible. Otherwise

2147

* a kernel bounce buffer is used.

2151

* a kernel bounce buffer is used.

2148

*

2152

*

2149

* A matching blk_rq_unmap_user() must be issued at the end of io, while

2153

* A matching blk_rq_unmap_user() must be issued at the end of io, while

2150

* still in process context.

2154

* still in process context.

2151

*

2155

*

2152

* Note: The mapped bio may need to be bounced through blk_queue_bounce()

2156

* Note: The mapped bio may need to be bounced through blk_queue_bounce()

2153

* before being submitted to the device, as pages mapped may be out of

2157

* before being submitted to the device, as pages mapped may be out of

2154

* reach. It's the callers responsibility to make sure this happens. The

2158

* reach. It's the callers responsibility to make sure this happens. The

2155

* original bio must be passed back in to blk_rq_unmap_user() for proper

2159

* original bio must be passed back in to blk_rq_unmap_user() for proper

2156

* unmapping.

2160

* unmapping.

2157

*/

2161

*/

2158

int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,

2162

int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,

2159

unsigned int len)

2163

unsigned int len)

2160

{

2164

{

2161

unsigned long uaddr;

2165

unsigned long uaddr;

2162

struct bio *bio;

2166

struct bio *bio;

2163

int reading;

2167

int reading;

2164

2168

2165

if (len > (q->max_hw_sectors << 9))

2169

if (len > (q->max_hw_sectors << 9))

2166

return -EINVAL;

2170

return -EINVAL;

2167

if (!len || !ubuf)

2171

if (!len || !ubuf)

2168

return -EINVAL;

2172

return -EINVAL;

2169

2173

2170

reading = rq_data_dir(rq) == READ;

2174

reading = rq_data_dir(rq) == READ;

2171

2175

2172

/*

2176

/*

2173

* if alignment requirement is satisfied, map in user pages for

2177

* if alignment requirement is satisfied, map in user pages for

2174

* direct dma. else, set up kernel bounce buffers

2178

* direct dma. else, set up kernel bounce buffers

2175

*/

2179

*/

2176

uaddr = (unsigned long) ubuf;

2180

uaddr = (unsigned long) ubuf;

2177

if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))

2181

if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))

2178

bio = bio_map_user(q, NULL, uaddr, len, reading);

2182

bio = bio_map_user(q, NULL, uaddr, len, reading);

2179

else

2183

else

2180

bio = bio_copy_user(q, uaddr, len, reading);

2184

bio = bio_copy_user(q, uaddr, len, reading);

2181

2185

2182

if (!IS_ERR(bio)) {

2186

if (!IS_ERR(bio)) {

2183

rq->bio = rq->biotail = bio;

2187

rq->bio = rq->biotail = bio;

2184

blk_rq_bio_prep(q, rq, bio);

2188

blk_rq_bio_prep(q, rq, bio);

2185

2189

2186

rq->buffer = rq->data = NULL;

2190

rq->buffer = rq->data = NULL;

2187

rq->data_len = len;

2191

rq->data_len = len;

2188

return 0;

2192

return 0;

2189

}

2193

}

2190

2194

2191

/*

2195

/*

2192

* bio is the err-ptr

2196

* bio is the err-ptr

2193

*/

2197

*/

2194

return PTR_ERR(bio);

2198

return PTR_ERR(bio);

2195

}

2199

}

2196

2200

2197

EXPORT_SYMBOL(blk_rq_map_user);

2201

EXPORT_SYMBOL(blk_rq_map_user);

2198

2202

2199

/**

2203

/**

2200

* blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage

2204

* blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage

2201

* @q: request queue where request should be inserted

2205

* @q: request queue where request should be inserted

2202

* @rq: request to map data to

2206

* @rq: request to map data to

2203

* @iov: pointer to the iovec

2207

* @iov: pointer to the iovec

2204

* @iov_count: number of elements in the iovec

2208

* @iov_count: number of elements in the iovec

2205

*

2209

*

2206

* Description:

2210

* Description:

2207

* Data will be mapped directly for zero copy io, if possible. Otherwise

2211

* Data will be mapped directly for zero copy io, if possible. Otherwise

2208

* a kernel bounce buffer is used.

2212

* a kernel bounce buffer is used.

2209

*

2213

*

2210

* A matching blk_rq_unmap_user() must be issued at the end of io, while

2214

* A matching blk_rq_unmap_user() must be issued at the end of io, while

2211

* still in process context.

2215

* still in process context.

2212

*

2216

*

2213

* Note: The mapped bio may need to be bounced through blk_queue_bounce()

2217

* Note: The mapped bio may need to be bounced through blk_queue_bounce()

2214

* before being submitted to the device, as pages mapped may be out of

2218

* before being submitted to the device, as pages mapped may be out of

2215

* reach. It's the callers responsibility to make sure this happens. The

2219

* reach. It's the callers responsibility to make sure this happens. The

2216

* original bio must be passed back in to blk_rq_unmap_user() for proper

2220

* original bio must be passed back in to blk_rq_unmap_user() for proper

2217

* unmapping.

2221

* unmapping.

2218

*/

2222

*/

2219

int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,

2223

int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,

2220

struct sg_iovec *iov, int iov_count)

2224

struct sg_iovec *iov, int iov_count)

2221

{

2225

{

2222

struct bio *bio;

2226

struct bio *bio;

2223

2227

2224

if (!iov || iov_count <= 0)

2228

if (!iov || iov_count <= 0)

2225

return -EINVAL;

2229

return -EINVAL;

2226

2230

2227

/* we don't allow misaligned data like bio_map_user() does. If the

2231

/* we don't allow misaligned data like bio_map_user() does. If the

2228

* user is using sg, they're expected to know the alignment constraints

2232

* user is using sg, they're expected to know the alignment constraints

2229

* and respect them accordingly */

2233

* and respect them accordingly */

2230

bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);

2234

bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);

2231

if (IS_ERR(bio))

2235

if (IS_ERR(bio))

2232

return PTR_ERR(bio);

2236

return PTR_ERR(bio);

2233

2237

2234

rq->bio = rq->biotail = bio;

2238

rq->bio = rq->biotail = bio;

2235

blk_rq_bio_prep(q, rq, bio);

2239

blk_rq_bio_prep(q, rq, bio);

2236

rq->buffer = rq->data = NULL;

2240

rq->buffer = rq->data = NULL;

2237

rq->data_len = bio->bi_size;

2241

rq->data_len = bio->bi_size;

2238

return 0;

2242

return 0;

2239

}

2243

}

2240

2244

2241

EXPORT_SYMBOL(blk_rq_map_user_iov);

2245

EXPORT_SYMBOL(blk_rq_map_user_iov);

2242

2246

2243

/**

2247

/**

2244

* blk_rq_unmap_user - unmap a request with user data

2248

* blk_rq_unmap_user - unmap a request with user data

2245

* @bio: bio to be unmapped

2249

* @bio: bio to be unmapped

2246

* @ulen: length of user buffer

2250

* @ulen: length of user buffer

2247

*

2251

*

2248

* Description:

2252

* Description:

2249

* Unmap a bio previously mapped by blk_rq_map_user().

2253

* Unmap a bio previously mapped by blk_rq_map_user().

2250

*/

2254

*/

2251

int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)

2255

int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)

2252

{

2256

{

2253

int ret = 0;

2257

int ret = 0;

2254

2258

2255

if (bio) {

2259

if (bio) {

2256

if (bio_flagged(bio, BIO_USER_MAPPED))

2260

if (bio_flagged(bio, BIO_USER_MAPPED))

2257

bio_unmap_user(bio);

2261

bio_unmap_user(bio);

2258

else

2262

else

2259

ret = bio_uncopy_user(bio);

2263

ret = bio_uncopy_user(bio);

2260

}

2264

}

2261

2265

2262

return 0;

2266

return 0;

2263

}

2267

}

2264

2268

2265

EXPORT_SYMBOL(blk_rq_unmap_user);

2269

EXPORT_SYMBOL(blk_rq_unmap_user);

2266

2270

2267

/**

2271

/**

2268

* blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage

2272

* blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage

2269

* @q: request queue where request should be inserted

2273

* @q: request queue where request should be inserted

2270

* @rq: request to fill

2274

* @rq: request to fill

2271

* @kbuf: the kernel buffer

2275

* @kbuf: the kernel buffer

2272

* @len: length of user data

2276

* @len: length of user data

2273

* @gfp_mask: memory allocation flags

2277

* @gfp_mask: memory allocation flags

2274

*/

2278

*/

2275

int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,

2279

int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,

2276

unsigned int len, gfp_t gfp_mask)

2280

unsigned int len, gfp_t gfp_mask)

2277

{

2281

{

2278

struct bio *bio;

2282

struct bio *bio;

2279

2283

2280

if (len > (q->max_hw_sectors << 9))

2284

if (len > (q->max_hw_sectors << 9))

2281

return -EINVAL;

2285

return -EINVAL;

2282

if (!len || !kbuf)

2286

if (!len || !kbuf)

2283

return -EINVAL;

2287

return -EINVAL;

2284

2288

2285

bio = bio_map_kern(q, kbuf, len, gfp_mask);

2289

bio = bio_map_kern(q, kbuf, len, gfp_mask);

2286

if (IS_ERR(bio))

2290

if (IS_ERR(bio))

2287

return PTR_ERR(bio);

2291

return PTR_ERR(bio);

2288

2292

2289

if (rq_data_dir(rq) == WRITE)

2293

if (rq_data_dir(rq) == WRITE)

2290

bio->bi_rw |= (1 << BIO_RW);

2294

bio->bi_rw |= (1 << BIO_RW);

2291

2295

2292

rq->bio = rq->biotail = bio;

2296

rq->bio = rq->biotail = bio;

2293

blk_rq_bio_prep(q, rq, bio);

2297

blk_rq_bio_prep(q, rq, bio);

2294

2298

2295

rq->buffer = rq->data = NULL;

2299

rq->buffer = rq->data = NULL;

2296

rq->data_len = len;

2300

rq->data_len = len;

2297

return 0;

2301

return 0;

2298

}

2302

}

2299

2303

2300

EXPORT_SYMBOL(blk_rq_map_kern);

2304

EXPORT_SYMBOL(blk_rq_map_kern);

2301

2305

2302

/**

2306

/**

2303

* blk_execute_rq_nowait - insert a request into queue for execution

2307

* blk_execute_rq_nowait - insert a request into queue for execution

2304

* @q: queue to insert the request in

2308

* @q: queue to insert the request in

2305

* @bd_disk: matching gendisk

2309

* @bd_disk: matching gendisk

2306

* @rq: request to insert

2310

* @rq: request to insert

2307

* @at_head: insert request at head or tail of queue

2311

* @at_head: insert request at head or tail of queue

2308

* @done: I/O completion handler

2312

* @done: I/O completion handler

2309

*

2313

*

2310

* Description:

2314

* Description:

2311

* Insert a fully prepared request at the back of the io scheduler queue

2315

* Insert a fully prepared request at the back of the io scheduler queue

2312

* for execution. Don't wait for completion.

2316

* for execution. Don't wait for completion.

2313

*/

2317

*/

2314

void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,

2318

void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,

2315

struct request *rq, int at_head,

2319

struct request *rq, int at_head,

2316

void (*done)(struct request *))

2320

void (*done)(struct request *))

2317

{

2321

{

2318

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

2322

int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

2319

2323

2320

rq->rq_disk = bd_disk;

2324

rq->rq_disk = bd_disk;

2321

rq->flags |= REQ_NOMERGE;

2325

rq->flags |= REQ_NOMERGE;

2322

rq->end_io = done;

2326

rq->end_io = done;

2323

elv_add_request(q, rq, where, 1);

2327

elv_add_request(q, rq, where, 1);

2324

generic_unplug_device(q);

2328

generic_unplug_device(q);

2325

}

2329

}

2326

2330

2327

EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);

2331

EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);

2328

2332

2329

/**

2333

/**

2330

* blk_execute_rq - insert a request into queue for execution

2334

* blk_execute_rq - insert a request into queue for execution

2331

* @q: queue to insert the request in

2335

* @q: queue to insert the request in

2332

* @bd_disk: matching gendisk

2336

* @bd_disk: matching gendisk

2333

* @rq: request to insert

2337

* @rq: request to insert

2334

* @at_head: insert request at head or tail of queue

2338

* @at_head: insert request at head or tail of queue

2335

*

2339

*

2336

* Description:

2340

* Description:

2337

* Insert a fully prepared request at the back of the io scheduler queue

2341

* Insert a fully prepared request at the back of the io scheduler queue

2338

* for execution and wait for completion.

2342

* for execution and wait for completion.

2339

*/

2343

*/

2340

int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,

2344

int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,

2341

struct request *rq, int at_head)

2345

struct request *rq, int at_head)

2342

{

2346

{

2343

DECLARE_COMPLETION(wait);

2347

DECLARE_COMPLETION(wait);

2344

char sense[SCSI_SENSE_BUFFERSIZE];

2348

char sense[SCSI_SENSE_BUFFERSIZE];

2345

int err = 0;

2349

int err = 0;

2346

2350

2347

/*

2351

/*

2348

* we need an extra reference to the request, so we can look at

2352

* we need an extra reference to the request, so we can look at

2349

* it after io completion

2353

* it after io completion

2350

*/

2354

*/

2351

rq->ref_count++;

2355

rq->ref_count++;

2352

2356

2353

if (!rq->sense) {

2357

if (!rq->sense) {

2354

memset(sense, 0, sizeof(sense));

2358

memset(sense, 0, sizeof(sense));

2355

rq->sense = sense;

2359

rq->sense = sense;

2356

rq->sense_len = 0;

2360

rq->sense_len = 0;

2357

}

2361

}

2358

2362

2359

rq->waiting = &wait;

2363

rq->waiting = &wait;

2360

blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);

2364

blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);

2361

wait_for_completion(&wait);

2365

wait_for_completion(&wait);

2362

rq->waiting = NULL;

2366

rq->waiting = NULL;

2363

2367

2364

if (rq->errors)

2368

if (rq->errors)

2365

err = -EIO;

2369

err = -EIO;

2366

2370

2367

return err;

2371

return err;

2368

}

2372

}

2369

2373

2370

EXPORT_SYMBOL(blk_execute_rq);

2374

EXPORT_SYMBOL(blk_execute_rq);

2371

2375

2372

/**

2376

/**

2373

* blkdev_issue_flush - queue a flush

2377

* blkdev_issue_flush - queue a flush

2374

* @bdev: blockdev to issue flush for

2378

* @bdev: blockdev to issue flush for

2375

* @error_sector: error sector

2379

* @error_sector: error sector

2376

*

2380

*

2377

* Description:

2381

* Description:

2378

* Issue a flush for the block device in question. Caller can supply

2382

* Issue a flush for the block device in question. Caller can supply

2379

* room for storing the error offset in case of a flush error, if they

2383

* room for storing the error offset in case of a flush error, if they

2380

* wish to. Caller must run wait_for_completion() on its own.

2384

* wish to. Caller must run wait_for_completion() on its own.

2381

*/

2385

*/

2382

int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)

2386

int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)

2383

{

2387

{

2384

request_queue_t *q;

2388

request_queue_t *q;

2385

2389

2386

if (bdev->bd_disk == NULL)

2390

if (bdev->bd_disk == NULL)

2387

return -ENXIO;

2391

return -ENXIO;

2388

2392

2389

q = bdev_get_queue(bdev);

2393

q = bdev_get_queue(bdev);

2390

if (!q)

2394

if (!q)

2391

return -ENXIO;

2395

return -ENXIO;

2392

if (!q->issue_flush_fn)

2396

if (!q->issue_flush_fn)

2393

return -EOPNOTSUPP;

2397

return -EOPNOTSUPP;

2394

2398

2395

return q->issue_flush_fn(q, bdev->bd_disk, error_sector);

2399

return q->issue_flush_fn(q, bdev->bd_disk, error_sector);

2396

}

2400

}

2397

2401

2398

EXPORT_SYMBOL(blkdev_issue_flush);

2402

EXPORT_SYMBOL(blkdev_issue_flush);

2399

2403

2400

static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)

2404

static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)

2401

{

2405

{

2402

int rw = rq_data_dir(rq);

2406

int rw = rq_data_dir(rq);

2403

2407

2404

if (!blk_fs_request(rq) || !rq->rq_disk)

2408

if (!blk_fs_request(rq) || !rq->rq_disk)

2405

return;

2409

return;

2406

2410

2407

if (!new_io) {

2411

if (!new_io) {

2408

__disk_stat_inc(rq->rq_disk, merges[rw]);

2412

__disk_stat_inc(rq->rq_disk, merges[rw]);

2409

} else {

2413

} else {

2410

disk_round_stats(rq->rq_disk);

2414

disk_round_stats(rq->rq_disk);

2411

rq->rq_disk->in_flight++;

2415

rq->rq_disk->in_flight++;

2412

}

2416

}

2413

}

2417

}

2414

2418

2415

/*

2419

/*

2416

* add-request adds a request to the linked list.

2420

* add-request adds a request to the linked list.

2417

* queue lock is held and interrupts disabled, as we muck with the

2421

* queue lock is held and interrupts disabled, as we muck with the

2418

* request queue list.

2422

* request queue list.

2419

*/

2423

*/

2420

static inline void add_request(request_queue_t * q, struct request * req)

2424

static inline void add_request(request_queue_t * q, struct request * req)

2421

{

2425

{

2422

drive_stat_acct(req, req->nr_sectors, 1);

2426

drive_stat_acct(req, req->nr_sectors, 1);

2423

2427

2424

if (q->activity_fn)

2428

if (q->activity_fn)

2425

q->activity_fn(q->activity_data, rq_data_dir(req));

2429

q->activity_fn(q->activity_data, rq_data_dir(req));

2426

2430

2427

/*

2431

/*

2428

* elevator indicated where it wants this request to be

2432

* elevator indicated where it wants this request to be

2429

* inserted at elevator_merge time

2433

* inserted at elevator_merge time

2430

*/

2434

*/

2431

__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);

2435

__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);

2432

}

2436

}

2433

2437

2434

/*

2438

/*

2435

* disk_round_stats() - Round off the performance stats on a struct

2439

* disk_round_stats() - Round off the performance stats on a struct

2436

* disk_stats.

2440

* disk_stats.

2437

*

2441

*

2438

* The average IO queue length and utilisation statistics are maintained

2442

* The average IO queue length and utilisation statistics are maintained

2439

* by observing the current state of the queue length and the amount of

2443

* by observing the current state of the queue length and the amount of

2440

* time it has been in this state for.

2444

* time it has been in this state for.

2441

*

2445

*

2442

* Normally, that accounting is done on IO completion, but that can result

2446

* Normally, that accounting is done on IO completion, but that can result

2443

* in more than a second's worth of IO being accounted for within any one

2447

* in more than a second's worth of IO being accounted for within any one

2444

* second, leading to >100% utilisation. To deal with that, we call this

2448

* second, leading to >100% utilisation. To deal with that, we call this

2445

* function to do a round-off before returning the results when reading

2449

* function to do a round-off before returning the results when reading

2446

* /proc/diskstats. This accounts immediately for all queue usage up to

2450

* /proc/diskstats. This accounts immediately for all queue usage up to

2447

* the current jiffies and restarts the counters again.

2451

* the current jiffies and restarts the counters again.

2448

*/

2452

*/

2449

void disk_round_stats(struct gendisk *disk)

2453

void disk_round_stats(struct gendisk *disk)

2450

{

2454

{

2451

unsigned long now = jiffies;

2455

unsigned long now = jiffies;

2452

2456

2453

if (now == disk->stamp)

2457

if (now == disk->stamp)

2454

return;

2458

return;

2455

2459

2456

if (disk->in_flight) {

2460

if (disk->in_flight) {

2457

__disk_stat_add(disk, time_in_queue,

2461

__disk_stat_add(disk, time_in_queue,

2458

disk->in_flight * (now - disk->stamp));

2462

disk->in_flight * (now - disk->stamp));

2459

__disk_stat_add(disk, io_ticks, (now - disk->stamp));

2463

__disk_stat_add(disk, io_ticks, (now - disk->stamp));

2460

}

2464

}

2461

disk->stamp = now;

2465

disk->stamp = now;

2462

}

2466

}

2463

2467

2464

/*

2468

/*

2465

* queue lock must be held

2469

* queue lock must be held

2466

*/

2470

*/

2467

void __blk_put_request(request_queue_t *q, struct request *req)

2471

void __blk_put_request(request_queue_t *q, struct request *req)

2468

{

2472

{

2469

struct request_list *rl = req->rl;

2473

struct request_list *rl = req->rl;

2470

2474

2471

if (unlikely(!q))

2475

if (unlikely(!q))

2472

return;

2476

return;

2473

if (unlikely(--req->ref_count))

2477

if (unlikely(--req->ref_count))

2474

return;

2478

return;

2475

2479

2476

elv_completed_request(q, req);

2480

elv_completed_request(q, req);

2477

2481

2478

req->rq_status = RQ_INACTIVE;

2482

req->rq_status = RQ_INACTIVE;

2479

req->rl = NULL;

2483

req->rl = NULL;

2480

2484

2481

/*

2485

/*

2482

* Request may not have originated from ll_rw_blk. if not,

2486

* Request may not have originated from ll_rw_blk. if not,

2483

* it didn't come out of our reserved rq pools

2487

* it didn't come out of our reserved rq pools

2484

*/

2488

*/

2485

if (rl) {

2489

if (rl) {

2486

int rw = rq_data_dir(req);

2490

int rw = rq_data_dir(req);

2487

int priv = req->flags & REQ_ELVPRIV;

2491

int priv = req->flags & REQ_ELVPRIV;

2488

2492

2489

BUG_ON(!list_empty(&req->queuelist));

2493

BUG_ON(!list_empty(&req->queuelist));

2490

2494

2491

blk_free_request(q, req);

2495

blk_free_request(q, req);

2492

freed_request(q, rw, priv);

2496

freed_request(q, rw, priv);

2493

}

2497

}

2494

}

2498

}

2495

2499

2496

EXPORT_SYMBOL_GPL(__blk_put_request);

2500

EXPORT_SYMBOL_GPL(__blk_put_request);

2497

2501

2498

void blk_put_request(struct request *req)

2502

void blk_put_request(struct request *req)

2499

{

2503

{

2500

unsigned long flags;

2504

unsigned long flags;

2501

request_queue_t *q = req->q;

2505

request_queue_t *q = req->q;

2502

2506

2503

/*

2507

/*

2504

* Gee, IDE calls in w/ NULL q. Fix IDE and remove the

2508

* Gee, IDE calls in w/ NULL q. Fix IDE and remove the

2505

* following if (q) test.

2509

* following if (q) test.

2506

*/

2510

*/

2507

if (q) {

2511

if (q) {

2508

spin_lock_irqsave(q->queue_lock, flags);

2512

spin_lock_irqsave(q->queue_lock, flags);

2509

__blk_put_request(q, req);

2513

__blk_put_request(q, req);

2510

spin_unlock_irqrestore(q->queue_lock, flags);

2514

spin_unlock_irqrestore(q->queue_lock, flags);

2511

}

2515

}

2512

}

2516

}

2513

2517

2514

EXPORT_SYMBOL(blk_put_request);

2518

EXPORT_SYMBOL(blk_put_request);

2515

2519

2516

/**

2520

/**

2517

* blk_end_sync_rq - executes a completion event on a request

2521

* blk_end_sync_rq - executes a completion event on a request

2518

* @rq: request to complete

2522

* @rq: request to complete

2519

*/

2523

*/

2520

void blk_end_sync_rq(struct request *rq)

2524

void blk_end_sync_rq(struct request *rq)

2521

{

2525

{

2522

struct completion *waiting = rq->waiting;

2526

struct completion *waiting = rq->waiting;

2523

2527

2524

rq->waiting = NULL;

2528

rq->waiting = NULL;

2525

__blk_put_request(rq->q, rq);

2529

__blk_put_request(rq->q, rq);

2526

2530

2527

/*

2531

/*

2528

* complete last, if this is a stack request the process (and thus

2532

* complete last, if this is a stack request the process (and thus

2529

* the rq pointer) could be invalid right after this complete()

2533

* the rq pointer) could be invalid right after this complete()

2530

*/

2534

*/

2531

complete(waiting);

2535

complete(waiting);

2532

}

2536

}

2533

EXPORT_SYMBOL(blk_end_sync_rq);

2537

EXPORT_SYMBOL(blk_end_sync_rq);

2534

2538

2535

/**

2539

/**

2536

* blk_congestion_wait - wait for a queue to become uncongested

2540

* blk_congestion_wait - wait for a queue to become uncongested

2537

* @rw: READ or WRITE

2541

* @rw: READ or WRITE

2538

* @timeout: timeout in jiffies

2542

* @timeout: timeout in jiffies

2539

*

2543

*

2540

* Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.

2544

* Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.

2541

* If no queues are congested then just wait for the next request to be

2545

* If no queues are congested then just wait for the next request to be

2542

* returned.

2546

* returned.

2543

*/

2547

*/

2544

long blk_congestion_wait(int rw, long timeout)

2548

long blk_congestion_wait(int rw, long timeout)

2545

{

2549

{

2546

long ret;

2550

long ret;

2547

DEFINE_WAIT(wait);

2551

DEFINE_WAIT(wait);

2548

wait_queue_head_t *wqh = &congestion_wqh[rw];

2552

wait_queue_head_t *wqh = &congestion_wqh[rw];

2549

2553

2550

prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);

2554

prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);

2551

ret = io_schedule_timeout(timeout);

2555

ret = io_schedule_timeout(timeout);

2552

finish_wait(wqh, &wait);

2556

finish_wait(wqh, &wait);

2553

return ret;

2557

return ret;

2554

}

2558

}

2555

2559

2556

EXPORT_SYMBOL(blk_congestion_wait);

2560

EXPORT_SYMBOL(blk_congestion_wait);

2557

2561

2558

/*

2562

/*

2559

* Has to be called with the request spinlock acquired

2563

* Has to be called with the request spinlock acquired

2560

*/

2564

*/

2561

static int attempt_merge(request_queue_t *q, struct request *req,

2565

static int attempt_merge(request_queue_t *q, struct request *req,

2562

struct request *next)

2566

struct request *next)

2563

{

2567

{

2564

if (!rq_mergeable(req) || !rq_mergeable(next))

2568

if (!rq_mergeable(req) || !rq_mergeable(next))

2565

return 0;

2569

return 0;

2566

2570

2567

/*

2571

/*

2568

* not contigious

2572

* not contigious

2569

*/

2573

*/

2570

if (req->sector + req->nr_sectors != next->sector)

2574

if (req->sector + req->nr_sectors != next->sector)

2571

return 0;

2575

return 0;

2572

2576

2573

if (rq_data_dir(req) != rq_data_dir(next)

2577

if (rq_data_dir(req) != rq_data_dir(next)

2574

|| req->rq_disk != next->rq_disk

2578

|| req->rq_disk != next->rq_disk

2575

|| next->waiting || next->special)

2579

|| next->waiting || next->special)

2576

return 0;

2580

return 0;

2577

2581

2578

/*

2582

/*

2579

* If we are allowed to merge, then append bio list

2583

* If we are allowed to merge, then append bio list

2580

* from next to rq and release next. merge_requests_fn

2584

* from next to rq and release next. merge_requests_fn

2581

* will have updated segment counts, update sector

2585

* will have updated segment counts, update sector

2582

* counts here.

2586

* counts here.

2583

*/

2587

*/

2584

if (!q->merge_requests_fn(q, req, next))

2588

if (!q->merge_requests_fn(q, req, next))

2585

return 0;

2589

return 0;

2586

2590

2587

/*

2591

/*

2588

* At this point we have either done a back merge

2592

* At this point we have either done a back merge

2589

* or front merge. We need the smaller start_time of

2593

* or front merge. We need the smaller start_time of

2590

* the merged requests to be the current request

2594

* the merged requests to be the current request

2591

* for accounting purposes.

2595

* for accounting purposes.

2592

*/

2596

*/

2593

if (time_after(req->start_time, next->start_time))

2597

if (time_after(req->start_time, next->start_time))

2594

req->start_time = next->start_time;

2598

req->start_time = next->start_time;

2595

2599

2596

req->biotail->bi_next = next->bio;

2600

req->biotail->bi_next = next->bio;

2597

req->biotail = next->biotail;

2601

req->biotail = next->biotail;

2598

2602

2599

req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;

2603

req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;

2600

2604

2601

elv_merge_requests(q, req, next);

2605

elv_merge_requests(q, req, next);

2602

2606

2603

if (req->rq_disk) {

2607

if (req->rq_disk) {

2604

disk_round_stats(req->rq_disk);

2608

disk_round_stats(req->rq_disk);

2605

req->rq_disk->in_flight--;

2609

req->rq_disk->in_flight--;

2606

}

2610

}

2607

2611

2608

req->ioprio = ioprio_best(req->ioprio, next->ioprio);

2612

req->ioprio = ioprio_best(req->ioprio, next->ioprio);

2609

2613

2610

__blk_put_request(q, next);

2614

__blk_put_request(q, next);

2611

return 1;

2615

return 1;

2612

}

2616

}

2613

2617

2614

/*
 * Ask the elevator for the request following @rq and, if there is one, try
 * to merge it into @rq.  Returns attempt_merge()'s result, or 0 when the
 * elevator reports no following request.
 */
static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
{
	struct request *latter = elv_latter_request(q, rq);

	return latter ? attempt_merge(q, rq, latter) : 0;
}

2623

2627

2624

/*
 * Ask the elevator for the request preceding @rq and, if there is one, try
 * to merge @rq into it.  Returns attempt_merge()'s result, or 0 when the
 * elevator reports no preceding request.
 */
static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
{
	struct request *former = elv_former_request(q, rq);

	return former ? attempt_merge(q, former, rq) : 0;
}

2633

2637

2634

/**

2638

/**

2635

* blk_attempt_remerge - attempt to remerge active head with next request

2639

* blk_attempt_remerge - attempt to remerge active head with next request

2636

* @q: The &request_queue_t belonging to the device

2640

* @q: The &request_queue_t belonging to the device

2637

* @rq: The head request (usually)

2641

* @rq: The head request (usually)

2638

*

2642

*

2639

* Description:

2643

* Description:

2640

* For head-active devices, the queue can easily be unplugged so quickly

2644

* For head-active devices, the queue can easily be unplugged so quickly

2641

* that proper merging is not done on the front request. This may hurt

2645

* that proper merging is not done on the front request. This may hurt

2642

* performance greatly for some devices. The block layer cannot safely

2646

* performance greatly for some devices. The block layer cannot safely

2643

* do merging on that first request for these queues, but the driver can

2647

* do merging on that first request for these queues, but the driver can

2644

* call this function and make it happen any way. Only the driver knows

2648

* call this function and make it happen any way. Only the driver knows

2645

* when it is safe to do so.

2649

* when it is safe to do so.

2646

**/

2650

**/

2647

void blk_attempt_remerge(request_queue_t *q, struct request *rq)

2651

void blk_attempt_remerge(request_queue_t *q, struct request *rq)

2648

{

2652

{

2649

unsigned long flags;

2653

unsigned long flags;

2650

2654

2651

spin_lock_irqsave(q->queue_lock, flags);

2655

spin_lock_irqsave(q->queue_lock, flags);

2652

attempt_back_merge(q, rq);

2656

attempt_back_merge(q, rq);

2653

spin_unlock_irqrestore(q->queue_lock, flags);

2657

spin_unlock_irqrestore(q->queue_lock, flags);

2654

}

2658

}

2655

2659

2656

EXPORT_SYMBOL(blk_attempt_remerge);

2660

EXPORT_SYMBOL(blk_attempt_remerge);

2657

2661

2658

/*
 * Queue @bio on @q, either by merging it into an existing request or by
 * setting up a fresh one.
 *
 * A barrier bio submitted to a queue whose ->ordered is QUEUE_ORDERED_NONE
 * is completed on the spot with -EOPNOTSUPP.  Otherwise the elevator is
 * asked for a back/front merge candidate; if there is none (or the queue's
 * merge hook refuses) a new request is obtained with get_request_wait(),
 * filled in from the bio and added to the queue.  Sync bios unplug the
 * queue before the lock is dropped.
 *
 * Returns 0 on every path.
 */
static int __make_request(request_queue_t *q, struct bio *bio)
{
	/*
	 * All of these are read from the bio before blk_queue_bounce() is
	 * handed &bio below.
	 */
	sector_t sector = bio->bi_sector;
	int nr_sectors = bio_sectors(bio);
	int cur_nr_sectors = bio_cur_sectors(bio);
	unsigned short prio = bio_prio(bio);
	int rw = bio_data_dir(bio);
	int sync = bio_sync(bio);
	struct request *req;
	int barrier, merge;

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	spin_lock_prefetch(q->queue_lock);

	barrier = bio_barrier(bio);
	if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
		/* barriers are not supported on this queue: fail the bio */
		bio_endio(bio, nr_sectors << 9, -EOPNOTSUPP);
		return 0;
	}

	spin_lock_irq(q->queue_lock);

	/* barriers are never merged; an empty queue has nothing to merge with */
	if (unlikely(barrier) || elv_queue_empty(q))
		goto get_rq;

	merge = elv_merge(q, &req, bio);
	if (merge == ELEVATOR_BACK_MERGE) {
		BUG_ON(!rq_mergeable(req));

		if (q->back_merge_fn(q, req, bio)) {
			/* append the bio to the request's chain */
			req->biotail->bi_next = bio;
			req->biotail = bio;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req);
			goto out;
		}
	} else if (merge == ELEVATOR_FRONT_MERGE) {
		BUG_ON(!rq_mergeable(req));

		if (q->front_merge_fn(q, req, bio)) {
			/* the bio becomes the new head of the chain */
			bio->bi_next = req->bio;
			req->bio = bio;

			/*
			 * may not be valid. if the low level driver said
			 * it didn't need a bounce buffer then it better
			 * not touch req->buffer either...
			 */
			req->buffer = bio_data(bio);
			req->current_nr_sectors = cur_nr_sectors;
			req->hard_cur_sectors = cur_nr_sectors;
			req->sector = req->hard_sector = sector;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req);
			goto out;
		}
	}
	/*
	 * ELV_NO_MERGE: elevator says don't/can't merge -- or the queue's
	 * merge hook turned the candidate down.  Fall through to get_rq.
	 */

get_rq:
	/*
	 * Grab a free request. This might sleep but can not fail.
	 * Returns with the queue unlocked.
	 */
	req = get_request_wait(q, rw, bio);

	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */

	req->flags |= REQ_CMD;

	/*
	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
	 */
	if (bio_rw_ahead(bio) || bio_failfast(bio))
		req->flags |= REQ_FAILFAST;

	/*
	 * REQ_BARRIER implies no merging, but lets make it explicit
	 */
	if (unlikely(barrier))
		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);

	req->errors = 0;
	req->hard_sector = req->sector = sector;
	req->hard_nr_sectors = req->nr_sectors = nr_sectors;
	req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
	req->nr_phys_segments = bio_phys_segments(q, bio);
	req->nr_hw_segments = bio_hw_segments(q, bio);
	req->buffer = bio_data(bio);	/* see ->buffer comment above */
	req->waiting = NULL;
	req->bio = req->biotail = bio;
	req->ioprio = prio;
	req->rq_disk = bio->bi_bdev->bd_disk;
	req->start_time = jiffies;

	/* retake the lock that get_request_wait() dropped */
	spin_lock_irq(q->queue_lock);
	if (elv_queue_empty(q))
		blk_plug_device(q);
	add_request(q, req);
out:
	if (sync)
		__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
	return 0;
}

2796

2800

2797

/*

2801

/*

2798

* If bio->bi_dev is a partition, remap the location

2802

* If bio->bi_dev is a partition, remap the location

2799

*/

2803

*/

2800

static inline void blk_partition_remap(struct bio *bio)

2804

static inline void blk_partition_remap(struct bio *bio)

2801

{

2805

{

2802

struct block_device *bdev = bio->bi_bdev;

2806

struct block_device *bdev = bio->bi_bdev;

2803

2807

2804

if (bdev != bdev->bd_contains) {

2808

if (bdev != bdev->bd_contains) {

2805

struct hd_struct *p = bdev->bd_part;

2809

struct hd_struct *p = bdev->bd_part;

2806

const int rw = bio_data_dir(bio);

2810

const int rw = bio_data_dir(bio);

2807

2811

2808

p->sectors[rw] += bio_sectors(bio);

2812

p->sectors[rw] += bio_sectors(bio);

2809

p->ios[rw]++;

2813

p->ios[rw]++;

2810

2814

2811

bio->bi_sector += p->start_sect;

2815

bio->bi_sector += p->start_sect;

2812

bio->bi_bdev = bdev->bd_contains;

2816

bio->bi_bdev = bdev->bd_contains;

2813

}

2817

}

2814

}

2818

}

2815

2819

2816

static void handle_bad_sector(struct bio *bio)

2820

static void handle_bad_sector(struct bio *bio)

2817

{

2821

{

2818

char b[BDEVNAME_SIZE];

2822

char b[BDEVNAME_SIZE];

2819

2823

2820

printk(KERN_INFO "attempt to access beyond end of device\n");

2824

printk(KERN_INFO "attempt to access beyond end of device\n");

2821

printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",

2825

printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",

2822

bdevname(bio->bi_bdev, b),

2826

bdevname(bio->bi_bdev, b),

2823

bio->bi_rw,

2827

bio->bi_rw,

2824

(unsigned long long)bio->bi_sector + bio_sectors(bio),

2828

(unsigned long long)bio->bi_sector + bio_sectors(bio),

2825

(long long)(bio->bi_bdev->bd_inode->i_size >> 9));

2829

(long long)(bio->bi_bdev->bd_inode->i_size >> 9));

2826

2830

2827

set_bit(BIO_EOF, &bio->bi_flags);

2831

set_bit(BIO_EOF, &bio->bi_flags);

2828

}

2832

}

2829

2833

2830

/**

2834

/**

2831

* generic_make_request: hand a buffer to its device driver for I/O

2835

* generic_make_request: hand a buffer to its device driver for I/O

2832

* @bio: The bio describing the location in memory and on the device.

2836

* @bio: The bio describing the location in memory and on the device.

2833

*

2837

*

2834

* generic_make_request() is used to make I/O requests of block

2838

* generic_make_request() is used to make I/O requests of block

2835

* devices. It is passed a &struct bio, which describes the I/O that needs

2839

* devices. It is passed a &struct bio, which describes the I/O that needs

2836

* to be done.

2840

* to be done.

2837

*

2841

*

2838

* generic_make_request() does not return any status. The

2842

* generic_make_request() does not return any status. The

2839

* success/failure status of the request, along with notification of

2843

* success/failure status of the request, along with notification of

2840

* completion, is delivered asynchronously through the bio->bi_end_io

2844

* completion, is delivered asynchronously through the bio->bi_end_io

2841

* function described (one day) else where.

2845

* function described (one day) else where.

2842

*

2846

*

2843

* The caller of generic_make_request must make sure that bi_io_vec

2847

* The caller of generic_make_request must make sure that bi_io_vec

2844

* are set to describe the memory buffer, and that bi_dev and bi_sector are

2848

* are set to describe the memory buffer, and that bi_dev and bi_sector are

2845

* set to describe the device address, and the

2849

* set to describe the device address, and the

2846

* bi_end_io and optionally bi_private are set to describe how

2850

* bi_end_io and optionally bi_private are set to describe how

2847

* completion notification should be signaled.

2851

* completion notification should be signaled.

2848

*

2852

*

2849

* generic_make_request and the drivers it calls may use bi_next if this

2853

* generic_make_request and the drivers it calls may use bi_next if this

2850

* bio happens to be merged with someone else, and may change bi_dev and

2854

* bio happens to be merged with someone else, and may change bi_dev and

2851

* bi_sector for remaps as it sees fit. So the values of these fields

2855

* bi_sector for remaps as it sees fit. So the values of these fields

2852

* should NOT be depended on after the call to generic_make_request.

2856

* should NOT be depended on after the call to generic_make_request.

2853

*/

2857

*/

2854

void generic_make_request(struct bio *bio)

2858

void generic_make_request(struct bio *bio)

2855

{

2859

{

2856

request_queue_t *q;

2860

request_queue_t *q;

2857

sector_t maxsector;

2861

sector_t maxsector;

2858

int ret, nr_sectors = bio_sectors(bio);

2862

int ret, nr_sectors = bio_sectors(bio);

2859

2863

2860

might_sleep();

2864

might_sleep();

2861

/* Test device or partition size, when known. */

2865

/* Test device or partition size, when known. */

2862

maxsector = bio->bi_bdev->bd_inode->i_size >> 9;

2866

maxsector = bio->bi_bdev->bd_inode->i_size >> 9;

2863

if (maxsector) {

2867

if (maxsector) {

2864

sector_t sector = bio->bi_sector;

2868

sector_t sector = bio->bi_sector;

2865

2869

2866

if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {

2870

if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {

2867

/*

2871

/*

2868

* This may well happen - the kernel calls bread()

2872

* This may well happen - the kernel calls bread()

2869

* without checking the size of the device, e.g., when

2873

* without checking the size of the device, e.g., when

2870

* mounting a device.

2874

* mounting a device.

2871

*/

2875

*/

2872

handle_bad_sector(bio);

2876

handle_bad_sector(bio);

2873

goto end_io;

2877

goto end_io;

2874

}

2878

}

2875

}

2879

}

2876

2880

2877

/*

2881

/*

2878

* Resolve the mapping until finished. (drivers are

2882

* Resolve the mapping until finished. (drivers are

2879

* still free to implement/resolve their own stacking

2883

* still free to implement/resolve their own stacking

2880

* by explicitly returning 0)

2884

* by explicitly returning 0)

2881

*

2885

*

2882

* NOTE: we don't repeat the blk_size check for each new device.

2886

* NOTE: we don't repeat the blk_size check for each new device.

2883

* Stacking drivers are expected to know what they are doing.

2887

* Stacking drivers are expected to know what they are doing.

2884

*/

2888

*/

2885

do {

2889

do {

2886

char b[BDEVNAME_SIZE];

2890

char b[BDEVNAME_SIZE];

2887

2891

2888

q = bdev_get_queue(bio->bi_bdev);

2892

q = bdev_get_queue(bio->bi_bdev);

2889

if (!q) {

2893

if (!q) {

2890

printk(KERN_ERR

2894

printk(KERN_ERR

2891

"generic_make_request: Trying to access "

2895

"generic_make_request: Trying to access "

2892

"nonexistent block-device %s (%Lu)\n",

2896

"nonexistent block-device %s (%Lu)\n",

2893

bdevname(bio->bi_bdev, b),

2897

bdevname(bio->bi_bdev, b),

2894

(long long) bio->bi_sector);

2898

(long long) bio->bi_sector);

2895

end_io:

2899

end_io:

2896

bio_endio(bio, bio->bi_size, -EIO);

2900

bio_endio(bio, bio->bi_size, -EIO);

2897

break;

2901

break;

2898

}

2902

}

2899

2903

2900

if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {

2904

if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {

2901

printk("bio too big device %s (%u > %u)\n",

2905

printk("bio too big device %s (%u > %u)\n",

2902

bdevname(bio->bi_bdev, b),

2906

bdevname(bio->bi_bdev, b),

2903

bio_sectors(bio),

2907

bio_sectors(bio),

2904

q->max_hw_sectors);

2908

q->max_hw_sectors);

2905

goto end_io;

2909

goto end_io;

2906

}

2910

}

2907

2911

2908

if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))

2912

if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))

2909

goto end_io;

2913

goto end_io;

2910

2914

2911

/*

2915

/*

2912

* If this device has partitions, remap block n

2916

* If this device has partitions, remap block n

2913

* of partition p to block n+start(p) of the disk.

2917

* of partition p to block n+start(p) of the disk.

2914

*/

2918

*/

2915

blk_partition_remap(bio);

2919

blk_partition_remap(bio);

2916

2920

2917

ret = q->make_request_fn(q, bio);

2921

ret = q->make_request_fn(q, bio);

2918

} while (ret);

2922

} while (ret);

2919

}

2923

}

2920

2924

2921

EXPORT_SYMBOL(generic_make_request);

2925

EXPORT_SYMBOL(generic_make_request);

2922

2926

2923

/**

2927

/**

2924

* submit_bio: submit a bio to the block device layer for I/O

2928

* submit_bio: submit a bio to the block device layer for I/O

2925

* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)

2929

* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)

2926

* @bio: The &struct bio which describes the I/O

2930

* @bio: The &struct bio which describes the I/O

2927

*

2931

*

2928

* submit_bio() is very similar in purpose to generic_make_request(), and

2932

* submit_bio() is very similar in purpose to generic_make_request(), and

2929

* uses that function to do most of the work. Both are fairly rough

2933

* uses that function to do most of the work. Both are fairly rough

2930

* interfaces, @bio must be presetup and ready for I/O.

2934

* interfaces, @bio must be presetup and ready for I/O.

2931

*

2935

*

2932

*/

2936

*/

2933

void submit_bio(int rw, struct bio *bio)

2937

void submit_bio(int rw, struct bio *bio)

2934

{

2938

{

2935

int count = bio_sectors(bio);

2939

int count = bio_sectors(bio);

2936

2940

2937

BIO_BUG_ON(!bio->bi_size);

2941

BIO_BUG_ON(!bio->bi_size);

2938

BIO_BUG_ON(!bio->bi_io_vec);

2942

BIO_BUG_ON(!bio->bi_io_vec);

2939

bio->bi_rw |= rw;

2943

bio->bi_rw |= rw;

2940

if (rw & WRITE)

2944

if (rw & WRITE)

2941

mod_page_state(pgpgout, count);

2945

mod_page_state(pgpgout, count);

2942

else

2946

else

2943

mod_page_state(pgpgin, count);

2947

mod_page_state(pgpgin, count);

2944

2948

2945

if (unlikely(block_dump)) {

2949

if (unlikely(block_dump)) {

2946

char b[BDEVNAME_SIZE];

2950

char b[BDEVNAME_SIZE];

2947

printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",

2951

printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",

2948

current->comm, current->pid,

2952

current->comm, current->pid,

2949

(rw & WRITE) ? "WRITE" : "READ",

2953

(rw & WRITE) ? "WRITE" : "READ",

2950

(unsigned long long)bio->bi_sector,

2954

(unsigned long long)bio->bi_sector,

2951

bdevname(bio->bi_bdev,b));

2955

bdevname(bio->bi_bdev,b));

2952

}

2956

}

2953

2957

2954

generic_make_request(bio);

2958

generic_make_request(bio);

2955

}

2959

}

2956

2960

2957

EXPORT_SYMBOL(submit_bio);

2961

EXPORT_SYMBOL(submit_bio);

2958

2962

2959

static void blk_recalc_rq_segments(struct request *rq)

2963

static void blk_recalc_rq_segments(struct request *rq)

2960

{

2964

{

2961

struct bio *bio, *prevbio = NULL;

2965

struct bio *bio, *prevbio = NULL;

2962

int nr_phys_segs, nr_hw_segs;

2966

int nr_phys_segs, nr_hw_segs;

2963

unsigned int phys_size, hw_size;

2967

unsigned int phys_size, hw_size;

2964

request_queue_t *q = rq->q;

2968

request_queue_t *q = rq->q;

2965

2969

2966

if (!rq->bio)

2970

if (!rq->bio)

2967

return;

2971

return;

2968

2972

2969

phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;

2973

phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;

2970

rq_for_each_bio(bio, rq) {

2974

rq_for_each_bio(bio, rq) {

2971

/* Force bio hw/phys segs to be recalculated. */

2975

/* Force bio hw/phys segs to be recalculated. */

2972

bio->bi_flags &= ~(1 << BIO_SEG_VALID);

2976

bio->bi_flags &= ~(1 << BIO_SEG_VALID);

2973

2977

2974

nr_phys_segs += bio_phys_segments(q, bio);

2978

nr_phys_segs += bio_phys_segments(q, bio);

2975

nr_hw_segs += bio_hw_segments(q, bio);

2979

nr_hw_segs += bio_hw_segments(q, bio);

2976

if (prevbio) {

2980

if (prevbio) {

2977

int pseg = phys_size + prevbio->bi_size + bio->bi_size;

2981

int pseg = phys_size + prevbio->bi_size + bio->bi_size;

2978

int hseg = hw_size + prevbio->bi_size + bio->bi_size;

2982

int hseg = hw_size + prevbio->bi_size + bio->bi_size;

2979

2983

2980

if (blk_phys_contig_segment(q, prevbio, bio) &&

2984

if (blk_phys_contig_segment(q, prevbio, bio) &&

2981

pseg <= q->max_segment_size) {

2985

pseg <= q->max_segment_size) {

2982

nr_phys_segs--;

2986

nr_phys_segs--;

2983

phys_size += prevbio->bi_size + bio->bi_size;

2987

phys_size += prevbio->bi_size + bio->bi_size;

2984

} else

2988

} else

2985

phys_size = 0;

2989

phys_size = 0;

2986

2990

2987

if (blk_hw_contig_segment(q, prevbio, bio) &&

2991

if (blk_hw_contig_segment(q, prevbio, bio) &&

2988

hseg <= q->max_segment_size) {

2992

hseg <= q->max_segment_size) {

2989

nr_hw_segs--;

2993

nr_hw_segs--;

2990

hw_size += prevbio->bi_size + bio->bi_size;

2994

hw_size += prevbio->bi_size + bio->bi_size;

2991

} else

2995

} else

2992

hw_size = 0;

2996

hw_size = 0;

2993

}

2997

}

2994

prevbio = bio;

2998

prevbio = bio;

2995

}

2999

}

2996

3000

2997

rq->nr_phys_segments = nr_phys_segs;

3001

rq->nr_phys_segments = nr_phys_segs;

2998

rq->nr_hw_segments = nr_hw_segs;

3002

rq->nr_hw_segments = nr_hw_segs;

2999

}

3003

}

3000

3004

3001

static void blk_recalc_rq_sectors(struct request *rq, int nsect)

3005

static void blk_recalc_rq_sectors(struct request *rq, int nsect)

3002

{

3006

{

3003

if (blk_fs_request(rq)) {

3007

if (blk_fs_request(rq)) {

3004

rq->hard_sector += nsect;

3008

rq->hard_sector += nsect;

3005

rq->hard_nr_sectors -= nsect;

3009

rq->hard_nr_sectors -= nsect;

3006

3010

3007

/*

3011

/*

3008

* Move the I/O submission pointers ahead if required.

3012

* Move the I/O submission pointers ahead if required.

3009

*/

3013

*/

3010

if ((rq->nr_sectors >= rq->hard_nr_sectors) &&

3014

if ((rq->nr_sectors >= rq->hard_nr_sectors) &&

3011

(rq->sector <= rq->hard_sector)) {

3015

(rq->sector <= rq->hard_sector)) {

3012

rq->sector = rq->hard_sector;

3016

rq->sector = rq->hard_sector;

3013

rq->nr_sectors = rq->hard_nr_sectors;

3017

rq->nr_sectors = rq->hard_nr_sectors;

3014

rq->hard_cur_sectors = bio_cur_sectors(rq->bio);

3018

rq->hard_cur_sectors = bio_cur_sectors(rq->bio);

3015

rq->current_nr_sectors = rq->hard_cur_sectors;

3019

rq->current_nr_sectors = rq->hard_cur_sectors;

3016

rq->buffer = bio_data(rq->bio);

3020

rq->buffer = bio_data(rq->bio);

3017

}

3021

}

3018

3022

3019

/*

3023

/*

3020

* if total number of sectors is less than the first segment

3024

* if total number of sectors is less than the first segment

3021

* size, something has gone terribly wrong

3025

* size, something has gone terribly wrong

3022

*/

3026

*/

3023

if (rq->nr_sectors < rq->current_nr_sectors) {

3027

if (rq->nr_sectors < rq->current_nr_sectors) {

3024

printk("blk: request botched\n");

3028

printk("blk: request botched\n");

3025

rq->nr_sectors = rq->current_nr_sectors;

3029

rq->nr_sectors = rq->current_nr_sectors;

3026

}

3030

}

3027

}

3031

}

3028

}

3032

}

3029

3033

3030

static int __end_that_request_first(struct request *req, int uptodate,

3034

static int __end_that_request_first(struct request *req, int uptodate,

3031

int nr_bytes)

3035

int nr_bytes)

3032

{

3036

{

3033

int total_bytes, bio_nbytes, error, next_idx = 0;

3037

int total_bytes, bio_nbytes, error, next_idx = 0;

3034

struct bio *bio;

3038

struct bio *bio;

3035

3039

3036

/*

3040

/*

3037

* extend uptodate bool to allow < 0 value to be direct io error

3041

* extend uptodate bool to allow < 0 value to be direct io error

3038

*/

3042

*/

3039

error = 0;

3043

error = 0;

3040

if (end_io_error(uptodate))

3044

if (end_io_error(uptodate))

3041

error = !uptodate ? -EIO : uptodate;

3045

error = !uptodate ? -EIO : uptodate;

3042

3046

3043

/*

3047

/*

3044

* for a REQ_BLOCK_PC request, we want to carry any eventual

3048

* for a REQ_BLOCK_PC request, we want to carry any eventual

3045

* sense key with us all the way through

3049

* sense key with us all the way through

3046

*/

3050

*/

3047

if (!blk_pc_request(req))

3051

if (!blk_pc_request(req))

3048

req->errors = 0;

3052

req->errors = 0;

3049

3053

3050

if (!uptodate) {

3054

if (!uptodate) {

3051

if (blk_fs_request(req) && !(req->flags & REQ_QUIET))

3055

if (blk_fs_request(req) && !(req->flags & REQ_QUIET))

3052

printk("end_request: I/O error, dev %s, sector %llu\n",

3056

printk("end_request: I/O error, dev %s, sector %llu\n",

3053

req->rq_disk ? req->rq_disk->disk_name : "?",

3057

req->rq_disk ? req->rq_disk->disk_name : "?",

3054

(unsigned long long)req->sector);

3058

(unsigned long long)req->sector);

3055

}

3059

}

3056

3060

3057

if (blk_fs_request(req) && req->rq_disk) {

3061

if (blk_fs_request(req) && req->rq_disk) {

3058

const int rw = rq_data_dir(req);

3062

const int rw = rq_data_dir(req);

3059

3063

3060

__disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);

3064

__disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);

3061

}

3065

}

3062

3066

3063

total_bytes = bio_nbytes = 0;

3067

total_bytes = bio_nbytes = 0;

3064

while ((bio = req->bio) != NULL) {

3068

while ((bio = req->bio) != NULL) {

3065

int nbytes;

3069

int nbytes;

3066

3070

3067

if (nr_bytes >= bio->bi_size) {

3071

if (nr_bytes >= bio->bi_size) {

3068

req->bio = bio->bi_next;

3072

req->bio = bio->bi_next;

3069

nbytes = bio->bi_size;

3073

nbytes = bio->bi_size;

3070

bio_endio(bio, nbytes, error);

3074

bio_endio(bio, nbytes, error);

3071

next_idx = 0;

3075

next_idx = 0;

3072

bio_nbytes = 0;

3076

bio_nbytes = 0;

3073

} else {

3077

} else {

3074

int idx = bio->bi_idx + next_idx;

3078

int idx = bio->bi_idx + next_idx;

3075

3079

3076

if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {

3080

if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {

3077

blk_dump_rq_flags(req, "__end_that");

3081

blk_dump_rq_flags(req, "__end_that");

3078

printk("%s: bio idx %d >= vcnt %d\n",

3082

printk("%s: bio idx %d >= vcnt %d\n",

3079

__FUNCTION__,

3083

__FUNCTION__,

3080

bio->bi_idx, bio->bi_vcnt);

3084

bio->bi_idx, bio->bi_vcnt);

3081

break;

3085

break;

3082

}

3086

}

3083

3087

3084

nbytes = bio_iovec_idx(bio, idx)->bv_len;

3088

nbytes = bio_iovec_idx(bio, idx)->bv_len;

3085

BIO_BUG_ON(nbytes > bio->bi_size);

3089

BIO_BUG_ON(nbytes > bio->bi_size);

3086

3090

3087

/*

3091

/*

3088

* not a complete bvec done

3092

* not a complete bvec done

3089

*/

3093

*/

3090

if (unlikely(nbytes > nr_bytes)) {

3094

if (unlikely(nbytes > nr_bytes)) {

3091

bio_nbytes += nr_bytes;

3095

bio_nbytes += nr_bytes;

3092

total_bytes += nr_bytes;

3096

total_bytes += nr_bytes;

3093

break;

3097

break;

3094

}

3098

}

3095

3099

3096

/*

3100

/*

3097

* advance to the next vector

3101

* advance to the next vector

3098

*/

3102

*/

3099

next_idx++;

3103

next_idx++;

3100

bio_nbytes += nbytes;

3104

bio_nbytes += nbytes;

3101

}

3105

}

3102

3106

3103

total_bytes += nbytes;

3107

total_bytes += nbytes;

3104

nr_bytes -= nbytes;

3108

nr_bytes -= nbytes;

3105

3109

3106

if ((bio = req->bio)) {

3110

if ((bio = req->bio)) {

3107

/*

3111

/*

3108

* end more in this run, or just return 'not-done'

3112

* end more in this run, or just return 'not-done'

3109

*/

3113

*/

3110

if (unlikely(nr_bytes <= 0))

3114

if (unlikely(nr_bytes <= 0))

3111

break;

3115

break;

3112

}

3116

}

3113

}

3117

}

3114

3118

3115

/*

3119

/*

3116

* completely done

3120

* completely done

3117

*/

3121

*/

3118

if (!req->bio)

3122

if (!req->bio)

3119

return 0;

3123

return 0;

3120

3124

3121

/*

3125

/*

3122

* if the request wasn't completed, update state

3126

* if the request wasn't completed, update state

3123

*/

3127

*/

3124

if (bio_nbytes) {

3128

if (bio_nbytes) {

3125

bio_endio(bio, bio_nbytes, error);

3129

bio_endio(bio, bio_nbytes, error);

3126

bio->bi_idx += next_idx;

3130

bio->bi_idx += next_idx;

3127

bio_iovec(bio)->bv_offset += nr_bytes;

3131

bio_iovec(bio)->bv_offset += nr_bytes;

3128

bio_iovec(bio)->bv_len -= nr_bytes;

3132

bio_iovec(bio)->bv_len -= nr_bytes;

3129

}

3133

}

3130

3134

3131

blk_recalc_rq_sectors(req, total_bytes >> 9);

3135

blk_recalc_rq_sectors(req, total_bytes >> 9);

3132

blk_recalc_rq_segments(req);

3136

blk_recalc_rq_segments(req);

3133

return 1;

3137

return 1;

3134

}

3138

}

3135

3139

3136

/**
 * end_that_request_first - complete a number of sectors of a request
 * @req:        the request being processed
 * @uptodate:   1 for success, 0 for I/O error, < 0 for specific error
 * @nr_sectors: number of sectors to end I/O on
 *
 * Description:
 *     Sector-granular front end to __end_that_request_first(): the sector
 *     count is converted to a byte count (sectors << 9) and the work is
 *     delegated to the byte-based helper, which ends I/O on that range and
 *     prepares @req for the next range of segments (if any).
 *
 * Return:
 *     0 - we are done with this request, call end_that_request_last()
 *     1 - still buffers pending for this request
 **/
int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
{
	const int nr_bytes = nr_sectors << 9;

	return __end_that_request_first(req, uptodate, nr_bytes);
}

EXPORT_SYMBOL(end_that_request_first);

3156

3160

3157

/**
 * end_that_request_chunk - complete a number of bytes of a request
 * @req:      the request being processed
 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Byte-granular counterpart of end_that_request_first(). The byte
 *     count is handed unchanged to __end_that_request_first(), which ends
 *     I/O on that many bytes of @req and sets the request up for the next
 *     range of segments (if any).
 *
 * Return:
 *     0 - we are done with this request, call end_that_request_last()
 *     1 - still buffers pending for this request
 **/
int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
{
	int pending;

	pending = __end_that_request_first(req, uptodate, nr_bytes);
	return pending;
}

EXPORT_SYMBOL(end_that_request_chunk);

3178

3182

3179

/*

3183

/*

3180

* queue lock must be held

3184

* queue lock must be held

3181

*/

3185

*/

3182

void end_that_request_last(struct request *req)

3186

void end_that_request_last(struct request *req)

3183

{

3187

{

3184

struct gendisk *disk = req->rq_disk;

3188

struct gendisk *disk = req->rq_disk;

3185

3189

3186

if (unlikely(laptop_mode) && blk_fs_request(req))

3190

if (unlikely(laptop_mode) && blk_fs_request(req))

3187

laptop_io_completion();

3191

laptop_io_completion();

3188

3192

3189

if (disk && blk_fs_request(req)) {

3193

if (disk && blk_fs_request(req)) {

3190

unsigned long duration = jiffies - req->start_time;

3194

unsigned long duration = jiffies - req->start_time;

3191

const int rw = rq_data_dir(req);

3195

const int rw = rq_data_dir(req);

3192

3196

3193

__disk_stat_inc(disk, ios[rw]);

3197

__disk_stat_inc(disk, ios[rw]);

3194

__disk_stat_add(disk, ticks[rw], duration);

3198

__disk_stat_add(disk, ticks[rw], duration);

3195

disk_round_stats(disk);

3199

disk_round_stats(disk);

3196

disk->in_flight--;

3200

disk->in_flight--;

3197

}

3201

}

3198

if (req->end_io)

3202

if (req->end_io)

3199

req->end_io(req);

3203

req->end_io(req);

3200

else

3204

else

3201

__blk_put_request(req->q, req);

3205

__blk_put_request(req->q, req);

3202

}

3206

}

3203

3207

3204

EXPORT_SYMBOL(end_that_request_last);

3208

EXPORT_SYMBOL(end_that_request_last);

3205

3209

3206

void end_request(struct request *req, int uptodate)

3210

void end_request(struct request *req, int uptodate)

3207

{

3211

{

3208

if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {

3212

if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {

3209

add_disk_randomness(req->rq_disk);

3213

add_disk_randomness(req->rq_disk);

3210

blkdev_dequeue_request(req);

3214

blkdev_dequeue_request(req);

3211

end_that_request_last(req);

3215

end_that_request_last(req);

3212

}

3216

}

3213

}

3217

}

3214

3218

3215

EXPORT_SYMBOL(end_request);

3219

EXPORT_SYMBOL(end_request);

3216

3220

3217

/*
 * Initialise the bio-derived fields of @rq from @bio: direction/flag bits,
 * segment counts, sector counts, data buffer, and the bio list itself
 * (@bio becomes both head and tail).
 */
void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
{
	/* first three bits are identical in rq->flags and bio->bi_rw */
	rq->flags |= (bio->bi_rw & 7);

	rq->nr_phys_segments = bio_phys_segments(q, bio);
	rq->nr_hw_segments = bio_hw_segments(q, bio);

	/* the "hard" counters start out equal to the working counters */
	rq->current_nr_sectors = bio_cur_sectors(bio);
	rq->hard_cur_sectors = rq->current_nr_sectors;
	rq->nr_sectors = bio_sectors(bio);
	rq->hard_nr_sectors = rq->nr_sectors;

	rq->buffer = bio_data(bio);

	rq->biotail = bio;
	rq->bio = rq->biotail;
}

EXPORT_SYMBOL(blk_rq_bio_prep);

3233

3237

3234

int kblockd_schedule_work(struct work_struct *work)

3238

int kblockd_schedule_work(struct work_struct *work)

3235

{

3239

{

3236

return queue_work(kblockd_workqueue, work);

3240

return queue_work(kblockd_workqueue, work);

3237

}

3241

}

3238

3242

3239

EXPORT_SYMBOL(kblockd_schedule_work);

3243

EXPORT_SYMBOL(kblockd_schedule_work);

3240

3244

3241

void kblockd_flush(void)

3245

void kblockd_flush(void)

3242

{

3246

{

3243

flush_workqueue(kblockd_workqueue);

3247

flush_workqueue(kblockd_workqueue);

3244

}

3248

}

3245

EXPORT_SYMBOL(kblockd_flush);

3249

EXPORT_SYMBOL(kblockd_flush);

3246

3250

3247

/*
 * Boot-time initialisation of the block layer: create the kblockd
 * workqueue (panicking if that fails), create the slab caches for
 * requests, request queues and io contexts (SLAB_PANIC, so creation
 * failure is fatal), and record the page frame limits. Always returns 0.
 */
int __init blk_dev_init(void)
{
	kblockd_workqueue = create_workqueue("kblockd");
	if (kblockd_workqueue == NULL)
		panic("Failed to create kblockd\n");

	request_cachep = kmem_cache_create("blkdev_requests",
					   sizeof(struct request),
					   0, SLAB_PANIC, NULL, NULL);

	requestq_cachep = kmem_cache_create("blkdev_queue",
					    sizeof(request_queue_t),
					    0, SLAB_PANIC, NULL, NULL);

	iocontext_cachep = kmem_cache_create("blkdev_ioc",
					     sizeof(struct io_context),
					     0, SLAB_PANIC, NULL, NULL);

	blk_max_low_pfn = max_low_pfn;
	blk_max_pfn = max_pfn;

	return 0;
}

3267

3271

3268

/*

3272

/*

3269

* IO Context helper functions

3273

* IO Context helper functions

3270

*/

3274

*/

3271

void put_io_context(struct io_context *ioc)

3275

void put_io_context(struct io_context *ioc)

3272

{

3276

{

3273

if (ioc == NULL)

3277

if (ioc == NULL)

3274

return;

3278

return;

3275

3279

3276

BUG_ON(atomic_read(&ioc->refcount) == 0);

3280

BUG_ON(atomic_read(&ioc->refcount) == 0);

3277

3281

3278

if (atomic_dec_and_test(&ioc->refcount)) {

3282

if (atomic_dec_and_test(&ioc->refcount)) {

3279

if (ioc->aic && ioc->aic->dtor)

3283

if (ioc->aic && ioc->aic->dtor)

3280

ioc->aic->dtor(ioc->aic);

3284

ioc->aic->dtor(ioc->aic);

3281

if (ioc->cic && ioc->cic->dtor)

3285

if (ioc->cic && ioc->cic->dtor)

3282

ioc->cic->dtor(ioc->cic);

3286

ioc->cic->dtor(ioc->cic);

3283

3287

3284

kmem_cache_free(iocontext_cachep, ioc);

3288

kmem_cache_free(iocontext_cachep, ioc);

3285

}

3289

}

3286

}

3290

}

3287

EXPORT_SYMBOL(put_io_context);

3291

EXPORT_SYMBOL(put_io_context);

3288

3292

3289

/* Called by the exitting task */

3293

/* Called by the exitting task */

3290

void exit_io_context(void)

3294

void exit_io_context(void)

3291

{

3295

{

3292

unsigned long flags;

3296

unsigned long flags;

3293

struct io_context *ioc;

3297

struct io_context *ioc;

3294

3298

3295

local_irq_save(flags);

3299

local_irq_save(flags);

3296

task_lock(current);

3300

task_lock(current);

3297

ioc = current->io_context;

3301

ioc = current->io_context;

3298

current->io_context = NULL;

3302

current->io_context = NULL;

3299

ioc->task = NULL;

3303

ioc->task = NULL;

3300

task_unlock(current);

3304

task_unlock(current);

3301

local_irq_restore(flags);

3305

local_irq_restore(flags);

3302

3306

3303

if (ioc->aic && ioc->aic->exit)

3307

if (ioc->aic && ioc->aic->exit)

3304

ioc->aic->exit(ioc->aic);

3308

ioc->aic->exit(ioc->aic);

3305

if (ioc->cic && ioc->cic->exit)

3309

if (ioc->cic && ioc->cic->exit)

3306

ioc->cic->exit(ioc->cic);

3310

ioc->cic->exit(ioc->cic);

3307

3311

3308

put_io_context(ioc);

3312

put_io_context(ioc);

3309

}

3313

}

3310

3314

3311

/*

3315

/*

3312

* If the current task has no IO context then create one and initialise it.

3316

* If the current task has no IO context then create one and initialise it.

3313

* Otherwise, return its existing IO context.

3317

* Otherwise, return its existing IO context.

3314

*

3318

*

3315

* This returned IO context doesn't have a specifically elevated refcount,

3319

* This returned IO context doesn't have a specifically elevated refcount,

3316

* but since the current task itself holds a reference, the context can be

3320

* but since the current task itself holds a reference, the context can be

3317

* used in general code, so long as it stays within `current` context.

3321

* used in general code, so long as it stays within `current` context.

3318

*/

3322

*/

3319

struct io_context *current_io_context(gfp_t gfp_flags)

3323

struct io_context *current_io_context(gfp_t gfp_flags)

3320

{

3324

{

3321

struct task_struct *tsk = current;

3325

struct task_struct *tsk = current;

3322

struct io_context *ret;

3326

struct io_context *ret;

3323

3327

3324

ret = tsk->io_context;

3328

ret = tsk->io_context;

3325

if (likely(ret))

3329

if (likely(ret))

3326

return ret;

3330

return ret;

3327

3331

3328

ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);

3332

ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);

3329

if (ret) {

3333

if (ret) {

3330

atomic_set(&ret->refcount, 1);

3334

atomic_set(&ret->refcount, 1);

3331

ret->task = current;

3335

ret->task = current;

3332

ret->set_ioprio = NULL;

3336

ret->set_ioprio = NULL;

3333

ret->last_waited = jiffies; /* doesn't matter... */

3337

ret->last_waited = jiffies; /* doesn't matter... */

3334

ret->nr_batch_requests = 0; /* because this is 0 */

3338

ret->nr_batch_requests = 0; /* because this is 0 */

3335

ret->aic = NULL;

3339

ret->aic = NULL;

3336

ret->cic = NULL;

3340

ret->cic = NULL;

3337

tsk->io_context = ret;

3341

tsk->io_context = ret;

3338

}

3342

}

3339

3343

3340

return ret;

3344

return ret;

3341

}

3345

}

3342

EXPORT_SYMBOL(current_io_context);

3346

EXPORT_SYMBOL(current_io_context);

3343

3347

3344

/*

3348

/*

3345

* If the current task has no IO context then create one and initialise it.

3349

* If the current task has no IO context then create one and initialise it.

3346

* If it does have a context, take a ref on it.

3350

* If it does have a context, take a ref on it.

3347

*

3351

*

3348

* This is always called in the context of the task which submitted the I/O.

3352

* This is always called in the context of the task which submitted the I/O.

3349

*/

3353

*/

3350

struct io_context *get_io_context(gfp_t gfp_flags)

3354

struct io_context *get_io_context(gfp_t gfp_flags)

3351

{

3355

{

3352

struct io_context *ret;

3356

struct io_context *ret;

3353

ret = current_io_context(gfp_flags);

3357

ret = current_io_context(gfp_flags);

3354

if (likely(ret))

3358

if (likely(ret))

3355

atomic_inc(&ret->refcount);

3359

atomic_inc(&ret->refcount);

3356

return ret;

3360

return ret;

3357

}

3361

}

3358

EXPORT_SYMBOL(get_io_context);

3362

EXPORT_SYMBOL(get_io_context);

3359

3363

3360

void copy_io_context(struct io_context **pdst, struct io_context **psrc)

3364

void copy_io_context(struct io_context **pdst, struct io_context **psrc)

3361

{

3365

{

3362

struct io_context *src = *psrc;

3366

struct io_context *src = *psrc;

3363

struct io_context *dst = *pdst;

3367

struct io_context *dst = *pdst;

3364

3368

3365

if (src) {

3369

if (src) {

3366

BUG_ON(atomic_read(&src->refcount) == 0);

3370

BUG_ON(atomic_read(&src->refcount) == 0);

3367

atomic_inc(&src->refcount);

3371

atomic_inc(&src->refcount);

3368

put_io_context(dst);

3372

put_io_context(dst);

3369

*pdst = src;

3373

*pdst = src;

3370

}

3374

}

3371

}

3375

}

3372

EXPORT_SYMBOL(copy_io_context);

3376

EXPORT_SYMBOL(copy_io_context);

3373

3377

3374

/*
 * Exchange the io context pointers stored in *@ioc1 and *@ioc2.
 * Reference counts are not touched.
 */
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
	struct io_context *first = *ioc1;

	*ioc1 = *ioc2;
	*ioc2 = first;
}
EXPORT_SYMBOL(swap_io_context);

3382

3386

3383

/*

3387

/*

3384

* sysfs parts below

3388

* sysfs parts below

3385

*/

3389

*/

3386

struct queue_sysfs_entry {

3390

struct queue_sysfs_entry {

3387

struct attribute attr;

3391

struct attribute attr;

3388

ssize_t (*show)(struct request_queue *, char *);

3392

ssize_t (*show)(struct request_queue *, char *);

3389

ssize_t (*store)(struct request_queue *, const char *, size_t);

3393

ssize_t (*store)(struct request_queue *, const char *, size_t);

3390

};

3394

};

3391

3395

3392

/*
 * Format @var as a decimal number followed by a newline into @page and
 * return the number of characters written (sprintf()'s return value).
 *
 * @var is unsigned, so it is printed with "%u"; the previous "%d"
 * rendered values above INT_MAX as negative numbers.
 */
static ssize_t
queue_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%u\n", var);
}

3397

3401

3398

static ssize_t

3402

static ssize_t

3399

queue_var_store(unsigned long *var, const char *page, size_t count)

3403

queue_var_store(unsigned long *var, const char *page, size_t count)

3400

{

3404

{

3401

char *p = (char *) page;

3405

char *p = (char *) page;

3402

3406

3403

*var = simple_strtoul(p, &p, 10);

3407

*var = simple_strtoul(p, &p, 10);

3404

return count;

3408

return count;

3405

}

3409

}

3406

3410

3407

static ssize_t queue_requests_show(struct request_queue *q, char *page)

3411

static ssize_t queue_requests_show(struct request_queue *q, char *page)

3408

{

3412

{

3409

return queue_var_show(q->nr_requests, (page));

3413

return queue_var_show(q->nr_requests, (page));

3410

}

3414

}

3411

3415

3412

static ssize_t

3416

static ssize_t

3413

queue_requests_store(struct request_queue *q, const char *page, size_t count)

3417

queue_requests_store(struct request_queue *q, const char *page, size_t count)

3414

{

3418

{

3415

struct request_list *rl = &q->rq;

3419

struct request_list *rl = &q->rq;

3416

3420

3417

int ret = queue_var_store(&q->nr_requests, page, count);

3421

int ret = queue_var_store(&q->nr_requests, page, count);

3418

if (q->nr_requests < BLKDEV_MIN_RQ)

3422

if (q->nr_requests < BLKDEV_MIN_RQ)

3419

q->nr_requests = BLKDEV_MIN_RQ;

3423

q->nr_requests = BLKDEV_MIN_RQ;

3420

blk_queue_congestion_threshold(q);

3424

blk_queue_congestion_threshold(q);

3421

3425

3422

if (rl->count[READ] >= queue_congestion_on_threshold(q))

3426

if (rl->count[READ] >= queue_congestion_on_threshold(q))

3423

set_queue_congested(q, READ);

3427

set_queue_congested(q, READ);

3424

else if (rl->count[READ] < queue_congestion_off_threshold(q))

3428

else if (rl->count[READ] < queue_congestion_off_threshold(q))

3425

clear_queue_congested(q, READ);

3429

clear_queue_congested(q, READ);

3426

3430

3427

if (rl->count[WRITE] >= queue_congestion_on_threshold(q))

3431

if (rl->count[WRITE] >= queue_congestion_on_threshold(q))

3428

set_queue_congested(q, WRITE);

3432

set_queue_congested(q, WRITE);

3429

else if (rl->count[WRITE] < queue_congestion_off_threshold(q))

3433

else if (rl->count[WRITE] < queue_congestion_off_threshold(q))

3430

clear_queue_congested(q, WRITE);

3434

clear_queue_congested(q, WRITE);

3431

3435

3432

if (rl->count[READ] >= q->nr_requests) {

3436

if (rl->count[READ] >= q->nr_requests) {

3433

blk_set_queue_full(q, READ);

3437

blk_set_queue_full(q, READ);

3434

} else if (rl->count[READ]+1 <= q->nr_requests) {

3438

} else if (rl->count[READ]+1 <= q->nr_requests) {

3435

blk_clear_queue_full(q, READ);

3439

blk_clear_queue_full(q, READ);

3436

wake_up(&rl->wait[READ]);

3440

wake_up(&rl->wait[READ]);

3437

}

3441

}

3438

3442

3439

if (rl->count[WRITE] >= q->nr_requests) {

3443

if (rl->count[WRITE] >= q->nr_requests) {

3440

blk_set_queue_full(q, WRITE);

3444

blk_set_queue_full(q, WRITE);

3441

} else if (rl->count[WRITE]+1 <= q->nr_requests) {

3445

} else if (rl->count[WRITE]+1 <= q->nr_requests) {

3442

blk_clear_queue_full(q, WRITE);

3446

blk_clear_queue_full(q, WRITE);

3443

wake_up(&rl->wait[WRITE]);

3447

wake_up(&rl->wait[WRITE]);

3444

}

3448

}

3445

return ret;

3449

return ret;

3446

}

3450

}

3447

3451

3448

static ssize_t queue_ra_show(struct request_queue *q, char *page)

3452

static ssize_t queue_ra_show(struct request_queue *q, char *page)

3449

{

3453

{

3450

int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);

3454

int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);

3451

3455

3452

return queue_var_show(ra_kb, (page));

3456

return queue_var_show(ra_kb, (page));

3453

}

3457

}

3454

3458

3455

static ssize_t

3459

static ssize_t

3456

queue_ra_store(struct request_queue *q, const char *page, size_t count)

3460

queue_ra_store(struct request_queue *q, const char *page, size_t count)

3457

{

3461

{

3458

unsigned long ra_kb;

3462

unsigned long ra_kb;

3459

ssize_t ret = queue_var_store(&ra_kb, page, count);

3463

ssize_t ret = queue_var_store(&ra_kb, page, count);

3460

3464

3461

spin_lock_irq(q->queue_lock);

3465

spin_lock_irq(q->queue_lock);

3462

if (ra_kb > (q->max_sectors >> 1))

3466

if (ra_kb > (q->max_sectors >> 1))

3463

ra_kb = (q->max_sectors >> 1);

3467

ra_kb = (q->max_sectors >> 1);

3464

3468

3465

q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);

3469

q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);

3466

spin_unlock_irq(q->queue_lock);

3470

spin_unlock_irq(q->queue_lock);

3467

3471

3468

return ret;

3472

return ret;

3469

}

3473

}

3470

3474

3471

static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)

3475

static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)

3472

{

3476

{

3473

int max_sectors_kb = q->max_sectors >> 1;

3477

int max_sectors_kb = q->max_sectors >> 1;

3474

3478

3475

return queue_var_show(max_sectors_kb, (page));

3479

return queue_var_show(max_sectors_kb, (page));

3476

}

3480

}

3477

3481

3478

static ssize_t

3482

static ssize_t

3479

queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)

3483

queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)

3480

{

3484

{

3481

unsigned long max_sectors_kb,

3485

unsigned long max_sectors_kb,

3482

max_hw_sectors_kb = q->max_hw_sectors >> 1,

3486

max_hw_sectors_kb = q->max_hw_sectors >> 1,

3483

page_kb = 1 << (PAGE_CACHE_SHIFT - 10);

3487

page_kb = 1 << (PAGE_CACHE_SHIFT - 10);

3484

ssize_t ret = queue_var_store(&max_sectors_kb, page, count);

3488

ssize_t ret = queue_var_store(&max_sectors_kb, page, count);

3485

int ra_kb;

3489

int ra_kb;

3486

3490

3487

if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)

3491

if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)

3488

return -EINVAL;

3492

return -EINVAL;

3489

/*

3493

/*

3490

* Take the queue lock to update the readahead and max_sectors

3494

* Take the queue lock to update the readahead and max_sectors

3491

* values synchronously:

3495

* values synchronously:

3492

*/

3496

*/

3493

spin_lock_irq(q->queue_lock);

3497

spin_lock_irq(q->queue_lock);

3494

/*

3498

/*

3495

* Trim readahead window as well, if necessary:

3499

* Trim readahead window as well, if necessary:

3496

*/

3500

*/

3497

ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);

3501

ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);

3498

if (ra_kb > max_sectors_kb)

3502

if (ra_kb > max_sectors_kb)

3499

q->backing_dev_info.ra_pages =

3503

q->backing_dev_info.ra_pages =

3500

max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);

3504

max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);

3501

3505

3502

q->max_sectors = max_sectors_kb << 1;

3506

q->max_sectors = max_sectors_kb << 1;

3503

spin_unlock_irq(q->queue_lock);

3507

spin_unlock_irq(q->queue_lock);

3504

3508

3505

return ret;

3509

return ret;

3506

}

3510

}

3507

3511

3508

static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)

3512

static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)

3509

{

3513

{

3510

int max_hw_sectors_kb = q->max_hw_sectors >> 1;

3514

int max_hw_sectors_kb = q->max_hw_sectors >> 1;

3511

3515

3512

return queue_var_show(max_hw_sectors_kb, (page));

3516

return queue_var_show(max_hw_sectors_kb, (page));

3513

}

3517

}

3514

3518

3515

3519

3516

static struct queue_sysfs_entry queue_requests_entry = {

3520

static struct queue_sysfs_entry queue_requests_entry = {

3517

.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },

3521

.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },

3518

.show = queue_requests_show,

3522

.show = queue_requests_show,

3519

.store = queue_requests_store,

3523

.store = queue_requests_store,

3520

};

3524

};

3521

3525

3522

static struct queue_sysfs_entry queue_ra_entry = {

3526

static struct queue_sysfs_entry queue_ra_entry = {

3523

.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },

3527

.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },

3524

.show = queue_ra_show,

3528

.show = queue_ra_show,

3525

.store = queue_ra_store,

3529

.store = queue_ra_store,

3526

};

3530

};

3527

3531

3528

static struct queue_sysfs_entry queue_max_sectors_entry = {

3532

static struct queue_sysfs_entry queue_max_sectors_entry = {

3529

.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },

3533

.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },

3530

.show = queue_max_sectors_show,

3534

.show = queue_max_sectors_show,

3531

.store = queue_max_sectors_store,

3535

.store = queue_max_sectors_store,

3532

};

3536

};

3533

3537

3534

static struct queue_sysfs_entry queue_max_hw_sectors_entry = {

3538

static struct queue_sysfs_entry queue_max_hw_sectors_entry = {

3535

.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },

3539

.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },

3536

.show = queue_max_hw_sectors_show,

3540

.show = queue_max_hw_sectors_show,

3537

};

3541

};

3538

3542

3539

static struct queue_sysfs_entry queue_iosched_entry = {

3543

static struct queue_sysfs_entry queue_iosched_entry = {

3540

.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },

3544

.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },

3541

.show = elv_iosched_show,

3545

.show = elv_iosched_show,

3542

.store = elv_iosched_store,

3546

.store = elv_iosched_store,

3543

};

3547

};

3544

3548

3545

static struct attribute *default_attrs[] = {

3549

static struct attribute *default_attrs[] = {

3546

&queue_requests_entry.attr,

3550

&queue_requests_entry.attr,

3547

&queue_ra_entry.attr,

3551

&queue_ra_entry.attr,

3548

&queue_max_hw_sectors_entry.attr,

3552

&queue_max_hw_sectors_entry.attr,

3549

&queue_max_sectors_entry.attr,

3553

&queue_max_sectors_entry.attr,

3550

&queue_iosched_entry.attr,

3554

&queue_iosched_entry.attr,

3551

NULL,

3555

NULL,

3552

};

3556

};

3553

3557

3554

/*
 * Map a struct attribute pointer back to the queue_sysfs_entry that
 * embeds it. The macro parameter must not be spelled "attr": that would
 * also replace the member name passed to container_of().
 */
#define to_queue(a) \
	container_of((a), struct queue_sysfs_entry, attr)

3555

3559

3556

static ssize_t

3560

static ssize_t

3557

queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)

3561

queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)

3558

{

3562

{

3559

struct queue_sysfs_entry *entry = to_queue(attr);

3563

struct queue_sysfs_entry *entry = to_queue(attr);

3560

struct request_queue *q;

3564

struct request_queue *q;

3561

3565

3562

q = container_of(kobj, struct request_queue, kobj);

3566

q = container_of(kobj, struct request_queue, kobj);

3563

if (!entry->show)

3567

if (!entry->show)

3564

return -EIO;

3568

return -EIO;

3565

3569

3566

return entry->show(q, page);

3570

return entry->show(q, page);

3567

}

3571

}

3568

3572

3569

static ssize_t

3573

static ssize_t

3570

queue_attr_store(struct kobject *kobj, struct attribute *attr,

3574

queue_attr_store(struct kobject *kobj, struct attribute *attr,

3571

const char *page, size_t length)

3575

const char *page, size_t length)

3572

{

3576

{

3573

struct queue_sysfs_entry *entry = to_queue(attr);

3577

struct queue_sysfs_entry *entry = to_queue(attr);

3574

struct request_queue *q;

3578

struct request_queue *q;

3575

3579

3576

q = container_of(kobj, struct request_queue, kobj);

3580

q = container_of(kobj, struct request_queue, kobj);

3577

if (!entry->store)

3581

if (!entry->store)

3578

return -EIO;

3582

return -EIO;

3579

3583

3580

return entry->store(q, page, length);

3584

return entry->store(q, page, length);

3581

}

3585

}

3582

3586

3583

static struct sysfs_ops queue_sysfs_ops = {

3587

static struct sysfs_ops queue_sysfs_ops = {

3584

.show = queue_attr_show,

3588

.show = queue_attr_show,

3585

.store = queue_attr_store,

3589

.store = queue_attr_store,

3586

};

3590

};

3587

3591

3588

static struct kobj_type queue_ktype = {

3592

static struct kobj_type queue_ktype = {

3589

.sysfs_ops = &queue_sysfs_ops,

3593

.sysfs_ops = &queue_sysfs_ops,

3590

.default_attrs = default_attrs,

3594

.default_attrs = default_attrs,

3591

};

3595

};

3592

3596

3593

int blk_register_queue(struct gendisk *disk)

3597

int blk_register_queue(struct gendisk *disk)

3594

{

3598

{

3595

int ret;

3599

int ret;

3596

3600

3597

request_queue_t *q = disk->queue;

3601

request_queue_t *q = disk->queue;

3598

3602

3599

if (!q || !q->request_fn)

3603

if (!q || !q->request_fn)

3600

return -ENXIO;

3604

return -ENXIO;

3601

3605

3602

q->kobj.parent = kobject_get(&disk->kobj);

3606

q->kobj.parent = kobject_get(&disk->kobj);

3603

if (!q->kobj.parent)

3607

if (!q->kobj.parent)

3604

return -EBUSY;

3608

return -EBUSY;

3605

3609

3606

snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");

3610

snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");

3607

q->kobj.ktype = &queue_ktype;

3611

q->kobj.ktype = &queue_ktype;

3608

3612

3609

ret = kobject_register(&q->kobj);

3613

ret = kobject_register(&q->kobj);

3610

if (ret < 0)

3614

if (ret < 0)

3611

return ret;

3615

return ret;

3612

3616

3613

ret = elv_register_queue(q);

3617

ret = elv_register_queue(q);

GITLAB

[BLOCK] ll_rw_blk: fastpath get_request()