Commit:       7e5f5fb09e6fc657f21816b5a18ba645a913368e
Committed by: Jens Axboe
1 parent:     70dd5bf3b9
Exists in:    master and 39 other branches
block: Update topology documentation
Update topology comments and sysfs documentation based upon discussions with Neil Brown.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Showing 2 changed files with 36 additions and 20 deletions
Documentation/ABI/testing/sysfs-block
1 | What: /sys/block/<disk>/stat | 1 | What: /sys/block/<disk>/stat |
2 | Date: February 2008 | 2 | Date: February 2008 |
3 | Contact: Jerome Marchand <jmarchan@redhat.com> | 3 | Contact: Jerome Marchand <jmarchan@redhat.com> |
4 | Description: | 4 | Description: |
5 | The /sys/block/<disk>/stat files displays the I/O | 5 | The /sys/block/<disk>/stat files displays the I/O |
6 | statistics of disk <disk>. They contain 11 fields: | 6 | statistics of disk <disk>. They contain 11 fields: |
7 | 1 - reads completed succesfully | 7 | 1 - reads completed succesfully |
8 | 2 - reads merged | 8 | 2 - reads merged |
9 | 3 - sectors read | 9 | 3 - sectors read |
10 | 4 - time spent reading (ms) | 10 | 4 - time spent reading (ms) |
11 | 5 - writes completed | 11 | 5 - writes completed |
12 | 6 - writes merged | 12 | 6 - writes merged |
13 | 7 - sectors written | 13 | 7 - sectors written |
14 | 8 - time spent writing (ms) | 14 | 8 - time spent writing (ms) |
15 | 9 - I/Os currently in progress | 15 | 9 - I/Os currently in progress |
16 | 10 - time spent doing I/Os (ms) | 16 | 10 - time spent doing I/Os (ms) |
17 | 11 - weighted time spent doing I/Os (ms) | 17 | 11 - weighted time spent doing I/Os (ms) |
18 | For more details refer Documentation/iostats.txt | 18 | For more details refer Documentation/iostats.txt |
19 | 19 | ||
20 | 20 | ||
21 | What: /sys/block/<disk>/<part>/stat | 21 | What: /sys/block/<disk>/<part>/stat |
22 | Date: February 2008 | 22 | Date: February 2008 |
23 | Contact: Jerome Marchand <jmarchan@redhat.com> | 23 | Contact: Jerome Marchand <jmarchan@redhat.com> |
24 | Description: | 24 | Description: |
25 | The /sys/block/<disk>/<part>/stat files display the | 25 | The /sys/block/<disk>/<part>/stat files display the |
26 | I/O statistics of partition <part>. The format is the | 26 | I/O statistics of partition <part>. The format is the |
27 | same as the above-written /sys/block/<disk>/stat | 27 | same as the above-written /sys/block/<disk>/stat |
28 | format. | 28 | format. |
29 | 29 | ||
30 | 30 | ||
31 | What: /sys/block/<disk>/integrity/format | 31 | What: /sys/block/<disk>/integrity/format |
32 | Date: June 2008 | 32 | Date: June 2008 |
33 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 33 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
34 | Description: | 34 | Description: |
35 | Metadata format for integrity capable block device. | 35 | Metadata format for integrity capable block device. |
36 | E.g. T10-DIF-TYPE1-CRC. | 36 | E.g. T10-DIF-TYPE1-CRC. |
37 | 37 | ||
38 | 38 | ||
39 | What: /sys/block/<disk>/integrity/read_verify | 39 | What: /sys/block/<disk>/integrity/read_verify |
40 | Date: June 2008 | 40 | Date: June 2008 |
41 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 41 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
42 | Description: | 42 | Description: |
43 | Indicates whether the block layer should verify the | 43 | Indicates whether the block layer should verify the |
44 | integrity of read requests serviced by devices that | 44 | integrity of read requests serviced by devices that |
45 | support sending integrity metadata. | 45 | support sending integrity metadata. |
46 | 46 | ||
47 | 47 | ||
48 | What: /sys/block/<disk>/integrity/tag_size | 48 | What: /sys/block/<disk>/integrity/tag_size |
49 | Date: June 2008 | 49 | Date: June 2008 |
50 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 50 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
51 | Description: | 51 | Description: |
52 | Number of bytes of integrity tag space available per | 52 | Number of bytes of integrity tag space available per |
53 | 512 bytes of data. | 53 | 512 bytes of data. |
54 | 54 | ||
55 | 55 | ||
56 | What: /sys/block/<disk>/integrity/write_generate | 56 | What: /sys/block/<disk>/integrity/write_generate |
57 | Date: June 2008 | 57 | Date: June 2008 |
58 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 58 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
59 | Description: | 59 | Description: |
60 | Indicates whether the block layer should automatically | 60 | Indicates whether the block layer should automatically |
61 | generate checksums for write requests bound for | 61 | generate checksums for write requests bound for |
62 | devices that support receiving integrity metadata. | 62 | devices that support receiving integrity metadata. |
63 | 63 | ||
64 | What: /sys/block/<disk>/alignment_offset | 64 | What: /sys/block/<disk>/alignment_offset |
65 | Date: April 2009 | 65 | Date: April 2009 |
66 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 66 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
67 | Description: | 67 | Description: |
68 | Storage devices may report a physical block size that is | 68 | Storage devices may report a physical block size that is |
69 | bigger than the logical block size (for instance a drive | 69 | bigger than the logical block size (for instance a drive |
70 | with 4KB physical sectors exposing 512-byte logical | 70 | with 4KB physical sectors exposing 512-byte logical |
71 | blocks to the operating system). This parameter | 71 | blocks to the operating system). This parameter |
72 | indicates how many bytes the beginning of the device is | 72 | indicates how many bytes the beginning of the device is |
73 | offset from the disk's natural alignment. | 73 | offset from the disk's natural alignment. |
74 | 74 | ||
75 | What: /sys/block/<disk>/<partition>/alignment_offset | 75 | What: /sys/block/<disk>/<partition>/alignment_offset |
76 | Date: April 2009 | 76 | Date: April 2009 |
77 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 77 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
78 | Description: | 78 | Description: |
79 | Storage devices may report a physical block size that is | 79 | Storage devices may report a physical block size that is |
80 | bigger than the logical block size (for instance a drive | 80 | bigger than the logical block size (for instance a drive |
81 | with 4KB physical sectors exposing 512-byte logical | 81 | with 4KB physical sectors exposing 512-byte logical |
82 | blocks to the operating system). This parameter | 82 | blocks to the operating system). This parameter |
83 | indicates how many bytes the beginning of the partition | 83 | indicates how many bytes the beginning of the partition |
84 | is offset from the disk's natural alignment. | 84 | is offset from the disk's natural alignment. |
85 | 85 | ||
86 | What: /sys/block/<disk>/queue/logical_block_size | 86 | What: /sys/block/<disk>/queue/logical_block_size |
87 | Date: May 2009 | 87 | Date: May 2009 |
88 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 88 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
89 | Description: | 89 | Description: |
90 | This is the smallest unit the storage device can | 90 | This is the smallest unit the storage device can |
91 | address. It is typically 512 bytes. | 91 | address. It is typically 512 bytes. |
92 | 92 | ||
93 | What: /sys/block/<disk>/queue/physical_block_size | 93 | What: /sys/block/<disk>/queue/physical_block_size |
94 | Date: May 2009 | 94 | Date: May 2009 |
95 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 95 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
96 | Description: | 96 | Description: |
97 | This is the smallest unit the storage device can write | 97 | This is the smallest unit a physical storage device can |
98 | without resorting to read-modify-write operation. It is | 98 | write atomically. It is usually the same as the logical |
99 | usually the same as the logical block size but may be | 99 | block size but may be bigger. One example is SATA |
100 | bigger. One example is SATA drives with 4KB sectors | 100 | drives with 4KB sectors that expose a 512-byte logical |
101 | that expose a 512-byte logical block size to the | 101 | block size to the operating system. For stacked block |
102 | operating system. | 102 | devices the physical_block_size variable contains the |
103 | maximum physical_block_size of the component devices. | ||
103 | 104 | ||
104 | What: /sys/block/<disk>/queue/minimum_io_size | 105 | What: /sys/block/<disk>/queue/minimum_io_size |
105 | Date: April 2009 | 106 | Date: April 2009 |
106 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 107 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
107 | Description: | 108 | Description: |
108 | Storage devices may report a preferred minimum I/O size, | 109 | Storage devices may report a granularity or preferred |
109 | which is the smallest request the device can perform | 110 | minimum I/O size which is the smallest request the |
110 | without incurring a read-modify-write penalty. For disk | 111 | device can perform without incurring a performance |
111 | drives this is often the physical block size. For RAID | 112 | penalty. For disk drives this is often the physical |
112 | arrays it is often the stripe chunk size. | 113 | block size. For RAID arrays it is often the stripe |
114 | chunk size. A properly aligned multiple of | ||
115 | minimum_io_size is the preferred request size for | ||
116 | workloads where a high number of I/O operations is | ||
117 | desired. | ||
113 | 118 | ||
114 | What: /sys/block/<disk>/queue/optimal_io_size | 119 | What: /sys/block/<disk>/queue/optimal_io_size |
115 | Date: April 2009 | 120 | Date: April 2009 |
116 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | 121 | Contact: Martin K. Petersen <martin.petersen@oracle.com> |
117 | Description: | 122 | Description: |
118 | Storage devices may report an optimal I/O size, which is | 123 | Storage devices may report an optimal I/O size, which is |
119 | the device's preferred unit of receiving I/O. This is | 124 | the device's preferred unit for sustained I/O. This is |
120 | rarely reported for disk drives. For RAID devices it is | 125 | rarely reported for disk drives. For RAID arrays it is |
121 | usually the stripe width or the internal block size. | 126 | usually the stripe width or the internal track size. A |
127 | properly aligned multiple of optimal_io_size is the | ||
128 | preferred request size for workloads where sustained | ||
129 | throughput is desired. If no optimal I/O size is | ||
130 | reported this file contains 0. | ||
122 | 131 |
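To make the queue attributes documented above concrete, here is a minimal user-space sketch (an editor's illustration, not part of this commit) that reads the topology files described in this ABI document. The device name "sda" and the selection of attributes are assumptions chosen only for the example.

/* Hedged example: query the I/O topology attributes from sysfs. */
#include <stdio.h>

static void show_queue_attr(const char *disk, const char *attr)
{
        char path[256], value[64];
        FILE *f;

        /* e.g. /sys/block/sda/queue/physical_block_size */
        snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
        f = fopen(path, "r");
        if (!f) {
                printf("%-22s (not available)\n", attr);
                return;
        }
        if (fgets(value, sizeof(value), f))
                printf("%-22s %s", attr, value);   /* sysfs value ends in '\n' */
        fclose(f);
}

int main(void)
{
        const char *disk = "sda";       /* assumed example device */

        show_queue_attr(disk, "logical_block_size");
        show_queue_attr(disk, "physical_block_size");
        show_queue_attr(disk, "minimum_io_size");
        show_queue_attr(disk, "optimal_io_size");
        return 0;
}

The per-disk alignment_offset attribute lives one level up, at /sys/block/<disk>/alignment_offset, and can be read the same way.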
block/blk-settings.c
1 | /* | 1 | /* |
2 | * Functions related to setting various queue properties from drivers | 2 | * Functions related to setting various queue properties from drivers |
3 | */ | 3 | */ |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/bio.h> | 7 | #include <linux/bio.h> |
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | 9 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ |
10 | #include <linux/gcd.h> | 10 | #include <linux/gcd.h> |
11 | 11 | ||
12 | #include "blk.h" | 12 | #include "blk.h" |
13 | 13 | ||
14 | unsigned long blk_max_low_pfn; | 14 | unsigned long blk_max_low_pfn; |
15 | EXPORT_SYMBOL(blk_max_low_pfn); | 15 | EXPORT_SYMBOL(blk_max_low_pfn); |
16 | 16 | ||
17 | unsigned long blk_max_pfn; | 17 | unsigned long blk_max_pfn; |
18 | 18 | ||
19 | /** | 19 | /** |
20 | * blk_queue_prep_rq - set a prepare_request function for queue | 20 | * blk_queue_prep_rq - set a prepare_request function for queue |
21 | * @q: queue | 21 | * @q: queue |
22 | * @pfn: prepare_request function | 22 | * @pfn: prepare_request function |
23 | * | 23 | * |
24 | * It's possible for a queue to register a prepare_request callback which | 24 | * It's possible for a queue to register a prepare_request callback which |
25 | * is invoked before the request is handed to the request_fn. The goal of | 25 | * is invoked before the request is handed to the request_fn. The goal of |
26 | * the function is to prepare a request for I/O, it can be used to build a | 26 | * the function is to prepare a request for I/O, it can be used to build a |
27 | * cdb from the request data for instance. | 27 | * cdb from the request data for instance. |
28 | * | 28 | * |
29 | */ | 29 | */ |
30 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | 30 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) |
31 | { | 31 | { |
32 | q->prep_rq_fn = pfn; | 32 | q->prep_rq_fn = pfn; |
33 | } | 33 | } |
34 | EXPORT_SYMBOL(blk_queue_prep_rq); | 34 | EXPORT_SYMBOL(blk_queue_prep_rq); |
35 | 35 | ||
36 | /** | 36 | /** |
37 | * blk_queue_set_discard - set a discard_sectors function for queue | 37 | * blk_queue_set_discard - set a discard_sectors function for queue |
38 | * @q: queue | 38 | * @q: queue |
39 | * @dfn: prepare_discard function | 39 | * @dfn: prepare_discard function |
40 | * | 40 | * |
41 | * It's possible for a queue to register a discard callback which is used | 41 | * It's possible for a queue to register a discard callback which is used |
42 | * to transform a discard request into the appropriate type for the | 42 | * to transform a discard request into the appropriate type for the |
43 | * hardware. If none is registered, then discard requests are failed | 43 | * hardware. If none is registered, then discard requests are failed |
44 | * with %EOPNOTSUPP. | 44 | * with %EOPNOTSUPP. |
45 | * | 45 | * |
46 | */ | 46 | */ |
47 | void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) | 47 | void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) |
48 | { | 48 | { |
49 | q->prepare_discard_fn = dfn; | 49 | q->prepare_discard_fn = dfn; |
50 | } | 50 | } |
51 | EXPORT_SYMBOL(blk_queue_set_discard); | 51 | EXPORT_SYMBOL(blk_queue_set_discard); |
52 | 52 | ||
53 | /** | 53 | /** |
54 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 54 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
55 | * @q: queue | 55 | * @q: queue |
56 | * @mbfn: merge_bvec_fn | 56 | * @mbfn: merge_bvec_fn |
57 | * | 57 | * |
58 | * Usually queues have static limitations on the max sectors or segments that | 58 | * Usually queues have static limitations on the max sectors or segments that |
59 | * we can put in a request. Stacking drivers may have some settings that | 59 | * we can put in a request. Stacking drivers may have some settings that |
60 | * are dynamic, and thus we have to query the queue whether it is ok to | 60 | * are dynamic, and thus we have to query the queue whether it is ok to |
61 | * add a new bio_vec to a bio at a given offset or not. If the block device | 61 | * add a new bio_vec to a bio at a given offset or not. If the block device |
62 | * has such limitations, it needs to register a merge_bvec_fn to control | 62 | * has such limitations, it needs to register a merge_bvec_fn to control |
63 | * the size of bio's sent to it. Note that a block device *must* allow a | 63 | * the size of bio's sent to it. Note that a block device *must* allow a |
64 | * single page to be added to an empty bio. The block device driver may want | 64 | * single page to be added to an empty bio. The block device driver may want |
65 | * to use the bio_split() function to deal with these bio's. By default | 65 | * to use the bio_split() function to deal with these bio's. By default |
66 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are | 66 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are |
67 | * honored. | 67 | * honored. |
68 | */ | 68 | */ |
69 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) | 69 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) |
70 | { | 70 | { |
71 | q->merge_bvec_fn = mbfn; | 71 | q->merge_bvec_fn = mbfn; |
72 | } | 72 | } |
73 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 73 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
74 | 74 | ||
75 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) | 75 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) |
76 | { | 76 | { |
77 | q->softirq_done_fn = fn; | 77 | q->softirq_done_fn = fn; |
78 | } | 78 | } |
79 | EXPORT_SYMBOL(blk_queue_softirq_done); | 79 | EXPORT_SYMBOL(blk_queue_softirq_done); |
80 | 80 | ||
81 | void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) | 81 | void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) |
82 | { | 82 | { |
83 | q->rq_timeout = timeout; | 83 | q->rq_timeout = timeout; |
84 | } | 84 | } |
85 | EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); | 85 | EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); |
86 | 86 | ||
87 | void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) | 87 | void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) |
88 | { | 88 | { |
89 | q->rq_timed_out_fn = fn; | 89 | q->rq_timed_out_fn = fn; |
90 | } | 90 | } |
91 | EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); | 91 | EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); |
92 | 92 | ||
93 | void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) | 93 | void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) |
94 | { | 94 | { |
95 | q->lld_busy_fn = fn; | 95 | q->lld_busy_fn = fn; |
96 | } | 96 | } |
97 | EXPORT_SYMBOL_GPL(blk_queue_lld_busy); | 97 | EXPORT_SYMBOL_GPL(blk_queue_lld_busy); |
98 | 98 | ||
99 | /** | 99 | /** |
100 | * blk_set_default_limits - reset limits to default values | 100 | * blk_set_default_limits - reset limits to default values |
101 | * @lim: the queue_limits structure to reset | 101 | * @lim: the queue_limits structure to reset |
102 | * | 102 | * |
103 | * Description: | 103 | * Description: |
104 | * Returns a queue_limit struct to its default state. Can be used by | 104 | * Returns a queue_limit struct to its default state. Can be used by |
105 | * stacking drivers like DM that stage table swaps and reuse an | 105 | * stacking drivers like DM that stage table swaps and reuse an |
106 | * existing device queue. | 106 | * existing device queue. |
107 | */ | 107 | */ |
108 | void blk_set_default_limits(struct queue_limits *lim) | 108 | void blk_set_default_limits(struct queue_limits *lim) |
109 | { | 109 | { |
110 | lim->max_phys_segments = MAX_PHYS_SEGMENTS; | 110 | lim->max_phys_segments = MAX_PHYS_SEGMENTS; |
111 | lim->max_hw_segments = MAX_HW_SEGMENTS; | 111 | lim->max_hw_segments = MAX_HW_SEGMENTS; |
112 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 112 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
113 | lim->max_segment_size = MAX_SEGMENT_SIZE; | 113 | lim->max_segment_size = MAX_SEGMENT_SIZE; |
114 | lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS; | 114 | lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS; |
115 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; | 115 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; |
116 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); | 116 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); |
117 | lim->alignment_offset = 0; | 117 | lim->alignment_offset = 0; |
118 | lim->io_opt = 0; | 118 | lim->io_opt = 0; |
119 | lim->misaligned = 0; | 119 | lim->misaligned = 0; |
120 | lim->no_cluster = 0; | 120 | lim->no_cluster = 0; |
121 | } | 121 | } |
122 | EXPORT_SYMBOL(blk_set_default_limits); | 122 | EXPORT_SYMBOL(blk_set_default_limits); |
123 | 123 | ||
124 | /** | 124 | /** |
125 | * blk_queue_make_request - define an alternate make_request function for a device | 125 | * blk_queue_make_request - define an alternate make_request function for a device |
126 | * @q: the request queue for the device to be affected | 126 | * @q: the request queue for the device to be affected |
127 | * @mfn: the alternate make_request function | 127 | * @mfn: the alternate make_request function |
128 | * | 128 | * |
129 | * Description: | 129 | * Description: |
130 | * The normal way for &struct bios to be passed to a device | 130 | * The normal way for &struct bios to be passed to a device |
131 | * driver is for them to be collected into requests on a request | 131 | * driver is for them to be collected into requests on a request |
132 | * queue, and then to allow the device driver to select requests | 132 | * queue, and then to allow the device driver to select requests |
133 | * off that queue when it is ready. This works well for many block | 133 | * off that queue when it is ready. This works well for many block |
134 | * devices. However some block devices (typically virtual devices | 134 | * devices. However some block devices (typically virtual devices |
135 | * such as md or lvm) do not benefit from the processing on the | 135 | * such as md or lvm) do not benefit from the processing on the |
136 | * request queue, and are served best by having the requests passed | 136 | * request queue, and are served best by having the requests passed |
137 | * directly to them. This can be achieved by providing a function | 137 | * directly to them. This can be achieved by providing a function |
138 | * to blk_queue_make_request(). | 138 | * to blk_queue_make_request(). |
139 | * | 139 | * |
140 | * Caveat: | 140 | * Caveat: |
141 | * The driver that does this *must* be able to deal appropriately | 141 | * The driver that does this *must* be able to deal appropriately |
142 | * with buffers in "highmemory". This can be accomplished by either calling | 142 | * with buffers in "highmemory". This can be accomplished by either calling |
143 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling | 143 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling |
144 | * blk_queue_bounce() to create a buffer in normal memory. | 144 | * blk_queue_bounce() to create a buffer in normal memory. |
145 | **/ | 145 | **/ |
146 | void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) | 146 | void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) |
147 | { | 147 | { |
148 | /* | 148 | /* |
149 | * set defaults | 149 | * set defaults |
150 | */ | 150 | */ |
151 | q->nr_requests = BLKDEV_MAX_RQ; | 151 | q->nr_requests = BLKDEV_MAX_RQ; |
152 | 152 | ||
153 | q->make_request_fn = mfn; | 153 | q->make_request_fn = mfn; |
154 | blk_queue_dma_alignment(q, 511); | 154 | blk_queue_dma_alignment(q, 511); |
155 | blk_queue_congestion_threshold(q); | 155 | blk_queue_congestion_threshold(q); |
156 | q->nr_batching = BLK_BATCH_REQ; | 156 | q->nr_batching = BLK_BATCH_REQ; |
157 | 157 | ||
158 | q->unplug_thresh = 4; /* hmm */ | 158 | q->unplug_thresh = 4; /* hmm */ |
159 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ | 159 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ |
160 | if (q->unplug_delay == 0) | 160 | if (q->unplug_delay == 0) |
161 | q->unplug_delay = 1; | 161 | q->unplug_delay = 1; |
162 | 162 | ||
163 | q->unplug_timer.function = blk_unplug_timeout; | 163 | q->unplug_timer.function = blk_unplug_timeout; |
164 | q->unplug_timer.data = (unsigned long)q; | 164 | q->unplug_timer.data = (unsigned long)q; |
165 | 165 | ||
166 | blk_set_default_limits(&q->limits); | 166 | blk_set_default_limits(&q->limits); |
167 | 167 | ||
168 | /* | 168 | /* |
169 | * If the caller didn't supply a lock, fall back to our embedded | 169 | * If the caller didn't supply a lock, fall back to our embedded |
170 | * per-queue locks | 170 | * per-queue locks |
171 | */ | 171 | */ |
172 | if (!q->queue_lock) | 172 | if (!q->queue_lock) |
173 | q->queue_lock = &q->__queue_lock; | 173 | q->queue_lock = &q->__queue_lock; |
174 | 174 | ||
175 | /* | 175 | /* |
176 | * by default assume old behaviour and bounce for any highmem page | 176 | * by default assume old behaviour and bounce for any highmem page |
177 | */ | 177 | */ |
178 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 178 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
179 | } | 179 | } |
180 | EXPORT_SYMBOL(blk_queue_make_request); | 180 | EXPORT_SYMBOL(blk_queue_make_request); |
181 | 181 | ||
182 | /** | 182 | /** |
183 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 183 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
184 | * @q: the request queue for the device | 184 | * @q: the request queue for the device |
185 | * @dma_mask: the maximum address the device can handle | 185 | * @dma_mask: the maximum address the device can handle |
186 | * | 186 | * |
187 | * Description: | 187 | * Description: |
188 | * Different hardware can have different requirements as to what pages | 188 | * Different hardware can have different requirements as to what pages |
189 | * it can do I/O directly to. A low level driver can call | 189 | * it can do I/O directly to. A low level driver can call |
190 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | 190 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce |
191 | * buffers for doing I/O to pages residing above @dma_mask. | 191 | * buffers for doing I/O to pages residing above @dma_mask. |
192 | **/ | 192 | **/ |
193 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) | 193 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) |
194 | { | 194 | { |
195 | unsigned long b_pfn = dma_mask >> PAGE_SHIFT; | 195 | unsigned long b_pfn = dma_mask >> PAGE_SHIFT; |
196 | int dma = 0; | 196 | int dma = 0; |
197 | 197 | ||
198 | q->bounce_gfp = GFP_NOIO; | 198 | q->bounce_gfp = GFP_NOIO; |
199 | #if BITS_PER_LONG == 64 | 199 | #if BITS_PER_LONG == 64 |
200 | /* | 200 | /* |
201 | * Assume anything <= 4GB can be handled by IOMMU. Actually | 201 | * Assume anything <= 4GB can be handled by IOMMU. Actually |
202 | * some IOMMUs can handle everything, but I don't know of a | 202 | * some IOMMUs can handle everything, but I don't know of a |
203 | * way to test this here. | 203 | * way to test this here. |
204 | */ | 204 | */ |
205 | if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) | 205 | if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) |
206 | dma = 1; | 206 | dma = 1; |
207 | q->limits.bounce_pfn = max_low_pfn; | 207 | q->limits.bounce_pfn = max_low_pfn; |
208 | #else | 208 | #else |
209 | if (b_pfn < blk_max_low_pfn) | 209 | if (b_pfn < blk_max_low_pfn) |
210 | dma = 1; | 210 | dma = 1; |
211 | q->limits.bounce_pfn = b_pfn; | 211 | q->limits.bounce_pfn = b_pfn; |
212 | #endif | 212 | #endif |
213 | if (dma) { | 213 | if (dma) { |
214 | init_emergency_isa_pool(); | 214 | init_emergency_isa_pool(); |
215 | q->bounce_gfp = GFP_NOIO | GFP_DMA; | 215 | q->bounce_gfp = GFP_NOIO | GFP_DMA; |
216 | q->limits.bounce_pfn = b_pfn; | 216 | q->limits.bounce_pfn = b_pfn; |
217 | } | 217 | } |
218 | } | 218 | } |
219 | EXPORT_SYMBOL(blk_queue_bounce_limit); | 219 | EXPORT_SYMBOL(blk_queue_bounce_limit); |
220 | 220 | ||
221 | /** | 221 | /** |
222 | * blk_queue_max_sectors - set max sectors for a request for this queue | 222 | * blk_queue_max_sectors - set max sectors for a request for this queue |
223 | * @q: the request queue for the device | 223 | * @q: the request queue for the device |
224 | * @max_sectors: max sectors in the usual 512b unit | 224 | * @max_sectors: max sectors in the usual 512b unit |
225 | * | 225 | * |
226 | * Description: | 226 | * Description: |
227 | * Enables a low level driver to set an upper limit on the size of | 227 | * Enables a low level driver to set an upper limit on the size of |
228 | * received requests. | 228 | * received requests. |
229 | **/ | 229 | **/ |
230 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) | 230 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) |
231 | { | 231 | { |
232 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { | 232 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { |
233 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | 233 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); |
234 | printk(KERN_INFO "%s: set to minimum %d\n", | 234 | printk(KERN_INFO "%s: set to minimum %d\n", |
235 | __func__, max_sectors); | 235 | __func__, max_sectors); |
236 | } | 236 | } |
237 | 237 | ||
238 | if (BLK_DEF_MAX_SECTORS > max_sectors) | 238 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
239 | q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors; | 239 | q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors; |
240 | else { | 240 | else { |
241 | q->limits.max_sectors = BLK_DEF_MAX_SECTORS; | 241 | q->limits.max_sectors = BLK_DEF_MAX_SECTORS; |
242 | q->limits.max_hw_sectors = max_sectors; | 242 | q->limits.max_hw_sectors = max_sectors; |
243 | } | 243 | } |
244 | } | 244 | } |
245 | EXPORT_SYMBOL(blk_queue_max_sectors); | 245 | EXPORT_SYMBOL(blk_queue_max_sectors); |
246 | 246 | ||
247 | void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) | 247 | void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) |
248 | { | 248 | { |
249 | if (BLK_DEF_MAX_SECTORS > max_sectors) | 249 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
250 | q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS; | 250 | q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS; |
251 | else | 251 | else |
252 | q->limits.max_hw_sectors = max_sectors; | 252 | q->limits.max_hw_sectors = max_sectors; |
253 | } | 253 | } |
254 | EXPORT_SYMBOL(blk_queue_max_hw_sectors); | 254 | EXPORT_SYMBOL(blk_queue_max_hw_sectors); |
255 | 255 | ||
256 | /** | 256 | /** |
257 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | 257 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue |
258 | * @q: the request queue for the device | 258 | * @q: the request queue for the device |
259 | * @max_segments: max number of segments | 259 | * @max_segments: max number of segments |
260 | * | 260 | * |
261 | * Description: | 261 | * Description: |
262 | * Enables a low level driver to set an upper limit on the number of | 262 | * Enables a low level driver to set an upper limit on the number of |
263 | * physical data segments in a request. This would be the largest sized | 263 | * physical data segments in a request. This would be the largest sized |
264 | * scatter list the driver could handle. | 264 | * scatter list the driver could handle. |
265 | **/ | 265 | **/ |
266 | void blk_queue_max_phys_segments(struct request_queue *q, | 266 | void blk_queue_max_phys_segments(struct request_queue *q, |
267 | unsigned short max_segments) | 267 | unsigned short max_segments) |
268 | { | 268 | { |
269 | if (!max_segments) { | 269 | if (!max_segments) { |
270 | max_segments = 1; | 270 | max_segments = 1; |
271 | printk(KERN_INFO "%s: set to minimum %d\n", | 271 | printk(KERN_INFO "%s: set to minimum %d\n", |
272 | __func__, max_segments); | 272 | __func__, max_segments); |
273 | } | 273 | } |
274 | 274 | ||
275 | q->limits.max_phys_segments = max_segments; | 275 | q->limits.max_phys_segments = max_segments; |
276 | } | 276 | } |
277 | EXPORT_SYMBOL(blk_queue_max_phys_segments); | 277 | EXPORT_SYMBOL(blk_queue_max_phys_segments); |
278 | 278 | ||
279 | /** | 279 | /** |
280 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue | 280 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue |
281 | * @q: the request queue for the device | 281 | * @q: the request queue for the device |
282 | * @max_segments: max number of segments | 282 | * @max_segments: max number of segments |
283 | * | 283 | * |
284 | * Description: | 284 | * Description: |
285 | * Enables a low level driver to set an upper limit on the number of | 285 | * Enables a low level driver to set an upper limit on the number of |
286 | * hw data segments in a request. This would be the largest number of | 286 | * hw data segments in a request. This would be the largest number of |
287 | * address/length pairs the host adapter can actually give at once | 287 | * address/length pairs the host adapter can actually give at once |
288 | * to the device. | 288 | * to the device. |
289 | **/ | 289 | **/ |
290 | void blk_queue_max_hw_segments(struct request_queue *q, | 290 | void blk_queue_max_hw_segments(struct request_queue *q, |
291 | unsigned short max_segments) | 291 | unsigned short max_segments) |
292 | { | 292 | { |
293 | if (!max_segments) { | 293 | if (!max_segments) { |
294 | max_segments = 1; | 294 | max_segments = 1; |
295 | printk(KERN_INFO "%s: set to minimum %d\n", | 295 | printk(KERN_INFO "%s: set to minimum %d\n", |
296 | __func__, max_segments); | 296 | __func__, max_segments); |
297 | } | 297 | } |
298 | 298 | ||
299 | q->limits.max_hw_segments = max_segments; | 299 | q->limits.max_hw_segments = max_segments; |
300 | } | 300 | } |
301 | EXPORT_SYMBOL(blk_queue_max_hw_segments); | 301 | EXPORT_SYMBOL(blk_queue_max_hw_segments); |
302 | 302 | ||
303 | /** | 303 | /** |
304 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | 304 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg |
305 | * @q: the request queue for the device | 305 | * @q: the request queue for the device |
306 | * @max_size: max size of segment in bytes | 306 | * @max_size: max size of segment in bytes |
307 | * | 307 | * |
308 | * Description: | 308 | * Description: |
309 | * Enables a low level driver to set an upper limit on the size of a | 309 | * Enables a low level driver to set an upper limit on the size of a |
310 | * coalesced segment | 310 | * coalesced segment |
311 | **/ | 311 | **/ |
312 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) | 312 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) |
313 | { | 313 | { |
314 | if (max_size < PAGE_CACHE_SIZE) { | 314 | if (max_size < PAGE_CACHE_SIZE) { |
315 | max_size = PAGE_CACHE_SIZE; | 315 | max_size = PAGE_CACHE_SIZE; |
316 | printk(KERN_INFO "%s: set to minimum %d\n", | 316 | printk(KERN_INFO "%s: set to minimum %d\n", |
317 | __func__, max_size); | 317 | __func__, max_size); |
318 | } | 318 | } |
319 | 319 | ||
320 | q->limits.max_segment_size = max_size; | 320 | q->limits.max_segment_size = max_size; |
321 | } | 321 | } |
322 | EXPORT_SYMBOL(blk_queue_max_segment_size); | 322 | EXPORT_SYMBOL(blk_queue_max_segment_size); |
323 | 323 | ||
324 | /** | 324 | /** |
325 | * blk_queue_logical_block_size - set logical block size for the queue | 325 | * blk_queue_logical_block_size - set logical block size for the queue |
326 | * @q: the request queue for the device | 326 | * @q: the request queue for the device |
327 | * @size: the logical block size, in bytes | 327 | * @size: the logical block size, in bytes |
328 | * | 328 | * |
329 | * Description: | 329 | * Description: |
330 | * This should be set to the lowest possible block size that the | 330 | * This should be set to the lowest possible block size that the |
331 | * storage device can address. The default of 512 covers most | 331 | * storage device can address. The default of 512 covers most |
332 | * hardware. | 332 | * hardware. |
333 | **/ | 333 | **/ |
334 | void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) | 334 | void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) |
335 | { | 335 | { |
336 | q->limits.logical_block_size = size; | 336 | q->limits.logical_block_size = size; |
337 | 337 | ||
338 | if (q->limits.physical_block_size < size) | 338 | if (q->limits.physical_block_size < size) |
339 | q->limits.physical_block_size = size; | 339 | q->limits.physical_block_size = size; |
340 | 340 | ||
341 | if (q->limits.io_min < q->limits.physical_block_size) | 341 | if (q->limits.io_min < q->limits.physical_block_size) |
342 | q->limits.io_min = q->limits.physical_block_size; | 342 | q->limits.io_min = q->limits.physical_block_size; |
343 | } | 343 | } |
344 | EXPORT_SYMBOL(blk_queue_logical_block_size); | 344 | EXPORT_SYMBOL(blk_queue_logical_block_size); |
345 | 345 | ||
346 | /** | 346 | /** |
347 | * blk_queue_physical_block_size - set physical block size for the queue | 347 | * blk_queue_physical_block_size - set physical block size for the queue |
348 | * @q: the request queue for the device | 348 | * @q: the request queue for the device |
349 | * @size: the physical block size, in bytes | 349 | * @size: the physical block size, in bytes |
350 | * | 350 | * |
351 | * Description: | 351 | * Description: |
352 | * This should be set to the lowest possible sector size that the | 352 | * This should be set to the lowest possible sector size that the |
353 | * hardware can operate on without reverting to read-modify-write | 353 | * hardware can operate on without reverting to read-modify-write |
354 | * operations. | 354 | * operations. |
355 | */ | 355 | */ |
356 | void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) | 356 | void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) |
357 | { | 357 | { |
358 | q->limits.physical_block_size = size; | 358 | q->limits.physical_block_size = size; |
359 | 359 | ||
360 | if (q->limits.physical_block_size < q->limits.logical_block_size) | 360 | if (q->limits.physical_block_size < q->limits.logical_block_size) |
361 | q->limits.physical_block_size = q->limits.logical_block_size; | 361 | q->limits.physical_block_size = q->limits.logical_block_size; |
362 | 362 | ||
363 | if (q->limits.io_min < q->limits.physical_block_size) | 363 | if (q->limits.io_min < q->limits.physical_block_size) |
364 | q->limits.io_min = q->limits.physical_block_size; | 364 | q->limits.io_min = q->limits.physical_block_size; |
365 | } | 365 | } |
366 | EXPORT_SYMBOL(blk_queue_physical_block_size); | 366 | EXPORT_SYMBOL(blk_queue_physical_block_size); |
367 | 367 | ||
368 | /** | 368 | /** |
369 | * blk_queue_alignment_offset - set physical block alignment offset | 369 | * blk_queue_alignment_offset - set physical block alignment offset |
370 | * @q: the request queue for the device | 370 | * @q: the request queue for the device |
371 | * @offset: alignment offset in bytes | 371 | * @offset: alignment offset in bytes |
372 | * | 372 | * |
373 | * Description: | 373 | * Description: |
374 | * Some devices are naturally misaligned to compensate for things like | 374 | * Some devices are naturally misaligned to compensate for things like |
375 | * the legacy DOS partition table 63-sector offset. Low-level drivers | 375 | * the legacy DOS partition table 63-sector offset. Low-level drivers |
376 | * should call this function for devices whose first sector is not | 376 | * should call this function for devices whose first sector is not |
377 | * naturally aligned. | 377 | * naturally aligned. |
378 | */ | 378 | */ |
379 | void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) | 379 | void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) |
380 | { | 380 | { |
381 | q->limits.alignment_offset = | 381 | q->limits.alignment_offset = |
382 | offset & (q->limits.physical_block_size - 1); | 382 | offset & (q->limits.physical_block_size - 1); |
383 | q->limits.misaligned = 0; | 383 | q->limits.misaligned = 0; |
384 | } | 384 | } |
385 | EXPORT_SYMBOL(blk_queue_alignment_offset); | 385 | EXPORT_SYMBOL(blk_queue_alignment_offset); |
386 | 386 | ||
387 | /** | 387 | /** |
388 | * blk_limits_io_min - set minimum request size for a device | 388 | * blk_limits_io_min - set minimum request size for a device |
389 | * @limits: the queue limits | 389 | * @limits: the queue limits |
390 | * @min: smallest I/O size in bytes | 390 | * @min: smallest I/O size in bytes |
391 | * | 391 | * |
392 | * Description: | 392 | * Description: |
393 | * Some devices have an internal block size bigger than the reported | 393 | * Some devices have an internal block size bigger than the reported |
394 | * hardware sector size. This function can be used to signal the | 394 | * hardware sector size. This function can be used to signal the |
395 | * smallest I/O the device can perform without incurring a performance | 395 | * smallest I/O the device can perform without incurring a performance |
396 | * penalty. | 396 | * penalty. |
397 | */ | 397 | */ |
398 | void blk_limits_io_min(struct queue_limits *limits, unsigned int min) | 398 | void blk_limits_io_min(struct queue_limits *limits, unsigned int min) |
399 | { | 399 | { |
400 | limits->io_min = min; | 400 | limits->io_min = min; |
401 | 401 | ||
402 | if (limits->io_min < limits->logical_block_size) | 402 | if (limits->io_min < limits->logical_block_size) |
403 | limits->io_min = limits->logical_block_size; | 403 | limits->io_min = limits->logical_block_size; |
404 | 404 | ||
405 | if (limits->io_min < limits->physical_block_size) | 405 | if (limits->io_min < limits->physical_block_size) |
406 | limits->io_min = limits->physical_block_size; | 406 | limits->io_min = limits->physical_block_size; |
407 | } | 407 | } |
408 | EXPORT_SYMBOL(blk_limits_io_min); | 408 | EXPORT_SYMBOL(blk_limits_io_min); |
409 | 409 | ||
410 | /** | 410 | /** |
411 | * blk_queue_io_min - set minimum request size for the queue | 411 | * blk_queue_io_min - set minimum request size for the queue |
412 | * @q: the request queue for the device | 412 | * @q: the request queue for the device |
413 | * @min: smallest I/O size in bytes | 413 | * @min: smallest I/O size in bytes |
414 | * | 414 | * |
415 | * Description: | 415 | * Description: |
416 | * Some devices have an internal block size bigger than the reported | 416 | * Storage devices may report a granularity or preferred minimum I/O |
417 | * hardware sector size. This function can be used to signal the | 417 | * size which is the smallest request the device can perform without |
418 | * smallest I/O the device can perform without incurring a performance | 418 | * incurring a performance penalty. For disk drives this is often the |
419 | * penalty. | 419 | * physical block size. For RAID arrays it is often the stripe chunk |
420 | * size. A properly aligned multiple of minimum_io_size is the | ||
421 | * preferred request size for workloads where a high number of I/O | ||
422 | * operations is desired. | ||
420 | */ | 423 | */ |
421 | void blk_queue_io_min(struct request_queue *q, unsigned int min) | 424 | void blk_queue_io_min(struct request_queue *q, unsigned int min) |
422 | { | 425 | { |
423 | blk_limits_io_min(&q->limits, min); | 426 | blk_limits_io_min(&q->limits, min); |
424 | } | 427 | } |
425 | EXPORT_SYMBOL(blk_queue_io_min); | 428 | EXPORT_SYMBOL(blk_queue_io_min); |
426 | 429 | ||
427 | /** | 430 | /** |
428 | * blk_queue_io_opt - set optimal request size for the queue | 431 | * blk_queue_io_opt - set optimal request size for the queue |
429 | * @q: the request queue for the device | 432 | * @q: the request queue for the device |
430 | * @opt: optimal request size in bytes | 433 | * @opt: optimal request size in bytes |
431 | * | 434 | * |
432 | * Description: | 435 | * Description: |
433 | * Drivers can call this function to set the preferred I/O request | 436 | * Storage devices may report an optimal I/O size, which is the |
434 | * size for devices that report such a value. | 437 | * device's preferred unit for sustained I/O. This is rarely reported |
438 | * for disk drives. For RAID arrays it is usually the stripe width or | ||
439 | * the internal track size. A properly aligned multiple of | ||
440 | * optimal_io_size is the preferred request size for workloads where | ||
441 | * sustained throughput is desired. | ||
435 | */ | 442 | */ |
436 | void blk_queue_io_opt(struct request_queue *q, unsigned int opt) | 443 | void blk_queue_io_opt(struct request_queue *q, unsigned int opt) |
437 | { | 444 | { |
438 | q->limits.io_opt = opt; | 445 | q->limits.io_opt = opt; |
439 | } | 446 | } |
440 | EXPORT_SYMBOL(blk_queue_io_opt); | 447 | EXPORT_SYMBOL(blk_queue_io_opt); |
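As a usage illustration (an editor's sketch, not part of this commit), a low-level driver would typically publish its topology with the setters above during initialization. The concrete values below -- 512-byte logical blocks, 4 KB physical sectors and a 64 KB optimal transfer size -- are assumptions chosen only for the example.

/*
 * Hedged sketch of a driver announcing its I/O topology; the numbers
 * are illustrative assumptions, not taken from this commit.
 */
static void example_announce_topology(struct request_queue *q)
{
        blk_queue_logical_block_size(q, 512);   /* smallest addressable unit */
        blk_queue_physical_block_size(q, 4096); /* smallest atomic write unit */
        blk_queue_alignment_offset(q, 0);       /* first LBA is naturally aligned */
        blk_queue_io_min(q, 4096);              /* smallest penalty-free request */
        blk_queue_io_opt(q, 65536);             /* preferred size for sustained I/O */
}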
441 | 448 | ||
442 | /* | 449 | /* |
443 | * Returns the minimum that is _not_ zero, unless both are zero. | 450 | * Returns the minimum that is _not_ zero, unless both are zero. |
444 | */ | 451 | */ |
445 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | 452 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) |
446 | 453 | ||
447 | /** | 454 | /** |
448 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers | 455 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers |
449 | * @t: the stacking driver (top) | 456 | * @t: the stacking driver (top) |
450 | * @b: the underlying device (bottom) | 457 | * @b: the underlying device (bottom) |
451 | **/ | 458 | **/ |
452 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) | 459 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) |
453 | { | 460 | { |
454 | blk_stack_limits(&t->limits, &b->limits, 0); | 461 | blk_stack_limits(&t->limits, &b->limits, 0); |
455 | 462 | ||
456 | if (!t->queue_lock) | 463 | if (!t->queue_lock) |
457 | WARN_ON_ONCE(1); | 464 | WARN_ON_ONCE(1); |
458 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { | 465 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { |
459 | unsigned long flags; | 466 | unsigned long flags; |
460 | spin_lock_irqsave(t->queue_lock, flags); | 467 | spin_lock_irqsave(t->queue_lock, flags); |
461 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); | 468 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); |
462 | spin_unlock_irqrestore(t->queue_lock, flags); | 469 | spin_unlock_irqrestore(t->queue_lock, flags); |
463 | } | 470 | } |
464 | } | 471 | } |
465 | EXPORT_SYMBOL(blk_queue_stack_limits); | 472 | EXPORT_SYMBOL(blk_queue_stack_limits); |
466 | 473 | ||
467 | /** | 474 | /** |
468 | * blk_stack_limits - adjust queue_limits for stacked devices | 475 | * blk_stack_limits - adjust queue_limits for stacked devices |
469 | * @t: the stacking driver limits (top) | 476 | * @t: the stacking driver limits (top) |
470 | * @b: the underlying queue limits (bottom) | 477 | * @b: the underlying queue limits (bottom) |
471 | * @offset: offset to beginning of data within component device | 478 | * @offset: offset to beginning of data within component device |
472 | * | 479 | * |
473 | * Description: | 480 | * Description: |
474 | * Merges two queue_limit structs. Returns 0 if alignment didn't | 481 | * Merges two queue_limit structs. Returns 0 if alignment didn't |
475 | * change. Returns -1 if adding the bottom device caused | 482 | * change. Returns -1 if adding the bottom device caused |
476 | * misalignment. | 483 | * misalignment. |
477 | */ | 484 | */ |
478 | int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | 485 | int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
479 | sector_t offset) | 486 | sector_t offset) |
480 | { | 487 | { |
481 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); | 488 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); |
482 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); | 489 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); |
483 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); | 490 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); |
484 | 491 | ||
485 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, | 492 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, |
486 | b->seg_boundary_mask); | 493 | b->seg_boundary_mask); |
487 | 494 | ||
488 | t->max_phys_segments = min_not_zero(t->max_phys_segments, | 495 | t->max_phys_segments = min_not_zero(t->max_phys_segments, |
489 | b->max_phys_segments); | 496 | b->max_phys_segments); |
490 | 497 | ||
491 | t->max_hw_segments = min_not_zero(t->max_hw_segments, | 498 | t->max_hw_segments = min_not_zero(t->max_hw_segments, |
492 | b->max_hw_segments); | 499 | b->max_hw_segments); |
493 | 500 | ||
494 | t->max_segment_size = min_not_zero(t->max_segment_size, | 501 | t->max_segment_size = min_not_zero(t->max_segment_size, |
495 | b->max_segment_size); | 502 | b->max_segment_size); |
496 | 503 | ||
497 | t->logical_block_size = max(t->logical_block_size, | 504 | t->logical_block_size = max(t->logical_block_size, |
498 | b->logical_block_size); | 505 | b->logical_block_size); |
499 | 506 | ||
500 | t->physical_block_size = max(t->physical_block_size, | 507 | t->physical_block_size = max(t->physical_block_size, |
501 | b->physical_block_size); | 508 | b->physical_block_size); |
502 | 509 | ||
503 | t->io_min = max(t->io_min, b->io_min); | 510 | t->io_min = max(t->io_min, b->io_min); |
504 | t->no_cluster |= b->no_cluster; | 511 | t->no_cluster |= b->no_cluster; |
505 | 512 | ||
506 | /* Bottom device offset aligned? */ | 513 | /* Bottom device offset aligned? */ |
507 | if (offset && | 514 | if (offset && |
508 | (offset & (b->physical_block_size - 1)) != b->alignment_offset) { | 515 | (offset & (b->physical_block_size - 1)) != b->alignment_offset) { |
509 | t->misaligned = 1; | 516 | t->misaligned = 1; |
510 | return -1; | 517 | return -1; |
511 | } | 518 | } |
512 | 519 | ||
513 | /* If top has no alignment offset, inherit from bottom */ | 520 | /* If top has no alignment offset, inherit from bottom */ |
514 | if (!t->alignment_offset) | 521 | if (!t->alignment_offset) |
515 | t->alignment_offset = | 522 | t->alignment_offset = |
516 | b->alignment_offset & (b->physical_block_size - 1); | 523 | b->alignment_offset & (b->physical_block_size - 1); |
517 | 524 | ||
518 | /* Top device aligned on logical block boundary? */ | 525 | /* Top device aligned on logical block boundary? */ |
519 | if (t->alignment_offset & (t->logical_block_size - 1)) { | 526 | if (t->alignment_offset & (t->logical_block_size - 1)) { |
520 | t->misaligned = 1; | 527 | t->misaligned = 1; |
521 | return -1; | 528 | return -1; |
522 | } | 529 | } |
523 | 530 | ||
524 | /* Find lcm() of optimal I/O size */ | 531 | /* Find lcm() of optimal I/O size */ |
525 | if (t->io_opt && b->io_opt) | 532 | if (t->io_opt && b->io_opt) |
526 | t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt); | 533 | t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt); |
527 | else if (b->io_opt) | 534 | else if (b->io_opt) |
528 | t->io_opt = b->io_opt; | 535 | t->io_opt = b->io_opt; |
529 | 536 | ||
530 | /* Verify that optimal I/O size is a multiple of io_min */ | 537 | /* Verify that optimal I/O size is a multiple of io_min */ |
531 | if (t->io_min && t->io_opt % t->io_min) | 538 | if (t->io_min && t->io_opt % t->io_min) |
532 | return -1; | 539 | return -1; |
533 | 540 | ||
534 | return 0; | 541 | return 0; |
535 | } | 542 | } |
536 | EXPORT_SYMBOL(blk_stack_limits); | 543 | EXPORT_SYMBOL(blk_stack_limits); |
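For context (an editor's illustration, not from this commit), a stacking driver such as MD or DM combines component limits with blk_stack_limits(); io_opt values are merged using their least common multiple, so component chunk sizes of 64 KB and 96 KB, for instance, yield a 192 KB optimal size for the stacked device. A minimal sketch, assuming two already-populated component limit structs:

/*
 * Hedged sketch: merging the limits of two hypothetical component
 * devices into a fresh top-level queue_limits struct.
 */
static int example_stack_two(struct queue_limits *top,
                             struct queue_limits *bottom0,
                             struct queue_limits *bottom1)
{
        int misaligned = 0;

        blk_set_default_limits(top);

        /* offset 0 assumes data starts at the beginning of each component */
        if (blk_stack_limits(top, bottom0, 0) < 0)
                misaligned = 1;
        if (blk_stack_limits(top, bottom1, 0) < 0)
                misaligned = 1;

        return misaligned ? -1 : 0;     /* -1: stacked device is misaligned */
}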
537 | 544 | ||
538 | /** | 545 | /** |
539 | * disk_stack_limits - adjust queue limits for stacked drivers | 546 | * disk_stack_limits - adjust queue limits for stacked drivers |
540 | * @disk: MD/DM gendisk (top) | 547 | * @disk: MD/DM gendisk (top) |
541 | * @bdev: the underlying block device (bottom) | 548 | * @bdev: the underlying block device (bottom) |
542 | * @offset: offset to beginning of data within component device | 549 | * @offset: offset to beginning of data within component device |
543 | * | 550 | * |
544 | * Description: | 551 | * Description: |
545 | * Merges the limits for two queues. Returns 0 if alignment | 552 | * Merges the limits for two queues. Returns 0 if alignment |
546 | * didn't change. Returns -1 if adding the bottom device caused | 553 | * didn't change. Returns -1 if adding the bottom device caused |
547 | * misalignment. | 554 | * misalignment. |
548 | */ | 555 | */ |
549 | void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | 556 | void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, |
550 | sector_t offset) | 557 | sector_t offset) |
551 | { | 558 | { |
552 | struct request_queue *t = disk->queue; | 559 | struct request_queue *t = disk->queue; |
553 | struct request_queue *b = bdev_get_queue(bdev); | 560 | struct request_queue *b = bdev_get_queue(bdev); |
554 | 561 | ||
555 | offset += get_start_sect(bdev) << 9; | 562 | offset += get_start_sect(bdev) << 9; |
556 | 563 | ||
557 | if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { | 564 | if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { |
558 | char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; | 565 | char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; |
559 | 566 | ||
560 | disk_name(disk, 0, top); | 567 | disk_name(disk, 0, top); |
561 | bdevname(bdev, bottom); | 568 | bdevname(bdev, bottom); |
562 | 569 | ||
563 | printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", | 570 | printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", |
564 | top, bottom); | 571 | top, bottom); |
565 | } | 572 | } |
566 | 573 | ||
567 | if (!t->queue_lock) | 574 | if (!t->queue_lock) |
568 | WARN_ON_ONCE(1); | 575 | WARN_ON_ONCE(1); |
569 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { | 576 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { |
570 | unsigned long flags; | 577 | unsigned long flags; |
571 | 578 | ||
572 | spin_lock_irqsave(t->queue_lock, flags); | 579 | spin_lock_irqsave(t->queue_lock, flags); |
573 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) | 580 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) |
574 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); | 581 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); |
575 | spin_unlock_irqrestore(t->queue_lock, flags); | 582 | spin_unlock_irqrestore(t->queue_lock, flags); |
576 | } | 583 | } |
577 | } | 584 | } |
578 | EXPORT_SYMBOL(disk_stack_limits); | 585 | EXPORT_SYMBOL(disk_stack_limits); |
579 | 586 | ||
580 | /** | 587 | /** |
581 | * blk_queue_dma_pad - set pad mask | 588 | * blk_queue_dma_pad - set pad mask |
582 | * @q: the request queue for the device | 589 | * @q: the request queue for the device |
583 | * @mask: pad mask | 590 | * @mask: pad mask |
584 | * | 591 | * |
585 | * Set dma pad mask. | 592 | * Set dma pad mask. |
586 | * | 593 | * |
587 | * Appending pad buffer to a request modifies the last entry of a | 594 | * Appending pad buffer to a request modifies the last entry of a |
588 | * scatter list such that it includes the pad buffer. | 595 | * scatter list such that it includes the pad buffer. |
589 | **/ | 596 | **/ |
590 | void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) | 597 | void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) |
591 | { | 598 | { |
592 | q->dma_pad_mask = mask; | 599 | q->dma_pad_mask = mask; |
593 | } | 600 | } |
594 | EXPORT_SYMBOL(blk_queue_dma_pad); | 601 | EXPORT_SYMBOL(blk_queue_dma_pad); |
595 | 602 | ||
596 | /** | 603 | /** |
597 | * blk_queue_update_dma_pad - update pad mask | 604 | * blk_queue_update_dma_pad - update pad mask |
598 | * @q: the request queue for the device | 605 | * @q: the request queue for the device |
599 | * @mask: pad mask | 606 | * @mask: pad mask |
600 | * | 607 | * |
601 | * Update dma pad mask. | 608 | * Update dma pad mask. |
602 | * | 609 | * |
603 | * Appending pad buffer to a request modifies the last entry of a | 610 | * Appending pad buffer to a request modifies the last entry of a |
604 | * scatter list such that it includes the pad buffer. | 611 | * scatter list such that it includes the pad buffer. |
605 | **/ | 612 | **/ |
606 | void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) | 613 | void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) |
607 | { | 614 | { |
608 | if (mask > q->dma_pad_mask) | 615 | if (mask > q->dma_pad_mask) |
609 | q->dma_pad_mask = mask; | 616 | q->dma_pad_mask = mask; |
610 | } | 617 | } |
611 | EXPORT_SYMBOL(blk_queue_update_dma_pad); | 618 | EXPORT_SYMBOL(blk_queue_update_dma_pad); |
612 | 619 | ||
613 | /** | 620 | /** |
614 | * blk_queue_dma_drain - Set up a drain buffer for excess dma. | 621 | * blk_queue_dma_drain - Set up a drain buffer for excess dma. |
615 | * @q: the request queue for the device | 622 | * @q: the request queue for the device |
616 | * @dma_drain_needed: fn which returns non-zero if drain is necessary | 623 | * @dma_drain_needed: fn which returns non-zero if drain is necessary |
617 | * @buf: physically contiguous buffer | 624 | * @buf: physically contiguous buffer |
618 | * @size: size of the buffer in bytes | 625 | * @size: size of the buffer in bytes |
619 | * | 626 | * |
620 | * Some devices have excess DMA problems and can't simply discard (or | 627 | * Some devices have excess DMA problems and can't simply discard (or |
621 | * zero fill) the unwanted piece of the transfer. They have to have a | 628 | * zero fill) the unwanted piece of the transfer. They have to have a |
622 | * real area of memory to transfer it into. The use case for this is | 629 | * real area of memory to transfer it into. The use case for this is |
623 | * ATAPI devices in DMA mode. If the packet command causes a transfer | 630 | * ATAPI devices in DMA mode. If the packet command causes a transfer |
624 | * bigger than the transfer size, some HBAs will lock up if there | 631 | * bigger than the transfer size, some HBAs will lock up if there |
625 | * aren't DMA elements to contain the excess transfer. What this API | 632 | * aren't DMA elements to contain the excess transfer. What this API |
626 | * does is adjust the queue so that the buf is always appended | 633 | * does is adjust the queue so that the buf is always appended |
627 | * silently to the scatterlist. | 634 | * silently to the scatterlist. |
628 | * | 635 | * |
629 | * Note: This routine adjusts max_hw_segments to make room for | 636 | * Note: This routine adjusts max_hw_segments to make room for |
630 | * appending the drain buffer. If you call | 637 | * appending the drain buffer. If you call |
631 | * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after | 638 | * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after |
632 | * calling this routine, you must set the limit to one fewer than your | 639 | * calling this routine, you must set the limit to one fewer than your |
633 | * device can support, otherwise there won't be room for the drain | 640 | * device can support, otherwise there won't be room for the drain |
634 | * buffer. | 641 | * buffer. |
635 | */ | 642 | */ |
636 | int blk_queue_dma_drain(struct request_queue *q, | 643 | int blk_queue_dma_drain(struct request_queue *q, |
637 | dma_drain_needed_fn *dma_drain_needed, | 644 | dma_drain_needed_fn *dma_drain_needed, |
638 | void *buf, unsigned int size) | 645 | void *buf, unsigned int size) |
639 | { | 646 | { |
640 | if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2) | 647 | if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2) |
641 | return -EINVAL; | 648 | return -EINVAL; |
642 | /* make room for appending the drain */ | 649 | /* make room for appending the drain */ |
643 | blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1); | 650 | blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1); |
644 | blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1); | 651 | blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1); |
645 | q->dma_drain_needed = dma_drain_needed; | 652 | q->dma_drain_needed = dma_drain_needed; |
646 | q->dma_drain_buffer = buf; | 653 | q->dma_drain_buffer = buf; |
647 | q->dma_drain_size = size; | 654 | q->dma_drain_size = size; |
648 | 655 | ||
649 | return 0; | 656 | return 0; |
650 | } | 657 | } |
651 | EXPORT_SYMBOL_GPL(blk_queue_dma_drain); | 658 | EXPORT_SYMBOL_GPL(blk_queue_dma_drain); |
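An ATAPI-style driver would typically allocate one drain buffer per device and register it together with a callback that tells the block layer which commands might over-transfer. The sketch below is hypothetical apart from blk_queue_dma_drain() and its return convention; the buffer size and function names are assumptions:

	#include <linux/blkdev.h>
	#include <linux/slab.h>

	/* Hypothetical: flag packet (SG_IO-style) commands as candidates
	 * for over-transfer so the drain buffer gets appended. */
	static int example_needs_drain(struct request *rq)
	{
		return blk_pc_request(rq);
	}

	/* Hypothetical registration; note that blk_queue_dma_drain()
	 * reduces max_hw_segments and max_phys_segments by one. */
	static int example_setup_drain(struct request_queue *q)
	{
		void *buf = kmalloc(4096, GFP_KERNEL);

		if (!buf)
			return -ENOMEM;

		return blk_queue_dma_drain(q, example_needs_drain, buf, 4096);
	}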
652 | 659 | ||
653 | /** | 660 | /** |
654 | * blk_queue_segment_boundary - set boundary rules for segment merging | 661 | * blk_queue_segment_boundary - set boundary rules for segment merging |
655 | * @q: the request queue for the device | 662 | * @q: the request queue for the device |
656 | * @mask: the memory boundary mask | 663 | * @mask: the memory boundary mask |
657 | **/ | 664 | **/ |
658 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) | 665 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) |
659 | { | 666 | { |
660 | if (mask < PAGE_CACHE_SIZE - 1) { | 667 | if (mask < PAGE_CACHE_SIZE - 1) { |
661 | mask = PAGE_CACHE_SIZE - 1; | 668 | mask = PAGE_CACHE_SIZE - 1; |
662 | printk(KERN_INFO "%s: set to minimum %lx\n", | 669 | printk(KERN_INFO "%s: set to minimum %lx\n", |
663 | __func__, mask); | 670 | __func__, mask); |
664 | } | 671 | } |
665 | 672 | ||
666 | q->limits.seg_boundary_mask = mask; | 673 | q->limits.seg_boundary_mask = mask; |
667 | } | 674 | } |
668 | EXPORT_SYMBOL(blk_queue_segment_boundary); | 675 | EXPORT_SYMBOL(blk_queue_segment_boundary); |
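For instance, a controller whose DMA engine cannot let a single segment cross a 64 KB boundary would pass a boundary mask of 0xffff; the wrapper below is a hypothetical sketch around the real blk_queue_segment_boundary() call:

	#include <linux/blkdev.h>

	/* Hypothetical: cap segment merging so no DMA element straddles
	 * a 64 KB boundary. */
	static void example_set_boundary(struct request_queue *q)
	{
		blk_queue_segment_boundary(q, 0xffff);
	}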
669 | 676 | ||
670 | /** | 677 | /** |
671 | * blk_queue_dma_alignment - set dma length and memory alignment | 678 | * blk_queue_dma_alignment - set dma length and memory alignment |
672 | * @q: the request queue for the device | 679 | * @q: the request queue for the device |
673 | * @mask: alignment mask | 680 | * @mask: alignment mask |
674 | * | 681 | * |
675 | * Description: | 682 | * Description: |
676 | * Set required memory and length alignment for direct DMA transactions. | 683 | * Set required memory and length alignment for direct DMA transactions. |
677 | * This is used when building direct I/O requests for the queue. | 684 | * This is used when building direct I/O requests for the queue. |
678 | * | 685 | * |
679 | **/ | 686 | **/ |
680 | void blk_queue_dma_alignment(struct request_queue *q, int mask) | 687 | void blk_queue_dma_alignment(struct request_queue *q, int mask) |
681 | { | 688 | { |
682 | q->dma_alignment = mask; | 689 | q->dma_alignment = mask; |
683 | } | 690 | } |
684 | EXPORT_SYMBOL(blk_queue_dma_alignment); | 691 | EXPORT_SYMBOL(blk_queue_dma_alignment); |
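As an illustration, a driver that requires direct-I/O buffers to be 512-byte aligned and a multiple of 512 bytes long would pass a mask of 511; the setup function below is hypothetical:

	#include <linux/blkdev.h>

	/* Hypothetical: require 512-byte alignment and length granularity
	 * for direct I/O on this queue (mask = alignment - 1). */
	static void example_set_alignment(struct request_queue *q)
	{
		blk_queue_dma_alignment(q, 512 - 1);
	}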
685 | 692 | ||
686 | /** | 693 | /** |
687 | * blk_queue_update_dma_alignment - update dma length and memory alignment | 694 | * blk_queue_update_dma_alignment - update dma length and memory alignment |
688 | * @q: the request queue for the device | 695 | * @q: the request queue for the device |
689 | * @mask: alignment mask | 696 | * @mask: alignment mask |
690 | * | 697 | * |
691 | * Description: | 698 | * Description: |
692 | * Update required memory and length alignment for direct DMA transactions. | 699 | * Update required memory and length alignment for direct DMA transactions. |
693 | * If the requested alignment is larger than the current alignment, then | 700 | * If the requested alignment is larger than the current alignment, then |
694 | * the current queue alignment is updated to the new value, otherwise it | 701 | * the current queue alignment is updated to the new value, otherwise it |
695 | * is left alone. The design of this is to allow multiple objects | 702 | * is left alone. The design of this is to allow multiple objects |
696 | * (driver, device, transport, etc.) to set their respective | 703 | * (driver, device, transport, etc.) to set their respective |
697 | * alignments without having them interfere. | 704 | * alignments without having them interfere. |
698 | * | 705 | * |
699 | **/ | 706 | **/ |
700 | void blk_queue_update_dma_alignment(struct request_queue *q, int mask) | 707 | void blk_queue_update_dma_alignment(struct request_queue *q, int mask) |
701 | { | 708 | { |
702 | BUG_ON(mask > PAGE_SIZE); | 709 | BUG_ON(mask > PAGE_SIZE); |
703 | 710 | ||
704 | if (mask > q->dma_alignment) | 711 | if (mask > q->dma_alignment) |
705 | q->dma_alignment = mask; | 712 | q->dma_alignment = mask; |
706 | } | 713 | } |
707 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); | 714 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); |
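Because the update variant only raises the mask, a transport and a device can each declare their own alignment requirement in any order and the stricter one prevails. A hedged sketch, with both wrappers hypothetical:

	#include <linux/blkdev.h>

	/* Two layers declare alignment needs; the larger mask is kept. */
	static void transport_set_alignment(struct request_queue *q)
	{
		blk_queue_update_dma_alignment(q, 4 - 1);	/* 4-byte alignment */
	}

	static void device_set_alignment(struct request_queue *q)
	{
		blk_queue_update_dma_alignment(q, 512 - 1);	/* stricter; this wins */
	}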
708 | 715 | ||
709 | static int __init blk_settings_init(void) | 716 | static int __init blk_settings_init(void) |
710 | { | 717 | { |
711 | blk_max_low_pfn = max_low_pfn - 1; | 718 | blk_max_low_pfn = max_low_pfn - 1; |
712 | blk_max_pfn = max_pfn - 1; | 719 | blk_max_pfn = max_pfn - 1; |
713 | return 0; | 720 | return 0; |
714 | } | 721 | } |
715 | subsys_initcall(blk_settings_init); | 722 | subsys_initcall(blk_settings_init); |
716 | 723 |