Commit: 4913efe456c987057e5d36a3f0a55422a9072cae
Author: Tejun Heo
Committer: Jens Axboe
Parent: 6958f14545

block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush()

Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA
requests.  Deprecate barrier.  All REQ_HARDBARRIER requests now fail
with -EOPNOTSUPP, and blk_queue_ordered() is replaced with the simpler
blk_queue_flush().
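
Until the FLUSH/FUA conversion lands, callers that still issue
barriers have to cope with that failure; a caller-side sketch
(illustrative only, not part of this patch, assuming the current
blkdev_issue_flush() signature shown further down in the diff):

  int err = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
  if (err == -EOPNOTSUPP)
          /* queue advertises no ordering, the barrier was refused */
          err = 0;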

blk_queue_flush() takes combinations of REQ_FLUSH and REQ_FUA.  If a
device has a write cache and can flush it, it should set REQ_FLUSH.  If
the device can handle FUA writes, it should also set REQ_FUA.
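
A minimal sketch of the driver side, assuming a hypothetical setup
helper for a device whose volatile write cache can be flushed and
which also handles FUA writes:

  #include <linux/blkdev.h>

  static void example_setup_flush(struct request_queue *q)
  {
          /* write cache present and flushable -> REQ_FLUSH,
           * FUA writes handled by the device  -> REQ_FUA */
          blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
  }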

All blk_queue_ordered() users are converted.

* ORDERED_DRAIN is mapped to 0 which is the default value.
* ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH.
* ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA.
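
For instance, a driver that previously advertised drain + flush
ordering maps over as follows (illustrative only, not taken from a
specific driver in this series):

  /* before */
  blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);

  /* after */
  blk_queue_flush(q, REQ_FLUSH);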

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 15 changed files with 67 additions and 102 deletions

block/blk-barrier.c:
1 /* 1 /*
2 * Functions related to barrier IO handling 2 * Functions related to barrier IO handling
3 */ 3 */
4 #include <linux/kernel.h> 4 #include <linux/kernel.h>
5 #include <linux/module.h> 5 #include <linux/module.h>
6 #include <linux/bio.h> 6 #include <linux/bio.h>
7 #include <linux/blkdev.h> 7 #include <linux/blkdev.h>
8 #include <linux/gfp.h> 8 #include <linux/gfp.h>
9 9
10 #include "blk.h" 10 #include "blk.h"
11 11
12 /**
13 * blk_queue_ordered - does this queue support ordered writes
14 * @q: the request queue
15 * @ordered: one of QUEUE_ORDERED_*
16 *
17 * Description:
18 * For journalled file systems, doing ordered writes on a commit
19 * block instead of explicitly doing wait_on_buffer (which is bad
20 * for performance) can be a big win. Block drivers supporting this
21 * feature should call this function and indicate so.
22 *
23 **/
24 int blk_queue_ordered(struct request_queue *q, unsigned ordered)
25 {
26 if (ordered != QUEUE_ORDERED_NONE &&
27 ordered != QUEUE_ORDERED_DRAIN &&
28 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
29 ordered != QUEUE_ORDERED_DRAIN_FUA) {
30 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
31 return -EINVAL;
32 }
33
34 q->ordered = ordered;
35 q->next_ordered = ordered;
36
37 return 0;
38 }
39 EXPORT_SYMBOL(blk_queue_ordered);
40
41 /* 12 /*
42 * Cache flushing for ordered writes handling 13 * Cache flushing for ordered writes handling
43 */ 14 */
44 unsigned blk_ordered_cur_seq(struct request_queue *q) 15 unsigned blk_ordered_cur_seq(struct request_queue *q)
45 { 16 {
46 if (!q->ordseq) 17 if (!q->ordseq)
47 return 0; 18 return 0;
48 return 1 << ffz(q->ordseq); 19 return 1 << ffz(q->ordseq);
49 } 20 }
50 21
51 unsigned blk_ordered_req_seq(struct request *rq) 22 unsigned blk_ordered_req_seq(struct request *rq)
52 { 23 {
53 struct request_queue *q = rq->q; 24 struct request_queue *q = rq->q;
54 25
55 BUG_ON(q->ordseq == 0); 26 BUG_ON(q->ordseq == 0);
56 27
57 if (rq == &q->pre_flush_rq) 28 if (rq == &q->pre_flush_rq)
58 return QUEUE_ORDSEQ_PREFLUSH; 29 return QUEUE_ORDSEQ_PREFLUSH;
59 if (rq == &q->bar_rq) 30 if (rq == &q->bar_rq)
60 return QUEUE_ORDSEQ_BAR; 31 return QUEUE_ORDSEQ_BAR;
61 if (rq == &q->post_flush_rq) 32 if (rq == &q->post_flush_rq)
62 return QUEUE_ORDSEQ_POSTFLUSH; 33 return QUEUE_ORDSEQ_POSTFLUSH;
63 34
64 /* 35 /*
65 * !fs requests don't need to follow barrier ordering. Always 36 * !fs requests don't need to follow barrier ordering. Always
66 * put them at the front. This fixes the following deadlock. 37 * put them at the front. This fixes the following deadlock.
67 * 38 *
68 * http://thread.gmane.org/gmane.linux.kernel/537473 39 * http://thread.gmane.org/gmane.linux.kernel/537473
69 */ 40 */
70 if (rq->cmd_type != REQ_TYPE_FS) 41 if (rq->cmd_type != REQ_TYPE_FS)
71 return QUEUE_ORDSEQ_DRAIN; 42 return QUEUE_ORDSEQ_DRAIN;
72 43
73 if ((rq->cmd_flags & REQ_ORDERED_COLOR) == 44 if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
74 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) 45 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
75 return QUEUE_ORDSEQ_DRAIN; 46 return QUEUE_ORDSEQ_DRAIN;
76 else 47 else
77 return QUEUE_ORDSEQ_DONE; 48 return QUEUE_ORDSEQ_DONE;
78 } 49 }
79 50
80 bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) 51 bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
81 { 52 {
82 struct request *rq; 53 struct request *rq;
83 54
84 if (error && !q->orderr) 55 if (error && !q->orderr)
85 q->orderr = error; 56 q->orderr = error;
86 57
87 BUG_ON(q->ordseq & seq); 58 BUG_ON(q->ordseq & seq);
88 q->ordseq |= seq; 59 q->ordseq |= seq;
89 60
90 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) 61 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
91 return false; 62 return false;
92 63
93 /* 64 /*
94 * Okay, sequence complete. 65 * Okay, sequence complete.
95 */ 66 */
96 q->ordseq = 0; 67 q->ordseq = 0;
97 rq = q->orig_bar_rq; 68 rq = q->orig_bar_rq;
98 __blk_end_request_all(rq, q->orderr); 69 __blk_end_request_all(rq, q->orderr);
99 return true; 70 return true;
100 } 71 }
101 72
102 static void pre_flush_end_io(struct request *rq, int error) 73 static void pre_flush_end_io(struct request *rq, int error)
103 { 74 {
104 elv_completed_request(rq->q, rq); 75 elv_completed_request(rq->q, rq);
105 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); 76 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
106 } 77 }
107 78
108 static void bar_end_io(struct request *rq, int error) 79 static void bar_end_io(struct request *rq, int error)
109 { 80 {
110 elv_completed_request(rq->q, rq); 81 elv_completed_request(rq->q, rq);
111 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); 82 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
112 } 83 }
113 84
114 static void post_flush_end_io(struct request *rq, int error) 85 static void post_flush_end_io(struct request *rq, int error)
115 { 86 {
116 elv_completed_request(rq->q, rq); 87 elv_completed_request(rq->q, rq);
117 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); 88 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
118 } 89 }
119 90
120 static void queue_flush(struct request_queue *q, unsigned which) 91 static void queue_flush(struct request_queue *q, unsigned which)
121 { 92 {
122 struct request *rq; 93 struct request *rq;
123 rq_end_io_fn *end_io; 94 rq_end_io_fn *end_io;
124 95
125 if (which == QUEUE_ORDERED_DO_PREFLUSH) { 96 if (which == QUEUE_ORDERED_DO_PREFLUSH) {
126 rq = &q->pre_flush_rq; 97 rq = &q->pre_flush_rq;
127 end_io = pre_flush_end_io; 98 end_io = pre_flush_end_io;
128 } else { 99 } else {
129 rq = &q->post_flush_rq; 100 rq = &q->post_flush_rq;
130 end_io = post_flush_end_io; 101 end_io = post_flush_end_io;
131 } 102 }
132 103
133 blk_rq_init(q, rq); 104 blk_rq_init(q, rq);
134 rq->cmd_type = REQ_TYPE_FS; 105 rq->cmd_type = REQ_TYPE_FS;
135 rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; 106 rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
136 rq->rq_disk = q->orig_bar_rq->rq_disk; 107 rq->rq_disk = q->orig_bar_rq->rq_disk;
137 rq->end_io = end_io; 108 rq->end_io = end_io;
138 109
139 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 110 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
140 } 111 }
141 112
142 static inline bool start_ordered(struct request_queue *q, struct request **rqp) 113 static inline bool start_ordered(struct request_queue *q, struct request **rqp)
143 { 114 {
144 struct request *rq = *rqp; 115 struct request *rq = *rqp;
145 unsigned skip = 0; 116 unsigned skip = 0;
146 117
147 q->orderr = 0; 118 q->orderr = 0;
148 q->ordered = q->next_ordered; 119 q->ordered = q->next_ordered;
149 q->ordseq |= QUEUE_ORDSEQ_STARTED; 120 q->ordseq |= QUEUE_ORDSEQ_STARTED;
150 121
151 /* 122 /*
152 * For an empty barrier, there's no actual BAR request, which 123 * For an empty barrier, there's no actual BAR request, which
153 * in turn makes POSTFLUSH unnecessary. Mask them off. 124 * in turn makes POSTFLUSH unnecessary. Mask them off.
154 */ 125 */
155 if (!blk_rq_sectors(rq)) 126 if (!blk_rq_sectors(rq))
156 q->ordered &= ~(QUEUE_ORDERED_DO_BAR | 127 q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
157 QUEUE_ORDERED_DO_POSTFLUSH); 128 QUEUE_ORDERED_DO_POSTFLUSH);
158 129
159 /* stash away the original request */ 130 /* stash away the original request */
160 blk_dequeue_request(rq); 131 blk_dequeue_request(rq);
161 q->orig_bar_rq = rq; 132 q->orig_bar_rq = rq;
162 rq = NULL; 133 rq = NULL;
163 134
164 /* 135 /*
165 * Queue ordered sequence. As we stack them at the head, we 136 * Queue ordered sequence. As we stack them at the head, we
166 * need to queue in reverse order. Note that we rely on that 137 * need to queue in reverse order. Note that we rely on that
167 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs 138 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
168 * request gets inbetween ordered sequence. 139 * request gets inbetween ordered sequence.
169 */ 140 */
170 if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { 141 if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
171 queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); 142 queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
172 rq = &q->post_flush_rq; 143 rq = &q->post_flush_rq;
173 } else 144 } else
174 skip |= QUEUE_ORDSEQ_POSTFLUSH; 145 skip |= QUEUE_ORDSEQ_POSTFLUSH;
175 146
176 if (q->ordered & QUEUE_ORDERED_DO_BAR) { 147 if (q->ordered & QUEUE_ORDERED_DO_BAR) {
177 rq = &q->bar_rq; 148 rq = &q->bar_rq;
178 149
179 /* initialize proxy request and queue it */ 150 /* initialize proxy request and queue it */
180 blk_rq_init(q, rq); 151 blk_rq_init(q, rq);
181 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) 152 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
182 rq->cmd_flags |= REQ_WRITE; 153 rq->cmd_flags |= REQ_WRITE;
183 if (q->ordered & QUEUE_ORDERED_DO_FUA) 154 if (q->ordered & QUEUE_ORDERED_DO_FUA)
184 rq->cmd_flags |= REQ_FUA; 155 rq->cmd_flags |= REQ_FUA;
185 init_request_from_bio(rq, q->orig_bar_rq->bio); 156 init_request_from_bio(rq, q->orig_bar_rq->bio);
186 rq->end_io = bar_end_io; 157 rq->end_io = bar_end_io;
187 158
188 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 159 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
189 } else 160 } else
190 skip |= QUEUE_ORDSEQ_BAR; 161 skip |= QUEUE_ORDSEQ_BAR;
191 162
192 if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { 163 if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
193 queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); 164 queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
194 rq = &q->pre_flush_rq; 165 rq = &q->pre_flush_rq;
195 } else 166 } else
196 skip |= QUEUE_ORDSEQ_PREFLUSH; 167 skip |= QUEUE_ORDSEQ_PREFLUSH;
197 168
198 if (queue_in_flight(q)) 169 if (queue_in_flight(q))
199 rq = NULL; 170 rq = NULL;
200 else 171 else
201 skip |= QUEUE_ORDSEQ_DRAIN; 172 skip |= QUEUE_ORDSEQ_DRAIN;
202 173
203 *rqp = rq; 174 *rqp = rq;
204 175
205 /* 176 /*
206 * Complete skipped sequences. If whole sequence is complete, 177 * Complete skipped sequences. If whole sequence is complete,
207 * return false to tell elevator that this request is gone. 178 * return false to tell elevator that this request is gone.
208 */ 179 */
209 return !blk_ordered_complete_seq(q, skip, 0); 180 return !blk_ordered_complete_seq(q, skip, 0);
210 } 181 }
211 182
212 bool blk_do_ordered(struct request_queue *q, struct request **rqp) 183 bool blk_do_ordered(struct request_queue *q, struct request **rqp)
213 { 184 {
214 struct request *rq = *rqp; 185 struct request *rq = *rqp;
215 const int is_barrier = rq->cmd_type == REQ_TYPE_FS && 186 const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
216 (rq->cmd_flags & REQ_HARDBARRIER); 187 (rq->cmd_flags & REQ_HARDBARRIER);
217 188
218 if (!q->ordseq) { 189 if (!q->ordseq) {
219 if (!is_barrier) 190 if (!is_barrier)
220 return true; 191 return true;
221 192
222 if (q->next_ordered != QUEUE_ORDERED_NONE) 193 if (q->next_ordered != QUEUE_ORDERED_NONE)
223 return start_ordered(q, rqp); 194 return start_ordered(q, rqp);
224 else { 195 else {
225 /* 196 /*
226 * Queue ordering not supported. Terminate 197 * Queue ordering not supported. Terminate
227 * with prejudice. 198 * with prejudice.
228 */ 199 */
229 blk_dequeue_request(rq); 200 blk_dequeue_request(rq);
230 __blk_end_request_all(rq, -EOPNOTSUPP); 201 __blk_end_request_all(rq, -EOPNOTSUPP);
231 *rqp = NULL; 202 *rqp = NULL;
232 return false; 203 return false;
233 } 204 }
234 } 205 }
235 206
236 /* 207 /*
237 * Ordered sequence in progress 208 * Ordered sequence in progress
238 */ 209 */
239 210
240 /* Special requests are not subject to ordering rules. */ 211 /* Special requests are not subject to ordering rules. */
241 if (rq->cmd_type != REQ_TYPE_FS && 212 if (rq->cmd_type != REQ_TYPE_FS &&
242 rq != &q->pre_flush_rq && rq != &q->post_flush_rq) 213 rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
243 return true; 214 return true;
244 215
245 /* Ordered by draining. Wait for turn. */ 216 /* Ordered by draining. Wait for turn. */
246 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); 217 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
247 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) 218 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
248 *rqp = NULL; 219 *rqp = NULL;
249 220
250 return true; 221 return true;
251 } 222 }
252 223
253 static void bio_end_empty_barrier(struct bio *bio, int err) 224 static void bio_end_empty_barrier(struct bio *bio, int err)
254 { 225 {
255 if (err) { 226 if (err) {
256 if (err == -EOPNOTSUPP) 227 if (err == -EOPNOTSUPP)
257 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 228 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
258 clear_bit(BIO_UPTODATE, &bio->bi_flags); 229 clear_bit(BIO_UPTODATE, &bio->bi_flags);
259 } 230 }
260 if (bio->bi_private) 231 if (bio->bi_private)
261 complete(bio->bi_private); 232 complete(bio->bi_private);
262 bio_put(bio); 233 bio_put(bio);
263 } 234 }
264 235
265 /** 236 /**
266 * blkdev_issue_flush - queue a flush 237 * blkdev_issue_flush - queue a flush
267 * @bdev: blockdev to issue flush for 238 * @bdev: blockdev to issue flush for
268 * @gfp_mask: memory allocation flags (for bio_alloc) 239 * @gfp_mask: memory allocation flags (for bio_alloc)
269 * @error_sector: error sector 240 * @error_sector: error sector
270 * @flags: BLKDEV_IFL_* flags to control behaviour 241 * @flags: BLKDEV_IFL_* flags to control behaviour
271 * 242 *
272 * Description: 243 * Description:
273 * Issue a flush for the block device in question. Caller can supply 244 * Issue a flush for the block device in question. Caller can supply
274 * room for storing the error offset in case of a flush error, if they 245 * room for storing the error offset in case of a flush error, if they
275 * wish to. If WAIT flag is not passed then caller may check only what 246 * wish to. If WAIT flag is not passed then caller may check only what
276 * request was pushed in some internal queue for later handling. 247 * request was pushed in some internal queue for later handling.
277 */ 248 */
278 int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, 249 int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
279 sector_t *error_sector, unsigned long flags) 250 sector_t *error_sector, unsigned long flags)
280 { 251 {
281 DECLARE_COMPLETION_ONSTACK(wait); 252 DECLARE_COMPLETION_ONSTACK(wait);
282 struct request_queue *q; 253 struct request_queue *q;
283 struct bio *bio; 254 struct bio *bio;
284 int ret = 0; 255 int ret = 0;
285 256
286 if (bdev->bd_disk == NULL) 257 if (bdev->bd_disk == NULL)
287 return -ENXIO; 258 return -ENXIO;
288 259
289 q = bdev_get_queue(bdev); 260 q = bdev_get_queue(bdev);
290 if (!q) 261 if (!q)
291 return -ENXIO; 262 return -ENXIO;
292 263
293 /* 264 /*
294 * some block devices may not have their queue correctly set up here 265 * some block devices may not have their queue correctly set up here
295 * (e.g. loop device without a backing file) and so issuing a flush 266 * (e.g. loop device without a backing file) and so issuing a flush
296 * here will panic. Ensure there is a request function before issuing 267 * here will panic. Ensure there is a request function before issuing
297 * the barrier. 268 * the barrier.
298 */ 269 */
299 if (!q->make_request_fn) 270 if (!q->make_request_fn)
300 return -ENXIO; 271 return -ENXIO;
301 272
302 bio = bio_alloc(gfp_mask, 0); 273 bio = bio_alloc(gfp_mask, 0);
303 bio->bi_end_io = bio_end_empty_barrier; 274 bio->bi_end_io = bio_end_empty_barrier;
304 bio->bi_bdev = bdev; 275 bio->bi_bdev = bdev;
305 if (test_bit(BLKDEV_WAIT, &flags)) 276 if (test_bit(BLKDEV_WAIT, &flags))
306 bio->bi_private = &wait; 277 bio->bi_private = &wait;
307 278
308 bio_get(bio); 279 bio_get(bio);
309 submit_bio(WRITE_BARRIER, bio); 280 submit_bio(WRITE_BARRIER, bio);
310 if (test_bit(BLKDEV_WAIT, &flags)) { 281 if (test_bit(BLKDEV_WAIT, &flags)) {
311 wait_for_completion(&wait); 282 wait_for_completion(&wait);
312 /* 283 /*
313 * The driver must store the error location in ->bi_sector, if 284 * The driver must store the error location in ->bi_sector, if
314 * it supports it. For non-stacked drivers, this should be 285 * it supports it. For non-stacked drivers, this should be
315 * copied from blk_rq_pos(rq). 286 * copied from blk_rq_pos(rq).
316 */ 287 */
317 if (error_sector) 288 if (error_sector)
318 *error_sector = bio->bi_sector; 289 *error_sector = bio->bi_sector;
319 } 290 }
320 291
321 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 292 if (bio_flagged(bio, BIO_EOPNOTSUPP))
322 ret = -EOPNOTSUPP; 293 ret = -EOPNOTSUPP;
323 else if (!bio_flagged(bio, BIO_UPTODATE)) 294 else if (!bio_flagged(bio, BIO_UPTODATE))
324 ret = -EIO; 295 ret = -EIO;
325 296
326 bio_put(bio); 297 bio_put(bio);
327 return ret; 298 return ret;
328 } 299 }
329 EXPORT_SYMBOL(blkdev_issue_flush); 300 EXPORT_SYMBOL(blkdev_issue_flush);
330 301
block/blk-core.c:
1 /* 1 /*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
7 * - July2000 7 * - July2000
8 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 8 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
9 */ 9 */
10 10
11 /* 11 /*
12 * This handles all read/write requests to block devices 12 * This handles all read/write requests to block devices
13 */ 13 */
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/backing-dev.h> 16 #include <linux/backing-dev.h>
17 #include <linux/bio.h> 17 #include <linux/bio.h>
18 #include <linux/blkdev.h> 18 #include <linux/blkdev.h>
19 #include <linux/highmem.h> 19 #include <linux/highmem.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/kernel_stat.h> 21 #include <linux/kernel_stat.h>
22 #include <linux/string.h> 22 #include <linux/string.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/completion.h> 24 #include <linux/completion.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/swap.h> 26 #include <linux/swap.h>
27 #include <linux/writeback.h> 27 #include <linux/writeback.h>
28 #include <linux/task_io_accounting_ops.h> 28 #include <linux/task_io_accounting_ops.h>
29 #include <linux/fault-inject.h> 29 #include <linux/fault-inject.h>
30 30
31 #define CREATE_TRACE_POINTS 31 #define CREATE_TRACE_POINTS
32 #include <trace/events/block.h> 32 #include <trace/events/block.h>
33 33
34 #include "blk.h" 34 #include "blk.h"
35 35
36 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); 36 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
37 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 37 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
38 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 38 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
39 39
40 static int __make_request(struct request_queue *q, struct bio *bio); 40 static int __make_request(struct request_queue *q, struct bio *bio);
41 41
42 /* 42 /*
43 * For the allocated request tables 43 * For the allocated request tables
44 */ 44 */
45 static struct kmem_cache *request_cachep; 45 static struct kmem_cache *request_cachep;
46 46
47 /* 47 /*
48 * For queue allocation 48 * For queue allocation
49 */ 49 */
50 struct kmem_cache *blk_requestq_cachep; 50 struct kmem_cache *blk_requestq_cachep;
51 51
52 /* 52 /*
53 * Controlling structure to kblockd 53 * Controlling structure to kblockd
54 */ 54 */
55 static struct workqueue_struct *kblockd_workqueue; 55 static struct workqueue_struct *kblockd_workqueue;
56 56
57 static void drive_stat_acct(struct request *rq, int new_io) 57 static void drive_stat_acct(struct request *rq, int new_io)
58 { 58 {
59 struct hd_struct *part; 59 struct hd_struct *part;
60 int rw = rq_data_dir(rq); 60 int rw = rq_data_dir(rq);
61 int cpu; 61 int cpu;
62 62
63 if (!blk_do_io_stat(rq)) 63 if (!blk_do_io_stat(rq))
64 return; 64 return;
65 65
66 cpu = part_stat_lock(); 66 cpu = part_stat_lock();
67 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); 67 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
68 68
69 if (!new_io) 69 if (!new_io)
70 part_stat_inc(cpu, part, merges[rw]); 70 part_stat_inc(cpu, part, merges[rw]);
71 else { 71 else {
72 part_round_stats(cpu, part); 72 part_round_stats(cpu, part);
73 part_inc_in_flight(part, rw); 73 part_inc_in_flight(part, rw);
74 } 74 }
75 75
76 part_stat_unlock(); 76 part_stat_unlock();
77 } 77 }
78 78
79 void blk_queue_congestion_threshold(struct request_queue *q) 79 void blk_queue_congestion_threshold(struct request_queue *q)
80 { 80 {
81 int nr; 81 int nr;
82 82
83 nr = q->nr_requests - (q->nr_requests / 8) + 1; 83 nr = q->nr_requests - (q->nr_requests / 8) + 1;
84 if (nr > q->nr_requests) 84 if (nr > q->nr_requests)
85 nr = q->nr_requests; 85 nr = q->nr_requests;
86 q->nr_congestion_on = nr; 86 q->nr_congestion_on = nr;
87 87
88 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 88 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
89 if (nr < 1) 89 if (nr < 1)
90 nr = 1; 90 nr = 1;
91 q->nr_congestion_off = nr; 91 q->nr_congestion_off = nr;
92 } 92 }
93 93
94 /** 94 /**
95 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 95 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
96 * @bdev: device 96 * @bdev: device
97 * 97 *
98 * Locates the passed device's request queue and returns the address of its 98 * Locates the passed device's request queue and returns the address of its
99 * backing_dev_info 99 * backing_dev_info
100 * 100 *
101 * Will return NULL if the request queue cannot be located. 101 * Will return NULL if the request queue cannot be located.
102 */ 102 */
103 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 103 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
104 { 104 {
105 struct backing_dev_info *ret = NULL; 105 struct backing_dev_info *ret = NULL;
106 struct request_queue *q = bdev_get_queue(bdev); 106 struct request_queue *q = bdev_get_queue(bdev);
107 107
108 if (q) 108 if (q)
109 ret = &q->backing_dev_info; 109 ret = &q->backing_dev_info;
110 return ret; 110 return ret;
111 } 111 }
112 EXPORT_SYMBOL(blk_get_backing_dev_info); 112 EXPORT_SYMBOL(blk_get_backing_dev_info);
113 113
114 void blk_rq_init(struct request_queue *q, struct request *rq) 114 void blk_rq_init(struct request_queue *q, struct request *rq)
115 { 115 {
116 memset(rq, 0, sizeof(*rq)); 116 memset(rq, 0, sizeof(*rq));
117 117
118 INIT_LIST_HEAD(&rq->queuelist); 118 INIT_LIST_HEAD(&rq->queuelist);
119 INIT_LIST_HEAD(&rq->timeout_list); 119 INIT_LIST_HEAD(&rq->timeout_list);
120 rq->cpu = -1; 120 rq->cpu = -1;
121 rq->q = q; 121 rq->q = q;
122 rq->__sector = (sector_t) -1; 122 rq->__sector = (sector_t) -1;
123 INIT_HLIST_NODE(&rq->hash); 123 INIT_HLIST_NODE(&rq->hash);
124 RB_CLEAR_NODE(&rq->rb_node); 124 RB_CLEAR_NODE(&rq->rb_node);
125 rq->cmd = rq->__cmd; 125 rq->cmd = rq->__cmd;
126 rq->cmd_len = BLK_MAX_CDB; 126 rq->cmd_len = BLK_MAX_CDB;
127 rq->tag = -1; 127 rq->tag = -1;
128 rq->ref_count = 1; 128 rq->ref_count = 1;
129 rq->start_time = jiffies; 129 rq->start_time = jiffies;
130 set_start_time_ns(rq); 130 set_start_time_ns(rq);
131 } 131 }
132 EXPORT_SYMBOL(blk_rq_init); 132 EXPORT_SYMBOL(blk_rq_init);
133 133
134 static void req_bio_endio(struct request *rq, struct bio *bio, 134 static void req_bio_endio(struct request *rq, struct bio *bio,
135 unsigned int nbytes, int error) 135 unsigned int nbytes, int error)
136 { 136 {
137 struct request_queue *q = rq->q; 137 struct request_queue *q = rq->q;
138 138
139 if (&q->bar_rq != rq) { 139 if (&q->bar_rq != rq) {
140 if (error) 140 if (error)
141 clear_bit(BIO_UPTODATE, &bio->bi_flags); 141 clear_bit(BIO_UPTODATE, &bio->bi_flags);
142 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 142 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
143 error = -EIO; 143 error = -EIO;
144 144
145 if (unlikely(nbytes > bio->bi_size)) { 145 if (unlikely(nbytes > bio->bi_size)) {
146 printk(KERN_ERR "%s: want %u bytes done, %u left\n", 146 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
147 __func__, nbytes, bio->bi_size); 147 __func__, nbytes, bio->bi_size);
148 nbytes = bio->bi_size; 148 nbytes = bio->bi_size;
149 } 149 }
150 150
151 if (unlikely(rq->cmd_flags & REQ_QUIET)) 151 if (unlikely(rq->cmd_flags & REQ_QUIET))
152 set_bit(BIO_QUIET, &bio->bi_flags); 152 set_bit(BIO_QUIET, &bio->bi_flags);
153 153
154 bio->bi_size -= nbytes; 154 bio->bi_size -= nbytes;
155 bio->bi_sector += (nbytes >> 9); 155 bio->bi_sector += (nbytes >> 9);
156 156
157 if (bio_integrity(bio)) 157 if (bio_integrity(bio))
158 bio_integrity_advance(bio, nbytes); 158 bio_integrity_advance(bio, nbytes);
159 159
160 if (bio->bi_size == 0) 160 if (bio->bi_size == 0)
161 bio_endio(bio, error); 161 bio_endio(bio, error);
162 } else { 162 } else {
163 163
164 /* 164 /*
165 * Okay, this is the barrier request in progress, just 165 * Okay, this is the barrier request in progress, just
166 * record the error; 166 * record the error;
167 */ 167 */
168 if (error && !q->orderr) 168 if (error && !q->orderr)
169 q->orderr = error; 169 q->orderr = error;
170 } 170 }
171 } 171 }
172 172
173 void blk_dump_rq_flags(struct request *rq, char *msg) 173 void blk_dump_rq_flags(struct request *rq, char *msg)
174 { 174 {
175 int bit; 175 int bit;
176 176
177 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, 177 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
178 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 178 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
179 rq->cmd_flags); 179 rq->cmd_flags);
180 180
181 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", 181 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
182 (unsigned long long)blk_rq_pos(rq), 182 (unsigned long long)blk_rq_pos(rq),
183 blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); 183 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
184 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", 184 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
185 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); 185 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
186 186
187 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 187 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
188 printk(KERN_INFO " cdb: "); 188 printk(KERN_INFO " cdb: ");
189 for (bit = 0; bit < BLK_MAX_CDB; bit++) 189 for (bit = 0; bit < BLK_MAX_CDB; bit++)
190 printk("%02x ", rq->cmd[bit]); 190 printk("%02x ", rq->cmd[bit]);
191 printk("\n"); 191 printk("\n");
192 } 192 }
193 } 193 }
194 EXPORT_SYMBOL(blk_dump_rq_flags); 194 EXPORT_SYMBOL(blk_dump_rq_flags);
195 195
196 /* 196 /*
197 * "plug" the device if there are no outstanding requests: this will 197 * "plug" the device if there are no outstanding requests: this will
198 * force the transfer to start only after we have put all the requests 198 * force the transfer to start only after we have put all the requests
199 * on the list. 199 * on the list.
200 * 200 *
201 * This is called with interrupts off and no requests on the queue and 201 * This is called with interrupts off and no requests on the queue and
202 * with the queue lock held. 202 * with the queue lock held.
203 */ 203 */
204 void blk_plug_device(struct request_queue *q) 204 void blk_plug_device(struct request_queue *q)
205 { 205 {
206 WARN_ON(!irqs_disabled()); 206 WARN_ON(!irqs_disabled());
207 207
208 /* 208 /*
209 * don't plug a stopped queue, it must be paired with blk_start_queue() 209 * don't plug a stopped queue, it must be paired with blk_start_queue()
210 * which will restart the queueing 210 * which will restart the queueing
211 */ 211 */
212 if (blk_queue_stopped(q)) 212 if (blk_queue_stopped(q))
213 return; 213 return;
214 214
215 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { 215 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
216 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 216 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
217 trace_block_plug(q); 217 trace_block_plug(q);
218 } 218 }
219 } 219 }
220 EXPORT_SYMBOL(blk_plug_device); 220 EXPORT_SYMBOL(blk_plug_device);
221 221
222 /** 222 /**
223 * blk_plug_device_unlocked - plug a device without queue lock held 223 * blk_plug_device_unlocked - plug a device without queue lock held
224 * @q: The &struct request_queue to plug 224 * @q: The &struct request_queue to plug
225 * 225 *
226 * Description: 226 * Description:
227 * Like @blk_plug_device(), but grabs the queue lock and disables 227 * Like @blk_plug_device(), but grabs the queue lock and disables
228 * interrupts. 228 * interrupts.
229 **/ 229 **/
230 void blk_plug_device_unlocked(struct request_queue *q) 230 void blk_plug_device_unlocked(struct request_queue *q)
231 { 231 {
232 unsigned long flags; 232 unsigned long flags;
233 233
234 spin_lock_irqsave(q->queue_lock, flags); 234 spin_lock_irqsave(q->queue_lock, flags);
235 blk_plug_device(q); 235 blk_plug_device(q);
236 spin_unlock_irqrestore(q->queue_lock, flags); 236 spin_unlock_irqrestore(q->queue_lock, flags);
237 } 237 }
238 EXPORT_SYMBOL(blk_plug_device_unlocked); 238 EXPORT_SYMBOL(blk_plug_device_unlocked);
239 239
240 /* 240 /*
241 * remove the queue from the plugged list, if present. called with 241 * remove the queue from the plugged list, if present. called with
242 * queue lock held and interrupts disabled. 242 * queue lock held and interrupts disabled.
243 */ 243 */
244 int blk_remove_plug(struct request_queue *q) 244 int blk_remove_plug(struct request_queue *q)
245 { 245 {
246 WARN_ON(!irqs_disabled()); 246 WARN_ON(!irqs_disabled());
247 247
248 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) 248 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
249 return 0; 249 return 0;
250 250
251 del_timer(&q->unplug_timer); 251 del_timer(&q->unplug_timer);
252 return 1; 252 return 1;
253 } 253 }
254 EXPORT_SYMBOL(blk_remove_plug); 254 EXPORT_SYMBOL(blk_remove_plug);
255 255
256 /* 256 /*
257 * remove the plug and let it rip.. 257 * remove the plug and let it rip..
258 */ 258 */
259 void __generic_unplug_device(struct request_queue *q) 259 void __generic_unplug_device(struct request_queue *q)
260 { 260 {
261 if (unlikely(blk_queue_stopped(q))) 261 if (unlikely(blk_queue_stopped(q)))
262 return; 262 return;
263 if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) 263 if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
264 return; 264 return;
265 265
266 q->request_fn(q); 266 q->request_fn(q);
267 } 267 }
268 268
269 /** 269 /**
270 * generic_unplug_device - fire a request queue 270 * generic_unplug_device - fire a request queue
271 * @q: The &struct request_queue in question 271 * @q: The &struct request_queue in question
272 * 272 *
273 * Description: 273 * Description:
274 * Linux uses plugging to build bigger requests queues before letting 274 * Linux uses plugging to build bigger requests queues before letting
275 * the device have at them. If a queue is plugged, the I/O scheduler 275 * the device have at them. If a queue is plugged, the I/O scheduler
276 * is still adding and merging requests on the queue. Once the queue 276 * is still adding and merging requests on the queue. Once the queue
277 * gets unplugged, the request_fn defined for the queue is invoked and 277 * gets unplugged, the request_fn defined for the queue is invoked and
278 * transfers started. 278 * transfers started.
279 **/ 279 **/
280 void generic_unplug_device(struct request_queue *q) 280 void generic_unplug_device(struct request_queue *q)
281 { 281 {
282 if (blk_queue_plugged(q)) { 282 if (blk_queue_plugged(q)) {
283 spin_lock_irq(q->queue_lock); 283 spin_lock_irq(q->queue_lock);
284 __generic_unplug_device(q); 284 __generic_unplug_device(q);
285 spin_unlock_irq(q->queue_lock); 285 spin_unlock_irq(q->queue_lock);
286 } 286 }
287 } 287 }
288 EXPORT_SYMBOL(generic_unplug_device); 288 EXPORT_SYMBOL(generic_unplug_device);
289 289
290 static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 290 static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
291 struct page *page) 291 struct page *page)
292 { 292 {
293 struct request_queue *q = bdi->unplug_io_data; 293 struct request_queue *q = bdi->unplug_io_data;
294 294
295 blk_unplug(q); 295 blk_unplug(q);
296 } 296 }
297 297
298 void blk_unplug_work(struct work_struct *work) 298 void blk_unplug_work(struct work_struct *work)
299 { 299 {
300 struct request_queue *q = 300 struct request_queue *q =
301 container_of(work, struct request_queue, unplug_work); 301 container_of(work, struct request_queue, unplug_work);
302 302
303 trace_block_unplug_io(q); 303 trace_block_unplug_io(q);
304 q->unplug_fn(q); 304 q->unplug_fn(q);
305 } 305 }
306 306
307 void blk_unplug_timeout(unsigned long data) 307 void blk_unplug_timeout(unsigned long data)
308 { 308 {
309 struct request_queue *q = (struct request_queue *)data; 309 struct request_queue *q = (struct request_queue *)data;
310 310
311 trace_block_unplug_timer(q); 311 trace_block_unplug_timer(q);
312 kblockd_schedule_work(q, &q->unplug_work); 312 kblockd_schedule_work(q, &q->unplug_work);
313 } 313 }
314 314
315 void blk_unplug(struct request_queue *q) 315 void blk_unplug(struct request_queue *q)
316 { 316 {
317 /* 317 /*
318 * devices don't necessarily have an ->unplug_fn defined 318 * devices don't necessarily have an ->unplug_fn defined
319 */ 319 */
320 if (q->unplug_fn) { 320 if (q->unplug_fn) {
321 trace_block_unplug_io(q); 321 trace_block_unplug_io(q);
322 q->unplug_fn(q); 322 q->unplug_fn(q);
323 } 323 }
324 } 324 }
325 EXPORT_SYMBOL(blk_unplug); 325 EXPORT_SYMBOL(blk_unplug);
326 326
327 /** 327 /**
328 * blk_start_queue - restart a previously stopped queue 328 * blk_start_queue - restart a previously stopped queue
329 * @q: The &struct request_queue in question 329 * @q: The &struct request_queue in question
330 * 330 *
331 * Description: 331 * Description:
332 * blk_start_queue() will clear the stop flag on the queue, and call 332 * blk_start_queue() will clear the stop flag on the queue, and call
333 * the request_fn for the queue if it was in a stopped state when 333 * the request_fn for the queue if it was in a stopped state when
334 * entered. Also see blk_stop_queue(). Queue lock must be held. 334 * entered. Also see blk_stop_queue(). Queue lock must be held.
335 **/ 335 **/
336 void blk_start_queue(struct request_queue *q) 336 void blk_start_queue(struct request_queue *q)
337 { 337 {
338 WARN_ON(!irqs_disabled()); 338 WARN_ON(!irqs_disabled());
339 339
340 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 340 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
341 __blk_run_queue(q); 341 __blk_run_queue(q);
342 } 342 }
343 EXPORT_SYMBOL(blk_start_queue); 343 EXPORT_SYMBOL(blk_start_queue);
344 344
345 /** 345 /**
346 * blk_stop_queue - stop a queue 346 * blk_stop_queue - stop a queue
347 * @q: The &struct request_queue in question 347 * @q: The &struct request_queue in question
348 * 348 *
349 * Description: 349 * Description:
350 * The Linux block layer assumes that a block driver will consume all 350 * The Linux block layer assumes that a block driver will consume all
351 * entries on the request queue when the request_fn strategy is called. 351 * entries on the request queue when the request_fn strategy is called.
352 * Often this will not happen, because of hardware limitations (queue 352 * Often this will not happen, because of hardware limitations (queue
353 * depth settings). If a device driver gets a 'queue full' response, 353 * depth settings). If a device driver gets a 'queue full' response,
354 * or if it simply chooses not to queue more I/O at one point, it can 354 * or if it simply chooses not to queue more I/O at one point, it can
355 * call this function to prevent the request_fn from being called until 355 * call this function to prevent the request_fn from being called until
356 * the driver has signalled it's ready to go again. This happens by calling 356 * the driver has signalled it's ready to go again. This happens by calling
357 * blk_start_queue() to restart queue operations. Queue lock must be held. 357 * blk_start_queue() to restart queue operations. Queue lock must be held.
358 **/ 358 **/
359 void blk_stop_queue(struct request_queue *q) 359 void blk_stop_queue(struct request_queue *q)
360 { 360 {
361 blk_remove_plug(q); 361 blk_remove_plug(q);
362 queue_flag_set(QUEUE_FLAG_STOPPED, q); 362 queue_flag_set(QUEUE_FLAG_STOPPED, q);
363 } 363 }
364 EXPORT_SYMBOL(blk_stop_queue); 364 EXPORT_SYMBOL(blk_stop_queue);
365 365
366 /** 366 /**
367 * blk_sync_queue - cancel any pending callbacks on a queue 367 * blk_sync_queue - cancel any pending callbacks on a queue
368 * @q: the queue 368 * @q: the queue
369 * 369 *
370 * Description: 370 * Description:
371 * The block layer may perform asynchronous callback activity 371 * The block layer may perform asynchronous callback activity
372 * on a queue, such as calling the unplug function after a timeout. 372 * on a queue, such as calling the unplug function after a timeout.
373 * A block device may call blk_sync_queue to ensure that any 373 * A block device may call blk_sync_queue to ensure that any
374 * such activity is cancelled, thus allowing it to release resources 374 * such activity is cancelled, thus allowing it to release resources
375 * that the callbacks might use. The caller must already have made sure 375 * that the callbacks might use. The caller must already have made sure
376 * that its ->make_request_fn will not re-add plugging prior to calling 376 * that its ->make_request_fn will not re-add plugging prior to calling
377 * this function. 377 * this function.
378 * 378 *
379 */ 379 */
380 void blk_sync_queue(struct request_queue *q) 380 void blk_sync_queue(struct request_queue *q)
381 { 381 {
382 del_timer_sync(&q->unplug_timer); 382 del_timer_sync(&q->unplug_timer);
383 del_timer_sync(&q->timeout); 383 del_timer_sync(&q->timeout);
384 cancel_work_sync(&q->unplug_work); 384 cancel_work_sync(&q->unplug_work);
385 } 385 }
386 EXPORT_SYMBOL(blk_sync_queue); 386 EXPORT_SYMBOL(blk_sync_queue);
387 387
388 /** 388 /**
389 * __blk_run_queue - run a single device queue 389 * __blk_run_queue - run a single device queue
390 * @q: The queue to run 390 * @q: The queue to run
391 * 391 *
392 * Description: 392 * Description:
393 * See @blk_run_queue. This variant must be called with the queue lock 393 * See @blk_run_queue. This variant must be called with the queue lock
394 * held and interrupts disabled. 394 * held and interrupts disabled.
395 * 395 *
396 */ 396 */
397 void __blk_run_queue(struct request_queue *q) 397 void __blk_run_queue(struct request_queue *q)
398 { 398 {
399 blk_remove_plug(q); 399 blk_remove_plug(q);
400 400
401 if (unlikely(blk_queue_stopped(q))) 401 if (unlikely(blk_queue_stopped(q)))
402 return; 402 return;
403 403
404 if (elv_queue_empty(q)) 404 if (elv_queue_empty(q))
405 return; 405 return;
406 406
407 /* 407 /*
408 * Only recurse once to avoid overrunning the stack, let the unplug 408 * Only recurse once to avoid overrunning the stack, let the unplug
409 * handling reinvoke the handler shortly if we already got there. 409 * handling reinvoke the handler shortly if we already got there.
410 */ 410 */
411 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 411 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
412 q->request_fn(q); 412 q->request_fn(q);
413 queue_flag_clear(QUEUE_FLAG_REENTER, q); 413 queue_flag_clear(QUEUE_FLAG_REENTER, q);
414 } else { 414 } else {
415 queue_flag_set(QUEUE_FLAG_PLUGGED, q); 415 queue_flag_set(QUEUE_FLAG_PLUGGED, q);
416 kblockd_schedule_work(q, &q->unplug_work); 416 kblockd_schedule_work(q, &q->unplug_work);
417 } 417 }
418 } 418 }
419 EXPORT_SYMBOL(__blk_run_queue); 419 EXPORT_SYMBOL(__blk_run_queue);
420 420
421 /** 421 /**
422 * blk_run_queue - run a single device queue 422 * blk_run_queue - run a single device queue
423 * @q: The queue to run 423 * @q: The queue to run
424 * 424 *
425 * Description: 425 * Description:
426 * Invoke request handling on this queue, if it has pending work to do. 426 * Invoke request handling on this queue, if it has pending work to do.
427 * May be used to restart queueing when a request has completed. 427 * May be used to restart queueing when a request has completed.
428 */ 428 */
429 void blk_run_queue(struct request_queue *q) 429 void blk_run_queue(struct request_queue *q)
430 { 430 {
431 unsigned long flags; 431 unsigned long flags;
432 432
433 spin_lock_irqsave(q->queue_lock, flags); 433 spin_lock_irqsave(q->queue_lock, flags);
434 __blk_run_queue(q); 434 __blk_run_queue(q);
435 spin_unlock_irqrestore(q->queue_lock, flags); 435 spin_unlock_irqrestore(q->queue_lock, flags);
436 } 436 }
437 EXPORT_SYMBOL(blk_run_queue); 437 EXPORT_SYMBOL(blk_run_queue);
438 438
439 void blk_put_queue(struct request_queue *q) 439 void blk_put_queue(struct request_queue *q)
440 { 440 {
441 kobject_put(&q->kobj); 441 kobject_put(&q->kobj);
442 } 442 }
443 443
444 void blk_cleanup_queue(struct request_queue *q) 444 void blk_cleanup_queue(struct request_queue *q)
445 { 445 {
446 /* 446 /*
447 * We know we have process context here, so we can be a little 447 * We know we have process context here, so we can be a little
448 * cautious and ensure that pending block actions on this device 448 * cautious and ensure that pending block actions on this device
449 * are done before moving on. Going into this function, we should 449 * are done before moving on. Going into this function, we should
450 * not have processes doing IO to this device. 450 * not have processes doing IO to this device.
451 */ 451 */
452 blk_sync_queue(q); 452 blk_sync_queue(q);
453 453
454 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); 454 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
455 mutex_lock(&q->sysfs_lock); 455 mutex_lock(&q->sysfs_lock);
456 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 456 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
457 mutex_unlock(&q->sysfs_lock); 457 mutex_unlock(&q->sysfs_lock);
458 458
459 if (q->elevator) 459 if (q->elevator)
460 elevator_exit(q->elevator); 460 elevator_exit(q->elevator);
461 461
462 blk_put_queue(q); 462 blk_put_queue(q);
463 } 463 }
464 EXPORT_SYMBOL(blk_cleanup_queue); 464 EXPORT_SYMBOL(blk_cleanup_queue);
465 465
466 static int blk_init_free_list(struct request_queue *q) 466 static int blk_init_free_list(struct request_queue *q)
467 { 467 {
468 struct request_list *rl = &q->rq; 468 struct request_list *rl = &q->rq;
469 469
470 if (unlikely(rl->rq_pool)) 470 if (unlikely(rl->rq_pool))
471 return 0; 471 return 0;
472 472
473 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; 473 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
474 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; 474 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
475 rl->elvpriv = 0; 475 rl->elvpriv = 0;
476 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); 476 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
477 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); 477 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
478 478
479 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 479 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
480 mempool_free_slab, request_cachep, q->node); 480 mempool_free_slab, request_cachep, q->node);
481 481
482 if (!rl->rq_pool) 482 if (!rl->rq_pool)
483 return -ENOMEM; 483 return -ENOMEM;
484 484
485 return 0; 485 return 0;
486 } 486 }
487 487
488 struct request_queue *blk_alloc_queue(gfp_t gfp_mask) 488 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
489 { 489 {
490 return blk_alloc_queue_node(gfp_mask, -1); 490 return blk_alloc_queue_node(gfp_mask, -1);
491 } 491 }
492 EXPORT_SYMBOL(blk_alloc_queue); 492 EXPORT_SYMBOL(blk_alloc_queue);
493 493
494 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 494 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
495 { 495 {
496 struct request_queue *q; 496 struct request_queue *q;
497 int err; 497 int err;
498 498
499 q = kmem_cache_alloc_node(blk_requestq_cachep, 499 q = kmem_cache_alloc_node(blk_requestq_cachep,
500 gfp_mask | __GFP_ZERO, node_id); 500 gfp_mask | __GFP_ZERO, node_id);
501 if (!q) 501 if (!q)
502 return NULL; 502 return NULL;
503 503
504 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; 504 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
505 q->backing_dev_info.unplug_io_data = q; 505 q->backing_dev_info.unplug_io_data = q;
506 q->backing_dev_info.ra_pages = 506 q->backing_dev_info.ra_pages =
507 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 507 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
508 q->backing_dev_info.state = 0; 508 q->backing_dev_info.state = 0;
509 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 509 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
510 q->backing_dev_info.name = "block"; 510 q->backing_dev_info.name = "block";
511 511
512 err = bdi_init(&q->backing_dev_info); 512 err = bdi_init(&q->backing_dev_info);
513 if (err) { 513 if (err) {
514 kmem_cache_free(blk_requestq_cachep, q); 514 kmem_cache_free(blk_requestq_cachep, q);
515 return NULL; 515 return NULL;
516 } 516 }
517 517
518 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, 518 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
519 laptop_mode_timer_fn, (unsigned long) q); 519 laptop_mode_timer_fn, (unsigned long) q);
520 init_timer(&q->unplug_timer); 520 init_timer(&q->unplug_timer);
521 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); 521 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
522 INIT_LIST_HEAD(&q->timeout_list); 522 INIT_LIST_HEAD(&q->timeout_list);
523 INIT_WORK(&q->unplug_work, blk_unplug_work); 523 INIT_WORK(&q->unplug_work, blk_unplug_work);
524 524
525 kobject_init(&q->kobj, &blk_queue_ktype); 525 kobject_init(&q->kobj, &blk_queue_ktype);
526 526
527 mutex_init(&q->sysfs_lock); 527 mutex_init(&q->sysfs_lock);
528 spin_lock_init(&q->__queue_lock); 528 spin_lock_init(&q->__queue_lock);
529 529
530 return q; 530 return q;
531 } 531 }
532 EXPORT_SYMBOL(blk_alloc_queue_node); 532 EXPORT_SYMBOL(blk_alloc_queue_node);
533 533
534 /** 534 /**
535 * blk_init_queue - prepare a request queue for use with a block device 535 * blk_init_queue - prepare a request queue for use with a block device
536 * @rfn: The function to be called to process requests that have been 536 * @rfn: The function to be called to process requests that have been
537 * placed on the queue. 537 * placed on the queue.
538 * @lock: Request queue spin lock 538 * @lock: Request queue spin lock
539 * 539 *
540 * Description: 540 * Description:
541 * If a block device wishes to use the standard request handling procedures, 541 * If a block device wishes to use the standard request handling procedures,
542 * which sorts requests and coalesces adjacent requests, then it must 542 * which sorts requests and coalesces adjacent requests, then it must
543 * call blk_init_queue(). The function @rfn will be called when there 543 * call blk_init_queue(). The function @rfn will be called when there
544 * are requests on the queue that need to be processed. If the device 544 * are requests on the queue that need to be processed. If the device
545 * supports plugging, then @rfn may not be called immediately when requests 545 * supports plugging, then @rfn may not be called immediately when requests
546 * are available on the queue, but may be called at some time later instead. 546 * are available on the queue, but may be called at some time later instead.
547 * Plugged queues are generally unplugged when a buffer belonging to one 547 * Plugged queues are generally unplugged when a buffer belonging to one
548 * of the requests on the queue is needed, or due to memory pressure. 548 * of the requests on the queue is needed, or due to memory pressure.
549 * 549 *
550 * @rfn is not required, or even expected, to remove all requests off the 550 * @rfn is not required, or even expected, to remove all requests off the
551 * queue, but only as many as it can handle at a time. If it does leave 551 * queue, but only as many as it can handle at a time. If it does leave
552 * requests on the queue, it is responsible for arranging that the requests 552 * requests on the queue, it is responsible for arranging that the requests
553 * get dealt with eventually. 553 * get dealt with eventually.
554 * 554 *
555 * The queue spin lock must be held while manipulating the requests on the 555 * The queue spin lock must be held while manipulating the requests on the
556 * request queue; this lock will be taken also from interrupt context, so irq 556 * request queue; this lock will be taken also from interrupt context, so irq
557 * disabling is needed for it. 557 * disabling is needed for it.
558 * 558 *
559 * Function returns a pointer to the initialized request queue, or %NULL if 559 * Function returns a pointer to the initialized request queue, or %NULL if
560 * it didn't succeed. 560 * it didn't succeed.
561 * 561 *
562 * Note: 562 * Note:
563 * blk_init_queue() must be paired with a blk_cleanup_queue() call 563 * blk_init_queue() must be paired with a blk_cleanup_queue() call
564 * when the block device is deactivated (such as at module unload). 564 * when the block device is deactivated (such as at module unload).
565 **/ 565 **/
566 566
567 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 567 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
568 { 568 {
569 return blk_init_queue_node(rfn, lock, -1); 569 return blk_init_queue_node(rfn, lock, -1);
570 } 570 }
571 EXPORT_SYMBOL(blk_init_queue); 571 EXPORT_SYMBOL(blk_init_queue);
572 572
573 struct request_queue * 573 struct request_queue *
574 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 574 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
575 { 575 {
576 struct request_queue *uninit_q, *q; 576 struct request_queue *uninit_q, *q;
577 577
578 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); 578 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
579 if (!uninit_q) 579 if (!uninit_q)
580 return NULL; 580 return NULL;
581 581
582 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); 582 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
583 if (!q) 583 if (!q)
584 blk_cleanup_queue(uninit_q); 584 blk_cleanup_queue(uninit_q);
585 585
586 return q; 586 return q;
587 } 587 }
588 EXPORT_SYMBOL(blk_init_queue_node); 588 EXPORT_SYMBOL(blk_init_queue_node);
589 589
590 struct request_queue * 590 struct request_queue *
591 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, 591 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
592 spinlock_t *lock) 592 spinlock_t *lock)
593 { 593 {
594 return blk_init_allocated_queue_node(q, rfn, lock, -1); 594 return blk_init_allocated_queue_node(q, rfn, lock, -1);
595 } 595 }
596 EXPORT_SYMBOL(blk_init_allocated_queue); 596 EXPORT_SYMBOL(blk_init_allocated_queue);
597 597
598 struct request_queue * 598 struct request_queue *
599 blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, 599 blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
600 spinlock_t *lock, int node_id) 600 spinlock_t *lock, int node_id)
601 { 601 {
602 if (!q) 602 if (!q)
603 return NULL; 603 return NULL;
604 604
605 q->node = node_id; 605 q->node = node_id;
606 if (blk_init_free_list(q)) 606 if (blk_init_free_list(q))
607 return NULL; 607 return NULL;
608 608
609 q->request_fn = rfn; 609 q->request_fn = rfn;
610 q->prep_rq_fn = NULL; 610 q->prep_rq_fn = NULL;
611 q->unprep_rq_fn = NULL; 611 q->unprep_rq_fn = NULL;
612 q->unplug_fn = generic_unplug_device; 612 q->unplug_fn = generic_unplug_device;
613 q->queue_flags = QUEUE_FLAG_DEFAULT; 613 q->queue_flags = QUEUE_FLAG_DEFAULT;
614 q->queue_lock = lock; 614 q->queue_lock = lock;
615 615
616 /* 616 /*
617 * This also sets hw/phys segments, boundary and size 617 * This also sets hw/phys segments, boundary and size
618 */ 618 */
619 blk_queue_make_request(q, __make_request); 619 blk_queue_make_request(q, __make_request);
620 620
621 q->sg_reserved_size = INT_MAX; 621 q->sg_reserved_size = INT_MAX;
622 622
623 /* 623 /*
624 * all done 624 * all done
625 */ 625 */
626 if (!elevator_init(q, NULL)) { 626 if (!elevator_init(q, NULL)) {
627 blk_queue_congestion_threshold(q); 627 blk_queue_congestion_threshold(q);
628 return q; 628 return q;
629 } 629 }
630 630
631 return NULL; 631 return NULL;
632 } 632 }
633 EXPORT_SYMBOL(blk_init_allocated_queue_node); 633 EXPORT_SYMBOL(blk_init_allocated_queue_node);
634 634
635 int blk_get_queue(struct request_queue *q) 635 int blk_get_queue(struct request_queue *q)
636 { 636 {
637 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { 637 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
638 kobject_get(&q->kobj); 638 kobject_get(&q->kobj);
639 return 0; 639 return 0;
640 } 640 }
641 641
642 return 1; 642 return 1;
643 } 643 }
644 644
645 static inline void blk_free_request(struct request_queue *q, struct request *rq) 645 static inline void blk_free_request(struct request_queue *q, struct request *rq)
646 { 646 {
647 if (rq->cmd_flags & REQ_ELVPRIV) 647 if (rq->cmd_flags & REQ_ELVPRIV)
648 elv_put_request(q, rq); 648 elv_put_request(q, rq);
649 mempool_free(rq, q->rq.rq_pool); 649 mempool_free(rq, q->rq.rq_pool);
650 } 650 }
651 651
652 static struct request * 652 static struct request *
653 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) 653 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
654 { 654 {
655 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 655 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
656 656
657 if (!rq) 657 if (!rq)
658 return NULL; 658 return NULL;
659 659
660 blk_rq_init(q, rq); 660 blk_rq_init(q, rq);
661 661
662 rq->cmd_flags = flags | REQ_ALLOCED; 662 rq->cmd_flags = flags | REQ_ALLOCED;
663 663
664 if (priv) { 664 if (priv) {
665 if (unlikely(elv_set_request(q, rq, gfp_mask))) { 665 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
666 mempool_free(rq, q->rq.rq_pool); 666 mempool_free(rq, q->rq.rq_pool);
667 return NULL; 667 return NULL;
668 } 668 }
669 rq->cmd_flags |= REQ_ELVPRIV; 669 rq->cmd_flags |= REQ_ELVPRIV;
670 } 670 }
671 671
672 return rq; 672 return rq;
673 } 673 }
674 674
675 /* 675 /*
676 * ioc_batching returns true if the ioc is a valid batching request and 676 * ioc_batching returns true if the ioc is a valid batching request and
677 * should be given priority access to a request. 677 * should be given priority access to a request.
678 */ 678 */
679 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) 679 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
680 { 680 {
681 if (!ioc) 681 if (!ioc)
682 return 0; 682 return 0;
683 683
684 /* 684 /*
685 * Make sure the process is able to allocate at least 1 request 685 * Make sure the process is able to allocate at least 1 request
686 * even if the batch times out, otherwise we could theoretically 686 * even if the batch times out, otherwise we could theoretically
687 * lose wakeups. 687 * lose wakeups.
688 */ 688 */
689 return ioc->nr_batch_requests == q->nr_batching || 689 return ioc->nr_batch_requests == q->nr_batching ||
690 (ioc->nr_batch_requests > 0 690 (ioc->nr_batch_requests > 0
691 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 691 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
692 } 692 }
693 693
694 /* 694 /*
695 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 695 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
696 * will cause the process to be a "batcher" on all queues in the system. This 696 * will cause the process to be a "batcher" on all queues in the system. This
697 * is the behaviour we want though - once it gets a wakeup it should be given 697 * is the behaviour we want though - once it gets a wakeup it should be given
698 * a nice run. 698 * a nice run.
699 */ 699 */
700 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) 700 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
701 { 701 {
702 if (!ioc || ioc_batching(q, ioc)) 702 if (!ioc || ioc_batching(q, ioc))
703 return; 703 return;
704 704
705 ioc->nr_batch_requests = q->nr_batching; 705 ioc->nr_batch_requests = q->nr_batching;
706 ioc->last_waited = jiffies; 706 ioc->last_waited = jiffies;
707 } 707 }
708 708
709 static void __freed_request(struct request_queue *q, int sync) 709 static void __freed_request(struct request_queue *q, int sync)
710 { 710 {
711 struct request_list *rl = &q->rq; 711 struct request_list *rl = &q->rq;
712 712
713 if (rl->count[sync] < queue_congestion_off_threshold(q)) 713 if (rl->count[sync] < queue_congestion_off_threshold(q))
714 blk_clear_queue_congested(q, sync); 714 blk_clear_queue_congested(q, sync);
715 715
716 if (rl->count[sync] + 1 <= q->nr_requests) { 716 if (rl->count[sync] + 1 <= q->nr_requests) {
717 if (waitqueue_active(&rl->wait[sync])) 717 if (waitqueue_active(&rl->wait[sync]))
718 wake_up(&rl->wait[sync]); 718 wake_up(&rl->wait[sync]);
719 719
720 blk_clear_queue_full(q, sync); 720 blk_clear_queue_full(q, sync);
721 } 721 }
722 } 722 }
723 723
724 /* 724 /*
725 * A request has just been released. Account for it, update the full and 725 * A request has just been released. Account for it, update the full and
726 * congestion status, wake up any waiters. Called under q->queue_lock. 726 * congestion status, wake up any waiters. Called under q->queue_lock.
727 */ 727 */
728 static void freed_request(struct request_queue *q, int sync, int priv) 728 static void freed_request(struct request_queue *q, int sync, int priv)
729 { 729 {
730 struct request_list *rl = &q->rq; 730 struct request_list *rl = &q->rq;
731 731
732 rl->count[sync]--; 732 rl->count[sync]--;
733 if (priv) 733 if (priv)
734 rl->elvpriv--; 734 rl->elvpriv--;
735 735
736 __freed_request(q, sync); 736 __freed_request(q, sync);
737 737
738 if (unlikely(rl->starved[sync ^ 1])) 738 if (unlikely(rl->starved[sync ^ 1]))
739 __freed_request(q, sync ^ 1); 739 __freed_request(q, sync ^ 1);
740 } 740 }
741 741
742 /* 742 /*
743 * Get a free request, queue_lock must be held. 743 * Get a free request, queue_lock must be held.
744 * Returns NULL on failure, with queue_lock held. 744 * Returns NULL on failure, with queue_lock held.
745 * Returns !NULL on success, with queue_lock *not held*. 745 * Returns !NULL on success, with queue_lock *not held*.
746 */ 746 */
747 static struct request *get_request(struct request_queue *q, int rw_flags, 747 static struct request *get_request(struct request_queue *q, int rw_flags,
748 struct bio *bio, gfp_t gfp_mask) 748 struct bio *bio, gfp_t gfp_mask)
749 { 749 {
750 struct request *rq = NULL; 750 struct request *rq = NULL;
751 struct request_list *rl = &q->rq; 751 struct request_list *rl = &q->rq;
752 struct io_context *ioc = NULL; 752 struct io_context *ioc = NULL;
753 const bool is_sync = rw_is_sync(rw_flags) != 0; 753 const bool is_sync = rw_is_sync(rw_flags) != 0;
754 int may_queue, priv; 754 int may_queue, priv;
755 755
756 may_queue = elv_may_queue(q, rw_flags); 756 may_queue = elv_may_queue(q, rw_flags);
757 if (may_queue == ELV_MQUEUE_NO) 757 if (may_queue == ELV_MQUEUE_NO)
758 goto rq_starved; 758 goto rq_starved;
759 759
760 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { 760 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
761 if (rl->count[is_sync]+1 >= q->nr_requests) { 761 if (rl->count[is_sync]+1 >= q->nr_requests) {
762 ioc = current_io_context(GFP_ATOMIC, q->node); 762 ioc = current_io_context(GFP_ATOMIC, q->node);
763 /* 763 /*
764 * The queue will fill after this allocation, so set 764 * The queue will fill after this allocation, so set
765 * it as full, and mark this process as "batching". 765 * it as full, and mark this process as "batching".
766 * This process will be allowed to complete a batch of 766 * This process will be allowed to complete a batch of
767 * requests; others will be blocked. 767 * requests; others will be blocked.
768 */ 768 */
769 if (!blk_queue_full(q, is_sync)) { 769 if (!blk_queue_full(q, is_sync)) {
770 ioc_set_batching(q, ioc); 770 ioc_set_batching(q, ioc);
771 blk_set_queue_full(q, is_sync); 771 blk_set_queue_full(q, is_sync);
772 } else { 772 } else {
773 if (may_queue != ELV_MQUEUE_MUST 773 if (may_queue != ELV_MQUEUE_MUST
774 && !ioc_batching(q, ioc)) { 774 && !ioc_batching(q, ioc)) {
775 /* 775 /*
776 * The queue is full and the allocating 776 * The queue is full and the allocating
777 * process is not a "batcher", and not 777 * process is not a "batcher", and not
778 * exempted by the IO scheduler 778 * exempted by the IO scheduler
779 */ 779 */
780 goto out; 780 goto out;
781 } 781 }
782 } 782 }
783 } 783 }
784 blk_set_queue_congested(q, is_sync); 784 blk_set_queue_congested(q, is_sync);
785 } 785 }
786 786
787 /* 787 /*
788 * Only allow batching queuers to allocate up to 50% over the defined 788 * Only allow batching queuers to allocate up to 50% over the defined
789 * limit of requests, otherwise we could have thousands of requests 789 * limit of requests, otherwise we could have thousands of requests
790 * allocated with any setting of ->nr_requests 790 * allocated with any setting of ->nr_requests
791 */ 791 */
792 if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) 792 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
793 goto out; 793 goto out;
794 794
795 rl->count[is_sync]++; 795 rl->count[is_sync]++;
796 rl->starved[is_sync] = 0; 796 rl->starved[is_sync] = 0;
797 797
798 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 798 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
799 if (priv) 799 if (priv)
800 rl->elvpriv++; 800 rl->elvpriv++;
801 801
802 if (blk_queue_io_stat(q)) 802 if (blk_queue_io_stat(q))
803 rw_flags |= REQ_IO_STAT; 803 rw_flags |= REQ_IO_STAT;
804 spin_unlock_irq(q->queue_lock); 804 spin_unlock_irq(q->queue_lock);
805 805
806 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); 806 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
807 if (unlikely(!rq)) { 807 if (unlikely(!rq)) {
808 /* 808 /*
809 * Allocation failed presumably due to memory. Undo anything 809 * Allocation failed presumably due to memory. Undo anything
810 * we might have messed up. 810 * we might have messed up.
811 * 811 *
812 * Allocating task should really be put onto the front of the 812 * Allocating task should really be put onto the front of the
813 * wait queue, but this is pretty rare. 813 * wait queue, but this is pretty rare.
814 */ 814 */
815 spin_lock_irq(q->queue_lock); 815 spin_lock_irq(q->queue_lock);
816 freed_request(q, is_sync, priv); 816 freed_request(q, is_sync, priv);
817 817
818 /* 818 /*
819 * in the very unlikely event that allocation failed and no 819 * in the very unlikely event that allocation failed and no
820 * requests for this direction were pending, mark us starved 820 * requests for this direction were pending, mark us starved
821 * so that freeing of a request in the other direction will 821 * so that freeing of a request in the other direction will
822 * notice us. another possible fix would be to split the 822 * notice us. another possible fix would be to split the
823 * rq mempool into READ and WRITE 823 * rq mempool into READ and WRITE
824 */ 824 */
825 rq_starved: 825 rq_starved:
826 if (unlikely(rl->count[is_sync] == 0)) 826 if (unlikely(rl->count[is_sync] == 0))
827 rl->starved[is_sync] = 1; 827 rl->starved[is_sync] = 1;
828 828
829 goto out; 829 goto out;
830 } 830 }
831 831
832 /* 832 /*
833 * ioc may be NULL here, and ioc_batching will be false. That's 833 * ioc may be NULL here, and ioc_batching will be false. That's
834 * OK, if the queue is under the request limit then requests need 834 * OK, if the queue is under the request limit then requests need
835 * not count toward the nr_batch_requests limit. There will always 835 * not count toward the nr_batch_requests limit. There will always
836 * be some limit enforced by BLK_BATCH_TIME. 836 * be some limit enforced by BLK_BATCH_TIME.
837 */ 837 */
838 if (ioc_batching(q, ioc)) 838 if (ioc_batching(q, ioc))
839 ioc->nr_batch_requests--; 839 ioc->nr_batch_requests--;
840 840
841 trace_block_getrq(q, bio, rw_flags & 1); 841 trace_block_getrq(q, bio, rw_flags & 1);
842 out: 842 out:
843 return rq; 843 return rq;
844 } 844 }
845 845
846 /* 846 /*
847 * No available requests for this queue, unplug the device and wait for some 847 * No available requests for this queue, unplug the device and wait for some
848 * requests to become available. 848 * requests to become available.
849 * 849 *
850 * Called with q->queue_lock held, and returns with it unlocked. 850 * Called with q->queue_lock held, and returns with it unlocked.
851 */ 851 */
852 static struct request *get_request_wait(struct request_queue *q, int rw_flags, 852 static struct request *get_request_wait(struct request_queue *q, int rw_flags,
853 struct bio *bio) 853 struct bio *bio)
854 { 854 {
855 const bool is_sync = rw_is_sync(rw_flags) != 0; 855 const bool is_sync = rw_is_sync(rw_flags) != 0;
856 struct request *rq; 856 struct request *rq;
857 857
858 rq = get_request(q, rw_flags, bio, GFP_NOIO); 858 rq = get_request(q, rw_flags, bio, GFP_NOIO);
859 while (!rq) { 859 while (!rq) {
860 DEFINE_WAIT(wait); 860 DEFINE_WAIT(wait);
861 struct io_context *ioc; 861 struct io_context *ioc;
862 struct request_list *rl = &q->rq; 862 struct request_list *rl = &q->rq;
863 863
864 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, 864 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
865 TASK_UNINTERRUPTIBLE); 865 TASK_UNINTERRUPTIBLE);
866 866
867 trace_block_sleeprq(q, bio, rw_flags & 1); 867 trace_block_sleeprq(q, bio, rw_flags & 1);
868 868
869 __generic_unplug_device(q); 869 __generic_unplug_device(q);
870 spin_unlock_irq(q->queue_lock); 870 spin_unlock_irq(q->queue_lock);
871 io_schedule(); 871 io_schedule();
872 872
873 /* 873 /*
874 * After sleeping, we become a "batching" process and 874 * After sleeping, we become a "batching" process and
875 * will be able to allocate at least one request, and 875 * will be able to allocate at least one request, and
876 * up to a big batch of them for a small period of time. 876 * up to a big batch of them for a small period of time.
877 * See ioc_batching, ioc_set_batching 877 * See ioc_batching, ioc_set_batching
878 */ 878 */
879 ioc = current_io_context(GFP_NOIO, q->node); 879 ioc = current_io_context(GFP_NOIO, q->node);
880 ioc_set_batching(q, ioc); 880 ioc_set_batching(q, ioc);
881 881
882 spin_lock_irq(q->queue_lock); 882 spin_lock_irq(q->queue_lock);
883 finish_wait(&rl->wait[is_sync], &wait); 883 finish_wait(&rl->wait[is_sync], &wait);
884 884
885 rq = get_request(q, rw_flags, bio, GFP_NOIO); 885 rq = get_request(q, rw_flags, bio, GFP_NOIO);
886 }; 886 };
887 887
888 return rq; 888 return rq;
889 } 889 }
890 890
891 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 891 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
892 { 892 {
893 struct request *rq; 893 struct request *rq;
894 894
895 BUG_ON(rw != READ && rw != WRITE); 895 BUG_ON(rw != READ && rw != WRITE);
896 896
897 spin_lock_irq(q->queue_lock); 897 spin_lock_irq(q->queue_lock);
898 if (gfp_mask & __GFP_WAIT) { 898 if (gfp_mask & __GFP_WAIT) {
899 rq = get_request_wait(q, rw, NULL); 899 rq = get_request_wait(q, rw, NULL);
900 } else { 900 } else {
901 rq = get_request(q, rw, NULL, gfp_mask); 901 rq = get_request(q, rw, NULL, gfp_mask);
902 if (!rq) 902 if (!rq)
903 spin_unlock_irq(q->queue_lock); 903 spin_unlock_irq(q->queue_lock);
904 } 904 }
905 /* q->queue_lock is unlocked at this point */ 905 /* q->queue_lock is unlocked at this point */
906 906
907 return rq; 907 return rq;
908 } 908 }
909 EXPORT_SYMBOL(blk_get_request); 909 EXPORT_SYMBOL(blk_get_request);
910 910
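For context, a hedged sketch of the common pairing of blk_get_request() with blk_execute_rq() for a pass-through command; the opcode, length and timeout below are placeholders, not anything defined in this file:

#include <linux/blkdev.h>

/* Illustrative only: issue a synchronous pass-through request, then release it. */
static int issue_passthrough_cmd(struct request_queue *q, struct gendisk *disk)
{
        struct request *rq;
        int err;

        rq = blk_get_request(q, READ, GFP_KERNEL);      /* __GFP_WAIT: sleeps rather than failing */
        if (!rq)
                return -ENOMEM;         /* only reachable for non-waiting gfp masks */

        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->cmd_len = 6;
        rq->cmd[0] = 0x00;              /* placeholder opcode */
        rq->timeout = 30 * HZ;

        err = blk_execute_rq(q, disk, rq, 0);   /* waits for completion */
        blk_put_request(rq);
        return err;
}
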
911 /** 911 /**
912 * blk_make_request - given a bio, allocate a corresponding struct request. 912 * blk_make_request - given a bio, allocate a corresponding struct request.
913 * @q: target request queue 913 * @q: target request queue
914 * @bio: The bio describing the memory mappings that will be submitted for IO. 914 * @bio: The bio describing the memory mappings that will be submitted for IO.
915 * It may be a chained-bio properly constructed by block/bio layer. 915 * It may be a chained-bio properly constructed by block/bio layer.
916 * @gfp_mask: gfp flags to be used for memory allocation 916 * @gfp_mask: gfp flags to be used for memory allocation
917 * 917 *
918 * blk_make_request is the parallel of generic_make_request for BLOCK_PC 918 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
919 * type commands, where the struct request needs to be further initialized by 919 * type commands, where the struct request needs to be further initialized by
920 * the caller. It is passed a &struct bio, which describes the memory info of 920 * the caller. It is passed a &struct bio, which describes the memory info of
921 * the I/O transfer. 921 * the I/O transfer.
922 * 922 *
923 * The caller of blk_make_request must make sure that bi_io_vec 923 * The caller of blk_make_request must make sure that bi_io_vec
924 * are set to describe the memory buffers, and that bio_data_dir() will return 924 * are set to describe the memory buffers, and that bio_data_dir() will return
925 * the needed direction of the request (and that all bios in the passed bio chain 925 * the needed direction of the request (and that all bios in the passed bio chain
926 * are properly set accordingly). 926 * are properly set accordingly).
927 * 927 *
928 * If called under non-sleepable conditions, the mapped bio buffers must not 928 * If called under non-sleepable conditions, the mapped bio buffers must not
929 * need bouncing; allocate them with the appropriate masked or flagged allocator 929 * need bouncing; allocate them with the appropriate masked or flagged allocator
930 * suitable for the target device. Otherwise the call to blk_queue_bounce will 930 * suitable for the target device. Otherwise the call to blk_queue_bounce will
931 * BUG. 931 * BUG.
932 * 932 *
933 * WARNING: When allocating/cloning a bio-chain, careful consideration should be 933 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
934 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for 934 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
935 * anything but the first bio in the chain. Otherwise you risk waiting for IO 935 * anything but the first bio in the chain. Otherwise you risk waiting for IO
936 * completion of a bio that hasn't been submitted yet, thus resulting in a 936 * completion of a bio that hasn't been submitted yet, thus resulting in a
937 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead 937 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
938 * of bio_alloc(), as that avoids the mempool deadlock. 938 * of bio_alloc(), as that avoids the mempool deadlock.
939 * If possible a big IO should be split into smaller parts when allocation 939 * If possible a big IO should be split into smaller parts when allocation
940 * fails. Partial allocation should not be an error, or you risk a live-lock. 940 * fails. Partial allocation should not be an error, or you risk a live-lock.
941 */ 941 */
942 struct request *blk_make_request(struct request_queue *q, struct bio *bio, 942 struct request *blk_make_request(struct request_queue *q, struct bio *bio,
943 gfp_t gfp_mask) 943 gfp_t gfp_mask)
944 { 944 {
945 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); 945 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
946 946
947 if (unlikely(!rq)) 947 if (unlikely(!rq))
948 return ERR_PTR(-ENOMEM); 948 return ERR_PTR(-ENOMEM);
949 949
950 for_each_bio(bio) { 950 for_each_bio(bio) {
951 struct bio *bounce_bio = bio; 951 struct bio *bounce_bio = bio;
952 int ret; 952 int ret;
953 953
954 blk_queue_bounce(q, &bounce_bio); 954 blk_queue_bounce(q, &bounce_bio);
955 ret = blk_rq_append_bio(q, rq, bounce_bio); 955 ret = blk_rq_append_bio(q, rq, bounce_bio);
956 if (unlikely(ret)) { 956 if (unlikely(ret)) {
957 blk_put_request(rq); 957 blk_put_request(rq);
958 return ERR_PTR(ret); 958 return ERR_PTR(ret);
959 } 959 }
960 } 960 }
961 961
962 return rq; 962 return rq;
963 } 963 }
964 EXPORT_SYMBOL(blk_make_request); 964 EXPORT_SYMBOL(blk_make_request);
965 965
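A hedged sketch of the call pattern described above, assuming the caller has already built the bio (or bio chain, e.g. via bio_kmalloc()) with its data direction and pages set; the helper name is illustrative:

#include <linux/blkdev.h>
#include <linux/err.h>

/* Illustrative only: wrap a prepared bio chain in a BLOCK_PC request and run it. */
static int execute_pc_bio(struct request_queue *q, struct gendisk *disk,
                          struct bio *bio)
{
        struct request *rq;
        int err;

        rq = blk_make_request(q, bio, GFP_KERNEL);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        /* ... fill in rq->cmd[], rq->cmd_len and rq->timeout for the target ... */

        err = blk_execute_rq(q, disk, rq, 0);
        blk_put_request(rq);
        return err;
}
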
966 /** 966 /**
967 * blk_requeue_request - put a request back on queue 967 * blk_requeue_request - put a request back on queue
968 * @q: request queue where request should be inserted 968 * @q: request queue where request should be inserted
969 * @rq: request to be inserted 969 * @rq: request to be inserted
970 * 970 *
971 * Description: 971 * Description:
972 * Drivers often keep queueing requests until the hardware cannot accept 972 * Drivers often keep queueing requests until the hardware cannot accept
973 * more. When that condition happens, we need to put the request back 973 * more. When that condition happens, we need to put the request back
974 * on the queue. Must be called with queue lock held. 974 * on the queue. Must be called with queue lock held.
975 */ 975 */
976 void blk_requeue_request(struct request_queue *q, struct request *rq) 976 void blk_requeue_request(struct request_queue *q, struct request *rq)
977 { 977 {
978 blk_delete_timer(rq); 978 blk_delete_timer(rq);
979 blk_clear_rq_complete(rq); 979 blk_clear_rq_complete(rq);
980 trace_block_rq_requeue(q, rq); 980 trace_block_rq_requeue(q, rq);
981 981
982 if (blk_rq_tagged(rq)) 982 if (blk_rq_tagged(rq))
983 blk_queue_end_tag(q, rq); 983 blk_queue_end_tag(q, rq);
984 984
985 BUG_ON(blk_queued_rq(rq)); 985 BUG_ON(blk_queued_rq(rq));
986 986
987 elv_requeue_request(q, rq); 987 elv_requeue_request(q, rq);
988 } 988 }
989 EXPORT_SYMBOL(blk_requeue_request); 989 EXPORT_SYMBOL(blk_requeue_request);
990 990
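As a hedged illustration of the "hardware cannot accept more" case above, a driver's request_fn (which runs with the queue lock held) might requeue and stop the queue roughly as follows; device_busy() is a stand-in for the driver's own check:

#include <linux/blkdev.h>

static int device_busy(void)            /* placeholder for a real hardware check */
{
        return 0;
}

/* Illustrative request_fn fragment: q->queue_lock is held on entry. */
static void example_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                if (device_busy()) {
                        blk_requeue_request(q, rq);     /* put it back for later */
                        blk_stop_queue(q);              /* restarted via blk_start_queue() */
                        break;
                }
                /* ... hand rq to the hardware ... */
        }
}
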
991 /** 991 /**
992 * blk_insert_request - insert a special request into a request queue 992 * blk_insert_request - insert a special request into a request queue
993 * @q: request queue where request should be inserted 993 * @q: request queue where request should be inserted
994 * @rq: request to be inserted 994 * @rq: request to be inserted
995 * @at_head: insert request at head or tail of queue 995 * @at_head: insert request at head or tail of queue
996 * @data: private data 996 * @data: private data
997 * 997 *
998 * Description: 998 * Description:
999 * Many block devices need to execute commands asynchronously, so they don't 999 * Many block devices need to execute commands asynchronously, so they don't
1000 * block the whole kernel from preemption during request execution. This is 1000 * block the whole kernel from preemption during request execution. This is
1001 * accomplished normally by inserting artificial requests tagged as 1001 * accomplished normally by inserting artificial requests tagged as
1002 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them 1002 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
1003 * be scheduled for actual execution by the request queue. 1003 * be scheduled for actual execution by the request queue.
1004 * 1004 *
1005 * We have the option of inserting at the head or the tail of the queue. 1005 * We have the option of inserting at the head or the tail of the queue.
1006 * Typically we use the tail for new ioctls and so forth. We use the head 1006 * Typically we use the tail for new ioctls and so forth. We use the head
1007 * of the queue for things like a QUEUE_FULL message from a device, or a 1007 * of the queue for things like a QUEUE_FULL message from a device, or a
1008 * host that is unable to accept a particular command. 1008 * host that is unable to accept a particular command.
1009 */ 1009 */
1010 void blk_insert_request(struct request_queue *q, struct request *rq, 1010 void blk_insert_request(struct request_queue *q, struct request *rq,
1011 int at_head, void *data) 1011 int at_head, void *data)
1012 { 1012 {
1013 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 1013 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
1014 unsigned long flags; 1014 unsigned long flags;
1015 1015
1016 /* 1016 /*
1017 * tell I/O scheduler that this isn't a regular read/write (ie it 1017 * tell I/O scheduler that this isn't a regular read/write (ie it
1018 * must not attempt merges on this) and that it acts as a soft 1018 * must not attempt merges on this) and that it acts as a soft
1019 * barrier 1019 * barrier
1020 */ 1020 */
1021 rq->cmd_type = REQ_TYPE_SPECIAL; 1021 rq->cmd_type = REQ_TYPE_SPECIAL;
1022 1022
1023 rq->special = data; 1023 rq->special = data;
1024 1024
1025 spin_lock_irqsave(q->queue_lock, flags); 1025 spin_lock_irqsave(q->queue_lock, flags);
1026 1026
1027 /* 1027 /*
1028 * If command is tagged, release the tag 1028 * If command is tagged, release the tag
1029 */ 1029 */
1030 if (blk_rq_tagged(rq)) 1030 if (blk_rq_tagged(rq))
1031 blk_queue_end_tag(q, rq); 1031 blk_queue_end_tag(q, rq);
1032 1032
1033 drive_stat_acct(rq, 1); 1033 drive_stat_acct(rq, 1);
1034 __elv_add_request(q, rq, where, 0); 1034 __elv_add_request(q, rq, where, 0);
1035 __blk_run_queue(q); 1035 __blk_run_queue(q);
1036 spin_unlock_irqrestore(q->queue_lock, flags); 1036 spin_unlock_irqrestore(q->queue_lock, flags);
1037 } 1037 }
1038 EXPORT_SYMBOL(blk_insert_request); 1038 EXPORT_SYMBOL(blk_insert_request);
1039 1039
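A hedged sketch of how a caller might push such a special command to the head of the queue; note that blk_insert_request() itself sets REQ_TYPE_SPECIAL and rq->special, so the caller only allocates the request and supplies its private data:

#include <linux/blkdev.h>

/* Illustrative only: queue a driver-private command ahead of normal I/O.
 * The driver's request_fn is expected to recognise and complete it. */
static int kick_special_command(struct request_queue *q, void *driver_data)
{
        struct request *rq;

        rq = blk_get_request(q, WRITE, GFP_KERNEL);
        if (!rq)
                return -ENOMEM;

        blk_insert_request(q, rq, 1 /* at_head */, driver_data);
        return 0;
}
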
1040 /* 1040 /*
1041 * add-request adds a request to the linked list. 1041 * add-request adds a request to the linked list.
1042 * queue lock is held and interrupts disabled, as we muck with the 1042 * queue lock is held and interrupts disabled, as we muck with the
1043 * request queue list. 1043 * request queue list.
1044 */ 1044 */
1045 static inline void add_request(struct request_queue *q, struct request *req) 1045 static inline void add_request(struct request_queue *q, struct request *req)
1046 { 1046 {
1047 drive_stat_acct(req, 1); 1047 drive_stat_acct(req, 1);
1048 1048
1049 /* 1049 /*
1050 * elevator indicated where it wants this request to be 1050 * elevator indicated where it wants this request to be
1051 * inserted at elevator_merge time 1051 * inserted at elevator_merge time
1052 */ 1052 */
1053 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 1053 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
1054 } 1054 }
1055 1055
1056 static void part_round_stats_single(int cpu, struct hd_struct *part, 1056 static void part_round_stats_single(int cpu, struct hd_struct *part,
1057 unsigned long now) 1057 unsigned long now)
1058 { 1058 {
1059 if (now == part->stamp) 1059 if (now == part->stamp)
1060 return; 1060 return;
1061 1061
1062 if (part_in_flight(part)) { 1062 if (part_in_flight(part)) {
1063 __part_stat_add(cpu, part, time_in_queue, 1063 __part_stat_add(cpu, part, time_in_queue,
1064 part_in_flight(part) * (now - part->stamp)); 1064 part_in_flight(part) * (now - part->stamp));
1065 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 1065 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1066 } 1066 }
1067 part->stamp = now; 1067 part->stamp = now;
1068 } 1068 }
1069 1069
1070 /** 1070 /**
1071 * part_round_stats() - Round off the performance stats on a struct disk_stats. 1071 * part_round_stats() - Round off the performance stats on a struct disk_stats.
1072 * @cpu: cpu number for stats access 1072 * @cpu: cpu number for stats access
1073 * @part: target partition 1073 * @part: target partition
1074 * 1074 *
1075 * The average IO queue length and utilisation statistics are maintained 1075 * The average IO queue length and utilisation statistics are maintained
1076 * by observing the current state of the queue length and the amount of 1076 * by observing the current state of the queue length and the amount of
1077 * time it has been in this state for. 1077 * time it has been in this state for.
1078 * 1078 *
1079 * Normally, that accounting is done on IO completion, but that can result 1079 * Normally, that accounting is done on IO completion, but that can result
1080 * in more than a second's worth of IO being accounted for within any one 1080 * in more than a second's worth of IO being accounted for within any one
1081 * second, leading to >100% utilisation. To deal with that, we call this 1081 * second, leading to >100% utilisation. To deal with that, we call this
1082 * function to do a round-off before returning the results when reading 1082 * function to do a round-off before returning the results when reading
1083 * /proc/diskstats. This accounts immediately for all queue usage up to 1083 * /proc/diskstats. This accounts immediately for all queue usage up to
1084 * the current jiffies and restarts the counters again. 1084 * the current jiffies and restarts the counters again.
1085 */ 1085 */
1086 void part_round_stats(int cpu, struct hd_struct *part) 1086 void part_round_stats(int cpu, struct hd_struct *part)
1087 { 1087 {
1088 unsigned long now = jiffies; 1088 unsigned long now = jiffies;
1089 1089
1090 if (part->partno) 1090 if (part->partno)
1091 part_round_stats_single(cpu, &part_to_disk(part)->part0, now); 1091 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1092 part_round_stats_single(cpu, part, now); 1092 part_round_stats_single(cpu, part, now);
1093 } 1093 }
1094 EXPORT_SYMBOL_GPL(part_round_stats); 1094 EXPORT_SYMBOL_GPL(part_round_stats);
1095 1095
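The usual call pattern brackets part_round_stats() with the per-cpu stat helpers, e.g. when a stats file is about to be read; a hedged sketch:

#include <linux/genhd.h>

/* Illustrative only: fold outstanding in-flight time into the counters now. */
static void snapshot_part_stats(struct hd_struct *part)
{
        int cpu;

        cpu = part_stat_lock();         /* rcu_read_lock() + get_cpu() */
        part_round_stats(cpu, part);
        part_stat_unlock();

        /* io_ticks/time_in_queue are now accounted up to the current jiffy */
}
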
1096 /* 1096 /*
1097 * queue lock must be held 1097 * queue lock must be held
1098 */ 1098 */
1099 void __blk_put_request(struct request_queue *q, struct request *req) 1099 void __blk_put_request(struct request_queue *q, struct request *req)
1100 { 1100 {
1101 if (unlikely(!q)) 1101 if (unlikely(!q))
1102 return; 1102 return;
1103 if (unlikely(--req->ref_count)) 1103 if (unlikely(--req->ref_count))
1104 return; 1104 return;
1105 1105
1106 elv_completed_request(q, req); 1106 elv_completed_request(q, req);
1107 1107
1108 /* this is a bio leak */ 1108 /* this is a bio leak */
1109 WARN_ON(req->bio != NULL); 1109 WARN_ON(req->bio != NULL);
1110 1110
1111 /* 1111 /*
1112 * Request may not have originated from ll_rw_blk. If not, 1112 * Request may not have originated from ll_rw_blk. If not,
1113 * it didn't come out of our reserved rq pools 1113 * it didn't come out of our reserved rq pools
1114 */ 1114 */
1115 if (req->cmd_flags & REQ_ALLOCED) { 1115 if (req->cmd_flags & REQ_ALLOCED) {
1116 int is_sync = rq_is_sync(req) != 0; 1116 int is_sync = rq_is_sync(req) != 0;
1117 int priv = req->cmd_flags & REQ_ELVPRIV; 1117 int priv = req->cmd_flags & REQ_ELVPRIV;
1118 1118
1119 BUG_ON(!list_empty(&req->queuelist)); 1119 BUG_ON(!list_empty(&req->queuelist));
1120 BUG_ON(!hlist_unhashed(&req->hash)); 1120 BUG_ON(!hlist_unhashed(&req->hash));
1121 1121
1122 blk_free_request(q, req); 1122 blk_free_request(q, req);
1123 freed_request(q, is_sync, priv); 1123 freed_request(q, is_sync, priv);
1124 } 1124 }
1125 } 1125 }
1126 EXPORT_SYMBOL_GPL(__blk_put_request); 1126 EXPORT_SYMBOL_GPL(__blk_put_request);
1127 1127
1128 void blk_put_request(struct request *req) 1128 void blk_put_request(struct request *req)
1129 { 1129 {
1130 unsigned long flags; 1130 unsigned long flags;
1131 struct request_queue *q = req->q; 1131 struct request_queue *q = req->q;
1132 1132
1133 spin_lock_irqsave(q->queue_lock, flags); 1133 spin_lock_irqsave(q->queue_lock, flags);
1134 __blk_put_request(q, req); 1134 __blk_put_request(q, req);
1135 spin_unlock_irqrestore(q->queue_lock, flags); 1135 spin_unlock_irqrestore(q->queue_lock, flags);
1136 } 1136 }
1137 EXPORT_SYMBOL(blk_put_request); 1137 EXPORT_SYMBOL(blk_put_request);
1138 1138
1139 /** 1139 /**
1140 * blk_add_request_payload - add a payload to a request 1140 * blk_add_request_payload - add a payload to a request
1141 * @rq: request to update 1141 * @rq: request to update
1142 * @page: page backing the payload 1142 * @page: page backing the payload
1143 * @len: length of the payload. 1143 * @len: length of the payload.
1144 * 1144 *
1145 * This allows a block driver to later add a payload to an already 1145 * This allows a block driver to later add a payload to an already
1146 * submitted request. The driver needs to take care of freeing the payload 1146 * submitted request. The driver needs to take care of freeing the payload
1147 * itself. 1147 * itself.
1148 * 1148 *
1149 * Note that this is a quite horrible hack and nothing but handling of 1149 * Note that this is a quite horrible hack and nothing but handling of
1150 * discard requests should ever use it. 1150 * discard requests should ever use it.
1151 */ 1151 */
1152 void blk_add_request_payload(struct request *rq, struct page *page, 1152 void blk_add_request_payload(struct request *rq, struct page *page,
1153 unsigned int len) 1153 unsigned int len)
1154 { 1154 {
1155 struct bio *bio = rq->bio; 1155 struct bio *bio = rq->bio;
1156 1156
1157 bio->bi_io_vec->bv_page = page; 1157 bio->bi_io_vec->bv_page = page;
1158 bio->bi_io_vec->bv_offset = 0; 1158 bio->bi_io_vec->bv_offset = 0;
1159 bio->bi_io_vec->bv_len = len; 1159 bio->bi_io_vec->bv_len = len;
1160 1160
1161 bio->bi_size = len; 1161 bio->bi_size = len;
1162 bio->bi_vcnt = 1; 1162 bio->bi_vcnt = 1;
1163 bio->bi_phys_segments = 1; 1163 bio->bi_phys_segments = 1;
1164 1164
1165 rq->__data_len = rq->resid_len = len; 1165 rq->__data_len = rq->resid_len = len;
1166 rq->nr_phys_segments = 1; 1166 rq->nr_phys_segments = 1;
1167 rq->buffer = bio_data(bio); 1167 rq->buffer = bio_data(bio);
1168 } 1168 }
1169 EXPORT_SYMBOL_GPL(blk_add_request_payload); 1169 EXPORT_SYMBOL_GPL(blk_add_request_payload);
1170 1170
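As a hedged example of the discard-only use case noted above, a driver prep routine might attach one zeroed page as the payload that carries its device-specific discard descriptor:

#include <linux/blkdev.h>
#include <linux/gfp.h>

/* Illustrative only: give a payload-less discard request a page to carry data. */
static int prep_discard_payload(struct request *rq, unsigned int payload_len)
{
        struct page *page;

        page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
        if (!page)
                return -ENOMEM;

        /* ... write the device-specific discard descriptor into the page ... */
        blk_add_request_payload(rq, page, payload_len);

        /* the driver, not the block layer, must free the page on completion */
        return 0;
}
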
1171 void init_request_from_bio(struct request *req, struct bio *bio) 1171 void init_request_from_bio(struct request *req, struct bio *bio)
1172 { 1172 {
1173 req->cpu = bio->bi_comp_cpu; 1173 req->cpu = bio->bi_comp_cpu;
1174 req->cmd_type = REQ_TYPE_FS; 1174 req->cmd_type = REQ_TYPE_FS;
1175 1175
1176 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; 1176 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1177 if (bio->bi_rw & REQ_RAHEAD) 1177 if (bio->bi_rw & REQ_RAHEAD)
1178 req->cmd_flags |= REQ_FAILFAST_MASK; 1178 req->cmd_flags |= REQ_FAILFAST_MASK;
1179 1179
1180 req->errors = 0; 1180 req->errors = 0;
1181 req->__sector = bio->bi_sector; 1181 req->__sector = bio->bi_sector;
1182 req->ioprio = bio_prio(bio); 1182 req->ioprio = bio_prio(bio);
1183 blk_rq_bio_prep(req->q, req, bio); 1183 blk_rq_bio_prep(req->q, req, bio);
1184 } 1184 }
1185 1185
1186 /* 1186 /*
1187 * Only disable plugging for non-rotational devices if they also do tagging; 1187 * Only disable plugging for non-rotational devices if they also do tagging;
1188 * otherwise we do need the proper merging 1188 * otherwise we do need the proper merging
1189 */ 1189 */
1190 static inline bool queue_should_plug(struct request_queue *q) 1190 static inline bool queue_should_plug(struct request_queue *q)
1191 { 1191 {
1192 return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); 1192 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1193 } 1193 }
1194 1194
1195 static int __make_request(struct request_queue *q, struct bio *bio) 1195 static int __make_request(struct request_queue *q, struct bio *bio)
1196 { 1196 {
1197 struct request *req; 1197 struct request *req;
1198 int el_ret; 1198 int el_ret;
1199 unsigned int bytes = bio->bi_size; 1199 unsigned int bytes = bio->bi_size;
1200 const unsigned short prio = bio_prio(bio); 1200 const unsigned short prio = bio_prio(bio);
1201 const bool sync = (bio->bi_rw & REQ_SYNC); 1201 const bool sync = (bio->bi_rw & REQ_SYNC);
1202 const bool unplug = (bio->bi_rw & REQ_UNPLUG); 1202 const bool unplug = (bio->bi_rw & REQ_UNPLUG);
1203 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; 1203 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1204 int rw_flags; 1204 int rw_flags;
1205 1205
1206 if ((bio->bi_rw & REQ_HARDBARRIER) && 1206 /* REQ_HARDBARRIER is no more */
1207 (q->next_ordered == QUEUE_ORDERED_NONE)) { 1207 if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
1208 "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
1208 bio_endio(bio, -EOPNOTSUPP); 1209 bio_endio(bio, -EOPNOTSUPP);
1209 return 0; 1210 return 0;
1210 } 1211 }
1212
1211 /* 1213 /*
1212 * low level driver can indicate that it wants pages above a 1214 * low level driver can indicate that it wants pages above a
1213 * certain limit bounced to low memory (ie for highmem, or even 1215 * certain limit bounced to low memory (ie for highmem, or even
1214 * ISA dma in theory) 1216 * ISA dma in theory)
1215 */ 1217 */
1216 blk_queue_bounce(q, &bio); 1218 blk_queue_bounce(q, &bio);
1217 1219
1218 spin_lock_irq(q->queue_lock); 1220 spin_lock_irq(q->queue_lock);
1219 1221
1220 if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) 1222 if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
1221 goto get_rq; 1223 goto get_rq;
1222 1224
1223 el_ret = elv_merge(q, &req, bio); 1225 el_ret = elv_merge(q, &req, bio);
1224 switch (el_ret) { 1226 switch (el_ret) {
1225 case ELEVATOR_BACK_MERGE: 1227 case ELEVATOR_BACK_MERGE:
1226 BUG_ON(!rq_mergeable(req)); 1228 BUG_ON(!rq_mergeable(req));
1227 1229
1228 if (!ll_back_merge_fn(q, req, bio)) 1230 if (!ll_back_merge_fn(q, req, bio))
1229 break; 1231 break;
1230 1232
1231 trace_block_bio_backmerge(q, bio); 1233 trace_block_bio_backmerge(q, bio);
1232 1234
1233 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) 1235 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1234 blk_rq_set_mixed_merge(req); 1236 blk_rq_set_mixed_merge(req);
1235 1237
1236 req->biotail->bi_next = bio; 1238 req->biotail->bi_next = bio;
1237 req->biotail = bio; 1239 req->biotail = bio;
1238 req->__data_len += bytes; 1240 req->__data_len += bytes;
1239 req->ioprio = ioprio_best(req->ioprio, prio); 1241 req->ioprio = ioprio_best(req->ioprio, prio);
1240 if (!blk_rq_cpu_valid(req)) 1242 if (!blk_rq_cpu_valid(req))
1241 req->cpu = bio->bi_comp_cpu; 1243 req->cpu = bio->bi_comp_cpu;
1242 drive_stat_acct(req, 0); 1244 drive_stat_acct(req, 0);
1243 elv_bio_merged(q, req, bio); 1245 elv_bio_merged(q, req, bio);
1244 if (!attempt_back_merge(q, req)) 1246 if (!attempt_back_merge(q, req))
1245 elv_merged_request(q, req, el_ret); 1247 elv_merged_request(q, req, el_ret);
1246 goto out; 1248 goto out;
1247 1249
1248 case ELEVATOR_FRONT_MERGE: 1250 case ELEVATOR_FRONT_MERGE:
1249 BUG_ON(!rq_mergeable(req)); 1251 BUG_ON(!rq_mergeable(req));
1250 1252
1251 if (!ll_front_merge_fn(q, req, bio)) 1253 if (!ll_front_merge_fn(q, req, bio))
1252 break; 1254 break;
1253 1255
1254 trace_block_bio_frontmerge(q, bio); 1256 trace_block_bio_frontmerge(q, bio);
1255 1257
1256 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { 1258 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1257 blk_rq_set_mixed_merge(req); 1259 blk_rq_set_mixed_merge(req);
1258 req->cmd_flags &= ~REQ_FAILFAST_MASK; 1260 req->cmd_flags &= ~REQ_FAILFAST_MASK;
1259 req->cmd_flags |= ff; 1261 req->cmd_flags |= ff;
1260 } 1262 }
1261 1263
1262 bio->bi_next = req->bio; 1264 bio->bi_next = req->bio;
1263 req->bio = bio; 1265 req->bio = bio;
1264 1266
1265 /* 1267 /*
1266 * may not be valid. if the low level driver said 1268 * may not be valid. if the low level driver said
1267 * it didn't need a bounce buffer then it better 1269 * it didn't need a bounce buffer then it better
1268 * not touch req->buffer either... 1270 * not touch req->buffer either...
1269 */ 1271 */
1270 req->buffer = bio_data(bio); 1272 req->buffer = bio_data(bio);
1271 req->__sector = bio->bi_sector; 1273 req->__sector = bio->bi_sector;
1272 req->__data_len += bytes; 1274 req->__data_len += bytes;
1273 req->ioprio = ioprio_best(req->ioprio, prio); 1275 req->ioprio = ioprio_best(req->ioprio, prio);
1274 if (!blk_rq_cpu_valid(req)) 1276 if (!blk_rq_cpu_valid(req))
1275 req->cpu = bio->bi_comp_cpu; 1277 req->cpu = bio->bi_comp_cpu;
1276 drive_stat_acct(req, 0); 1278 drive_stat_acct(req, 0);
1277 elv_bio_merged(q, req, bio); 1279 elv_bio_merged(q, req, bio);
1278 if (!attempt_front_merge(q, req)) 1280 if (!attempt_front_merge(q, req))
1279 elv_merged_request(q, req, el_ret); 1281 elv_merged_request(q, req, el_ret);
1280 goto out; 1282 goto out;
1281 1283
1282 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 1284 /* ELV_NO_MERGE: elevator says don't/can't merge. */
1283 default: 1285 default:
1284 ; 1286 ;
1285 } 1287 }
1286 1288
1287 get_rq: 1289 get_rq:
1288 /* 1290 /*
1289 * This sync check and mask will be re-done in init_request_from_bio(), 1291 * This sync check and mask will be re-done in init_request_from_bio(),
1290 * but we need to set it earlier to expose the sync flag to the 1292 * but we need to set it earlier to expose the sync flag to the
1291 * rq allocator and io schedulers. 1293 * rq allocator and io schedulers.
1292 */ 1294 */
1293 rw_flags = bio_data_dir(bio); 1295 rw_flags = bio_data_dir(bio);
1294 if (sync) 1296 if (sync)
1295 rw_flags |= REQ_SYNC; 1297 rw_flags |= REQ_SYNC;
1296 1298
1297 /* 1299 /*
1298 * Grab a free request. This might sleep but cannot fail. 1300 * Grab a free request. This might sleep but cannot fail.
1299 * Returns with the queue unlocked. 1301 * Returns with the queue unlocked.
1300 */ 1302 */
1301 req = get_request_wait(q, rw_flags, bio); 1303 req = get_request_wait(q, rw_flags, bio);
1302 1304
1303 /* 1305 /*
1304 * After dropping the lock and possibly sleeping here, our request 1306 * After dropping the lock and possibly sleeping here, our request
1305 * may now be mergeable after it had proven unmergeable (above). 1307 * may now be mergeable after it had proven unmergeable (above).
1306 * We don't worry about that case for efficiency. It won't happen 1308 * We don't worry about that case for efficiency. It won't happen
1307 * often, and the elevators are able to handle it. 1309 * often, and the elevators are able to handle it.
1308 */ 1310 */
1309 init_request_from_bio(req, bio); 1311 init_request_from_bio(req, bio);
1310 1312
1311 spin_lock_irq(q->queue_lock); 1313 spin_lock_irq(q->queue_lock);
1312 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1314 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1313 bio_flagged(bio, BIO_CPU_AFFINE)) 1315 bio_flagged(bio, BIO_CPU_AFFINE))
1314 req->cpu = blk_cpu_to_group(smp_processor_id()); 1316 req->cpu = blk_cpu_to_group(smp_processor_id());
1315 if (queue_should_plug(q) && elv_queue_empty(q)) 1317 if (queue_should_plug(q) && elv_queue_empty(q))
1316 blk_plug_device(q); 1318 blk_plug_device(q);
1317 add_request(q, req); 1319 add_request(q, req);
1318 out: 1320 out:
1319 if (unplug || !queue_should_plug(q)) 1321 if (unplug || !queue_should_plug(q))
1320 __generic_unplug_device(q); 1322 __generic_unplug_device(q);
1321 spin_unlock_irq(q->queue_lock); 1323 spin_unlock_irq(q->queue_lock);
1322 return 0; 1324 return 0;
1323 } 1325 }
1324 1326
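Since __make_request() now fails any REQ_HARDBARRIER bio with -EOPNOTSUPP, drivers that relied on barrier ordering are expected to advertise their cache semantics instead; a hedged sketch, assuming the blk_queue_flush() interface that supersedes blk_queue_ordered():

#include <linux/blkdev.h>

/* Illustrative only: declare a volatile write cache that can be flushed
 * (REQ_FLUSH) and, optionally, that the device honours FUA writes (REQ_FUA). */
static void example_advertise_cache(struct request_queue *q, int has_fua)
{
        unsigned int flush = REQ_FLUSH;

        if (has_fua)
                flush |= REQ_FUA;

        blk_queue_flush(q, flush);
}
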
1325 /* 1327 /*
1326 * If bio->bi_dev is a partition, remap the location 1328 * If bio->bi_dev is a partition, remap the location
1327 */ 1329 */
1328 static inline void blk_partition_remap(struct bio *bio) 1330 static inline void blk_partition_remap(struct bio *bio)
1329 { 1331 {
1330 struct block_device *bdev = bio->bi_bdev; 1332 struct block_device *bdev = bio->bi_bdev;
1331 1333
1332 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1334 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1333 struct hd_struct *p = bdev->bd_part; 1335 struct hd_struct *p = bdev->bd_part;
1334 1336
1335 bio->bi_sector += p->start_sect; 1337 bio->bi_sector += p->start_sect;
1336 bio->bi_bdev = bdev->bd_contains; 1338 bio->bi_bdev = bdev->bd_contains;
1337 1339
1338 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1340 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1339 bdev->bd_dev, 1341 bdev->bd_dev,
1340 bio->bi_sector - p->start_sect); 1342 bio->bi_sector - p->start_sect);
1341 } 1343 }
1342 } 1344 }
1343 1345
1344 static void handle_bad_sector(struct bio *bio) 1346 static void handle_bad_sector(struct bio *bio)
1345 { 1347 {
1346 char b[BDEVNAME_SIZE]; 1348 char b[BDEVNAME_SIZE];
1347 1349
1348 printk(KERN_INFO "attempt to access beyond end of device\n"); 1350 printk(KERN_INFO "attempt to access beyond end of device\n");
1349 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1351 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1350 bdevname(bio->bi_bdev, b), 1352 bdevname(bio->bi_bdev, b),
1351 bio->bi_rw, 1353 bio->bi_rw,
1352 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1354 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1353 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 1355 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
1354 1356
1355 set_bit(BIO_EOF, &bio->bi_flags); 1357 set_bit(BIO_EOF, &bio->bi_flags);
1356 } 1358 }
1357 1359
1358 #ifdef CONFIG_FAIL_MAKE_REQUEST 1360 #ifdef CONFIG_FAIL_MAKE_REQUEST
1359 1361
1360 static DECLARE_FAULT_ATTR(fail_make_request); 1362 static DECLARE_FAULT_ATTR(fail_make_request);
1361 1363
1362 static int __init setup_fail_make_request(char *str) 1364 static int __init setup_fail_make_request(char *str)
1363 { 1365 {
1364 return setup_fault_attr(&fail_make_request, str); 1366 return setup_fault_attr(&fail_make_request, str);
1365 } 1367 }
1366 __setup("fail_make_request=", setup_fail_make_request); 1368 __setup("fail_make_request=", setup_fail_make_request);
1367 1369
1368 static int should_fail_request(struct bio *bio) 1370 static int should_fail_request(struct bio *bio)
1369 { 1371 {
1370 struct hd_struct *part = bio->bi_bdev->bd_part; 1372 struct hd_struct *part = bio->bi_bdev->bd_part;
1371 1373
1372 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) 1374 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1373 return should_fail(&fail_make_request, bio->bi_size); 1375 return should_fail(&fail_make_request, bio->bi_size);
1374 1376
1375 return 0; 1377 return 0;
1376 } 1378 }
1377 1379
1378 static int __init fail_make_request_debugfs(void) 1380 static int __init fail_make_request_debugfs(void)
1379 { 1381 {
1380 return init_fault_attr_dentries(&fail_make_request, 1382 return init_fault_attr_dentries(&fail_make_request,
1381 "fail_make_request"); 1383 "fail_make_request");
1382 } 1384 }
1383 1385
1384 late_initcall(fail_make_request_debugfs); 1386 late_initcall(fail_make_request_debugfs);
1385 1387
1386 #else /* CONFIG_FAIL_MAKE_REQUEST */ 1388 #else /* CONFIG_FAIL_MAKE_REQUEST */
1387 1389
1388 static inline int should_fail_request(struct bio *bio) 1390 static inline int should_fail_request(struct bio *bio)
1389 { 1391 {
1390 return 0; 1392 return 0;
1391 } 1393 }
1392 1394
1393 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 1395 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1394 1396
1395 /* 1397 /*
1396 * Check whether this bio extends beyond the end of the device. 1398 * Check whether this bio extends beyond the end of the device.
1397 */ 1399 */
1398 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) 1400 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1399 { 1401 {
1400 sector_t maxsector; 1402 sector_t maxsector;
1401 1403
1402 if (!nr_sectors) 1404 if (!nr_sectors)
1403 return 0; 1405 return 0;
1404 1406
1405 /* Test device or partition size, when known. */ 1407 /* Test device or partition size, when known. */
1406 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 1408 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
1407 if (maxsector) { 1409 if (maxsector) {
1408 sector_t sector = bio->bi_sector; 1410 sector_t sector = bio->bi_sector;
1409 1411
1410 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 1412 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1411 /* 1413 /*
1412 * This may well happen - the kernel calls bread() 1414 * This may well happen - the kernel calls bread()
1413 * without checking the size of the device, e.g., when 1415 * without checking the size of the device, e.g., when
1414 * mounting a device. 1416 * mounting a device.
1415 */ 1417 */
1416 handle_bad_sector(bio); 1418 handle_bad_sector(bio);
1417 return 1; 1419 return 1;
1418 } 1420 }
1419 } 1421 }
1420 1422
1421 return 0; 1423 return 0;
1422 } 1424 }
1423 1425
1424 /** 1426 /**
1425 * generic_make_request - hand a buffer to its device driver for I/O 1427 * generic_make_request - hand a buffer to its device driver for I/O
1426 * @bio: The bio describing the location in memory and on the device. 1428 * @bio: The bio describing the location in memory and on the device.
1427 * 1429 *
1428 * generic_make_request() is used to make I/O requests of block 1430 * generic_make_request() is used to make I/O requests of block
1429 * devices. It is passed a &struct bio, which describes the I/O that needs 1431 * devices. It is passed a &struct bio, which describes the I/O that needs
1430 * to be done. 1432 * to be done.
1431 * 1433 *
1432 * generic_make_request() does not return any status. The 1434 * generic_make_request() does not return any status. The
1433 * success/failure status of the request, along with notification of 1435 * success/failure status of the request, along with notification of
1434 * completion, is delivered asynchronously through the bio->bi_end_io 1436 * completion, is delivered asynchronously through the bio->bi_end_io
1435 * function described (one day) elsewhere. 1437 * function described (one day) elsewhere.
1436 * 1438 *
1437 * The caller of generic_make_request must make sure that bi_io_vec 1439 * The caller of generic_make_request must make sure that bi_io_vec
1438 * are set to describe the memory buffer, and that bi_dev and bi_sector are 1440 * are set to describe the memory buffer, and that bi_dev and bi_sector are
1439 * set to describe the device address, and the 1441 * set to describe the device address, and the
1440 * bi_end_io and optionally bi_private are set to describe how 1442 * bi_end_io and optionally bi_private are set to describe how
1441 * completion notification should be signaled. 1443 * completion notification should be signaled.
1442 * 1444 *
1443 * generic_make_request and the drivers it calls may use bi_next if this 1445 * generic_make_request and the drivers it calls may use bi_next if this
1444 * bio happens to be merged with someone else, and may change bi_dev and 1446 * bio happens to be merged with someone else, and may change bi_dev and
1445 * bi_sector for remaps as it sees fit. So the values of these fields 1447 * bi_sector for remaps as it sees fit. So the values of these fields
1446 * should NOT be depended on after the call to generic_make_request. 1448 * should NOT be depended on after the call to generic_make_request.
1447 */ 1449 */
1448 static inline void __generic_make_request(struct bio *bio) 1450 static inline void __generic_make_request(struct bio *bio)
1449 { 1451 {
1450 struct request_queue *q; 1452 struct request_queue *q;
1451 sector_t old_sector; 1453 sector_t old_sector;
1452 int ret, nr_sectors = bio_sectors(bio); 1454 int ret, nr_sectors = bio_sectors(bio);
1453 dev_t old_dev; 1455 dev_t old_dev;
1454 int err = -EIO; 1456 int err = -EIO;
1455 1457
1456 might_sleep(); 1458 might_sleep();
1457 1459
1458 if (bio_check_eod(bio, nr_sectors)) 1460 if (bio_check_eod(bio, nr_sectors))
1459 goto end_io; 1461 goto end_io;
1460 1462
1461 /* 1463 /*
1462 * Resolve the mapping until finished. (drivers are 1464 * Resolve the mapping until finished. (drivers are
1463 * still free to implement/resolve their own stacking 1465 * still free to implement/resolve their own stacking
1464 * by explicitly returning 0) 1466 * by explicitly returning 0)
1465 * 1467 *
1466 * NOTE: we don't repeat the blk_size check for each new device. 1468 * NOTE: we don't repeat the blk_size check for each new device.
1467 * Stacking drivers are expected to know what they are doing. 1469 * Stacking drivers are expected to know what they are doing.
1468 */ 1470 */
1469 old_sector = -1; 1471 old_sector = -1;
1470 old_dev = 0; 1472 old_dev = 0;
1471 do { 1473 do {
1472 char b[BDEVNAME_SIZE]; 1474 char b[BDEVNAME_SIZE];
1473 1475
1474 q = bdev_get_queue(bio->bi_bdev); 1476 q = bdev_get_queue(bio->bi_bdev);
1475 if (unlikely(!q)) { 1477 if (unlikely(!q)) {
1476 printk(KERN_ERR 1478 printk(KERN_ERR
1477 "generic_make_request: Trying to access " 1479 "generic_make_request: Trying to access "
1478 "nonexistent block-device %s (%Lu)\n", 1480 "nonexistent block-device %s (%Lu)\n",
1479 bdevname(bio->bi_bdev, b), 1481 bdevname(bio->bi_bdev, b),
1480 (long long) bio->bi_sector); 1482 (long long) bio->bi_sector);
1481 goto end_io; 1483 goto end_io;
1482 } 1484 }
1483 1485
1484 if (unlikely(!(bio->bi_rw & REQ_DISCARD) && 1486 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1485 nr_sectors > queue_max_hw_sectors(q))) { 1487 nr_sectors > queue_max_hw_sectors(q))) {
1486 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1488 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1487 bdevname(bio->bi_bdev, b), 1489 bdevname(bio->bi_bdev, b),
1488 bio_sectors(bio), 1490 bio_sectors(bio),
1489 queue_max_hw_sectors(q)); 1491 queue_max_hw_sectors(q));
1490 goto end_io; 1492 goto end_io;
1491 } 1493 }
1492 1494
1493 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 1495 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1494 goto end_io; 1496 goto end_io;
1495 1497
1496 if (should_fail_request(bio)) 1498 if (should_fail_request(bio))
1497 goto end_io; 1499 goto end_io;
1498 1500
1499 /* 1501 /*
1500 * If this device has partitions, remap block n 1502 * If this device has partitions, remap block n
1501 * of partition p to block n+start(p) of the disk. 1503 * of partition p to block n+start(p) of the disk.
1502 */ 1504 */
1503 blk_partition_remap(bio); 1505 blk_partition_remap(bio);
1504 1506
1505 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) 1507 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1506 goto end_io; 1508 goto end_io;
1507 1509
1508 if (old_sector != -1) 1510 if (old_sector != -1)
1509 trace_block_remap(q, bio, old_dev, old_sector); 1511 trace_block_remap(q, bio, old_dev, old_sector);
1510 1512
1511 old_sector = bio->bi_sector; 1513 old_sector = bio->bi_sector;
1512 old_dev = bio->bi_bdev->bd_dev; 1514 old_dev = bio->bi_bdev->bd_dev;
1513 1515
1514 if (bio_check_eod(bio, nr_sectors)) 1516 if (bio_check_eod(bio, nr_sectors))
1515 goto end_io; 1517 goto end_io;
1516 1518
1517 if ((bio->bi_rw & REQ_DISCARD) && 1519 if ((bio->bi_rw & REQ_DISCARD) &&
1518 (!blk_queue_discard(q) || 1520 (!blk_queue_discard(q) ||
1519 ((bio->bi_rw & REQ_SECURE) && 1521 ((bio->bi_rw & REQ_SECURE) &&
1520 !blk_queue_secdiscard(q)))) { 1522 !blk_queue_secdiscard(q)))) {
1521 err = -EOPNOTSUPP; 1523 err = -EOPNOTSUPP;
1522 goto end_io; 1524 goto end_io;
1523 } 1525 }
1524 1526
1525 trace_block_bio_queue(q, bio); 1527 trace_block_bio_queue(q, bio);
1526 1528
1527 ret = q->make_request_fn(q, bio); 1529 ret = q->make_request_fn(q, bio);
1528 } while (ret); 1530 } while (ret);
1529 1531
1530 return; 1532 return;
1531 1533
1532 end_io: 1534 end_io:
1533 bio_endio(bio, err); 1535 bio_endio(bio, err);
1534 } 1536 }
1535 1537
1536 /* 1538 /*
1537 * We only want one ->make_request_fn to be active at a time, 1539 * We only want one ->make_request_fn to be active at a time,
1538 * else stack usage with stacked devices could be a problem. 1540 * else stack usage with stacked devices could be a problem.
1539 * So use current->bio_list to keep a list of requests 1541 * So use current->bio_list to keep a list of requests
1540 * submitted by a make_request_fn function. 1542 * submitted by a make_request_fn function.
1541 * current->bio_list is also used as a flag to say if 1543 * current->bio_list is also used as a flag to say if
1542 * generic_make_request is currently active in this task or not. 1544 * generic_make_request is currently active in this task or not.
1543 * If it is NULL, then no make_request is active. If it is non-NULL, 1545 * If it is NULL, then no make_request is active. If it is non-NULL,
1544 * then a make_request is active, and new requests should be added 1546 * then a make_request is active, and new requests should be added
1545 * at the tail 1547 * at the tail
1546 */ 1548 */
1547 void generic_make_request(struct bio *bio) 1549 void generic_make_request(struct bio *bio)
1548 { 1550 {
1549 struct bio_list bio_list_on_stack; 1551 struct bio_list bio_list_on_stack;
1550 1552
1551 if (current->bio_list) { 1553 if (current->bio_list) {
1552 /* make_request is active */ 1554 /* make_request is active */
1553 bio_list_add(current->bio_list, bio); 1555 bio_list_add(current->bio_list, bio);
1554 return; 1556 return;
1555 } 1557 }
1556 /* following loop may be a bit non-obvious, and so deserves some 1558 /* following loop may be a bit non-obvious, and so deserves some
1557 * explanation. 1559 * explanation.
1558 * Before entering the loop, bio->bi_next is NULL (as all callers 1560 * Before entering the loop, bio->bi_next is NULL (as all callers
1559 * ensure that) so we have a list with a single bio. 1561 * ensure that) so we have a list with a single bio.
1560 * We pretend that we have just taken it off a longer list, so 1562 * We pretend that we have just taken it off a longer list, so
1561 * we assign bio_list to a pointer to the bio_list_on_stack, 1563 * we assign bio_list to a pointer to the bio_list_on_stack,
1562 * thus initialising the bio_list of new bios to be 1564 * thus initialising the bio_list of new bios to be
1563 * added. __generic_make_request may indeed add some more bios 1565 * added. __generic_make_request may indeed add some more bios
1564 * through a recursive call to generic_make_request. If it 1566 * through a recursive call to generic_make_request. If it
1565 * did, we find a non-NULL value in bio_list and re-enter the loop 1567 * did, we find a non-NULL value in bio_list and re-enter the loop
1566 * from the top. In this case we really did just take the bio 1568 * from the top. In this case we really did just take the bio
1567 * off the top of the list (no pretending) and so remove it from 1569 * off the top of the list (no pretending) and so remove it from
1568 * bio_list, and call into __generic_make_request again. 1570 * bio_list, and call into __generic_make_request again.
1569 * 1571 *
1570 * The loop was structured like this to make only one call to 1572 * The loop was structured like this to make only one call to
1571 * __generic_make_request (which is important as it is large and 1573 * __generic_make_request (which is important as it is large and
1572 * inlined) and to keep the structure simple. 1574 * inlined) and to keep the structure simple.
1573 */ 1575 */
1574 BUG_ON(bio->bi_next); 1576 BUG_ON(bio->bi_next);
1575 bio_list_init(&bio_list_on_stack); 1577 bio_list_init(&bio_list_on_stack);
1576 current->bio_list = &bio_list_on_stack; 1578 current->bio_list = &bio_list_on_stack;
1577 do { 1579 do {
1578 __generic_make_request(bio); 1580 __generic_make_request(bio);
1579 bio = bio_list_pop(current->bio_list); 1581 bio = bio_list_pop(current->bio_list);
1580 } while (bio); 1582 } while (bio);
1581 current->bio_list = NULL; /* deactivate */ 1583 current->bio_list = NULL; /* deactivate */
1582 } 1584 }
1583 EXPORT_SYMBOL(generic_make_request); 1585 EXPORT_SYMBOL(generic_make_request);
1584 1586
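The do/while loop in __generic_make_request() is what lets a simple remapping driver redirect a bio without recursing: its make_request_fn may adjust bi_bdev/bi_sector and return non-zero so the loop resubmits, or do its own stacking and return 0. A hedged sketch of the former (struct and field names are illustrative):

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Illustrative only: a trivial linear remapping make_request_fn. */
struct linear_map {
        struct block_device *lower_bdev;        /* device we remap onto */
        sector_t start;                         /* offset of our slice on it */
};

static int linear_make_request(struct request_queue *q, struct bio *bio)
{
        struct linear_map *lm = q->queuedata;

        bio->bi_bdev = lm->lower_bdev;
        bio->bi_sector += lm->start;

        return 1;       /* non-zero: __generic_make_request() resubmits the bio */
}
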
1585 /** 1587 /**
1586 * submit_bio - submit a bio to the block device layer for I/O 1588 * submit_bio - submit a bio to the block device layer for I/O
1587 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1589 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1588 * @bio: The &struct bio which describes the I/O 1590 * @bio: The &struct bio which describes the I/O
1589 * 1591 *
1590 * submit_bio() is very similar in purpose to generic_make_request(), and 1592 * submit_bio() is very similar in purpose to generic_make_request(), and
1591 * uses that function to do most of the work. Both are fairly rough 1593 * uses that function to do most of the work. Both are fairly rough
1592 * interfaces; @bio must be presetup and ready for I/O. 1594 * interfaces; @bio must be presetup and ready for I/O.
1593 * 1595 *
1594 */ 1596 */
1595 void submit_bio(int rw, struct bio *bio) 1597 void submit_bio(int rw, struct bio *bio)
1596 { 1598 {
1597 int count = bio_sectors(bio); 1599 int count = bio_sectors(bio);
1598 1600
1599 bio->bi_rw |= rw; 1601 bio->bi_rw |= rw;
1600 1602
1601 /* 1603 /*
1602 * If it's a regular read/write or a barrier with data attached, 1604 * If it's a regular read/write or a barrier with data attached,
1603 * go through the normal accounting stuff before submission. 1605 * go through the normal accounting stuff before submission.
1604 */ 1606 */
1605 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { 1607 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1606 if (rw & WRITE) { 1608 if (rw & WRITE) {
1607 count_vm_events(PGPGOUT, count); 1609 count_vm_events(PGPGOUT, count);
1608 } else { 1610 } else {
1609 task_io_account_read(bio->bi_size); 1611 task_io_account_read(bio->bi_size);
1610 count_vm_events(PGPGIN, count); 1612 count_vm_events(PGPGIN, count);
1611 } 1613 }
1612 1614
1613 if (unlikely(block_dump)) { 1615 if (unlikely(block_dump)) {
1614 char b[BDEVNAME_SIZE]; 1616 char b[BDEVNAME_SIZE];
1615 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 1617 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
1616 current->comm, task_pid_nr(current), 1618 current->comm, task_pid_nr(current),
1617 (rw & WRITE) ? "WRITE" : "READ", 1619 (rw & WRITE) ? "WRITE" : "READ",
1618 (unsigned long long)bio->bi_sector, 1620 (unsigned long long)bio->bi_sector,
1619 bdevname(bio->bi_bdev, b)); 1621 bdevname(bio->bi_bdev, b));
1620 } 1622 }
1621 } 1623 }
1622 1624
1623 generic_make_request(bio); 1625 generic_make_request(bio);
1624 } 1626 }
1625 EXPORT_SYMBOL(submit_bio); 1627 EXPORT_SYMBOL(submit_bio);
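For reference, a typical caller builds a bio and hands it off as below. This is a hedged sketch only (my_end_io and my_write_page are hypothetical), relying on the bio fields already used in this file (bi_bdev, bi_sector) and the bi_end_io callback prototype of this kernel series.

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* Hypothetical completion callback. */
	static void my_end_io(struct bio *bio, int error)
	{
		/* ... record error, wake up the waiter ... */
		bio_put(bio);
	}

	/* Hypothetical single-page write submitted through submit_bio(). */
	static int my_write_page(struct block_device *bdev, sector_t sector,
				 struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);

		if (!bio)
			return -ENOMEM;
		bio->bi_bdev = bdev;
		bio->bi_sector = sector;
		bio->bi_end_io = my_end_io;
		bio_add_page(bio, page, PAGE_SIZE, 0);

		submit_bio(WRITE, bio);	/* accounting, then generic_make_request() */
		return 0;
	}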
1626 1628
1627 /** 1629 /**
1628 * blk_rq_check_limits - Helper function to check a request for the queue limit 1630 * blk_rq_check_limits - Helper function to check a request for the queue limit
1629 * @q: the queue 1631 * @q: the queue
1630 * @rq: the request being checked 1632 * @rq: the request being checked
1631 * 1633 *
1632 * Description: 1634 * Description:
1633 * @rq may have been made based on weaker limitations of upper-level queues 1635 * @rq may have been made based on weaker limitations of upper-level queues
1634 * in request stacking drivers, and it may violate the limitation of @q. 1636 * in request stacking drivers, and it may violate the limitation of @q.
1635 * Since the block layer and the underlying device driver trust @rq 1637 * Since the block layer and the underlying device driver trust @rq
1636 * after it is inserted to @q, it should be checked against @q before 1638 * after it is inserted to @q, it should be checked against @q before
1637 * the insertion using this generic function. 1639 * the insertion using this generic function.
1638 * 1640 *
1639 * This function should also be useful for request stacking drivers 1641 * This function should also be useful for request stacking drivers
1640 * in some cases below, so export this function. 1642 * in some cases below, so export this function.
1641 * Request stacking drivers like request-based dm may change the queue 1643 * Request stacking drivers like request-based dm may change the queue
1642 * limits while requests are in the queue (e.g. dm's table swapping). 1644 * limits while requests are in the queue (e.g. dm's table swapping).
1643 * Such request stacking drivers should check those requests against 1645 * Such request stacking drivers should check those requests against
1644 * the new queue limits again when they dispatch those requests, 1646 * the new queue limits again when they dispatch those requests,
1645 * although such checks are also done against the old queue limits 1647 * although such checks are also done against the old queue limits
1646 * when submitting requests. 1648 * when submitting requests.
1647 */ 1649 */
1648 int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1650 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1649 { 1651 {
1650 if (rq->cmd_flags & REQ_DISCARD) 1652 if (rq->cmd_flags & REQ_DISCARD)
1651 return 0; 1653 return 0;
1652 1654
1653 if (blk_rq_sectors(rq) > queue_max_sectors(q) || 1655 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1654 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { 1656 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1655 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1657 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1656 return -EIO; 1658 return -EIO;
1657 } 1659 }
1658 1660
1659 /* 1661 /*
1660 * queue's settings related to segment counting like q->bounce_pfn 1662 * queue's settings related to segment counting like q->bounce_pfn
1661 * may differ from that of other stacking queues. 1663 * may differ from that of other stacking queues.
1662 * Recalculate it to check the request correctly on this queue's 1664 * Recalculate it to check the request correctly on this queue's
1663 * limitation. 1665 * limitation.
1664 */ 1666 */
1665 blk_recalc_rq_segments(rq); 1667 blk_recalc_rq_segments(rq);
1666 if (rq->nr_phys_segments > queue_max_segments(q)) { 1668 if (rq->nr_phys_segments > queue_max_segments(q)) {
1667 printk(KERN_ERR "%s: over max segments limit.\n", __func__); 1669 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1668 return -EIO; 1670 return -EIO;
1669 } 1671 }
1670 1672
1671 return 0; 1673 return 0;
1672 } 1674 }
1673 EXPORT_SYMBOL_GPL(blk_rq_check_limits); 1675 EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1674 1676
1675 /** 1677 /**
1676 * blk_insert_cloned_request - Helper for stacking drivers to submit a request 1678 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
1677 * @q: the queue to submit the request 1679 * @q: the queue to submit the request
1678 * @rq: the request being queued 1680 * @rq: the request being queued
1679 */ 1681 */
1680 int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1682 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1681 { 1683 {
1682 unsigned long flags; 1684 unsigned long flags;
1683 1685
1684 if (blk_rq_check_limits(q, rq)) 1686 if (blk_rq_check_limits(q, rq))
1685 return -EIO; 1687 return -EIO;
1686 1688
1687 #ifdef CONFIG_FAIL_MAKE_REQUEST 1689 #ifdef CONFIG_FAIL_MAKE_REQUEST
1688 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && 1690 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1689 should_fail(&fail_make_request, blk_rq_bytes(rq))) 1691 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1690 return -EIO; 1692 return -EIO;
1691 #endif 1693 #endif
1692 1694
1693 spin_lock_irqsave(q->queue_lock, flags); 1695 spin_lock_irqsave(q->queue_lock, flags);
1694 1696
1695 /* 1697 /*
1696 * Submitting request must be dequeued before calling this function 1698 * Submitting request must be dequeued before calling this function
1697 * because it will be linked to another request_queue 1699 * because it will be linked to another request_queue
1698 */ 1700 */
1699 BUG_ON(blk_queued_rq(rq)); 1701 BUG_ON(blk_queued_rq(rq));
1700 1702
1701 drive_stat_acct(rq, 1); 1703 drive_stat_acct(rq, 1);
1702 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1704 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1703 1705
1704 spin_unlock_irqrestore(q->queue_lock, flags); 1706 spin_unlock_irqrestore(q->queue_lock, flags);
1705 1707
1706 return 0; 1708 return 0;
1707 } 1709 }
1708 EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1710 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
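Request stacking drivers call this when pushing an already-prepared clone to a lower device; blk_insert_cloned_request() re-checks the clone against the lower queue via blk_rq_check_limits() before queueing it. A hedged sketch follows (my_dispatch_clone and lower_q are illustrative names):

	/* Hypothetical dispatch of a prepared clone to the lower queue. */
	static int my_dispatch_clone(struct request_queue *lower_q,
				     struct request *clone)
	{
		int ret;

		ret = blk_insert_cloned_request(lower_q, clone);
		if (ret)
			return ret;	/* e.g. -EIO: clone violates lower_q's limits */
		return 0;
	}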
1709 1711
1710 /** 1712 /**
1711 * blk_rq_err_bytes - determine number of bytes till the next failure boundary 1713 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
1712 * @rq: request to examine 1714 * @rq: request to examine
1713 * 1715 *
1714 * Description: 1716 * Description:
1715 * A request could be a merge of IOs which require different failure 1717 * A request could be a merge of IOs which require different failure
1716 * handling. This function determines the number of bytes which 1718 * handling. This function determines the number of bytes which
1717 * can be failed from the beginning of the request without 1719 * can be failed from the beginning of the request without
1718 * crossing into an area which needs to be retried further. 1720 * crossing into an area which needs to be retried further.
1719 * 1721 *
1720 * Return: 1722 * Return:
1721 * The number of bytes to fail. 1723 * The number of bytes to fail.
1722 * 1724 *
1723 * Context: 1725 * Context:
1724 * queue_lock must be held. 1726 * queue_lock must be held.
1725 */ 1727 */
1726 unsigned int blk_rq_err_bytes(const struct request *rq) 1728 unsigned int blk_rq_err_bytes(const struct request *rq)
1727 { 1729 {
1728 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 1730 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1729 unsigned int bytes = 0; 1731 unsigned int bytes = 0;
1730 struct bio *bio; 1732 struct bio *bio;
1731 1733
1732 if (!(rq->cmd_flags & REQ_MIXED_MERGE)) 1734 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1733 return blk_rq_bytes(rq); 1735 return blk_rq_bytes(rq);
1734 1736
1735 /* 1737 /*
1736 * Currently the only 'mixing' which can happen is between 1738 * Currently the only 'mixing' which can happen is between
1737 * different fastfail types. We can safely fail portions 1739 * different fastfail types. We can safely fail portions
1738 * which have all the failfast bits that the first one has - 1740 * which have all the failfast bits that the first one has -
1739 * the ones which are at least as eager to fail as the first 1741 * the ones which are at least as eager to fail as the first
1740 * one. 1742 * one.
1741 */ 1743 */
1742 for (bio = rq->bio; bio; bio = bio->bi_next) { 1744 for (bio = rq->bio; bio; bio = bio->bi_next) {
1743 if ((bio->bi_rw & ff) != ff) 1745 if ((bio->bi_rw & ff) != ff)
1744 break; 1746 break;
1745 bytes += bio->bi_size; 1747 bytes += bio->bi_size;
1746 } 1748 }
1747 1749
1748 /* this could lead to infinite loop */ 1750 /* this could lead to infinite loop */
1749 BUG_ON(blk_rq_bytes(rq) && !bytes); 1751 BUG_ON(blk_rq_bytes(rq) && !bytes);
1750 return bytes; 1752 return bytes;
1751 } 1753 }
1752 EXPORT_SYMBOL_GPL(blk_rq_err_bytes); 1754 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1753 1755
1754 static void blk_account_io_completion(struct request *req, unsigned int bytes) 1756 static void blk_account_io_completion(struct request *req, unsigned int bytes)
1755 { 1757 {
1756 if (blk_do_io_stat(req)) { 1758 if (blk_do_io_stat(req)) {
1757 const int rw = rq_data_dir(req); 1759 const int rw = rq_data_dir(req);
1758 struct hd_struct *part; 1760 struct hd_struct *part;
1759 int cpu; 1761 int cpu;
1760 1762
1761 cpu = part_stat_lock(); 1763 cpu = part_stat_lock();
1762 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1764 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1763 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1765 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1764 part_stat_unlock(); 1766 part_stat_unlock();
1765 } 1767 }
1766 } 1768 }
1767 1769
1768 static void blk_account_io_done(struct request *req) 1770 static void blk_account_io_done(struct request *req)
1769 { 1771 {
1770 /* 1772 /*
1771 * Account IO completion. bar_rq isn't accounted as a normal 1773 * Account IO completion. bar_rq isn't accounted as a normal
1772 * IO on queueing nor completion. Accounting the containing 1774 * IO on queueing nor completion. Accounting the containing
1773 * request is enough. 1775 * request is enough.
1774 */ 1776 */
1775 if (blk_do_io_stat(req) && req != &req->q->bar_rq) { 1777 if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
1776 unsigned long duration = jiffies - req->start_time; 1778 unsigned long duration = jiffies - req->start_time;
1777 const int rw = rq_data_dir(req); 1779 const int rw = rq_data_dir(req);
1778 struct hd_struct *part; 1780 struct hd_struct *part;
1779 int cpu; 1781 int cpu;
1780 1782
1781 cpu = part_stat_lock(); 1783 cpu = part_stat_lock();
1782 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1784 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1783 1785
1784 part_stat_inc(cpu, part, ios[rw]); 1786 part_stat_inc(cpu, part, ios[rw]);
1785 part_stat_add(cpu, part, ticks[rw], duration); 1787 part_stat_add(cpu, part, ticks[rw], duration);
1786 part_round_stats(cpu, part); 1788 part_round_stats(cpu, part);
1787 part_dec_in_flight(part, rw); 1789 part_dec_in_flight(part, rw);
1788 1790
1789 part_stat_unlock(); 1791 part_stat_unlock();
1790 } 1792 }
1791 } 1793 }
1792 1794
1793 /** 1795 /**
1794 * blk_peek_request - peek at the top of a request queue 1796 * blk_peek_request - peek at the top of a request queue
1795 * @q: request queue to peek at 1797 * @q: request queue to peek at
1796 * 1798 *
1797 * Description: 1799 * Description:
1798 * Return the request at the top of @q. The returned request 1800 * Return the request at the top of @q. The returned request
1799 * should be started using blk_start_request() before LLD starts 1801 * should be started using blk_start_request() before LLD starts
1800 * processing it. 1802 * processing it.
1801 * 1803 *
1802 * Return: 1804 * Return:
1803 * Pointer to the request at the top of @q if available. Null 1805 * Pointer to the request at the top of @q if available. Null
1804 * otherwise. 1806 * otherwise.
1805 * 1807 *
1806 * Context: 1808 * Context:
1807 * queue_lock must be held. 1809 * queue_lock must be held.
1808 */ 1810 */
1809 struct request *blk_peek_request(struct request_queue *q) 1811 struct request *blk_peek_request(struct request_queue *q)
1810 { 1812 {
1811 struct request *rq; 1813 struct request *rq;
1812 int ret; 1814 int ret;
1813 1815
1814 while ((rq = __elv_next_request(q)) != NULL) { 1816 while ((rq = __elv_next_request(q)) != NULL) {
1815 if (!(rq->cmd_flags & REQ_STARTED)) { 1817 if (!(rq->cmd_flags & REQ_STARTED)) {
1816 /* 1818 /*
1817 * This is the first time the device driver 1819 * This is the first time the device driver
1818 * sees this request (possibly after 1820 * sees this request (possibly after
1819 * requeueing). Notify IO scheduler. 1821 * requeueing). Notify IO scheduler.
1820 */ 1822 */
1821 if (rq->cmd_flags & REQ_SORTED) 1823 if (rq->cmd_flags & REQ_SORTED)
1822 elv_activate_rq(q, rq); 1824 elv_activate_rq(q, rq);
1823 1825
1824 /* 1826 /*
1825 * just mark as started even if we don't start 1827 * just mark as started even if we don't start
1826 * it, a request that has been delayed should 1828 * it, a request that has been delayed should
1827 * not be passed by new incoming requests 1829 * not be passed by new incoming requests
1828 */ 1830 */
1829 rq->cmd_flags |= REQ_STARTED; 1831 rq->cmd_flags |= REQ_STARTED;
1830 trace_block_rq_issue(q, rq); 1832 trace_block_rq_issue(q, rq);
1831 } 1833 }
1832 1834
1833 if (!q->boundary_rq || q->boundary_rq == rq) { 1835 if (!q->boundary_rq || q->boundary_rq == rq) {
1834 q->end_sector = rq_end_sector(rq); 1836 q->end_sector = rq_end_sector(rq);
1835 q->boundary_rq = NULL; 1837 q->boundary_rq = NULL;
1836 } 1838 }
1837 1839
1838 if (rq->cmd_flags & REQ_DONTPREP) 1840 if (rq->cmd_flags & REQ_DONTPREP)
1839 break; 1841 break;
1840 1842
1841 if (q->dma_drain_size && blk_rq_bytes(rq)) { 1843 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1842 /* 1844 /*
1843 * make sure space for the drain appears; we 1845 * make sure space for the drain appears; we
1844 * know we can do this because max_hw_segments 1846 * know we can do this because max_hw_segments
1845 * has been adjusted to be one fewer than the 1847 * has been adjusted to be one fewer than the
1846 * device can handle 1848 * device can handle
1847 */ 1849 */
1848 rq->nr_phys_segments++; 1850 rq->nr_phys_segments++;
1849 } 1851 }
1850 1852
1851 if (!q->prep_rq_fn) 1853 if (!q->prep_rq_fn)
1852 break; 1854 break;
1853 1855
1854 ret = q->prep_rq_fn(q, rq); 1856 ret = q->prep_rq_fn(q, rq);
1855 if (ret == BLKPREP_OK) { 1857 if (ret == BLKPREP_OK) {
1856 break; 1858 break;
1857 } else if (ret == BLKPREP_DEFER) { 1859 } else if (ret == BLKPREP_DEFER) {
1858 /* 1860 /*
1859 * the request may have been (partially) prepped. 1861 * the request may have been (partially) prepped.
1860 * we need to keep this request in the front to 1862 * we need to keep this request in the front to
1861 * avoid resource deadlock. REQ_STARTED will 1863 * avoid resource deadlock. REQ_STARTED will
1862 * prevent other fs requests from passing this one. 1864 * prevent other fs requests from passing this one.
1863 */ 1865 */
1864 if (q->dma_drain_size && blk_rq_bytes(rq) && 1866 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1865 !(rq->cmd_flags & REQ_DONTPREP)) { 1867 !(rq->cmd_flags & REQ_DONTPREP)) {
1866 /* 1868 /*
1867 * remove the space for the drain we added 1869 * remove the space for the drain we added
1868 * so that we don't add it again 1870 * so that we don't add it again
1869 */ 1871 */
1870 --rq->nr_phys_segments; 1872 --rq->nr_phys_segments;
1871 } 1873 }
1872 1874
1873 rq = NULL; 1875 rq = NULL;
1874 break; 1876 break;
1875 } else if (ret == BLKPREP_KILL) { 1877 } else if (ret == BLKPREP_KILL) {
1876 rq->cmd_flags |= REQ_QUIET; 1878 rq->cmd_flags |= REQ_QUIET;
1877 /* 1879 /*
1878 * Mark this request as started so we don't trigger 1880 * Mark this request as started so we don't trigger
1879 * any debug logic in the end I/O path. 1881 * any debug logic in the end I/O path.
1880 */ 1882 */
1881 blk_start_request(rq); 1883 blk_start_request(rq);
1882 __blk_end_request_all(rq, -EIO); 1884 __blk_end_request_all(rq, -EIO);
1883 } else { 1885 } else {
1884 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 1886 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1885 break; 1887 break;
1886 } 1888 }
1887 } 1889 }
1888 1890
1889 return rq; 1891 return rq;
1890 } 1892 }
1891 EXPORT_SYMBOL(blk_peek_request); 1893 EXPORT_SYMBOL(blk_peek_request);
1892 1894
1893 void blk_dequeue_request(struct request *rq) 1895 void blk_dequeue_request(struct request *rq)
1894 { 1896 {
1895 struct request_queue *q = rq->q; 1897 struct request_queue *q = rq->q;
1896 1898
1897 BUG_ON(list_empty(&rq->queuelist)); 1899 BUG_ON(list_empty(&rq->queuelist));
1898 BUG_ON(ELV_ON_HASH(rq)); 1900 BUG_ON(ELV_ON_HASH(rq));
1899 1901
1900 list_del_init(&rq->queuelist); 1902 list_del_init(&rq->queuelist);
1901 1903
1902 /* 1904 /*
1903 * the time frame between a request being removed from the lists 1905 * the time frame between a request being removed from the lists
1904 * and when it is freed is accounted as I/O that is in progress at 1906 * and when it is freed is accounted as I/O that is in progress at
1905 * the driver side. 1907 * the driver side.
1906 */ 1908 */
1907 if (blk_account_rq(rq)) { 1909 if (blk_account_rq(rq)) {
1908 q->in_flight[rq_is_sync(rq)]++; 1910 q->in_flight[rq_is_sync(rq)]++;
1909 set_io_start_time_ns(rq); 1911 set_io_start_time_ns(rq);
1910 } 1912 }
1911 } 1913 }
1912 1914
1913 /** 1915 /**
1914 * blk_start_request - start request processing on the driver 1916 * blk_start_request - start request processing on the driver
1915 * @req: request to dequeue 1917 * @req: request to dequeue
1916 * 1918 *
1917 * Description: 1919 * Description:
1918 * Dequeue @req and start timeout timer on it. This hands off the 1920 * Dequeue @req and start timeout timer on it. This hands off the
1919 * request to the driver. 1921 * request to the driver.
1920 * 1922 *
1921 * Block internal functions which don't want to start timer should 1923 * Block internal functions which don't want to start timer should
1922 * call blk_dequeue_request(). 1924 * call blk_dequeue_request().
1923 * 1925 *
1924 * Context: 1926 * Context:
1925 * queue_lock must be held. 1927 * queue_lock must be held.
1926 */ 1928 */
1927 void blk_start_request(struct request *req) 1929 void blk_start_request(struct request *req)
1928 { 1930 {
1929 blk_dequeue_request(req); 1931 blk_dequeue_request(req);
1930 1932
1931 /* 1933 /*
1932 * We are now handing the request to the hardware, initialize 1934 * We are now handing the request to the hardware, initialize
1933 * resid_len to full count and add the timeout handler. 1935 * resid_len to full count and add the timeout handler.
1934 */ 1936 */
1935 req->resid_len = blk_rq_bytes(req); 1937 req->resid_len = blk_rq_bytes(req);
1936 if (unlikely(blk_bidi_rq(req))) 1938 if (unlikely(blk_bidi_rq(req)))
1937 req->next_rq->resid_len = blk_rq_bytes(req->next_rq); 1939 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1938 1940
1939 blk_add_timer(req); 1941 blk_add_timer(req);
1940 } 1942 }
1941 EXPORT_SYMBOL(blk_start_request); 1943 EXPORT_SYMBOL(blk_start_request);
1942 1944
1943 /** 1945 /**
1944 * blk_fetch_request - fetch a request from a request queue 1946 * blk_fetch_request - fetch a request from a request queue
1945 * @q: request queue to fetch a request from 1947 * @q: request queue to fetch a request from
1946 * 1948 *
1947 * Description: 1949 * Description:
1948 * Return the request at the top of @q. The request is started on 1950 * Return the request at the top of @q. The request is started on
1949 * return and LLD can start processing it immediately. 1951 * return and LLD can start processing it immediately.
1950 * 1952 *
1951 * Return: 1953 * Return:
1952 * Pointer to the request at the top of @q if available. Null 1954 * Pointer to the request at the top of @q if available. Null
1953 * otherwise. 1955 * otherwise.
1954 * 1956 *
1955 * Context: 1957 * Context:
1956 * queue_lock must be held. 1958 * queue_lock must be held.
1957 */ 1959 */
1958 struct request *blk_fetch_request(struct request_queue *q) 1960 struct request *blk_fetch_request(struct request_queue *q)
1959 { 1961 {
1960 struct request *rq; 1962 struct request *rq;
1961 1963
1962 rq = blk_peek_request(q); 1964 rq = blk_peek_request(q);
1963 if (rq) 1965 if (rq)
1964 blk_start_request(rq); 1966 blk_start_request(rq);
1965 return rq; 1967 return rq;
1966 } 1968 }
1967 EXPORT_SYMBOL(blk_fetch_request); 1969 EXPORT_SYMBOL(blk_fetch_request);
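Together, blk_peek_request(), blk_start_request() and blk_fetch_request() form the usual request_fn loop. A minimal, hypothetical strategy function is sketched below (my_transfer is made up and performs the transfer synchronously for simplicity); request_fn is entered with queue_lock held, hence the __-prefixed completion helper.

	/* Hypothetical request_fn: drain the queue, completing each request. */
	static void my_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			int error = my_transfer(rq);	/* hypothetical hardware I/O */

			__blk_end_request_all(rq, error);	/* queue_lock still held */
		}
	}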
1968 1970
1969 /** 1971 /**
1970 * blk_update_request - Special helper function for request stacking drivers 1972 * blk_update_request - Special helper function for request stacking drivers
1971 * @req: the request being processed 1973 * @req: the request being processed
1972 * @error: %0 for success, < %0 for error 1974 * @error: %0 for success, < %0 for error
1973 * @nr_bytes: number of bytes to complete @req 1975 * @nr_bytes: number of bytes to complete @req
1974 * 1976 *
1975 * Description: 1977 * Description:
1976 * Ends I/O on a number of bytes attached to @req, but doesn't complete 1978 * Ends I/O on a number of bytes attached to @req, but doesn't complete
1977 * the request structure even if @req doesn't have leftover. 1979 * the request structure even if @req doesn't have leftover.
1978 * If @req has leftover, sets it up for the next range of segments. 1980 * If @req has leftover, sets it up for the next range of segments.
1979 * 1981 *
1980 * This special helper function is only for request stacking drivers 1982 * This special helper function is only for request stacking drivers
1981 * (e.g. request-based dm) so that they can handle partial completion. 1983 * (e.g. request-based dm) so that they can handle partial completion.
1982 * Actual device drivers should use blk_end_request instead. 1984 * Actual device drivers should use blk_end_request instead.
1983 * 1985 *
1984 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees 1986 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees
1985 * %false return from this function. 1987 * %false return from this function.
1986 * 1988 *
1987 * Return: 1989 * Return:
1988 * %false - this request doesn't have any more data 1990 * %false - this request doesn't have any more data
1989 * %true - this request has more data 1991 * %true - this request has more data
1990 **/ 1992 **/
1991 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 1993 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
1992 { 1994 {
1993 int total_bytes, bio_nbytes, next_idx = 0; 1995 int total_bytes, bio_nbytes, next_idx = 0;
1994 struct bio *bio; 1996 struct bio *bio;
1995 1997
1996 if (!req->bio) 1998 if (!req->bio)
1997 return false; 1999 return false;
1998 2000
1999 trace_block_rq_complete(req->q, req); 2001 trace_block_rq_complete(req->q, req);
2000 2002
2001 /* 2003 /*
2002 * For fs requests, rq is just a carrier of independent bios 2004 * For fs requests, rq is just a carrier of independent bios
2003 * and each partial completion should be handled separately. 2005 * and each partial completion should be handled separately.
2004 * Reset per-request error on each partial completion. 2006 * Reset per-request error on each partial completion.
2005 * 2007 *
2006 * TODO: tj: This is too subtle. It would be better to let 2008 * TODO: tj: This is too subtle. It would be better to let
2007 * low level drivers do what they see fit. 2009 * low level drivers do what they see fit.
2008 */ 2010 */
2009 if (req->cmd_type == REQ_TYPE_FS) 2011 if (req->cmd_type == REQ_TYPE_FS)
2010 req->errors = 0; 2012 req->errors = 0;
2011 2013
2012 if (error && req->cmd_type == REQ_TYPE_FS && 2014 if (error && req->cmd_type == REQ_TYPE_FS &&
2013 !(req->cmd_flags & REQ_QUIET)) { 2015 !(req->cmd_flags & REQ_QUIET)) {
2014 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", 2016 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
2015 req->rq_disk ? req->rq_disk->disk_name : "?", 2017 req->rq_disk ? req->rq_disk->disk_name : "?",
2016 (unsigned long long)blk_rq_pos(req)); 2018 (unsigned long long)blk_rq_pos(req));
2017 } 2019 }
2018 2020
2019 blk_account_io_completion(req, nr_bytes); 2021 blk_account_io_completion(req, nr_bytes);
2020 2022
2021 total_bytes = bio_nbytes = 0; 2023 total_bytes = bio_nbytes = 0;
2022 while ((bio = req->bio) != NULL) { 2024 while ((bio = req->bio) != NULL) {
2023 int nbytes; 2025 int nbytes;
2024 2026
2025 if (nr_bytes >= bio->bi_size) { 2027 if (nr_bytes >= bio->bi_size) {
2026 req->bio = bio->bi_next; 2028 req->bio = bio->bi_next;
2027 nbytes = bio->bi_size; 2029 nbytes = bio->bi_size;
2028 req_bio_endio(req, bio, nbytes, error); 2030 req_bio_endio(req, bio, nbytes, error);
2029 next_idx = 0; 2031 next_idx = 0;
2030 bio_nbytes = 0; 2032 bio_nbytes = 0;
2031 } else { 2033 } else {
2032 int idx = bio->bi_idx + next_idx; 2034 int idx = bio->bi_idx + next_idx;
2033 2035
2034 if (unlikely(idx >= bio->bi_vcnt)) { 2036 if (unlikely(idx >= bio->bi_vcnt)) {
2035 blk_dump_rq_flags(req, "__end_that"); 2037 blk_dump_rq_flags(req, "__end_that");
2036 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", 2038 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2037 __func__, idx, bio->bi_vcnt); 2039 __func__, idx, bio->bi_vcnt);
2038 break; 2040 break;
2039 } 2041 }
2040 2042
2041 nbytes = bio_iovec_idx(bio, idx)->bv_len; 2043 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2042 BIO_BUG_ON(nbytes > bio->bi_size); 2044 BIO_BUG_ON(nbytes > bio->bi_size);
2043 2045
2044 /* 2046 /*
2045 * not a complete bvec done 2047 * not a complete bvec done
2046 */ 2048 */
2047 if (unlikely(nbytes > nr_bytes)) { 2049 if (unlikely(nbytes > nr_bytes)) {
2048 bio_nbytes += nr_bytes; 2050 bio_nbytes += nr_bytes;
2049 total_bytes += nr_bytes; 2051 total_bytes += nr_bytes;
2050 break; 2052 break;
2051 } 2053 }
2052 2054
2053 /* 2055 /*
2054 * advance to the next vector 2056 * advance to the next vector
2055 */ 2057 */
2056 next_idx++; 2058 next_idx++;
2057 bio_nbytes += nbytes; 2059 bio_nbytes += nbytes;
2058 } 2060 }
2059 2061
2060 total_bytes += nbytes; 2062 total_bytes += nbytes;
2061 nr_bytes -= nbytes; 2063 nr_bytes -= nbytes;
2062 2064
2063 bio = req->bio; 2065 bio = req->bio;
2064 if (bio) { 2066 if (bio) {
2065 /* 2067 /*
2066 * end more in this run, or just return 'not-done' 2068 * end more in this run, or just return 'not-done'
2067 */ 2069 */
2068 if (unlikely(nr_bytes <= 0)) 2070 if (unlikely(nr_bytes <= 0))
2069 break; 2071 break;
2070 } 2072 }
2071 } 2073 }
2072 2074
2073 /* 2075 /*
2074 * completely done 2076 * completely done
2075 */ 2077 */
2076 if (!req->bio) { 2078 if (!req->bio) {
2077 /* 2079 /*
2078 * Reset counters so that the request stacking driver 2080 * Reset counters so that the request stacking driver
2079 * can find how many bytes remain in the request 2081 * can find how many bytes remain in the request
2080 * later. 2082 * later.
2081 */ 2083 */
2082 req->__data_len = 0; 2084 req->__data_len = 0;
2083 return false; 2085 return false;
2084 } 2086 }
2085 2087
2086 /* 2088 /*
2087 * if the request wasn't completed, update state 2089 * if the request wasn't completed, update state
2088 */ 2090 */
2089 if (bio_nbytes) { 2091 if (bio_nbytes) {
2090 req_bio_endio(req, bio, bio_nbytes, error); 2092 req_bio_endio(req, bio, bio_nbytes, error);
2091 bio->bi_idx += next_idx; 2093 bio->bi_idx += next_idx;
2092 bio_iovec(bio)->bv_offset += nr_bytes; 2094 bio_iovec(bio)->bv_offset += nr_bytes;
2093 bio_iovec(bio)->bv_len -= nr_bytes; 2095 bio_iovec(bio)->bv_len -= nr_bytes;
2094 } 2096 }
2095 2097
2096 req->__data_len -= total_bytes; 2098 req->__data_len -= total_bytes;
2097 req->buffer = bio_data(req->bio); 2099 req->buffer = bio_data(req->bio);
2098 2100
2099 /* update sector only for requests with clear definition of sector */ 2101 /* update sector only for requests with clear definition of sector */
2100 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) 2102 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
2101 req->__sector += total_bytes >> 9; 2103 req->__sector += total_bytes >> 9;
2102 2104
2103 /* mixed attributes always follow the first bio */ 2105 /* mixed attributes always follow the first bio */
2104 if (req->cmd_flags & REQ_MIXED_MERGE) { 2106 if (req->cmd_flags & REQ_MIXED_MERGE) {
2105 req->cmd_flags &= ~REQ_FAILFAST_MASK; 2107 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2106 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; 2108 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2107 } 2109 }
2108 2110
2109 /* 2111 /*
2110 * If total number of sectors is less than the first segment 2112 * If total number of sectors is less than the first segment
2111 * size, something has gone terribly wrong. 2113 * size, something has gone terribly wrong.
2112 */ 2114 */
2113 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { 2115 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2114 printk(KERN_ERR "blk: request botched\n"); 2116 printk(KERN_ERR "blk: request botched\n");
2115 req->__data_len = blk_rq_cur_bytes(req); 2117 req->__data_len = blk_rq_cur_bytes(req);
2116 } 2118 }
2117 2119
2118 /* recalculate the number of segments */ 2120 /* recalculate the number of segments */
2119 blk_recalc_rq_segments(req); 2121 blk_recalc_rq_segments(req);
2120 2122
2121 return true; 2123 return true;
2122 } 2124 }
2123 EXPORT_SYMBOL_GPL(blk_update_request); 2125 EXPORT_SYMBOL_GPL(blk_update_request);
2124 2126
2125 static bool blk_update_bidi_request(struct request *rq, int error, 2127 static bool blk_update_bidi_request(struct request *rq, int error,
2126 unsigned int nr_bytes, 2128 unsigned int nr_bytes,
2127 unsigned int bidi_bytes) 2129 unsigned int bidi_bytes)
2128 { 2130 {
2129 if (blk_update_request(rq, error, nr_bytes)) 2131 if (blk_update_request(rq, error, nr_bytes))
2130 return true; 2132 return true;
2131 2133
2132 /* Bidi request must be completed as a whole */ 2134 /* Bidi request must be completed as a whole */
2133 if (unlikely(blk_bidi_rq(rq)) && 2135 if (unlikely(blk_bidi_rq(rq)) &&
2134 blk_update_request(rq->next_rq, error, bidi_bytes)) 2136 blk_update_request(rq->next_rq, error, bidi_bytes))
2135 return true; 2137 return true;
2136 2138
2137 if (blk_queue_add_random(rq->q)) 2139 if (blk_queue_add_random(rq->q))
2138 add_disk_randomness(rq->rq_disk); 2140 add_disk_randomness(rq->rq_disk);
2139 2141
2140 return false; 2142 return false;
2141 } 2143 }
2142 2144
2143 /** 2145 /**
2144 * blk_unprep_request - unprepare a request 2146 * blk_unprep_request - unprepare a request
2145 * @req: the request 2147 * @req: the request
2146 * 2148 *
2147 * This function makes a request ready for complete resubmission (or 2149 * This function makes a request ready for complete resubmission (or
2148 * completion). It happens only after all error handling is complete, 2150 * completion). It happens only after all error handling is complete,
2149 * so represents the appropriate moment to deallocate any resources 2151 * so represents the appropriate moment to deallocate any resources
2150 * that were allocated to the request in the prep_rq_fn. The queue 2152 * that were allocated to the request in the prep_rq_fn. The queue
2151 * lock is held when calling this. 2153 * lock is held when calling this.
2152 */ 2154 */
2153 void blk_unprep_request(struct request *req) 2155 void blk_unprep_request(struct request *req)
2154 { 2156 {
2155 struct request_queue *q = req->q; 2157 struct request_queue *q = req->q;
2156 2158
2157 req->cmd_flags &= ~REQ_DONTPREP; 2159 req->cmd_flags &= ~REQ_DONTPREP;
2158 if (q->unprep_rq_fn) 2160 if (q->unprep_rq_fn)
2159 q->unprep_rq_fn(q, req); 2161 q->unprep_rq_fn(q, req);
2160 } 2162 }
2161 EXPORT_SYMBOL_GPL(blk_unprep_request); 2163 EXPORT_SYMBOL_GPL(blk_unprep_request);
2162 2164
2163 /* 2165 /*
2164 * queue lock must be held 2166 * queue lock must be held
2165 */ 2167 */
2166 static void blk_finish_request(struct request *req, int error) 2168 static void blk_finish_request(struct request *req, int error)
2167 { 2169 {
2168 if (blk_rq_tagged(req)) 2170 if (blk_rq_tagged(req))
2169 blk_queue_end_tag(req->q, req); 2171 blk_queue_end_tag(req->q, req);
2170 2172
2171 BUG_ON(blk_queued_rq(req)); 2173 BUG_ON(blk_queued_rq(req));
2172 2174
2173 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) 2175 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2174 laptop_io_completion(&req->q->backing_dev_info); 2176 laptop_io_completion(&req->q->backing_dev_info);
2175 2177
2176 blk_delete_timer(req); 2178 blk_delete_timer(req);
2177 2179
2178 if (req->cmd_flags & REQ_DONTPREP) 2180 if (req->cmd_flags & REQ_DONTPREP)
2179 blk_unprep_request(req); 2181 blk_unprep_request(req);
2180 2182
2181 2183
2182 blk_account_io_done(req); 2184 blk_account_io_done(req);
2183 2185
2184 if (req->end_io) 2186 if (req->end_io)
2185 req->end_io(req, error); 2187 req->end_io(req, error);
2186 else { 2188 else {
2187 if (blk_bidi_rq(req)) 2189 if (blk_bidi_rq(req))
2188 __blk_put_request(req->next_rq->q, req->next_rq); 2190 __blk_put_request(req->next_rq->q, req->next_rq);
2189 2191
2190 __blk_put_request(req->q, req); 2192 __blk_put_request(req->q, req);
2191 } 2193 }
2192 } 2194 }
2193 2195
2194 /** 2196 /**
2195 * blk_end_bidi_request - Complete a bidi request 2197 * blk_end_bidi_request - Complete a bidi request
2196 * @rq: the request to complete 2198 * @rq: the request to complete
2197 * @error: %0 for success, < %0 for error 2199 * @error: %0 for success, < %0 for error
2198 * @nr_bytes: number of bytes to complete @rq 2200 * @nr_bytes: number of bytes to complete @rq
2199 * @bidi_bytes: number of bytes to complete @rq->next_rq 2201 * @bidi_bytes: number of bytes to complete @rq->next_rq
2200 * 2202 *
2201 * Description: 2203 * Description:
2202 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2204 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
2203 * Drivers that supports bidi can safely call this member for any 2205 * Drivers that supports bidi can safely call this member for any
2204 * type of request, bidi or uni. In the later case @bidi_bytes is 2206 * type of request, bidi or uni. In the later case @bidi_bytes is
2205 * just ignored. 2207 * just ignored.
2206 * 2208 *
2207 * Return: 2209 * Return:
2208 * %false - we are done with this request 2210 * %false - we are done with this request
2209 * %true - still buffers pending for this request 2211 * %true - still buffers pending for this request
2210 **/ 2212 **/
2211 static bool blk_end_bidi_request(struct request *rq, int error, 2213 static bool blk_end_bidi_request(struct request *rq, int error,
2212 unsigned int nr_bytes, unsigned int bidi_bytes) 2214 unsigned int nr_bytes, unsigned int bidi_bytes)
2213 { 2215 {
2214 struct request_queue *q = rq->q; 2216 struct request_queue *q = rq->q;
2215 unsigned long flags; 2217 unsigned long flags;
2216 2218
2217 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2219 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2218 return true; 2220 return true;
2219 2221
2220 spin_lock_irqsave(q->queue_lock, flags); 2222 spin_lock_irqsave(q->queue_lock, flags);
2221 blk_finish_request(rq, error); 2223 blk_finish_request(rq, error);
2222 spin_unlock_irqrestore(q->queue_lock, flags); 2224 spin_unlock_irqrestore(q->queue_lock, flags);
2223 2225
2224 return false; 2226 return false;
2225 } 2227 }
2226 2228
2227 /** 2229 /**
2228 * __blk_end_bidi_request - Complete a bidi request with queue lock held 2230 * __blk_end_bidi_request - Complete a bidi request with queue lock held
2229 * @rq: the request to complete 2231 * @rq: the request to complete
2230 * @error: %0 for success, < %0 for error 2232 * @error: %0 for success, < %0 for error
2231 * @nr_bytes: number of bytes to complete @rq 2233 * @nr_bytes: number of bytes to complete @rq
2232 * @bidi_bytes: number of bytes to complete @rq->next_rq 2234 * @bidi_bytes: number of bytes to complete @rq->next_rq
2233 * 2235 *
2234 * Description: 2236 * Description:
2235 * Identical to blk_end_bidi_request() except that queue lock is 2237 * Identical to blk_end_bidi_request() except that queue lock is
2236 * assumed to be locked on entry and remains so on return. 2238 * assumed to be locked on entry and remains so on return.
2237 * 2239 *
2238 * Return: 2240 * Return:
2239 * %false - we are done with this request 2241 * %false - we are done with this request
2240 * %true - still buffers pending for this request 2242 * %true - still buffers pending for this request
2241 **/ 2243 **/
2242 static bool __blk_end_bidi_request(struct request *rq, int error, 2244 static bool __blk_end_bidi_request(struct request *rq, int error,
2243 unsigned int nr_bytes, unsigned int bidi_bytes) 2245 unsigned int nr_bytes, unsigned int bidi_bytes)
2244 { 2246 {
2245 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2247 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2246 return true; 2248 return true;
2247 2249
2248 blk_finish_request(rq, error); 2250 blk_finish_request(rq, error);
2249 2251
2250 return false; 2252 return false;
2251 } 2253 }
2252 2254
2253 /** 2255 /**
2254 * blk_end_request - Helper function for drivers to complete the request. 2256 * blk_end_request - Helper function for drivers to complete the request.
2255 * @rq: the request being processed 2257 * @rq: the request being processed
2256 * @error: %0 for success, < %0 for error 2258 * @error: %0 for success, < %0 for error
2257 * @nr_bytes: number of bytes to complete 2259 * @nr_bytes: number of bytes to complete
2258 * 2260 *
2259 * Description: 2261 * Description:
2260 * Ends I/O on a number of bytes attached to @rq. 2262 * Ends I/O on a number of bytes attached to @rq.
2261 * If @rq has leftover, sets it up for the next range of segments. 2263 * If @rq has leftover, sets it up for the next range of segments.
2262 * 2264 *
2263 * Return: 2265 * Return:
2264 * %false - we are done with this request 2266 * %false - we are done with this request
2265 * %true - still buffers pending for this request 2267 * %true - still buffers pending for this request
2266 **/ 2268 **/
2267 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2269 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2268 { 2270 {
2269 return blk_end_bidi_request(rq, error, nr_bytes, 0); 2271 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2270 } 2272 }
2271 EXPORT_SYMBOL(blk_end_request); 2273 EXPORT_SYMBOL(blk_end_request);
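Drivers that complete a request piece by piece (for example one DMA segment per interrupt) keep calling blk_end_request() until it returns %false. A hedged sketch (my_complete_chunk and my_issue_next_chunk are hypothetical):

	/* Hypothetical per-chunk completion; 'done' bytes finished this round. */
	static void my_complete_chunk(struct request *rq, int error, unsigned int done)
	{
		if (!blk_end_request(rq, error, done))
			return;		/* request fully completed and freed */

		/* leftover remains; rq now describes the next range of segments */
		my_issue_next_chunk(rq);
	}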
2272 2274
2273 /** 2275 /**
2274 * blk_end_request_all - Helper function for drivers to finish the request. 2276 * blk_end_request_all - Helper function for drivers to finish the request.
2275 * @rq: the request to finish 2277 * @rq: the request to finish
2276 * @error: %0 for success, < %0 for error 2278 * @error: %0 for success, < %0 for error
2277 * 2279 *
2278 * Description: 2280 * Description:
2279 * Completely finish @rq. 2281 * Completely finish @rq.
2280 */ 2282 */
2281 void blk_end_request_all(struct request *rq, int error) 2283 void blk_end_request_all(struct request *rq, int error)
2282 { 2284 {
2283 bool pending; 2285 bool pending;
2284 unsigned int bidi_bytes = 0; 2286 unsigned int bidi_bytes = 0;
2285 2287
2286 if (unlikely(blk_bidi_rq(rq))) 2288 if (unlikely(blk_bidi_rq(rq)))
2287 bidi_bytes = blk_rq_bytes(rq->next_rq); 2289 bidi_bytes = blk_rq_bytes(rq->next_rq);
2288 2290
2289 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2291 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2290 BUG_ON(pending); 2292 BUG_ON(pending);
2291 } 2293 }
2292 EXPORT_SYMBOL(blk_end_request_all); 2294 EXPORT_SYMBOL(blk_end_request_all);
2293 2295
2294 /** 2296 /**
2295 * blk_end_request_cur - Helper function to finish the current request chunk. 2297 * blk_end_request_cur - Helper function to finish the current request chunk.
2296 * @rq: the request to finish the current chunk for 2298 * @rq: the request to finish the current chunk for
2297 * @error: %0 for success, < %0 for error 2299 * @error: %0 for success, < %0 for error
2298 * 2300 *
2299 * Description: 2301 * Description:
2300 * Complete the current consecutively mapped chunk from @rq. 2302 * Complete the current consecutively mapped chunk from @rq.
2301 * 2303 *
2302 * Return: 2304 * Return:
2303 * %false - we are done with this request 2305 * %false - we are done with this request
2304 * %true - still buffers pending for this request 2306 * %true - still buffers pending for this request
2305 */ 2307 */
2306 bool blk_end_request_cur(struct request *rq, int error) 2308 bool blk_end_request_cur(struct request *rq, int error)
2307 { 2309 {
2308 return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2310 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2309 } 2311 }
2310 EXPORT_SYMBOL(blk_end_request_cur); 2312 EXPORT_SYMBOL(blk_end_request_cur);
2311 2313
2312 /** 2314 /**
2313 * blk_end_request_err - Finish a request till the next failure boundary. 2315 * blk_end_request_err - Finish a request till the next failure boundary.
2314 * @rq: the request to finish till the next failure boundary for 2316 * @rq: the request to finish till the next failure boundary for
2315 * @error: must be negative errno 2317 * @error: must be negative errno
2316 * 2318 *
2317 * Description: 2319 * Description:
2318 * Complete @rq till the next failure boundary. 2320 * Complete @rq till the next failure boundary.
2319 * 2321 *
2320 * Return: 2322 * Return:
2321 * %false - we are done with this request 2323 * %false - we are done with this request
2322 * %true - still buffers pending for this request 2324 * %true - still buffers pending for this request
2323 */ 2325 */
2324 bool blk_end_request_err(struct request *rq, int error) 2326 bool blk_end_request_err(struct request *rq, int error)
2325 { 2327 {
2326 WARN_ON(error >= 0); 2328 WARN_ON(error >= 0);
2327 return blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2329 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2328 } 2330 }
2329 EXPORT_SYMBOL_GPL(blk_end_request_err); 2331 EXPORT_SYMBOL_GPL(blk_end_request_err);
2330 2332
2331 /** 2333 /**
2332 * __blk_end_request - Helper function for drivers to complete the request. 2334 * __blk_end_request - Helper function for drivers to complete the request.
2333 * @rq: the request being processed 2335 * @rq: the request being processed
2334 * @error: %0 for success, < %0 for error 2336 * @error: %0 for success, < %0 for error
2335 * @nr_bytes: number of bytes to complete 2337 * @nr_bytes: number of bytes to complete
2336 * 2338 *
2337 * Description: 2339 * Description:
2338 * Must be called with queue lock held unlike blk_end_request(). 2340 * Must be called with queue lock held unlike blk_end_request().
2339 * 2341 *
2340 * Return: 2342 * Return:
2341 * %false - we are done with this request 2343 * %false - we are done with this request
2342 * %true - still buffers pending for this request 2344 * %true - still buffers pending for this request
2343 **/ 2345 **/
2344 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2346 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2345 { 2347 {
2346 return __blk_end_bidi_request(rq, error, nr_bytes, 0); 2348 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2347 } 2349 }
2348 EXPORT_SYMBOL(__blk_end_request); 2350 EXPORT_SYMBOL(__blk_end_request);
2349 2351
2350 /** 2352 /**
2351 * __blk_end_request_all - Helper function for drivers to finish the request. 2353 * __blk_end_request_all - Helper function for drivers to finish the request.
2352 * @rq: the request to finish 2354 * @rq: the request to finish
2353 * @error: %0 for success, < %0 for error 2355 * @error: %0 for success, < %0 for error
2354 * 2356 *
2355 * Description: 2357 * Description:
2356 * Completely finish @rq. Must be called with queue lock held. 2358 * Completely finish @rq. Must be called with queue lock held.
2357 */ 2359 */
2358 void __blk_end_request_all(struct request *rq, int error) 2360 void __blk_end_request_all(struct request *rq, int error)
2359 { 2361 {
2360 bool pending; 2362 bool pending;
2361 unsigned int bidi_bytes = 0; 2363 unsigned int bidi_bytes = 0;
2362 2364
2363 if (unlikely(blk_bidi_rq(rq))) 2365 if (unlikely(blk_bidi_rq(rq)))
2364 bidi_bytes = blk_rq_bytes(rq->next_rq); 2366 bidi_bytes = blk_rq_bytes(rq->next_rq);
2365 2367
2366 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2368 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2367 BUG_ON(pending); 2369 BUG_ON(pending);
2368 } 2370 }
2369 EXPORT_SYMBOL(__blk_end_request_all); 2371 EXPORT_SYMBOL(__blk_end_request_all);
2370 2372
2371 /** 2373 /**
2372 * __blk_end_request_cur - Helper function to finish the current request chunk. 2374 * __blk_end_request_cur - Helper function to finish the current request chunk.
2373 * @rq: the request to finish the current chunk for 2375 * @rq: the request to finish the current chunk for
2374 * @error: %0 for success, < %0 for error 2376 * @error: %0 for success, < %0 for error
2375 * 2377 *
2376 * Description: 2378 * Description:
2377 * Complete the current consecutively mapped chunk from @rq. Must 2379 * Complete the current consecutively mapped chunk from @rq. Must
2378 * be called with queue lock held. 2380 * be called with queue lock held.
2379 * 2381 *
2380 * Return: 2382 * Return:
2381 * %false - we are done with this request 2383 * %false - we are done with this request
2382 * %true - still buffers pending for this request 2384 * %true - still buffers pending for this request
2383 */ 2385 */
2384 bool __blk_end_request_cur(struct request *rq, int error) 2386 bool __blk_end_request_cur(struct request *rq, int error)
2385 { 2387 {
2386 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2388 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2387 } 2389 }
2388 EXPORT_SYMBOL(__blk_end_request_cur); 2390 EXPORT_SYMBOL(__blk_end_request_cur);
2389 2391
2390 /** 2392 /**
2391 * __blk_end_request_err - Finish a request till the next failure boundary. 2393 * __blk_end_request_err - Finish a request till the next failure boundary.
2392 * @rq: the request to finish till the next failure boundary for 2394 * @rq: the request to finish till the next failure boundary for
2393 * @error: must be negative errno 2395 * @error: must be negative errno
2394 * 2396 *
2395 * Description: 2397 * Description:
2396 * Complete @rq till the next failure boundary. Must be called 2398 * Complete @rq till the next failure boundary. Must be called
2397 * with queue lock held. 2399 * with queue lock held.
2398 * 2400 *
2399 * Return: 2401 * Return:
2400 * %false - we are done with this request 2402 * %false - we are done with this request
2401 * %true - still buffers pending for this request 2403 * %true - still buffers pending for this request
2402 */ 2404 */
2403 bool __blk_end_request_err(struct request *rq, int error) 2405 bool __blk_end_request_err(struct request *rq, int error)
2404 { 2406 {
2405 WARN_ON(error >= 0); 2407 WARN_ON(error >= 0);
2406 return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2408 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2407 } 2409 }
2408 EXPORT_SYMBOL_GPL(__blk_end_request_err); 2410 EXPORT_SYMBOL_GPL(__blk_end_request_err);
2409 2411
2410 void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2412 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2411 struct bio *bio) 2413 struct bio *bio)
2412 { 2414 {
2413 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ 2415 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
2414 rq->cmd_flags |= bio->bi_rw & REQ_WRITE; 2416 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2415 2417
2416 if (bio_has_data(bio)) { 2418 if (bio_has_data(bio)) {
2417 rq->nr_phys_segments = bio_phys_segments(q, bio); 2419 rq->nr_phys_segments = bio_phys_segments(q, bio);
2418 rq->buffer = bio_data(bio); 2420 rq->buffer = bio_data(bio);
2419 } 2421 }
2420 rq->__data_len = bio->bi_size; 2422 rq->__data_len = bio->bi_size;
2421 rq->bio = rq->biotail = bio; 2423 rq->bio = rq->biotail = bio;
2422 2424
2423 if (bio->bi_bdev) 2425 if (bio->bi_bdev)
2424 rq->rq_disk = bio->bi_bdev->bd_disk; 2426 rq->rq_disk = bio->bi_bdev->bd_disk;
2425 } 2427 }
2426 2428
2427 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 2429 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2428 /** 2430 /**
2429 * rq_flush_dcache_pages - Helper function to flush all pages in a request 2431 * rq_flush_dcache_pages - Helper function to flush all pages in a request
2430 * @rq: the request to be flushed 2432 * @rq: the request to be flushed
2431 * 2433 *
2432 * Description: 2434 * Description:
2433 * Flush all pages in @rq. 2435 * Flush all pages in @rq.
2434 */ 2436 */
2435 void rq_flush_dcache_pages(struct request *rq) 2437 void rq_flush_dcache_pages(struct request *rq)
2436 { 2438 {
2437 struct req_iterator iter; 2439 struct req_iterator iter;
2438 struct bio_vec *bvec; 2440 struct bio_vec *bvec;
2439 2441
2440 rq_for_each_segment(bvec, rq, iter) 2442 rq_for_each_segment(bvec, rq, iter)
2441 flush_dcache_page(bvec->bv_page); 2443 flush_dcache_page(bvec->bv_page);
2442 } 2444 }
2443 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); 2445 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2444 #endif 2446 #endif
2445 2447
2446 /** 2448 /**
2447 * blk_lld_busy - Check if underlying low-level drivers of a device are busy 2449 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
2448 * @q : the queue of the device being checked 2450 * @q : the queue of the device being checked
2449 * 2451 *
2450 * Description: 2452 * Description:
2451 * Check if underlying low-level drivers of a device are busy. 2453 * Check if underlying low-level drivers of a device are busy.
2452 * If the drivers want to export their busy state, they must set their own 2454 * If the drivers want to export their busy state, they must set their own
2453 * exporting function using blk_queue_lld_busy() first. 2455 * exporting function using blk_queue_lld_busy() first.
2454 * 2456 *
2455 * Basically, this function is used only by request stacking drivers 2457 * Basically, this function is used only by request stacking drivers
2456 * to stop dispatching requests to underlying devices when underlying 2458 * to stop dispatching requests to underlying devices when underlying
2457 * devices are busy. This behavior helps more I/O merging on the queue 2459 * devices are busy. This behavior helps more I/O merging on the queue
2458 * of the request stacking driver and prevents I/O throughput regression 2460 * of the request stacking driver and prevents I/O throughput regression
2459 * on burst I/O load. 2461 * on burst I/O load.
2460 * 2462 *
2461 * Return: 2463 * Return:
2462 * 0 - Not busy (The request stacking driver should dispatch request) 2464 * 0 - Not busy (The request stacking driver should dispatch request)
2463 * 1 - Busy (The request stacking driver should stop dispatching request) 2465 * 1 - Busy (The request stacking driver should stop dispatching request)
2464 */ 2466 */
2465 int blk_lld_busy(struct request_queue *q) 2467 int blk_lld_busy(struct request_queue *q)
2466 { 2468 {
2467 if (q->lld_busy_fn) 2469 if (q->lld_busy_fn)
2468 return q->lld_busy_fn(q); 2470 return q->lld_busy_fn(q);
2469 2471
2470 return 0; 2472 return 0;
2471 } 2473 }
2472 EXPORT_SYMBOL_GPL(blk_lld_busy); 2474 EXPORT_SYMBOL_GPL(blk_lld_busy);
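A request stacking driver typically consults this as a dispatch gate, for example (illustrative sketch only; my_should_dispatch is a made-up name):

	/* Hypothetical dispatch gate in a request stacking driver. */
	static bool my_should_dispatch(struct request_queue *lower_q)
	{
		/* hold back while the underlying LLD reports itself busy */
		return !blk_lld_busy(lower_q);
	}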
2473 2475
2474 /** 2476 /**
2475 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request 2477 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
2476 * @rq: the clone request to be cleaned up 2478 * @rq: the clone request to be cleaned up
2477 * 2479 *
2478 * Description: 2480 * Description:
2479 * Free all bios in @rq for a cloned request. 2481 * Free all bios in @rq for a cloned request.
2480 */ 2482 */
2481 void blk_rq_unprep_clone(struct request *rq) 2483 void blk_rq_unprep_clone(struct request *rq)
2482 { 2484 {
2483 struct bio *bio; 2485 struct bio *bio;
2484 2486
2485 while ((bio = rq->bio) != NULL) { 2487 while ((bio = rq->bio) != NULL) {
2486 rq->bio = bio->bi_next; 2488 rq->bio = bio->bi_next;
2487 2489
2488 bio_put(bio); 2490 bio_put(bio);
2489 } 2491 }
2490 } 2492 }
2491 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); 2493 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2492 2494
2493 /* 2495 /*
2494 * Copy attributes of the original request to the clone request. 2496 * Copy attributes of the original request to the clone request.
2495 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. 2497 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
2496 */ 2498 */
2497 static void __blk_rq_prep_clone(struct request *dst, struct request *src) 2499 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2498 { 2500 {
2499 dst->cpu = src->cpu; 2501 dst->cpu = src->cpu;
2500 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); 2502 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
2501 if (src->cmd_flags & REQ_DISCARD) 2503 if (src->cmd_flags & REQ_DISCARD)
2502 dst->cmd_flags |= REQ_DISCARD; 2504 dst->cmd_flags |= REQ_DISCARD;
2503 dst->cmd_type = src->cmd_type; 2505 dst->cmd_type = src->cmd_type;
2504 dst->__sector = blk_rq_pos(src); 2506 dst->__sector = blk_rq_pos(src);
2505 dst->__data_len = blk_rq_bytes(src); 2507 dst->__data_len = blk_rq_bytes(src);
2506 dst->nr_phys_segments = src->nr_phys_segments; 2508 dst->nr_phys_segments = src->nr_phys_segments;
2507 dst->ioprio = src->ioprio; 2509 dst->ioprio = src->ioprio;
2508 dst->extra_len = src->extra_len; 2510 dst->extra_len = src->extra_len;
2509 } 2511 }
2510 2512
2511 /** 2513 /**
2512 * blk_rq_prep_clone - Helper function to setup clone request 2514 * blk_rq_prep_clone - Helper function to setup clone request
2513 * @rq: the request to be setup 2515 * @rq: the request to be setup
2514 * @rq_src: original request to be cloned 2516 * @rq_src: original request to be cloned
2515 * @bs: bio_set that bios for clone are allocated from 2517 * @bs: bio_set that bios for clone are allocated from
2516 * @gfp_mask: memory allocation mask for bio 2518 * @gfp_mask: memory allocation mask for bio
2517 * @bio_ctr: setup function to be called for each clone bio. 2519 * @bio_ctr: setup function to be called for each clone bio.
2518 * Returns %0 for success, non %0 for failure. 2520 * Returns %0 for success, non %0 for failure.
2519 * @data: private data to be passed to @bio_ctr 2521 * @data: private data to be passed to @bio_ctr
2520 * 2522 *
2521 * Description: 2523 * Description:
2522 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. 2524 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
2523 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) 2525 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
2524 * are not copied, and copying such parts is the caller's responsibility. 2526 * are not copied, and copying such parts is the caller's responsibility.
2525 * Also, pages which the original bios are pointing to are not copied 2527 * Also, pages which the original bios are pointing to are not copied
2526 * and the cloned bios just point to the same pages. 2528 * and the cloned bios just point to the same pages.
2527 * So cloned bios must be completed before original bios, which means 2529 * So cloned bios must be completed before original bios, which means
2528 * the caller must complete @rq before @rq_src. 2530 * the caller must complete @rq before @rq_src.
2529 */ 2531 */
2530 int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 2532 int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2531 struct bio_set *bs, gfp_t gfp_mask, 2533 struct bio_set *bs, gfp_t gfp_mask,
2532 int (*bio_ctr)(struct bio *, struct bio *, void *), 2534 int (*bio_ctr)(struct bio *, struct bio *, void *),
2533 void *data) 2535 void *data)
2534 { 2536 {
2535 struct bio *bio, *bio_src; 2537 struct bio *bio, *bio_src;
2536 2538
2537 if (!bs) 2539 if (!bs)
2538 bs = fs_bio_set; 2540 bs = fs_bio_set;
2539 2541
2540 blk_rq_init(NULL, rq); 2542 blk_rq_init(NULL, rq);
2541 2543
2542 __rq_for_each_bio(bio_src, rq_src) { 2544 __rq_for_each_bio(bio_src, rq_src) {
2543 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); 2545 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2544 if (!bio) 2546 if (!bio)
2545 goto free_and_out; 2547 goto free_and_out;
2546 2548
2547 __bio_clone(bio, bio_src); 2549 __bio_clone(bio, bio_src);
2548 2550
2549 if (bio_integrity(bio_src) && 2551 if (bio_integrity(bio_src) &&
2550 bio_integrity_clone(bio, bio_src, gfp_mask, bs)) 2552 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2551 goto free_and_out; 2553 goto free_and_out;
2552 2554
2553 if (bio_ctr && bio_ctr(bio, bio_src, data)) 2555 if (bio_ctr && bio_ctr(bio, bio_src, data))
2554 goto free_and_out; 2556 goto free_and_out;
2555 2557
2556 if (rq->bio) { 2558 if (rq->bio) {
2557 rq->biotail->bi_next = bio; 2559 rq->biotail->bi_next = bio;
2558 rq->biotail = bio; 2560 rq->biotail = bio;
2559 } else 2561 } else
2560 rq->bio = rq->biotail = bio; 2562 rq->bio = rq->biotail = bio;
2561 } 2563 }
2562 2564
2563 __blk_rq_prep_clone(rq, rq_src); 2565 __blk_rq_prep_clone(rq, rq_src);
2564 2566
2565 return 0; 2567 return 0;
2566 2568
2567 free_and_out: 2569 free_and_out:
2568 if (bio) 2570 if (bio)
2569 bio_free(bio, bs); 2571 bio_free(bio, bs);
2570 blk_rq_unprep_clone(rq); 2572 blk_rq_unprep_clone(rq);
2571 2573
2572 return -ENOMEM; 2574 return -ENOMEM;
2573 } 2575 }
2574 EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 2576 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
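/*
 * Illustrative sketch, not part of the patch: minimal clone setup and
 * teardown as a request-based stacking driver might do it.  The "my_"
 * names are hypothetical; passing a NULL bio_set falls back to
 * fs_bio_set and a NULL bio_ctr skips per-bio setup.
 */
static int my_setup_clone(struct request *clone, struct request *rq,
			  gfp_t gfp_mask)
{
	if (blk_rq_prep_clone(clone, rq, NULL, gfp_mask, NULL, NULL))
		return -ENOMEM;
	return 0;
}

static void my_teardown_clone(struct request *clone)
{
	blk_rq_unprep_clone(clone);	/* drops the cloned bios */
}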
2575 2577
2576 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2578 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2577 { 2579 {
2578 return queue_work(kblockd_workqueue, work); 2580 return queue_work(kblockd_workqueue, work);
2579 } 2581 }
2580 EXPORT_SYMBOL(kblockd_schedule_work); 2582 EXPORT_SYMBOL(kblockd_schedule_work);
2581 2583
2582 int __init blk_dev_init(void) 2584 int __init blk_dev_init(void)
2583 { 2585 {
2584 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2586 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2585 sizeof(((struct request *)0)->cmd_flags)); 2587 sizeof(((struct request *)0)->cmd_flags));
2586 2588
2587 kblockd_workqueue = create_workqueue("kblockd"); 2589 kblockd_workqueue = create_workqueue("kblockd");
2588 if (!kblockd_workqueue) 2590 if (!kblockd_workqueue)
2589 panic("Failed to create kblockd\n"); 2591 panic("Failed to create kblockd\n");
2590 2592
2591 request_cachep = kmem_cache_create("blkdev_requests", 2593 request_cachep = kmem_cache_create("blkdev_requests",
2592 sizeof(struct request), 0, SLAB_PANIC, NULL); 2594 sizeof(struct request), 0, SLAB_PANIC, NULL);
2593 2595
2594 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2596 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2595 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2597 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2596 2598
2597 return 0; 2599 return 0;
2598 } 2600 }
2599 2601
block/blk-settings.c
1 /* 1 /*
2 * Functions related to setting various queue properties from drivers 2 * Functions related to setting various queue properties from drivers
3 */ 3 */
4 #include <linux/kernel.h> 4 #include <linux/kernel.h>
5 #include <linux/module.h> 5 #include <linux/module.h>
6 #include <linux/init.h> 6 #include <linux/init.h>
7 #include <linux/bio.h> 7 #include <linux/bio.h>
8 #include <linux/blkdev.h> 8 #include <linux/blkdev.h>
9 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ 9 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
10 #include <linux/gcd.h> 10 #include <linux/gcd.h>
11 #include <linux/lcm.h> 11 #include <linux/lcm.h>
12 #include <linux/jiffies.h> 12 #include <linux/jiffies.h>
13 #include <linux/gfp.h> 13 #include <linux/gfp.h>
14 14
15 #include "blk.h" 15 #include "blk.h"
16 16
17 unsigned long blk_max_low_pfn; 17 unsigned long blk_max_low_pfn;
18 EXPORT_SYMBOL(blk_max_low_pfn); 18 EXPORT_SYMBOL(blk_max_low_pfn);
19 19
20 unsigned long blk_max_pfn; 20 unsigned long blk_max_pfn;
21 21
22 /** 22 /**
23 * blk_queue_prep_rq - set a prepare_request function for queue 23 * blk_queue_prep_rq - set a prepare_request function for queue
24 * @q: queue 24 * @q: queue
25 * @pfn: prepare_request function 25 * @pfn: prepare_request function
26 * 26 *
27 * It's possible for a queue to register a prepare_request callback which 27 * It's possible for a queue to register a prepare_request callback which
28 * is invoked before the request is handed to the request_fn. The goal of 28 * is invoked before the request is handed to the request_fn. The goal of
29 * the function is to prepare a request for I/O, it can be used to build a 29 * the function is to prepare a request for I/O, it can be used to build a
30 * cdb from the request data for instance. 30 * cdb from the request data for instance.
31 * 31 *
32 */ 32 */
33 void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) 33 void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
34 { 34 {
35 q->prep_rq_fn = pfn; 35 q->prep_rq_fn = pfn;
36 } 36 }
37 EXPORT_SYMBOL(blk_queue_prep_rq); 37 EXPORT_SYMBOL(blk_queue_prep_rq);
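/*
 * Illustrative sketch, not part of the patch: a driver registering a
 * prepare_request callback.  The "my_" names are hypothetical; the
 * callback returns one of the BLKPREP_* codes.
 */
static int my_prep_rq_fn(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_type == REQ_TYPE_FS) {
		/* build the device command block from the request here */
		rq->cmd_flags |= REQ_DONTPREP;	/* prepare only once */
	}
	return BLKPREP_OK;
}

/* during queue setup:  blk_queue_prep_rq(q, my_prep_rq_fn); */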
38 38
39 /** 39 /**
40 * blk_queue_unprep_rq - set an unprepare_request function for queue 40 * blk_queue_unprep_rq - set an unprepare_request function for queue
41 * @q: queue 41 * @q: queue
42 * @ufn: unprepare_request function 42 * @ufn: unprepare_request function
43 * 43 *
44 * It's possible for a queue to register an unprepare_request callback 44 * It's possible for a queue to register an unprepare_request callback
45 * which is invoked before the request is finally completed. The goal 45 * which is invoked before the request is finally completed. The goal
46 * of the function is to deallocate any data that was allocated in the 46 * of the function is to deallocate any data that was allocated in the
47 * prepare_request callback. 47 * prepare_request callback.
48 * 48 *
49 */ 49 */
50 void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn) 50 void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
51 { 51 {
52 q->unprep_rq_fn = ufn; 52 q->unprep_rq_fn = ufn;
53 } 53 }
54 EXPORT_SYMBOL(blk_queue_unprep_rq); 54 EXPORT_SYMBOL(blk_queue_unprep_rq);
55 55
56 /** 56 /**
57 * blk_queue_merge_bvec - set a merge_bvec function for queue 57 * blk_queue_merge_bvec - set a merge_bvec function for queue
58 * @q: queue 58 * @q: queue
59 * @mbfn: merge_bvec_fn 59 * @mbfn: merge_bvec_fn
60 * 60 *
61 * Usually queues have static limitations on the max sectors or segments that 61 * Usually queues have static limitations on the max sectors or segments that
62 * we can put in a request. Stacking drivers may have some settings that 62 * we can put in a request. Stacking drivers may have some settings that
63 * are dynamic, and thus we have to query the queue whether it is ok to 63 * are dynamic, and thus we have to query the queue whether it is ok to
64 * add a new bio_vec to a bio at a given offset or not. If the block device 64 * add a new bio_vec to a bio at a given offset or not. If the block device
65 * has such limitations, it needs to register a merge_bvec_fn to control 65 * has such limitations, it needs to register a merge_bvec_fn to control
66 * the size of bios sent to it. Note that a block device *must* allow a 66 * the size of bios sent to it. Note that a block device *must* allow a
67 * single page to be added to an empty bio. The block device driver may want 67 * single page to be added to an empty bio. The block device driver may want
68 * to use the bio_split() function to deal with these bios. By default 68 * to use the bio_split() function to deal with these bios. By default
69 * no merge_bvec_fn is defined for a queue, and only the fixed limits are 69 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
70 * honored. 70 * honored.
71 */ 71 */
72 void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) 72 void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
73 { 73 {
74 q->merge_bvec_fn = mbfn; 74 q->merge_bvec_fn = mbfn;
75 } 75 }
76 EXPORT_SYMBOL(blk_queue_merge_bvec); 76 EXPORT_SYMBOL(blk_queue_merge_bvec);
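/*
 * Illustrative sketch, not part of the patch: a merge_bvec_fn that keeps
 * bios from crossing a hypothetical chunk boundary (MY_CHUNK_SECTORS is
 * made up).  The callback returns how many bytes of @biovec may be added,
 * and must always accept at least one page into an empty bio.
 */
#define MY_CHUNK_SECTORS	128	/* hypothetical 64K chunk */

static int my_merge_bvec_fn(struct request_queue *q,
			    struct bvec_merge_data *bvm,
			    struct bio_vec *biovec)
{
	sector_t sector = bvm->bi_sector;
	unsigned int bio_sectors = bvm->bi_size >> 9;
	int max;

	max = (MY_CHUNK_SECTORS - ((sector & (MY_CHUNK_SECTORS - 1))
				   + bio_sectors)) << 9;
	if (max < 0)
		max = 0;
	if (max < biovec->bv_len && bio_sectors == 0)
		return biovec->bv_len;	/* one page must always fit */
	return max;
}

/* during queue setup:  blk_queue_merge_bvec(q, my_merge_bvec_fn); */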
77 77
78 void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) 78 void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
79 { 79 {
80 q->softirq_done_fn = fn; 80 q->softirq_done_fn = fn;
81 } 81 }
82 EXPORT_SYMBOL(blk_queue_softirq_done); 82 EXPORT_SYMBOL(blk_queue_softirq_done);
83 83
84 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) 84 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
85 { 85 {
86 q->rq_timeout = timeout; 86 q->rq_timeout = timeout;
87 } 87 }
88 EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); 88 EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
89 89
90 void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) 90 void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
91 { 91 {
92 q->rq_timed_out_fn = fn; 92 q->rq_timed_out_fn = fn;
93 } 93 }
94 EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); 94 EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
95 95
96 void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) 96 void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
97 { 97 {
98 q->lld_busy_fn = fn; 98 q->lld_busy_fn = fn;
99 } 99 }
100 EXPORT_SYMBOL_GPL(blk_queue_lld_busy); 100 EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
101 101
102 /** 102 /**
103 * blk_set_default_limits - reset limits to default values 103 * blk_set_default_limits - reset limits to default values
104 * @lim: the queue_limits structure to reset 104 * @lim: the queue_limits structure to reset
105 * 105 *
106 * Description: 106 * Description:
107 * Returns a queue_limit struct to its default state. Can be used by 107 * Returns a queue_limit struct to its default state. Can be used by
108 * stacking drivers like DM that stage table swaps and reuse an 108 * stacking drivers like DM that stage table swaps and reuse an
109 * existing device queue. 109 * existing device queue.
110 */ 110 */
111 void blk_set_default_limits(struct queue_limits *lim) 111 void blk_set_default_limits(struct queue_limits *lim)
112 { 112 {
113 lim->max_segments = BLK_MAX_SEGMENTS; 113 lim->max_segments = BLK_MAX_SEGMENTS;
114 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 114 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
115 lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; 115 lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
116 lim->max_sectors = BLK_DEF_MAX_SECTORS; 116 lim->max_sectors = BLK_DEF_MAX_SECTORS;
117 lim->max_hw_sectors = INT_MAX; 117 lim->max_hw_sectors = INT_MAX;
118 lim->max_discard_sectors = 0; 118 lim->max_discard_sectors = 0;
119 lim->discard_granularity = 0; 119 lim->discard_granularity = 0;
120 lim->discard_alignment = 0; 120 lim->discard_alignment = 0;
121 lim->discard_misaligned = 0; 121 lim->discard_misaligned = 0;
122 lim->discard_zeroes_data = -1; 122 lim->discard_zeroes_data = -1;
123 lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; 123 lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
124 lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); 124 lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
125 lim->alignment_offset = 0; 125 lim->alignment_offset = 0;
126 lim->io_opt = 0; 126 lim->io_opt = 0;
127 lim->misaligned = 0; 127 lim->misaligned = 0;
128 lim->no_cluster = 0; 128 lim->no_cluster = 0;
129 } 129 }
130 EXPORT_SYMBOL(blk_set_default_limits); 130 EXPORT_SYMBOL(blk_set_default_limits);
131 131
132 /** 132 /**
133 * blk_queue_make_request - define an alternate make_request function for a device 133 * blk_queue_make_request - define an alternate make_request function for a device
134 * @q: the request queue for the device to be affected 134 * @q: the request queue for the device to be affected
135 * @mfn: the alternate make_request function 135 * @mfn: the alternate make_request function
136 * 136 *
137 * Description: 137 * Description:
138 * The normal way for &struct bios to be passed to a device 138 * The normal way for &struct bios to be passed to a device
139 * driver is for them to be collected into requests on a request 139 * driver is for them to be collected into requests on a request
140 * queue, and then to allow the device driver to select requests 140 * queue, and then to allow the device driver to select requests
141 * off that queue when it is ready. This works well for many block 141 * off that queue when it is ready. This works well for many block
142 * devices. However some block devices (typically virtual devices 142 * devices. However some block devices (typically virtual devices
143 * such as md or lvm) do not benefit from the processing on the 143 * such as md or lvm) do not benefit from the processing on the
144 * request queue, and are served best by having the requests passed 144 * request queue, and are served best by having the requests passed
145 * directly to them. This can be achieved by providing a function 145 * directly to them. This can be achieved by providing a function
146 * to blk_queue_make_request(). 146 * to blk_queue_make_request().
147 * 147 *
148 * Caveat: 148 * Caveat:
149 * The driver that does this *must* be able to deal appropriately 149 * The driver that does this *must* be able to deal appropriately
150 * with buffers in "highmemory". This can be accomplished by either calling 150 * with buffers in "highmemory". This can be accomplished by either calling
151 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling 151 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
152 * blk_queue_bounce() to create a buffer in normal memory. 152 * blk_queue_bounce() to create a buffer in normal memory.
153 **/ 153 **/
154 void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) 154 void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
155 { 155 {
156 /* 156 /*
157 * set defaults 157 * set defaults
158 */ 158 */
159 q->nr_requests = BLKDEV_MAX_RQ; 159 q->nr_requests = BLKDEV_MAX_RQ;
160 160
161 q->make_request_fn = mfn; 161 q->make_request_fn = mfn;
162 blk_queue_dma_alignment(q, 511); 162 blk_queue_dma_alignment(q, 511);
163 blk_queue_congestion_threshold(q); 163 blk_queue_congestion_threshold(q);
164 q->nr_batching = BLK_BATCH_REQ; 164 q->nr_batching = BLK_BATCH_REQ;
165 165
166 q->unplug_thresh = 4; /* hmm */ 166 q->unplug_thresh = 4; /* hmm */
167 q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */ 167 q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */
168 if (q->unplug_delay == 0) 168 if (q->unplug_delay == 0)
169 q->unplug_delay = 1; 169 q->unplug_delay = 1;
170 170
171 q->unplug_timer.function = blk_unplug_timeout; 171 q->unplug_timer.function = blk_unplug_timeout;
172 q->unplug_timer.data = (unsigned long)q; 172 q->unplug_timer.data = (unsigned long)q;
173 173
174 blk_set_default_limits(&q->limits); 174 blk_set_default_limits(&q->limits);
175 blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); 175 blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
176 176
177 /* 177 /*
178 * If the caller didn't supply a lock, fall back to our embedded 178 * If the caller didn't supply a lock, fall back to our embedded
179 * per-queue locks 179 * per-queue locks
180 */ 180 */
181 if (!q->queue_lock) 181 if (!q->queue_lock)
182 q->queue_lock = &q->__queue_lock; 182 q->queue_lock = &q->__queue_lock;
183 183
184 /* 184 /*
185 * by default assume old behaviour and bounce for any highmem page 185 * by default assume old behaviour and bounce for any highmem page
186 */ 186 */
187 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 187 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
188 } 188 }
189 EXPORT_SYMBOL(blk_queue_make_request); 189 EXPORT_SYMBOL(blk_queue_make_request);
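/*
 * Illustrative sketch, not part of the patch: a bio-based virtual device
 * wiring up its own make_request function.  Names are hypothetical; a
 * real driver would remap or service the bio rather than completing it
 * immediately.
 */
static int my_make_request(struct request_queue *q, struct bio *bio)
{
	bio_endio(bio, 0);	/* pretend the I/O completed successfully */
	return 0;
}

/* at probe time:
 *	q = blk_alloc_queue(GFP_KERNEL);
 *	blk_queue_make_request(q, my_make_request);
 */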
190 190
191 /** 191 /**
192 * blk_queue_bounce_limit - set bounce buffer limit for queue 192 * blk_queue_bounce_limit - set bounce buffer limit for queue
193 * @q: the request queue for the device 193 * @q: the request queue for the device
194 * @dma_mask: the maximum address the device can handle 194 * @dma_mask: the maximum address the device can handle
195 * 195 *
196 * Description: 196 * Description:
197 * Different hardware can have different requirements as to what pages 197 * Different hardware can have different requirements as to what pages
198 * it can do I/O directly to. A low level driver can call 198 * it can do I/O directly to. A low level driver can call
199 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 199 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
200 * buffers for doing I/O to pages residing above @dma_mask. 200 * buffers for doing I/O to pages residing above @dma_mask.
201 **/ 201 **/
202 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) 202 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
203 { 203 {
204 unsigned long b_pfn = dma_mask >> PAGE_SHIFT; 204 unsigned long b_pfn = dma_mask >> PAGE_SHIFT;
205 int dma = 0; 205 int dma = 0;
206 206
207 q->bounce_gfp = GFP_NOIO; 207 q->bounce_gfp = GFP_NOIO;
208 #if BITS_PER_LONG == 64 208 #if BITS_PER_LONG == 64
209 /* 209 /*
210 * Assume anything <= 4GB can be handled by IOMMU. Actually 210 * Assume anything <= 4GB can be handled by IOMMU. Actually
211 * some IOMMUs can handle everything, but I don't know of a 211 * some IOMMUs can handle everything, but I don't know of a
212 * way to test this here. 212 * way to test this here.
213 */ 213 */
214 if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) 214 if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
215 dma = 1; 215 dma = 1;
216 q->limits.bounce_pfn = max_low_pfn; 216 q->limits.bounce_pfn = max_low_pfn;
217 #else 217 #else
218 if (b_pfn < blk_max_low_pfn) 218 if (b_pfn < blk_max_low_pfn)
219 dma = 1; 219 dma = 1;
220 q->limits.bounce_pfn = b_pfn; 220 q->limits.bounce_pfn = b_pfn;
221 #endif 221 #endif
222 if (dma) { 222 if (dma) {
223 init_emergency_isa_pool(); 223 init_emergency_isa_pool();
224 q->bounce_gfp = GFP_NOIO | GFP_DMA; 224 q->bounce_gfp = GFP_NOIO | GFP_DMA;
225 q->limits.bounce_pfn = b_pfn; 225 q->limits.bounce_pfn = b_pfn;
226 } 226 }
227 } 227 }
228 EXPORT_SYMBOL(blk_queue_bounce_limit); 228 EXPORT_SYMBOL(blk_queue_bounce_limit);
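/*
 * Illustrative sketch, not part of the patch: a driver whose controller
 * can only DMA below 4GB asks for bounce buffers above that boundary.
 * The 32-bit mask is just an example value (<linux/dma-mapping.h>).
 */
static void my_set_bounce(struct request_queue *q)
{
	blk_queue_bounce_limit(q, DMA_BIT_MASK(32));
}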
229 229
230 /** 230 /**
231 * blk_queue_max_hw_sectors - set max sectors for a request for this queue 231 * blk_queue_max_hw_sectors - set max sectors for a request for this queue
232 * @q: the request queue for the device 232 * @q: the request queue for the device
233 * @max_hw_sectors: max hardware sectors in the usual 512b unit 233 * @max_hw_sectors: max hardware sectors in the usual 512b unit
234 * 234 *
235 * Description: 235 * Description:
236 * Enables a low level driver to set a hard upper limit, 236 * Enables a low level driver to set a hard upper limit,
237 * max_hw_sectors, on the size of requests. max_hw_sectors is set by 237 * max_hw_sectors, on the size of requests. max_hw_sectors is set by
238 * the device driver based upon the combined capabilities of I/O 238 * the device driver based upon the combined capabilities of I/O
239 * controller and storage device. 239 * controller and storage device.
240 * 240 *
241 * max_sectors is a soft limit imposed by the block layer for 241 * max_sectors is a soft limit imposed by the block layer for
242 * filesystem type requests. This value can be overridden on a 242 * filesystem type requests. This value can be overridden on a
243 * per-device basis in /sys/block/<device>/queue/max_sectors_kb. 243 * per-device basis in /sys/block/<device>/queue/max_sectors_kb.
244 * The soft limit can not exceed max_hw_sectors. 244 * The soft limit can not exceed max_hw_sectors.
245 **/ 245 **/
246 void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) 246 void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
247 { 247 {
248 if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { 248 if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
249 max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); 249 max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
250 printk(KERN_INFO "%s: set to minimum %d\n", 250 printk(KERN_INFO "%s: set to minimum %d\n",
251 __func__, max_hw_sectors); 251 __func__, max_hw_sectors);
252 } 252 }
253 253
254 q->limits.max_hw_sectors = max_hw_sectors; 254 q->limits.max_hw_sectors = max_hw_sectors;
255 q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, 255 q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
256 BLK_DEF_MAX_SECTORS); 256 BLK_DEF_MAX_SECTORS);
257 } 257 }
258 EXPORT_SYMBOL(blk_queue_max_hw_sectors); 258 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
259 259
260 /** 260 /**
261 * blk_queue_max_discard_sectors - set max sectors for a single discard 261 * blk_queue_max_discard_sectors - set max sectors for a single discard
262 * @q: the request queue for the device 262 * @q: the request queue for the device
263 * @max_discard_sectors: maximum number of sectors to discard 263 * @max_discard_sectors: maximum number of sectors to discard
264 **/ 264 **/
265 void blk_queue_max_discard_sectors(struct request_queue *q, 265 void blk_queue_max_discard_sectors(struct request_queue *q,
266 unsigned int max_discard_sectors) 266 unsigned int max_discard_sectors)
267 { 267 {
268 q->limits.max_discard_sectors = max_discard_sectors; 268 q->limits.max_discard_sectors = max_discard_sectors;
269 } 269 }
270 EXPORT_SYMBOL(blk_queue_max_discard_sectors); 270 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
271 271
272 /** 272 /**
273 * blk_queue_max_segments - set max hw segments for a request for this queue 273 * blk_queue_max_segments - set max hw segments for a request for this queue
274 * @q: the request queue for the device 274 * @q: the request queue for the device
275 * @max_segments: max number of segments 275 * @max_segments: max number of segments
276 * 276 *
277 * Description: 277 * Description:
278 * Enables a low level driver to set an upper limit on the number of 278 * Enables a low level driver to set an upper limit on the number of
279 * hw data segments in a request. 279 * hw data segments in a request.
280 **/ 280 **/
281 void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) 281 void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
282 { 282 {
283 if (!max_segments) { 283 if (!max_segments) {
284 max_segments = 1; 284 max_segments = 1;
285 printk(KERN_INFO "%s: set to minimum %d\n", 285 printk(KERN_INFO "%s: set to minimum %d\n",
286 __func__, max_segments); 286 __func__, max_segments);
287 } 287 }
288 288
289 q->limits.max_segments = max_segments; 289 q->limits.max_segments = max_segments;
290 } 290 }
291 EXPORT_SYMBOL(blk_queue_max_segments); 291 EXPORT_SYMBOL(blk_queue_max_segments);
292 292
293 /** 293 /**
294 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 294 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
295 * @q: the request queue for the device 295 * @q: the request queue for the device
296 * @max_size: max size of segment in bytes 296 * @max_size: max size of segment in bytes
297 * 297 *
298 * Description: 298 * Description:
299 * Enables a low level driver to set an upper limit on the size of a 299 * Enables a low level driver to set an upper limit on the size of a
300 * coalesced segment 300 * coalesced segment
301 **/ 301 **/
302 void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) 302 void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
303 { 303 {
304 if (max_size < PAGE_CACHE_SIZE) { 304 if (max_size < PAGE_CACHE_SIZE) {
305 max_size = PAGE_CACHE_SIZE; 305 max_size = PAGE_CACHE_SIZE;
306 printk(KERN_INFO "%s: set to minimum %d\n", 306 printk(KERN_INFO "%s: set to minimum %d\n",
307 __func__, max_size); 307 __func__, max_size);
308 } 308 }
309 309
310 q->limits.max_segment_size = max_size; 310 q->limits.max_segment_size = max_size;
311 } 311 }
312 EXPORT_SYMBOL(blk_queue_max_segment_size); 312 EXPORT_SYMBOL(blk_queue_max_segment_size);
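/*
 * Illustrative sketch, not part of the patch: typical transfer-limit
 * setup in a low-level driver's probe path.  The numbers are
 * hypothetical and would normally come from the controller/device.
 */
static void my_set_transfer_limits(struct request_queue *q)
{
	blk_queue_max_hw_sectors(q, 2048);	/* 1MB per request */
	blk_queue_max_segments(q, 128);		/* S/G table entries */
	blk_queue_max_segment_size(q, 65536);	/* 64K per S/G element */
}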
313 313
314 /** 314 /**
315 * blk_queue_logical_block_size - set logical block size for the queue 315 * blk_queue_logical_block_size - set logical block size for the queue
316 * @q: the request queue for the device 316 * @q: the request queue for the device
317 * @size: the logical block size, in bytes 317 * @size: the logical block size, in bytes
318 * 318 *
319 * Description: 319 * Description:
320 * This should be set to the lowest possible block size that the 320 * This should be set to the lowest possible block size that the
321 * storage device can address. The default of 512 covers most 321 * storage device can address. The default of 512 covers most
322 * hardware. 322 * hardware.
323 **/ 323 **/
324 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) 324 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
325 { 325 {
326 q->limits.logical_block_size = size; 326 q->limits.logical_block_size = size;
327 327
328 if (q->limits.physical_block_size < size) 328 if (q->limits.physical_block_size < size)
329 q->limits.physical_block_size = size; 329 q->limits.physical_block_size = size;
330 330
331 if (q->limits.io_min < q->limits.physical_block_size) 331 if (q->limits.io_min < q->limits.physical_block_size)
332 q->limits.io_min = q->limits.physical_block_size; 332 q->limits.io_min = q->limits.physical_block_size;
333 } 333 }
334 EXPORT_SYMBOL(blk_queue_logical_block_size); 334 EXPORT_SYMBOL(blk_queue_logical_block_size);
335 335
336 /** 336 /**
337 * blk_queue_physical_block_size - set physical block size for the queue 337 * blk_queue_physical_block_size - set physical block size for the queue
338 * @q: the request queue for the device 338 * @q: the request queue for the device
339 * @size: the physical block size, in bytes 339 * @size: the physical block size, in bytes
340 * 340 *
341 * Description: 341 * Description:
342 * This should be set to the lowest possible sector size that the 342 * This should be set to the lowest possible sector size that the
343 * hardware can operate on without reverting to read-modify-write 343 * hardware can operate on without reverting to read-modify-write
344 * operations. 344 * operations.
345 */ 345 */
346 void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) 346 void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
347 { 347 {
348 q->limits.physical_block_size = size; 348 q->limits.physical_block_size = size;
349 349
350 if (q->limits.physical_block_size < q->limits.logical_block_size) 350 if (q->limits.physical_block_size < q->limits.logical_block_size)
351 q->limits.physical_block_size = q->limits.logical_block_size; 351 q->limits.physical_block_size = q->limits.logical_block_size;
352 352
353 if (q->limits.io_min < q->limits.physical_block_size) 353 if (q->limits.io_min < q->limits.physical_block_size)
354 q->limits.io_min = q->limits.physical_block_size; 354 q->limits.io_min = q->limits.physical_block_size;
355 } 355 }
356 EXPORT_SYMBOL(blk_queue_physical_block_size); 356 EXPORT_SYMBOL(blk_queue_physical_block_size);
357 357
358 /** 358 /**
359 * blk_queue_alignment_offset - set physical block alignment offset 359 * blk_queue_alignment_offset - set physical block alignment offset
360 * @q: the request queue for the device 360 * @q: the request queue for the device
361 * @offset: alignment offset in bytes 361 * @offset: alignment offset in bytes
362 * 362 *
363 * Description: 363 * Description:
364 * Some devices are naturally misaligned to compensate for things like 364 * Some devices are naturally misaligned to compensate for things like
365 * the legacy DOS partition table 63-sector offset. Low-level drivers 365 * the legacy DOS partition table 63-sector offset. Low-level drivers
366 * should call this function for devices whose first sector is not 366 * should call this function for devices whose first sector is not
367 * naturally aligned. 367 * naturally aligned.
368 */ 368 */
369 void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) 369 void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
370 { 370 {
371 q->limits.alignment_offset = 371 q->limits.alignment_offset =
372 offset & (q->limits.physical_block_size - 1); 372 offset & (q->limits.physical_block_size - 1);
373 q->limits.misaligned = 0; 373 q->limits.misaligned = 0;
374 } 374 }
375 EXPORT_SYMBOL(blk_queue_alignment_offset); 375 EXPORT_SYMBOL(blk_queue_alignment_offset);
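/*
 * Illustrative sketch, not part of the patch: a 4K-sector drive that
 * reports 512-byte logical blocks and compensates for the legacy
 * 63-sector partition offset.  Values are hypothetical.
 */
static void my_set_block_sizes(struct request_queue *q)
{
	blk_queue_logical_block_size(q, 512);	/* addressable unit */
	blk_queue_physical_block_size(q, 4096);	/* internal sector size */
	blk_queue_alignment_offset(q, 3584);	/* (63 * 512) % 4096 */
}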
376 376
377 /** 377 /**
378 * blk_limits_io_min - set minimum request size for a device 378 * blk_limits_io_min - set minimum request size for a device
379 * @limits: the queue limits 379 * @limits: the queue limits
380 * @min: smallest I/O size in bytes 380 * @min: smallest I/O size in bytes
381 * 381 *
382 * Description: 382 * Description:
383 * Some devices have an internal block size bigger than the reported 383 * Some devices have an internal block size bigger than the reported
384 * hardware sector size. This function can be used to signal the 384 * hardware sector size. This function can be used to signal the
385 * smallest I/O the device can perform without incurring a performance 385 * smallest I/O the device can perform without incurring a performance
386 * penalty. 386 * penalty.
387 */ 387 */
388 void blk_limits_io_min(struct queue_limits *limits, unsigned int min) 388 void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
389 { 389 {
390 limits->io_min = min; 390 limits->io_min = min;
391 391
392 if (limits->io_min < limits->logical_block_size) 392 if (limits->io_min < limits->logical_block_size)
393 limits->io_min = limits->logical_block_size; 393 limits->io_min = limits->logical_block_size;
394 394
395 if (limits->io_min < limits->physical_block_size) 395 if (limits->io_min < limits->physical_block_size)
396 limits->io_min = limits->physical_block_size; 396 limits->io_min = limits->physical_block_size;
397 } 397 }
398 EXPORT_SYMBOL(blk_limits_io_min); 398 EXPORT_SYMBOL(blk_limits_io_min);
399 399
400 /** 400 /**
401 * blk_queue_io_min - set minimum request size for the queue 401 * blk_queue_io_min - set minimum request size for the queue
402 * @q: the request queue for the device 402 * @q: the request queue for the device
403 * @min: smallest I/O size in bytes 403 * @min: smallest I/O size in bytes
404 * 404 *
405 * Description: 405 * Description:
406 * Storage devices may report a granularity or preferred minimum I/O 406 * Storage devices may report a granularity or preferred minimum I/O
407 * size which is the smallest request the device can perform without 407 * size which is the smallest request the device can perform without
408 * incurring a performance penalty. For disk drives this is often the 408 * incurring a performance penalty. For disk drives this is often the
409 * physical block size. For RAID arrays it is often the stripe chunk 409 * physical block size. For RAID arrays it is often the stripe chunk
410 * size. A properly aligned multiple of minimum_io_size is the 410 * size. A properly aligned multiple of minimum_io_size is the
411 * preferred request size for workloads where a high number of I/O 411 * preferred request size for workloads where a high number of I/O
412 * operations is desired. 412 * operations is desired.
413 */ 413 */
414 void blk_queue_io_min(struct request_queue *q, unsigned int min) 414 void blk_queue_io_min(struct request_queue *q, unsigned int min)
415 { 415 {
416 blk_limits_io_min(&q->limits, min); 416 blk_limits_io_min(&q->limits, min);
417 } 417 }
418 EXPORT_SYMBOL(blk_queue_io_min); 418 EXPORT_SYMBOL(blk_queue_io_min);
419 419
420 /** 420 /**
421 * blk_limits_io_opt - set optimal request size for a device 421 * blk_limits_io_opt - set optimal request size for a device
422 * @limits: the queue limits 422 * @limits: the queue limits
423 * @opt: optimal request size in bytes 423 * @opt: optimal request size in bytes
424 * 424 *
425 * Description: 425 * Description:
426 * Storage devices may report an optimal I/O size, which is the 426 * Storage devices may report an optimal I/O size, which is the
427 * device's preferred unit for sustained I/O. This is rarely reported 427 * device's preferred unit for sustained I/O. This is rarely reported
428 * for disk drives. For RAID arrays it is usually the stripe width or 428 * for disk drives. For RAID arrays it is usually the stripe width or
429 * the internal track size. A properly aligned multiple of 429 * the internal track size. A properly aligned multiple of
430 * optimal_io_size is the preferred request size for workloads where 430 * optimal_io_size is the preferred request size for workloads where
431 * sustained throughput is desired. 431 * sustained throughput is desired.
432 */ 432 */
433 void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt) 433 void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
434 { 434 {
435 limits->io_opt = opt; 435 limits->io_opt = opt;
436 } 436 }
437 EXPORT_SYMBOL(blk_limits_io_opt); 437 EXPORT_SYMBOL(blk_limits_io_opt);
438 438
439 /** 439 /**
440 * blk_queue_io_opt - set optimal request size for the queue 440 * blk_queue_io_opt - set optimal request size for the queue
441 * @q: the request queue for the device 441 * @q: the request queue for the device
442 * @opt: optimal request size in bytes 442 * @opt: optimal request size in bytes
443 * 443 *
444 * Description: 444 * Description:
445 * Storage devices may report an optimal I/O size, which is the 445 * Storage devices may report an optimal I/O size, which is the
446 * device's preferred unit for sustained I/O. This is rarely reported 446 * device's preferred unit for sustained I/O. This is rarely reported
447 * for disk drives. For RAID arrays it is usually the stripe width or 447 * for disk drives. For RAID arrays it is usually the stripe width or
448 * the internal track size. A properly aligned multiple of 448 * the internal track size. A properly aligned multiple of
449 * optimal_io_size is the preferred request size for workloads where 449 * optimal_io_size is the preferred request size for workloads where
450 * sustained throughput is desired. 450 * sustained throughput is desired.
451 */ 451 */
452 void blk_queue_io_opt(struct request_queue *q, unsigned int opt) 452 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
453 { 453 {
454 blk_limits_io_opt(&q->limits, opt); 454 blk_limits_io_opt(&q->limits, opt);
455 } 455 }
456 EXPORT_SYMBOL(blk_queue_io_opt); 456 EXPORT_SYMBOL(blk_queue_io_opt);
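/*
 * Illustrative sketch, not part of the patch: a RAID5-style array with a
 * 64K chunk and four data disks exporting its stripe geometry.  Numbers
 * are hypothetical.
 */
static void my_set_raid_hints(struct request_queue *q)
{
	blk_queue_io_min(q, 65536);		/* one chunk */
	blk_queue_io_opt(q, 65536 * 4);		/* one full stripe */
}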
457 457
458 /* 458 /*
459 * Returns the minimum that is _not_ zero, unless both are zero. 459 * Returns the minimum that is _not_ zero, unless both are zero.
460 */ 460 */
461 #define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r))) 461 #define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r)))
462 462
463 /** 463 /**
464 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers 464 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
465 * @t: the stacking driver (top) 465 * @t: the stacking driver (top)
466 * @b: the underlying device (bottom) 466 * @b: the underlying device (bottom)
467 **/ 467 **/
468 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) 468 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
469 { 469 {
470 blk_stack_limits(&t->limits, &b->limits, 0); 470 blk_stack_limits(&t->limits, &b->limits, 0);
471 471
472 if (!t->queue_lock) 472 if (!t->queue_lock)
473 WARN_ON_ONCE(1); 473 WARN_ON_ONCE(1);
474 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { 474 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
475 unsigned long flags; 475 unsigned long flags;
476 spin_lock_irqsave(t->queue_lock, flags); 476 spin_lock_irqsave(t->queue_lock, flags);
477 queue_flag_clear(QUEUE_FLAG_CLUSTER, t); 477 queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
478 spin_unlock_irqrestore(t->queue_lock, flags); 478 spin_unlock_irqrestore(t->queue_lock, flags);
479 } 479 }
480 } 480 }
481 EXPORT_SYMBOL(blk_queue_stack_limits); 481 EXPORT_SYMBOL(blk_queue_stack_limits);
482 482
483 /** 483 /**
484 * blk_stack_limits - adjust queue_limits for stacked devices 484 * blk_stack_limits - adjust queue_limits for stacked devices
485 * @t: the stacking driver limits (top device) 485 * @t: the stacking driver limits (top device)
486 * @b: the underlying queue limits (bottom, component device) 486 * @b: the underlying queue limits (bottom, component device)
487 * @start: first data sector within component device 487 * @start: first data sector within component device
488 * 488 *
489 * Description: 489 * Description:
490 * This function is used by stacking drivers like MD and DM to ensure 490 * This function is used by stacking drivers like MD and DM to ensure
491 * that all component devices have compatible block sizes and 491 * that all component devices have compatible block sizes and
492 * alignments. The stacking driver must provide a queue_limits 492 * alignments. The stacking driver must provide a queue_limits
493 * struct (top) and then iteratively call the stacking function for 493 * struct (top) and then iteratively call the stacking function for
494 * all component (bottom) devices. The stacking function will 494 * all component (bottom) devices. The stacking function will
495 * attempt to combine the values and ensure proper alignment. 495 * attempt to combine the values and ensure proper alignment.
496 * 496 *
497 * Returns 0 if the top and bottom queue_limits are compatible. The 497 * Returns 0 if the top and bottom queue_limits are compatible. The
498 * top device's block sizes and alignment offsets may be adjusted to 498 * top device's block sizes and alignment offsets may be adjusted to
499 * ensure alignment with the bottom device. If no compatible sizes 499 * ensure alignment with the bottom device. If no compatible sizes
500 * and alignments exist, -1 is returned and the resulting top 500 * and alignments exist, -1 is returned and the resulting top
501 * queue_limits will have the misaligned flag set to indicate that 501 * queue_limits will have the misaligned flag set to indicate that
502 * the alignment_offset is undefined. 502 * the alignment_offset is undefined.
503 */ 503 */
504 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 504 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
505 sector_t start) 505 sector_t start)
506 { 506 {
507 unsigned int top, bottom, alignment, ret = 0; 507 unsigned int top, bottom, alignment, ret = 0;
508 508
509 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); 509 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
510 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); 510 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
511 t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); 511 t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
512 512
513 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, 513 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
514 b->seg_boundary_mask); 514 b->seg_boundary_mask);
515 515
516 t->max_segments = min_not_zero(t->max_segments, b->max_segments); 516 t->max_segments = min_not_zero(t->max_segments, b->max_segments);
517 517
518 t->max_segment_size = min_not_zero(t->max_segment_size, 518 t->max_segment_size = min_not_zero(t->max_segment_size,
519 b->max_segment_size); 519 b->max_segment_size);
520 520
521 t->misaligned |= b->misaligned; 521 t->misaligned |= b->misaligned;
522 522
523 alignment = queue_limit_alignment_offset(b, start); 523 alignment = queue_limit_alignment_offset(b, start);
524 524
525 /* Bottom device has different alignment. Check that it is 525 /* Bottom device has different alignment. Check that it is
526 * compatible with the current top alignment. 526 * compatible with the current top alignment.
527 */ 527 */
528 if (t->alignment_offset != alignment) { 528 if (t->alignment_offset != alignment) {
529 529
530 top = max(t->physical_block_size, t->io_min) 530 top = max(t->physical_block_size, t->io_min)
531 + t->alignment_offset; 531 + t->alignment_offset;
532 bottom = max(b->physical_block_size, b->io_min) + alignment; 532 bottom = max(b->physical_block_size, b->io_min) + alignment;
533 533
534 /* Verify that top and bottom intervals line up */ 534 /* Verify that top and bottom intervals line up */
535 if (max(top, bottom) & (min(top, bottom) - 1)) { 535 if (max(top, bottom) & (min(top, bottom) - 1)) {
536 t->misaligned = 1; 536 t->misaligned = 1;
537 ret = -1; 537 ret = -1;
538 } 538 }
539 } 539 }
540 540
541 t->logical_block_size = max(t->logical_block_size, 541 t->logical_block_size = max(t->logical_block_size,
542 b->logical_block_size); 542 b->logical_block_size);
543 543
544 t->physical_block_size = max(t->physical_block_size, 544 t->physical_block_size = max(t->physical_block_size,
545 b->physical_block_size); 545 b->physical_block_size);
546 546
547 t->io_min = max(t->io_min, b->io_min); 547 t->io_min = max(t->io_min, b->io_min);
548 t->io_opt = lcm(t->io_opt, b->io_opt); 548 t->io_opt = lcm(t->io_opt, b->io_opt);
549 549
550 t->no_cluster |= b->no_cluster; 550 t->no_cluster |= b->no_cluster;
551 t->discard_zeroes_data &= b->discard_zeroes_data; 551 t->discard_zeroes_data &= b->discard_zeroes_data;
552 552
553 /* Physical block size a multiple of the logical block size? */ 553 /* Physical block size a multiple of the logical block size? */
554 if (t->physical_block_size & (t->logical_block_size - 1)) { 554 if (t->physical_block_size & (t->logical_block_size - 1)) {
555 t->physical_block_size = t->logical_block_size; 555 t->physical_block_size = t->logical_block_size;
556 t->misaligned = 1; 556 t->misaligned = 1;
557 ret = -1; 557 ret = -1;
558 } 558 }
559 559
560 /* Minimum I/O a multiple of the physical block size? */ 560 /* Minimum I/O a multiple of the physical block size? */
561 if (t->io_min & (t->physical_block_size - 1)) { 561 if (t->io_min & (t->physical_block_size - 1)) {
562 t->io_min = t->physical_block_size; 562 t->io_min = t->physical_block_size;
563 t->misaligned = 1; 563 t->misaligned = 1;
564 ret = -1; 564 ret = -1;
565 } 565 }
566 566
567 /* Optimal I/O a multiple of the physical block size? */ 567 /* Optimal I/O a multiple of the physical block size? */
568 if (t->io_opt & (t->physical_block_size - 1)) { 568 if (t->io_opt & (t->physical_block_size - 1)) {
569 t->io_opt = 0; 569 t->io_opt = 0;
570 t->misaligned = 1; 570 t->misaligned = 1;
571 ret = -1; 571 ret = -1;
572 } 572 }
573 573
574 /* Find lowest common alignment_offset */ 574 /* Find lowest common alignment_offset */
575 t->alignment_offset = lcm(t->alignment_offset, alignment) 575 t->alignment_offset = lcm(t->alignment_offset, alignment)
576 & (max(t->physical_block_size, t->io_min) - 1); 576 & (max(t->physical_block_size, t->io_min) - 1);
577 577
578 /* Verify that new alignment_offset is on a logical block boundary */ 578 /* Verify that new alignment_offset is on a logical block boundary */
579 if (t->alignment_offset & (t->logical_block_size - 1)) { 579 if (t->alignment_offset & (t->logical_block_size - 1)) {
580 t->misaligned = 1; 580 t->misaligned = 1;
581 ret = -1; 581 ret = -1;
582 } 582 }
583 583
584 /* Discard alignment and granularity */ 584 /* Discard alignment and granularity */
585 if (b->discard_granularity) { 585 if (b->discard_granularity) {
586 alignment = queue_limit_discard_alignment(b, start); 586 alignment = queue_limit_discard_alignment(b, start);
587 587
588 if (t->discard_granularity != 0 && 588 if (t->discard_granularity != 0 &&
589 t->discard_alignment != alignment) { 589 t->discard_alignment != alignment) {
590 top = t->discard_granularity + t->discard_alignment; 590 top = t->discard_granularity + t->discard_alignment;
591 bottom = b->discard_granularity + alignment; 591 bottom = b->discard_granularity + alignment;
592 592
593 /* Verify that top and bottom intervals line up */ 593 /* Verify that top and bottom intervals line up */
594 if (max(top, bottom) & (min(top, bottom) - 1)) 594 if (max(top, bottom) & (min(top, bottom) - 1))
595 t->discard_misaligned = 1; 595 t->discard_misaligned = 1;
596 } 596 }
597 597
598 t->max_discard_sectors = min_not_zero(t->max_discard_sectors, 598 t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
599 b->max_discard_sectors); 599 b->max_discard_sectors);
600 t->discard_granularity = max(t->discard_granularity, 600 t->discard_granularity = max(t->discard_granularity,
601 b->discard_granularity); 601 b->discard_granularity);
602 t->discard_alignment = lcm(t->discard_alignment, alignment) & 602 t->discard_alignment = lcm(t->discard_alignment, alignment) &
603 (t->discard_granularity - 1); 603 (t->discard_granularity - 1);
604 } 604 }
605 605
606 return ret; 606 return ret;
607 } 607 }
608 EXPORT_SYMBOL(blk_stack_limits); 608 EXPORT_SYMBOL(blk_stack_limits);
609 609
610 /** 610 /**
611 * bdev_stack_limits - adjust queue limits for stacked drivers 611 * bdev_stack_limits - adjust queue limits for stacked drivers
612 * @t: the stacking driver limits (top device) 612 * @t: the stacking driver limits (top device)
613 * @bdev: the component block_device (bottom) 613 * @bdev: the component block_device (bottom)
614 * @start: first data sector within component device 614 * @start: first data sector within component device
615 * 615 *
616 * Description: 616 * Description:
617 * Merges queue limits for a top device and a block_device. Returns 617 * Merges queue limits for a top device and a block_device. Returns
618 * 0 if alignment didn't change. Returns -1 if adding the bottom 618 * 0 if alignment didn't change. Returns -1 if adding the bottom
619 * device caused misalignment. 619 * device caused misalignment.
620 */ 620 */
621 int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, 621 int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
622 sector_t start) 622 sector_t start)
623 { 623 {
624 struct request_queue *bq = bdev_get_queue(bdev); 624 struct request_queue *bq = bdev_get_queue(bdev);
625 625
626 start += get_start_sect(bdev); 626 start += get_start_sect(bdev);
627 627
628 return blk_stack_limits(t, &bq->limits, start); 628 return blk_stack_limits(t, &bq->limits, start);
629 } 629 }
630 EXPORT_SYMBOL(bdev_stack_limits); 630 EXPORT_SYMBOL(bdev_stack_limits);
631 631
632 /** 632 /**
633 * disk_stack_limits - adjust queue limits for stacked drivers 633 * disk_stack_limits - adjust queue limits for stacked drivers
634 * @disk: MD/DM gendisk (top) 634 * @disk: MD/DM gendisk (top)
635 * @bdev: the underlying block device (bottom) 635 * @bdev: the underlying block device (bottom)
636 * @offset: offset to beginning of data within component device 636 * @offset: offset to beginning of data within component device
637 * 637 *
638 * Description: 638 * Description:
639 * Merges the limits for a top level gendisk and a bottom level 639 * Merges the limits for a top level gendisk and a bottom level
640 * block_device. 640 * block_device.
641 */ 641 */
642 void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, 642 void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
643 sector_t offset) 643 sector_t offset)
644 { 644 {
645 struct request_queue *t = disk->queue; 645 struct request_queue *t = disk->queue;
646 struct request_queue *b = bdev_get_queue(bdev); 646 struct request_queue *b = bdev_get_queue(bdev);
647 647
648 if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { 648 if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
649 char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; 649 char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
650 650
651 disk_name(disk, 0, top); 651 disk_name(disk, 0, top);
652 bdevname(bdev, bottom); 652 bdevname(bdev, bottom);
653 653
654 printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", 654 printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
655 top, bottom); 655 top, bottom);
656 } 656 }
657 657
658 if (!t->queue_lock) 658 if (!t->queue_lock)
659 WARN_ON_ONCE(1); 659 WARN_ON_ONCE(1);
660 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { 660 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
661 unsigned long flags; 661 unsigned long flags;
662 662
663 spin_lock_irqsave(t->queue_lock, flags); 663 spin_lock_irqsave(t->queue_lock, flags);
664 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) 664 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
665 queue_flag_clear(QUEUE_FLAG_CLUSTER, t); 665 queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
666 spin_unlock_irqrestore(t->queue_lock, flags); 666 spin_unlock_irqrestore(t->queue_lock, flags);
667 } 667 }
668 } 668 }
669 EXPORT_SYMBOL(disk_stack_limits); 669 EXPORT_SYMBOL(disk_stack_limits);
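/*
 * Illustrative sketch, not part of the patch: an MD/DM-style driver
 * folding every component device's limits into the top-level gendisk.
 * "struct my_conf" and its fields are hypothetical; offsets are kept in
 * 512-byte sectors and converted to bytes for disk_stack_limits().
 */
struct my_conf {
	int nr_devs;
	struct block_device **bdev;
	sector_t *data_offset;		/* in 512b sectors */
};

static void my_stack_all_limits(struct gendisk *disk, struct my_conf *conf)
{
	int i;

	for (i = 0; i < conf->nr_devs; i++)
		disk_stack_limits(disk, conf->bdev[i],
				  conf->data_offset[i] << 9);
}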
670 670
671 /** 671 /**
672 * blk_queue_dma_pad - set pad mask 672 * blk_queue_dma_pad - set pad mask
673 * @q: the request queue for the device 673 * @q: the request queue for the device
674 * @mask: pad mask 674 * @mask: pad mask
675 * 675 *
676 * Set dma pad mask. 676 * Set dma pad mask.
677 * 677 *
678 * Appending pad buffer to a request modifies the last entry of a 678 * Appending pad buffer to a request modifies the last entry of a
679 * scatter list such that it includes the pad buffer. 679 * scatter list such that it includes the pad buffer.
680 **/ 680 **/
681 void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) 681 void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
682 { 682 {
683 q->dma_pad_mask = mask; 683 q->dma_pad_mask = mask;
684 } 684 }
685 EXPORT_SYMBOL(blk_queue_dma_pad); 685 EXPORT_SYMBOL(blk_queue_dma_pad);
686 686
687 /** 687 /**
688 * blk_queue_update_dma_pad - update pad mask 688 * blk_queue_update_dma_pad - update pad mask
689 * @q: the request queue for the device 689 * @q: the request queue for the device
690 * @mask: pad mask 690 * @mask: pad mask
691 * 691 *
692 * Update dma pad mask. 692 * Update dma pad mask.
693 * 693 *
694 * Appending pad buffer to a request modifies the last entry of a 694 * Appending pad buffer to a request modifies the last entry of a
695 * scatter list such that it includes the pad buffer. 695 * scatter list such that it includes the pad buffer.
696 **/ 696 **/
697 void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) 697 void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
698 { 698 {
699 if (mask > q->dma_pad_mask) 699 if (mask > q->dma_pad_mask)
700 q->dma_pad_mask = mask; 700 q->dma_pad_mask = mask;
701 } 701 }
702 EXPORT_SYMBOL(blk_queue_update_dma_pad); 702 EXPORT_SYMBOL(blk_queue_update_dma_pad);
703 703
704 /** 704 /**
705 * blk_queue_dma_drain - Set up a drain buffer for excess dma. 705 * blk_queue_dma_drain - Set up a drain buffer for excess dma.
706 * @q: the request queue for the device 706 * @q: the request queue for the device
707 * @dma_drain_needed: fn which returns non-zero if drain is necessary 707 * @dma_drain_needed: fn which returns non-zero if drain is necessary
708 * @buf: physically contiguous buffer 708 * @buf: physically contiguous buffer
709 * @size: size of the buffer in bytes 709 * @size: size of the buffer in bytes
710 * 710 *
711 * Some devices have excess DMA problems and can't simply discard (or 711 * Some devices have excess DMA problems and can't simply discard (or
712 * zero fill) the unwanted piece of the transfer. They have to have a 712 * zero fill) the unwanted piece of the transfer. They have to have a
713 * real area of memory to transfer it into. The use case for this is 713 * real area of memory to transfer it into. The use case for this is
714 * ATAPI devices in DMA mode. If the packet command causes a transfer 714 * ATAPI devices in DMA mode. If the packet command causes a transfer
715 * bigger than the transfer size some HBAs will lock up if there 715 * bigger than the transfer size some HBAs will lock up if there
716 * aren't DMA elements to contain the excess transfer. What this API 716 * aren't DMA elements to contain the excess transfer. What this API
717 * does is adjust the queue so that the buf is always appended 717 * does is adjust the queue so that the buf is always appended
718 * silently to the scatterlist. 718 * silently to the scatterlist.
719 * 719 *
720 * Note: This routine adjusts max_hw_segments to make room for appending 720 * Note: This routine adjusts max_hw_segments to make room for appending
721 * the drain buffer. If you call blk_queue_max_segments() after calling 721 * the drain buffer. If you call blk_queue_max_segments() after calling
722 * this routine, you must set the limit to one fewer than your device 722 * this routine, you must set the limit to one fewer than your device
723 * can support otherwise there won't be room for the drain buffer. 723 * can support otherwise there won't be room for the drain buffer.
724 */ 724 */
725 int blk_queue_dma_drain(struct request_queue *q, 725 int blk_queue_dma_drain(struct request_queue *q,
726 dma_drain_needed_fn *dma_drain_needed, 726 dma_drain_needed_fn *dma_drain_needed,
727 void *buf, unsigned int size) 727 void *buf, unsigned int size)
728 { 728 {
729 if (queue_max_segments(q) < 2) 729 if (queue_max_segments(q) < 2)
730 return -EINVAL; 730 return -EINVAL;
731 /* make room for appending the drain */ 731 /* make room for appending the drain */
732 blk_queue_max_segments(q, queue_max_segments(q) - 1); 732 blk_queue_max_segments(q, queue_max_segments(q) - 1);
733 q->dma_drain_needed = dma_drain_needed; 733 q->dma_drain_needed = dma_drain_needed;
734 q->dma_drain_buffer = buf; 734 q->dma_drain_buffer = buf;
735 q->dma_drain_size = size; 735 q->dma_drain_size = size;
736 736
737 return 0; 737 return 0;
738 } 738 }
739 EXPORT_SYMBOL_GPL(blk_queue_dma_drain); 739 EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
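/*
 * Illustrative sketch, not part of the patch: an ATAPI-style driver
 * reserving a drain buffer for packet commands.  The buffer size and
 * names are hypothetical.
 */
static int my_drain_needed(struct request *rq)
{
	/* only packet (BLOCK_PC) commands can over-transfer */
	return rq->cmd_type == REQ_TYPE_BLOCK_PC;
}

static int my_setup_drain(struct request_queue *q)
{
	void *buf = kmalloc(256, GFP_KERNEL);	/* hypothetical drain size */
	int ret;

	if (!buf)
		return -ENOMEM;
	ret = blk_queue_dma_drain(q, my_drain_needed, buf, 256);
	if (ret)
		kfree(buf);
	return ret;
}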
740 740
741 /** 741 /**
742 * blk_queue_segment_boundary - set boundary rules for segment merging 742 * blk_queue_segment_boundary - set boundary rules for segment merging
743 * @q: the request queue for the device 743 * @q: the request queue for the device
744 * @mask: the memory boundary mask 744 * @mask: the memory boundary mask
745 **/ 745 **/
746 void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) 746 void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
747 { 747 {
748 if (mask < PAGE_CACHE_SIZE - 1) { 748 if (mask < PAGE_CACHE_SIZE - 1) {
749 mask = PAGE_CACHE_SIZE - 1; 749 mask = PAGE_CACHE_SIZE - 1;
750 printk(KERN_INFO "%s: set to minimum %lx\n", 750 printk(KERN_INFO "%s: set to minimum %lx\n",
751 __func__, mask); 751 __func__, mask);
752 } 752 }
753 753
754 q->limits.seg_boundary_mask = mask; 754 q->limits.seg_boundary_mask = mask;
755 } 755 }
756 EXPORT_SYMBOL(blk_queue_segment_boundary); 756 EXPORT_SYMBOL(blk_queue_segment_boundary);
757 757
758 /** 758 /**
759 * blk_queue_dma_alignment - set dma length and memory alignment 759 * blk_queue_dma_alignment - set dma length and memory alignment
760 * @q: the request queue for the device 760 * @q: the request queue for the device
761 * @mask: alignment mask 761 * @mask: alignment mask
762 * 762 *
763 * Description: 763 * Description:
764 * Set required memory and length alignment for direct DMA transactions. 764 * Set required memory and length alignment for direct DMA transactions.
765 * This is used when building direct I/O requests for the queue. 765 * This is used when building direct I/O requests for the queue.
766 * 766 *
767 **/ 767 **/
768 void blk_queue_dma_alignment(struct request_queue *q, int mask) 768 void blk_queue_dma_alignment(struct request_queue *q, int mask)
769 { 769 {
770 q->dma_alignment = mask; 770 q->dma_alignment = mask;
771 } 771 }
772 EXPORT_SYMBOL(blk_queue_dma_alignment); 772 EXPORT_SYMBOL(blk_queue_dma_alignment);
773 773
774 /** 774 /**
775 * blk_queue_update_dma_alignment - update dma length and memory alignment 775 * blk_queue_update_dma_alignment - update dma length and memory alignment
776 * @q: the request queue for the device 776 * @q: the request queue for the device
777 * @mask: alignment mask 777 * @mask: alignment mask
778 * 778 *
779 * Description: 779 * Description:
780 * Update required memory and length alignment for direct DMA transactions. 780 * Update required memory and length alignment for direct DMA transactions.
781 * If the requested alignment is larger than the current alignment, then 781 * If the requested alignment is larger than the current alignment, then
782 * the current queue alignment is updated to the new value, otherwise it 782 * the current queue alignment is updated to the new value, otherwise it
783 * is left alone. The design of this is to allow multiple objects 783 * is left alone. The design of this is to allow multiple objects
784 * (driver, device, transport etc) to set their respective 784 * (driver, device, transport etc) to set their respective
785 * alignments without having them interfere. 785 * alignments without having them interfere.
786 * 786 *
787 **/ 787 **/
788 void blk_queue_update_dma_alignment(struct request_queue *q, int mask) 788 void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
789 { 789 {
790 BUG_ON(mask > PAGE_SIZE); 790 BUG_ON(mask > PAGE_SIZE);
791 791
792 if (mask > q->dma_alignment) 792 if (mask > q->dma_alignment)
793 q->dma_alignment = mask; 793 q->dma_alignment = mask;
794 } 794 }
795 EXPORT_SYMBOL(blk_queue_update_dma_alignment); 795 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
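A minimal illustration of the "only grows stricter" behaviour described above (mask values hypothetical):

	blk_queue_update_dma_alignment(q, 0x3);		/* driver asks for 4-byte alignment */
	blk_queue_update_dma_alignment(q, 0x1ff);	/* transport asks for 512-byte alignment */
	/* q->dma_alignment is now 0x1ff; a later call with a smaller mask is a no-op */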
796 796
797 /**
798 * blk_queue_flush - configure queue's cache flush capability
799 * @q: the request queue for the device
800 * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
801 *
802 * Tell the block layer about the cache flush capability of @q. If it
803 * supports flushing, REQ_FLUSH should be set. If it supports bypassing
804 * the write cache for individual writes, REQ_FUA should be set.
805 */
806 void blk_queue_flush(struct request_queue *q, unsigned int flush)
807 {
808 WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
809
810 if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
811 flush &= ~REQ_FUA;
812
813 q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
814 }
815 EXPORT_SYMBOL_GPL(blk_queue_flush);
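As a sketch of the intended usage (queue variable name illustrative, not from this patch): a device with a volatile write cache that also handles FUA writes advertises both flags, whereas a write-through or cache-less device, like the brd conversion further down in this commit, simply makes no call and keeps the default of 0.

	/* device with a flushable write cache and FUA support */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

	/* device with a write cache but no FUA */
	blk_queue_flush(q, REQ_FLUSH);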
816
797 static int __init blk_settings_init(void) 817 static int __init blk_settings_init(void)
798 { 818 {
799 blk_max_low_pfn = max_low_pfn - 1; 819 blk_max_low_pfn = max_low_pfn - 1;
800 blk_max_pfn = max_pfn - 1; 820 blk_max_pfn = max_pfn - 1;
801 return 0; 821 return 0;
802 } 822 }
803 subsys_initcall(blk_settings_init); 823 subsys_initcall(blk_settings_init);
804 824
drivers/block/brd.c
1 /* 1 /*
2 * Ram backed block device driver. 2 * Ram backed block device driver.
3 * 3 *
4 * Copyright (C) 2007 Nick Piggin 4 * Copyright (C) 2007 Nick Piggin
5 * Copyright (C) 2007 Novell Inc. 5 * Copyright (C) 2007 Novell Inc.
6 * 6 *
7 * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright 7 * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
8 * of their respective owners. 8 * of their respective owners.
9 */ 9 */
10 10
11 #include <linux/init.h> 11 #include <linux/init.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/moduleparam.h> 13 #include <linux/moduleparam.h>
14 #include <linux/major.h> 14 #include <linux/major.h>
15 #include <linux/blkdev.h> 15 #include <linux/blkdev.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/highmem.h> 17 #include <linux/highmem.h>
18 #include <linux/smp_lock.h> 18 #include <linux/smp_lock.h>
19 #include <linux/radix-tree.h> 19 #include <linux/radix-tree.h>
20 #include <linux/buffer_head.h> /* invalidate_bh_lrus() */ 20 #include <linux/buffer_head.h> /* invalidate_bh_lrus() */
21 #include <linux/slab.h> 21 #include <linux/slab.h>
22 22
23 #include <asm/uaccess.h> 23 #include <asm/uaccess.h>
24 24
25 #define SECTOR_SHIFT 9 25 #define SECTOR_SHIFT 9
26 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 26 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
27 #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) 27 #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
28 28
29 /* 29 /*
30 * Each block ramdisk device has a radix_tree brd_pages of pages that stores 30 * Each block ramdisk device has a radix_tree brd_pages of pages that stores
31 * the pages containing the block device's contents. A brd page's ->index is 31 * the pages containing the block device's contents. A brd page's ->index is
32 * its offset in PAGE_SIZE units. This is similar to, but in no way connected 32 * its offset in PAGE_SIZE units. This is similar to, but in no way connected
33 * with, the kernel's pagecache or buffer cache (which sit above our block 33 * with, the kernel's pagecache or buffer cache (which sit above our block
34 * device). 34 * device).
35 */ 35 */
36 struct brd_device { 36 struct brd_device {
37 int brd_number; 37 int brd_number;
38 int brd_refcnt; 38 int brd_refcnt;
39 loff_t brd_offset; 39 loff_t brd_offset;
40 loff_t brd_sizelimit; 40 loff_t brd_sizelimit;
41 unsigned brd_blocksize; 41 unsigned brd_blocksize;
42 42
43 struct request_queue *brd_queue; 43 struct request_queue *brd_queue;
44 struct gendisk *brd_disk; 44 struct gendisk *brd_disk;
45 struct list_head brd_list; 45 struct list_head brd_list;
46 46
47 /* 47 /*
48 * Backing store of pages and lock to protect it. This is the contents 48 * Backing store of pages and lock to protect it. This is the contents
49 * of the block device. 49 * of the block device.
50 */ 50 */
51 spinlock_t brd_lock; 51 spinlock_t brd_lock;
52 struct radix_tree_root brd_pages; 52 struct radix_tree_root brd_pages;
53 }; 53 };
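As a quick worked example of the indexing scheme (assuming 4 KiB pages, so PAGE_SECTORS_SHIFT is 12 - 9 = 3): a request for sector 10000 is served from the brd page at radix-tree index 10000 >> 3 = 1250, and that page's ->index field holds 1250 as well.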
54 54
55 /* 55 /*
56 * Look up and return a brd's page for a given sector. 56 * Look up and return a brd's page for a given sector.
57 */ 57 */
58 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) 58 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
59 { 59 {
60 pgoff_t idx; 60 pgoff_t idx;
61 struct page *page; 61 struct page *page;
62 62
63 /* 63 /*
64 * The page lifetime is protected by the fact that we have opened the 64 * The page lifetime is protected by the fact that we have opened the
65 * device node -- brd pages will never be deleted under us, so we 65 * device node -- brd pages will never be deleted under us, so we
66 * don't need any further locking or refcounting. 66 * don't need any further locking or refcounting.
67 * 67 *
68 * This is strictly true for the radix-tree nodes as well (i.e. we 68 * This is strictly true for the radix-tree nodes as well (i.e. we
69 * don't actually need the rcu_read_lock()); however, that is not a 69 * don't actually need the rcu_read_lock()); however, that is not a
70 * documented feature of the radix-tree API so it is better to be 70 * documented feature of the radix-tree API so it is better to be
71 * safe here (we don't have total exclusion from radix tree updates 71 * safe here (we don't have total exclusion from radix tree updates
72 * here, only deletes). 72 * here, only deletes).
73 */ 73 */
74 rcu_read_lock(); 74 rcu_read_lock();
75 idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ 75 idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
76 page = radix_tree_lookup(&brd->brd_pages, idx); 76 page = radix_tree_lookup(&brd->brd_pages, idx);
77 rcu_read_unlock(); 77 rcu_read_unlock();
78 78
79 BUG_ON(page && page->index != idx); 79 BUG_ON(page && page->index != idx);
80 80
81 return page; 81 return page;
82 } 82 }
83 83
84 /* 84 /*
85 * Look up and return a brd's page for a given sector. 85 * Look up and return a brd's page for a given sector.
86 * If one does not exist, allocate an empty page, and insert that. Then 86 * If one does not exist, allocate an empty page, and insert that. Then
87 * return it. 87 * return it.
88 */ 88 */
89 static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) 89 static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
90 { 90 {
91 pgoff_t idx; 91 pgoff_t idx;
92 struct page *page; 92 struct page *page;
93 gfp_t gfp_flags; 93 gfp_t gfp_flags;
94 94
95 page = brd_lookup_page(brd, sector); 95 page = brd_lookup_page(brd, sector);
96 if (page) 96 if (page)
97 return page; 97 return page;
98 98
99 /* 99 /*
100 * Must use NOIO because we don't want to recurse back into the 100 * Must use NOIO because we don't want to recurse back into the
101 * block or filesystem layers from page reclaim. 101 * block or filesystem layers from page reclaim.
102 * 102 *
103 * Cannot support XIP and highmem, because our ->direct_access 103 * Cannot support XIP and highmem, because our ->direct_access
104 * routine for XIP must return memory that is always addressable. 104 * routine for XIP must return memory that is always addressable.
105 * If XIP was reworked to use pfns and kmap throughout, this 105 * If XIP was reworked to use pfns and kmap throughout, this
106 * restriction might be able to be lifted. 106 * restriction might be able to be lifted.
107 */ 107 */
108 gfp_flags = GFP_NOIO | __GFP_ZERO; 108 gfp_flags = GFP_NOIO | __GFP_ZERO;
109 #ifndef CONFIG_BLK_DEV_XIP 109 #ifndef CONFIG_BLK_DEV_XIP
110 gfp_flags |= __GFP_HIGHMEM; 110 gfp_flags |= __GFP_HIGHMEM;
111 #endif 111 #endif
112 page = alloc_page(gfp_flags); 112 page = alloc_page(gfp_flags);
113 if (!page) 113 if (!page)
114 return NULL; 114 return NULL;
115 115
116 if (radix_tree_preload(GFP_NOIO)) { 116 if (radix_tree_preload(GFP_NOIO)) {
117 __free_page(page); 117 __free_page(page);
118 return NULL; 118 return NULL;
119 } 119 }
120 120
121 spin_lock(&brd->brd_lock); 121 spin_lock(&brd->brd_lock);
122 idx = sector >> PAGE_SECTORS_SHIFT; 122 idx = sector >> PAGE_SECTORS_SHIFT;
123 if (radix_tree_insert(&brd->brd_pages, idx, page)) { 123 if (radix_tree_insert(&brd->brd_pages, idx, page)) {
124 __free_page(page); 124 __free_page(page);
125 page = radix_tree_lookup(&brd->brd_pages, idx); 125 page = radix_tree_lookup(&brd->brd_pages, idx);
126 BUG_ON(!page); 126 BUG_ON(!page);
127 BUG_ON(page->index != idx); 127 BUG_ON(page->index != idx);
128 } else 128 } else
129 page->index = idx; 129 page->index = idx;
130 spin_unlock(&brd->brd_lock); 130 spin_unlock(&brd->brd_lock);
131 131
132 radix_tree_preload_end(); 132 radix_tree_preload_end();
133 133
134 return page; 134 return page;
135 } 135 }
136 136
137 static void brd_free_page(struct brd_device *brd, sector_t sector) 137 static void brd_free_page(struct brd_device *brd, sector_t sector)
138 { 138 {
139 struct page *page; 139 struct page *page;
140 pgoff_t idx; 140 pgoff_t idx;
141 141
142 spin_lock(&brd->brd_lock); 142 spin_lock(&brd->brd_lock);
143 idx = sector >> PAGE_SECTORS_SHIFT; 143 idx = sector >> PAGE_SECTORS_SHIFT;
144 page = radix_tree_delete(&brd->brd_pages, idx); 144 page = radix_tree_delete(&brd->brd_pages, idx);
145 spin_unlock(&brd->brd_lock); 145 spin_unlock(&brd->brd_lock);
146 if (page) 146 if (page)
147 __free_page(page); 147 __free_page(page);
148 } 148 }
149 149
150 static void brd_zero_page(struct brd_device *brd, sector_t sector) 150 static void brd_zero_page(struct brd_device *brd, sector_t sector)
151 { 151 {
152 struct page *page; 152 struct page *page;
153 153
154 page = brd_lookup_page(brd, sector); 154 page = brd_lookup_page(brd, sector);
155 if (page) 155 if (page)
156 clear_highpage(page); 156 clear_highpage(page);
157 } 157 }
158 158
159 /* 159 /*
160 * Free all backing store pages and radix tree. This must only be called when 160 * Free all backing store pages and radix tree. This must only be called when
161 * there are no other users of the device. 161 * there are no other users of the device.
162 */ 162 */
163 #define FREE_BATCH 16 163 #define FREE_BATCH 16
164 static void brd_free_pages(struct brd_device *brd) 164 static void brd_free_pages(struct brd_device *brd)
165 { 165 {
166 unsigned long pos = 0; 166 unsigned long pos = 0;
167 struct page *pages[FREE_BATCH]; 167 struct page *pages[FREE_BATCH];
168 int nr_pages; 168 int nr_pages;
169 169
170 do { 170 do {
171 int i; 171 int i;
172 172
173 nr_pages = radix_tree_gang_lookup(&brd->brd_pages, 173 nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
174 (void **)pages, pos, FREE_BATCH); 174 (void **)pages, pos, FREE_BATCH);
175 175
176 for (i = 0; i < nr_pages; i++) { 176 for (i = 0; i < nr_pages; i++) {
177 void *ret; 177 void *ret;
178 178
179 BUG_ON(pages[i]->index < pos); 179 BUG_ON(pages[i]->index < pos);
180 pos = pages[i]->index; 180 pos = pages[i]->index;
181 ret = radix_tree_delete(&brd->brd_pages, pos); 181 ret = radix_tree_delete(&brd->brd_pages, pos);
182 BUG_ON(!ret || ret != pages[i]); 182 BUG_ON(!ret || ret != pages[i]);
183 __free_page(pages[i]); 183 __free_page(pages[i]);
184 } 184 }
185 185
186 pos++; 186 pos++;
187 187
188 /* 188 /*
189 * This assumes radix_tree_gang_lookup always returns as 189 * This assumes radix_tree_gang_lookup always returns as
190 * many pages as possible. If the radix-tree code changes, 190 * many pages as possible. If the radix-tree code changes,
191 * this will have to change as well. 191 * this will have to change as well.
192 */ 192 */
193 } while (nr_pages == FREE_BATCH); 193 } while (nr_pages == FREE_BATCH);
194 } 194 }
195 195
196 /* 196 /*
197 * copy_to_brd_setup must be called before copy_to_brd. It may sleep. 197 * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
198 */ 198 */
199 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) 199 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
200 { 200 {
201 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; 201 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
202 size_t copy; 202 size_t copy;
203 203
204 copy = min_t(size_t, n, PAGE_SIZE - offset); 204 copy = min_t(size_t, n, PAGE_SIZE - offset);
205 if (!brd_insert_page(brd, sector)) 205 if (!brd_insert_page(brd, sector))
206 return -ENOMEM; 206 return -ENOMEM;
207 if (copy < n) { 207 if (copy < n) {
208 sector += copy >> SECTOR_SHIFT; 208 sector += copy >> SECTOR_SHIFT;
209 if (!brd_insert_page(brd, sector)) 209 if (!brd_insert_page(brd, sector))
210 return -ENOMEM; 210 return -ENOMEM;
211 } 211 }
212 return 0; 212 return 0;
213 } 213 }
214 214
215 static void discard_from_brd(struct brd_device *brd, 215 static void discard_from_brd(struct brd_device *brd,
216 sector_t sector, size_t n) 216 sector_t sector, size_t n)
217 { 217 {
218 while (n >= PAGE_SIZE) { 218 while (n >= PAGE_SIZE) {
219 /* 219 /*
220 * Don't want to actually discard pages here because 220 * Don't want to actually discard pages here because
221 * re-allocating the pages can result in writeback 221 * re-allocating the pages can result in writeback
222 * deadlocks under heavy load. 222 * deadlocks under heavy load.
223 */ 223 */
224 if (0) 224 if (0)
225 brd_free_page(brd, sector); 225 brd_free_page(brd, sector);
226 else 226 else
227 brd_zero_page(brd, sector); 227 brd_zero_page(brd, sector);
228 sector += PAGE_SIZE >> SECTOR_SHIFT; 228 sector += PAGE_SIZE >> SECTOR_SHIFT;
229 n -= PAGE_SIZE; 229 n -= PAGE_SIZE;
230 } 230 }
231 } 231 }
232 232
233 /* 233 /*
234 * Copy n bytes from src to the brd starting at sector. Does not sleep. 234 * Copy n bytes from src to the brd starting at sector. Does not sleep.
235 */ 235 */
236 static void copy_to_brd(struct brd_device *brd, const void *src, 236 static void copy_to_brd(struct brd_device *brd, const void *src,
237 sector_t sector, size_t n) 237 sector_t sector, size_t n)
238 { 238 {
239 struct page *page; 239 struct page *page;
240 void *dst; 240 void *dst;
241 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; 241 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
242 size_t copy; 242 size_t copy;
243 243
244 copy = min_t(size_t, n, PAGE_SIZE - offset); 244 copy = min_t(size_t, n, PAGE_SIZE - offset);
245 page = brd_lookup_page(brd, sector); 245 page = brd_lookup_page(brd, sector);
246 BUG_ON(!page); 246 BUG_ON(!page);
247 247
248 dst = kmap_atomic(page, KM_USER1); 248 dst = kmap_atomic(page, KM_USER1);
249 memcpy(dst + offset, src, copy); 249 memcpy(dst + offset, src, copy);
250 kunmap_atomic(dst, KM_USER1); 250 kunmap_atomic(dst, KM_USER1);
251 251
252 if (copy < n) { 252 if (copy < n) {
253 src += copy; 253 src += copy;
254 sector += copy >> SECTOR_SHIFT; 254 sector += copy >> SECTOR_SHIFT;
255 copy = n - copy; 255 copy = n - copy;
256 page = brd_lookup_page(brd, sector); 256 page = brd_lookup_page(brd, sector);
257 BUG_ON(!page); 257 BUG_ON(!page);
258 258
259 dst = kmap_atomic(page, KM_USER1); 259 dst = kmap_atomic(page, KM_USER1);
260 memcpy(dst, src, copy); 260 memcpy(dst, src, copy);
261 kunmap_atomic(dst, KM_USER1); 261 kunmap_atomic(dst, KM_USER1);
262 } 262 }
263 } 263 }
264 264
265 /* 265 /*
266 * Copy n bytes to dst from the brd starting at sector. Does not sleep. 266 * Copy n bytes to dst from the brd starting at sector. Does not sleep.
267 */ 267 */
268 static void copy_from_brd(void *dst, struct brd_device *brd, 268 static void copy_from_brd(void *dst, struct brd_device *brd,
269 sector_t sector, size_t n) 269 sector_t sector, size_t n)
270 { 270 {
271 struct page *page; 271 struct page *page;
272 void *src; 272 void *src;
273 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; 273 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
274 size_t copy; 274 size_t copy;
275 275
276 copy = min_t(size_t, n, PAGE_SIZE - offset); 276 copy = min_t(size_t, n, PAGE_SIZE - offset);
277 page = brd_lookup_page(brd, sector); 277 page = brd_lookup_page(brd, sector);
278 if (page) { 278 if (page) {
279 src = kmap_atomic(page, KM_USER1); 279 src = kmap_atomic(page, KM_USER1);
280 memcpy(dst, src + offset, copy); 280 memcpy(dst, src + offset, copy);
281 kunmap_atomic(src, KM_USER1); 281 kunmap_atomic(src, KM_USER1);
282 } else 282 } else
283 memset(dst, 0, copy); 283 memset(dst, 0, copy);
284 284
285 if (copy < n) { 285 if (copy < n) {
286 dst += copy; 286 dst += copy;
287 sector += copy >> SECTOR_SHIFT; 287 sector += copy >> SECTOR_SHIFT;
288 copy = n - copy; 288 copy = n - copy;
289 page = brd_lookup_page(brd, sector); 289 page = brd_lookup_page(brd, sector);
290 if (page) { 290 if (page) {
291 src = kmap_atomic(page, KM_USER1); 291 src = kmap_atomic(page, KM_USER1);
292 memcpy(dst, src, copy); 292 memcpy(dst, src, copy);
293 kunmap_atomic(src, KM_USER1); 293 kunmap_atomic(src, KM_USER1);
294 } else 294 } else
295 memset(dst, 0, copy); 295 memset(dst, 0, copy);
296 } 296 }
297 } 297 }
298 298
299 /* 299 /*
300 * Process a single bvec of a bio. 300 * Process a single bvec of a bio.
301 */ 301 */
302 static int brd_do_bvec(struct brd_device *brd, struct page *page, 302 static int brd_do_bvec(struct brd_device *brd, struct page *page,
303 unsigned int len, unsigned int off, int rw, 303 unsigned int len, unsigned int off, int rw,
304 sector_t sector) 304 sector_t sector)
305 { 305 {
306 void *mem; 306 void *mem;
307 int err = 0; 307 int err = 0;
308 308
309 if (rw != READ) { 309 if (rw != READ) {
310 err = copy_to_brd_setup(brd, sector, len); 310 err = copy_to_brd_setup(brd, sector, len);
311 if (err) 311 if (err)
312 goto out; 312 goto out;
313 } 313 }
314 314
315 mem = kmap_atomic(page, KM_USER0); 315 mem = kmap_atomic(page, KM_USER0);
316 if (rw == READ) { 316 if (rw == READ) {
317 copy_from_brd(mem + off, brd, sector, len); 317 copy_from_brd(mem + off, brd, sector, len);
318 flush_dcache_page(page); 318 flush_dcache_page(page);
319 } else { 319 } else {
320 flush_dcache_page(page); 320 flush_dcache_page(page);
321 copy_to_brd(brd, mem + off, sector, len); 321 copy_to_brd(brd, mem + off, sector, len);
322 } 322 }
323 kunmap_atomic(mem, KM_USER0); 323 kunmap_atomic(mem, KM_USER0);
324 324
325 out: 325 out:
326 return err; 326 return err;
327 } 327 }
328 328
329 static int brd_make_request(struct request_queue *q, struct bio *bio) 329 static int brd_make_request(struct request_queue *q, struct bio *bio)
330 { 330 {
331 struct block_device *bdev = bio->bi_bdev; 331 struct block_device *bdev = bio->bi_bdev;
332 struct brd_device *brd = bdev->bd_disk->private_data; 332 struct brd_device *brd = bdev->bd_disk->private_data;
333 int rw; 333 int rw;
334 struct bio_vec *bvec; 334 struct bio_vec *bvec;
335 sector_t sector; 335 sector_t sector;
336 int i; 336 int i;
337 int err = -EIO; 337 int err = -EIO;
338 338
339 sector = bio->bi_sector; 339 sector = bio->bi_sector;
340 if (sector + (bio->bi_size >> SECTOR_SHIFT) > 340 if (sector + (bio->bi_size >> SECTOR_SHIFT) >
341 get_capacity(bdev->bd_disk)) 341 get_capacity(bdev->bd_disk))
342 goto out; 342 goto out;
343 343
344 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 344 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
345 err = 0; 345 err = 0;
346 discard_from_brd(brd, sector, bio->bi_size); 346 discard_from_brd(brd, sector, bio->bi_size);
347 goto out; 347 goto out;
348 } 348 }
349 349
350 rw = bio_rw(bio); 350 rw = bio_rw(bio);
351 if (rw == READA) 351 if (rw == READA)
352 rw = READ; 352 rw = READ;
353 353
354 bio_for_each_segment(bvec, bio, i) { 354 bio_for_each_segment(bvec, bio, i) {
355 unsigned int len = bvec->bv_len; 355 unsigned int len = bvec->bv_len;
356 err = brd_do_bvec(brd, bvec->bv_page, len, 356 err = brd_do_bvec(brd, bvec->bv_page, len,
357 bvec->bv_offset, rw, sector); 357 bvec->bv_offset, rw, sector);
358 if (err) 358 if (err)
359 break; 359 break;
360 sector += len >> SECTOR_SHIFT; 360 sector += len >> SECTOR_SHIFT;
361 } 361 }
362 362
363 out: 363 out:
364 bio_endio(bio, err); 364 bio_endio(bio, err);
365 365
366 return 0; 366 return 0;
367 } 367 }
368 368
369 #ifdef CONFIG_BLK_DEV_XIP 369 #ifdef CONFIG_BLK_DEV_XIP
370 static int brd_direct_access(struct block_device *bdev, sector_t sector, 370 static int brd_direct_access(struct block_device *bdev, sector_t sector,
371 void **kaddr, unsigned long *pfn) 371 void **kaddr, unsigned long *pfn)
372 { 372 {
373 struct brd_device *brd = bdev->bd_disk->private_data; 373 struct brd_device *brd = bdev->bd_disk->private_data;
374 struct page *page; 374 struct page *page;
375 375
376 if (!brd) 376 if (!brd)
377 return -ENODEV; 377 return -ENODEV;
378 if (sector & (PAGE_SECTORS-1)) 378 if (sector & (PAGE_SECTORS-1))
379 return -EINVAL; 379 return -EINVAL;
380 if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk)) 380 if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk))
381 return -ERANGE; 381 return -ERANGE;
382 page = brd_insert_page(brd, sector); 382 page = brd_insert_page(brd, sector);
383 if (!page) 383 if (!page)
384 return -ENOMEM; 384 return -ENOMEM;
385 *kaddr = page_address(page); 385 *kaddr = page_address(page);
386 *pfn = page_to_pfn(page); 386 *pfn = page_to_pfn(page);
387 387
388 return 0; 388 return 0;
389 } 389 }
390 #endif 390 #endif
391 391
392 static int brd_ioctl(struct block_device *bdev, fmode_t mode, 392 static int brd_ioctl(struct block_device *bdev, fmode_t mode,
393 unsigned int cmd, unsigned long arg) 393 unsigned int cmd, unsigned long arg)
394 { 394 {
395 int error; 395 int error;
396 struct brd_device *brd = bdev->bd_disk->private_data; 396 struct brd_device *brd = bdev->bd_disk->private_data;
397 397
398 if (cmd != BLKFLSBUF) 398 if (cmd != BLKFLSBUF)
399 return -ENOTTY; 399 return -ENOTTY;
400 400
401 /* 401 /*
402 * ram device BLKFLSBUF has special semantics, we want to actually 402 * ram device BLKFLSBUF has special semantics, we want to actually
403 * release and destroy the ramdisk data. 403 * release and destroy the ramdisk data.
404 */ 404 */
405 lock_kernel(); 405 lock_kernel();
406 mutex_lock(&bdev->bd_mutex); 406 mutex_lock(&bdev->bd_mutex);
407 error = -EBUSY; 407 error = -EBUSY;
408 if (bdev->bd_openers <= 1) { 408 if (bdev->bd_openers <= 1) {
409 /* 409 /*
410 * Invalidate the cache first, so it isn't written 410 * Invalidate the cache first, so it isn't written
411 * back to the device. 411 * back to the device.
412 * 412 *
413 * Another thread might instantiate more buffercache here, 413 * Another thread might instantiate more buffercache here,
414 * but there is not much we can do to close that race. 414 * but there is not much we can do to close that race.
415 */ 415 */
416 invalidate_bh_lrus(); 416 invalidate_bh_lrus();
417 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 417 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
418 brd_free_pages(brd); 418 brd_free_pages(brd);
419 error = 0; 419 error = 0;
420 } 420 }
421 mutex_unlock(&bdev->bd_mutex); 421 mutex_unlock(&bdev->bd_mutex);
422 unlock_kernel(); 422 unlock_kernel();
423 423
424 return error; 424 return error;
425 } 425 }
426 426
427 static const struct block_device_operations brd_fops = { 427 static const struct block_device_operations brd_fops = {
428 .owner = THIS_MODULE, 428 .owner = THIS_MODULE,
429 .ioctl = brd_ioctl, 429 .ioctl = brd_ioctl,
430 #ifdef CONFIG_BLK_DEV_XIP 430 #ifdef CONFIG_BLK_DEV_XIP
431 .direct_access = brd_direct_access, 431 .direct_access = brd_direct_access,
432 #endif 432 #endif
433 }; 433 };
434 434
435 /* 435 /*
436 * And now the modules code and kernel interface. 436 * And now the modules code and kernel interface.
437 */ 437 */
438 static int rd_nr; 438 static int rd_nr;
439 int rd_size = CONFIG_BLK_DEV_RAM_SIZE; 439 int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
440 static int max_part; 440 static int max_part;
441 static int part_shift; 441 static int part_shift;
442 module_param(rd_nr, int, 0); 442 module_param(rd_nr, int, 0);
443 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); 443 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
444 module_param(rd_size, int, 0); 444 module_param(rd_size, int, 0);
445 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); 445 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
446 module_param(max_part, int, 0); 446 module_param(max_part, int, 0);
447 MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk"); 447 MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
448 MODULE_LICENSE("GPL"); 448 MODULE_LICENSE("GPL");
449 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); 449 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
450 MODULE_ALIAS("rd"); 450 MODULE_ALIAS("rd");
451 451
452 #ifndef MODULE 452 #ifndef MODULE
453 /* Legacy boot options - nonmodular */ 453 /* Legacy boot options - nonmodular */
454 static int __init ramdisk_size(char *str) 454 static int __init ramdisk_size(char *str)
455 { 455 {
456 rd_size = simple_strtol(str, NULL, 0); 456 rd_size = simple_strtol(str, NULL, 0);
457 return 1; 457 return 1;
458 } 458 }
459 __setup("ramdisk_size=", ramdisk_size); 459 __setup("ramdisk_size=", ramdisk_size);
460 #endif 460 #endif
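For illustration only (values made up): loading the module with rd_nr=2 and rd_size=16384 creates /dev/ram0 and /dev/ram1 of 16 MiB each and, per the comment in brd_init() below, makes 2 a hard limit on the number of devices.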
461 461
462 /* 462 /*
463 * The device scheme is derived from loop.c. Keep them in sync where possible 463 * The device scheme is derived from loop.c. Keep them in sync where possible
464 * (should share code eventually). 464 * (should share code eventually).
465 */ 465 */
466 static LIST_HEAD(brd_devices); 466 static LIST_HEAD(brd_devices);
467 static DEFINE_MUTEX(brd_devices_mutex); 467 static DEFINE_MUTEX(brd_devices_mutex);
468 468
469 static struct brd_device *brd_alloc(int i) 469 static struct brd_device *brd_alloc(int i)
470 { 470 {
471 struct brd_device *brd; 471 struct brd_device *brd;
472 struct gendisk *disk; 472 struct gendisk *disk;
473 473
474 brd = kzalloc(sizeof(*brd), GFP_KERNEL); 474 brd = kzalloc(sizeof(*brd), GFP_KERNEL);
475 if (!brd) 475 if (!brd)
476 goto out; 476 goto out;
477 brd->brd_number = i; 477 brd->brd_number = i;
478 spin_lock_init(&brd->brd_lock); 478 spin_lock_init(&brd->brd_lock);
479 INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); 479 INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
480 480
481 brd->brd_queue = blk_alloc_queue(GFP_KERNEL); 481 brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
482 if (!brd->brd_queue) 482 if (!brd->brd_queue)
483 goto out_free_dev; 483 goto out_free_dev;
484 blk_queue_make_request(brd->brd_queue, brd_make_request); 484 blk_queue_make_request(brd->brd_queue, brd_make_request);
485 blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN);
486 blk_queue_max_hw_sectors(brd->brd_queue, 1024); 485 blk_queue_max_hw_sectors(brd->brd_queue, 1024);
487 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); 486 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
488 487
489 brd->brd_queue->limits.discard_granularity = PAGE_SIZE; 488 brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
490 brd->brd_queue->limits.max_discard_sectors = UINT_MAX; 489 brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
491 brd->brd_queue->limits.discard_zeroes_data = 1; 490 brd->brd_queue->limits.discard_zeroes_data = 1;
492 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); 491 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
493 492
494 disk = brd->brd_disk = alloc_disk(1 << part_shift); 493 disk = brd->brd_disk = alloc_disk(1 << part_shift);
495 if (!disk) 494 if (!disk)
496 goto out_free_queue; 495 goto out_free_queue;
497 disk->major = RAMDISK_MAJOR; 496 disk->major = RAMDISK_MAJOR;
498 disk->first_minor = i << part_shift; 497 disk->first_minor = i << part_shift;
499 disk->fops = &brd_fops; 498 disk->fops = &brd_fops;
500 disk->private_data = brd; 499 disk->private_data = brd;
501 disk->queue = brd->brd_queue; 500 disk->queue = brd->brd_queue;
502 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; 501 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
503 sprintf(disk->disk_name, "ram%d", i); 502 sprintf(disk->disk_name, "ram%d", i);
504 set_capacity(disk, rd_size * 2); 503 set_capacity(disk, rd_size * 2);
505 504
506 return brd; 505 return brd;
507 506
508 out_free_queue: 507 out_free_queue:
509 blk_cleanup_queue(brd->brd_queue); 508 blk_cleanup_queue(brd->brd_queue);
510 out_free_dev: 509 out_free_dev:
511 kfree(brd); 510 kfree(brd);
512 out: 511 out:
513 return NULL; 512 return NULL;
514 } 513 }
515 514
516 static void brd_free(struct brd_device *brd) 515 static void brd_free(struct brd_device *brd)
517 { 516 {
518 put_disk(brd->brd_disk); 517 put_disk(brd->brd_disk);
519 blk_cleanup_queue(brd->brd_queue); 518 blk_cleanup_queue(brd->brd_queue);
520 brd_free_pages(brd); 519 brd_free_pages(brd);
521 kfree(brd); 520 kfree(brd);
522 } 521 }
523 522
524 static struct brd_device *brd_init_one(int i) 523 static struct brd_device *brd_init_one(int i)
525 { 524 {
526 struct brd_device *brd; 525 struct brd_device *brd;
527 526
528 list_for_each_entry(brd, &brd_devices, brd_list) { 527 list_for_each_entry(brd, &brd_devices, brd_list) {
529 if (brd->brd_number == i) 528 if (brd->brd_number == i)
530 goto out; 529 goto out;
531 } 530 }
532 531
533 brd = brd_alloc(i); 532 brd = brd_alloc(i);
534 if (brd) { 533 if (brd) {
535 add_disk(brd->brd_disk); 534 add_disk(brd->brd_disk);
536 list_add_tail(&brd->brd_list, &brd_devices); 535 list_add_tail(&brd->brd_list, &brd_devices);
537 } 536 }
538 out: 537 out:
539 return brd; 538 return brd;
540 } 539 }
541 540
542 static void brd_del_one(struct brd_device *brd) 541 static void brd_del_one(struct brd_device *brd)
543 { 542 {
544 list_del(&brd->brd_list); 543 list_del(&brd->brd_list);
545 del_gendisk(brd->brd_disk); 544 del_gendisk(brd->brd_disk);
546 brd_free(brd); 545 brd_free(brd);
547 } 546 }
548 547
549 static struct kobject *brd_probe(dev_t dev, int *part, void *data) 548 static struct kobject *brd_probe(dev_t dev, int *part, void *data)
550 { 549 {
551 struct brd_device *brd; 550 struct brd_device *brd;
552 struct kobject *kobj; 551 struct kobject *kobj;
553 552
554 mutex_lock(&brd_devices_mutex); 553 mutex_lock(&brd_devices_mutex);
555 brd = brd_init_one(dev & MINORMASK); 554 brd = brd_init_one(dev & MINORMASK);
556 kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); 555 kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM);
557 mutex_unlock(&brd_devices_mutex); 556 mutex_unlock(&brd_devices_mutex);
558 557
559 *part = 0; 558 *part = 0;
560 return kobj; 559 return kobj;
561 } 560 }
562 561
563 static int __init brd_init(void) 562 static int __init brd_init(void)
564 { 563 {
565 int i, nr; 564 int i, nr;
566 unsigned long range; 565 unsigned long range;
567 struct brd_device *brd, *next; 566 struct brd_device *brd, *next;
568 567
569 /* 568 /*
570 * brd module now has a feature to instantiate underlying device 569 * brd module now has a feature to instantiate underlying device
571 * structure on-demand, provided that there is an access dev node. 570 * structure on-demand, provided that there is an access dev node.
572 * However, this will not work well with user space tools that don't 571 * However, this will not work well with user space tools that don't
573 * know about such a "feature". In order not to break any existing 572 * know about such a "feature". In order not to break any existing
574 * tools, we do the following: 573 * tools, we do the following:
575 * 574 *
576 * (1) if rd_nr is specified, create that many upfront, and this 575 * (1) if rd_nr is specified, create that many upfront, and this
577 * also becomes a hard limit. 576 * also becomes a hard limit.
578 * (2) if rd_nr is not specified, create 1 rd device on module 577 * (2) if rd_nr is not specified, create 1 rd device on module
579 * load; users can further extend the brd devices by creating dev nodes 578 * load; users can further extend the brd devices by creating dev nodes
580 * themselves and have the kernel automatically instantiate the actual 579 * themselves and have the kernel automatically instantiate the actual
581 * device on-demand. 580 * device on-demand.
582 */ 581 */
583 582
584 part_shift = 0; 583 part_shift = 0;
585 if (max_part > 0) 584 if (max_part > 0)
586 part_shift = fls(max_part); 585 part_shift = fls(max_part);
587 586
588 if (rd_nr > 1UL << (MINORBITS - part_shift)) 587 if (rd_nr > 1UL << (MINORBITS - part_shift))
589 return -EINVAL; 588 return -EINVAL;
590 589
591 if (rd_nr) { 590 if (rd_nr) {
592 nr = rd_nr; 591 nr = rd_nr;
593 range = rd_nr; 592 range = rd_nr;
594 } else { 593 } else {
595 nr = CONFIG_BLK_DEV_RAM_COUNT; 594 nr = CONFIG_BLK_DEV_RAM_COUNT;
596 range = 1UL << (MINORBITS - part_shift); 595 range = 1UL << (MINORBITS - part_shift);
597 } 596 }
598 597
599 if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) 598 if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
600 return -EIO; 599 return -EIO;
601 600
602 for (i = 0; i < nr; i++) { 601 for (i = 0; i < nr; i++) {
603 brd = brd_alloc(i); 602 brd = brd_alloc(i);
604 if (!brd) 603 if (!brd)
605 goto out_free; 604 goto out_free;
606 list_add_tail(&brd->brd_list, &brd_devices); 605 list_add_tail(&brd->brd_list, &brd_devices);
607 } 606 }
608 607
609 /* point of no return */ 608 /* point of no return */
610 609
611 list_for_each_entry(brd, &brd_devices, brd_list) 610 list_for_each_entry(brd, &brd_devices, brd_list)
612 add_disk(brd->brd_disk); 611 add_disk(brd->brd_disk);
613 612
614 blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range, 613 blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range,
615 THIS_MODULE, brd_probe, NULL, NULL); 614 THIS_MODULE, brd_probe, NULL, NULL);
616 615
617 printk(KERN_INFO "brd: module loaded\n"); 616 printk(KERN_INFO "brd: module loaded\n");
618 return 0; 617 return 0;
619 618
620 out_free: 619 out_free:
621 list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { 620 list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
622 list_del(&brd->brd_list); 621 list_del(&brd->brd_list);
623 brd_free(brd); 622 brd_free(brd);
624 } 623 }
625 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 624 unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
626 625
627 return -ENOMEM; 626 return -ENOMEM;
628 } 627 }
629 628
630 static void __exit brd_exit(void) 629 static void __exit brd_exit(void)
631 { 630 {
632 unsigned long range; 631 unsigned long range;
633 struct brd_device *brd, *next; 632 struct brd_device *brd, *next;
634 633
635 range = rd_nr ? rd_nr : 1UL << (MINORBITS - part_shift); 634 range = rd_nr ? rd_nr : 1UL << (MINORBITS - part_shift);
636 635
637 list_for_each_entry_safe(brd, next, &brd_devices, brd_list) 636 list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
638 brd_del_one(brd); 637 brd_del_one(brd);
639 638
640 blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range); 639 blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range);
641 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 640 unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
642 } 641 }
643 642
644 module_init(brd_init); 643 module_init(brd_init);
645 module_exit(brd_exit); 644 module_exit(brd_exit);
646 645
647 646
drivers/block/loop.c
1 /* 1 /*
2 * linux/drivers/block/loop.c 2 * linux/drivers/block/loop.c
3 * 3 *
4 * Written by Theodore Ts'o, 3/29/93 4 * Written by Theodore Ts'o, 3/29/93
5 * 5 *
6 * Copyright 1993 by Theodore Ts'o. Redistribution of this file is 6 * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
7 * permitted under the GNU General Public License. 7 * permitted under the GNU General Public License.
8 * 8 *
9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
11 * 11 *
12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
14 * 14 *
15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
16 * 16 *
17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
18 * 18 *
19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
20 * 20 *
21 * Loadable modules and other fixes by AK, 1998 21 * Loadable modules and other fixes by AK, 1998
22 * 22 *
23 * Make real block number available to downstream transfer functions, enables 23 * Make real block number available to downstream transfer functions, enables
24 * CBC (and relatives) mode encryption requiring unique IVs per data block. 24 * CBC (and relatives) mode encryption requiring unique IVs per data block.
25 * Reed H. Petty, rhp@draper.net 25 * Reed H. Petty, rhp@draper.net
26 * 26 *
27 * Maximum number of loop devices now dynamic via max_loop module parameter. 27 * Maximum number of loop devices now dynamic via max_loop module parameter.
28 * Russell Kroll <rkroll@exploits.org> 19990701 28 * Russell Kroll <rkroll@exploits.org> 19990701
29 * 29 *
30 * Maximum number of loop devices when compiled-in now selectable by passing 30 * Maximum number of loop devices when compiled-in now selectable by passing
31 * max_loop=<1-255> to the kernel on boot. 31 * max_loop=<1-255> to the kernel on boot.
32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
33 * 33 *
34 * Completely rewrite request handling to be make_request_fn style and 34 * Completely rewrite request handling to be make_request_fn style and
35 * non blocking, pushing work to a helper thread. Lots of fixes from 35 * non blocking, pushing work to a helper thread. Lots of fixes from
36 * Al Viro too. 36 * Al Viro too.
37 * Jens Axboe <axboe@suse.de>, Nov 2000 37 * Jens Axboe <axboe@suse.de>, Nov 2000
38 * 38 *
39 * Support up to 256 loop devices 39 * Support up to 256 loop devices
40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
41 * 41 *
42 * Support for falling back on the write file operation when the address space 42 * Support for falling back on the write file operation when the address space
43 * operations write_begin is not available on the backing filesystem. 43 * operations write_begin is not available on the backing filesystem.
44 * Anton Altaparmakov, 16 Feb 2005 44 * Anton Altaparmakov, 16 Feb 2005
45 * 45 *
46 * Still To Fix: 46 * Still To Fix:
47 * - Advisory locking is ignored here. 47 * - Advisory locking is ignored here.
48 * - Should use its own CAP_* category instead of CAP_SYS_ADMIN 48 * - Should use its own CAP_* category instead of CAP_SYS_ADMIN
49 * 49 *
50 */ 50 */
51 51
52 #include <linux/module.h> 52 #include <linux/module.h>
53 #include <linux/moduleparam.h> 53 #include <linux/moduleparam.h>
54 #include <linux/sched.h> 54 #include <linux/sched.h>
55 #include <linux/fs.h> 55 #include <linux/fs.h>
56 #include <linux/file.h> 56 #include <linux/file.h>
57 #include <linux/stat.h> 57 #include <linux/stat.h>
58 #include <linux/errno.h> 58 #include <linux/errno.h>
59 #include <linux/major.h> 59 #include <linux/major.h>
60 #include <linux/wait.h> 60 #include <linux/wait.h>
61 #include <linux/blkdev.h> 61 #include <linux/blkdev.h>
62 #include <linux/blkpg.h> 62 #include <linux/blkpg.h>
63 #include <linux/init.h> 63 #include <linux/init.h>
64 #include <linux/swap.h> 64 #include <linux/swap.h>
65 #include <linux/slab.h> 65 #include <linux/slab.h>
66 #include <linux/loop.h> 66 #include <linux/loop.h>
67 #include <linux/compat.h> 67 #include <linux/compat.h>
68 #include <linux/suspend.h> 68 #include <linux/suspend.h>
69 #include <linux/freezer.h> 69 #include <linux/freezer.h>
70 #include <linux/smp_lock.h> 70 #include <linux/smp_lock.h>
71 #include <linux/writeback.h> 71 #include <linux/writeback.h>
72 #include <linux/buffer_head.h> /* for invalidate_bdev() */ 72 #include <linux/buffer_head.h> /* for invalidate_bdev() */
73 #include <linux/completion.h> 73 #include <linux/completion.h>
74 #include <linux/highmem.h> 74 #include <linux/highmem.h>
75 #include <linux/kthread.h> 75 #include <linux/kthread.h>
76 #include <linux/splice.h> 76 #include <linux/splice.h>
77 77
78 #include <asm/uaccess.h> 78 #include <asm/uaccess.h>
79 79
80 static LIST_HEAD(loop_devices); 80 static LIST_HEAD(loop_devices);
81 static DEFINE_MUTEX(loop_devices_mutex); 81 static DEFINE_MUTEX(loop_devices_mutex);
82 82
83 static int max_part; 83 static int max_part;
84 static int part_shift; 84 static int part_shift;
85 85
86 /* 86 /*
87 * Transfer functions 87 * Transfer functions
88 */ 88 */
89 static int transfer_none(struct loop_device *lo, int cmd, 89 static int transfer_none(struct loop_device *lo, int cmd,
90 struct page *raw_page, unsigned raw_off, 90 struct page *raw_page, unsigned raw_off,
91 struct page *loop_page, unsigned loop_off, 91 struct page *loop_page, unsigned loop_off,
92 int size, sector_t real_block) 92 int size, sector_t real_block)
93 { 93 {
94 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 94 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
95 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 95 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
96 96
97 if (cmd == READ) 97 if (cmd == READ)
98 memcpy(loop_buf, raw_buf, size); 98 memcpy(loop_buf, raw_buf, size);
99 else 99 else
100 memcpy(raw_buf, loop_buf, size); 100 memcpy(raw_buf, loop_buf, size);
101 101
102 kunmap_atomic(raw_buf, KM_USER0); 102 kunmap_atomic(raw_buf, KM_USER0);
103 kunmap_atomic(loop_buf, KM_USER1); 103 kunmap_atomic(loop_buf, KM_USER1);
104 cond_resched(); 104 cond_resched();
105 return 0; 105 return 0;
106 } 106 }
107 107
108 static int transfer_xor(struct loop_device *lo, int cmd, 108 static int transfer_xor(struct loop_device *lo, int cmd,
109 struct page *raw_page, unsigned raw_off, 109 struct page *raw_page, unsigned raw_off,
110 struct page *loop_page, unsigned loop_off, 110 struct page *loop_page, unsigned loop_off,
111 int size, sector_t real_block) 111 int size, sector_t real_block)
112 { 112 {
113 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 113 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
114 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 114 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
115 char *in, *out, *key; 115 char *in, *out, *key;
116 int i, keysize; 116 int i, keysize;
117 117
118 if (cmd == READ) { 118 if (cmd == READ) {
119 in = raw_buf; 119 in = raw_buf;
120 out = loop_buf; 120 out = loop_buf;
121 } else { 121 } else {
122 in = loop_buf; 122 in = loop_buf;
123 out = raw_buf; 123 out = raw_buf;
124 } 124 }
125 125
126 key = lo->lo_encrypt_key; 126 key = lo->lo_encrypt_key;
127 keysize = lo->lo_encrypt_key_size; 127 keysize = lo->lo_encrypt_key_size;
128 for (i = 0; i < size; i++) 128 for (i = 0; i < size; i++)
129 *out++ = *in++ ^ key[(i & 511) % keysize]; 129 *out++ = *in++ ^ key[(i & 511) % keysize];
130 130
131 kunmap_atomic(raw_buf, KM_USER0); 131 kunmap_atomic(raw_buf, KM_USER0);
132 kunmap_atomic(loop_buf, KM_USER1); 132 kunmap_atomic(loop_buf, KM_USER1);
133 cond_resched(); 133 cond_resched();
134 return 0; 134 return 0;
135 } 135 }
136 136
137 static int xor_init(struct loop_device *lo, const struct loop_info64 *info) 137 static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
138 { 138 {
139 if (unlikely(info->lo_encrypt_key_size <= 0)) 139 if (unlikely(info->lo_encrypt_key_size <= 0))
140 return -EINVAL; 140 return -EINVAL;
141 return 0; 141 return 0;
142 } 142 }
143 143
144 static struct loop_func_table none_funcs = { 144 static struct loop_func_table none_funcs = {
145 .number = LO_CRYPT_NONE, 145 .number = LO_CRYPT_NONE,
146 .transfer = transfer_none, 146 .transfer = transfer_none,
147 }; 147 };
148 148
149 static struct loop_func_table xor_funcs = { 149 static struct loop_func_table xor_funcs = {
150 .number = LO_CRYPT_XOR, 150 .number = LO_CRYPT_XOR,
151 .transfer = transfer_xor, 151 .transfer = transfer_xor,
152 .init = xor_init 152 .init = xor_init
153 }; 153 };
154 154
155 /* xfer_funcs[0] is special - its release function is never called */ 155 /* xfer_funcs[0] is special - its release function is never called */
156 static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { 156 static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
157 &none_funcs, 157 &none_funcs,
158 &xor_funcs 158 &xor_funcs
159 }; 159 };
160 160
161 static loff_t get_loop_size(struct loop_device *lo, struct file *file) 161 static loff_t get_loop_size(struct loop_device *lo, struct file *file)
162 { 162 {
163 loff_t size, offset, loopsize; 163 loff_t size, offset, loopsize;
164 164
165 /* Compute loopsize in bytes */ 165 /* Compute loopsize in bytes */
166 size = i_size_read(file->f_mapping->host); 166 size = i_size_read(file->f_mapping->host);
167 offset = lo->lo_offset; 167 offset = lo->lo_offset;
168 loopsize = size - offset; 168 loopsize = size - offset;
169 if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) 169 if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
170 loopsize = lo->lo_sizelimit; 170 loopsize = lo->lo_sizelimit;
171 171
172 /* 172 /*
173 * Unfortunately, if we want to do I/O on the device, 173 * Unfortunately, if we want to do I/O on the device,
174 * the number of 512-byte sectors has to fit into a sector_t. 174 * the number of 512-byte sectors has to fit into a sector_t.
175 */ 175 */
176 return loopsize >> 9; 176 return loopsize >> 9;
177 } 177 }
178 178
179 static int 179 static int
180 figure_loop_size(struct loop_device *lo) 180 figure_loop_size(struct loop_device *lo)
181 { 181 {
182 loff_t size = get_loop_size(lo, lo->lo_backing_file); 182 loff_t size = get_loop_size(lo, lo->lo_backing_file);
183 sector_t x = (sector_t)size; 183 sector_t x = (sector_t)size;
184 184
185 if (unlikely((loff_t)x != size)) 185 if (unlikely((loff_t)x != size))
186 return -EFBIG; 186 return -EFBIG;
187 187
188 set_capacity(lo->lo_disk, x); 188 set_capacity(lo->lo_disk, x);
189 return 0; 189 return 0;
190 } 190 }
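A quick worked example of the arithmetic above (numbers illustrative): with a 1 GiB backing file, lo_offset of 4096 and no lo_sizelimit, loopsize is 1073741824 - 4096 = 1073737728 bytes, so get_loop_size() returns 1073737728 >> 9 = 2097144 sectors, which figure_loop_size() then feeds to set_capacity().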
191 191
192 static inline int 192 static inline int
193 lo_do_transfer(struct loop_device *lo, int cmd, 193 lo_do_transfer(struct loop_device *lo, int cmd,
194 struct page *rpage, unsigned roffs, 194 struct page *rpage, unsigned roffs,
195 struct page *lpage, unsigned loffs, 195 struct page *lpage, unsigned loffs,
196 int size, sector_t rblock) 196 int size, sector_t rblock)
197 { 197 {
198 if (unlikely(!lo->transfer)) 198 if (unlikely(!lo->transfer))
199 return 0; 199 return 0;
200 200
201 return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); 201 return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
202 } 202 }
203 203
204 /** 204 /**
205 * do_lo_send_aops - helper for writing data to a loop device 205 * do_lo_send_aops - helper for writing data to a loop device
206 * 206 *
207 * This is the fast version for backing filesystems which implement the address 207 * This is the fast version for backing filesystems which implement the address
208 * space operations write_begin and write_end. 208 * space operations write_begin and write_end.
209 */ 209 */
210 static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, 210 static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
211 loff_t pos, struct page *unused) 211 loff_t pos, struct page *unused)
212 { 212 {
213 struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ 213 struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
214 struct address_space *mapping = file->f_mapping; 214 struct address_space *mapping = file->f_mapping;
215 pgoff_t index; 215 pgoff_t index;
216 unsigned offset, bv_offs; 216 unsigned offset, bv_offs;
217 int len, ret; 217 int len, ret;
218 218
219 mutex_lock(&mapping->host->i_mutex); 219 mutex_lock(&mapping->host->i_mutex);
220 index = pos >> PAGE_CACHE_SHIFT; 220 index = pos >> PAGE_CACHE_SHIFT;
221 offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); 221 offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
222 bv_offs = bvec->bv_offset; 222 bv_offs = bvec->bv_offset;
223 len = bvec->bv_len; 223 len = bvec->bv_len;
224 while (len > 0) { 224 while (len > 0) {
225 sector_t IV; 225 sector_t IV;
226 unsigned size, copied; 226 unsigned size, copied;
227 int transfer_result; 227 int transfer_result;
228 struct page *page; 228 struct page *page;
229 void *fsdata; 229 void *fsdata;
230 230
231 IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); 231 IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
232 size = PAGE_CACHE_SIZE - offset; 232 size = PAGE_CACHE_SIZE - offset;
233 if (size > len) 233 if (size > len)
234 size = len; 234 size = len;
235 235
236 ret = pagecache_write_begin(file, mapping, pos, size, 0, 236 ret = pagecache_write_begin(file, mapping, pos, size, 0,
237 &page, &fsdata); 237 &page, &fsdata);
238 if (ret) 238 if (ret)
239 goto fail; 239 goto fail;
240 240
241 file_update_time(file); 241 file_update_time(file);
242 242
243 transfer_result = lo_do_transfer(lo, WRITE, page, offset, 243 transfer_result = lo_do_transfer(lo, WRITE, page, offset,
244 bvec->bv_page, bv_offs, size, IV); 244 bvec->bv_page, bv_offs, size, IV);
245 copied = size; 245 copied = size;
246 if (unlikely(transfer_result)) 246 if (unlikely(transfer_result))
247 copied = 0; 247 copied = 0;
248 248
249 ret = pagecache_write_end(file, mapping, pos, size, copied, 249 ret = pagecache_write_end(file, mapping, pos, size, copied,
250 page, fsdata); 250 page, fsdata);
251 if (ret < 0 || ret != copied) 251 if (ret < 0 || ret != copied)
252 goto fail; 252 goto fail;
253 253
254 if (unlikely(transfer_result)) 254 if (unlikely(transfer_result))
255 goto fail; 255 goto fail;
256 256
257 bv_offs += copied; 257 bv_offs += copied;
258 len -= copied; 258 len -= copied;
259 offset = 0; 259 offset = 0;
260 index++; 260 index++;
261 pos += copied; 261 pos += copied;
262 } 262 }
263 ret = 0; 263 ret = 0;
264 out: 264 out:
265 mutex_unlock(&mapping->host->i_mutex); 265 mutex_unlock(&mapping->host->i_mutex);
266 return ret; 266 return ret;
267 fail: 267 fail:
268 ret = -1; 268 ret = -1;
269 goto out; 269 goto out;
270 } 270 }
271 271
272 /** 272 /**
273 * __do_lo_send_write - helper for writing data to a loop device 273 * __do_lo_send_write - helper for writing data to a loop device
274 * 274 *
275 * This helper just factors out common code between do_lo_send_direct_write() 275 * This helper just factors out common code between do_lo_send_direct_write()
276 * and do_lo_send_write(). 276 * and do_lo_send_write().
277 */ 277 */
278 static int __do_lo_send_write(struct file *file, 278 static int __do_lo_send_write(struct file *file,
279 u8 *buf, const int len, loff_t pos) 279 u8 *buf, const int len, loff_t pos)
280 { 280 {
281 ssize_t bw; 281 ssize_t bw;
282 mm_segment_t old_fs = get_fs(); 282 mm_segment_t old_fs = get_fs();
283 283
284 set_fs(get_ds()); 284 set_fs(get_ds());
285 bw = file->f_op->write(file, buf, len, &pos); 285 bw = file->f_op->write(file, buf, len, &pos);
286 set_fs(old_fs); 286 set_fs(old_fs);
287 if (likely(bw == len)) 287 if (likely(bw == len))
288 return 0; 288 return 0;
289 printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", 289 printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
290 (unsigned long long)pos, len); 290 (unsigned long long)pos, len);
291 if (bw >= 0) 291 if (bw >= 0)
292 bw = -EIO; 292 bw = -EIO;
293 return bw; 293 return bw;
294 } 294 }
295 295
296 /** 296 /**
297 * do_lo_send_direct_write - helper for writing data to a loop device 297 * do_lo_send_direct_write - helper for writing data to a loop device
298 * 298 *
299 * This is the fast, non-transforming version for backing filesystems which do 299 * This is the fast, non-transforming version for backing filesystems which do
300 * not implement the address space operations write_begin and write_end. 300 * not implement the address space operations write_begin and write_end.
301 * It uses the write file operation which should be present on all writeable 301 * It uses the write file operation which should be present on all writeable
302 * filesystems. 302 * filesystems.
303 */ 303 */
304 static int do_lo_send_direct_write(struct loop_device *lo, 304 static int do_lo_send_direct_write(struct loop_device *lo,
305 struct bio_vec *bvec, loff_t pos, struct page *page) 305 struct bio_vec *bvec, loff_t pos, struct page *page)
306 { 306 {
307 ssize_t bw = __do_lo_send_write(lo->lo_backing_file, 307 ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
308 kmap(bvec->bv_page) + bvec->bv_offset, 308 kmap(bvec->bv_page) + bvec->bv_offset,
309 bvec->bv_len, pos); 309 bvec->bv_len, pos);
310 kunmap(bvec->bv_page); 310 kunmap(bvec->bv_page);
311 cond_resched(); 311 cond_resched();
312 return bw; 312 return bw;
313 } 313 }
314 314
315 /** 315 /**
316 * do_lo_send_write - helper for writing data to a loop device 316 * do_lo_send_write - helper for writing data to a loop device
317 * 317 *
318 * This is the slow, transforming version for filesystems which do not 318 * This is the slow, transforming version for filesystems which do not
319 * implement the address space operations write_begin and write_end. It 319 * implement the address space operations write_begin and write_end. It
320 * uses the write file operation which should be present on all writeable 320 * uses the write file operation which should be present on all writeable
321 * filesystems. 321 * filesystems.
322 * 322 *
323 * Using fops->write is slower than using aops->{prepare,commit}_write in the 323 * Using fops->write is slower than using aops->{prepare,commit}_write in the
324 * transforming case because we need to double buffer the data as we cannot do 324 * transforming case because we need to double buffer the data as we cannot do
325 * the transformations in place as we do not have direct access to the 325 * the transformations in place as we do not have direct access to the
326 * destination pages of the backing file. 326 * destination pages of the backing file.
327 */ 327 */
328 static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, 328 static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
329 loff_t pos, struct page *page) 329 loff_t pos, struct page *page)
330 { 330 {
331 int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, 331 int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
332 bvec->bv_offset, bvec->bv_len, pos >> 9); 332 bvec->bv_offset, bvec->bv_len, pos >> 9);
333 if (likely(!ret)) 333 if (likely(!ret))
334 return __do_lo_send_write(lo->lo_backing_file, 334 return __do_lo_send_write(lo->lo_backing_file,
335 page_address(page), bvec->bv_len, 335 page_address(page), bvec->bv_len,
336 pos); 336 pos);
337 printk(KERN_ERR "loop: Transfer error at byte offset %llu, " 337 printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
338 "length %i.\n", (unsigned long long)pos, bvec->bv_len); 338 "length %i.\n", (unsigned long long)pos, bvec->bv_len);
339 if (ret > 0) 339 if (ret > 0)
340 ret = -EIO; 340 ret = -EIO;
341 return ret; 341 return ret;
342 } 342 }
343 343
344 static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) 344 static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
345 { 345 {
346 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, 346 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
347 struct page *page); 347 struct page *page);
348 struct bio_vec *bvec; 348 struct bio_vec *bvec;
349 struct page *page = NULL; 349 struct page *page = NULL;
350 int i, ret = 0; 350 int i, ret = 0;
351 351
352 do_lo_send = do_lo_send_aops; 352 do_lo_send = do_lo_send_aops;
353 if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { 353 if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
354 do_lo_send = do_lo_send_direct_write; 354 do_lo_send = do_lo_send_direct_write;
355 if (lo->transfer != transfer_none) { 355 if (lo->transfer != transfer_none) {
356 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 356 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
357 if (unlikely(!page)) 357 if (unlikely(!page))
358 goto fail; 358 goto fail;
359 kmap(page); 359 kmap(page);
360 do_lo_send = do_lo_send_write; 360 do_lo_send = do_lo_send_write;
361 } 361 }
362 } 362 }
363 bio_for_each_segment(bvec, bio, i) { 363 bio_for_each_segment(bvec, bio, i) {
364 ret = do_lo_send(lo, bvec, pos, page); 364 ret = do_lo_send(lo, bvec, pos, page);
365 if (ret < 0) 365 if (ret < 0)
366 break; 366 break;
367 pos += bvec->bv_len; 367 pos += bvec->bv_len;
368 } 368 }
369 if (page) { 369 if (page) {
370 kunmap(page); 370 kunmap(page);
371 __free_page(page); 371 __free_page(page);
372 } 372 }
373 out: 373 out:
374 return ret; 374 return ret;
375 fail: 375 fail:
376 printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); 376 printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
377 ret = -ENOMEM; 377 ret = -ENOMEM;
378 goto out; 378 goto out;
379 } 379 }
380 380
381 struct lo_read_data { 381 struct lo_read_data {
382 struct loop_device *lo; 382 struct loop_device *lo;
383 struct page *page; 383 struct page *page;
384 unsigned offset; 384 unsigned offset;
385 int bsize; 385 int bsize;
386 }; 386 };
387 387
388 static int 388 static int
389 lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 389 lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
390 struct splice_desc *sd) 390 struct splice_desc *sd)
391 { 391 {
392 struct lo_read_data *p = sd->u.data; 392 struct lo_read_data *p = sd->u.data;
393 struct loop_device *lo = p->lo; 393 struct loop_device *lo = p->lo;
394 struct page *page = buf->page; 394 struct page *page = buf->page;
395 sector_t IV; 395 sector_t IV;
396 int size, ret; 396 int size, ret;
397 397
398 ret = buf->ops->confirm(pipe, buf); 398 ret = buf->ops->confirm(pipe, buf);
399 if (unlikely(ret)) 399 if (unlikely(ret))
400 return ret; 400 return ret;
401 401
402 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + 402 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
403 (buf->offset >> 9); 403 (buf->offset >> 9);
404 size = sd->len; 404 size = sd->len;
405 if (size > p->bsize) 405 if (size > p->bsize)
406 size = p->bsize; 406 size = p->bsize;
407 407
408 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { 408 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
409 printk(KERN_ERR "loop: transfer error block %ld\n", 409 printk(KERN_ERR "loop: transfer error block %ld\n",
410 page->index); 410 page->index);
411 size = -EINVAL; 411 size = -EINVAL;
412 } 412 }
413 413
414 flush_dcache_page(p->page); 414 flush_dcache_page(p->page);
415 415
416 if (size > 0) 416 if (size > 0)
417 p->offset += size; 417 p->offset += size;
418 418
419 return size; 419 return size;
420 } 420 }
421 421
422 static int 422 static int
423 lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) 423 lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
424 { 424 {
425 return __splice_from_pipe(pipe, sd, lo_splice_actor); 425 return __splice_from_pipe(pipe, sd, lo_splice_actor);
426 } 426 }
427 427
428 static int 428 static int
429 do_lo_receive(struct loop_device *lo, 429 do_lo_receive(struct loop_device *lo,
430 struct bio_vec *bvec, int bsize, loff_t pos) 430 struct bio_vec *bvec, int bsize, loff_t pos)
431 { 431 {
432 struct lo_read_data cookie; 432 struct lo_read_data cookie;
433 struct splice_desc sd; 433 struct splice_desc sd;
434 struct file *file; 434 struct file *file;
435 long retval; 435 long retval;
436 436
437 cookie.lo = lo; 437 cookie.lo = lo;
438 cookie.page = bvec->bv_page; 438 cookie.page = bvec->bv_page;
439 cookie.offset = bvec->bv_offset; 439 cookie.offset = bvec->bv_offset;
440 cookie.bsize = bsize; 440 cookie.bsize = bsize;
441 441
442 sd.len = 0; 442 sd.len = 0;
443 sd.total_len = bvec->bv_len; 443 sd.total_len = bvec->bv_len;
444 sd.flags = 0; 444 sd.flags = 0;
445 sd.pos = pos; 445 sd.pos = pos;
446 sd.u.data = &cookie; 446 sd.u.data = &cookie;
447 447
448 file = lo->lo_backing_file; 448 file = lo->lo_backing_file;
449 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); 449 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
450 450
451 if (retval < 0) 451 if (retval < 0)
452 return retval; 452 return retval;
453 453
454 return 0; 454 return 0;
455 } 455 }
456 456
457 static int 457 static int
458 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) 458 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
459 { 459 {
460 struct bio_vec *bvec; 460 struct bio_vec *bvec;
461 int i, ret = 0; 461 int i, ret = 0;
462 462
463 bio_for_each_segment(bvec, bio, i) { 463 bio_for_each_segment(bvec, bio, i) {
464 ret = do_lo_receive(lo, bvec, bsize, pos); 464 ret = do_lo_receive(lo, bvec, bsize, pos);
465 if (ret < 0) 465 if (ret < 0)
466 break; 466 break;
467 pos += bvec->bv_len; 467 pos += bvec->bv_len;
468 } 468 }
469 return ret; 469 return ret;
470 } 470 }
471 471
472 static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) 472 static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
473 { 473 {
474 loff_t pos; 474 loff_t pos;
475 int ret; 475 int ret;
476 476
477 pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; 477 pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
478 478
479 if (bio_rw(bio) == WRITE) { 479 if (bio_rw(bio) == WRITE) {
480 bool barrier = (bio->bi_rw & REQ_HARDBARRIER); 480 bool barrier = (bio->bi_rw & REQ_HARDBARRIER);
481 struct file *file = lo->lo_backing_file; 481 struct file *file = lo->lo_backing_file;
482 482
483 if (barrier) { 483 if (barrier) {
484 if (unlikely(!file->f_op->fsync)) { 484 if (unlikely(!file->f_op->fsync)) {
485 ret = -EOPNOTSUPP; 485 ret = -EOPNOTSUPP;
486 goto out; 486 goto out;
487 } 487 }
488 488
489 ret = vfs_fsync(file, 0); 489 ret = vfs_fsync(file, 0);
490 if (unlikely(ret)) { 490 if (unlikely(ret)) {
491 ret = -EIO; 491 ret = -EIO;
492 goto out; 492 goto out;
493 } 493 }
494 } 494 }
495 495
496 ret = lo_send(lo, bio, pos); 496 ret = lo_send(lo, bio, pos);
497 497
498 if (barrier && !ret) { 498 if (barrier && !ret) {
499 ret = vfs_fsync(file, 0); 499 ret = vfs_fsync(file, 0);
500 if (unlikely(ret)) 500 if (unlikely(ret))
501 ret = -EIO; 501 ret = -EIO;
502 } 502 }
503 } else 503 } else
504 ret = lo_receive(lo, bio, lo->lo_blocksize, pos); 504 ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
505 505
506 out: 506 out:
507 return ret; 507 return ret;
508 } 508 }
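
For context (not part of this patch): a minimal, hedged sketch of the kind of submission that lands in the barrier branch above. A caller tags a write bio with REQ_HARDBARRIER, and do_bio_filebacked() then emulates the ordering with the vfs_fsync() calls around lo_send(). The helper name is hypothetical, and the sketch assumes the kernel headers already pulled in by this file.

	/* hypothetical submitter sketch -- bio is assumed fully initialized */
	static void submit_barrier_write(struct bio *bio)
	{
		/* ask the driver for flush-before/flush-after semantics */
		bio->bi_rw |= REQ_HARDBARRIER;
		submit_bio(WRITE, bio);	/* submit_bio() ORs WRITE into bi_rw */
	}

REQ_HARDBARRIER itself is on the way out, so treat this purely as an illustration of the path that still exists here.
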
509 509
510 /* 510 /*
511 * Add bio to back of pending list 511 * Add bio to back of pending list
512 */ 512 */
513 static void loop_add_bio(struct loop_device *lo, struct bio *bio) 513 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
514 { 514 {
515 bio_list_add(&lo->lo_bio_list, bio); 515 bio_list_add(&lo->lo_bio_list, bio);
516 } 516 }
517 517
518 /* 518 /*
519 * Grab first pending buffer 519 * Grab first pending buffer
520 */ 520 */
521 static struct bio *loop_get_bio(struct loop_device *lo) 521 static struct bio *loop_get_bio(struct loop_device *lo)
522 { 522 {
523 return bio_list_pop(&lo->lo_bio_list); 523 return bio_list_pop(&lo->lo_bio_list);
524 } 524 }
525 525
526 static int loop_make_request(struct request_queue *q, struct bio *old_bio) 526 static int loop_make_request(struct request_queue *q, struct bio *old_bio)
527 { 527 {
528 struct loop_device *lo = q->queuedata; 528 struct loop_device *lo = q->queuedata;
529 int rw = bio_rw(old_bio); 529 int rw = bio_rw(old_bio);
530 530
531 if (rw == READA) 531 if (rw == READA)
532 rw = READ; 532 rw = READ;
533 533
534 BUG_ON(!lo || (rw != READ && rw != WRITE)); 534 BUG_ON(!lo || (rw != READ && rw != WRITE));
535 535
536 spin_lock_irq(&lo->lo_lock); 536 spin_lock_irq(&lo->lo_lock);
537 if (lo->lo_state != Lo_bound) 537 if (lo->lo_state != Lo_bound)
538 goto out; 538 goto out;
539 if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) 539 if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
540 goto out; 540 goto out;
541 loop_add_bio(lo, old_bio); 541 loop_add_bio(lo, old_bio);
542 wake_up(&lo->lo_event); 542 wake_up(&lo->lo_event);
543 spin_unlock_irq(&lo->lo_lock); 543 spin_unlock_irq(&lo->lo_lock);
544 return 0; 544 return 0;
545 545
546 out: 546 out:
547 spin_unlock_irq(&lo->lo_lock); 547 spin_unlock_irq(&lo->lo_lock);
548 bio_io_error(old_bio); 548 bio_io_error(old_bio);
549 return 0; 549 return 0;
550 } 550 }
551 551
552 /* 552 /*
553 * kick off io on the underlying address space 553 * kick off io on the underlying address space
554 */ 554 */
555 static void loop_unplug(struct request_queue *q) 555 static void loop_unplug(struct request_queue *q)
556 { 556 {
557 struct loop_device *lo = q->queuedata; 557 struct loop_device *lo = q->queuedata;
558 558
559 queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); 559 queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
560 blk_run_address_space(lo->lo_backing_file->f_mapping); 560 blk_run_address_space(lo->lo_backing_file->f_mapping);
561 } 561 }
562 562
563 struct switch_request { 563 struct switch_request {
564 struct file *file; 564 struct file *file;
565 struct completion wait; 565 struct completion wait;
566 }; 566 };
567 567
568 static void do_loop_switch(struct loop_device *, struct switch_request *); 568 static void do_loop_switch(struct loop_device *, struct switch_request *);
569 569
570 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) 570 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
571 { 571 {
572 if (unlikely(!bio->bi_bdev)) { 572 if (unlikely(!bio->bi_bdev)) {
573 do_loop_switch(lo, bio->bi_private); 573 do_loop_switch(lo, bio->bi_private);
574 bio_put(bio); 574 bio_put(bio);
575 } else { 575 } else {
576 int ret = do_bio_filebacked(lo, bio); 576 int ret = do_bio_filebacked(lo, bio);
577 bio_endio(bio, ret); 577 bio_endio(bio, ret);
578 } 578 }
579 } 579 }
580 580
581 /* 581 /*
582 * worker thread that handles reads/writes to file-backed loop devices, 582 * worker thread that handles reads/writes to file-backed loop devices,
583 * to avoid blocking in our make_request_fn. It also does loop decrypting 583 * to avoid blocking in our make_request_fn. It also does loop decrypting
584 * on reads for block-backed loop, as that is too heavy to do from 584 * on reads for block-backed loop, as that is too heavy to do from
585 * b_end_io context where irqs may be disabled. 585 * b_end_io context where irqs may be disabled.
586 * 586 *
587 * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before 587 * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before
588 * calling kthread_stop(). Once kthread_should_stop() is true, 588 * calling kthread_stop(). Once kthread_should_stop() is true,
589 * make_request will not place any more requests. Therefore, once 589 * make_request will not place any more requests. Therefore, once
590 * kthread_should_stop() is true and lo_bio_list is empty, we are 590 * kthread_should_stop() is true and lo_bio_list is empty, we are
591 * done with the loop. 591 * done with the loop.
592 */ 592 */
593 static int loop_thread(void *data) 593 static int loop_thread(void *data)
594 { 594 {
595 struct loop_device *lo = data; 595 struct loop_device *lo = data;
596 struct bio *bio; 596 struct bio *bio;
597 597
598 set_user_nice(current, -20); 598 set_user_nice(current, -20);
599 599
600 while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { 600 while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
601 601
602 wait_event_interruptible(lo->lo_event, 602 wait_event_interruptible(lo->lo_event,
603 !bio_list_empty(&lo->lo_bio_list) || 603 !bio_list_empty(&lo->lo_bio_list) ||
604 kthread_should_stop()); 604 kthread_should_stop());
605 605
606 if (bio_list_empty(&lo->lo_bio_list)) 606 if (bio_list_empty(&lo->lo_bio_list))
607 continue; 607 continue;
608 spin_lock_irq(&lo->lo_lock); 608 spin_lock_irq(&lo->lo_lock);
609 bio = loop_get_bio(lo); 609 bio = loop_get_bio(lo);
610 spin_unlock_irq(&lo->lo_lock); 610 spin_unlock_irq(&lo->lo_lock);
611 611
612 BUG_ON(!bio); 612 BUG_ON(!bio);
613 loop_handle_bio(lo, bio); 613 loop_handle_bio(lo, bio);
614 } 614 }
615 615
616 return 0; 616 return 0;
617 } 617 }
618 618
619 /* 619 /*
620 * loop_switch performs the hard work of switching a backing store. 620 * loop_switch performs the hard work of switching a backing store.
621 * First it needs to flush existing IO; it does this by sending a magic 621 * First it needs to flush existing IO; it does this by sending a magic
622 * BIO down the pipe. The completion of this BIO does the actual switch. 622 * BIO down the pipe. The completion of this BIO does the actual switch.
623 */ 623 */
624 static int loop_switch(struct loop_device *lo, struct file *file) 624 static int loop_switch(struct loop_device *lo, struct file *file)
625 { 625 {
626 struct switch_request w; 626 struct switch_request w;
627 struct bio *bio = bio_alloc(GFP_KERNEL, 0); 627 struct bio *bio = bio_alloc(GFP_KERNEL, 0);
628 if (!bio) 628 if (!bio)
629 return -ENOMEM; 629 return -ENOMEM;
630 init_completion(&w.wait); 630 init_completion(&w.wait);
631 w.file = file; 631 w.file = file;
632 bio->bi_private = &w; 632 bio->bi_private = &w;
633 bio->bi_bdev = NULL; 633 bio->bi_bdev = NULL;
634 loop_make_request(lo->lo_queue, bio); 634 loop_make_request(lo->lo_queue, bio);
635 wait_for_completion(&w.wait); 635 wait_for_completion(&w.wait);
636 return 0; 636 return 0;
637 } 637 }
638 638
639 /* 639 /*
640 * Helper to flush the IOs in the loop device while keeping the loop thread running 640 * Helper to flush the IOs in the loop device while keeping the loop thread running
641 */ 641 */
642 static int loop_flush(struct loop_device *lo) 642 static int loop_flush(struct loop_device *lo)
643 { 643 {
644 /* loop not yet configured, no running thread, nothing to flush */ 644 /* loop not yet configured, no running thread, nothing to flush */
645 if (!lo->lo_thread) 645 if (!lo->lo_thread)
646 return 0; 646 return 0;
647 647
648 return loop_switch(lo, NULL); 648 return loop_switch(lo, NULL);
649 } 649 }
650 650
651 /* 651 /*
652 * Do the actual switch; called from the BIO completion routine 652 * Do the actual switch; called from the BIO completion routine
653 */ 653 */
654 static void do_loop_switch(struct loop_device *lo, struct switch_request *p) 654 static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
655 { 655 {
656 struct file *file = p->file; 656 struct file *file = p->file;
657 struct file *old_file = lo->lo_backing_file; 657 struct file *old_file = lo->lo_backing_file;
658 struct address_space *mapping; 658 struct address_space *mapping;
659 659
660 /* if no new file, only flush of queued bios requested */ 660 /* if no new file, only flush of queued bios requested */
661 if (!file) 661 if (!file)
662 goto out; 662 goto out;
663 663
664 mapping = file->f_mapping; 664 mapping = file->f_mapping;
665 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); 665 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
666 lo->lo_backing_file = file; 666 lo->lo_backing_file = file;
667 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? 667 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
668 mapping->host->i_bdev->bd_block_size : PAGE_SIZE; 668 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
669 lo->old_gfp_mask = mapping_gfp_mask(mapping); 669 lo->old_gfp_mask = mapping_gfp_mask(mapping);
670 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 670 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
671 out: 671 out:
672 complete(&p->wait); 672 complete(&p->wait);
673 } 673 }
674 674
675 675
676 /* 676 /*
677 * loop_change_fd switches the backing store of a loopback device to 677 * loop_change_fd switches the backing store of a loopback device to
678 * a new file. This is useful for operating system installers to free up 678 * a new file. This is useful for operating system installers to free up
679 * the original file and in High Availability environments to switch to 679 * the original file and in High Availability environments to switch to
680 * an alternative location for the content in case of server meltdown. 680 * an alternative location for the content in case of server meltdown.
681 * This can only work if the loop device is used read-only, and if the 681 * This can only work if the loop device is used read-only, and if the
682 * new backing store is the same size and type as the old backing store. 682 * new backing store is the same size and type as the old backing store.
683 */ 683 */
684 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, 684 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
685 unsigned int arg) 685 unsigned int arg)
686 { 686 {
687 struct file *file, *old_file; 687 struct file *file, *old_file;
688 struct inode *inode; 688 struct inode *inode;
689 int error; 689 int error;
690 690
691 error = -ENXIO; 691 error = -ENXIO;
692 if (lo->lo_state != Lo_bound) 692 if (lo->lo_state != Lo_bound)
693 goto out; 693 goto out;
694 694
695 /* the loop device has to be read-only */ 695 /* the loop device has to be read-only */
696 error = -EINVAL; 696 error = -EINVAL;
697 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) 697 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
698 goto out; 698 goto out;
699 699
700 error = -EBADF; 700 error = -EBADF;
701 file = fget(arg); 701 file = fget(arg);
702 if (!file) 702 if (!file)
703 goto out; 703 goto out;
704 704
705 inode = file->f_mapping->host; 705 inode = file->f_mapping->host;
706 old_file = lo->lo_backing_file; 706 old_file = lo->lo_backing_file;
707 707
708 error = -EINVAL; 708 error = -EINVAL;
709 709
710 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 710 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
711 goto out_putf; 711 goto out_putf;
712 712
713 /* size of the new backing store needs to be the same */ 713 /* size of the new backing store needs to be the same */
714 if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) 714 if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
715 goto out_putf; 715 goto out_putf;
716 716
717 /* and ... switch */ 717 /* and ... switch */
718 error = loop_switch(lo, file); 718 error = loop_switch(lo, file);
719 if (error) 719 if (error)
720 goto out_putf; 720 goto out_putf;
721 721
722 fput(old_file); 722 fput(old_file);
723 if (max_part > 0) 723 if (max_part > 0)
724 ioctl_by_bdev(bdev, BLKRRPART, 0); 724 ioctl_by_bdev(bdev, BLKRRPART, 0);
725 return 0; 725 return 0;
726 726
727 out_putf: 727 out_putf:
728 fput(file); 728 fput(file);
729 out: 729 out:
730 return error; 730 return error;
731 } 731 }
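
As a usage note (not part of the patch), here is a hedged user-space sketch of driving loop_change_fd() through the LOOP_CHANGE_FD ioctl. The device node and backing-file path are made up; per the checks above, the loop device must be bound read-only and the new file must have the same size as the old one.

	/* user-space sketch: swap the backing file of a read-only loop device */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/loop.h>

	int main(void)
	{
		int loop_fd = open("/dev/loop0", O_RDONLY);          /* hypothetical device */
		int new_fd = open("/mnt/new-backing.img", O_RDONLY); /* hypothetical path */

		if (loop_fd < 0 || new_fd < 0)
			return 1;
		/* kernel-side handling is loop_change_fd() above */
		if (ioctl(loop_fd, LOOP_CHANGE_FD, new_fd) < 0) {
			perror("LOOP_CHANGE_FD");
			return 1;
		}
		return 0;
	}
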
732 732
733 static inline int is_loop_device(struct file *file) 733 static inline int is_loop_device(struct file *file)
734 { 734 {
735 struct inode *i = file->f_mapping->host; 735 struct inode *i = file->f_mapping->host;
736 736
737 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; 737 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
738 } 738 }
739 739
740 static int loop_set_fd(struct loop_device *lo, fmode_t mode, 740 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
741 struct block_device *bdev, unsigned int arg) 741 struct block_device *bdev, unsigned int arg)
742 { 742 {
743 struct file *file, *f; 743 struct file *file, *f;
744 struct inode *inode; 744 struct inode *inode;
745 struct address_space *mapping; 745 struct address_space *mapping;
746 unsigned lo_blocksize; 746 unsigned lo_blocksize;
747 int lo_flags = 0; 747 int lo_flags = 0;
748 int error; 748 int error;
749 loff_t size; 749 loff_t size;
750 750
751 /* This is safe, since we have a reference from open(). */ 751 /* This is safe, since we have a reference from open(). */
752 __module_get(THIS_MODULE); 752 __module_get(THIS_MODULE);
753 753
754 error = -EBADF; 754 error = -EBADF;
755 file = fget(arg); 755 file = fget(arg);
756 if (!file) 756 if (!file)
757 goto out; 757 goto out;
758 758
759 error = -EBUSY; 759 error = -EBUSY;
760 if (lo->lo_state != Lo_unbound) 760 if (lo->lo_state != Lo_unbound)
761 goto out_putf; 761 goto out_putf;
762 762
763 /* Avoid recursion */ 763 /* Avoid recursion */
764 f = file; 764 f = file;
765 while (is_loop_device(f)) { 765 while (is_loop_device(f)) {
766 struct loop_device *l; 766 struct loop_device *l;
767 767
768 if (f->f_mapping->host->i_bdev == bdev) 768 if (f->f_mapping->host->i_bdev == bdev)
769 goto out_putf; 769 goto out_putf;
770 770
771 l = f->f_mapping->host->i_bdev->bd_disk->private_data; 771 l = f->f_mapping->host->i_bdev->bd_disk->private_data;
772 if (l->lo_state == Lo_unbound) { 772 if (l->lo_state == Lo_unbound) {
773 error = -EINVAL; 773 error = -EINVAL;
774 goto out_putf; 774 goto out_putf;
775 } 775 }
776 f = l->lo_backing_file; 776 f = l->lo_backing_file;
777 } 777 }
778 778
779 mapping = file->f_mapping; 779 mapping = file->f_mapping;
780 inode = mapping->host; 780 inode = mapping->host;
781 781
782 if (!(file->f_mode & FMODE_WRITE)) 782 if (!(file->f_mode & FMODE_WRITE))
783 lo_flags |= LO_FLAGS_READ_ONLY; 783 lo_flags |= LO_FLAGS_READ_ONLY;
784 784
785 error = -EINVAL; 785 error = -EINVAL;
786 if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { 786 if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
787 const struct address_space_operations *aops = mapping->a_ops; 787 const struct address_space_operations *aops = mapping->a_ops;
788 788
789 if (aops->write_begin) 789 if (aops->write_begin)
790 lo_flags |= LO_FLAGS_USE_AOPS; 790 lo_flags |= LO_FLAGS_USE_AOPS;
791 if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) 791 if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
792 lo_flags |= LO_FLAGS_READ_ONLY; 792 lo_flags |= LO_FLAGS_READ_ONLY;
793 793
794 lo_blocksize = S_ISBLK(inode->i_mode) ? 794 lo_blocksize = S_ISBLK(inode->i_mode) ?
795 inode->i_bdev->bd_block_size : PAGE_SIZE; 795 inode->i_bdev->bd_block_size : PAGE_SIZE;
796 796
797 error = 0; 797 error = 0;
798 } else { 798 } else {
799 goto out_putf; 799 goto out_putf;
800 } 800 }
801 801
802 size = get_loop_size(lo, file); 802 size = get_loop_size(lo, file);
803 803
804 if ((loff_t)(sector_t)size != size) { 804 if ((loff_t)(sector_t)size != size) {
805 error = -EFBIG; 805 error = -EFBIG;
806 goto out_putf; 806 goto out_putf;
807 } 807 }
808 808
809 if (!(mode & FMODE_WRITE)) 809 if (!(mode & FMODE_WRITE))
810 lo_flags |= LO_FLAGS_READ_ONLY; 810 lo_flags |= LO_FLAGS_READ_ONLY;
811 811
812 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); 812 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
813 813
814 lo->lo_blocksize = lo_blocksize; 814 lo->lo_blocksize = lo_blocksize;
815 lo->lo_device = bdev; 815 lo->lo_device = bdev;
816 lo->lo_flags = lo_flags; 816 lo->lo_flags = lo_flags;
817 lo->lo_backing_file = file; 817 lo->lo_backing_file = file;
818 lo->transfer = transfer_none; 818 lo->transfer = transfer_none;
819 lo->ioctl = NULL; 819 lo->ioctl = NULL;
820 lo->lo_sizelimit = 0; 820 lo->lo_sizelimit = 0;
821 lo->old_gfp_mask = mapping_gfp_mask(mapping); 821 lo->old_gfp_mask = mapping_gfp_mask(mapping);
822 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 822 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
823 823
824 bio_list_init(&lo->lo_bio_list); 824 bio_list_init(&lo->lo_bio_list);
825 825
826 /* 826 /*
827 * set queue make_request_fn, and add limits based on lower level 827 * set queue make_request_fn, and add limits based on lower level
828 * device 828 * device
829 */ 829 */
830 blk_queue_make_request(lo->lo_queue, loop_make_request); 830 blk_queue_make_request(lo->lo_queue, loop_make_request);
831 lo->lo_queue->queuedata = lo; 831 lo->lo_queue->queuedata = lo;
832 lo->lo_queue->unplug_fn = loop_unplug; 832 lo->lo_queue->unplug_fn = loop_unplug;
833 833
834 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 834 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
835 blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH); 835 blk_queue_flush(lo->lo_queue, REQ_FLUSH);
836 836
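
For reference, a minimal sketch (hypothetical request queue q, not from this patch) of how a driver declares its flush capabilities with the new blk_queue_flush() interface; the loop case above declares only REQ_FLUSH, and only when the backing file provides an fsync method (see the condition above).

	/* volatile write cache that can be flushed (the loop case above) */
	blk_queue_flush(q, REQ_FLUSH);

	/* write cache plus support for FUA writes */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

	/* write-through or no cache: nothing to declare, 0 is also the default */
	blk_queue_flush(q, 0);
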
837 set_capacity(lo->lo_disk, size); 837 set_capacity(lo->lo_disk, size);
838 bd_set_size(bdev, size << 9); 838 bd_set_size(bdev, size << 9);
839 /* let user-space know about the new size */ 839 /* let user-space know about the new size */
840 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 840 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
841 841
842 set_blocksize(bdev, lo_blocksize); 842 set_blocksize(bdev, lo_blocksize);
843 843
844 lo->lo_thread = kthread_create(loop_thread, lo, "loop%d", 844 lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
845 lo->lo_number); 845 lo->lo_number);
846 if (IS_ERR(lo->lo_thread)) { 846 if (IS_ERR(lo->lo_thread)) {
847 error = PTR_ERR(lo->lo_thread); 847 error = PTR_ERR(lo->lo_thread);
848 goto out_clr; 848 goto out_clr;
849 } 849 }
850 lo->lo_state = Lo_bound; 850 lo->lo_state = Lo_bound;
851 wake_up_process(lo->lo_thread); 851 wake_up_process(lo->lo_thread);
852 if (max_part > 0) 852 if (max_part > 0)
853 ioctl_by_bdev(bdev, BLKRRPART, 0); 853 ioctl_by_bdev(bdev, BLKRRPART, 0);
854 return 0; 854 return 0;
855 855
856 out_clr: 856 out_clr:
857 lo->lo_thread = NULL; 857 lo->lo_thread = NULL;
858 lo->lo_device = NULL; 858 lo->lo_device = NULL;
859 lo->lo_backing_file = NULL; 859 lo->lo_backing_file = NULL;
860 lo->lo_flags = 0; 860 lo->lo_flags = 0;
861 set_capacity(lo->lo_disk, 0); 861 set_capacity(lo->lo_disk, 0);
862 invalidate_bdev(bdev); 862 invalidate_bdev(bdev);
863 bd_set_size(bdev, 0); 863 bd_set_size(bdev, 0);
864 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 864 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
865 mapping_set_gfp_mask(mapping, lo->old_gfp_mask); 865 mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
866 lo->lo_state = Lo_unbound; 866 lo->lo_state = Lo_unbound;
867 out_putf: 867 out_putf:
868 fput(file); 868 fput(file);
869 out: 869 out:
870 /* This is safe: open() is still holding a reference. */ 870 /* This is safe: open() is still holding a reference. */
871 module_put(THIS_MODULE); 871 module_put(THIS_MODULE);
872 return error; 872 return error;
873 } 873 }
874 874
875 static int 875 static int
876 loop_release_xfer(struct loop_device *lo) 876 loop_release_xfer(struct loop_device *lo)
877 { 877 {
878 int err = 0; 878 int err = 0;
879 struct loop_func_table *xfer = lo->lo_encryption; 879 struct loop_func_table *xfer = lo->lo_encryption;
880 880
881 if (xfer) { 881 if (xfer) {
882 if (xfer->release) 882 if (xfer->release)
883 err = xfer->release(lo); 883 err = xfer->release(lo);
884 lo->transfer = NULL; 884 lo->transfer = NULL;
885 lo->lo_encryption = NULL; 885 lo->lo_encryption = NULL;
886 module_put(xfer->owner); 886 module_put(xfer->owner);
887 } 887 }
888 return err; 888 return err;
889 } 889 }
890 890
891 static int 891 static int
892 loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, 892 loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
893 const struct loop_info64 *i) 893 const struct loop_info64 *i)
894 { 894 {
895 int err = 0; 895 int err = 0;
896 896
897 if (xfer) { 897 if (xfer) {
898 struct module *owner = xfer->owner; 898 struct module *owner = xfer->owner;
899 899
900 if (!try_module_get(owner)) 900 if (!try_module_get(owner))
901 return -EINVAL; 901 return -EINVAL;
902 if (xfer->init) 902 if (xfer->init)
903 err = xfer->init(lo, i); 903 err = xfer->init(lo, i);
904 if (err) 904 if (err)
905 module_put(owner); 905 module_put(owner);
906 else 906 else
907 lo->lo_encryption = xfer; 907 lo->lo_encryption = xfer;
908 } 908 }
909 return err; 909 return err;
910 } 910 }
911 911
912 static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) 912 static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
913 { 913 {
914 struct file *filp = lo->lo_backing_file; 914 struct file *filp = lo->lo_backing_file;
915 gfp_t gfp = lo->old_gfp_mask; 915 gfp_t gfp = lo->old_gfp_mask;
916 916
917 if (lo->lo_state != Lo_bound) 917 if (lo->lo_state != Lo_bound)
918 return -ENXIO; 918 return -ENXIO;
919 919
920 if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ 920 if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
921 return -EBUSY; 921 return -EBUSY;
922 922
923 if (filp == NULL) 923 if (filp == NULL)
924 return -EINVAL; 924 return -EINVAL;
925 925
926 spin_lock_irq(&lo->lo_lock); 926 spin_lock_irq(&lo->lo_lock);
927 lo->lo_state = Lo_rundown; 927 lo->lo_state = Lo_rundown;
928 spin_unlock_irq(&lo->lo_lock); 928 spin_unlock_irq(&lo->lo_lock);
929 929
930 kthread_stop(lo->lo_thread); 930 kthread_stop(lo->lo_thread);
931 931
932 lo->lo_queue->unplug_fn = NULL; 932 lo->lo_queue->unplug_fn = NULL;
933 lo->lo_backing_file = NULL; 933 lo->lo_backing_file = NULL;
934 934
935 loop_release_xfer(lo); 935 loop_release_xfer(lo);
936 lo->transfer = NULL; 936 lo->transfer = NULL;
937 lo->ioctl = NULL; 937 lo->ioctl = NULL;
938 lo->lo_device = NULL; 938 lo->lo_device = NULL;
939 lo->lo_encryption = NULL; 939 lo->lo_encryption = NULL;
940 lo->lo_offset = 0; 940 lo->lo_offset = 0;
941 lo->lo_sizelimit = 0; 941 lo->lo_sizelimit = 0;
942 lo->lo_encrypt_key_size = 0; 942 lo->lo_encrypt_key_size = 0;
943 lo->lo_flags = 0; 943 lo->lo_flags = 0;
944 lo->lo_thread = NULL; 944 lo->lo_thread = NULL;
945 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 945 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
946 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 946 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
947 memset(lo->lo_file_name, 0, LO_NAME_SIZE); 947 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
948 if (bdev) 948 if (bdev)
949 invalidate_bdev(bdev); 949 invalidate_bdev(bdev);
950 set_capacity(lo->lo_disk, 0); 950 set_capacity(lo->lo_disk, 0);
951 if (bdev) { 951 if (bdev) {
952 bd_set_size(bdev, 0); 952 bd_set_size(bdev, 0);
953 /* let user-space know about this change */ 953 /* let user-space know about this change */
954 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 954 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
955 } 955 }
956 mapping_set_gfp_mask(filp->f_mapping, gfp); 956 mapping_set_gfp_mask(filp->f_mapping, gfp);
957 lo->lo_state = Lo_unbound; 957 lo->lo_state = Lo_unbound;
958 /* This is safe: open() is still holding a reference. */ 958 /* This is safe: open() is still holding a reference. */
959 module_put(THIS_MODULE); 959 module_put(THIS_MODULE);
960 if (max_part > 0 && bdev) 960 if (max_part > 0 && bdev)
961 ioctl_by_bdev(bdev, BLKRRPART, 0); 961 ioctl_by_bdev(bdev, BLKRRPART, 0);
962 mutex_unlock(&lo->lo_ctl_mutex); 962 mutex_unlock(&lo->lo_ctl_mutex);
963 /* 963 /*
964 * Need not hold lo_ctl_mutex to fput backing file. 964 * Need not hold lo_ctl_mutex to fput backing file.
965 * Calling fput while holding lo_ctl_mutex triggers a possible 965 * Calling fput while holding lo_ctl_mutex triggers a possible
966 * circular lock dependency warning, as fput can take 966 * circular lock dependency warning, as fput can take
967 * bd_mutex, which is usually taken before lo_ctl_mutex. 967 * bd_mutex, which is usually taken before lo_ctl_mutex.
968 */ 968 */
969 fput(filp); 969 fput(filp);
970 return 0; 970 return 0;
971 } 971 }
972 972
973 static int 973 static int
974 loop_set_status(struct loop_device *lo, const struct loop_info64 *info) 974 loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
975 { 975 {
976 int err; 976 int err;
977 struct loop_func_table *xfer; 977 struct loop_func_table *xfer;
978 uid_t uid = current_uid(); 978 uid_t uid = current_uid();
979 979
980 if (lo->lo_encrypt_key_size && 980 if (lo->lo_encrypt_key_size &&
981 lo->lo_key_owner != uid && 981 lo->lo_key_owner != uid &&
982 !capable(CAP_SYS_ADMIN)) 982 !capable(CAP_SYS_ADMIN))
983 return -EPERM; 983 return -EPERM;
984 if (lo->lo_state != Lo_bound) 984 if (lo->lo_state != Lo_bound)
985 return -ENXIO; 985 return -ENXIO;
986 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 986 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
987 return -EINVAL; 987 return -EINVAL;
988 988
989 err = loop_release_xfer(lo); 989 err = loop_release_xfer(lo);
990 if (err) 990 if (err)
991 return err; 991 return err;
992 992
993 if (info->lo_encrypt_type) { 993 if (info->lo_encrypt_type) {
994 unsigned int type = info->lo_encrypt_type; 994 unsigned int type = info->lo_encrypt_type;
995 995
996 if (type >= MAX_LO_CRYPT) 996 if (type >= MAX_LO_CRYPT)
997 return -EINVAL; 997 return -EINVAL;
998 xfer = xfer_funcs[type]; 998 xfer = xfer_funcs[type];
999 if (xfer == NULL) 999 if (xfer == NULL)
1000 return -EINVAL; 1000 return -EINVAL;
1001 } else 1001 } else
1002 xfer = NULL; 1002 xfer = NULL;
1003 1003
1004 err = loop_init_xfer(lo, xfer, info); 1004 err = loop_init_xfer(lo, xfer, info);
1005 if (err) 1005 if (err)
1006 return err; 1006 return err;
1007 1007
1008 if (lo->lo_offset != info->lo_offset || 1008 if (lo->lo_offset != info->lo_offset ||
1009 lo->lo_sizelimit != info->lo_sizelimit) { 1009 lo->lo_sizelimit != info->lo_sizelimit) {
1010 lo->lo_offset = info->lo_offset; 1010 lo->lo_offset = info->lo_offset;
1011 lo->lo_sizelimit = info->lo_sizelimit; 1011 lo->lo_sizelimit = info->lo_sizelimit;
1012 if (figure_loop_size(lo)) 1012 if (figure_loop_size(lo))
1013 return -EFBIG; 1013 return -EFBIG;
1014 } 1014 }
1015 1015
1016 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1016 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
1017 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 1017 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
1018 lo->lo_file_name[LO_NAME_SIZE-1] = 0; 1018 lo->lo_file_name[LO_NAME_SIZE-1] = 0;
1019 lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; 1019 lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
1020 1020
1021 if (!xfer) 1021 if (!xfer)
1022 xfer = &none_funcs; 1022 xfer = &none_funcs;
1023 lo->transfer = xfer->transfer; 1023 lo->transfer = xfer->transfer;
1024 lo->ioctl = xfer->ioctl; 1024 lo->ioctl = xfer->ioctl;
1025 1025
1026 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != 1026 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
1027 (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 1027 (info->lo_flags & LO_FLAGS_AUTOCLEAR))
1028 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 1028 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
1029 1029
1030 lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1030 lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1031 lo->lo_init[0] = info->lo_init[0]; 1031 lo->lo_init[0] = info->lo_init[0];
1032 lo->lo_init[1] = info->lo_init[1]; 1032 lo->lo_init[1] = info->lo_init[1];
1033 if (info->lo_encrypt_key_size) { 1033 if (info->lo_encrypt_key_size) {
1034 memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, 1034 memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
1035 info->lo_encrypt_key_size); 1035 info->lo_encrypt_key_size);
1036 lo->lo_key_owner = uid; 1036 lo->lo_key_owner = uid;
1037 } 1037 }
1038 1038
1039 return 0; 1039 return 0;
1040 } 1040 }
1041 1041
1042 static int 1042 static int
1043 loop_get_status(struct loop_device *lo, struct loop_info64 *info) 1043 loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1044 { 1044 {
1045 struct file *file = lo->lo_backing_file; 1045 struct file *file = lo->lo_backing_file;
1046 struct kstat stat; 1046 struct kstat stat;
1047 int error; 1047 int error;
1048 1048
1049 if (lo->lo_state != Lo_bound) 1049 if (lo->lo_state != Lo_bound)
1050 return -ENXIO; 1050 return -ENXIO;
1051 error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat); 1051 error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
1052 if (error) 1052 if (error)
1053 return error; 1053 return error;
1054 memset(info, 0, sizeof(*info)); 1054 memset(info, 0, sizeof(*info));
1055 info->lo_number = lo->lo_number; 1055 info->lo_number = lo->lo_number;
1056 info->lo_device = huge_encode_dev(stat.dev); 1056 info->lo_device = huge_encode_dev(stat.dev);
1057 info->lo_inode = stat.ino; 1057 info->lo_inode = stat.ino;
1058 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); 1058 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
1059 info->lo_offset = lo->lo_offset; 1059 info->lo_offset = lo->lo_offset;
1060 info->lo_sizelimit = lo->lo_sizelimit; 1060 info->lo_sizelimit = lo->lo_sizelimit;
1061 info->lo_flags = lo->lo_flags; 1061 info->lo_flags = lo->lo_flags;
1062 memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); 1062 memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
1063 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); 1063 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1064 info->lo_encrypt_type = 1064 info->lo_encrypt_type =
1065 lo->lo_encryption ? lo->lo_encryption->number : 0; 1065 lo->lo_encryption ? lo->lo_encryption->number : 0;
1066 if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { 1066 if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1067 info->lo_encrypt_key_size = lo->lo_encrypt_key_size; 1067 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1068 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, 1068 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1069 lo->lo_encrypt_key_size); 1069 lo->lo_encrypt_key_size);
1070 } 1070 }
1071 return 0; 1071 return 0;
1072 } 1072 }
1073 1073
1074 static void 1074 static void
1075 loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) 1075 loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1076 { 1076 {
1077 memset(info64, 0, sizeof(*info64)); 1077 memset(info64, 0, sizeof(*info64));
1078 info64->lo_number = info->lo_number; 1078 info64->lo_number = info->lo_number;
1079 info64->lo_device = info->lo_device; 1079 info64->lo_device = info->lo_device;
1080 info64->lo_inode = info->lo_inode; 1080 info64->lo_inode = info->lo_inode;
1081 info64->lo_rdevice = info->lo_rdevice; 1081 info64->lo_rdevice = info->lo_rdevice;
1082 info64->lo_offset = info->lo_offset; 1082 info64->lo_offset = info->lo_offset;
1083 info64->lo_sizelimit = 0; 1083 info64->lo_sizelimit = 0;
1084 info64->lo_encrypt_type = info->lo_encrypt_type; 1084 info64->lo_encrypt_type = info->lo_encrypt_type;
1085 info64->lo_encrypt_key_size = info->lo_encrypt_key_size; 1085 info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1086 info64->lo_flags = info->lo_flags; 1086 info64->lo_flags = info->lo_flags;
1087 info64->lo_init[0] = info->lo_init[0]; 1087 info64->lo_init[0] = info->lo_init[0];
1088 info64->lo_init[1] = info->lo_init[1]; 1088 info64->lo_init[1] = info->lo_init[1];
1089 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1089 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1090 memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); 1090 memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
1091 else 1091 else
1092 memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); 1092 memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1093 memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); 1093 memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1094 } 1094 }
1095 1095
1096 static int 1096 static int
1097 loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) 1097 loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
1098 { 1098 {
1099 memset(info, 0, sizeof(*info)); 1099 memset(info, 0, sizeof(*info));
1100 info->lo_number = info64->lo_number; 1100 info->lo_number = info64->lo_number;
1101 info->lo_device = info64->lo_device; 1101 info->lo_device = info64->lo_device;
1102 info->lo_inode = info64->lo_inode; 1102 info->lo_inode = info64->lo_inode;
1103 info->lo_rdevice = info64->lo_rdevice; 1103 info->lo_rdevice = info64->lo_rdevice;
1104 info->lo_offset = info64->lo_offset; 1104 info->lo_offset = info64->lo_offset;
1105 info->lo_encrypt_type = info64->lo_encrypt_type; 1105 info->lo_encrypt_type = info64->lo_encrypt_type;
1106 info->lo_encrypt_key_size = info64->lo_encrypt_key_size; 1106 info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1107 info->lo_flags = info64->lo_flags; 1107 info->lo_flags = info64->lo_flags;
1108 info->lo_init[0] = info64->lo_init[0]; 1108 info->lo_init[0] = info64->lo_init[0];
1109 info->lo_init[1] = info64->lo_init[1]; 1109 info->lo_init[1] = info64->lo_init[1];
1110 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1110 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1111 memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1111 memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1112 else 1112 else
1113 memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); 1113 memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1114 memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1114 memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1115 1115
1116 /* error in case values were truncated */ 1116 /* error in case values were truncated */
1117 if (info->lo_device != info64->lo_device || 1117 if (info->lo_device != info64->lo_device ||
1118 info->lo_rdevice != info64->lo_rdevice || 1118 info->lo_rdevice != info64->lo_rdevice ||
1119 info->lo_inode != info64->lo_inode || 1119 info->lo_inode != info64->lo_inode ||
1120 info->lo_offset != info64->lo_offset) 1120 info->lo_offset != info64->lo_offset)
1121 return -EOVERFLOW; 1121 return -EOVERFLOW;
1122 1122
1123 return 0; 1123 return 0;
1124 } 1124 }
1125 1125
1126 static int 1126 static int
1127 loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) 1127 loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
1128 { 1128 {
1129 struct loop_info info; 1129 struct loop_info info;
1130 struct loop_info64 info64; 1130 struct loop_info64 info64;
1131 1131
1132 if (copy_from_user(&info, arg, sizeof (struct loop_info))) 1132 if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1133 return -EFAULT; 1133 return -EFAULT;
1134 loop_info64_from_old(&info, &info64); 1134 loop_info64_from_old(&info, &info64);
1135 return loop_set_status(lo, &info64); 1135 return loop_set_status(lo, &info64);
1136 } 1136 }
1137 1137
1138 static int 1138 static int
1139 loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) 1139 loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
1140 { 1140 {
1141 struct loop_info64 info64; 1141 struct loop_info64 info64;
1142 1142
1143 if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) 1143 if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1144 return -EFAULT; 1144 return -EFAULT;
1145 return loop_set_status(lo, &info64); 1145 return loop_set_status(lo, &info64);
1146 } 1146 }
1147 1147
1148 static int 1148 static int
1149 loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { 1149 loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
1150 struct loop_info info; 1150 struct loop_info info;
1151 struct loop_info64 info64; 1151 struct loop_info64 info64;
1152 int err = 0; 1152 int err = 0;
1153 1153
1154 if (!arg) 1154 if (!arg)
1155 err = -EINVAL; 1155 err = -EINVAL;
1156 if (!err) 1156 if (!err)
1157 err = loop_get_status(lo, &info64); 1157 err = loop_get_status(lo, &info64);
1158 if (!err) 1158 if (!err)
1159 err = loop_info64_to_old(&info64, &info); 1159 err = loop_info64_to_old(&info64, &info);
1160 if (!err && copy_to_user(arg, &info, sizeof(info))) 1160 if (!err && copy_to_user(arg, &info, sizeof(info)))
1161 err = -EFAULT; 1161 err = -EFAULT;
1162 1162
1163 return err; 1163 return err;
1164 } 1164 }
1165 1165
1166 static int 1166 static int
1167 loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { 1167 loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
1168 struct loop_info64 info64; 1168 struct loop_info64 info64;
1169 int err = 0; 1169 int err = 0;
1170 1170
1171 if (!arg) 1171 if (!arg)
1172 err = -EINVAL; 1172 err = -EINVAL;
1173 if (!err) 1173 if (!err)
1174 err = loop_get_status(lo, &info64); 1174 err = loop_get_status(lo, &info64);
1175 if (!err && copy_to_user(arg, &info64, sizeof(info64))) 1175 if (!err && copy_to_user(arg, &info64, sizeof(info64)))
1176 err = -EFAULT; 1176 err = -EFAULT;
1177 1177
1178 return err; 1178 return err;
1179 } 1179 }
1180 1180
1181 static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) 1181 static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
1182 { 1182 {
1183 int err; 1183 int err;
1184 sector_t sec; 1184 sector_t sec;
1185 loff_t sz; 1185 loff_t sz;
1186 1186
1187 err = -ENXIO; 1187 err = -ENXIO;
1188 if (unlikely(lo->lo_state != Lo_bound)) 1188 if (unlikely(lo->lo_state != Lo_bound))
1189 goto out; 1189 goto out;
1190 err = figure_loop_size(lo); 1190 err = figure_loop_size(lo);
1191 if (unlikely(err)) 1191 if (unlikely(err))
1192 goto out; 1192 goto out;
1193 sec = get_capacity(lo->lo_disk); 1193 sec = get_capacity(lo->lo_disk);
1194 /* the width of sector_t may be too narrow for the bit shift */ 1194 /* the width of sector_t may be too narrow for the bit shift */
1195 sz = sec; 1195 sz = sec;
1196 sz <<= 9; 1196 sz <<= 9;
1197 mutex_lock(&bdev->bd_mutex); 1197 mutex_lock(&bdev->bd_mutex);
1198 bd_set_size(bdev, sz); 1198 bd_set_size(bdev, sz);
1199 /* let user-space know about the new size */ 1199 /* let user-space know about the new size */
1200 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1200 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
1201 mutex_unlock(&bdev->bd_mutex); 1201 mutex_unlock(&bdev->bd_mutex);
1202 1202
1203 out: 1203 out:
1204 return err; 1204 return err;
1205 } 1205 }
1206 1206
1207 static int lo_ioctl(struct block_device *bdev, fmode_t mode, 1207 static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1208 unsigned int cmd, unsigned long arg) 1208 unsigned int cmd, unsigned long arg)
1209 { 1209 {
1210 struct loop_device *lo = bdev->bd_disk->private_data; 1210 struct loop_device *lo = bdev->bd_disk->private_data;
1211 int err; 1211 int err;
1212 1212
1213 mutex_lock_nested(&lo->lo_ctl_mutex, 1); 1213 mutex_lock_nested(&lo->lo_ctl_mutex, 1);
1214 switch (cmd) { 1214 switch (cmd) {
1215 case LOOP_SET_FD: 1215 case LOOP_SET_FD:
1216 err = loop_set_fd(lo, mode, bdev, arg); 1216 err = loop_set_fd(lo, mode, bdev, arg);
1217 break; 1217 break;
1218 case LOOP_CHANGE_FD: 1218 case LOOP_CHANGE_FD:
1219 err = loop_change_fd(lo, bdev, arg); 1219 err = loop_change_fd(lo, bdev, arg);
1220 break; 1220 break;
1221 case LOOP_CLR_FD: 1221 case LOOP_CLR_FD:
1222 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ 1222 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
1223 err = loop_clr_fd(lo, bdev); 1223 err = loop_clr_fd(lo, bdev);
1224 if (!err) 1224 if (!err)
1225 goto out_unlocked; 1225 goto out_unlocked;
1226 break; 1226 break;
1227 case LOOP_SET_STATUS: 1227 case LOOP_SET_STATUS:
1228 err = loop_set_status_old(lo, (struct loop_info __user *) arg); 1228 err = loop_set_status_old(lo, (struct loop_info __user *) arg);
1229 break; 1229 break;
1230 case LOOP_GET_STATUS: 1230 case LOOP_GET_STATUS:
1231 err = loop_get_status_old(lo, (struct loop_info __user *) arg); 1231 err = loop_get_status_old(lo, (struct loop_info __user *) arg);
1232 break; 1232 break;
1233 case LOOP_SET_STATUS64: 1233 case LOOP_SET_STATUS64:
1234 err = loop_set_status64(lo, (struct loop_info64 __user *) arg); 1234 err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
1235 break; 1235 break;
1236 case LOOP_GET_STATUS64: 1236 case LOOP_GET_STATUS64:
1237 err = loop_get_status64(lo, (struct loop_info64 __user *) arg); 1237 err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
1238 break; 1238 break;
1239 case LOOP_SET_CAPACITY: 1239 case LOOP_SET_CAPACITY:
1240 err = -EPERM; 1240 err = -EPERM;
1241 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1241 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
1242 err = loop_set_capacity(lo, bdev); 1242 err = loop_set_capacity(lo, bdev);
1243 break; 1243 break;
1244 default: 1244 default:
1245 err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; 1245 err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
1246 } 1246 }
1247 mutex_unlock(&lo->lo_ctl_mutex); 1247 mutex_unlock(&lo->lo_ctl_mutex);
1248 1248
1249 out_unlocked: 1249 out_unlocked:
1250 return err; 1250 return err;
1251 } 1251 }
1252 1252
1253 #ifdef CONFIG_COMPAT 1253 #ifdef CONFIG_COMPAT
1254 struct compat_loop_info { 1254 struct compat_loop_info {
1255 compat_int_t lo_number; /* ioctl r/o */ 1255 compat_int_t lo_number; /* ioctl r/o */
1256 compat_dev_t lo_device; /* ioctl r/o */ 1256 compat_dev_t lo_device; /* ioctl r/o */
1257 compat_ulong_t lo_inode; /* ioctl r/o */ 1257 compat_ulong_t lo_inode; /* ioctl r/o */
1258 compat_dev_t lo_rdevice; /* ioctl r/o */ 1258 compat_dev_t lo_rdevice; /* ioctl r/o */
1259 compat_int_t lo_offset; 1259 compat_int_t lo_offset;
1260 compat_int_t lo_encrypt_type; 1260 compat_int_t lo_encrypt_type;
1261 compat_int_t lo_encrypt_key_size; /* ioctl w/o */ 1261 compat_int_t lo_encrypt_key_size; /* ioctl w/o */
1262 compat_int_t lo_flags; /* ioctl r/o */ 1262 compat_int_t lo_flags; /* ioctl r/o */
1263 char lo_name[LO_NAME_SIZE]; 1263 char lo_name[LO_NAME_SIZE];
1264 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ 1264 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
1265 compat_ulong_t lo_init[2]; 1265 compat_ulong_t lo_init[2];
1266 char reserved[4]; 1266 char reserved[4];
1267 }; 1267 };
1268 1268
1269 /* 1269 /*
1270 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info 1270 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
1271 * - noinlined to reduce stack space usage in main part of driver 1271 * - noinlined to reduce stack space usage in main part of driver
1272 */ 1272 */
1273 static noinline int 1273 static noinline int
1274 loop_info64_from_compat(const struct compat_loop_info __user *arg, 1274 loop_info64_from_compat(const struct compat_loop_info __user *arg,
1275 struct loop_info64 *info64) 1275 struct loop_info64 *info64)
1276 { 1276 {
1277 struct compat_loop_info info; 1277 struct compat_loop_info info;
1278 1278
1279 if (copy_from_user(&info, arg, sizeof(info))) 1279 if (copy_from_user(&info, arg, sizeof(info)))
1280 return -EFAULT; 1280 return -EFAULT;
1281 1281
1282 memset(info64, 0, sizeof(*info64)); 1282 memset(info64, 0, sizeof(*info64));
1283 info64->lo_number = info.lo_number; 1283 info64->lo_number = info.lo_number;
1284 info64->lo_device = info.lo_device; 1284 info64->lo_device = info.lo_device;
1285 info64->lo_inode = info.lo_inode; 1285 info64->lo_inode = info.lo_inode;
1286 info64->lo_rdevice = info.lo_rdevice; 1286 info64->lo_rdevice = info.lo_rdevice;
1287 info64->lo_offset = info.lo_offset; 1287 info64->lo_offset = info.lo_offset;
1288 info64->lo_sizelimit = 0; 1288 info64->lo_sizelimit = 0;
1289 info64->lo_encrypt_type = info.lo_encrypt_type; 1289 info64->lo_encrypt_type = info.lo_encrypt_type;
1290 info64->lo_encrypt_key_size = info.lo_encrypt_key_size; 1290 info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
1291 info64->lo_flags = info.lo_flags; 1291 info64->lo_flags = info.lo_flags;
1292 info64->lo_init[0] = info.lo_init[0]; 1292 info64->lo_init[0] = info.lo_init[0];
1293 info64->lo_init[1] = info.lo_init[1]; 1293 info64->lo_init[1] = info.lo_init[1];
1294 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1294 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1295 memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); 1295 memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
1296 else 1296 else
1297 memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); 1297 memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
1298 memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); 1298 memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
1299 return 0; 1299 return 0;
1300 } 1300 }
1301 1301
1302 /* 1302 /*
1303 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace 1303 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
1304 * - noinlined to reduce stack space usage in main part of driver 1304 * - noinlined to reduce stack space usage in main part of driver
1305 */ 1305 */
1306 static noinline int 1306 static noinline int
1307 loop_info64_to_compat(const struct loop_info64 *info64, 1307 loop_info64_to_compat(const struct loop_info64 *info64,
1308 struct compat_loop_info __user *arg) 1308 struct compat_loop_info __user *arg)
1309 { 1309 {
1310 struct compat_loop_info info; 1310 struct compat_loop_info info;
1311 1311
1312 memset(&info, 0, sizeof(info)); 1312 memset(&info, 0, sizeof(info));
1313 info.lo_number = info64->lo_number; 1313 info.lo_number = info64->lo_number;
1314 info.lo_device = info64->lo_device; 1314 info.lo_device = info64->lo_device;
1315 info.lo_inode = info64->lo_inode; 1315 info.lo_inode = info64->lo_inode;
1316 info.lo_rdevice = info64->lo_rdevice; 1316 info.lo_rdevice = info64->lo_rdevice;
1317 info.lo_offset = info64->lo_offset; 1317 info.lo_offset = info64->lo_offset;
1318 info.lo_encrypt_type = info64->lo_encrypt_type; 1318 info.lo_encrypt_type = info64->lo_encrypt_type;
1319 info.lo_encrypt_key_size = info64->lo_encrypt_key_size; 1319 info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
1320 info.lo_flags = info64->lo_flags; 1320 info.lo_flags = info64->lo_flags;
1321 info.lo_init[0] = info64->lo_init[0]; 1321 info.lo_init[0] = info64->lo_init[0];
1322 info.lo_init[1] = info64->lo_init[1]; 1322 info.lo_init[1] = info64->lo_init[1];
1323 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1323 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1324 memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1324 memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1325 else 1325 else
1326 memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); 1326 memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
1327 memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1327 memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1328 1328
1329 /* error in case values were truncated */ 1329 /* error in case values were truncated */
1330 if (info.lo_device != info64->lo_device || 1330 if (info.lo_device != info64->lo_device ||
1331 info.lo_rdevice != info64->lo_rdevice || 1331 info.lo_rdevice != info64->lo_rdevice ||
1332 info.lo_inode != info64->lo_inode || 1332 info.lo_inode != info64->lo_inode ||
1333 info.lo_offset != info64->lo_offset || 1333 info.lo_offset != info64->lo_offset ||
1334 info.lo_init[0] != info64->lo_init[0] || 1334 info.lo_init[0] != info64->lo_init[0] ||
1335 info.lo_init[1] != info64->lo_init[1]) 1335 info.lo_init[1] != info64->lo_init[1])
1336 return -EOVERFLOW; 1336 return -EOVERFLOW;
1337 1337
1338 if (copy_to_user(arg, &info, sizeof(info))) 1338 if (copy_to_user(arg, &info, sizeof(info)))
1339 return -EFAULT; 1339 return -EFAULT;
1340 return 0; 1340 return 0;
1341 } 1341 }
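The -EOVERFLOW check above works by assign-then-compare: each wide field is copied into the narrower compat field and then compared back against the 64-bit original. A standalone sketch of the same pattern, using a hypothetical 32-bit destination field rather than the real loop structures:

#include <stdint.h>
#include <errno.h>

/* Hypothetical example: narrow a 64-bit offset to 32 bits and detect
 * truncation by comparing the stored value against the original. */
static int store_offset32(uint32_t *dst, uint64_t offset)
{
	*dst = offset;			/* implicit truncation to 32 bits */
	if (*dst != offset)		/* value did not survive the round trip */
		return -EOVERFLOW;
	return 0;
}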
1342 1342
1343 static int 1343 static int
1344 loop_set_status_compat(struct loop_device *lo, 1344 loop_set_status_compat(struct loop_device *lo,
1345 const struct compat_loop_info __user *arg) 1345 const struct compat_loop_info __user *arg)
1346 { 1346 {
1347 struct loop_info64 info64; 1347 struct loop_info64 info64;
1348 int ret; 1348 int ret;
1349 1349
1350 ret = loop_info64_from_compat(arg, &info64); 1350 ret = loop_info64_from_compat(arg, &info64);
1351 if (ret < 0) 1351 if (ret < 0)
1352 return ret; 1352 return ret;
1353 return loop_set_status(lo, &info64); 1353 return loop_set_status(lo, &info64);
1354 } 1354 }
1355 1355
1356 static int 1356 static int
1357 loop_get_status_compat(struct loop_device *lo, 1357 loop_get_status_compat(struct loop_device *lo,
1358 struct compat_loop_info __user *arg) 1358 struct compat_loop_info __user *arg)
1359 { 1359 {
1360 struct loop_info64 info64; 1360 struct loop_info64 info64;
1361 int err = 0; 1361 int err = 0;
1362 1362
1363 if (!arg) 1363 if (!arg)
1364 err = -EINVAL; 1364 err = -EINVAL;
1365 if (!err) 1365 if (!err)
1366 err = loop_get_status(lo, &info64); 1366 err = loop_get_status(lo, &info64);
1367 if (!err) 1367 if (!err)
1368 err = loop_info64_to_compat(&info64, arg); 1368 err = loop_info64_to_compat(&info64, arg);
1369 return err; 1369 return err;
1370 } 1370 }
1371 1371
1372 static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, 1372 static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
1373 unsigned int cmd, unsigned long arg) 1373 unsigned int cmd, unsigned long arg)
1374 { 1374 {
1375 struct loop_device *lo = bdev->bd_disk->private_data; 1375 struct loop_device *lo = bdev->bd_disk->private_data;
1376 int err; 1376 int err;
1377 1377
1378 	switch (cmd) { 1378 	switch (cmd) {
1379 case LOOP_SET_STATUS: 1379 case LOOP_SET_STATUS:
1380 mutex_lock(&lo->lo_ctl_mutex); 1380 mutex_lock(&lo->lo_ctl_mutex);
1381 err = loop_set_status_compat( 1381 err = loop_set_status_compat(
1382 lo, (const struct compat_loop_info __user *) arg); 1382 lo, (const struct compat_loop_info __user *) arg);
1383 mutex_unlock(&lo->lo_ctl_mutex); 1383 mutex_unlock(&lo->lo_ctl_mutex);
1384 break; 1384 break;
1385 case LOOP_GET_STATUS: 1385 case LOOP_GET_STATUS:
1386 mutex_lock(&lo->lo_ctl_mutex); 1386 mutex_lock(&lo->lo_ctl_mutex);
1387 err = loop_get_status_compat( 1387 err = loop_get_status_compat(
1388 lo, (struct compat_loop_info __user *) arg); 1388 lo, (struct compat_loop_info __user *) arg);
1389 mutex_unlock(&lo->lo_ctl_mutex); 1389 mutex_unlock(&lo->lo_ctl_mutex);
1390 break; 1390 break;
1391 case LOOP_SET_CAPACITY: 1391 case LOOP_SET_CAPACITY:
1392 case LOOP_CLR_FD: 1392 case LOOP_CLR_FD:
1393 case LOOP_GET_STATUS64: 1393 case LOOP_GET_STATUS64:
1394 case LOOP_SET_STATUS64: 1394 case LOOP_SET_STATUS64:
1395 arg = (unsigned long) compat_ptr(arg); 1395 arg = (unsigned long) compat_ptr(arg);
1396 case LOOP_SET_FD: 1396 case LOOP_SET_FD:
1397 case LOOP_CHANGE_FD: 1397 case LOOP_CHANGE_FD:
1398 err = lo_ioctl(bdev, mode, cmd, arg); 1398 err = lo_ioctl(bdev, mode, cmd, arg);
1399 break; 1399 break;
1400 default: 1400 default:
1401 err = -ENOIOCTLCMD; 1401 err = -ENOIOCTLCMD;
1402 break; 1402 break;
1403 } 1403 }
1404 return err; 1404 return err;
1405 } 1405 }
1406 #endif 1406 #endif
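lo_compat_ioctl() above shows the usual shape of a 32-bit ioctl thunk: translate the user pointer with compat_ptr(), then reuse the native handler. A reduced sketch of that pattern, with a hypothetical command number and handler standing in for the loop-specific ones:

#include <linux/blkdev.h>
#include <linux/compat.h>

#define MYDEV_GET_INFO	0x4d00	/* hypothetical ioctl command */

/* Native handler stub; the real one would act on the (now 64-bit clean)
 * pointer carried in arg. */
static int mydev_ioctl(struct block_device *bdev, fmode_t mode,
		       unsigned int cmd, unsigned long arg)
{
	return 0;
}

#ifdef CONFIG_COMPAT
static int mydev_compat_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case MYDEV_GET_INFO:
		/* 32-bit user pointer -> native representation */
		arg = (unsigned long)compat_ptr(arg);
		return mydev_ioctl(bdev, mode, cmd, arg);
	default:
		return -ENOIOCTLCMD;
	}
}
#endif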
1407 1407
1408 static int lo_open(struct block_device *bdev, fmode_t mode) 1408 static int lo_open(struct block_device *bdev, fmode_t mode)
1409 { 1409 {
1410 struct loop_device *lo = bdev->bd_disk->private_data; 1410 struct loop_device *lo = bdev->bd_disk->private_data;
1411 1411
1412 lock_kernel(); 1412 lock_kernel();
1413 mutex_lock(&lo->lo_ctl_mutex); 1413 mutex_lock(&lo->lo_ctl_mutex);
1414 lo->lo_refcnt++; 1414 lo->lo_refcnt++;
1415 mutex_unlock(&lo->lo_ctl_mutex); 1415 mutex_unlock(&lo->lo_ctl_mutex);
1416 unlock_kernel(); 1416 unlock_kernel();
1417 1417
1418 return 0; 1418 return 0;
1419 } 1419 }
1420 1420
1421 static int lo_release(struct gendisk *disk, fmode_t mode) 1421 static int lo_release(struct gendisk *disk, fmode_t mode)
1422 { 1422 {
1423 struct loop_device *lo = disk->private_data; 1423 struct loop_device *lo = disk->private_data;
1424 int err; 1424 int err;
1425 1425
1426 lock_kernel(); 1426 lock_kernel();
1427 mutex_lock(&lo->lo_ctl_mutex); 1427 mutex_lock(&lo->lo_ctl_mutex);
1428 1428
1429 if (--lo->lo_refcnt) 1429 if (--lo->lo_refcnt)
1430 goto out; 1430 goto out;
1431 1431
1432 if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { 1432 if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
1433 /* 1433 /*
1434 * In autoclear mode, stop the loop thread 1434 * In autoclear mode, stop the loop thread
1435 * and remove configuration after last close. 1435 * and remove configuration after last close.
1436 */ 1436 */
1437 err = loop_clr_fd(lo, NULL); 1437 err = loop_clr_fd(lo, NULL);
1438 if (!err) 1438 if (!err)
1439 goto out_unlocked; 1439 goto out_unlocked;
1440 } else { 1440 } else {
1441 /* 1441 /*
1442 * Otherwise keep thread (if running) and config, 1442 * Otherwise keep thread (if running) and config,
1443 * but flush possible ongoing bios in thread. 1443 * but flush possible ongoing bios in thread.
1444 */ 1444 */
1445 loop_flush(lo); 1445 loop_flush(lo);
1446 } 1446 }
1447 1447
1448 out: 1448 out:
1449 mutex_unlock(&lo->lo_ctl_mutex); 1449 mutex_unlock(&lo->lo_ctl_mutex);
1450 out_unlocked: 1450 out_unlocked:
1451 	unlock_kernel(); 1451 	unlock_kernel();
1452 return 0; 1452 return 0;
1453 } 1453 }
1454 1454
1455 static const struct block_device_operations lo_fops = { 1455 static const struct block_device_operations lo_fops = {
1456 .owner = THIS_MODULE, 1456 .owner = THIS_MODULE,
1457 .open = lo_open, 1457 .open = lo_open,
1458 .release = lo_release, 1458 .release = lo_release,
1459 .ioctl = lo_ioctl, 1459 .ioctl = lo_ioctl,
1460 #ifdef CONFIG_COMPAT 1460 #ifdef CONFIG_COMPAT
1461 .compat_ioctl = lo_compat_ioctl, 1461 .compat_ioctl = lo_compat_ioctl,
1462 #endif 1462 #endif
1463 }; 1463 };
1464 1464
1465 /* 1465 /*
1466 * And now the modules code and kernel interface. 1466 * And now the modules code and kernel interface.
1467 */ 1467 */
1468 static int max_loop; 1468 static int max_loop;
1469 module_param(max_loop, int, 0); 1469 module_param(max_loop, int, 0);
1470 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); 1470 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
1471 module_param(max_part, int, 0); 1471 module_param(max_part, int, 0);
1472 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); 1472 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
1473 MODULE_LICENSE("GPL"); 1473 MODULE_LICENSE("GPL");
1474 MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); 1474 MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
1475 1475
1476 int loop_register_transfer(struct loop_func_table *funcs) 1476 int loop_register_transfer(struct loop_func_table *funcs)
1477 { 1477 {
1478 unsigned int n = funcs->number; 1478 unsigned int n = funcs->number;
1479 1479
1480 if (n >= MAX_LO_CRYPT || xfer_funcs[n]) 1480 if (n >= MAX_LO_CRYPT || xfer_funcs[n])
1481 return -EINVAL; 1481 return -EINVAL;
1482 xfer_funcs[n] = funcs; 1482 xfer_funcs[n] = funcs;
1483 return 0; 1483 return 0;
1484 } 1484 }
1485 1485
1486 int loop_unregister_transfer(int number) 1486 int loop_unregister_transfer(int number)
1487 { 1487 {
1488 unsigned int n = number; 1488 unsigned int n = number;
1489 struct loop_device *lo; 1489 struct loop_device *lo;
1490 struct loop_func_table *xfer; 1490 struct loop_func_table *xfer;
1491 1491
1492 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) 1492 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
1493 return -EINVAL; 1493 return -EINVAL;
1494 1494
1495 xfer_funcs[n] = NULL; 1495 xfer_funcs[n] = NULL;
1496 1496
1497 list_for_each_entry(lo, &loop_devices, lo_list) { 1497 list_for_each_entry(lo, &loop_devices, lo_list) {
1498 mutex_lock(&lo->lo_ctl_mutex); 1498 mutex_lock(&lo->lo_ctl_mutex);
1499 1499
1500 if (lo->lo_encryption == xfer) 1500 if (lo->lo_encryption == xfer)
1501 loop_release_xfer(lo); 1501 loop_release_xfer(lo);
1502 1502
1503 mutex_unlock(&lo->lo_ctl_mutex); 1503 mutex_unlock(&lo->lo_ctl_mutex);
1504 } 1504 }
1505 1505
1506 return 0; 1506 return 0;
1507 } 1507 }
1508 1508
1509 EXPORT_SYMBOL(loop_register_transfer); 1509 EXPORT_SYMBOL(loop_register_transfer);
1510 EXPORT_SYMBOL(loop_unregister_transfer); 1510 EXPORT_SYMBOL(loop_unregister_transfer);
1511 1511
1512 static struct loop_device *loop_alloc(int i) 1512 static struct loop_device *loop_alloc(int i)
1513 { 1513 {
1514 struct loop_device *lo; 1514 struct loop_device *lo;
1515 struct gendisk *disk; 1515 struct gendisk *disk;
1516 1516
1517 lo = kzalloc(sizeof(*lo), GFP_KERNEL); 1517 lo = kzalloc(sizeof(*lo), GFP_KERNEL);
1518 if (!lo) 1518 if (!lo)
1519 goto out; 1519 goto out;
1520 1520
1521 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1521 lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1522 if (!lo->lo_queue) 1522 if (!lo->lo_queue)
1523 goto out_free_dev; 1523 goto out_free_dev;
1524 1524
1525 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1525 disk = lo->lo_disk = alloc_disk(1 << part_shift);
1526 if (!disk) 1526 if (!disk)
1527 goto out_free_queue; 1527 goto out_free_queue;
1528 1528
1529 mutex_init(&lo->lo_ctl_mutex); 1529 mutex_init(&lo->lo_ctl_mutex);
1530 lo->lo_number = i; 1530 lo->lo_number = i;
1531 lo->lo_thread = NULL; 1531 lo->lo_thread = NULL;
1532 init_waitqueue_head(&lo->lo_event); 1532 init_waitqueue_head(&lo->lo_event);
1533 spin_lock_init(&lo->lo_lock); 1533 spin_lock_init(&lo->lo_lock);
1534 disk->major = LOOP_MAJOR; 1534 disk->major = LOOP_MAJOR;
1535 disk->first_minor = i << part_shift; 1535 disk->first_minor = i << part_shift;
1536 disk->fops = &lo_fops; 1536 disk->fops = &lo_fops;
1537 disk->private_data = lo; 1537 disk->private_data = lo;
1538 disk->queue = lo->lo_queue; 1538 disk->queue = lo->lo_queue;
1539 sprintf(disk->disk_name, "loop%d", i); 1539 sprintf(disk->disk_name, "loop%d", i);
1540 return lo; 1540 return lo;
1541 1541
1542 out_free_queue: 1542 out_free_queue:
1543 blk_cleanup_queue(lo->lo_queue); 1543 blk_cleanup_queue(lo->lo_queue);
1544 out_free_dev: 1544 out_free_dev:
1545 kfree(lo); 1545 kfree(lo);
1546 out: 1546 out:
1547 return NULL; 1547 return NULL;
1548 } 1548 }
1549 1549
1550 static void loop_free(struct loop_device *lo) 1550 static void loop_free(struct loop_device *lo)
1551 { 1551 {
1552 blk_cleanup_queue(lo->lo_queue); 1552 blk_cleanup_queue(lo->lo_queue);
1553 put_disk(lo->lo_disk); 1553 put_disk(lo->lo_disk);
1554 list_del(&lo->lo_list); 1554 list_del(&lo->lo_list);
1555 kfree(lo); 1555 kfree(lo);
1556 } 1556 }
1557 1557
1558 static struct loop_device *loop_init_one(int i) 1558 static struct loop_device *loop_init_one(int i)
1559 { 1559 {
1560 struct loop_device *lo; 1560 struct loop_device *lo;
1561 1561
1562 list_for_each_entry(lo, &loop_devices, lo_list) { 1562 list_for_each_entry(lo, &loop_devices, lo_list) {
1563 if (lo->lo_number == i) 1563 if (lo->lo_number == i)
1564 return lo; 1564 return lo;
1565 } 1565 }
1566 1566
1567 lo = loop_alloc(i); 1567 lo = loop_alloc(i);
1568 if (lo) { 1568 if (lo) {
1569 add_disk(lo->lo_disk); 1569 add_disk(lo->lo_disk);
1570 list_add_tail(&lo->lo_list, &loop_devices); 1570 list_add_tail(&lo->lo_list, &loop_devices);
1571 } 1571 }
1572 return lo; 1572 return lo;
1573 } 1573 }
1574 1574
1575 static void loop_del_one(struct loop_device *lo) 1575 static void loop_del_one(struct loop_device *lo)
1576 { 1576 {
1577 del_gendisk(lo->lo_disk); 1577 del_gendisk(lo->lo_disk);
1578 loop_free(lo); 1578 loop_free(lo);
1579 } 1579 }
1580 1580
1581 static struct kobject *loop_probe(dev_t dev, int *part, void *data) 1581 static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1582 { 1582 {
1583 struct loop_device *lo; 1583 struct loop_device *lo;
1584 struct kobject *kobj; 1584 struct kobject *kobj;
1585 1585
1586 mutex_lock(&loop_devices_mutex); 1586 mutex_lock(&loop_devices_mutex);
1587 lo = loop_init_one(dev & MINORMASK); 1587 lo = loop_init_one(dev & MINORMASK);
1588 kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); 1588 kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
1589 mutex_unlock(&loop_devices_mutex); 1589 mutex_unlock(&loop_devices_mutex);
1590 1590
1591 *part = 0; 1591 *part = 0;
1592 return kobj; 1592 return kobj;
1593 } 1593 }
1594 1594
1595 static int __init loop_init(void) 1595 static int __init loop_init(void)
1596 { 1596 {
1597 int i, nr; 1597 int i, nr;
1598 unsigned long range; 1598 unsigned long range;
1599 struct loop_device *lo, *next; 1599 struct loop_device *lo, *next;
1600 1600
1601 /* 1601 /*
1602 * The loop module can now instantiate the underlying device 1602 * The loop module can now instantiate the underlying device
1603 * structure on demand when its dev node is accessed. 1603 * structure on demand when its dev node is accessed.
1604 * However, this does not work well with user space tools that don't 1604 * However, this does not work well with user space tools that don't
1605 * know about this feature. In order not to break any existing 1605 * know about this feature. In order not to break any existing
1606 * tools, we do the following: 1606 * tools, we do the following:
1607 * 1607 *
1608 * (1) if max_loop is specified, create that many devices upfront, and 1608 * (1) if max_loop is specified, create that many devices upfront, and
1609 * this also becomes a hard limit. 1609 * this also becomes a hard limit.
1610 * (2) if max_loop is not specified, create 8 loop devices on module 1610 * (2) if max_loop is not specified, create 8 loop devices on module
1611 * load; users can extend the set by creating dev nodes 1611 * load; users can extend the set by creating dev nodes
1612 * themselves and having the kernel instantiate the actual 1612 * themselves and having the kernel instantiate the actual
1613 * device on demand. 1613 * device on demand.
1614 */ 1614 */
1615 1615
1616 part_shift = 0; 1616 part_shift = 0;
1617 if (max_part > 0) 1617 if (max_part > 0)
1618 part_shift = fls(max_part); 1618 part_shift = fls(max_part);
1619 1619
1620 if (max_loop > 1UL << (MINORBITS - part_shift)) 1620 if (max_loop > 1UL << (MINORBITS - part_shift))
1621 return -EINVAL; 1621 return -EINVAL;
1622 1622
1623 if (max_loop) { 1623 if (max_loop) {
1624 nr = max_loop; 1624 nr = max_loop;
1625 range = max_loop; 1625 range = max_loop;
1626 } else { 1626 } else {
1627 nr = 8; 1627 nr = 8;
1628 range = 1UL << (MINORBITS - part_shift); 1628 range = 1UL << (MINORBITS - part_shift);
1629 } 1629 }
1630 1630
1631 if (register_blkdev(LOOP_MAJOR, "loop")) 1631 if (register_blkdev(LOOP_MAJOR, "loop"))
1632 return -EIO; 1632 return -EIO;
1633 1633
1634 for (i = 0; i < nr; i++) { 1634 for (i = 0; i < nr; i++) {
1635 lo = loop_alloc(i); 1635 lo = loop_alloc(i);
1636 if (!lo) 1636 if (!lo)
1637 goto Enomem; 1637 goto Enomem;
1638 list_add_tail(&lo->lo_list, &loop_devices); 1638 list_add_tail(&lo->lo_list, &loop_devices);
1639 } 1639 }
1640 1640
1641 /* point of no return */ 1641 /* point of no return */
1642 1642
1643 list_for_each_entry(lo, &loop_devices, lo_list) 1643 list_for_each_entry(lo, &loop_devices, lo_list)
1644 add_disk(lo->lo_disk); 1644 add_disk(lo->lo_disk);
1645 1645
1646 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1646 blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1647 THIS_MODULE, loop_probe, NULL, NULL); 1647 THIS_MODULE, loop_probe, NULL, NULL);
1648 1648
1649 printk(KERN_INFO "loop: module loaded\n"); 1649 printk(KERN_INFO "loop: module loaded\n");
1650 return 0; 1650 return 0;
1651 1651
1652 Enomem: 1652 Enomem:
1653 printk(KERN_INFO "loop: out of memory\n"); 1653 printk(KERN_INFO "loop: out of memory\n");
1654 1654
1655 list_for_each_entry_safe(lo, next, &loop_devices, lo_list) 1655 list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1656 loop_free(lo); 1656 loop_free(lo);
1657 1657
1658 unregister_blkdev(LOOP_MAJOR, "loop"); 1658 unregister_blkdev(LOOP_MAJOR, "loop");
1659 return -ENOMEM; 1659 return -ENOMEM;
1660 } 1660 }
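loop_init() splits the 20-bit minor space between devices and partitions: fls(max_part) low bits per device, the rest for device indexes, which is also why loop_alloc() sets first_minor = i << part_shift. A small userspace sketch of that arithmetic (names are local to the example; MINORBITS is assumed to be 20 as in the kernel headers):

#include <stdio.h>

#define MINORBITS	20	/* width of the minor number space */

/* fls()-style helper: index of the highest set bit, 0 for x == 0. */
static int fls_example(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned int max_part = 15;		/* e.g. "modprobe loop max_part=15" */
	int part_shift = fls_example(max_part);	/* 4 bits for partitions */
	unsigned long range = 1UL << (MINORBITS - part_shift);

	/* loop%d then gets first_minor = i << part_shift, and up to
	 * "range" devices fit in the remaining minor bits. */
	printf("part_shift=%d, device range=%lu\n", part_shift, range);
	return 0;
}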
1661 1661
1662 static void __exit loop_exit(void) 1662 static void __exit loop_exit(void)
1663 { 1663 {
1664 unsigned long range; 1664 unsigned long range;
1665 struct loop_device *lo, *next; 1665 struct loop_device *lo, *next;
1666 1666
1667 range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift); 1667 range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift);
1668 1668
1669 list_for_each_entry_safe(lo, next, &loop_devices, lo_list) 1669 list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1670 loop_del_one(lo); 1670 loop_del_one(lo);
1671 1671
1672 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1672 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
1673 unregister_blkdev(LOOP_MAJOR, "loop"); 1673 unregister_blkdev(LOOP_MAJOR, "loop");
1674 } 1674 }
1675 1675
1676 module_init(loop_init); 1676 module_init(loop_init);
1677 module_exit(loop_exit); 1677 module_exit(loop_exit);
1678 1678
1679 #ifndef MODULE 1679 #ifndef MODULE
1680 static int __init max_loop_setup(char *str) 1680 static int __init max_loop_setup(char *str)
1681 { 1681 {
1682 max_loop = simple_strtol(str, NULL, 0); 1682 max_loop = simple_strtol(str, NULL, 0);
1683 return 1; 1683 return 1;
1684 } 1684 }
1685 1685
1686 __setup("max_loop=", max_loop_setup); 1686 __setup("max_loop=", max_loop_setup);
1687 #endif 1687 #endif
1688 1688
drivers/block/osdblk.c
1 1
2 /* 2 /*
3 osdblk.c -- Export a single SCSI OSD object as a Linux block device 3 osdblk.c -- Export a single SCSI OSD object as a Linux block device
4 4
5 5
6 Copyright 2009 Red Hat, Inc. 6 Copyright 2009 Red Hat, Inc.
7 7
8 This program is free software; you can redistribute it and/or modify 8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by 9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation. 10 the Free Software Foundation.
11 11
12 This program is distributed in the hope that it will be useful, 12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details. 15 GNU General Public License for more details.
16 16
17 You should have received a copy of the GNU General Public License 17 You should have received a copy of the GNU General Public License
18 along with this program; see the file COPYING. If not, write to 18 along with this program; see the file COPYING. If not, write to
19 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 19 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 20
21 21
22 Instructions for use 22 Instructions for use
23 -------------------- 23 --------------------
24 24
25 1) Map a Linux block device to an existing OSD object. 25 1) Map a Linux block device to an existing OSD object.
26 26
27 In this example, we will use partition id 1234, object id 5678, 27 In this example, we will use partition id 1234, object id 5678,
28 OSD device /dev/osd1. 28 OSD device /dev/osd1.
29 29
30 $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add 30 $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
31 31
32 32
33 2) List all active blkdev<->object mappings. 33 2) List all active blkdev<->object mappings.
34 34
35 In this example, we have performed step #1 twice, creating two blkdevs, 35 In this example, we have performed step #1 twice, creating two blkdevs,
36 mapped to two separate OSD objects. 36 mapped to two separate OSD objects.
37 37
38 $ cat /sys/class/osdblk/list 38 $ cat /sys/class/osdblk/list
39 0 174 1234 5678 /dev/osd1 39 0 174 1234 5678 /dev/osd1
40 1 179 1994 897123 /dev/osd0 40 1 179 1994 897123 /dev/osd0
41 41
42 The columns, in order, are: 42 The columns, in order, are:
43 - blkdev unique id 43 - blkdev unique id
44 - blkdev assigned major 44 - blkdev assigned major
45 - OSD object partition id 45 - OSD object partition id
46 - OSD object id 46 - OSD object id
47 - OSD device 47 - OSD device
48 48
49 49
50 3) Remove an active blkdev<->object mapping. 50 3) Remove an active blkdev<->object mapping.
51 51
52 In this example, we remove the mapping with blkdev unique id 1. 52 In this example, we remove the mapping with blkdev unique id 1.
53 53
54 $ echo 1 > /sys/class/osdblk/remove 54 $ echo 1 > /sys/class/osdblk/remove
55 55
56 56
57 NOTE: The actual creation and deletion of OSD objects is outside the scope 57 NOTE: The actual creation and deletion of OSD objects is outside the scope
58 of this driver. 58 of this driver.
59 59
60 */ 60 */
61 61
62 #include <linux/kernel.h> 62 #include <linux/kernel.h>
63 #include <linux/device.h> 63 #include <linux/device.h>
64 #include <linux/module.h> 64 #include <linux/module.h>
65 #include <linux/fs.h> 65 #include <linux/fs.h>
66 #include <linux/slab.h> 66 #include <linux/slab.h>
67 #include <scsi/osd_initiator.h> 67 #include <scsi/osd_initiator.h>
68 #include <scsi/osd_attributes.h> 68 #include <scsi/osd_attributes.h>
69 #include <scsi/osd_sec.h> 69 #include <scsi/osd_sec.h>
70 #include <scsi/scsi_device.h> 70 #include <scsi/scsi_device.h>
71 71
72 #define DRV_NAME "osdblk" 72 #define DRV_NAME "osdblk"
73 #define PFX DRV_NAME ": " 73 #define PFX DRV_NAME ": "
74 74
75 /* #define _OSDBLK_DEBUG */ 75 /* #define _OSDBLK_DEBUG */
76 #ifdef _OSDBLK_DEBUG 76 #ifdef _OSDBLK_DEBUG
77 #define OSDBLK_DEBUG(fmt, a...) \ 77 #define OSDBLK_DEBUG(fmt, a...) \
78 printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) 78 printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
79 #else 79 #else
80 #define OSDBLK_DEBUG(fmt, a...) \ 80 #define OSDBLK_DEBUG(fmt, a...) \
81 do { if (0) printk(fmt, ##a); } while (0) 81 do { if (0) printk(fmt, ##a); } while (0)
82 #endif 82 #endif
83 83
84 MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 84 MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
85 MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); 85 MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
86 MODULE_LICENSE("GPL"); 86 MODULE_LICENSE("GPL");
87 87
88 struct osdblk_device; 88 struct osdblk_device;
89 89
90 enum { 90 enum {
91 OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ 91 OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */
92 OSDBLK_MAX_REQ = 32, /* max parallel requests */ 92 OSDBLK_MAX_REQ = 32, /* max parallel requests */
93 OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ 93 OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */
94 }; 94 };
95 95
96 struct osdblk_request { 96 struct osdblk_request {
97 struct request *rq; /* blk layer request */ 97 struct request *rq; /* blk layer request */
98 struct bio *bio; /* cloned bio */ 98 struct bio *bio; /* cloned bio */
99 struct osdblk_device *osdev; /* associated blkdev */ 99 struct osdblk_device *osdev; /* associated blkdev */
100 }; 100 };
101 101
102 struct osdblk_device { 102 struct osdblk_device {
103 int id; /* blkdev unique id */ 103 int id; /* blkdev unique id */
104 104
105 int major; /* blkdev assigned major */ 105 int major; /* blkdev assigned major */
106 struct gendisk *disk; /* blkdev's gendisk and rq */ 106 struct gendisk *disk; /* blkdev's gendisk and rq */
107 struct request_queue *q; 107 struct request_queue *q;
108 108
109 struct osd_dev *osd; /* associated OSD */ 109 struct osd_dev *osd; /* associated OSD */
110 110
111 char name[32]; /* blkdev name, e.g. osdblk34 */ 111 char name[32]; /* blkdev name, e.g. osdblk34 */
112 112
113 spinlock_t lock; /* queue lock */ 113 spinlock_t lock; /* queue lock */
114 114
115 struct osd_obj_id obj; /* OSD partition, obj id */ 115 struct osd_obj_id obj; /* OSD partition, obj id */
116 uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ 116 uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */
117 117
118 struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ 118 struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */
119 119
120 struct list_head node; 120 struct list_head node;
121 121
122 char osd_path[0]; /* OSD device path */ 122 char osd_path[0]; /* OSD device path */
123 }; 123 };
124 124
125 static struct class *class_osdblk; /* /sys/class/osdblk */ 125 static struct class *class_osdblk; /* /sys/class/osdblk */
126 static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 126 static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
127 static LIST_HEAD(osdblkdev_list); 127 static LIST_HEAD(osdblkdev_list);
128 128
129 static const struct block_device_operations osdblk_bd_ops = { 129 static const struct block_device_operations osdblk_bd_ops = {
130 .owner = THIS_MODULE, 130 .owner = THIS_MODULE,
131 }; 131 };
132 132
133 static const struct osd_attr g_attr_logical_length = ATTR_DEF( 133 static const struct osd_attr g_attr_logical_length = ATTR_DEF(
134 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 134 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
135 135
136 static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], 136 static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
137 const struct osd_obj_id *obj) 137 const struct osd_obj_id *obj)
138 { 138 {
139 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 139 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
140 } 140 }
141 141
142 /* copied from exofs; move to libosd? */ 142 /* copied from exofs; move to libosd? */
143 /* 143 /*
144 * Perform a synchronous OSD operation. copied from exofs; move to libosd? 144 * Perform a synchronous OSD operation. copied from exofs; move to libosd?
145 */ 145 */
146 static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) 146 static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
147 { 147 {
148 int ret; 148 int ret;
149 149
150 or->timeout = timeout; 150 or->timeout = timeout;
151 ret = osd_finalize_request(or, 0, credential, NULL); 151 ret = osd_finalize_request(or, 0, credential, NULL);
152 if (ret) 152 if (ret)
153 return ret; 153 return ret;
154 154
155 ret = osd_execute_request(or); 155 ret = osd_execute_request(or);
156 156
157 /* osd_req_decode_sense(or, ret); */ 157 /* osd_req_decode_sense(or, ret); */
158 return ret; 158 return ret;
159 } 159 }
160 160
161 /* 161 /*
162 * Perform an asynchronous OSD operation. copied from exofs; move to libosd? 162 * Perform an asynchronous OSD operation. copied from exofs; move to libosd?
163 */ 163 */
164 static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, 164 static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
165 void *caller_context, u8 *cred) 165 void *caller_context, u8 *cred)
166 { 166 {
167 int ret; 167 int ret;
168 168
169 ret = osd_finalize_request(or, 0, cred, NULL); 169 ret = osd_finalize_request(or, 0, cred, NULL);
170 if (ret) 170 if (ret)
171 return ret; 171 return ret;
172 172
173 ret = osd_execute_request_async(or, async_done, caller_context); 173 ret = osd_execute_request_async(or, async_done, caller_context);
174 174
175 return ret; 175 return ret;
176 } 176 }
177 177
178 /* copied from exofs; move to libosd? */ 178 /* copied from exofs; move to libosd? */
179 static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) 179 static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
180 { 180 {
181 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 181 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
182 void *iter = NULL; 182 void *iter = NULL;
183 int nelem; 183 int nelem;
184 184
185 do { 185 do {
186 nelem = 1; 186 nelem = 1;
187 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); 187 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
188 if ((cur_attr.attr_page == attr->attr_page) && 188 if ((cur_attr.attr_page == attr->attr_page) &&
189 (cur_attr.attr_id == attr->attr_id)) { 189 (cur_attr.attr_id == attr->attr_id)) {
190 attr->len = cur_attr.len; 190 attr->len = cur_attr.len;
191 attr->val_ptr = cur_attr.val_ptr; 191 attr->val_ptr = cur_attr.val_ptr;
192 return 0; 192 return 0;
193 } 193 }
194 } while (iter); 194 } while (iter);
195 195
196 return -EIO; 196 return -EIO;
197 } 197 }
198 198
199 static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) 199 static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
200 { 200 {
201 struct osd_request *or; 201 struct osd_request *or;
202 struct osd_attr attr; 202 struct osd_attr attr;
203 int ret; 203 int ret;
204 204
205 /* start request */ 205 /* start request */
206 or = osd_start_request(osdev->osd, GFP_KERNEL); 206 or = osd_start_request(osdev->osd, GFP_KERNEL);
207 if (!or) 207 if (!or)
208 return -ENOMEM; 208 return -ENOMEM;
209 209
210 /* create a get-attributes(length) request */ 210 /* create a get-attributes(length) request */
211 osd_req_get_attributes(or, &osdev->obj); 211 osd_req_get_attributes(or, &osdev->obj);
212 212
213 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); 213 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
214 214
215 /* execute op synchronously */ 215 /* execute op synchronously */
216 ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); 216 ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
217 if (ret) 217 if (ret)
218 goto out; 218 goto out;
219 219
220 /* extract length from returned attribute info */ 220 /* extract length from returned attribute info */
221 attr = g_attr_logical_length; 221 attr = g_attr_logical_length;
222 ret = extract_attr_from_req(or, &attr); 222 ret = extract_attr_from_req(or, &attr);
223 if (ret) 223 if (ret)
224 goto out; 224 goto out;
225 225
226 *size_out = get_unaligned_be64(attr.val_ptr); 226 *size_out = get_unaligned_be64(attr.val_ptr);
227 227
228 out: 228 out:
229 osd_end_request(or); 229 osd_end_request(or);
230 return ret; 230 return ret;
231 231
232 } 232 }
233 233
234 static void osdblk_osd_complete(struct osd_request *or, void *private) 234 static void osdblk_osd_complete(struct osd_request *or, void *private)
235 { 235 {
236 struct osdblk_request *orq = private; 236 struct osdblk_request *orq = private;
237 struct osd_sense_info osi; 237 struct osd_sense_info osi;
238 int ret = osd_req_decode_sense(or, &osi); 238 int ret = osd_req_decode_sense(or, &osi);
239 239
240 if (ret) { 240 if (ret) {
241 ret = -EIO; 241 ret = -EIO;
242 OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); 242 OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
243 } 243 }
244 244
245 /* complete OSD request */ 245 /* complete OSD request */
246 osd_end_request(or); 246 osd_end_request(or);
247 247
248 /* complete request passed to osdblk by block layer */ 248 /* complete request passed to osdblk by block layer */
249 __blk_end_request_all(orq->rq, ret); 249 __blk_end_request_all(orq->rq, ret);
250 } 250 }
251 251
252 static void bio_chain_put(struct bio *chain) 252 static void bio_chain_put(struct bio *chain)
253 { 253 {
254 struct bio *tmp; 254 struct bio *tmp;
255 255
256 while (chain) { 256 while (chain) {
257 tmp = chain; 257 tmp = chain;
258 chain = chain->bi_next; 258 chain = chain->bi_next;
259 259
260 bio_put(tmp); 260 bio_put(tmp);
261 } 261 }
262 } 262 }
263 263
264 static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) 264 static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
265 { 265 {
266 struct bio *tmp, *new_chain = NULL, *tail = NULL; 266 struct bio *tmp, *new_chain = NULL, *tail = NULL;
267 267
268 while (old_chain) { 268 while (old_chain) {
269 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 269 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
270 if (!tmp) 270 if (!tmp)
271 goto err_out; 271 goto err_out;
272 272
273 __bio_clone(tmp, old_chain); 273 __bio_clone(tmp, old_chain);
274 tmp->bi_bdev = NULL; 274 tmp->bi_bdev = NULL;
275 gfpmask &= ~__GFP_WAIT; 275 gfpmask &= ~__GFP_WAIT;
276 tmp->bi_next = NULL; 276 tmp->bi_next = NULL;
277 277
278 if (!new_chain) 278 if (!new_chain)
279 new_chain = tail = tmp; 279 new_chain = tail = tmp;
280 else { 280 else {
281 tail->bi_next = tmp; 281 tail->bi_next = tmp;
282 tail = tmp; 282 tail = tmp;
283 } 283 }
284 284
285 old_chain = old_chain->bi_next; 285 old_chain = old_chain->bi_next;
286 } 286 }
287 287
288 return new_chain; 288 return new_chain;
289 289
290 err_out: 290 err_out:
291 OSDBLK_DEBUG("bio_chain_clone with err\n"); 291 OSDBLK_DEBUG("bio_chain_clone with err\n");
292 bio_chain_put(new_chain); 292 bio_chain_put(new_chain);
293 return NULL; 293 return NULL;
294 } 294 }
295 295
296 static void osdblk_rq_fn(struct request_queue *q) 296 static void osdblk_rq_fn(struct request_queue *q)
297 { 297 {
298 struct osdblk_device *osdev = q->queuedata; 298 struct osdblk_device *osdev = q->queuedata;
299 299
300 while (1) { 300 while (1) {
301 struct request *rq; 301 struct request *rq;
302 struct osdblk_request *orq; 302 struct osdblk_request *orq;
303 struct osd_request *or; 303 struct osd_request *or;
304 struct bio *bio; 304 struct bio *bio;
305 bool do_write, do_flush; 305 bool do_write, do_flush;
306 306
307 /* peek at request from block layer */ 307 /* peek at request from block layer */
308 rq = blk_fetch_request(q); 308 rq = blk_fetch_request(q);
309 if (!rq) 309 if (!rq)
310 break; 310 break;
311 311
312 /* filter out block requests we don't understand */ 312 /* filter out block requests we don't understand */
313 if (rq->cmd_type != REQ_TYPE_FS && 313 if (rq->cmd_type != REQ_TYPE_FS &&
314 !(rq->cmd_flags & REQ_HARDBARRIER)) { 314 !(rq->cmd_flags & REQ_HARDBARRIER)) {
315 blk_end_request_all(rq, 0); 315 blk_end_request_all(rq, 0);
316 continue; 316 continue;
317 } 317 }
318 318
319 /* deduce our operation (read, write, flush) */ 319 /* deduce our operation (read, write, flush) */
320 /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] 320 /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
321 * into a clearly defined set of RPC commands: 321 * into a clearly defined set of RPC commands:
322 * read, write, flush, scsi command, power mgmt req, 322 * read, write, flush, scsi command, power mgmt req,
323 * driver-specific, etc. 323 * driver-specific, etc.
324 */ 324 */
325 325
326 do_flush = rq->cmd_flags & REQ_FLUSH; 326 do_flush = rq->cmd_flags & REQ_FLUSH;
327 do_write = (rq_data_dir(rq) == WRITE); 327 do_write = (rq_data_dir(rq) == WRITE);
328 328
329 if (!do_flush) { /* osd_flush does not use a bio */ 329 if (!do_flush) { /* osd_flush does not use a bio */
330 /* a bio clone to be passed down to OSD request */ 330 /* a bio clone to be passed down to OSD request */
331 bio = bio_chain_clone(rq->bio, GFP_ATOMIC); 331 bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
332 if (!bio) 332 if (!bio)
333 break; 333 break;
334 } else 334 } else
335 bio = NULL; 335 bio = NULL;
336 336
337 /* alloc internal OSD request, for OSD command execution */ 337 /* alloc internal OSD request, for OSD command execution */
338 or = osd_start_request(osdev->osd, GFP_ATOMIC); 338 or = osd_start_request(osdev->osd, GFP_ATOMIC);
339 if (!or) { 339 if (!or) {
340 bio_chain_put(bio); 340 bio_chain_put(bio);
341 OSDBLK_DEBUG("osd_start_request with err\n"); 341 OSDBLK_DEBUG("osd_start_request with err\n");
342 break; 342 break;
343 } 343 }
344 344
345 orq = &osdev->req[rq->tag]; 345 orq = &osdev->req[rq->tag];
346 orq->rq = rq; 346 orq->rq = rq;
347 orq->bio = bio; 347 orq->bio = bio;
348 orq->osdev = osdev; 348 orq->osdev = osdev;
349 349
350 /* init OSD command: flush, write or read */ 350 /* init OSD command: flush, write or read */
351 if (do_flush) 351 if (do_flush)
352 osd_req_flush_object(or, &osdev->obj, 352 osd_req_flush_object(or, &osdev->obj,
353 OSD_CDB_FLUSH_ALL, 0, 0); 353 OSD_CDB_FLUSH_ALL, 0, 0);
354 else if (do_write) 354 else if (do_write)
355 osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, 355 osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
356 bio, blk_rq_bytes(rq)); 356 bio, blk_rq_bytes(rq));
357 else 357 else
358 osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, 358 osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
359 bio, blk_rq_bytes(rq)); 359 bio, blk_rq_bytes(rq));
360 360
361 OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", 361 OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
362 do_flush ? "flush" : do_write ? 362 do_flush ? "flush" : do_write ?
363 "write" : "read", blk_rq_bytes(rq), 363 "write" : "read", blk_rq_bytes(rq),
364 blk_rq_pos(rq) * 512ULL); 364 blk_rq_pos(rq) * 512ULL);
365 365
366 /* begin OSD command execution */ 366 /* begin OSD command execution */
367 if (osd_async_op(or, osdblk_osd_complete, orq, 367 if (osd_async_op(or, osdblk_osd_complete, orq,
368 osdev->obj_cred)) { 368 osdev->obj_cred)) {
369 osd_end_request(or); 369 osd_end_request(or);
370 blk_requeue_request(q, rq); 370 blk_requeue_request(q, rq);
371 bio_chain_put(bio); 371 bio_chain_put(bio);
372 OSDBLK_DEBUG("osd_execute_request_async with err\n"); 372 OSDBLK_DEBUG("osd_execute_request_async with err\n");
373 break; 373 break;
374 } 374 }
375 375
376 /* remove the special 'flush' marker, now that the command 376 /* remove the special 'flush' marker, now that the command
377 * is executing 377 * is executing
378 */ 378 */
379 rq->special = NULL; 379 rq->special = NULL;
380 } 380 }
381 } 381 }
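osdblk_rq_fn() above keys its dispatch off REQ_FLUSH in rq->cmd_flags: a flush request carries no bio payload, everything else is a read or a write. A stripped-down sketch of that decision in a generic request_fn, with hypothetical issue_flush()/issue_rw() helpers standing in for the OSD calls:

#include <linux/blkdev.h>

/* Hypothetical backends; the real driver builds an OSD flush or a
 * read/write on a cloned bio chain and completes the request from its
 * async callback rather than immediately. */
static void issue_flush(struct request *rq)
{
	__blk_end_request_all(rq, 0);	/* sketch: complete right away */
}

static void issue_rw(struct request *rq, bool is_write)
{
	__blk_end_request_all(rq, 0);	/* sketch: complete right away */
}

static void example_rq_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		if (rq->cmd_flags & REQ_FLUSH)
			issue_flush(rq);	/* empty cache-flush request */
		else
			issue_rw(rq, rq_data_dir(rq) == WRITE);
	}
}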
382 382
383 static void osdblk_free_disk(struct osdblk_device *osdev) 383 static void osdblk_free_disk(struct osdblk_device *osdev)
384 { 384 {
385 struct gendisk *disk = osdev->disk; 385 struct gendisk *disk = osdev->disk;
386 386
387 if (!disk) 387 if (!disk)
388 return; 388 return;
389 389
390 if (disk->flags & GENHD_FL_UP) 390 if (disk->flags & GENHD_FL_UP)
391 del_gendisk(disk); 391 del_gendisk(disk);
392 if (disk->queue) 392 if (disk->queue)
393 blk_cleanup_queue(disk->queue); 393 blk_cleanup_queue(disk->queue);
394 put_disk(disk); 394 put_disk(disk);
395 } 395 }
396 396
397 static int osdblk_init_disk(struct osdblk_device *osdev) 397 static int osdblk_init_disk(struct osdblk_device *osdev)
398 { 398 {
399 struct gendisk *disk; 399 struct gendisk *disk;
400 struct request_queue *q; 400 struct request_queue *q;
401 int rc; 401 int rc;
402 u64 obj_size = 0; 402 u64 obj_size = 0;
403 403
404 /* contact OSD, request size info about the object being mapped */ 404 /* contact OSD, request size info about the object being mapped */
405 rc = osdblk_get_obj_size(osdev, &obj_size); 405 rc = osdblk_get_obj_size(osdev, &obj_size);
406 if (rc) 406 if (rc)
407 return rc; 407 return rc;
408 408
409 /* create gendisk info */ 409 /* create gendisk info */
410 disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); 410 disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
411 if (!disk) 411 if (!disk)
412 return -ENOMEM; 412 return -ENOMEM;
413 413
414 sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); 414 sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
415 disk->major = osdev->major; 415 disk->major = osdev->major;
416 disk->first_minor = 0; 416 disk->first_minor = 0;
417 disk->fops = &osdblk_bd_ops; 417 disk->fops = &osdblk_bd_ops;
418 disk->private_data = osdev; 418 disk->private_data = osdev;
419 419
420 /* init rq */ 420 /* init rq */
421 q = blk_init_queue(osdblk_rq_fn, &osdev->lock); 421 q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
422 if (!q) { 422 if (!q) {
423 put_disk(disk); 423 put_disk(disk);
424 return -ENOMEM; 424 return -ENOMEM;
425 } 425 }
426 426
427 /* switch queue to TCQ mode; allocate tag map */ 427 /* switch queue to TCQ mode; allocate tag map */
428 rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); 428 rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
429 if (rc) { 429 if (rc) {
430 blk_cleanup_queue(q); 430 blk_cleanup_queue(q);
431 put_disk(disk); 431 put_disk(disk);
432 return rc; 432 return rc;
433 } 433 }
434 434
435 /* Set our limits to the lower device limits, because osdblk cannot 435 /* Set our limits to the lower device limits, because osdblk cannot
436 * sleep when allocating a lower-request and therefore cannot be 436 * sleep when allocating a lower-request and therefore cannot be
437 * bouncing. 437 * bouncing.
438 */ 438 */
439 blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); 439 blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
440 440
441 blk_queue_prep_rq(q, blk_queue_start_tag); 441 blk_queue_prep_rq(q, blk_queue_start_tag);
442 blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); 442 blk_queue_flush(q, REQ_FLUSH);
443 443
444 disk->queue = q; 444 disk->queue = q;
445 445
446 q->queuedata = osdev; 446 q->queuedata = osdev;
447 447
448 osdev->disk = disk; 448 osdev->disk = disk;
449 osdev->q = q; 449 osdev->q = q;
450 450
451 /* finally, announce the disk to the world */ 451 /* finally, announce the disk to the world */
452 set_capacity(disk, obj_size / 512ULL); 452 set_capacity(disk, obj_size / 512ULL);
453 add_disk(disk); 453 add_disk(disk);
454 454
455 	printk(KERN_INFO "%s: added, size 0x%llx\n", 455 	printk(KERN_INFO "%s: added, size 0x%llx\n",
456 disk->disk_name, (unsigned long long)obj_size); 456 disk->disk_name, (unsigned long long)obj_size);
457 457
458 return 0; 458 return 0;
459 } 459 }
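The hunk above is the osdblk side of the interface change: blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH) becomes blk_queue_flush(q, REQ_FLUSH), which advertises cache-flush support for the exported object. A minimal sketch of queue bring-up with the new call; everything except the block-layer functions is hypothetical:

#include <linux/blkdev.h>
#include <linux/spinlock.h>

/* Hypothetical driver state; only the queue lock matters here. */
struct mydrv {
	spinlock_t lock;
};

static void mydrv_rq_fn(struct request_queue *q)
{
	/* request handling elided; see osdblk_rq_fn() above for the shape */
}

static struct request_queue *mydrv_alloc_queue(struct mydrv *drv)
{
	struct request_queue *q;

	q = blk_init_queue(mydrv_rq_fn, &drv->lock);
	if (!q)
		return NULL;

	/* Declare cache-flush support; a device that also honours FUA
	 * writes would pass REQ_FLUSH | REQ_FUA instead. */
	blk_queue_flush(q, REQ_FLUSH);
	return q;
}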
460 460
461 /******************************************************************** 461 /********************************************************************
462 * /sys/class/osdblk/ 462 * /sys/class/osdblk/
463 * add map OSD object to blkdev 463 * add map OSD object to blkdev
464 * remove unmap OSD object 464 * remove unmap OSD object
465 * list show mappings 465 * list show mappings
466 *******************************************************************/ 466 *******************************************************************/
467 467
468 static void class_osdblk_release(struct class *cls) 468 static void class_osdblk_release(struct class *cls)
469 { 469 {
470 kfree(cls); 470 kfree(cls);
471 } 471 }
472 472
473 static ssize_t class_osdblk_list(struct class *c, 473 static ssize_t class_osdblk_list(struct class *c,
474 struct class_attribute *attr, 474 struct class_attribute *attr,
475 char *data) 475 char *data)
476 { 476 {
477 int n = 0; 477 int n = 0;
478 struct list_head *tmp; 478 struct list_head *tmp;
479 479
480 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 480 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
481 481
482 list_for_each(tmp, &osdblkdev_list) { 482 list_for_each(tmp, &osdblkdev_list) {
483 struct osdblk_device *osdev; 483 struct osdblk_device *osdev;
484 484
485 osdev = list_entry(tmp, struct osdblk_device, node); 485 osdev = list_entry(tmp, struct osdblk_device, node);
486 486
487 n += sprintf(data+n, "%d %d %llu %llu %s\n", 487 n += sprintf(data+n, "%d %d %llu %llu %s\n",
488 osdev->id, 488 osdev->id,
489 osdev->major, 489 osdev->major,
490 osdev->obj.partition, 490 osdev->obj.partition,
491 osdev->obj.id, 491 osdev->obj.id,
492 osdev->osd_path); 492 osdev->osd_path);
493 } 493 }
494 494
495 mutex_unlock(&ctl_mutex); 495 mutex_unlock(&ctl_mutex);
496 return n; 496 return n;
497 } 497 }
498 498
499 static ssize_t class_osdblk_add(struct class *c, 499 static ssize_t class_osdblk_add(struct class *c,
500 struct class_attribute *attr, 500 struct class_attribute *attr,
501 const char *buf, size_t count) 501 const char *buf, size_t count)
502 { 502 {
503 struct osdblk_device *osdev; 503 struct osdblk_device *osdev;
504 ssize_t rc; 504 ssize_t rc;
505 int irc, new_id = 0; 505 int irc, new_id = 0;
506 struct list_head *tmp; 506 struct list_head *tmp;
507 507
508 if (!try_module_get(THIS_MODULE)) 508 if (!try_module_get(THIS_MODULE))
509 return -ENODEV; 509 return -ENODEV;
510 510
511 /* new osdblk_device object */ 511 /* new osdblk_device object */
512 osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); 512 osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
513 if (!osdev) { 513 if (!osdev) {
514 rc = -ENOMEM; 514 rc = -ENOMEM;
515 goto err_out_mod; 515 goto err_out_mod;
516 } 516 }
517 517
518 /* static osdblk_device initialization */ 518 /* static osdblk_device initialization */
519 spin_lock_init(&osdev->lock); 519 spin_lock_init(&osdev->lock);
520 INIT_LIST_HEAD(&osdev->node); 520 INIT_LIST_HEAD(&osdev->node);
521 521
522 /* generate unique id: find highest unique id, add one */ 522 /* generate unique id: find highest unique id, add one */
523 523
524 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 524 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
525 525
526 list_for_each(tmp, &osdblkdev_list) { 526 list_for_each(tmp, &osdblkdev_list) {
527 struct osdblk_device *osdev; 527 struct osdblk_device *osdev;
528 528
529 osdev = list_entry(tmp, struct osdblk_device, node); 529 osdev = list_entry(tmp, struct osdblk_device, node);
530 if (osdev->id > new_id) 530 if (osdev->id > new_id)
531 new_id = osdev->id + 1; 531 new_id = osdev->id + 1;
532 } 532 }
533 533
534 osdev->id = new_id; 534 osdev->id = new_id;
535 535
536 /* add to global list */ 536 /* add to global list */
537 list_add_tail(&osdev->node, &osdblkdev_list); 537 list_add_tail(&osdev->node, &osdblkdev_list);
538 538
539 mutex_unlock(&ctl_mutex); 539 mutex_unlock(&ctl_mutex);
540 540
541 /* parse add command */ 541 /* parse add command */
542 if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, 542 if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
543 osdev->osd_path) != 3) { 543 osdev->osd_path) != 3) {
544 rc = -EINVAL; 544 rc = -EINVAL;
545 goto err_out_slot; 545 goto err_out_slot;
546 } 546 }
547 547
548 /* initialize rest of new object */ 548 /* initialize rest of new object */
549 sprintf(osdev->name, DRV_NAME "%d", osdev->id); 549 sprintf(osdev->name, DRV_NAME "%d", osdev->id);
550 550
551 /* contact requested OSD */ 551 /* contact requested OSD */
552 osdev->osd = osduld_path_lookup(osdev->osd_path); 552 osdev->osd = osduld_path_lookup(osdev->osd_path);
553 if (IS_ERR(osdev->osd)) { 553 if (IS_ERR(osdev->osd)) {
554 rc = PTR_ERR(osdev->osd); 554 rc = PTR_ERR(osdev->osd);
555 goto err_out_slot; 555 goto err_out_slot;
556 } 556 }
557 557
558 /* build OSD credential */ 558 /* build OSD credential */
559 osdblk_make_credential(osdev->obj_cred, &osdev->obj); 559 osdblk_make_credential(osdev->obj_cred, &osdev->obj);
560 560
561 /* register our block device */ 561 /* register our block device */
562 irc = register_blkdev(0, osdev->name); 562 irc = register_blkdev(0, osdev->name);
563 if (irc < 0) { 563 if (irc < 0) {
564 rc = irc; 564 rc = irc;
565 goto err_out_osd; 565 goto err_out_osd;
566 } 566 }
567 567
568 osdev->major = irc; 568 osdev->major = irc;
569 569
570 /* set up and announce blkdev mapping */ 570 /* set up and announce blkdev mapping */
571 rc = osdblk_init_disk(osdev); 571 rc = osdblk_init_disk(osdev);
572 if (rc) 572 if (rc)
573 goto err_out_blkdev; 573 goto err_out_blkdev;
574 574
575 return count; 575 return count;
576 576
577 err_out_blkdev: 577 err_out_blkdev:
578 unregister_blkdev(osdev->major, osdev->name); 578 unregister_blkdev(osdev->major, osdev->name);
579 err_out_osd: 579 err_out_osd:
580 osduld_put_device(osdev->osd); 580 osduld_put_device(osdev->osd);
581 err_out_slot: 581 err_out_slot:
582 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 582 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
583 list_del_init(&osdev->node); 583 list_del_init(&osdev->node);
584 mutex_unlock(&ctl_mutex); 584 mutex_unlock(&ctl_mutex);
585 585
586 kfree(osdev); 586 kfree(osdev);
587 err_out_mod: 587 err_out_mod:
588 OSDBLK_DEBUG("Error adding device %s\n", buf); 588 OSDBLK_DEBUG("Error adding device %s\n", buf);
589 module_put(THIS_MODULE); 589 module_put(THIS_MODULE);
590 return rc; 590 return rc;
591 } 591 }
592 592
593 static ssize_t class_osdblk_remove(struct class *c, 593 static ssize_t class_osdblk_remove(struct class *c,
594 struct class_attribute *attr, 594 struct class_attribute *attr,
595 const char *buf, 595 const char *buf,
596 size_t count) 596 size_t count)
597 { 597 {
598 struct osdblk_device *osdev = NULL; 598 struct osdblk_device *osdev = NULL;
599 int target_id, rc; 599 int target_id, rc;
600 unsigned long ul; 600 unsigned long ul;
601 struct list_head *tmp; 601 struct list_head *tmp;
602 602
603 rc = strict_strtoul(buf, 10, &ul); 603 rc = strict_strtoul(buf, 10, &ul);
604 if (rc) 604 if (rc)
605 return rc; 605 return rc;
606 606
607 /* convert to int; abort if we lost anything in the conversion */ 607 /* convert to int; abort if we lost anything in the conversion */
608 target_id = (int) ul; 608 target_id = (int) ul;
609 if (target_id != ul) 609 if (target_id != ul)
610 return -EINVAL; 610 return -EINVAL;
611 611
612 /* remove object from list immediately */ 612 /* remove object from list immediately */
613 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 613 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
614 614
615 list_for_each(tmp, &osdblkdev_list) { 615 list_for_each(tmp, &osdblkdev_list) {
616 osdev = list_entry(tmp, struct osdblk_device, node); 616 osdev = list_entry(tmp, struct osdblk_device, node);
617 if (osdev->id == target_id) { 617 if (osdev->id == target_id) {
618 list_del_init(&osdev->node); 618 list_del_init(&osdev->node);
619 break; 619 break;
620 } 620 }
621 osdev = NULL; 621 osdev = NULL;
622 } 622 }
623 623
624 mutex_unlock(&ctl_mutex); 624 mutex_unlock(&ctl_mutex);
625 625
626 if (!osdev) 626 if (!osdev)
627 return -ENOENT; 627 return -ENOENT;
628 628
629 /* clean up and free blkdev and associated OSD connection */ 629 /* clean up and free blkdev and associated OSD connection */
630 osdblk_free_disk(osdev); 630 osdblk_free_disk(osdev);
631 unregister_blkdev(osdev->major, osdev->name); 631 unregister_blkdev(osdev->major, osdev->name);
632 osduld_put_device(osdev->osd); 632 osduld_put_device(osdev->osd);
633 kfree(osdev); 633 kfree(osdev);
634 634
635 /* release module ref */ 635 /* release module ref */
636 module_put(THIS_MODULE); 636 module_put(THIS_MODULE);
637 637
638 return count; 638 return count;
639 } 639 }
640 640
641 static struct class_attribute class_osdblk_attrs[] = { 641 static struct class_attribute class_osdblk_attrs[] = {
642 __ATTR(add, 0200, NULL, class_osdblk_add), 642 __ATTR(add, 0200, NULL, class_osdblk_add),
643 __ATTR(remove, 0200, NULL, class_osdblk_remove), 643 __ATTR(remove, 0200, NULL, class_osdblk_remove),
644 __ATTR(list, 0444, class_osdblk_list, NULL), 644 __ATTR(list, 0444, class_osdblk_list, NULL),
645 __ATTR_NULL 645 __ATTR_NULL
646 }; 646 };
647 647
648 static int osdblk_sysfs_init(void) 648 static int osdblk_sysfs_init(void)
649 { 649 {
650 int ret = 0; 650 int ret = 0;
651 651
652 /* 652 /*
653 * create control files in sysfs 653 * create control files in sysfs
654 * /sys/class/osdblk/... 654 * /sys/class/osdblk/...
655 */ 655 */
656 class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); 656 class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
657 if (!class_osdblk) 657 if (!class_osdblk)
658 return -ENOMEM; 658 return -ENOMEM;
659 659
660 class_osdblk->name = DRV_NAME; 660 class_osdblk->name = DRV_NAME;
661 class_osdblk->owner = THIS_MODULE; 661 class_osdblk->owner = THIS_MODULE;
662 class_osdblk->class_release = class_osdblk_release; 662 class_osdblk->class_release = class_osdblk_release;
663 class_osdblk->class_attrs = class_osdblk_attrs; 663 class_osdblk->class_attrs = class_osdblk_attrs;
664 664
665 ret = class_register(class_osdblk); 665 ret = class_register(class_osdblk);
666 if (ret) { 666 if (ret) {
667 kfree(class_osdblk); 667 kfree(class_osdblk);
668 class_osdblk = NULL; 668 class_osdblk = NULL;
669 printk(PFX "failed to create class osdblk\n"); 669 printk(PFX "failed to create class osdblk\n");
670 return ret; 670 return ret;
671 } 671 }
672 672
673 return 0; 673 return 0;
674 } 674 }
675 675
676 static void osdblk_sysfs_cleanup(void) 676 static void osdblk_sysfs_cleanup(void)
677 { 677 {
678 if (class_osdblk) 678 if (class_osdblk)
679 class_destroy(class_osdblk); 679 class_destroy(class_osdblk);
680 class_osdblk = NULL; 680 class_osdblk = NULL;
681 } 681 }
682 682
683 static int __init osdblk_init(void) 683 static int __init osdblk_init(void)
684 { 684 {
685 int rc; 685 int rc;
686 686
687 rc = osdblk_sysfs_init(); 687 rc = osdblk_sysfs_init();
688 if (rc) 688 if (rc)
689 return rc; 689 return rc;
690 690
691 return 0; 691 return 0;
692 } 692 }
693 693
694 static void __exit osdblk_exit(void) 694 static void __exit osdblk_exit(void)
695 { 695 {
696 osdblk_sysfs_cleanup(); 696 osdblk_sysfs_cleanup();
697 } 697 }
698 698
699 module_init(osdblk_init); 699 module_init(osdblk_init);
700 module_exit(osdblk_exit); 700 module_exit(osdblk_exit);
701 701
702 702
drivers/block/ps3disk.c
1 /* 1 /*
2 * PS3 Disk Storage Driver 2 * PS3 Disk Storage Driver
3 * 3 *
4 * Copyright (C) 2007 Sony Computer Entertainment Inc. 4 * Copyright (C) 2007 Sony Computer Entertainment Inc.
5 * Copyright 2007 Sony Corp. 5 * Copyright 2007 Sony Corp.
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify it 7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published 8 * under the terms of the GNU General Public License as published
9 * by the Free Software Foundation; version 2 of the License. 9 * by the Free Software Foundation; version 2 of the License.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, but 11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details. 14 * General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License along 16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc., 17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */ 19 */
20 20
21 #include <linux/ata.h> 21 #include <linux/ata.h>
22 #include <linux/blkdev.h> 22 #include <linux/blkdev.h>
23 #include <linux/slab.h> 23 #include <linux/slab.h>
24 24
25 #include <asm/lv1call.h> 25 #include <asm/lv1call.h>
26 #include <asm/ps3stor.h> 26 #include <asm/ps3stor.h>
27 #include <asm/firmware.h> 27 #include <asm/firmware.h>
28 28
29 29
30 #define DEVICE_NAME "ps3disk" 30 #define DEVICE_NAME "ps3disk"
31 31
32 #define BOUNCE_SIZE (64*1024) 32 #define BOUNCE_SIZE (64*1024)
33 33
34 #define PS3DISK_MAX_DISKS 16 34 #define PS3DISK_MAX_DISKS 16
35 #define PS3DISK_MINORS 16 35 #define PS3DISK_MINORS 16
36 36
37 37
38 #define PS3DISK_NAME "ps3d%c" 38 #define PS3DISK_NAME "ps3d%c"
39 39
40 40
41 struct ps3disk_private { 41 struct ps3disk_private {
42 spinlock_t lock; /* Request queue spinlock */ 42 spinlock_t lock; /* Request queue spinlock */
43 struct request_queue *queue; 43 struct request_queue *queue;
44 struct gendisk *gendisk; 44 struct gendisk *gendisk;
45 unsigned int blocking_factor; 45 unsigned int blocking_factor;
46 struct request *req; 46 struct request *req;
47 u64 raw_capacity; 47 u64 raw_capacity;
48 unsigned char model[ATA_ID_PROD_LEN+1]; 48 unsigned char model[ATA_ID_PROD_LEN+1];
49 }; 49 };
50 50
51 51
52 #define LV1_STORAGE_SEND_ATA_COMMAND (2) 52 #define LV1_STORAGE_SEND_ATA_COMMAND (2)
53 #define LV1_STORAGE_ATA_HDDOUT (0x23) 53 #define LV1_STORAGE_ATA_HDDOUT (0x23)
54 54
55 struct lv1_ata_cmnd_block { 55 struct lv1_ata_cmnd_block {
56 u16 features; 56 u16 features;
57 u16 sector_count; 57 u16 sector_count;
58 u16 LBA_low; 58 u16 LBA_low;
59 u16 LBA_mid; 59 u16 LBA_mid;
60 u16 LBA_high; 60 u16 LBA_high;
61 u8 device; 61 u8 device;
62 u8 command; 62 u8 command;
63 u32 is_ext; 63 u32 is_ext;
64 u32 proto; 64 u32 proto;
65 u32 in_out; 65 u32 in_out;
66 u32 size; 66 u32 size;
67 u64 buffer; 67 u64 buffer;
68 u32 arglen; 68 u32 arglen;
69 }; 69 };
70 70
71 enum lv1_ata_proto { 71 enum lv1_ata_proto {
72 NON_DATA_PROTO = 0, 72 NON_DATA_PROTO = 0,
73 PIO_DATA_IN_PROTO = 1, 73 PIO_DATA_IN_PROTO = 1,
74 PIO_DATA_OUT_PROTO = 2, 74 PIO_DATA_OUT_PROTO = 2,
75 DMA_PROTO = 3 75 DMA_PROTO = 3
76 }; 76 };
77 77
78 enum lv1_ata_in_out { 78 enum lv1_ata_in_out {
79 DIR_WRITE = 0, /* memory -> device */ 79 DIR_WRITE = 0, /* memory -> device */
80 DIR_READ = 1 /* device -> memory */ 80 DIR_READ = 1 /* device -> memory */
81 }; 81 };
82 82
83 static int ps3disk_major; 83 static int ps3disk_major;
84 84
85 85
86 static const struct block_device_operations ps3disk_fops = { 86 static const struct block_device_operations ps3disk_fops = {
87 .owner = THIS_MODULE, 87 .owner = THIS_MODULE,
88 }; 88 };
89 89
90 90
91 static void ps3disk_scatter_gather(struct ps3_storage_device *dev, 91 static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
92 struct request *req, int gather) 92 struct request *req, int gather)
93 { 93 {
94 unsigned int offset = 0; 94 unsigned int offset = 0;
95 struct req_iterator iter; 95 struct req_iterator iter;
96 struct bio_vec *bvec; 96 struct bio_vec *bvec;
97 unsigned int i = 0; 97 unsigned int i = 0;
98 size_t size; 98 size_t size;
99 void *buf; 99 void *buf;
100 100
101 rq_for_each_segment(bvec, req, iter) { 101 rq_for_each_segment(bvec, req, iter) {
102 unsigned long flags; 102 unsigned long flags;
103 dev_dbg(&dev->sbd.core, 103 dev_dbg(&dev->sbd.core,
104 "%s:%u: bio %u: %u segs %u sectors from %lu\n", 104 "%s:%u: bio %u: %u segs %u sectors from %lu\n",
105 __func__, __LINE__, i, bio_segments(iter.bio), 105 __func__, __LINE__, i, bio_segments(iter.bio),
106 bio_sectors(iter.bio), iter.bio->bi_sector); 106 bio_sectors(iter.bio), iter.bio->bi_sector);
107 107
108 size = bvec->bv_len; 108 size = bvec->bv_len;
109 buf = bvec_kmap_irq(bvec, &flags); 109 buf = bvec_kmap_irq(bvec, &flags);
110 if (gather) 110 if (gather)
111 memcpy(dev->bounce_buf+offset, buf, size); 111 memcpy(dev->bounce_buf+offset, buf, size);
112 else 112 else
113 memcpy(buf, dev->bounce_buf+offset, size); 113 memcpy(buf, dev->bounce_buf+offset, size);
114 offset += size; 114 offset += size;
115 flush_kernel_dcache_page(bvec->bv_page); 115 flush_kernel_dcache_page(bvec->bv_page);
116 bvec_kunmap_irq(bvec, &flags); 116 bvec_kunmap_irq(bvec, &flags);
117 i++; 117 i++;
118 } 118 }
119 } 119 }
120 120
121 static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, 121 static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
122 struct request *req) 122 struct request *req)
123 { 123 {
124 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); 124 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
125 int write = rq_data_dir(req), res; 125 int write = rq_data_dir(req), res;
126 const char *op = write ? "write" : "read"; 126 const char *op = write ? "write" : "read";
127 u64 start_sector, sectors; 127 u64 start_sector, sectors;
128 unsigned int region_id = dev->regions[dev->region_idx].id; 128 unsigned int region_id = dev->regions[dev->region_idx].id;
129 129
130 #ifdef DEBUG 130 #ifdef DEBUG
131 unsigned int n = 0; 131 unsigned int n = 0;
132 struct bio_vec *bv; 132 struct bio_vec *bv;
133 struct req_iterator iter; 133 struct req_iterator iter;
134 134
135 rq_for_each_segment(bv, req, iter) 135 rq_for_each_segment(bv, req, iter)
136 n++; 136 n++;
137 dev_dbg(&dev->sbd.core, 137 dev_dbg(&dev->sbd.core,
138 "%s:%u: %s req has %u bvecs for %u sectors\n", 138 "%s:%u: %s req has %u bvecs for %u sectors\n",
139 __func__, __LINE__, op, n, blk_rq_sectors(req)); 139 __func__, __LINE__, op, n, blk_rq_sectors(req));
140 #endif 140 #endif
141 141
142 start_sector = blk_rq_pos(req) * priv->blocking_factor; 142 start_sector = blk_rq_pos(req) * priv->blocking_factor;
143 sectors = blk_rq_sectors(req) * priv->blocking_factor; 143 sectors = blk_rq_sectors(req) * priv->blocking_factor;
144 dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n", 144 dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n",
145 __func__, __LINE__, op, sectors, start_sector); 145 __func__, __LINE__, op, sectors, start_sector);
146 146
147 if (write) { 147 if (write) {
148 ps3disk_scatter_gather(dev, req, 1); 148 ps3disk_scatter_gather(dev, req, 1);
149 149
150 res = lv1_storage_write(dev->sbd.dev_id, region_id, 150 res = lv1_storage_write(dev->sbd.dev_id, region_id,
151 start_sector, sectors, 0, 151 start_sector, sectors, 0,
152 dev->bounce_lpar, &dev->tag); 152 dev->bounce_lpar, &dev->tag);
153 } else { 153 } else {
154 res = lv1_storage_read(dev->sbd.dev_id, region_id, 154 res = lv1_storage_read(dev->sbd.dev_id, region_id,
155 start_sector, sectors, 0, 155 start_sector, sectors, 0,
156 dev->bounce_lpar, &dev->tag); 156 dev->bounce_lpar, &dev->tag);
157 } 157 }
158 if (res) { 158 if (res) {
159 dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, 159 dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
160 __LINE__, op, res); 160 __LINE__, op, res);
161 __blk_end_request_all(req, -EIO); 161 __blk_end_request_all(req, -EIO);
162 return 0; 162 return 0;
163 } 163 }
164 164
165 priv->req = req; 165 priv->req = req;
166 return 1; 166 return 1;
167 } 167 }
168 168
169 static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, 169 static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
170 struct request *req) 170 struct request *req)
171 { 171 {
172 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); 172 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
173 u64 res; 173 u64 res;
174 174
175 dev_dbg(&dev->sbd.core, "%s:%u: flush request\n", __func__, __LINE__); 175 dev_dbg(&dev->sbd.core, "%s:%u: flush request\n", __func__, __LINE__);
176 176
177 res = lv1_storage_send_device_command(dev->sbd.dev_id, 177 res = lv1_storage_send_device_command(dev->sbd.dev_id,
178 LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 178 LV1_STORAGE_ATA_HDDOUT, 0, 0, 0,
179 0, &dev->tag); 179 0, &dev->tag);
180 if (res) { 180 if (res) {
181 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", 181 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
182 __func__, __LINE__, res); 182 __func__, __LINE__, res);
183 __blk_end_request_all(req, -EIO); 183 __blk_end_request_all(req, -EIO);
184 return 0; 184 return 0;
185 } 185 }
186 186
187 priv->req = req; 187 priv->req = req;
188 return 1; 188 return 1;
189 } 189 }
190 190
191 static void ps3disk_do_request(struct ps3_storage_device *dev, 191 static void ps3disk_do_request(struct ps3_storage_device *dev,
192 struct request_queue *q) 192 struct request_queue *q)
193 { 193 {
194 struct request *req; 194 struct request *req;
195 195
196 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); 196 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
197 197
198 while ((req = blk_fetch_request(q))) { 198 while ((req = blk_fetch_request(q))) {
199 if (req->cmd_flags & REQ_FLUSH) { 199 if (req->cmd_flags & REQ_FLUSH) {
200 if (ps3disk_submit_flush_request(dev, req)) 200 if (ps3disk_submit_flush_request(dev, req))
201 break; 201 break;
202 } else if (req->cmd_type == REQ_TYPE_FS) { 202 } else if (req->cmd_type == REQ_TYPE_FS) {
203 if (ps3disk_submit_request_sg(dev, req)) 203 if (ps3disk_submit_request_sg(dev, req))
204 break; 204 break;
205 } else { 205 } else {
206 blk_dump_rq_flags(req, DEVICE_NAME " bad request"); 206 blk_dump_rq_flags(req, DEVICE_NAME " bad request");
207 __blk_end_request_all(req, -EIO); 207 __blk_end_request_all(req, -EIO);
208 continue; 208 continue;
209 } 209 }
210 } 210 }
211 } 211 }
212 212
213 static void ps3disk_request(struct request_queue *q) 213 static void ps3disk_request(struct request_queue *q)
214 { 214 {
215 struct ps3_storage_device *dev = q->queuedata; 215 struct ps3_storage_device *dev = q->queuedata;
216 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); 216 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
217 217
218 if (priv->req) { 218 if (priv->req) {
219 dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__); 219 dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__);
220 return; 220 return;
221 } 221 }
222 222
223 ps3disk_do_request(dev, q); 223 ps3disk_do_request(dev, q);
224 } 224 }
225 225
226 static irqreturn_t ps3disk_interrupt(int irq, void *data) 226 static irqreturn_t ps3disk_interrupt(int irq, void *data)
227 { 227 {
228 struct ps3_storage_device *dev = data; 228 struct ps3_storage_device *dev = data;
229 struct ps3disk_private *priv; 229 struct ps3disk_private *priv;
230 struct request *req; 230 struct request *req;
231 int res, read, error; 231 int res, read, error;
232 u64 tag, status; 232 u64 tag, status;
233 const char *op; 233 const char *op;
234 234
235 res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status); 235 res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status);
236 236
237 if (tag != dev->tag) 237 if (tag != dev->tag)
238 dev_err(&dev->sbd.core, 238 dev_err(&dev->sbd.core,
239 "%s:%u: tag mismatch, got %llx, expected %llx\n", 239 "%s:%u: tag mismatch, got %llx, expected %llx\n",
240 __func__, __LINE__, tag, dev->tag); 240 __func__, __LINE__, tag, dev->tag);
241 241
242 if (res) { 242 if (res) {
243 dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n", 243 dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n",
244 __func__, __LINE__, res, status); 244 __func__, __LINE__, res, status);
245 return IRQ_HANDLED; 245 return IRQ_HANDLED;
246 } 246 }
247 247
248 priv = ps3_system_bus_get_drvdata(&dev->sbd); 248 priv = ps3_system_bus_get_drvdata(&dev->sbd);
249 req = priv->req; 249 req = priv->req;
250 if (!req) { 250 if (!req) {
251 dev_dbg(&dev->sbd.core, 251 dev_dbg(&dev->sbd.core,
252 "%s:%u non-block layer request completed\n", __func__, 252 "%s:%u non-block layer request completed\n", __func__,
253 __LINE__); 253 __LINE__);
254 dev->lv1_status = status; 254 dev->lv1_status = status;
255 complete(&dev->done); 255 complete(&dev->done);
256 return IRQ_HANDLED; 256 return IRQ_HANDLED;
257 } 257 }
258 258
259 if (req->cmd_flags & REQ_FLUSH) { 259 if (req->cmd_flags & REQ_FLUSH) {
260 read = 0; 260 read = 0;
261 op = "flush"; 261 op = "flush";
262 } else { 262 } else {
263 read = !rq_data_dir(req); 263 read = !rq_data_dir(req);
264 op = read ? "read" : "write"; 264 op = read ? "read" : "write";
265 } 265 }
266 if (status) { 266 if (status) {
267 dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, 267 dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
268 __LINE__, op, status); 268 __LINE__, op, status);
269 error = -EIO; 269 error = -EIO;
270 } else { 270 } else {
271 dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__, 271 dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
272 __LINE__, op); 272 __LINE__, op);
273 error = 0; 273 error = 0;
274 if (read) 274 if (read)
275 ps3disk_scatter_gather(dev, req, 0); 275 ps3disk_scatter_gather(dev, req, 0);
276 } 276 }
277 277
278 spin_lock(&priv->lock); 278 spin_lock(&priv->lock);
279 __blk_end_request_all(req, error); 279 __blk_end_request_all(req, error);
280 priv->req = NULL; 280 priv->req = NULL;
281 ps3disk_do_request(dev, priv->queue); 281 ps3disk_do_request(dev, priv->queue);
282 spin_unlock(&priv->lock); 282 spin_unlock(&priv->lock);
283 283
284 return IRQ_HANDLED; 284 return IRQ_HANDLED;
285 } 285 }
286 286
287 static int ps3disk_sync_cache(struct ps3_storage_device *dev) 287 static int ps3disk_sync_cache(struct ps3_storage_device *dev)
288 { 288 {
289 u64 res; 289 u64 res;
290 290
291 dev_dbg(&dev->sbd.core, "%s:%u: sync cache\n", __func__, __LINE__); 291 dev_dbg(&dev->sbd.core, "%s:%u: sync cache\n", __func__, __LINE__);
292 292
293 res = ps3stor_send_command(dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0); 293 res = ps3stor_send_command(dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0);
294 if (res) { 294 if (res) {
295 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", 295 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
296 __func__, __LINE__, res); 296 __func__, __LINE__, res);
297 return -EIO; 297 return -EIO;
298 } 298 }
299 return 0; 299 return 0;
300 } 300 }
301 301
302 302
303 /* ATA helpers copied from drivers/ata/libata-core.c */ 303 /* ATA helpers copied from drivers/ata/libata-core.c */
304 304
305 static void swap_buf_le16(u16 *buf, unsigned int buf_words) 305 static void swap_buf_le16(u16 *buf, unsigned int buf_words)
306 { 306 {
307 #ifdef __BIG_ENDIAN 307 #ifdef __BIG_ENDIAN
308 unsigned int i; 308 unsigned int i;
309 309
310 for (i = 0; i < buf_words; i++) 310 for (i = 0; i < buf_words; i++)
311 buf[i] = le16_to_cpu(buf[i]); 311 buf[i] = le16_to_cpu(buf[i]);
312 #endif /* __BIG_ENDIAN */ 312 #endif /* __BIG_ENDIAN */
313 } 313 }
314 314
315 static u64 ata_id_n_sectors(const u16 *id) 315 static u64 ata_id_n_sectors(const u16 *id)
316 { 316 {
317 if (ata_id_has_lba(id)) { 317 if (ata_id_has_lba(id)) {
318 if (ata_id_has_lba48(id)) 318 if (ata_id_has_lba48(id))
319 return ata_id_u64(id, 100); 319 return ata_id_u64(id, 100);
320 else 320 else
321 return ata_id_u32(id, 60); 321 return ata_id_u32(id, 60);
322 } else { 322 } else {
323 if (ata_id_current_chs_valid(id)) 323 if (ata_id_current_chs_valid(id))
324 return ata_id_u32(id, 57); 324 return ata_id_u32(id, 57);
325 else 325 else
326 return id[1] * id[3] * id[6]; 326 return id[1] * id[3] * id[6];
327 } 327 }
328 } 328 }
329 329
330 static void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, 330 static void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs,
331 unsigned int len) 331 unsigned int len)
332 { 332 {
333 unsigned int c; 333 unsigned int c;
334 334
335 while (len > 0) { 335 while (len > 0) {
336 c = id[ofs] >> 8; 336 c = id[ofs] >> 8;
337 *s = c; 337 *s = c;
338 s++; 338 s++;
339 339
340 c = id[ofs] & 0xff; 340 c = id[ofs] & 0xff;
341 *s = c; 341 *s = c;
342 s++; 342 s++;
343 343
344 ofs++; 344 ofs++;
345 len -= 2; 345 len -= 2;
346 } 346 }
347 } 347 }
348 348
349 static void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, 349 static void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs,
350 unsigned int len) 350 unsigned int len)
351 { 351 {
352 unsigned char *p; 352 unsigned char *p;
353 353
354 WARN_ON(!(len & 1)); 354 WARN_ON(!(len & 1));
355 355
356 ata_id_string(id, s, ofs, len - 1); 356 ata_id_string(id, s, ofs, len - 1);
357 357
358 p = s + strnlen(s, len - 1); 358 p = s + strnlen(s, len - 1);
359 while (p > s && p[-1] == ' ') 359 while (p > s && p[-1] == ' ')
360 p--; 360 p--;
361 *p = '\0'; 361 *p = '\0';
362 } 362 }
363 363
364 static int ps3disk_identify(struct ps3_storage_device *dev) 364 static int ps3disk_identify(struct ps3_storage_device *dev)
365 { 365 {
366 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); 366 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
367 struct lv1_ata_cmnd_block ata_cmnd; 367 struct lv1_ata_cmnd_block ata_cmnd;
368 u16 *id = dev->bounce_buf; 368 u16 *id = dev->bounce_buf;
369 u64 res; 369 u64 res;
370 370
371 dev_dbg(&dev->sbd.core, "%s:%u: identify disk\n", __func__, __LINE__); 371 dev_dbg(&dev->sbd.core, "%s:%u: identify disk\n", __func__, __LINE__);
372 372
373 memset(&ata_cmnd, 0, sizeof(struct lv1_ata_cmnd_block)); 373 memset(&ata_cmnd, 0, sizeof(struct lv1_ata_cmnd_block));
374 ata_cmnd.command = ATA_CMD_ID_ATA; 374 ata_cmnd.command = ATA_CMD_ID_ATA;
375 ata_cmnd.sector_count = 1; 375 ata_cmnd.sector_count = 1;
376 ata_cmnd.size = ata_cmnd.arglen = ATA_ID_WORDS * 2; 376 ata_cmnd.size = ata_cmnd.arglen = ATA_ID_WORDS * 2;
377 ata_cmnd.buffer = dev->bounce_lpar; 377 ata_cmnd.buffer = dev->bounce_lpar;
378 ata_cmnd.proto = PIO_DATA_IN_PROTO; 378 ata_cmnd.proto = PIO_DATA_IN_PROTO;
379 ata_cmnd.in_out = DIR_READ; 379 ata_cmnd.in_out = DIR_READ;
380 380
381 res = ps3stor_send_command(dev, LV1_STORAGE_SEND_ATA_COMMAND, 381 res = ps3stor_send_command(dev, LV1_STORAGE_SEND_ATA_COMMAND,
382 ps3_mm_phys_to_lpar(__pa(&ata_cmnd)), 382 ps3_mm_phys_to_lpar(__pa(&ata_cmnd)),
383 sizeof(ata_cmnd), ata_cmnd.buffer, 383 sizeof(ata_cmnd), ata_cmnd.buffer,
384 ata_cmnd.arglen); 384 ata_cmnd.arglen);
385 if (res) { 385 if (res) {
386 dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%llx\n", 386 dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%llx\n",
387 __func__, __LINE__, res); 387 __func__, __LINE__, res);
388 return -EIO; 388 return -EIO;
389 } 389 }
390 390
391 swap_buf_le16(id, ATA_ID_WORDS); 391 swap_buf_le16(id, ATA_ID_WORDS);
392 392
393 /* All we're interested in are raw capacity and model name */ 393 /* All we're interested in are raw capacity and model name */
394 priv->raw_capacity = ata_id_n_sectors(id); 394 priv->raw_capacity = ata_id_n_sectors(id);
395 ata_id_c_string(id, priv->model, ATA_ID_PROD, sizeof(priv->model)); 395 ata_id_c_string(id, priv->model, ATA_ID_PROD, sizeof(priv->model));
396 return 0; 396 return 0;
397 } 397 }
398 398
399 static unsigned long ps3disk_mask; 399 static unsigned long ps3disk_mask;
400 400
401 static DEFINE_MUTEX(ps3disk_mask_mutex); 401 static DEFINE_MUTEX(ps3disk_mask_mutex);
402 402
403 static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) 403 static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
404 { 404 {
405 struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); 405 struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core);
406 struct ps3disk_private *priv; 406 struct ps3disk_private *priv;
407 int error; 407 int error;
408 unsigned int devidx; 408 unsigned int devidx;
409 struct request_queue *queue; 409 struct request_queue *queue;
410 struct gendisk *gendisk; 410 struct gendisk *gendisk;
411 411
412 if (dev->blk_size < 512) { 412 if (dev->blk_size < 512) {
413 dev_err(&dev->sbd.core, 413 dev_err(&dev->sbd.core,
414 "%s:%u: cannot handle block size %llu\n", __func__, 414 "%s:%u: cannot handle block size %llu\n", __func__,
415 __LINE__, dev->blk_size); 415 __LINE__, dev->blk_size);
416 return -EINVAL; 416 return -EINVAL;
417 } 417 }
418 418
419 BUILD_BUG_ON(PS3DISK_MAX_DISKS > BITS_PER_LONG); 419 BUILD_BUG_ON(PS3DISK_MAX_DISKS > BITS_PER_LONG);
420 mutex_lock(&ps3disk_mask_mutex); 420 mutex_lock(&ps3disk_mask_mutex);
421 devidx = find_first_zero_bit(&ps3disk_mask, PS3DISK_MAX_DISKS); 421 devidx = find_first_zero_bit(&ps3disk_mask, PS3DISK_MAX_DISKS);
422 if (devidx >= PS3DISK_MAX_DISKS) { 422 if (devidx >= PS3DISK_MAX_DISKS) {
423 dev_err(&dev->sbd.core, "%s:%u: Too many disks\n", __func__, 423 dev_err(&dev->sbd.core, "%s:%u: Too many disks\n", __func__,
424 __LINE__); 424 __LINE__);
425 mutex_unlock(&ps3disk_mask_mutex); 425 mutex_unlock(&ps3disk_mask_mutex);
426 return -ENOSPC; 426 return -ENOSPC;
427 } 427 }
428 __set_bit(devidx, &ps3disk_mask); 428 __set_bit(devidx, &ps3disk_mask);
429 mutex_unlock(&ps3disk_mask_mutex); 429 mutex_unlock(&ps3disk_mask_mutex);
430 430
431 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 431 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
432 if (!priv) { 432 if (!priv) {
433 error = -ENOMEM; 433 error = -ENOMEM;
434 goto fail; 434 goto fail;
435 } 435 }
436 436
437 ps3_system_bus_set_drvdata(_dev, priv); 437 ps3_system_bus_set_drvdata(_dev, priv);
438 spin_lock_init(&priv->lock); 438 spin_lock_init(&priv->lock);
439 439
440 dev->bounce_size = BOUNCE_SIZE; 440 dev->bounce_size = BOUNCE_SIZE;
441 dev->bounce_buf = kmalloc(BOUNCE_SIZE, GFP_DMA); 441 dev->bounce_buf = kmalloc(BOUNCE_SIZE, GFP_DMA);
442 if (!dev->bounce_buf) { 442 if (!dev->bounce_buf) {
443 error = -ENOMEM; 443 error = -ENOMEM;
444 goto fail_free_priv; 444 goto fail_free_priv;
445 } 445 }
446 446
447 error = ps3stor_setup(dev, ps3disk_interrupt); 447 error = ps3stor_setup(dev, ps3disk_interrupt);
448 if (error) 448 if (error)
449 goto fail_free_bounce; 449 goto fail_free_bounce;
450 450
451 ps3disk_identify(dev); 451 ps3disk_identify(dev);
452 452
453 queue = blk_init_queue(ps3disk_request, &priv->lock); 453 queue = blk_init_queue(ps3disk_request, &priv->lock);
454 if (!queue) { 454 if (!queue) {
455 dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n", 455 dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n",
456 __func__, __LINE__); 456 __func__, __LINE__);
457 error = -ENOMEM; 457 error = -ENOMEM;
458 goto fail_teardown; 458 goto fail_teardown;
459 } 459 }
460 460
461 priv->queue = queue; 461 priv->queue = queue;
462 queue->queuedata = dev; 462 queue->queuedata = dev;
463 463
464 blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH); 464 blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
465 465
466 blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); 466 blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
467 blk_queue_segment_boundary(queue, -1UL); 467 blk_queue_segment_boundary(queue, -1UL);
468 blk_queue_dma_alignment(queue, dev->blk_size-1); 468 blk_queue_dma_alignment(queue, dev->blk_size-1);
469 blk_queue_logical_block_size(queue, dev->blk_size); 469 blk_queue_logical_block_size(queue, dev->blk_size);
470 470
471 blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); 471 blk_queue_flush(queue, REQ_FLUSH);
472 472
473 blk_queue_max_segments(queue, -1); 473 blk_queue_max_segments(queue, -1);
474 blk_queue_max_segment_size(queue, dev->bounce_size); 474 blk_queue_max_segment_size(queue, dev->bounce_size);
475 475
476 gendisk = alloc_disk(PS3DISK_MINORS); 476 gendisk = alloc_disk(PS3DISK_MINORS);
477 if (!gendisk) { 477 if (!gendisk) {
478 dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__, 478 dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__,
479 __LINE__); 479 __LINE__);
480 error = -ENOMEM; 480 error = -ENOMEM;
481 goto fail_cleanup_queue; 481 goto fail_cleanup_queue;
482 } 482 }
483 483
484 priv->gendisk = gendisk; 484 priv->gendisk = gendisk;
485 gendisk->major = ps3disk_major; 485 gendisk->major = ps3disk_major;
486 gendisk->first_minor = devidx * PS3DISK_MINORS; 486 gendisk->first_minor = devidx * PS3DISK_MINORS;
487 gendisk->fops = &ps3disk_fops; 487 gendisk->fops = &ps3disk_fops;
488 gendisk->queue = queue; 488 gendisk->queue = queue;
489 gendisk->private_data = dev; 489 gendisk->private_data = dev;
490 gendisk->driverfs_dev = &dev->sbd.core; 490 gendisk->driverfs_dev = &dev->sbd.core;
491 snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, 491 snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME,
492 devidx+'a'); 492 devidx+'a');
493 priv->blocking_factor = dev->blk_size >> 9; 493 priv->blocking_factor = dev->blk_size >> 9;
494 set_capacity(gendisk, 494 set_capacity(gendisk,
495 dev->regions[dev->region_idx].size*priv->blocking_factor); 495 dev->regions[dev->region_idx].size*priv->blocking_factor);
496 496
497 dev_info(&dev->sbd.core, 497 dev_info(&dev->sbd.core,
498 "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n", 498 "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n",
499 gendisk->disk_name, priv->model, priv->raw_capacity >> 11, 499 gendisk->disk_name, priv->model, priv->raw_capacity >> 11,
500 get_capacity(gendisk) >> 11); 500 get_capacity(gendisk) >> 11);
501 501
502 add_disk(gendisk); 502 add_disk(gendisk);
503 return 0; 503 return 0;
504 504
505 fail_cleanup_queue: 505 fail_cleanup_queue:
506 blk_cleanup_queue(queue); 506 blk_cleanup_queue(queue);
507 fail_teardown: 507 fail_teardown:
508 ps3stor_teardown(dev); 508 ps3stor_teardown(dev);
509 fail_free_bounce: 509 fail_free_bounce:
510 kfree(dev->bounce_buf); 510 kfree(dev->bounce_buf);
511 fail_free_priv: 511 fail_free_priv:
512 kfree(priv); 512 kfree(priv);
513 ps3_system_bus_set_drvdata(_dev, NULL); 513 ps3_system_bus_set_drvdata(_dev, NULL);
514 fail: 514 fail:
515 mutex_lock(&ps3disk_mask_mutex); 515 mutex_lock(&ps3disk_mask_mutex);
516 __clear_bit(devidx, &ps3disk_mask); 516 __clear_bit(devidx, &ps3disk_mask);
517 mutex_unlock(&ps3disk_mask_mutex); 517 mutex_unlock(&ps3disk_mask_mutex);
518 return error; 518 return error;
519 } 519 }
520 520
521 static int ps3disk_remove(struct ps3_system_bus_device *_dev) 521 static int ps3disk_remove(struct ps3_system_bus_device *_dev)
522 { 522 {
523 struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); 523 struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core);
524 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); 524 struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
525 525
526 mutex_lock(&ps3disk_mask_mutex); 526 mutex_lock(&ps3disk_mask_mutex);
527 __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, 527 __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS,
528 &ps3disk_mask); 528 &ps3disk_mask);
529 mutex_unlock(&ps3disk_mask_mutex); 529 mutex_unlock(&ps3disk_mask_mutex);
530 del_gendisk(priv->gendisk); 530 del_gendisk(priv->gendisk);
531 blk_cleanup_queue(priv->queue); 531 blk_cleanup_queue(priv->queue);
532 put_disk(priv->gendisk); 532 put_disk(priv->gendisk);
533 dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); 533 dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
534 ps3disk_sync_cache(dev); 534 ps3disk_sync_cache(dev);
535 ps3stor_teardown(dev); 535 ps3stor_teardown(dev);
536 kfree(dev->bounce_buf); 536 kfree(dev->bounce_buf);
537 kfree(priv); 537 kfree(priv);
538 ps3_system_bus_set_drvdata(_dev, NULL); 538 ps3_system_bus_set_drvdata(_dev, NULL);
539 return 0; 539 return 0;
540 } 540 }
541 541
542 static struct ps3_system_bus_driver ps3disk = { 542 static struct ps3_system_bus_driver ps3disk = {
543 .match_id = PS3_MATCH_ID_STOR_DISK, 543 .match_id = PS3_MATCH_ID_STOR_DISK,
544 .core.name = DEVICE_NAME, 544 .core.name = DEVICE_NAME,
545 .core.owner = THIS_MODULE, 545 .core.owner = THIS_MODULE,
546 .probe = ps3disk_probe, 546 .probe = ps3disk_probe,
547 .remove = ps3disk_remove, 547 .remove = ps3disk_remove,
548 .shutdown = ps3disk_remove, 548 .shutdown = ps3disk_remove,
549 }; 549 };
550 550
551 551
552 static int __init ps3disk_init(void) 552 static int __init ps3disk_init(void)
553 { 553 {
554 int error; 554 int error;
555 555
556 if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) 556 if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
557 return -ENODEV; 557 return -ENODEV;
558 558
559 error = register_blkdev(0, DEVICE_NAME); 559 error = register_blkdev(0, DEVICE_NAME);
560 if (error <= 0) { 560 if (error <= 0) {
561 printk(KERN_ERR "%s:%u: register_blkdev failed %d\n", __func__, 561 printk(KERN_ERR "%s:%u: register_blkdev failed %d\n", __func__,
562 __LINE__, error); 562 __LINE__, error);
563 return error; 563 return error;
564 } 564 }
565 ps3disk_major = error; 565 ps3disk_major = error;
566 566
567 pr_info("%s:%u: registered block device major %d\n", __func__, 567 pr_info("%s:%u: registered block device major %d\n", __func__,
568 __LINE__, ps3disk_major); 568 __LINE__, ps3disk_major);
569 569
570 error = ps3_system_bus_driver_register(&ps3disk); 570 error = ps3_system_bus_driver_register(&ps3disk);
571 if (error) 571 if (error)
572 unregister_blkdev(ps3disk_major, DEVICE_NAME); 572 unregister_blkdev(ps3disk_major, DEVICE_NAME);
573 573
574 return error; 574 return error;
575 } 575 }
576 576
577 static void __exit ps3disk_exit(void) 577 static void __exit ps3disk_exit(void)
578 { 578 {
579 ps3_system_bus_driver_unregister(&ps3disk); 579 ps3_system_bus_driver_unregister(&ps3disk);
580 unregister_blkdev(ps3disk_major, DEVICE_NAME); 580 unregister_blkdev(ps3disk_major, DEVICE_NAME);
581 } 581 }
582 582
583 module_init(ps3disk_init); 583 module_init(ps3disk_init);
584 module_exit(ps3disk_exit); 584 module_exit(ps3disk_exit);
585 585
586 MODULE_LICENSE("GPL"); 586 MODULE_LICENSE("GPL");
587 MODULE_DESCRIPTION("PS3 Disk Storage Driver"); 587 MODULE_DESCRIPTION("PS3 Disk Storage Driver");
588 MODULE_AUTHOR("Sony Corporation"); 588 MODULE_AUTHOR("Sony Corporation");
589 MODULE_ALIAS(PS3_MODULE_ALIAS_STOR_DISK); 589 MODULE_ALIAS(PS3_MODULE_ALIAS_STOR_DISK);
590 590
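The ps3disk changes above are representative of the pattern applied throughout this series: a driver that used to declare QUEUE_ORDERED_DRAIN_FLUSH now simply advertises a flushable write cache with blk_queue_flush(), and its request function dispatches on REQ_FLUSH in cmd_flags. The following is a minimal sketch of that pattern only; the helper names mydrv_issue_flush() and mydrv_issue_rw() are illustrative placeholders, not functions from this patch.

static void mydrv_request(struct request_queue *q)
{
	struct request *req;

	while ((req = blk_fetch_request(q))) {
		if (req->cmd_flags & REQ_FLUSH)
			mydrv_issue_flush(req);		/* flush the device write cache */
		else if (req->cmd_type == REQ_TYPE_FS)
			mydrv_issue_rw(req);		/* ordinary read/write */
		else
			__blk_end_request_all(req, -EIO);
	}
}

static void mydrv_setup_queue(struct request_queue *q)
{
	/* The device has a volatile write cache it can flush; no FUA support. */
	blk_queue_flush(q, REQ_FLUSH);
}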
drivers/block/virtio_blk.c
1 //#define DEBUG 1 //#define DEBUG
2 #include <linux/spinlock.h> 2 #include <linux/spinlock.h>
3 #include <linux/slab.h> 3 #include <linux/slab.h>
4 #include <linux/blkdev.h> 4 #include <linux/blkdev.h>
5 #include <linux/smp_lock.h> 5 #include <linux/smp_lock.h>
6 #include <linux/hdreg.h> 6 #include <linux/hdreg.h>
7 #include <linux/virtio.h> 7 #include <linux/virtio.h>
8 #include <linux/virtio_blk.h> 8 #include <linux/virtio_blk.h>
9 #include <linux/scatterlist.h> 9 #include <linux/scatterlist.h>
10 10
11 #define PART_BITS 4 11 #define PART_BITS 4
12 12
13 static int major, index; 13 static int major, index;
14 14
15 struct virtio_blk 15 struct virtio_blk
16 { 16 {
17 spinlock_t lock; 17 spinlock_t lock;
18 18
19 struct virtio_device *vdev; 19 struct virtio_device *vdev;
20 struct virtqueue *vq; 20 struct virtqueue *vq;
21 21
22 /* The disk structure for the kernel. */ 22 /* The disk structure for the kernel. */
23 struct gendisk *disk; 23 struct gendisk *disk;
24 24
25 /* Request tracking. */ 25 /* Request tracking. */
26 struct list_head reqs; 26 struct list_head reqs;
27 27
28 mempool_t *pool; 28 mempool_t *pool;
29 29
30 /* What host tells us, plus 2 for header & trailer. */ 30 /* What host tells us, plus 2 for header & trailer. */
31 unsigned int sg_elems; 31 unsigned int sg_elems;
32 32
33 /* Scatterlist: can be too big for stack. */ 33 /* Scatterlist: can be too big for stack. */
34 struct scatterlist sg[/*sg_elems*/]; 34 struct scatterlist sg[/*sg_elems*/];
35 }; 35 };
36 36
37 struct virtblk_req 37 struct virtblk_req
38 { 38 {
39 struct list_head list; 39 struct list_head list;
40 struct request *req; 40 struct request *req;
41 struct virtio_blk_outhdr out_hdr; 41 struct virtio_blk_outhdr out_hdr;
42 struct virtio_scsi_inhdr in_hdr; 42 struct virtio_scsi_inhdr in_hdr;
43 u8 status; 43 u8 status;
44 }; 44 };
45 45
46 static void blk_done(struct virtqueue *vq) 46 static void blk_done(struct virtqueue *vq)
47 { 47 {
48 struct virtio_blk *vblk = vq->vdev->priv; 48 struct virtio_blk *vblk = vq->vdev->priv;
49 struct virtblk_req *vbr; 49 struct virtblk_req *vbr;
50 unsigned int len; 50 unsigned int len;
51 unsigned long flags; 51 unsigned long flags;
52 52
53 spin_lock_irqsave(&vblk->lock, flags); 53 spin_lock_irqsave(&vblk->lock, flags);
54 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { 54 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
55 int error; 55 int error;
56 56
57 switch (vbr->status) { 57 switch (vbr->status) {
58 case VIRTIO_BLK_S_OK: 58 case VIRTIO_BLK_S_OK:
59 error = 0; 59 error = 0;
60 break; 60 break;
61 case VIRTIO_BLK_S_UNSUPP: 61 case VIRTIO_BLK_S_UNSUPP:
62 error = -ENOTTY; 62 error = -ENOTTY;
63 break; 63 break;
64 default: 64 default:
65 error = -EIO; 65 error = -EIO;
66 break; 66 break;
67 } 67 }
68 68
69 switch (vbr->req->cmd_type) { 69 switch (vbr->req->cmd_type) {
70 case REQ_TYPE_BLOCK_PC: 70 case REQ_TYPE_BLOCK_PC:
71 vbr->req->resid_len = vbr->in_hdr.residual; 71 vbr->req->resid_len = vbr->in_hdr.residual;
72 vbr->req->sense_len = vbr->in_hdr.sense_len; 72 vbr->req->sense_len = vbr->in_hdr.sense_len;
73 vbr->req->errors = vbr->in_hdr.errors; 73 vbr->req->errors = vbr->in_hdr.errors;
74 break; 74 break;
75 case REQ_TYPE_SPECIAL: 75 case REQ_TYPE_SPECIAL:
76 vbr->req->errors = (error != 0); 76 vbr->req->errors = (error != 0);
77 break; 77 break;
78 default: 78 default:
79 break; 79 break;
80 } 80 }
81 81
82 __blk_end_request_all(vbr->req, error); 82 __blk_end_request_all(vbr->req, error);
83 list_del(&vbr->list); 83 list_del(&vbr->list);
84 mempool_free(vbr, vblk->pool); 84 mempool_free(vbr, vblk->pool);
85 } 85 }
86 /* In case queue is stopped waiting for more buffers. */ 86 /* In case queue is stopped waiting for more buffers. */
87 blk_start_queue(vblk->disk->queue); 87 blk_start_queue(vblk->disk->queue);
88 spin_unlock_irqrestore(&vblk->lock, flags); 88 spin_unlock_irqrestore(&vblk->lock, flags);
89 } 89 }
90 90
91 static bool do_req(struct request_queue *q, struct virtio_blk *vblk, 91 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
92 struct request *req) 92 struct request *req)
93 { 93 {
94 unsigned long num, out = 0, in = 0; 94 unsigned long num, out = 0, in = 0;
95 struct virtblk_req *vbr; 95 struct virtblk_req *vbr;
96 96
97 vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); 97 vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
98 if (!vbr) 98 if (!vbr)
99 /* When another request finishes we'll try again. */ 99 /* When another request finishes we'll try again. */
100 return false; 100 return false;
101 101
102 vbr->req = req; 102 vbr->req = req;
103 103
104 if (req->cmd_flags & REQ_FLUSH) { 104 if (req->cmd_flags & REQ_FLUSH) {
105 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; 105 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
106 vbr->out_hdr.sector = 0; 106 vbr->out_hdr.sector = 0;
107 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); 107 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
108 } else { 108 } else {
109 switch (req->cmd_type) { 109 switch (req->cmd_type) {
110 case REQ_TYPE_FS: 110 case REQ_TYPE_FS:
111 vbr->out_hdr.type = 0; 111 vbr->out_hdr.type = 0;
112 vbr->out_hdr.sector = blk_rq_pos(vbr->req); 112 vbr->out_hdr.sector = blk_rq_pos(vbr->req);
113 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); 113 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
114 break; 114 break;
115 case REQ_TYPE_BLOCK_PC: 115 case REQ_TYPE_BLOCK_PC:
116 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; 116 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
117 vbr->out_hdr.sector = 0; 117 vbr->out_hdr.sector = 0;
118 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); 118 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
119 break; 119 break;
120 case REQ_TYPE_SPECIAL: 120 case REQ_TYPE_SPECIAL:
121 vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID; 121 vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
122 vbr->out_hdr.sector = 0; 122 vbr->out_hdr.sector = 0;
123 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); 123 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
124 break; 124 break;
125 default: 125 default:
126 /* We don't put anything else in the queue. */ 126 /* We don't put anything else in the queue. */
127 BUG(); 127 BUG();
128 } 128 }
129 } 129 }
130 130
131 if (vbr->req->cmd_flags & REQ_HARDBARRIER) 131 if (vbr->req->cmd_flags & REQ_HARDBARRIER)
132 vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; 132 vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
133 133
134 sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); 134 sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
135 135
136 /* 136 /*
137 * If this is a packet command we need a couple of additional headers. 137 * If this is a packet command we need a couple of additional headers.
138 * Behind the normal outhdr we put a segment with the scsi command 138 * Behind the normal outhdr we put a segment with the scsi command
139 * block, and before the normal inhdr we put the sense data and the 139 * block, and before the normal inhdr we put the sense data and the
140 * inhdr with additional status information before the normal inhdr. 140 * inhdr with additional status information before the normal inhdr.
141 */ 141 */
142 if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) 142 if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
143 sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len); 143 sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
144 144
145 num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); 145 num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
146 146
147 if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { 147 if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
148 sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); 148 sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
149 sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, 149 sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
150 sizeof(vbr->in_hdr)); 150 sizeof(vbr->in_hdr));
151 } 151 }
152 152
153 sg_set_buf(&vblk->sg[num + out + in++], &vbr->status, 153 sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
154 sizeof(vbr->status)); 154 sizeof(vbr->status));
155 155
156 if (num) { 156 if (num) {
157 if (rq_data_dir(vbr->req) == WRITE) { 157 if (rq_data_dir(vbr->req) == WRITE) {
158 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; 158 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
159 out += num; 159 out += num;
160 } else { 160 } else {
161 vbr->out_hdr.type |= VIRTIO_BLK_T_IN; 161 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
162 in += num; 162 in += num;
163 } 163 }
164 } 164 }
165 165
166 if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { 166 if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
167 mempool_free(vbr, vblk->pool); 167 mempool_free(vbr, vblk->pool);
168 return false; 168 return false;
169 } 169 }
170 170
171 list_add_tail(&vbr->list, &vblk->reqs); 171 list_add_tail(&vbr->list, &vblk->reqs);
172 return true; 172 return true;
173 } 173 }
174 174
175 static void do_virtblk_request(struct request_queue *q) 175 static void do_virtblk_request(struct request_queue *q)
176 { 176 {
177 struct virtio_blk *vblk = q->queuedata; 177 struct virtio_blk *vblk = q->queuedata;
178 struct request *req; 178 struct request *req;
179 unsigned int issued = 0; 179 unsigned int issued = 0;
180 180
181 while ((req = blk_peek_request(q)) != NULL) { 181 while ((req = blk_peek_request(q)) != NULL) {
182 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 182 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
183 183
184 /* If this request fails, stop queue and wait for something to 184 /* If this request fails, stop queue and wait for something to
185 finish to restart it. */ 185 finish to restart it. */
186 if (!do_req(q, vblk, req)) { 186 if (!do_req(q, vblk, req)) {
187 blk_stop_queue(q); 187 blk_stop_queue(q);
188 break; 188 break;
189 } 189 }
190 blk_start_request(req); 190 blk_start_request(req);
191 issued++; 191 issued++;
192 } 192 }
193 193
194 if (issued) 194 if (issued)
195 virtqueue_kick(vblk->vq); 195 virtqueue_kick(vblk->vq);
196 } 196 }
197 197
198 /* return id (s/n) string for *disk to *id_str 198 /* return id (s/n) string for *disk to *id_str
199 */ 199 */
200 static int virtblk_get_id(struct gendisk *disk, char *id_str) 200 static int virtblk_get_id(struct gendisk *disk, char *id_str)
201 { 201 {
202 struct virtio_blk *vblk = disk->private_data; 202 struct virtio_blk *vblk = disk->private_data;
203 struct request *req; 203 struct request *req;
204 struct bio *bio; 204 struct bio *bio;
205 205
206 bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, 206 bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
207 GFP_KERNEL); 207 GFP_KERNEL);
208 if (IS_ERR(bio)) 208 if (IS_ERR(bio))
209 return PTR_ERR(bio); 209 return PTR_ERR(bio);
210 210
211 req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL); 211 req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
212 if (IS_ERR(req)) { 212 if (IS_ERR(req)) {
213 bio_put(bio); 213 bio_put(bio);
214 return PTR_ERR(req); 214 return PTR_ERR(req);
215 } 215 }
216 216
217 req->cmd_type = REQ_TYPE_SPECIAL; 217 req->cmd_type = REQ_TYPE_SPECIAL;
218 return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 218 return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
219 } 219 }
220 220
221 static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, 221 static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
222 unsigned cmd, unsigned long data) 222 unsigned cmd, unsigned long data)
223 { 223 {
224 struct gendisk *disk = bdev->bd_disk; 224 struct gendisk *disk = bdev->bd_disk;
225 struct virtio_blk *vblk = disk->private_data; 225 struct virtio_blk *vblk = disk->private_data;
226 226
227 /* 227 /*
228 * Only allow the generic SCSI ioctls if the host can support it. 228 * Only allow the generic SCSI ioctls if the host can support it.
229 */ 229 */
230 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) 230 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
231 return -ENOTTY; 231 return -ENOTTY;
232 232
233 return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, 233 return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
234 (void __user *)data); 234 (void __user *)data);
235 } 235 }
236 236
237 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, 237 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
238 unsigned int cmd, unsigned long param) 238 unsigned int cmd, unsigned long param)
239 { 239 {
240 int ret; 240 int ret;
241 241
242 lock_kernel(); 242 lock_kernel();
243 ret = virtblk_locked_ioctl(bdev, mode, cmd, param); 243 ret = virtblk_locked_ioctl(bdev, mode, cmd, param);
244 unlock_kernel(); 244 unlock_kernel();
245 245
246 return ret; 246 return ret;
247 } 247 }
248 248
249 /* We provide getgeo only to please some old bootloader/partitioning tools */ 249 /* We provide getgeo only to please some old bootloader/partitioning tools */
250 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 250 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
251 { 251 {
252 struct virtio_blk *vblk = bd->bd_disk->private_data; 252 struct virtio_blk *vblk = bd->bd_disk->private_data;
253 struct virtio_blk_geometry vgeo; 253 struct virtio_blk_geometry vgeo;
254 int err; 254 int err;
255 255
256 /* see if the host passed in geometry config */ 256 /* see if the host passed in geometry config */
257 err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, 257 err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
258 offsetof(struct virtio_blk_config, geometry), 258 offsetof(struct virtio_blk_config, geometry),
259 &vgeo); 259 &vgeo);
260 260
261 if (!err) { 261 if (!err) {
262 geo->heads = vgeo.heads; 262 geo->heads = vgeo.heads;
263 geo->sectors = vgeo.sectors; 263 geo->sectors = vgeo.sectors;
264 geo->cylinders = vgeo.cylinders; 264 geo->cylinders = vgeo.cylinders;
265 } else { 265 } else {
266 /* some standard values, similar to sd */ 266 /* some standard values, similar to sd */
267 geo->heads = 1 << 6; 267 geo->heads = 1 << 6;
268 geo->sectors = 1 << 5; 268 geo->sectors = 1 << 5;
269 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 269 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
270 } 270 }
271 return 0; 271 return 0;
272 } 272 }
273 273
274 static const struct block_device_operations virtblk_fops = { 274 static const struct block_device_operations virtblk_fops = {
275 .ioctl = virtblk_ioctl, 275 .ioctl = virtblk_ioctl,
276 .owner = THIS_MODULE, 276 .owner = THIS_MODULE,
277 .getgeo = virtblk_getgeo, 277 .getgeo = virtblk_getgeo,
278 }; 278 };
279 279
280 static int index_to_minor(int index) 280 static int index_to_minor(int index)
281 { 281 {
282 return index << PART_BITS; 282 return index << PART_BITS;
283 } 283 }
284 284
285 static ssize_t virtblk_serial_show(struct device *dev, 285 static ssize_t virtblk_serial_show(struct device *dev,
286 struct device_attribute *attr, char *buf) 286 struct device_attribute *attr, char *buf)
287 { 287 {
288 struct gendisk *disk = dev_to_disk(dev); 288 struct gendisk *disk = dev_to_disk(dev);
289 int err; 289 int err;
290 290
291 /* sysfs gives us a PAGE_SIZE buffer */ 291 /* sysfs gives us a PAGE_SIZE buffer */
292 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 292 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
293 293
294 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 294 buf[VIRTIO_BLK_ID_BYTES] = '\0';
295 err = virtblk_get_id(disk, buf); 295 err = virtblk_get_id(disk, buf);
296 if (!err) 296 if (!err)
297 return strlen(buf); 297 return strlen(buf);
298 298
299 if (err == -EIO) /* Unsupported? Make it empty. */ 299 if (err == -EIO) /* Unsupported? Make it empty. */
300 return 0; 300 return 0;
301 301
302 return err; 302 return err;
303 } 303 }
304 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); 304 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
305 305
306 static int __devinit virtblk_probe(struct virtio_device *vdev) 306 static int __devinit virtblk_probe(struct virtio_device *vdev)
307 { 307 {
308 struct virtio_blk *vblk; 308 struct virtio_blk *vblk;
309 struct request_queue *q; 309 struct request_queue *q;
310 int err; 310 int err;
311 u64 cap; 311 u64 cap;
312 u32 v, blk_size, sg_elems, opt_io_size; 312 u32 v, blk_size, sg_elems, opt_io_size;
313 u16 min_io_size; 313 u16 min_io_size;
314 u8 physical_block_exp, alignment_offset; 314 u8 physical_block_exp, alignment_offset;
315 315
316 if (index_to_minor(index) >= 1 << MINORBITS) 316 if (index_to_minor(index) >= 1 << MINORBITS)
317 return -ENOSPC; 317 return -ENOSPC;
318 318
319 /* We need to know how many segments before we allocate. */ 319 /* We need to know how many segments before we allocate. */
320 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, 320 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
321 offsetof(struct virtio_blk_config, seg_max), 321 offsetof(struct virtio_blk_config, seg_max),
322 &sg_elems); 322 &sg_elems);
323 323
324 /* We need at least one SG element, whatever they say. */ 324 /* We need at least one SG element, whatever they say. */
325 if (err || !sg_elems) 325 if (err || !sg_elems)
326 sg_elems = 1; 326 sg_elems = 1;
327 327
328 /* We need extra sg elements at head and tail. */ 328 /* We need extra sg elements at head and tail. */
329 sg_elems += 2; 329 sg_elems += 2;
330 vdev->priv = vblk = kmalloc(sizeof(*vblk) + 330 vdev->priv = vblk = kmalloc(sizeof(*vblk) +
331 sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); 331 sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
332 if (!vblk) { 332 if (!vblk) {
333 err = -ENOMEM; 333 err = -ENOMEM;
334 goto out; 334 goto out;
335 } 335 }
336 336
337 INIT_LIST_HEAD(&vblk->reqs); 337 INIT_LIST_HEAD(&vblk->reqs);
338 spin_lock_init(&vblk->lock); 338 spin_lock_init(&vblk->lock);
339 vblk->vdev = vdev; 339 vblk->vdev = vdev;
340 vblk->sg_elems = sg_elems; 340 vblk->sg_elems = sg_elems;
341 sg_init_table(vblk->sg, vblk->sg_elems); 341 sg_init_table(vblk->sg, vblk->sg_elems);
342 342
343 /* We expect one virtqueue, for output. */ 343 /* We expect one virtqueue, for output. */
344 vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); 344 vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
345 if (IS_ERR(vblk->vq)) { 345 if (IS_ERR(vblk->vq)) {
346 err = PTR_ERR(vblk->vq); 346 err = PTR_ERR(vblk->vq);
347 goto out_free_vblk; 347 goto out_free_vblk;
348 } 348 }
349 349
350 vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); 350 vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
351 if (!vblk->pool) { 351 if (!vblk->pool) {
352 err = -ENOMEM; 352 err = -ENOMEM;
353 goto out_free_vq; 353 goto out_free_vq;
354 } 354 }
355 355
356 /* FIXME: How many partitions? How long is a piece of string? */ 356 /* FIXME: How many partitions? How long is a piece of string? */
357 vblk->disk = alloc_disk(1 << PART_BITS); 357 vblk->disk = alloc_disk(1 << PART_BITS);
358 if (!vblk->disk) { 358 if (!vblk->disk) {
359 err = -ENOMEM; 359 err = -ENOMEM;
360 goto out_mempool; 360 goto out_mempool;
361 } 361 }
362 362
363 q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); 363 q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
364 if (!q) { 364 if (!q) {
365 err = -ENOMEM; 365 err = -ENOMEM;
366 goto out_put_disk; 366 goto out_put_disk;
367 } 367 }
368 368
369 q->queuedata = vblk; 369 q->queuedata = vblk;
370 370
371 if (index < 26) { 371 if (index < 26) {
372 sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); 372 sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
373 } else if (index < (26 + 1) * 26) { 373 } else if (index < (26 + 1) * 26) {
374 sprintf(vblk->disk->disk_name, "vd%c%c", 374 sprintf(vblk->disk->disk_name, "vd%c%c",
375 'a' + index / 26 - 1, 'a' + index % 26); 375 'a' + index / 26 - 1, 'a' + index % 26);
376 } else { 376 } else {
377 const unsigned int m1 = (index / 26 - 1) / 26 - 1; 377 const unsigned int m1 = (index / 26 - 1) / 26 - 1;
378 const unsigned int m2 = (index / 26 - 1) % 26; 378 const unsigned int m2 = (index / 26 - 1) % 26;
379 const unsigned int m3 = index % 26; 379 const unsigned int m3 = index % 26;
380 sprintf(vblk->disk->disk_name, "vd%c%c%c", 380 sprintf(vblk->disk->disk_name, "vd%c%c%c",
381 'a' + m1, 'a' + m2, 'a' + m3); 381 'a' + m1, 'a' + m2, 'a' + m3);
382 } 382 }
383 383
384 vblk->disk->major = major; 384 vblk->disk->major = major;
385 vblk->disk->first_minor = index_to_minor(index); 385 vblk->disk->first_minor = index_to_minor(index);
386 vblk->disk->private_data = vblk; 386 vblk->disk->private_data = vblk;
387 vblk->disk->fops = &virtblk_fops; 387 vblk->disk->fops = &virtblk_fops;
388 vblk->disk->driverfs_dev = &vdev->dev; 388 vblk->disk->driverfs_dev = &vdev->dev;
389 index++; 389 index++;
390 390
391 if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { 391 /*
392 /* 392 * If the FLUSH feature is supported we do have support for
393 * If the FLUSH feature is supported we do have support for 393 * flushing a volatile write cache on the host. Use that to
394 * flushing a volatile write cache on the host. Use that 394 * implement write barrier support; otherwise, we must assume
395 * to implement write barrier support. 395 * that the host does not perform any kind of volatile write
396 */ 396 * caching.
397 blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); 397 */
398 } else { 398 if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
399 /* 399 blk_queue_flush(q, REQ_FLUSH);
400 * If the FLUSH feature is not supported we must assume that
401 * the host does not perform any kind of volatile write
402 * caching. We still need to drain the queue to provider
403 * proper barrier semantics.
404 */
405 blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
406 }
407 400
408 /* If disk is read-only in the host, the guest should obey */ 401 /* If disk is read-only in the host, the guest should obey */
409 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 402 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
410 set_disk_ro(vblk->disk, 1); 403 set_disk_ro(vblk->disk, 1);
411 404
412 /* Host must always specify the capacity. */ 405 /* Host must always specify the capacity. */
413 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), 406 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
414 &cap, sizeof(cap)); 407 &cap, sizeof(cap));
415 408
416 /* If capacity is too big, truncate with warning. */ 409 /* If capacity is too big, truncate with warning. */
417 if ((sector_t)cap != cap) { 410 if ((sector_t)cap != cap) {
418 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", 411 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
419 (unsigned long long)cap); 412 (unsigned long long)cap);
420 cap = (sector_t)-1; 413 cap = (sector_t)-1;
421 } 414 }
422 set_capacity(vblk->disk, cap); 415 set_capacity(vblk->disk, cap);
423 416
424 /* We can handle whatever the host told us to handle. */ 417 /* We can handle whatever the host told us to handle. */
425 blk_queue_max_segments(q, vblk->sg_elems-2); 418 blk_queue_max_segments(q, vblk->sg_elems-2);
426 419
427 /* No need to bounce any requests */ 420 /* No need to bounce any requests */
428 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); 421 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
429 422
430 /* No real sector limit. */ 423 /* No real sector limit. */
431 blk_queue_max_hw_sectors(q, -1U); 424 blk_queue_max_hw_sectors(q, -1U);
432 425
433 /* Host can optionally specify maximum segment size and number of 426 /* Host can optionally specify maximum segment size and number of
434 * segments. */ 427 * segments. */
435 err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, 428 err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
436 offsetof(struct virtio_blk_config, size_max), 429 offsetof(struct virtio_blk_config, size_max),
437 &v); 430 &v);
438 if (!err) 431 if (!err)
439 blk_queue_max_segment_size(q, v); 432 blk_queue_max_segment_size(q, v);
440 else 433 else
441 blk_queue_max_segment_size(q, -1U); 434 blk_queue_max_segment_size(q, -1U);
442 435
443 /* Host can optionally specify the block size of the device */ 436 /* Host can optionally specify the block size of the device */
444 err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, 437 err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
445 offsetof(struct virtio_blk_config, blk_size), 438 offsetof(struct virtio_blk_config, blk_size),
446 &blk_size); 439 &blk_size);
447 if (!err) 440 if (!err)
448 blk_queue_logical_block_size(q, blk_size); 441 blk_queue_logical_block_size(q, blk_size);
449 else 442 else
450 blk_size = queue_logical_block_size(q); 443 blk_size = queue_logical_block_size(q);
451 444
452 /* Use topology information if available */ 445 /* Use topology information if available */
453 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 446 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
454 offsetof(struct virtio_blk_config, physical_block_exp), 447 offsetof(struct virtio_blk_config, physical_block_exp),
455 &physical_block_exp); 448 &physical_block_exp);
456 if (!err && physical_block_exp) 449 if (!err && physical_block_exp)
457 blk_queue_physical_block_size(q, 450 blk_queue_physical_block_size(q,
458 blk_size * (1 << physical_block_exp)); 451 blk_size * (1 << physical_block_exp));
459 452
460 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 453 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
461 offsetof(struct virtio_blk_config, alignment_offset), 454 offsetof(struct virtio_blk_config, alignment_offset),
462 &alignment_offset); 455 &alignment_offset);
463 if (!err && alignment_offset) 456 if (!err && alignment_offset)
464 blk_queue_alignment_offset(q, blk_size * alignment_offset); 457 blk_queue_alignment_offset(q, blk_size * alignment_offset);
465 458
466 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 459 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
467 offsetof(struct virtio_blk_config, min_io_size), 460 offsetof(struct virtio_blk_config, min_io_size),
468 &min_io_size); 461 &min_io_size);
469 if (!err && min_io_size) 462 if (!err && min_io_size)
470 blk_queue_io_min(q, blk_size * min_io_size); 463 blk_queue_io_min(q, blk_size * min_io_size);
471 464
472 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 465 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
473 offsetof(struct virtio_blk_config, opt_io_size), 466 offsetof(struct virtio_blk_config, opt_io_size),
474 &opt_io_size); 467 &opt_io_size);
475 if (!err && opt_io_size) 468 if (!err && opt_io_size)
476 blk_queue_io_opt(q, blk_size * opt_io_size); 469 blk_queue_io_opt(q, blk_size * opt_io_size);
477 470
478 471
479 add_disk(vblk->disk); 472 add_disk(vblk->disk);
480 err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); 473 err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
481 if (err) 474 if (err)
482 goto out_del_disk; 475 goto out_del_disk;
483 476
484 return 0; 477 return 0;
485 478
486 out_del_disk: 479 out_del_disk:
487 del_gendisk(vblk->disk); 480 del_gendisk(vblk->disk);
488 blk_cleanup_queue(vblk->disk->queue); 481 blk_cleanup_queue(vblk->disk->queue);
489 out_put_disk: 482 out_put_disk:
490 put_disk(vblk->disk); 483 put_disk(vblk->disk);
491 out_mempool: 484 out_mempool:
492 mempool_destroy(vblk->pool); 485 mempool_destroy(vblk->pool);
493 out_free_vq: 486 out_free_vq:
494 vdev->config->del_vqs(vdev); 487 vdev->config->del_vqs(vdev);
495 out_free_vblk: 488 out_free_vblk:
496 kfree(vblk); 489 kfree(vblk);
497 out: 490 out:
498 return err; 491 return err;
499 } 492 }
500 493
501 static void __devexit virtblk_remove(struct virtio_device *vdev) 494 static void __devexit virtblk_remove(struct virtio_device *vdev)
502 { 495 {
503 struct virtio_blk *vblk = vdev->priv; 496 struct virtio_blk *vblk = vdev->priv;
504 497
505 /* Nothing should be pending. */ 498 /* Nothing should be pending. */
506 BUG_ON(!list_empty(&vblk->reqs)); 499 BUG_ON(!list_empty(&vblk->reqs));
507 500
508 /* Stop all the virtqueues. */ 501 /* Stop all the virtqueues. */
509 vdev->config->reset(vdev); 502 vdev->config->reset(vdev);
510 503
511 del_gendisk(vblk->disk); 504 del_gendisk(vblk->disk);
512 blk_cleanup_queue(vblk->disk->queue); 505 blk_cleanup_queue(vblk->disk->queue);
513 put_disk(vblk->disk); 506 put_disk(vblk->disk);
514 mempool_destroy(vblk->pool); 507 mempool_destroy(vblk->pool);
515 vdev->config->del_vqs(vdev); 508 vdev->config->del_vqs(vdev);
516 kfree(vblk); 509 kfree(vblk);
517 } 510 }
518 511
519 static const struct virtio_device_id id_table[] = { 512 static const struct virtio_device_id id_table[] = {
520 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 513 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
521 { 0 }, 514 { 0 },
522 }; 515 };
523 516
524 static unsigned int features[] = { 517 static unsigned int features[] = {
525 VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, 518 VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
526 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 519 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
527 VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY 520 VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
528 }; 521 };
529 522
530 /* 523 /*
531 * virtio_blk causes spurious section mismatch warning by 524 * virtio_blk causes spurious section mismatch warning by
532 * simultaneously referring to a __devinit and a __devexit function. 525 * simultaneously referring to a __devinit and a __devexit function.
533 * Use __refdata to avoid this warning. 526 * Use __refdata to avoid this warning.
534 */ 527 */
535 static struct virtio_driver __refdata virtio_blk = { 528 static struct virtio_driver __refdata virtio_blk = {
536 .feature_table = features, 529 .feature_table = features,
537 .feature_table_size = ARRAY_SIZE(features), 530 .feature_table_size = ARRAY_SIZE(features),
538 .driver.name = KBUILD_MODNAME, 531 .driver.name = KBUILD_MODNAME,
539 .driver.owner = THIS_MODULE, 532 .driver.owner = THIS_MODULE,
540 .id_table = id_table, 533 .id_table = id_table,
541 .probe = virtblk_probe, 534 .probe = virtblk_probe,
542 .remove = __devexit_p(virtblk_remove), 535 .remove = __devexit_p(virtblk_remove),
543 }; 536 };
544 537
545 static int __init init(void) 538 static int __init init(void)
546 { 539 {
547 major = register_blkdev(0, "virtblk"); 540 major = register_blkdev(0, "virtblk");
548 if (major < 0) 541 if (major < 0)
549 return major; 542 return major;
550 return register_virtio_driver(&virtio_blk); 543 return register_virtio_driver(&virtio_blk);
551 } 544 }
552 545
553 static void __exit fini(void) 546 static void __exit fini(void)
554 { 547 {
555 unregister_blkdev(major, "virtblk"); 548 unregister_blkdev(major, "virtblk");
556 unregister_virtio_driver(&virtio_blk); 549 unregister_virtio_driver(&virtio_blk);
557 } 550 }
558 module_init(init); 551 module_init(init);
559 module_exit(fini); 552 module_exit(fini);
560 553
561 MODULE_DEVICE_TABLE(virtio, id_table); 554 MODULE_DEVICE_TABLE(virtio, id_table);
562 MODULE_DESCRIPTION("Virtio block driver"); 555 MODULE_DESCRIPTION("Virtio block driver");
563 MODULE_LICENSE("GPL"); 556 MODULE_LICENSE("GPL");
564 557
drivers/block/xen-blkfront.c
1 /* 1 /*
2 * blkfront.c 2 * blkfront.c
3 * 3 *
4 * XenLinux virtual block device driver. 4 * XenLinux virtual block device driver.
5 * 5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach 8 * Copyright (c) 2004, Christian Limpach
9 * Copyright (c) 2004, Andrew Warfield 9 * Copyright (c) 2004, Andrew Warfield
10 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2005, Christopher Clark
11 * Copyright (c) 2005, XenSource Ltd 11 * Copyright (c) 2005, XenSource Ltd
12 * 12 *
13 * This program is free software; you can redistribute it and/or 13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version 2 14 * modify it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation; or, when distributed 15 * as published by the Free Software Foundation; or, when distributed
16 * separately from the Linux kernel or incorporated into other 16 * separately from the Linux kernel or incorporated into other
17 * software packages, subject to the following license: 17 * software packages, subject to the following license:
18 * 18 *
19 * Permission is hereby granted, free of charge, to any person obtaining a copy 19 * Permission is hereby granted, free of charge, to any person obtaining a copy
20 * of this source file (the "Software"), to deal in the Software without 20 * of this source file (the "Software"), to deal in the Software without
21 * restriction, including without limitation the rights to use, copy, modify, 21 * restriction, including without limitation the rights to use, copy, modify,
22 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23 * and to permit persons to whom the Software is furnished to do so, subject to 23 * and to permit persons to whom the Software is furnished to do so, subject to
24 * the following conditions: 24 * the following conditions:
25 * 25 *
26 * The above copyright notice and this permission notice shall be included in 26 * The above copyright notice and this permission notice shall be included in
27 * all copies or substantial portions of the Software. 27 * all copies or substantial portions of the Software.
28 * 28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 * IN THE SOFTWARE. 35 * IN THE SOFTWARE.
36 */ 36 */
37 37
38 #include <linux/interrupt.h> 38 #include <linux/interrupt.h>
39 #include <linux/blkdev.h> 39 #include <linux/blkdev.h>
40 #include <linux/hdreg.h> 40 #include <linux/hdreg.h>
41 #include <linux/cdrom.h> 41 #include <linux/cdrom.h>
42 #include <linux/module.h> 42 #include <linux/module.h>
43 #include <linux/slab.h> 43 #include <linux/slab.h>
44 #include <linux/smp_lock.h> 44 #include <linux/smp_lock.h>
45 #include <linux/scatterlist.h> 45 #include <linux/scatterlist.h>
46 46
47 #include <xen/xen.h> 47 #include <xen/xen.h>
48 #include <xen/xenbus.h> 48 #include <xen/xenbus.h>
49 #include <xen/grant_table.h> 49 #include <xen/grant_table.h>
50 #include <xen/events.h> 50 #include <xen/events.h>
51 #include <xen/page.h> 51 #include <xen/page.h>
52 #include <xen/platform_pci.h> 52 #include <xen/platform_pci.h>
53 53
54 #include <xen/interface/grant_table.h> 54 #include <xen/interface/grant_table.h>
55 #include <xen/interface/io/blkif.h> 55 #include <xen/interface/io/blkif.h>
56 #include <xen/interface/io/protocols.h> 56 #include <xen/interface/io/protocols.h>
57 57
58 #include <asm/xen/hypervisor.h> 58 #include <asm/xen/hypervisor.h>
59 59
60 enum blkif_state { 60 enum blkif_state {
61 BLKIF_STATE_DISCONNECTED, 61 BLKIF_STATE_DISCONNECTED,
62 BLKIF_STATE_CONNECTED, 62 BLKIF_STATE_CONNECTED,
63 BLKIF_STATE_SUSPENDED, 63 BLKIF_STATE_SUSPENDED,
64 }; 64 };
65 65
66 struct blk_shadow { 66 struct blk_shadow {
67 struct blkif_request req; 67 struct blkif_request req;
68 unsigned long request; 68 unsigned long request;
69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
70 }; 70 };
71 71
72 static const struct block_device_operations xlvbd_block_fops; 72 static const struct block_device_operations xlvbd_block_fops;
73 73
74 #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) 74 #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
75 75
76 /* 76 /*
77 * We have one of these per vbd, whether ide, scsi or 'other'. They 77 * We have one of these per vbd, whether ide, scsi or 'other'. They
78 * hang in private_data off the gendisk structure. We may end up 78 * hang in private_data off the gendisk structure. We may end up
79 * putting all kinds of interesting stuff here :-) 79 * putting all kinds of interesting stuff here :-)
80 */ 80 */
81 struct blkfront_info 81 struct blkfront_info
82 { 82 {
83 struct mutex mutex; 83 struct mutex mutex;
84 struct xenbus_device *xbdev; 84 struct xenbus_device *xbdev;
85 struct gendisk *gd; 85 struct gendisk *gd;
86 int vdevice; 86 int vdevice;
87 blkif_vdev_t handle; 87 blkif_vdev_t handle;
88 enum blkif_state connected; 88 enum blkif_state connected;
89 int ring_ref; 89 int ring_ref;
90 struct blkif_front_ring ring; 90 struct blkif_front_ring ring;
91 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 91 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
92 unsigned int evtchn, irq; 92 unsigned int evtchn, irq;
93 struct request_queue *rq; 93 struct request_queue *rq;
94 struct work_struct work; 94 struct work_struct work;
95 struct gnttab_free_callback callback; 95 struct gnttab_free_callback callback;
96 struct blk_shadow shadow[BLK_RING_SIZE]; 96 struct blk_shadow shadow[BLK_RING_SIZE];
97 unsigned long shadow_free; 97 unsigned long shadow_free;
98 int feature_barrier; 98 unsigned int feature_flush;
99 int is_ready; 99 int is_ready;
100 }; 100 };
101 101
102 static DEFINE_SPINLOCK(blkif_io_lock); 102 static DEFINE_SPINLOCK(blkif_io_lock);
103 103
104 static unsigned int nr_minors; 104 static unsigned int nr_minors;
105 static unsigned long *minors; 105 static unsigned long *minors;
106 static DEFINE_SPINLOCK(minor_lock); 106 static DEFINE_SPINLOCK(minor_lock);
107 107
108 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 108 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
109 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 109 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
110 #define GRANT_INVALID_REF 0 110 #define GRANT_INVALID_REF 0
111 111
112 #define PARTS_PER_DISK 16 112 #define PARTS_PER_DISK 16
113 #define PARTS_PER_EXT_DISK 256 113 #define PARTS_PER_EXT_DISK 256
114 114
115 #define BLKIF_MAJOR(dev) ((dev)>>8) 115 #define BLKIF_MAJOR(dev) ((dev)>>8)
116 #define BLKIF_MINOR(dev) ((dev) & 0xff) 116 #define BLKIF_MINOR(dev) ((dev) & 0xff)
117 117
118 #define EXT_SHIFT 28 118 #define EXT_SHIFT 28
119 #define EXTENDED (1<<EXT_SHIFT) 119 #define EXTENDED (1<<EXT_SHIFT)
120 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 120 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
121 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 121 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
122 122
123 #define DEV_NAME "xvd" /* name in /dev */ 123 #define DEV_NAME "xvd" /* name in /dev */
124 124
125 static int get_id_from_freelist(struct blkfront_info *info) 125 static int get_id_from_freelist(struct blkfront_info *info)
126 { 126 {
127 unsigned long free = info->shadow_free; 127 unsigned long free = info->shadow_free;
128 BUG_ON(free >= BLK_RING_SIZE); 128 BUG_ON(free >= BLK_RING_SIZE);
129 info->shadow_free = info->shadow[free].req.id; 129 info->shadow_free = info->shadow[free].req.id;
130 info->shadow[free].req.id = 0x0fffffee; /* debug */ 130 info->shadow[free].req.id = 0x0fffffee; /* debug */
131 return free; 131 return free;
132 } 132 }
133 133
134 static void add_id_to_freelist(struct blkfront_info *info, 134 static void add_id_to_freelist(struct blkfront_info *info,
135 unsigned long id) 135 unsigned long id)
136 { 136 {
137 info->shadow[id].req.id = info->shadow_free; 137 info->shadow[id].req.id = info->shadow_free;
138 info->shadow[id].request = 0; 138 info->shadow[id].request = 0;
139 info->shadow_free = id; 139 info->shadow_free = id;
140 } 140 }
141 141
142 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) 142 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
143 { 143 {
144 unsigned int end = minor + nr; 144 unsigned int end = minor + nr;
145 int rc; 145 int rc;
146 146
147 if (end > nr_minors) { 147 if (end > nr_minors) {
148 unsigned long *bitmap, *old; 148 unsigned long *bitmap, *old;
149 149
150 bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), 150 bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
151 GFP_KERNEL); 151 GFP_KERNEL);
152 if (bitmap == NULL) 152 if (bitmap == NULL)
153 return -ENOMEM; 153 return -ENOMEM;
154 154
155 spin_lock(&minor_lock); 155 spin_lock(&minor_lock);
156 if (end > nr_minors) { 156 if (end > nr_minors) {
157 old = minors; 157 old = minors;
158 memcpy(bitmap, minors, 158 memcpy(bitmap, minors,
159 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); 159 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
160 minors = bitmap; 160 minors = bitmap;
161 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; 161 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
162 } else 162 } else
163 old = bitmap; 163 old = bitmap;
164 spin_unlock(&minor_lock); 164 spin_unlock(&minor_lock);
165 kfree(old); 165 kfree(old);
166 } 166 }
167 167
168 spin_lock(&minor_lock); 168 spin_lock(&minor_lock);
169 if (find_next_bit(minors, end, minor) >= end) { 169 if (find_next_bit(minors, end, minor) >= end) {
170 for (; minor < end; ++minor) 170 for (; minor < end; ++minor)
171 __set_bit(minor, minors); 171 __set_bit(minor, minors);
172 rc = 0; 172 rc = 0;
173 } else 173 } else
174 rc = -EBUSY; 174 rc = -EBUSY;
175 spin_unlock(&minor_lock); 175 spin_unlock(&minor_lock);
176 176
177 return rc; 177 return rc;
178 } 178 }
179 179
180 static void xlbd_release_minors(unsigned int minor, unsigned int nr) 180 static void xlbd_release_minors(unsigned int minor, unsigned int nr)
181 { 181 {
182 unsigned int end = minor + nr; 182 unsigned int end = minor + nr;
183 183
184 BUG_ON(end > nr_minors); 184 BUG_ON(end > nr_minors);
185 spin_lock(&minor_lock); 185 spin_lock(&minor_lock);
186 for (; minor < end; ++minor) 186 for (; minor < end; ++minor)
187 __clear_bit(minor, minors); 187 __clear_bit(minor, minors);
188 spin_unlock(&minor_lock); 188 spin_unlock(&minor_lock);
189 } 189 }
190 190
191 static void blkif_restart_queue_callback(void *arg) 191 static void blkif_restart_queue_callback(void *arg)
192 { 192 {
193 struct blkfront_info *info = (struct blkfront_info *)arg; 193 struct blkfront_info *info = (struct blkfront_info *)arg;
194 schedule_work(&info->work); 194 schedule_work(&info->work);
195 } 195 }
196 196
197 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) 197 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
198 { 198 {
199 /* We don't have real geometry info, but let's at least return 199 /* We don't have real geometry info, but let's at least return
200 values consistent with the size of the device */ 200 values consistent with the size of the device */
201 sector_t nsect = get_capacity(bd->bd_disk); 201 sector_t nsect = get_capacity(bd->bd_disk);
202 sector_t cylinders = nsect; 202 sector_t cylinders = nsect;
203 203
204 hg->heads = 0xff; 204 hg->heads = 0xff;
205 hg->sectors = 0x3f; 205 hg->sectors = 0x3f;
206 sector_div(cylinders, hg->heads * hg->sectors); 206 sector_div(cylinders, hg->heads * hg->sectors);
207 hg->cylinders = cylinders; 207 hg->cylinders = cylinders;
208 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) 208 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
209 hg->cylinders = 0xffff; 209 hg->cylinders = 0xffff;
210 return 0; 210 return 0;
211 } 211 }
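As a worked example of the made-up geometry above (illustrative numbers, not taken from the source): a 16 GiB disk reports nsect = 33554432 sectors, so cylinders = 33554432 / (0xff * 0x3f) = 33554432 / 16065, which sector_div() truncates to 2088; since (2088 + 1) * 255 * 63 = 33559785 already covers all 33554432 sectors, the 0xffff clamp does not trigger.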
212 212
213 static int blkif_ioctl(struct block_device *bdev, fmode_t mode, 213 static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
214 unsigned command, unsigned long argument) 214 unsigned command, unsigned long argument)
215 { 215 {
216 struct blkfront_info *info = bdev->bd_disk->private_data; 216 struct blkfront_info *info = bdev->bd_disk->private_data;
217 int i; 217 int i;
218 218
219 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", 219 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
220 command, (long)argument); 220 command, (long)argument);
221 221
222 switch (command) { 222 switch (command) {
223 case CDROMMULTISESSION: 223 case CDROMMULTISESSION:
224 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); 224 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
225 for (i = 0; i < sizeof(struct cdrom_multisession); i++) 225 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
226 if (put_user(0, (char __user *)(argument + i))) 226 if (put_user(0, (char __user *)(argument + i)))
227 return -EFAULT; 227 return -EFAULT;
228 return 0; 228 return 0;
229 229
230 case CDROM_GET_CAPABILITY: { 230 case CDROM_GET_CAPABILITY: {
231 struct gendisk *gd = info->gd; 231 struct gendisk *gd = info->gd;
232 if (gd->flags & GENHD_FL_CD) 232 if (gd->flags & GENHD_FL_CD)
233 return 0; 233 return 0;
234 return -EINVAL; 234 return -EINVAL;
235 } 235 }
236 236
237 default: 237 default:
238 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 238 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
239 command);*/ 239 command);*/
240 return -EINVAL; /* same return as native Linux */ 240 return -EINVAL; /* same return as native Linux */
241 } 241 }
242 242
243 return 0; 243 return 0;
244 } 244 }
245 245
246 /* 246 /*
247 * blkif_queue_request 247 * blkif_queue_request
248 * 248 *
249 * request block io 249 * request block io
250 * 250 *
251 * id: for guest use only. 251 * id: for guest use only.
252 * operation: BLKIF_OP_{READ,WRITE,PROBE} 252 * operation: BLKIF_OP_{READ,WRITE,PROBE}
253 * buffer: buffer to read/write into. this should be a 253 * buffer: buffer to read/write into. this should be a
254 * virtual address in the guest os. 254 * virtual address in the guest os.
255 */ 255 */
256 static int blkif_queue_request(struct request *req) 256 static int blkif_queue_request(struct request *req)
257 { 257 {
258 struct blkfront_info *info = req->rq_disk->private_data; 258 struct blkfront_info *info = req->rq_disk->private_data;
259 unsigned long buffer_mfn; 259 unsigned long buffer_mfn;
260 struct blkif_request *ring_req; 260 struct blkif_request *ring_req;
261 unsigned long id; 261 unsigned long id;
262 unsigned int fsect, lsect; 262 unsigned int fsect, lsect;
263 int i, ref; 263 int i, ref;
264 grant_ref_t gref_head; 264 grant_ref_t gref_head;
265 struct scatterlist *sg; 265 struct scatterlist *sg;
266 266
267 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 267 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
268 return 1; 268 return 1;
269 269
270 if (gnttab_alloc_grant_references( 270 if (gnttab_alloc_grant_references(
271 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 271 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
272 gnttab_request_free_callback( 272 gnttab_request_free_callback(
273 &info->callback, 273 &info->callback,
274 blkif_restart_queue_callback, 274 blkif_restart_queue_callback,
275 info, 275 info,
276 BLKIF_MAX_SEGMENTS_PER_REQUEST); 276 BLKIF_MAX_SEGMENTS_PER_REQUEST);
277 return 1; 277 return 1;
278 } 278 }
279 279
280 /* Fill out a communications ring structure. */ 280 /* Fill out a communications ring structure. */
281 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 281 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
282 id = get_id_from_freelist(info); 282 id = get_id_from_freelist(info);
283 info->shadow[id].request = (unsigned long)req; 283 info->shadow[id].request = (unsigned long)req;
284 284
285 ring_req->id = id; 285 ring_req->id = id;
286 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); 286 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
287 ring_req->handle = info->handle; 287 ring_req->handle = info->handle;
288 288
289 ring_req->operation = rq_data_dir(req) ? 289 ring_req->operation = rq_data_dir(req) ?
290 BLKIF_OP_WRITE : BLKIF_OP_READ; 290 BLKIF_OP_WRITE : BLKIF_OP_READ;
291 if (req->cmd_flags & REQ_HARDBARRIER) 291 if (req->cmd_flags & REQ_HARDBARRIER)
292 ring_req->operation = BLKIF_OP_WRITE_BARRIER; 292 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
293 293
294 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 294 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
295 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 295 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
296 296
297 for_each_sg(info->sg, sg, ring_req->nr_segments, i) { 297 for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
298 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); 298 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
299 fsect = sg->offset >> 9; 299 fsect = sg->offset >> 9;
300 lsect = fsect + (sg->length >> 9) - 1; 300 lsect = fsect + (sg->length >> 9) - 1;
301 /* install a grant reference. */ 301 /* install a grant reference. */
302 ref = gnttab_claim_grant_reference(&gref_head); 302 ref = gnttab_claim_grant_reference(&gref_head);
303 BUG_ON(ref == -ENOSPC); 303 BUG_ON(ref == -ENOSPC);
304 304
305 gnttab_grant_foreign_access_ref( 305 gnttab_grant_foreign_access_ref(
306 ref, 306 ref,
307 info->xbdev->otherend_id, 307 info->xbdev->otherend_id,
308 buffer_mfn, 308 buffer_mfn,
309 rq_data_dir(req) ); 309 rq_data_dir(req) );
310 310
311 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 311 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
312 ring_req->seg[i] = 312 ring_req->seg[i] =
313 (struct blkif_request_segment) { 313 (struct blkif_request_segment) {
314 .gref = ref, 314 .gref = ref,
315 .first_sect = fsect, 315 .first_sect = fsect,
316 .last_sect = lsect }; 316 .last_sect = lsect };
317 } 317 }
318 318
319 info->ring.req_prod_pvt++; 319 info->ring.req_prod_pvt++;
320 320
321 /* Keep a private copy so we can reissue requests when recovering. */ 321 /* Keep a private copy so we can reissue requests when recovering. */
322 info->shadow[id].req = *ring_req; 322 info->shadow[id].req = *ring_req;
323 323
324 gnttab_free_grant_references(gref_head); 324 gnttab_free_grant_references(gref_head);
325 325
326 return 0; 326 return 0;
327 } 327 }
328 328
329 329
330 static inline void flush_requests(struct blkfront_info *info) 330 static inline void flush_requests(struct blkfront_info *info)
331 { 331 {
332 int notify; 332 int notify;
333 333
334 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 334 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
335 335
336 if (notify) 336 if (notify)
337 notify_remote_via_irq(info->irq); 337 notify_remote_via_irq(info->irq);
338 } 338 }
339 339
340 /* 340 /*
341 * do_blkif_request 341 * do_blkif_request
342 * read a block; request is in a request queue 342 * read a block; request is in a request queue
343 */ 343 */
344 static void do_blkif_request(struct request_queue *rq) 344 static void do_blkif_request(struct request_queue *rq)
345 { 345 {
346 struct blkfront_info *info = NULL; 346 struct blkfront_info *info = NULL;
347 struct request *req; 347 struct request *req;
348 int queued; 348 int queued;
349 349
350 pr_debug("Entered do_blkif_request\n"); 350 pr_debug("Entered do_blkif_request\n");
351 351
352 queued = 0; 352 queued = 0;
353 353
354 while ((req = blk_peek_request(rq)) != NULL) { 354 while ((req = blk_peek_request(rq)) != NULL) {
355 info = req->rq_disk->private_data; 355 info = req->rq_disk->private_data;
356 356
357 if (RING_FULL(&info->ring)) 357 if (RING_FULL(&info->ring))
358 goto wait; 358 goto wait;
359 359
360 blk_start_request(req); 360 blk_start_request(req);
361 361
362 if (req->cmd_type != REQ_TYPE_FS) { 362 if (req->cmd_type != REQ_TYPE_FS) {
363 __blk_end_request_all(req, -EIO); 363 __blk_end_request_all(req, -EIO);
364 continue; 364 continue;
365 } 365 }
366 366
367 pr_debug("do_blk_req %p: cmd %p, sec %lx, " 367 pr_debug("do_blk_req %p: cmd %p, sec %lx, "
368 "(%u/%u) buffer:%p [%s]\n", 368 "(%u/%u) buffer:%p [%s]\n",
369 req, req->cmd, (unsigned long)blk_rq_pos(req), 369 req, req->cmd, (unsigned long)blk_rq_pos(req),
370 blk_rq_cur_sectors(req), blk_rq_sectors(req), 370 blk_rq_cur_sectors(req), blk_rq_sectors(req),
371 req->buffer, rq_data_dir(req) ? "write" : "read"); 371 req->buffer, rq_data_dir(req) ? "write" : "read");
372 372
373 if (blkif_queue_request(req)) { 373 if (blkif_queue_request(req)) {
374 blk_requeue_request(rq, req); 374 blk_requeue_request(rq, req);
375 wait: 375 wait:
376 /* Avoid pointless unplugs. */ 376 /* Avoid pointless unplugs. */
377 blk_stop_queue(rq); 377 blk_stop_queue(rq);
378 break; 378 break;
379 } 379 }
380 380
381 queued++; 381 queued++;
382 } 382 }
383 383
384 if (queued != 0) 384 if (queued != 0)
385 flush_requests(info); 385 flush_requests(info);
386 } 386 }
387 387
388 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 388 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
389 { 389 {
390 struct request_queue *rq; 390 struct request_queue *rq;
391 391
392 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 392 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
393 if (rq == NULL) 393 if (rq == NULL)
394 return -1; 394 return -1;
395 395
396 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); 396 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
397 397
398 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 398 /* Hard sector size and max sectors impersonate the equiv. hardware. */
399 blk_queue_logical_block_size(rq, sector_size); 399 blk_queue_logical_block_size(rq, sector_size);
400 blk_queue_max_hw_sectors(rq, 512); 400 blk_queue_max_hw_sectors(rq, 512);
401 401
402 /* Each segment in a request is up to an aligned page in size. */ 402 /* Each segment in a request is up to an aligned page in size. */
403 blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 403 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
404 blk_queue_max_segment_size(rq, PAGE_SIZE); 404 blk_queue_max_segment_size(rq, PAGE_SIZE);
405 405
406 /* Ensure a merged request will fit in a single I/O ring slot. */ 406 /* Ensure a merged request will fit in a single I/O ring slot. */
407 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); 407 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
408 408
409 /* Make sure buffer addresses are sector-aligned. */ 409 /* Make sure buffer addresses are sector-aligned. */
410 blk_queue_dma_alignment(rq, 511); 410 blk_queue_dma_alignment(rq, 511);
411 411
412 /* Make sure we don't use bounce buffers. */ 412 /* Make sure we don't use bounce buffers. */
413 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); 413 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
414 414
415 gd->queue = rq; 415 gd->queue = rq;
416 416
417 return 0; 417 return 0;
418 } 418 }
419 419
420 420
421 static int xlvbd_barrier(struct blkfront_info *info) 421 static void xlvbd_flush(struct blkfront_info *info)
422 { 422 {
423 int err; 423 blk_queue_flush(info->rq, info->feature_flush);
424 const char *barrier;
425
426 switch (info->feature_barrier) {
427 case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break;
428 case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
429 default: return -EINVAL;
430 }
431
432 err = blk_queue_ordered(info->rq, info->feature_barrier);
433
434 if (err)
435 return err;
436
437 printk(KERN_INFO "blkfront: %s: barriers %s\n", 424 printk(KERN_INFO "blkfront: %s: barriers %s\n",
438 info->gd->disk_name, barrier); 425 info->gd->disk_name,
439 return 0; 426 info->feature_flush ? "enabled" : "disabled");
440 } 427 }
441 428
442 429
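A minimal sketch of the queue-side interface this hunk switches to, assuming only what the blk_queue_flush() call above implies; the helper name and its parameters are illustrative, not part of the diff:

/*
 * Illustrative sketch only: a driver with a volatile write cache
 * describes its capabilities with blk_queue_flush(), which accepts
 * any combination of REQ_FLUSH and REQ_FUA.  This helper and its
 * arguments are hypothetical, not taken from this commit.
 */
static void example_setup_flush(struct request_queue *q,
				bool has_cache, bool has_fua)
{
	unsigned int flush = 0;

	if (has_cache)
		flush |= REQ_FLUSH;	/* write cache present and flushable */
	if (has_cache && has_fua)
		flush |= REQ_FUA;	/* device honours FUA writes */

	blk_queue_flush(q, flush);	/* 0 means no cache, nothing to do */
}

Passing 0 leaves the queue in its default state, which matches the behaviour of the old QUEUE_ORDERED_DRAIN setting.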
443 static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 430 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
444 struct blkfront_info *info, 431 struct blkfront_info *info,
445 u16 vdisk_info, u16 sector_size) 432 u16 vdisk_info, u16 sector_size)
446 { 433 {
447 struct gendisk *gd; 434 struct gendisk *gd;
448 int nr_minors = 1; 435 int nr_minors = 1;
449 int err = -ENODEV; 436 int err = -ENODEV;
450 unsigned int offset; 437 unsigned int offset;
451 int minor; 438 int minor;
452 int nr_parts; 439 int nr_parts;
453 440
454 BUG_ON(info->gd != NULL); 441 BUG_ON(info->gd != NULL);
455 BUG_ON(info->rq != NULL); 442 BUG_ON(info->rq != NULL);
456 443
457 if ((info->vdevice>>EXT_SHIFT) > 1) { 444 if ((info->vdevice>>EXT_SHIFT) > 1) {
458 /* this is above the extended range; something is wrong */ 445 /* this is above the extended range; something is wrong */
459 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); 446 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
460 return -ENODEV; 447 return -ENODEV;
461 } 448 }
462 449
463 if (!VDEV_IS_EXTENDED(info->vdevice)) { 450 if (!VDEV_IS_EXTENDED(info->vdevice)) {
464 minor = BLKIF_MINOR(info->vdevice); 451 minor = BLKIF_MINOR(info->vdevice);
465 nr_parts = PARTS_PER_DISK; 452 nr_parts = PARTS_PER_DISK;
466 } else { 453 } else {
467 minor = BLKIF_MINOR_EXT(info->vdevice); 454 minor = BLKIF_MINOR_EXT(info->vdevice);
468 nr_parts = PARTS_PER_EXT_DISK; 455 nr_parts = PARTS_PER_EXT_DISK;
469 } 456 }
470 457
471 if ((minor % nr_parts) == 0) 458 if ((minor % nr_parts) == 0)
472 nr_minors = nr_parts; 459 nr_minors = nr_parts;
473 460
474 err = xlbd_reserve_minors(minor, nr_minors); 461 err = xlbd_reserve_minors(minor, nr_minors);
475 if (err) 462 if (err)
476 goto out; 463 goto out;
477 err = -ENODEV; 464 err = -ENODEV;
478 465
479 gd = alloc_disk(nr_minors); 466 gd = alloc_disk(nr_minors);
480 if (gd == NULL) 467 if (gd == NULL)
481 goto release; 468 goto release;
482 469
483 offset = minor / nr_parts; 470 offset = minor / nr_parts;
484 471
485 if (nr_minors > 1) { 472 if (nr_minors > 1) {
486 if (offset < 26) 473 if (offset < 26)
487 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 474 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
488 else 475 else
489 sprintf(gd->disk_name, "%s%c%c", DEV_NAME, 476 sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
490 'a' + ((offset / 26)-1), 'a' + (offset % 26)); 477 'a' + ((offset / 26)-1), 'a' + (offset % 26));
491 } else { 478 } else {
492 if (offset < 26) 479 if (offset < 26)
493 sprintf(gd->disk_name, "%s%c%d", DEV_NAME, 480 sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
494 'a' + offset, 481 'a' + offset,
495 minor & (nr_parts - 1)); 482 minor & (nr_parts - 1));
496 else 483 else
497 sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, 484 sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
498 'a' + ((offset / 26) - 1), 485 'a' + ((offset / 26) - 1),
499 'a' + (offset % 26), 486 'a' + (offset % 26),
500 minor & (nr_parts - 1)); 487 minor & (nr_parts - 1));
501 } 488 }
502 489
503 gd->major = XENVBD_MAJOR; 490 gd->major = XENVBD_MAJOR;
504 gd->first_minor = minor; 491 gd->first_minor = minor;
505 gd->fops = &xlvbd_block_fops; 492 gd->fops = &xlvbd_block_fops;
506 gd->private_data = info; 493 gd->private_data = info;
507 gd->driverfs_dev = &(info->xbdev->dev); 494 gd->driverfs_dev = &(info->xbdev->dev);
508 set_capacity(gd, capacity); 495 set_capacity(gd, capacity);
509 496
510 if (xlvbd_init_blk_queue(gd, sector_size)) { 497 if (xlvbd_init_blk_queue(gd, sector_size)) {
511 del_gendisk(gd); 498 del_gendisk(gd);
512 goto release; 499 goto release;
513 } 500 }
514 501
515 info->rq = gd->queue; 502 info->rq = gd->queue;
516 info->gd = gd; 503 info->gd = gd;
517 504
518 xlvbd_barrier(info); 505 xlvbd_flush(info);
519 506
520 if (vdisk_info & VDISK_READONLY) 507 if (vdisk_info & VDISK_READONLY)
521 set_disk_ro(gd, 1); 508 set_disk_ro(gd, 1);
522 509
523 if (vdisk_info & VDISK_REMOVABLE) 510 if (vdisk_info & VDISK_REMOVABLE)
524 gd->flags |= GENHD_FL_REMOVABLE; 511 gd->flags |= GENHD_FL_REMOVABLE;
525 512
526 if (vdisk_info & VDISK_CDROM) 513 if (vdisk_info & VDISK_CDROM)
527 gd->flags |= GENHD_FL_CD; 514 gd->flags |= GENHD_FL_CD;
528 515
529 return 0; 516 return 0;
530 517
531 release: 518 release:
532 xlbd_release_minors(minor, nr_minors); 519 xlbd_release_minors(minor, nr_minors);
533 out: 520 out:
534 return err; 521 return err;
535 } 522 }
536 523
537 static void xlvbd_release_gendisk(struct blkfront_info *info) 524 static void xlvbd_release_gendisk(struct blkfront_info *info)
538 { 525 {
539 unsigned int minor, nr_minors; 526 unsigned int minor, nr_minors;
540 unsigned long flags; 527 unsigned long flags;
541 528
542 if (info->rq == NULL) 529 if (info->rq == NULL)
543 return; 530 return;
544 531
545 spin_lock_irqsave(&blkif_io_lock, flags); 532 spin_lock_irqsave(&blkif_io_lock, flags);
546 533
547 /* No more blkif_request(). */ 534 /* No more blkif_request(). */
548 blk_stop_queue(info->rq); 535 blk_stop_queue(info->rq);
549 536
550 /* No more gnttab callback work. */ 537 /* No more gnttab callback work. */
551 gnttab_cancel_free_callback(&info->callback); 538 gnttab_cancel_free_callback(&info->callback);
552 spin_unlock_irqrestore(&blkif_io_lock, flags); 539 spin_unlock_irqrestore(&blkif_io_lock, flags);
553 540
554 /* Flush gnttab callback work. Must be done with no locks held. */ 541 /* Flush gnttab callback work. Must be done with no locks held. */
555 flush_scheduled_work(); 542 flush_scheduled_work();
556 543
557 del_gendisk(info->gd); 544 del_gendisk(info->gd);
558 545
559 minor = info->gd->first_minor; 546 minor = info->gd->first_minor;
560 nr_minors = info->gd->minors; 547 nr_minors = info->gd->minors;
561 xlbd_release_minors(minor, nr_minors); 548 xlbd_release_minors(minor, nr_minors);
562 549
563 blk_cleanup_queue(info->rq); 550 blk_cleanup_queue(info->rq);
564 info->rq = NULL; 551 info->rq = NULL;
565 552
566 put_disk(info->gd); 553 put_disk(info->gd);
567 info->gd = NULL; 554 info->gd = NULL;
568 } 555 }
569 556
570 static void kick_pending_request_queues(struct blkfront_info *info) 557 static void kick_pending_request_queues(struct blkfront_info *info)
571 { 558 {
572 if (!RING_FULL(&info->ring)) { 559 if (!RING_FULL(&info->ring)) {
573 /* Re-enable calldowns. */ 560 /* Re-enable calldowns. */
574 blk_start_queue(info->rq); 561 blk_start_queue(info->rq);
575 /* Kick things off immediately. */ 562 /* Kick things off immediately. */
576 do_blkif_request(info->rq); 563 do_blkif_request(info->rq);
577 } 564 }
578 } 565 }
579 566
580 static void blkif_restart_queue(struct work_struct *work) 567 static void blkif_restart_queue(struct work_struct *work)
581 { 568 {
582 struct blkfront_info *info = container_of(work, struct blkfront_info, work); 569 struct blkfront_info *info = container_of(work, struct blkfront_info, work);
583 570
584 spin_lock_irq(&blkif_io_lock); 571 spin_lock_irq(&blkif_io_lock);
585 if (info->connected == BLKIF_STATE_CONNECTED) 572 if (info->connected == BLKIF_STATE_CONNECTED)
586 kick_pending_request_queues(info); 573 kick_pending_request_queues(info);
587 spin_unlock_irq(&blkif_io_lock); 574 spin_unlock_irq(&blkif_io_lock);
588 } 575 }
589 576
590 static void blkif_free(struct blkfront_info *info, int suspend) 577 static void blkif_free(struct blkfront_info *info, int suspend)
591 { 578 {
592 /* Prevent new requests being issued until we fix things up. */ 579 /* Prevent new requests being issued until we fix things up. */
593 spin_lock_irq(&blkif_io_lock); 580 spin_lock_irq(&blkif_io_lock);
594 info->connected = suspend ? 581 info->connected = suspend ?
595 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 582 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
596 /* No more blkif_request(). */ 583 /* No more blkif_request(). */
597 if (info->rq) 584 if (info->rq)
598 blk_stop_queue(info->rq); 585 blk_stop_queue(info->rq);
599 /* No more gnttab callback work. */ 586 /* No more gnttab callback work. */
600 gnttab_cancel_free_callback(&info->callback); 587 gnttab_cancel_free_callback(&info->callback);
601 spin_unlock_irq(&blkif_io_lock); 588 spin_unlock_irq(&blkif_io_lock);
602 589
603 /* Flush gnttab callback work. Must be done with no locks held. */ 590 /* Flush gnttab callback work. Must be done with no locks held. */
604 flush_scheduled_work(); 591 flush_scheduled_work();
605 592
606 /* Free resources associated with old device channel. */ 593 /* Free resources associated with old device channel. */
607 if (info->ring_ref != GRANT_INVALID_REF) { 594 if (info->ring_ref != GRANT_INVALID_REF) {
608 gnttab_end_foreign_access(info->ring_ref, 0, 595 gnttab_end_foreign_access(info->ring_ref, 0,
609 (unsigned long)info->ring.sring); 596 (unsigned long)info->ring.sring);
610 info->ring_ref = GRANT_INVALID_REF; 597 info->ring_ref = GRANT_INVALID_REF;
611 info->ring.sring = NULL; 598 info->ring.sring = NULL;
612 } 599 }
613 if (info->irq) 600 if (info->irq)
614 unbind_from_irqhandler(info->irq, info); 601 unbind_from_irqhandler(info->irq, info);
615 info->evtchn = info->irq = 0; 602 info->evtchn = info->irq = 0;
616 603
617 } 604 }
618 605
619 static void blkif_completion(struct blk_shadow *s) 606 static void blkif_completion(struct blk_shadow *s)
620 { 607 {
621 int i; 608 int i;
622 for (i = 0; i < s->req.nr_segments; i++) 609 for (i = 0; i < s->req.nr_segments; i++)
623 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); 610 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
624 } 611 }
625 612
626 static irqreturn_t blkif_interrupt(int irq, void *dev_id) 613 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
627 { 614 {
628 struct request *req; 615 struct request *req;
629 struct blkif_response *bret; 616 struct blkif_response *bret;
630 RING_IDX i, rp; 617 RING_IDX i, rp;
631 unsigned long flags; 618 unsigned long flags;
632 struct blkfront_info *info = (struct blkfront_info *)dev_id; 619 struct blkfront_info *info = (struct blkfront_info *)dev_id;
633 int error; 620 int error;
634 621
635 spin_lock_irqsave(&blkif_io_lock, flags); 622 spin_lock_irqsave(&blkif_io_lock, flags);
636 623
637 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 624 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
638 spin_unlock_irqrestore(&blkif_io_lock, flags); 625 spin_unlock_irqrestore(&blkif_io_lock, flags);
639 return IRQ_HANDLED; 626 return IRQ_HANDLED;
640 } 627 }
641 628
642 again: 629 again:
643 rp = info->ring.sring->rsp_prod; 630 rp = info->ring.sring->rsp_prod;
644 rmb(); /* Ensure we see queued responses up to 'rp'. */ 631 rmb(); /* Ensure we see queued responses up to 'rp'. */
645 632
646 for (i = info->ring.rsp_cons; i != rp; i++) { 633 for (i = info->ring.rsp_cons; i != rp; i++) {
647 unsigned long id; 634 unsigned long id;
648 635
649 bret = RING_GET_RESPONSE(&info->ring, i); 636 bret = RING_GET_RESPONSE(&info->ring, i);
650 id = bret->id; 637 id = bret->id;
651 req = (struct request *)info->shadow[id].request; 638 req = (struct request *)info->shadow[id].request;
652 639
653 blkif_completion(&info->shadow[id]); 640 blkif_completion(&info->shadow[id]);
654 641
655 add_id_to_freelist(info, id); 642 add_id_to_freelist(info, id);
656 643
657 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 644 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
658 switch (bret->operation) { 645 switch (bret->operation) {
659 case BLKIF_OP_WRITE_BARRIER: 646 case BLKIF_OP_WRITE_BARRIER:
660 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 647 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
661 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", 648 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
662 info->gd->disk_name); 649 info->gd->disk_name);
663 error = -EOPNOTSUPP; 650 error = -EOPNOTSUPP;
664 info->feature_barrier = QUEUE_ORDERED_NONE; 651 info->feature_flush = 0;
665 xlvbd_barrier(info); 652 xlvbd_flush(info);
666 } 653 }
667 /* fall through */ 654 /* fall through */
668 case BLKIF_OP_READ: 655 case BLKIF_OP_READ:
669 case BLKIF_OP_WRITE: 656 case BLKIF_OP_WRITE:
670 if (unlikely(bret->status != BLKIF_RSP_OKAY)) 657 if (unlikely(bret->status != BLKIF_RSP_OKAY))
671 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " 658 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
672 "request: %x\n", bret->status); 659 "request: %x\n", bret->status);
673 660
674 __blk_end_request_all(req, error); 661 __blk_end_request_all(req, error);
675 break; 662 break;
676 default: 663 default:
677 BUG(); 664 BUG();
678 } 665 }
679 } 666 }
680 667
681 info->ring.rsp_cons = i; 668 info->ring.rsp_cons = i;
682 669
683 if (i != info->ring.req_prod_pvt) { 670 if (i != info->ring.req_prod_pvt) {
684 int more_to_do; 671 int more_to_do;
685 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 672 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
686 if (more_to_do) 673 if (more_to_do)
687 goto again; 674 goto again;
688 } else 675 } else
689 info->ring.sring->rsp_event = i + 1; 676 info->ring.sring->rsp_event = i + 1;
690 677
691 kick_pending_request_queues(info); 678 kick_pending_request_queues(info);
692 679
693 spin_unlock_irqrestore(&blkif_io_lock, flags); 680 spin_unlock_irqrestore(&blkif_io_lock, flags);
694 681
695 return IRQ_HANDLED; 682 return IRQ_HANDLED;
696 } 683 }
697 684
698 685
699 static int setup_blkring(struct xenbus_device *dev, 686 static int setup_blkring(struct xenbus_device *dev,
700 struct blkfront_info *info) 687 struct blkfront_info *info)
701 { 688 {
702 struct blkif_sring *sring; 689 struct blkif_sring *sring;
703 int err; 690 int err;
704 691
705 info->ring_ref = GRANT_INVALID_REF; 692 info->ring_ref = GRANT_INVALID_REF;
706 693
707 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); 694 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
708 if (!sring) { 695 if (!sring) {
709 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 696 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
710 return -ENOMEM; 697 return -ENOMEM;
711 } 698 }
712 SHARED_RING_INIT(sring); 699 SHARED_RING_INIT(sring);
713 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 700 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
714 701
715 sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); 702 sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
716 703
717 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); 704 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
718 if (err < 0) { 705 if (err < 0) {
719 free_page((unsigned long)sring); 706 free_page((unsigned long)sring);
720 info->ring.sring = NULL; 707 info->ring.sring = NULL;
721 goto fail; 708 goto fail;
722 } 709 }
723 info->ring_ref = err; 710 info->ring_ref = err;
724 711
725 err = xenbus_alloc_evtchn(dev, &info->evtchn); 712 err = xenbus_alloc_evtchn(dev, &info->evtchn);
726 if (err) 713 if (err)
727 goto fail; 714 goto fail;
728 715
729 err = bind_evtchn_to_irqhandler(info->evtchn, 716 err = bind_evtchn_to_irqhandler(info->evtchn,
730 blkif_interrupt, 717 blkif_interrupt,
731 IRQF_SAMPLE_RANDOM, "blkif", info); 718 IRQF_SAMPLE_RANDOM, "blkif", info);
732 if (err <= 0) { 719 if (err <= 0) {
733 xenbus_dev_fatal(dev, err, 720 xenbus_dev_fatal(dev, err,
734 "bind_evtchn_to_irqhandler failed"); 721 "bind_evtchn_to_irqhandler failed");
735 goto fail; 722 goto fail;
736 } 723 }
737 info->irq = err; 724 info->irq = err;
738 725
739 return 0; 726 return 0;
740 fail: 727 fail:
741 blkif_free(info, 0); 728 blkif_free(info, 0);
742 return err; 729 return err;
743 } 730 }
744 731
745 732
746 /* Common code used when first setting up, and when resuming. */ 733 /* Common code used when first setting up, and when resuming. */
747 static int talk_to_blkback(struct xenbus_device *dev, 734 static int talk_to_blkback(struct xenbus_device *dev,
748 struct blkfront_info *info) 735 struct blkfront_info *info)
749 { 736 {
750 const char *message = NULL; 737 const char *message = NULL;
751 struct xenbus_transaction xbt; 738 struct xenbus_transaction xbt;
752 int err; 739 int err;
753 740
754 /* Create shared ring, alloc event channel. */ 741 /* Create shared ring, alloc event channel. */
755 err = setup_blkring(dev, info); 742 err = setup_blkring(dev, info);
756 if (err) 743 if (err)
757 goto out; 744 goto out;
758 745
759 again: 746 again:
760 err = xenbus_transaction_start(&xbt); 747 err = xenbus_transaction_start(&xbt);
761 if (err) { 748 if (err) {
762 xenbus_dev_fatal(dev, err, "starting transaction"); 749 xenbus_dev_fatal(dev, err, "starting transaction");
763 goto destroy_blkring; 750 goto destroy_blkring;
764 } 751 }
765 752
766 err = xenbus_printf(xbt, dev->nodename, 753 err = xenbus_printf(xbt, dev->nodename,
767 "ring-ref", "%u", info->ring_ref); 754 "ring-ref", "%u", info->ring_ref);
768 if (err) { 755 if (err) {
769 message = "writing ring-ref"; 756 message = "writing ring-ref";
770 goto abort_transaction; 757 goto abort_transaction;
771 } 758 }
772 err = xenbus_printf(xbt, dev->nodename, 759 err = xenbus_printf(xbt, dev->nodename,
773 "event-channel", "%u", info->evtchn); 760 "event-channel", "%u", info->evtchn);
774 if (err) { 761 if (err) {
775 message = "writing event-channel"; 762 message = "writing event-channel";
776 goto abort_transaction; 763 goto abort_transaction;
777 } 764 }
778 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", 765 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
779 XEN_IO_PROTO_ABI_NATIVE); 766 XEN_IO_PROTO_ABI_NATIVE);
780 if (err) { 767 if (err) {
781 message = "writing protocol"; 768 message = "writing protocol";
782 goto abort_transaction; 769 goto abort_transaction;
783 } 770 }
784 771
785 err = xenbus_transaction_end(xbt, 0); 772 err = xenbus_transaction_end(xbt, 0);
786 if (err) { 773 if (err) {
787 if (err == -EAGAIN) 774 if (err == -EAGAIN)
788 goto again; 775 goto again;
789 xenbus_dev_fatal(dev, err, "completing transaction"); 776 xenbus_dev_fatal(dev, err, "completing transaction");
790 goto destroy_blkring; 777 goto destroy_blkring;
791 } 778 }
792 779
793 xenbus_switch_state(dev, XenbusStateInitialised); 780 xenbus_switch_state(dev, XenbusStateInitialised);
794 781
795 return 0; 782 return 0;
796 783
797 abort_transaction: 784 abort_transaction:
798 xenbus_transaction_end(xbt, 1); 785 xenbus_transaction_end(xbt, 1);
799 if (message) 786 if (message)
800 xenbus_dev_fatal(dev, err, "%s", message); 787 xenbus_dev_fatal(dev, err, "%s", message);
801 destroy_blkring: 788 destroy_blkring:
802 blkif_free(info, 0); 789 blkif_free(info, 0);
803 out: 790 out:
804 return err; 791 return err;
805 } 792 }
806 793
807 /** 794 /**
808 * Entry point to this code when a new device is created. Allocate the basic 795 * Entry point to this code when a new device is created. Allocate the basic
809 * structures and the ring buffer for communication with the backend, and 796 * structures and the ring buffer for communication with the backend, and
810 * inform the backend of the appropriate details for those. Switch to 797 * inform the backend of the appropriate details for those. Switch to
811 * Initialised state. 798 * Initialised state.
812 */ 799 */
813 static int blkfront_probe(struct xenbus_device *dev, 800 static int blkfront_probe(struct xenbus_device *dev,
814 const struct xenbus_device_id *id) 801 const struct xenbus_device_id *id)
815 { 802 {
816 int err, vdevice, i; 803 int err, vdevice, i;
817 struct blkfront_info *info; 804 struct blkfront_info *info;
818 805
819 /* FIXME: Use dynamic device id if this is not set. */ 806 /* FIXME: Use dynamic device id if this is not set. */
820 err = xenbus_scanf(XBT_NIL, dev->nodename, 807 err = xenbus_scanf(XBT_NIL, dev->nodename,
821 "virtual-device", "%i", &vdevice); 808 "virtual-device", "%i", &vdevice);
822 if (err != 1) { 809 if (err != 1) {
823 /* go looking in the extended area instead */ 810 /* go looking in the extended area instead */
824 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", 811 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
825 "%i", &vdevice); 812 "%i", &vdevice);
826 if (err != 1) { 813 if (err != 1) {
827 xenbus_dev_fatal(dev, err, "reading virtual-device"); 814 xenbus_dev_fatal(dev, err, "reading virtual-device");
828 return err; 815 return err;
829 } 816 }
830 } 817 }
831 818
832 if (xen_hvm_domain()) { 819 if (xen_hvm_domain()) {
833 char *type; 820 char *type;
834 int len; 821 int len;
835 /* no unplug has been done: do not hook devices != xen vbds */ 822 /* no unplug has been done: do not hook devices != xen vbds */
836 if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { 823 if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) {
837 int major; 824 int major;
838 825
839 if (!VDEV_IS_EXTENDED(vdevice)) 826 if (!VDEV_IS_EXTENDED(vdevice))
840 major = BLKIF_MAJOR(vdevice); 827 major = BLKIF_MAJOR(vdevice);
841 else 828 else
842 major = XENVBD_MAJOR; 829 major = XENVBD_MAJOR;
843 830
844 if (major != XENVBD_MAJOR) { 831 if (major != XENVBD_MAJOR) {
845 printk(KERN_INFO 832 printk(KERN_INFO
846 "%s: HVM does not support vbd %d as xen block device\n", 833 "%s: HVM does not support vbd %d as xen block device\n",
847 __FUNCTION__, vdevice); 834 __FUNCTION__, vdevice);
848 return -ENODEV; 835 return -ENODEV;
849 } 836 }
850 } 837 }
851 /* do not create a PV cdrom device if we are an HVM guest */ 838 /* do not create a PV cdrom device if we are an HVM guest */
852 type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); 839 type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
853 if (IS_ERR(type)) 840 if (IS_ERR(type))
854 return -ENODEV; 841 return -ENODEV;
855 if (strncmp(type, "cdrom", 5) == 0) { 842 if (strncmp(type, "cdrom", 5) == 0) {
856 kfree(type); 843 kfree(type);
857 return -ENODEV; 844 return -ENODEV;
858 } 845 }
859 kfree(type); 846 kfree(type);
860 } 847 }
861 info = kzalloc(sizeof(*info), GFP_KERNEL); 848 info = kzalloc(sizeof(*info), GFP_KERNEL);
862 if (!info) { 849 if (!info) {
863 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); 850 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
864 return -ENOMEM; 851 return -ENOMEM;
865 } 852 }
866 853
867 mutex_init(&info->mutex); 854 mutex_init(&info->mutex);
868 info->xbdev = dev; 855 info->xbdev = dev;
869 info->vdevice = vdevice; 856 info->vdevice = vdevice;
870 info->connected = BLKIF_STATE_DISCONNECTED; 857 info->connected = BLKIF_STATE_DISCONNECTED;
871 INIT_WORK(&info->work, blkif_restart_queue); 858 INIT_WORK(&info->work, blkif_restart_queue);
872 859
873 for (i = 0; i < BLK_RING_SIZE; i++) 860 for (i = 0; i < BLK_RING_SIZE; i++)
874 info->shadow[i].req.id = i+1; 861 info->shadow[i].req.id = i+1;
875 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 862 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
876 863
877 /* Front end dir is a number, which is used as the id. */ 864 /* Front end dir is a number, which is used as the id. */
878 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); 865 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
879 dev_set_drvdata(&dev->dev, info); 866 dev_set_drvdata(&dev->dev, info);
880 867
881 err = talk_to_blkback(dev, info); 868 err = talk_to_blkback(dev, info);
882 if (err) { 869 if (err) {
883 kfree(info); 870 kfree(info);
884 dev_set_drvdata(&dev->dev, NULL); 871 dev_set_drvdata(&dev->dev, NULL);
885 return err; 872 return err;
886 } 873 }
887 874
888 return 0; 875 return 0;
889 } 876 }
890 877
891 878
892 static int blkif_recover(struct blkfront_info *info) 879 static int blkif_recover(struct blkfront_info *info)
893 { 880 {
894 int i; 881 int i;
895 struct blkif_request *req; 882 struct blkif_request *req;
896 struct blk_shadow *copy; 883 struct blk_shadow *copy;
897 int j; 884 int j;
898 885
899 /* Stage 1: Make a safe copy of the shadow state. */ 886 /* Stage 1: Make a safe copy of the shadow state. */
900 copy = kmalloc(sizeof(info->shadow), 887 copy = kmalloc(sizeof(info->shadow),
901 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); 888 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
902 if (!copy) 889 if (!copy)
903 return -ENOMEM; 890 return -ENOMEM;
904 memcpy(copy, info->shadow, sizeof(info->shadow)); 891 memcpy(copy, info->shadow, sizeof(info->shadow));
905 892
906 /* Stage 2: Set up free list. */ 893 /* Stage 2: Set up free list. */
907 memset(&info->shadow, 0, sizeof(info->shadow)); 894 memset(&info->shadow, 0, sizeof(info->shadow));
908 for (i = 0; i < BLK_RING_SIZE; i++) 895 for (i = 0; i < BLK_RING_SIZE; i++)
909 info->shadow[i].req.id = i+1; 896 info->shadow[i].req.id = i+1;
910 info->shadow_free = info->ring.req_prod_pvt; 897 info->shadow_free = info->ring.req_prod_pvt;
911 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 898 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
912 899
913 /* Stage 3: Find pending requests and requeue them. */ 900 /* Stage 3: Find pending requests and requeue them. */
914 for (i = 0; i < BLK_RING_SIZE; i++) { 901 for (i = 0; i < BLK_RING_SIZE; i++) {
915 /* Not in use? */ 902 /* Not in use? */
916 if (copy[i].request == 0) 903 if (copy[i].request == 0)
917 continue; 904 continue;
918 905
919 /* Grab a request slot and copy shadow state into it. */ 906 /* Grab a request slot and copy shadow state into it. */
920 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 907 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
921 *req = copy[i].req; 908 *req = copy[i].req;
922 909
923 /* We get a new request id, and must reset the shadow state. */ 910 /* We get a new request id, and must reset the shadow state. */
924 req->id = get_id_from_freelist(info); 911 req->id = get_id_from_freelist(info);
925 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); 912 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
926 913
927 /* Rewrite any grant references invalidated by susp/resume. */ 914 /* Rewrite any grant references invalidated by susp/resume. */
928 for (j = 0; j < req->nr_segments; j++) 915 for (j = 0; j < req->nr_segments; j++)
929 gnttab_grant_foreign_access_ref( 916 gnttab_grant_foreign_access_ref(
930 req->seg[j].gref, 917 req->seg[j].gref,
931 info->xbdev->otherend_id, 918 info->xbdev->otherend_id,
932 pfn_to_mfn(info->shadow[req->id].frame[j]), 919 pfn_to_mfn(info->shadow[req->id].frame[j]),
933 rq_data_dir( 920 rq_data_dir(
934 (struct request *) 921 (struct request *)
935 info->shadow[req->id].request)); 922 info->shadow[req->id].request));
936 info->shadow[req->id].req = *req; 923 info->shadow[req->id].req = *req;
937 924
938 info->ring.req_prod_pvt++; 925 info->ring.req_prod_pvt++;
939 } 926 }
940 927
941 kfree(copy); 928 kfree(copy);
942 929
943 xenbus_switch_state(info->xbdev, XenbusStateConnected); 930 xenbus_switch_state(info->xbdev, XenbusStateConnected);
944 931
945 spin_lock_irq(&blkif_io_lock); 932 spin_lock_irq(&blkif_io_lock);
946 933
947 /* Now safe for us to use the shared ring */ 934 /* Now safe for us to use the shared ring */
948 info->connected = BLKIF_STATE_CONNECTED; 935 info->connected = BLKIF_STATE_CONNECTED;
949 936
950 /* Send off requeued requests */ 937 /* Send off requeued requests */
951 flush_requests(info); 938 flush_requests(info);
952 939
953 /* Kick any other new requests queued since we resumed */ 940 /* Kick any other new requests queued since we resumed */
954 kick_pending_request_queues(info); 941 kick_pending_request_queues(info);
955 942
956 spin_unlock_irq(&blkif_io_lock); 943 spin_unlock_irq(&blkif_io_lock);
957 944
958 return 0; 945 return 0;
959 } 946 }
960 947
961 /** 948 /**
962 * We are reconnecting to the backend, due to a suspend/resume, or a backend 949 * We are reconnecting to the backend, due to a suspend/resume, or a backend
963 * driver restart. We tear down our blkif structure and recreate it, but 950 * driver restart. We tear down our blkif structure and recreate it, but
964 * leave the device-layer structures intact so that this is transparent to the 951 * leave the device-layer structures intact so that this is transparent to the
965 * rest of the kernel. 952 * rest of the kernel.
966 */ 953 */
967 static int blkfront_resume(struct xenbus_device *dev) 954 static int blkfront_resume(struct xenbus_device *dev)
968 { 955 {
969 struct blkfront_info *info = dev_get_drvdata(&dev->dev); 956 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
970 int err; 957 int err;
971 958
972 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); 959 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
973 960
974 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); 961 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
975 962
976 err = talk_to_blkback(dev, info); 963 err = talk_to_blkback(dev, info);
977 if (info->connected == BLKIF_STATE_SUSPENDED && !err) 964 if (info->connected == BLKIF_STATE_SUSPENDED && !err)
978 err = blkif_recover(info); 965 err = blkif_recover(info);
979 966
980 return err; 967 return err;
981 } 968 }
982 969
983 static void 970 static void
984 blkfront_closing(struct blkfront_info *info) 971 blkfront_closing(struct blkfront_info *info)
985 { 972 {
986 struct xenbus_device *xbdev = info->xbdev; 973 struct xenbus_device *xbdev = info->xbdev;
987 struct block_device *bdev = NULL; 974 struct block_device *bdev = NULL;
988 975
989 mutex_lock(&info->mutex); 976 mutex_lock(&info->mutex);
990 977
991 if (xbdev->state == XenbusStateClosing) { 978 if (xbdev->state == XenbusStateClosing) {
992 mutex_unlock(&info->mutex); 979 mutex_unlock(&info->mutex);
993 return; 980 return;
994 } 981 }
995 982
996 if (info->gd) 983 if (info->gd)
997 bdev = bdget_disk(info->gd, 0); 984 bdev = bdget_disk(info->gd, 0);
998 985
999 mutex_unlock(&info->mutex); 986 mutex_unlock(&info->mutex);
1000 987
1001 if (!bdev) { 988 if (!bdev) {
1002 xenbus_frontend_closed(xbdev); 989 xenbus_frontend_closed(xbdev);
1003 return; 990 return;
1004 } 991 }
1005 992
1006 mutex_lock(&bdev->bd_mutex); 993 mutex_lock(&bdev->bd_mutex);
1007 994
1008 if (bdev->bd_openers) { 995 if (bdev->bd_openers) {
1009 xenbus_dev_error(xbdev, -EBUSY, 996 xenbus_dev_error(xbdev, -EBUSY,
1010 "Device in use; refusing to close"); 997 "Device in use; refusing to close");
1011 xenbus_switch_state(xbdev, XenbusStateClosing); 998 xenbus_switch_state(xbdev, XenbusStateClosing);
1012 } else { 999 } else {
1013 xlvbd_release_gendisk(info); 1000 xlvbd_release_gendisk(info);
1014 xenbus_frontend_closed(xbdev); 1001 xenbus_frontend_closed(xbdev);
1015 } 1002 }
1016 1003
1017 mutex_unlock(&bdev->bd_mutex); 1004 mutex_unlock(&bdev->bd_mutex);
1018 bdput(bdev); 1005 bdput(bdev);
1019 } 1006 }
1020 1007
1021 /* 1008 /*
1022 * Invoked when the backend is finally 'ready' (and has produced 1009 * Invoked when the backend is finally 'ready' (and has produced
1023 * the details about the physical device - #sectors, size, etc). 1010 * the details about the physical device - #sectors, size, etc).
1024 */ 1011 */
1025 static void blkfront_connect(struct blkfront_info *info) 1012 static void blkfront_connect(struct blkfront_info *info)
1026 { 1013 {
1027 unsigned long long sectors; 1014 unsigned long long sectors;
1028 unsigned long sector_size; 1015 unsigned long sector_size;
1029 unsigned int binfo; 1016 unsigned int binfo;
1030 int err; 1017 int err;
1031 int barrier; 1018 int barrier;
1032 1019
1033 switch (info->connected) { 1020 switch (info->connected) {
1034 case BLKIF_STATE_CONNECTED: 1021 case BLKIF_STATE_CONNECTED:
1035 /* 1022 /*
1036 * Potentially, the back-end may be signalling 1023 * Potentially, the back-end may be signalling
1037 * a capacity change; update the capacity. 1024 * a capacity change; update the capacity.
1038 */ 1025 */
1039 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, 1026 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1040 "sectors", "%Lu", &sectors); 1027 "sectors", "%Lu", &sectors);
1041 if (XENBUS_EXIST_ERR(err)) 1028 if (XENBUS_EXIST_ERR(err))
1042 return; 1029 return;
1043 printk(KERN_INFO "Setting capacity to %Lu\n", 1030 printk(KERN_INFO "Setting capacity to %Lu\n",
1044 sectors); 1031 sectors);
1045 set_capacity(info->gd, sectors); 1032 set_capacity(info->gd, sectors);
1046 revalidate_disk(info->gd); 1033 revalidate_disk(info->gd);
1047 1034
1048 /* fall through */ 1035 /* fall through */
1049 case BLKIF_STATE_SUSPENDED: 1036 case BLKIF_STATE_SUSPENDED:
1050 return; 1037 return;
1051 1038
1052 default: 1039 default:
1053 break; 1040 break;
1054 } 1041 }
1055 1042
1056 dev_dbg(&info->xbdev->dev, "%s:%s.\n", 1043 dev_dbg(&info->xbdev->dev, "%s:%s.\n",
1057 __func__, info->xbdev->otherend); 1044 __func__, info->xbdev->otherend);
1058 1045
1059 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1046 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1060 "sectors", "%llu", &sectors, 1047 "sectors", "%llu", &sectors,
1061 "info", "%u", &binfo, 1048 "info", "%u", &binfo,
1062 "sector-size", "%lu", &sector_size, 1049 "sector-size", "%lu", &sector_size,
1063 NULL); 1050 NULL);
1064 if (err) { 1051 if (err) {
1065 xenbus_dev_fatal(info->xbdev, err, 1052 xenbus_dev_fatal(info->xbdev, err,
1066 "reading backend fields at %s", 1053 "reading backend fields at %s",
1067 info->xbdev->otherend); 1054 info->xbdev->otherend);
1068 return; 1055 return;
1069 } 1056 }
1070 1057
1071 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1058 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1072 "feature-barrier", "%lu", &barrier, 1059 "feature-barrier", "%lu", &barrier,
1073 NULL); 1060 NULL);
1074 1061
1075 /* 1062 /*
1076 * If there's no "feature-barrier" defined, then it means 1063 * If there's no "feature-barrier" defined, then it means
1077 * we're dealing with a very old backend which writes 1064 * we're dealing with a very old backend which writes
1078 * synchronously; draining will do what needs to get done. 1065 * synchronously; nothing to do.
1079 * 1066 *
1080 * If there are barriers, then we use flush. 1067 * If there are barriers, then we use flush.
1081 *
1082 * If barriers are not supported, then there's no much we can
1083 * do, so just set ordering to NONE.
1084 */ 1068 */
1085 if (err) 1069 info->feature_flush = 0;
1086 info->feature_barrier = QUEUE_ORDERED_DRAIN; 1070 if (!err && barrier)
1087 else if (barrier) 1071 info->feature_flush = REQ_FLUSH;
1088 info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH;
1089 else
1090 info->feature_barrier = QUEUE_ORDERED_NONE;
1091 1072
1092 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1073 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1093 if (err) { 1074 if (err) {
1094 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 1075 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
1095 info->xbdev->otherend); 1076 info->xbdev->otherend);
1096 return; 1077 return;
1097 } 1078 }
1098 1079
1099 xenbus_switch_state(info->xbdev, XenbusStateConnected); 1080 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1100 1081
1101 /* Kick pending requests. */ 1082 /* Kick pending requests. */
1102 spin_lock_irq(&blkif_io_lock); 1083 spin_lock_irq(&blkif_io_lock);
1103 info->connected = BLKIF_STATE_CONNECTED; 1084 info->connected = BLKIF_STATE_CONNECTED;
1104 kick_pending_request_queues(info); 1085 kick_pending_request_queues(info);
1105 spin_unlock_irq(&blkif_io_lock); 1086 spin_unlock_irq(&blkif_io_lock);
1106 1087
1107 add_disk(info->gd); 1088 add_disk(info->gd);
1108 1089
1109 info->is_ready = 1; 1090 info->is_ready = 1;
1110 } 1091 }
1111 1092
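As a hedged sketch of how the feature_flush value gathered above reaches the block layer: the request-queue setup for the new gendisk is outside this hunk, so the wrapper name and wiring below are illustrative placeholders rather than code from the patch.

	static void example_advertise_flush(struct request_queue *rq,
					    struct blkfront_info *info)
	{
		/* 0 = no cache management; REQ_FLUSH = backend can flush its write cache */
		blk_queue_flush(rq, info->feature_flush);
	}

The driver no longer picks an ordered mode; it only states which cache-control operation it can honour, and the block layer does the rest.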
1112 /** 1093 /**
1113 * Callback received when the backend's state changes. 1094 * Callback received when the backend's state changes.
1114 */ 1095 */
1115 static void blkback_changed(struct xenbus_device *dev, 1096 static void blkback_changed(struct xenbus_device *dev,
1116 enum xenbus_state backend_state) 1097 enum xenbus_state backend_state)
1117 { 1098 {
1118 struct blkfront_info *info = dev_get_drvdata(&dev->dev); 1099 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1119 1100
1120 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); 1101 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
1121 1102
1122 switch (backend_state) { 1103 switch (backend_state) {
1123 case XenbusStateInitialising: 1104 case XenbusStateInitialising:
1124 case XenbusStateInitWait: 1105 case XenbusStateInitWait:
1125 case XenbusStateInitialised: 1106 case XenbusStateInitialised:
1126 case XenbusStateUnknown: 1107 case XenbusStateUnknown:
1127 case XenbusStateClosed: 1108 case XenbusStateClosed:
1128 break; 1109 break;
1129 1110
1130 case XenbusStateConnected: 1111 case XenbusStateConnected:
1131 blkfront_connect(info); 1112 blkfront_connect(info);
1132 break; 1113 break;
1133 1114
1134 case XenbusStateClosing: 1115 case XenbusStateClosing:
1135 blkfront_closing(info); 1116 blkfront_closing(info);
1136 break; 1117 break;
1137 } 1118 }
1138 } 1119 }
1139 1120
1140 static int blkfront_remove(struct xenbus_device *xbdev) 1121 static int blkfront_remove(struct xenbus_device *xbdev)
1141 { 1122 {
1142 struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); 1123 struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
1143 struct block_device *bdev = NULL; 1124 struct block_device *bdev = NULL;
1144 struct gendisk *disk; 1125 struct gendisk *disk;
1145 1126
1146 dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); 1127 dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
1147 1128
1148 blkif_free(info, 0); 1129 blkif_free(info, 0);
1149 1130
1150 mutex_lock(&info->mutex); 1131 mutex_lock(&info->mutex);
1151 1132
1152 disk = info->gd; 1133 disk = info->gd;
1153 if (disk) 1134 if (disk)
1154 bdev = bdget_disk(disk, 0); 1135 bdev = bdget_disk(disk, 0);
1155 1136
1156 info->xbdev = NULL; 1137 info->xbdev = NULL;
1157 mutex_unlock(&info->mutex); 1138 mutex_unlock(&info->mutex);
1158 1139
1159 if (!bdev) { 1140 if (!bdev) {
1160 kfree(info); 1141 kfree(info);
1161 return 0; 1142 return 0;
1162 } 1143 }
1163 1144
1164 /* 1145 /*
1165 * The xbdev was removed before we reached the Closed 1146 * The xbdev was removed before we reached the Closed
1166 * state. See if it's safe to remove the disk. If the bdev 1147 * state. See if it's safe to remove the disk. If the bdev
1167 * isn't closed yet, we let release take care of it. 1148 * isn't closed yet, we let release take care of it.
1168 */ 1149 */
1169 1150
1170 mutex_lock(&bdev->bd_mutex); 1151 mutex_lock(&bdev->bd_mutex);
1171 info = disk->private_data; 1152 info = disk->private_data;
1172 1153
1173 dev_warn(disk_to_dev(disk), 1154 dev_warn(disk_to_dev(disk),
1174 "%s was hot-unplugged, %d stale handles\n", 1155 "%s was hot-unplugged, %d stale handles\n",
1175 xbdev->nodename, bdev->bd_openers); 1156 xbdev->nodename, bdev->bd_openers);
1176 1157
1177 if (info && !bdev->bd_openers) { 1158 if (info && !bdev->bd_openers) {
1178 xlvbd_release_gendisk(info); 1159 xlvbd_release_gendisk(info);
1179 disk->private_data = NULL; 1160 disk->private_data = NULL;
1180 kfree(info); 1161 kfree(info);
1181 } 1162 }
1182 1163
1183 mutex_unlock(&bdev->bd_mutex); 1164 mutex_unlock(&bdev->bd_mutex);
1184 bdput(bdev); 1165 bdput(bdev);
1185 1166
1186 return 0; 1167 return 0;
1187 } 1168 }
1188 1169
1189 static int blkfront_is_ready(struct xenbus_device *dev) 1170 static int blkfront_is_ready(struct xenbus_device *dev)
1190 { 1171 {
1191 struct blkfront_info *info = dev_get_drvdata(&dev->dev); 1172 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1192 1173
1193 return info->is_ready && info->xbdev; 1174 return info->is_ready && info->xbdev;
1194 } 1175 }
1195 1176
1196 static int blkif_open(struct block_device *bdev, fmode_t mode) 1177 static int blkif_open(struct block_device *bdev, fmode_t mode)
1197 { 1178 {
1198 struct gendisk *disk = bdev->bd_disk; 1179 struct gendisk *disk = bdev->bd_disk;
1199 struct blkfront_info *info; 1180 struct blkfront_info *info;
1200 int err = 0; 1181 int err = 0;
1201 1182
1202 lock_kernel(); 1183 lock_kernel();
1203 1184
1204 info = disk->private_data; 1185 info = disk->private_data;
1205 if (!info) { 1186 if (!info) {
1206 /* xbdev gone */ 1187 /* xbdev gone */
1207 err = -ERESTARTSYS; 1188 err = -ERESTARTSYS;
1208 goto out; 1189 goto out;
1209 } 1190 }
1210 1191
1211 mutex_lock(&info->mutex); 1192 mutex_lock(&info->mutex);
1212 1193
1213 if (!info->gd) 1194 if (!info->gd)
1214 /* xbdev is closed */ 1195 /* xbdev is closed */
1215 err = -ERESTARTSYS; 1196 err = -ERESTARTSYS;
1216 1197
1217 mutex_unlock(&info->mutex); 1198 mutex_unlock(&info->mutex);
1218 1199
1219 out: 1200 out:
1220 unlock_kernel(); 1201 unlock_kernel();
1221 return err; 1202 return err;
1222 } 1203 }
1223 1204
1224 static int blkif_release(struct gendisk *disk, fmode_t mode) 1205 static int blkif_release(struct gendisk *disk, fmode_t mode)
1225 { 1206 {
1226 struct blkfront_info *info = disk->private_data; 1207 struct blkfront_info *info = disk->private_data;
1227 struct block_device *bdev; 1208 struct block_device *bdev;
1228 struct xenbus_device *xbdev; 1209 struct xenbus_device *xbdev;
1229 1210
1230 lock_kernel(); 1211 lock_kernel();
1231 1212
1232 bdev = bdget_disk(disk, 0); 1213 bdev = bdget_disk(disk, 0);
1233 bdput(bdev); 1214 bdput(bdev);
1234 1215
1235 if (bdev->bd_openers) 1216 if (bdev->bd_openers)
1236 goto out; 1217 goto out;
1237 1218
1238 /* 1219 /*
1239 * Check if we have been instructed to close. We will have 1220 * Check if we have been instructed to close. We will have
1240 * deferred this request, because the bdev was still open. 1221 * deferred this request, because the bdev was still open.
1241 */ 1222 */
1242 1223
1243 mutex_lock(&info->mutex); 1224 mutex_lock(&info->mutex);
1244 xbdev = info->xbdev; 1225 xbdev = info->xbdev;
1245 1226
1246 if (xbdev && xbdev->state == XenbusStateClosing) { 1227 if (xbdev && xbdev->state == XenbusStateClosing) {
1247 /* pending switch to state closed */ 1228 /* pending switch to state closed */
1248 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 1229 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
1249 xlvbd_release_gendisk(info); 1230 xlvbd_release_gendisk(info);
1250 xenbus_frontend_closed(info->xbdev); 1231 xenbus_frontend_closed(info->xbdev);
1251 } 1232 }
1252 1233
1253 mutex_unlock(&info->mutex); 1234 mutex_unlock(&info->mutex);
1254 1235
1255 if (!xbdev) { 1236 if (!xbdev) {
1256 /* sudden device removal */ 1237 /* sudden device removal */
1257 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 1238 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
1258 xlvbd_release_gendisk(info); 1239 xlvbd_release_gendisk(info);
1259 disk->private_data = NULL; 1240 disk->private_data = NULL;
1260 kfree(info); 1241 kfree(info);
1261 } 1242 }
1262 1243
1263 out: 1244 out:
1264 unlock_kernel(); 1245 unlock_kernel();
1265 return 0; 1246 return 0;
1266 } 1247 }
1267 1248
1268 static const struct block_device_operations xlvbd_block_fops = 1249 static const struct block_device_operations xlvbd_block_fops =
1269 { 1250 {
1270 .owner = THIS_MODULE, 1251 .owner = THIS_MODULE,
1271 .open = blkif_open, 1252 .open = blkif_open,
1272 .release = blkif_release, 1253 .release = blkif_release,
1273 .getgeo = blkif_getgeo, 1254 .getgeo = blkif_getgeo,
1274 .ioctl = blkif_ioctl, 1255 .ioctl = blkif_ioctl,
1275 }; 1256 };
1276 1257
1277 1258
1278 static const struct xenbus_device_id blkfront_ids[] = { 1259 static const struct xenbus_device_id blkfront_ids[] = {
1279 { "vbd" }, 1260 { "vbd" },
1280 { "" } 1261 { "" }
1281 }; 1262 };
1282 1263
1283 static struct xenbus_driver blkfront = { 1264 static struct xenbus_driver blkfront = {
1284 .name = "vbd", 1265 .name = "vbd",
1285 .owner = THIS_MODULE, 1266 .owner = THIS_MODULE,
1286 .ids = blkfront_ids, 1267 .ids = blkfront_ids,
1287 .probe = blkfront_probe, 1268 .probe = blkfront_probe,
1288 .remove = blkfront_remove, 1269 .remove = blkfront_remove,
1289 .resume = blkfront_resume, 1270 .resume = blkfront_resume,
1290 .otherend_changed = blkback_changed, 1271 .otherend_changed = blkback_changed,
1291 .is_ready = blkfront_is_ready, 1272 .is_ready = blkfront_is_ready,
1292 }; 1273 };
1293 1274
1294 static int __init xlblk_init(void) 1275 static int __init xlblk_init(void)
1295 { 1276 {
1296 if (!xen_domain()) 1277 if (!xen_domain())
1297 return -ENODEV; 1278 return -ENODEV;
1298 1279
1299 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 1280 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
1300 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", 1281 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
1301 XENVBD_MAJOR, DEV_NAME); 1282 XENVBD_MAJOR, DEV_NAME);
1302 return -ENODEV; 1283 return -ENODEV;
1303 } 1284 }
1304 1285
1305 return xenbus_register_frontend(&blkfront); 1286 return xenbus_register_frontend(&blkfront);
1306 } 1287 }
1307 module_init(xlblk_init); 1288 module_init(xlblk_init);
1308 1289
1309 1290
1310 static void __exit xlblk_exit(void) 1291 static void __exit xlblk_exit(void)
1311 { 1292 {
1312 return xenbus_unregister_driver(&blkfront); 1293 return xenbus_unregister_driver(&blkfront);
1313 } 1294 }
1314 module_exit(xlblk_exit); 1295 module_exit(xlblk_exit);
1315 1296
1316 MODULE_DESCRIPTION("Xen virtual block device frontend"); 1297 MODULE_DESCRIPTION("Xen virtual block device frontend");
1317 MODULE_LICENSE("GPL"); 1298 MODULE_LICENSE("GPL");
1318 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); 1299 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
1319 MODULE_ALIAS("xen:vbd"); 1300 MODULE_ALIAS("xen:vbd");
1320 MODULE_ALIAS("xenblk"); 1301 MODULE_ALIAS("xenblk");
1321 1302
drivers/ide/ide-disk.c
1 /* 1 /*
2 * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) 2 * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
3 * Copyright (C) 1998-2002 Linux ATA Development 3 * Copyright (C) 1998-2002 Linux ATA Development
4 * Andre Hedrick <andre@linux-ide.org> 4 * Andre Hedrick <andre@linux-ide.org>
5 * Copyright (C) 2003 Red Hat 5 * Copyright (C) 2003 Red Hat
6 * Copyright (C) 2003-2005, 2007 Bartlomiej Zolnierkiewicz 6 * Copyright (C) 2003-2005, 2007 Bartlomiej Zolnierkiewicz
7 */ 7 */
8 8
9 /* 9 /*
10 * Mostly written by Mark Lord <mlord@pobox.com> 10 * Mostly written by Mark Lord <mlord@pobox.com>
11 * and Gadi Oxman <gadio@netvision.net.il> 11 * and Gadi Oxman <gadio@netvision.net.il>
12 * and Andre Hedrick <andre@linux-ide.org> 12 * and Andre Hedrick <andre@linux-ide.org>
13 * 13 *
14 * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. 14 * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
15 */ 15 */
16 16
17 #include <linux/types.h> 17 #include <linux/types.h>
18 #include <linux/string.h> 18 #include <linux/string.h>
19 #include <linux/kernel.h> 19 #include <linux/kernel.h>
20 #include <linux/timer.h> 20 #include <linux/timer.h>
21 #include <linux/mm.h> 21 #include <linux/mm.h>
22 #include <linux/interrupt.h> 22 #include <linux/interrupt.h>
23 #include <linux/major.h> 23 #include <linux/major.h>
24 #include <linux/errno.h> 24 #include <linux/errno.h>
25 #include <linux/genhd.h> 25 #include <linux/genhd.h>
26 #include <linux/slab.h> 26 #include <linux/slab.h>
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 #include <linux/mutex.h> 28 #include <linux/mutex.h>
29 #include <linux/leds.h> 29 #include <linux/leds.h>
30 #include <linux/ide.h> 30 #include <linux/ide.h>
31 31
32 #include <asm/byteorder.h> 32 #include <asm/byteorder.h>
33 #include <asm/irq.h> 33 #include <asm/irq.h>
34 #include <asm/uaccess.h> 34 #include <asm/uaccess.h>
35 #include <asm/io.h> 35 #include <asm/io.h>
36 #include <asm/div64.h> 36 #include <asm/div64.h>
37 37
38 #include "ide-disk.h" 38 #include "ide-disk.h"
39 39
40 static const u8 ide_rw_cmds[] = { 40 static const u8 ide_rw_cmds[] = {
41 ATA_CMD_READ_MULTI, 41 ATA_CMD_READ_MULTI,
42 ATA_CMD_WRITE_MULTI, 42 ATA_CMD_WRITE_MULTI,
43 ATA_CMD_READ_MULTI_EXT, 43 ATA_CMD_READ_MULTI_EXT,
44 ATA_CMD_WRITE_MULTI_EXT, 44 ATA_CMD_WRITE_MULTI_EXT,
45 ATA_CMD_PIO_READ, 45 ATA_CMD_PIO_READ,
46 ATA_CMD_PIO_WRITE, 46 ATA_CMD_PIO_WRITE,
47 ATA_CMD_PIO_READ_EXT, 47 ATA_CMD_PIO_READ_EXT,
48 ATA_CMD_PIO_WRITE_EXT, 48 ATA_CMD_PIO_WRITE_EXT,
49 ATA_CMD_READ, 49 ATA_CMD_READ,
50 ATA_CMD_WRITE, 50 ATA_CMD_WRITE,
51 ATA_CMD_READ_EXT, 51 ATA_CMD_READ_EXT,
52 ATA_CMD_WRITE_EXT, 52 ATA_CMD_WRITE_EXT,
53 }; 53 };
54 54
55 static void ide_tf_set_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 dma) 55 static void ide_tf_set_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 dma)
56 { 56 {
57 u8 index, lba48, write; 57 u8 index, lba48, write;
58 58
59 lba48 = (cmd->tf_flags & IDE_TFLAG_LBA48) ? 2 : 0; 59 lba48 = (cmd->tf_flags & IDE_TFLAG_LBA48) ? 2 : 0;
60 write = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 0; 60 write = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 0;
61 61
62 if (dma) { 62 if (dma) {
63 cmd->protocol = ATA_PROT_DMA; 63 cmd->protocol = ATA_PROT_DMA;
64 index = 8; 64 index = 8;
65 } else { 65 } else {
66 cmd->protocol = ATA_PROT_PIO; 66 cmd->protocol = ATA_PROT_PIO;
67 if (drive->mult_count) { 67 if (drive->mult_count) {
68 cmd->tf_flags |= IDE_TFLAG_MULTI_PIO; 68 cmd->tf_flags |= IDE_TFLAG_MULTI_PIO;
69 index = 0; 69 index = 0;
70 } else 70 } else
71 index = 4; 71 index = 4;
72 } 72 }
73 73
74 cmd->tf.command = ide_rw_cmds[index + lba48 + write]; 74 cmd->tf.command = ide_rw_cmds[index + lba48 + write];
75 } 75 }
76 76
77 /* 77 /*
78 * __ide_do_rw_disk() issues READ and WRITE commands to a disk, 78 * __ide_do_rw_disk() issues READ and WRITE commands to a disk,
79 * using LBA if supported, or CHS otherwise, to address sectors. 79 * using LBA if supported, or CHS otherwise, to address sectors.
80 */ 80 */
81 static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq, 81 static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
82 sector_t block) 82 sector_t block)
83 { 83 {
84 ide_hwif_t *hwif = drive->hwif; 84 ide_hwif_t *hwif = drive->hwif;
85 u16 nsectors = (u16)blk_rq_sectors(rq); 85 u16 nsectors = (u16)blk_rq_sectors(rq);
86 u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48); 86 u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48);
87 u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); 87 u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
88 struct ide_cmd cmd; 88 struct ide_cmd cmd;
89 struct ide_taskfile *tf = &cmd.tf; 89 struct ide_taskfile *tf = &cmd.tf;
90 ide_startstop_t rc; 90 ide_startstop_t rc;
91 91
92 if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) { 92 if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) {
93 if (block + blk_rq_sectors(rq) > 1ULL << 28) 93 if (block + blk_rq_sectors(rq) > 1ULL << 28)
94 dma = 0; 94 dma = 0;
95 else 95 else
96 lba48 = 0; 96 lba48 = 0;
97 } 97 }
98 98
99 memset(&cmd, 0, sizeof(cmd)); 99 memset(&cmd, 0, sizeof(cmd));
100 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 100 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
101 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 101 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
102 102
103 if (drive->dev_flags & IDE_DFLAG_LBA) { 103 if (drive->dev_flags & IDE_DFLAG_LBA) {
104 if (lba48) { 104 if (lba48) {
105 pr_debug("%s: LBA=0x%012llx\n", drive->name, 105 pr_debug("%s: LBA=0x%012llx\n", drive->name,
106 (unsigned long long)block); 106 (unsigned long long)block);
107 107
108 tf->nsect = nsectors & 0xff; 108 tf->nsect = nsectors & 0xff;
109 tf->lbal = (u8) block; 109 tf->lbal = (u8) block;
110 tf->lbam = (u8)(block >> 8); 110 tf->lbam = (u8)(block >> 8);
111 tf->lbah = (u8)(block >> 16); 111 tf->lbah = (u8)(block >> 16);
112 tf->device = ATA_LBA; 112 tf->device = ATA_LBA;
113 113
114 tf = &cmd.hob; 114 tf = &cmd.hob;
115 tf->nsect = (nsectors >> 8) & 0xff; 115 tf->nsect = (nsectors >> 8) & 0xff;
116 tf->lbal = (u8)(block >> 24); 116 tf->lbal = (u8)(block >> 24);
117 if (sizeof(block) != 4) { 117 if (sizeof(block) != 4) {
118 tf->lbam = (u8)((u64)block >> 32); 118 tf->lbam = (u8)((u64)block >> 32);
119 tf->lbah = (u8)((u64)block >> 40); 119 tf->lbah = (u8)((u64)block >> 40);
120 } 120 }
121 121
122 cmd.valid.out.hob = IDE_VALID_OUT_HOB; 122 cmd.valid.out.hob = IDE_VALID_OUT_HOB;
123 cmd.valid.in.hob = IDE_VALID_IN_HOB; 123 cmd.valid.in.hob = IDE_VALID_IN_HOB;
124 cmd.tf_flags |= IDE_TFLAG_LBA48; 124 cmd.tf_flags |= IDE_TFLAG_LBA48;
125 } else { 125 } else {
126 tf->nsect = nsectors & 0xff; 126 tf->nsect = nsectors & 0xff;
127 tf->lbal = block; 127 tf->lbal = block;
128 tf->lbam = block >>= 8; 128 tf->lbam = block >>= 8;
129 tf->lbah = block >>= 8; 129 tf->lbah = block >>= 8;
130 tf->device = ((block >> 8) & 0xf) | ATA_LBA; 130 tf->device = ((block >> 8) & 0xf) | ATA_LBA;
131 } 131 }
132 } else { 132 } else {
133 unsigned int sect, head, cyl, track; 133 unsigned int sect, head, cyl, track;
134 134
135 track = (int)block / drive->sect; 135 track = (int)block / drive->sect;
136 sect = (int)block % drive->sect + 1; 136 sect = (int)block % drive->sect + 1;
137 head = track % drive->head; 137 head = track % drive->head;
138 cyl = track / drive->head; 138 cyl = track / drive->head;
139 139
140 pr_debug("%s: CHS=%u/%u/%u\n", drive->name, cyl, head, sect); 140 pr_debug("%s: CHS=%u/%u/%u\n", drive->name, cyl, head, sect);
141 141
142 tf->nsect = nsectors & 0xff; 142 tf->nsect = nsectors & 0xff;
143 tf->lbal = sect; 143 tf->lbal = sect;
144 tf->lbam = cyl; 144 tf->lbam = cyl;
145 tf->lbah = cyl >> 8; 145 tf->lbah = cyl >> 8;
146 tf->device = head; 146 tf->device = head;
147 } 147 }
148 148
149 cmd.tf_flags |= IDE_TFLAG_FS; 149 cmd.tf_flags |= IDE_TFLAG_FS;
150 150
151 if (rq_data_dir(rq)) 151 if (rq_data_dir(rq))
152 cmd.tf_flags |= IDE_TFLAG_WRITE; 152 cmd.tf_flags |= IDE_TFLAG_WRITE;
153 153
154 ide_tf_set_cmd(drive, &cmd, dma); 154 ide_tf_set_cmd(drive, &cmd, dma);
155 cmd.rq = rq; 155 cmd.rq = rq;
156 156
157 if (dma == 0) { 157 if (dma == 0) {
158 ide_init_sg_cmd(&cmd, nsectors << 9); 158 ide_init_sg_cmd(&cmd, nsectors << 9);
159 ide_map_sg(drive, &cmd); 159 ide_map_sg(drive, &cmd);
160 } 160 }
161 161
162 rc = do_rw_taskfile(drive, &cmd); 162 rc = do_rw_taskfile(drive, &cmd);
163 163
164 if (rc == ide_stopped && dma) { 164 if (rc == ide_stopped && dma) {
165 /* fallback to PIO */ 165 /* fallback to PIO */
166 cmd.tf_flags |= IDE_TFLAG_DMA_PIO_FALLBACK; 166 cmd.tf_flags |= IDE_TFLAG_DMA_PIO_FALLBACK;
167 ide_tf_set_cmd(drive, &cmd, 0); 167 ide_tf_set_cmd(drive, &cmd, 0);
168 ide_init_sg_cmd(&cmd, nsectors << 9); 168 ide_init_sg_cmd(&cmd, nsectors << 9);
169 rc = do_rw_taskfile(drive, &cmd); 169 rc = do_rw_taskfile(drive, &cmd);
170 } 170 }
171 171
172 return rc; 172 return rc;
173 } 173 }
174 174
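A worked example of the LBA48 split performed above (values chosen for illustration only): for block = 0x0123456789AB and nsectors = 0x0120, the assignments yield

	tf.nsect = 0x20     hob.nsect = 0x01
	tf.lbal  = 0xAB     hob.lbal  = 0x45
	tf.lbam  = 0x89     hob.lbam  = 0x23
	tf.lbah  = 0x67     hob.lbah  = 0x01

so the low three LBA bytes and the low sector-count byte land in the base taskfile, while the upper bytes go into the HOB (high order byte) registers used by 48-bit commands.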
175 /* 175 /*
176 * 268435455 == 137439 MB or 28bit limit 176 * 268435455 == 137439 MB or 28bit limit
177 * 320173056 == 163929 MB or 48bit addressing 177 * 320173056 == 163929 MB or 48bit addressing
178 * 1073741822 == 549756 MB or 48bit addressing fake drive 178 * 1073741822 == 549756 MB or 48bit addressing fake drive
179 */ 179 */
180 180
181 static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, 181 static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
182 sector_t block) 182 sector_t block)
183 { 183 {
184 ide_hwif_t *hwif = drive->hwif; 184 ide_hwif_t *hwif = drive->hwif;
185 185
186 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); 186 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
187 BUG_ON(rq->cmd_type != REQ_TYPE_FS); 187 BUG_ON(rq->cmd_type != REQ_TYPE_FS);
188 188
189 ledtrig_ide_activity(); 189 ledtrig_ide_activity();
190 190
191 pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n", 191 pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n",
192 drive->name, rq_data_dir(rq) == READ ? "read" : "writ", 192 drive->name, rq_data_dir(rq) == READ ? "read" : "writ",
193 (unsigned long long)block, blk_rq_sectors(rq), 193 (unsigned long long)block, blk_rq_sectors(rq),
194 (unsigned long)rq->buffer); 194 (unsigned long)rq->buffer);
195 195
196 if (hwif->rw_disk) 196 if (hwif->rw_disk)
197 hwif->rw_disk(drive, rq); 197 hwif->rw_disk(drive, rq);
198 198
199 return __ide_do_rw_disk(drive, rq, block); 199 return __ide_do_rw_disk(drive, rq, block);
200 } 200 }
201 201
202 /* 202 /*
203 * Queries for true maximum capacity of the drive. 203 * Queries for true maximum capacity of the drive.
204 * Returns maximum LBA address (> 0) of the drive, 0 if failed. 204 * Returns maximum LBA address (> 0) of the drive, 0 if failed.
205 */ 205 */
206 static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48) 206 static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48)
207 { 207 {
208 struct ide_cmd cmd; 208 struct ide_cmd cmd;
209 struct ide_taskfile *tf = &cmd.tf; 209 struct ide_taskfile *tf = &cmd.tf;
210 u64 addr = 0; 210 u64 addr = 0;
211 211
212 memset(&cmd, 0, sizeof(cmd)); 212 memset(&cmd, 0, sizeof(cmd));
213 if (lba48) 213 if (lba48)
214 tf->command = ATA_CMD_READ_NATIVE_MAX_EXT; 214 tf->command = ATA_CMD_READ_NATIVE_MAX_EXT;
215 else 215 else
216 tf->command = ATA_CMD_READ_NATIVE_MAX; 216 tf->command = ATA_CMD_READ_NATIVE_MAX;
217 tf->device = ATA_LBA; 217 tf->device = ATA_LBA;
218 218
219 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 219 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
220 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 220 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
221 if (lba48) { 221 if (lba48) {
222 cmd.valid.out.hob = IDE_VALID_OUT_HOB; 222 cmd.valid.out.hob = IDE_VALID_OUT_HOB;
223 cmd.valid.in.hob = IDE_VALID_IN_HOB; 223 cmd.valid.in.hob = IDE_VALID_IN_HOB;
224 cmd.tf_flags = IDE_TFLAG_LBA48; 224 cmd.tf_flags = IDE_TFLAG_LBA48;
225 } 225 }
226 226
227 ide_no_data_taskfile(drive, &cmd); 227 ide_no_data_taskfile(drive, &cmd);
228 228
229 /* if OK, compute maximum address value */ 229 /* if OK, compute maximum address value */
230 if (!(tf->status & ATA_ERR)) 230 if (!(tf->status & ATA_ERR))
231 addr = ide_get_lba_addr(&cmd, lba48) + 1; 231 addr = ide_get_lba_addr(&cmd, lba48) + 1;
232 232
233 return addr; 233 return addr;
234 } 234 }
235 235
236 /* 236 /*
237 * Sets maximum virtual LBA address of the drive. 237 * Sets maximum virtual LBA address of the drive.
238 * Returns new maximum virtual LBA address (> 0) or 0 on failure. 238 * Returns new maximum virtual LBA address (> 0) or 0 on failure.
239 */ 239 */
240 static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48) 240 static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48)
241 { 241 {
242 struct ide_cmd cmd; 242 struct ide_cmd cmd;
243 struct ide_taskfile *tf = &cmd.tf; 243 struct ide_taskfile *tf = &cmd.tf;
244 u64 addr_set = 0; 244 u64 addr_set = 0;
245 245
246 addr_req--; 246 addr_req--;
247 247
248 memset(&cmd, 0, sizeof(cmd)); 248 memset(&cmd, 0, sizeof(cmd));
249 tf->lbal = (addr_req >> 0) & 0xff; 249 tf->lbal = (addr_req >> 0) & 0xff;
250 tf->lbam = (addr_req >>= 8) & 0xff; 250 tf->lbam = (addr_req >>= 8) & 0xff;
251 tf->lbah = (addr_req >>= 8) & 0xff; 251 tf->lbah = (addr_req >>= 8) & 0xff;
252 if (lba48) { 252 if (lba48) {
253 cmd.hob.lbal = (addr_req >>= 8) & 0xff; 253 cmd.hob.lbal = (addr_req >>= 8) & 0xff;
254 cmd.hob.lbam = (addr_req >>= 8) & 0xff; 254 cmd.hob.lbam = (addr_req >>= 8) & 0xff;
255 cmd.hob.lbah = (addr_req >>= 8) & 0xff; 255 cmd.hob.lbah = (addr_req >>= 8) & 0xff;
256 tf->command = ATA_CMD_SET_MAX_EXT; 256 tf->command = ATA_CMD_SET_MAX_EXT;
257 } else { 257 } else {
258 tf->device = (addr_req >>= 8) & 0x0f; 258 tf->device = (addr_req >>= 8) & 0x0f;
259 tf->command = ATA_CMD_SET_MAX; 259 tf->command = ATA_CMD_SET_MAX;
260 } 260 }
261 tf->device |= ATA_LBA; 261 tf->device |= ATA_LBA;
262 262
263 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 263 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
264 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 264 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
265 if (lba48) { 265 if (lba48) {
266 cmd.valid.out.hob = IDE_VALID_OUT_HOB; 266 cmd.valid.out.hob = IDE_VALID_OUT_HOB;
267 cmd.valid.in.hob = IDE_VALID_IN_HOB; 267 cmd.valid.in.hob = IDE_VALID_IN_HOB;
268 cmd.tf_flags = IDE_TFLAG_LBA48; 268 cmd.tf_flags = IDE_TFLAG_LBA48;
269 } 269 }
270 270
271 ide_no_data_taskfile(drive, &cmd); 271 ide_no_data_taskfile(drive, &cmd);
272 272
273 /* if OK, compute maximum address value */ 273 /* if OK, compute maximum address value */
274 if (!(tf->status & ATA_ERR)) 274 if (!(tf->status & ATA_ERR))
275 addr_set = ide_get_lba_addr(&cmd, lba48) + 1; 275 addr_set = ide_get_lba_addr(&cmd, lba48) + 1;
276 276
277 return addr_set; 277 return addr_set;
278 } 278 }
279 279
280 static unsigned long long sectors_to_MB(unsigned long long n) 280 static unsigned long long sectors_to_MB(unsigned long long n)
281 { 281 {
282 n <<= 9; /* make it bytes */ 282 n <<= 9; /* make it bytes */
283 do_div(n, 1000000); /* make it MB */ 283 do_div(n, 1000000); /* make it MB */
284 return n; 284 return n;
285 } 285 }
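sectors_to_MB() converts a sector count to decimal megabytes: the shift by 9 multiplies by the 512-byte sector size, and do_div() truncates to whole millions of bytes. An illustrative check: 2,097,152 sectors × 512 = 1,073,741,824 bytes, and 1,073,741,824 / 1,000,000 = 1073 MB, with the fractional part dropped.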
286 286
287 /* 287 /*
288 * Some disks report total number of sectors instead of 288 * Some disks report total number of sectors instead of
289 * maximum sector address. We list them here. 289 * maximum sector address. We list them here.
290 */ 290 */
291 static const struct drive_list_entry hpa_list[] = { 291 static const struct drive_list_entry hpa_list[] = {
292 { "ST340823A", NULL }, 292 { "ST340823A", NULL },
293 { "ST320413A", NULL }, 293 { "ST320413A", NULL },
294 { "ST310211A", NULL }, 294 { "ST310211A", NULL },
295 { NULL, NULL } 295 { NULL, NULL }
296 }; 296 };
297 297
298 static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48) 298 static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
299 { 299 {
300 u64 capacity, set_max; 300 u64 capacity, set_max;
301 301
302 capacity = drive->capacity64; 302 capacity = drive->capacity64;
303 set_max = idedisk_read_native_max_address(drive, lba48); 303 set_max = idedisk_read_native_max_address(drive, lba48);
304 304
305 if (ide_in_drive_list(drive->id, hpa_list)) { 305 if (ide_in_drive_list(drive->id, hpa_list)) {
306 /* 306 /*
307 * Since we are inclusive wrt to firmware revisions do this 307 * Since we are inclusive wrt to firmware revisions do this
308 * extra check and apply the workaround only when needed. 308 * extra check and apply the workaround only when needed.
309 */ 309 */
310 if (set_max == capacity + 1) 310 if (set_max == capacity + 1)
311 set_max--; 311 set_max--;
312 } 312 }
313 313
314 return set_max; 314 return set_max;
315 } 315 }
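The off-by-one handling above follows from the ATA convention: READ NATIVE MAX returns the highest addressable LBA, so idedisk_read_native_max_address() adds 1 to turn it into a sector count. A drive on the hpa_list quirk table reports the count itself, which after the +1 comes out exactly one too large. An illustrative case: a well-behaved drive returning LBA 0x0FFFFFFF yields 268,435,456 sectors, while a quirky drive with the same capacity returns the count 0x10000000, yields 268,435,457, and is corrected back by the set_max-- above.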
316 316
317 static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48) 317 static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
318 { 318 {
319 set_max = idedisk_set_max_address(drive, set_max, lba48); 319 set_max = idedisk_set_max_address(drive, set_max, lba48);
320 if (set_max) 320 if (set_max)
321 drive->capacity64 = set_max; 321 drive->capacity64 = set_max;
322 322
323 return set_max; 323 return set_max;
324 } 324 }
325 325
326 static void idedisk_check_hpa(ide_drive_t *drive) 326 static void idedisk_check_hpa(ide_drive_t *drive)
327 { 327 {
328 u64 capacity, set_max; 328 u64 capacity, set_max;
329 int lba48 = ata_id_lba48_enabled(drive->id); 329 int lba48 = ata_id_lba48_enabled(drive->id);
330 330
331 capacity = drive->capacity64; 331 capacity = drive->capacity64;
332 set_max = ide_disk_hpa_get_native_capacity(drive, lba48); 332 set_max = ide_disk_hpa_get_native_capacity(drive, lba48);
333 333
334 if (set_max <= capacity) 334 if (set_max <= capacity)
335 return; 335 return;
336 336
337 drive->probed_capacity = set_max; 337 drive->probed_capacity = set_max;
338 338
339 printk(KERN_INFO "%s: Host Protected Area detected.\n" 339 printk(KERN_INFO "%s: Host Protected Area detected.\n"
340 "\tcurrent capacity is %llu sectors (%llu MB)\n" 340 "\tcurrent capacity is %llu sectors (%llu MB)\n"
341 "\tnative capacity is %llu sectors (%llu MB)\n", 341 "\tnative capacity is %llu sectors (%llu MB)\n",
342 drive->name, 342 drive->name,
343 capacity, sectors_to_MB(capacity), 343 capacity, sectors_to_MB(capacity),
344 set_max, sectors_to_MB(set_max)); 344 set_max, sectors_to_MB(set_max));
345 345
346 if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0) 346 if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
347 return; 347 return;
348 348
349 set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48); 349 set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
350 if (set_max) 350 if (set_max)
351 printk(KERN_INFO "%s: Host Protected Area disabled.\n", 351 printk(KERN_INFO "%s: Host Protected Area disabled.\n",
352 drive->name); 352 drive->name);
353 } 353 }
354 354
355 static int ide_disk_get_capacity(ide_drive_t *drive) 355 static int ide_disk_get_capacity(ide_drive_t *drive)
356 { 356 {
357 u16 *id = drive->id; 357 u16 *id = drive->id;
358 int lba; 358 int lba;
359 359
360 if (ata_id_lba48_enabled(id)) { 360 if (ata_id_lba48_enabled(id)) {
361 /* drive speaks 48-bit LBA */ 361 /* drive speaks 48-bit LBA */
362 lba = 1; 362 lba = 1;
363 drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); 363 drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2);
364 } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) { 364 } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) {
365 /* drive speaks 28-bit LBA */ 365 /* drive speaks 28-bit LBA */
366 lba = 1; 366 lba = 1;
367 drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY); 367 drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
368 } else { 368 } else {
369 /* drive speaks boring old 28-bit CHS */ 369 /* drive speaks boring old 28-bit CHS */
370 lba = 0; 370 lba = 0;
371 drive->capacity64 = drive->cyl * drive->head * drive->sect; 371 drive->capacity64 = drive->cyl * drive->head * drive->sect;
372 } 372 }
373 373
374 drive->probed_capacity = drive->capacity64; 374 drive->probed_capacity = drive->capacity64;
375 375
376 if (lba) { 376 if (lba) {
377 drive->dev_flags |= IDE_DFLAG_LBA; 377 drive->dev_flags |= IDE_DFLAG_LBA;
378 378
379 /* 379 /*
380 * If this device supports the Host Protected Area feature set, 380 * If this device supports the Host Protected Area feature set,
381 * then we may need to change our opinion about its capacity. 381 * then we may need to change our opinion about its capacity.
382 */ 382 */
383 if (ata_id_hpa_enabled(id)) 383 if (ata_id_hpa_enabled(id))
384 idedisk_check_hpa(drive); 384 idedisk_check_hpa(drive);
385 } 385 }
386 386
387 /* limit drive capacity to 137GB if LBA48 cannot be used */ 387 /* limit drive capacity to 137GB if LBA48 cannot be used */
388 if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 && 388 if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
389 drive->capacity64 > 1ULL << 28) { 389 drive->capacity64 > 1ULL << 28) {
390 printk(KERN_WARNING "%s: cannot use LBA48 - full capacity " 390 printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
391 "%llu sectors (%llu MB)\n", 391 "%llu sectors (%llu MB)\n",
392 drive->name, (unsigned long long)drive->capacity64, 392 drive->name, (unsigned long long)drive->capacity64,
393 sectors_to_MB(drive->capacity64)); 393 sectors_to_MB(drive->capacity64));
394 drive->probed_capacity = drive->capacity64 = 1ULL << 28; 394 drive->probed_capacity = drive->capacity64 = 1ULL << 28;
395 } 395 }
396 396
397 if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && 397 if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
398 (drive->dev_flags & IDE_DFLAG_LBA48)) { 398 (drive->dev_flags & IDE_DFLAG_LBA48)) {
399 if (drive->capacity64 > 1ULL << 28) { 399 if (drive->capacity64 > 1ULL << 28) {
400 printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode" 400 printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
401 " will be used for accessing sectors " 401 " will be used for accessing sectors "
402 "> %u\n", drive->name, 1 << 28); 402 "> %u\n", drive->name, 1 << 28);
403 } else 403 } else
404 drive->dev_flags &= ~IDE_DFLAG_LBA48; 404 drive->dev_flags &= ~IDE_DFLAG_LBA48;
405 } 405 }
406 406
407 return 0; 407 return 0;
408 } 408 }
409 409
410 static void ide_disk_unlock_native_capacity(ide_drive_t *drive) 410 static void ide_disk_unlock_native_capacity(ide_drive_t *drive)
411 { 411 {
412 u16 *id = drive->id; 412 u16 *id = drive->id;
413 int lba48 = ata_id_lba48_enabled(id); 413 int lba48 = ata_id_lba48_enabled(id);
414 414
415 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 || 415 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
416 ata_id_hpa_enabled(id) == 0) 416 ata_id_hpa_enabled(id) == 0)
417 return; 417 return;
418 418
419 /* 419 /*
420 * according to the spec the SET MAX ADDRESS command shall be 420 * according to the spec the SET MAX ADDRESS command shall be
421 * immediately preceded by a READ NATIVE MAX ADDRESS command 421 * immediately preceded by a READ NATIVE MAX ADDRESS command
422 */ 422 */
423 if (!ide_disk_hpa_get_native_capacity(drive, lba48)) 423 if (!ide_disk_hpa_get_native_capacity(drive, lba48))
424 return; 424 return;
425 425
426 if (ide_disk_hpa_set_capacity(drive, drive->probed_capacity, lba48)) 426 if (ide_disk_hpa_set_capacity(drive, drive->probed_capacity, lba48))
427 drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ 427 drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */
428 } 428 }
429 429
430 static int idedisk_prep_fn(struct request_queue *q, struct request *rq) 430 static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
431 { 431 {
432 ide_drive_t *drive = q->queuedata; 432 ide_drive_t *drive = q->queuedata;
433 struct ide_cmd *cmd; 433 struct ide_cmd *cmd;
434 434
435 if (!(rq->cmd_flags & REQ_FLUSH)) 435 if (!(rq->cmd_flags & REQ_FLUSH))
436 return BLKPREP_OK; 436 return BLKPREP_OK;
437 437
438 cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); 438 cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
439 439
440 /* FIXME: map struct ide_taskfile on rq->cmd[] */ 440 /* FIXME: map struct ide_taskfile on rq->cmd[] */
441 BUG_ON(cmd == NULL); 441 BUG_ON(cmd == NULL);
442 442
443 memset(cmd, 0, sizeof(*cmd)); 443 memset(cmd, 0, sizeof(*cmd));
444 if (ata_id_flush_ext_enabled(drive->id) && 444 if (ata_id_flush_ext_enabled(drive->id) &&
445 (drive->capacity64 >= (1UL << 28))) 445 (drive->capacity64 >= (1UL << 28)))
446 cmd->tf.command = ATA_CMD_FLUSH_EXT; 446 cmd->tf.command = ATA_CMD_FLUSH_EXT;
447 else 447 else
448 cmd->tf.command = ATA_CMD_FLUSH; 448 cmd->tf.command = ATA_CMD_FLUSH;
449 cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 449 cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
450 cmd->tf_flags = IDE_TFLAG_DYN; 450 cmd->tf_flags = IDE_TFLAG_DYN;
451 cmd->protocol = ATA_PROT_NODATA; 451 cmd->protocol = ATA_PROT_NODATA;
452 452
453 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 453 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
454 rq->special = cmd; 454 rq->special = cmd;
455 cmd->rq = rq; 455 cmd->rq = rq;
456 456
457 return BLKPREP_OK; 457 return BLKPREP_OK;
458 } 458 }
459 459
460 ide_devset_get(multcount, mult_count); 460 ide_devset_get(multcount, mult_count);
461 461
462 /* 462 /*
463 * This is tightly woven into the driver's do_special handling and cannot be touched. 463 * This is tightly woven into the driver's do_special handling and cannot be touched.
464 * DON'T do it again until a total personality rewrite is committed. 464 * DON'T do it again until a total personality rewrite is committed.
465 */ 465 */
466 static int set_multcount(ide_drive_t *drive, int arg) 466 static int set_multcount(ide_drive_t *drive, int arg)
467 { 467 {
468 struct request *rq; 468 struct request *rq;
469 int error; 469 int error;
470 470
471 if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) 471 if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
472 return -EINVAL; 472 return -EINVAL;
473 473
474 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) 474 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
475 return -EBUSY; 475 return -EBUSY;
476 476
477 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 477 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
478 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 478 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
479 479
480 drive->mult_req = arg; 480 drive->mult_req = arg;
481 drive->special_flags |= IDE_SFLAG_SET_MULTMODE; 481 drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
482 error = blk_execute_rq(drive->queue, NULL, rq, 0); 482 error = blk_execute_rq(drive->queue, NULL, rq, 0);
483 blk_put_request(rq); 483 blk_put_request(rq);
484 484
485 return (drive->mult_count == arg) ? 0 : -EIO; 485 return (drive->mult_count == arg) ? 0 : -EIO;
486 } 486 }
487 487
488 ide_devset_get_flag(nowerr, IDE_DFLAG_NOWERR); 488 ide_devset_get_flag(nowerr, IDE_DFLAG_NOWERR);
489 489
490 static int set_nowerr(ide_drive_t *drive, int arg) 490 static int set_nowerr(ide_drive_t *drive, int arg)
491 { 491 {
492 if (arg < 0 || arg > 1) 492 if (arg < 0 || arg > 1)
493 return -EINVAL; 493 return -EINVAL;
494 494
495 if (arg) 495 if (arg)
496 drive->dev_flags |= IDE_DFLAG_NOWERR; 496 drive->dev_flags |= IDE_DFLAG_NOWERR;
497 else 497 else
498 drive->dev_flags &= ~IDE_DFLAG_NOWERR; 498 drive->dev_flags &= ~IDE_DFLAG_NOWERR;
499 499
500 drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; 500 drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT;
501 501
502 return 0; 502 return 0;
503 } 503 }
504 504
505 static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) 505 static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect)
506 { 506 {
507 struct ide_cmd cmd; 507 struct ide_cmd cmd;
508 508
509 memset(&cmd, 0, sizeof(cmd)); 509 memset(&cmd, 0, sizeof(cmd));
510 cmd.tf.feature = feature; 510 cmd.tf.feature = feature;
511 cmd.tf.nsect = nsect; 511 cmd.tf.nsect = nsect;
512 cmd.tf.command = ATA_CMD_SET_FEATURES; 512 cmd.tf.command = ATA_CMD_SET_FEATURES;
513 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 513 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
514 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 514 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
515 515
516 return ide_no_data_taskfile(drive, &cmd); 516 return ide_no_data_taskfile(drive, &cmd);
517 } 517 }
518 518
519 static void update_ordered(ide_drive_t *drive) 519 static void update_flush(ide_drive_t *drive)
520 { 520 {
521 u16 *id = drive->id; 521 u16 *id = drive->id;
522 unsigned ordered = QUEUE_ORDERED_NONE; 522 unsigned flush = 0;
523 523
524 if (drive->dev_flags & IDE_DFLAG_WCACHE) { 524 if (drive->dev_flags & IDE_DFLAG_WCACHE) {
525 unsigned long long capacity; 525 unsigned long long capacity;
526 int barrier; 526 int barrier;
527 /* 527 /*
528 * We must avoid issuing commands a drive does not 528 * We must avoid issuing commands a drive does not
529 * understand or we may crash it. We check flush cache 529 * understand or we may crash it. We check flush cache
530 * is supported. We also check we have the LBA48 flush 530 * is supported. We also check we have the LBA48 flush
531 * cache if the drive capacity is too large. By this 531 * cache if the drive capacity is too large. By this
532 * time we have trimmed the drive capacity if LBA48 is 532 * time we have trimmed the drive capacity if LBA48 is
533 * not available so we don't need to recheck that. 533 * not available so we don't need to recheck that.
534 */ 534 */
535 capacity = ide_gd_capacity(drive); 535 capacity = ide_gd_capacity(drive);
536 barrier = ata_id_flush_enabled(id) && 536 barrier = ata_id_flush_enabled(id) &&
537 (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 && 537 (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
538 ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 || 538 ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
539 capacity <= (1ULL << 28) || 539 capacity <= (1ULL << 28) ||
540 ata_id_flush_ext_enabled(id)); 540 ata_id_flush_ext_enabled(id));
541 541
542 printk(KERN_INFO "%s: cache flushes %ssupported\n", 542 printk(KERN_INFO "%s: cache flushes %ssupported\n",
543 drive->name, barrier ? "" : "not "); 543 drive->name, barrier ? "" : "not ");
544 544
545 if (barrier) { 545 if (barrier) {
546 ordered = QUEUE_ORDERED_DRAIN_FLUSH; 546 flush = REQ_FLUSH;
547 blk_queue_prep_rq(drive->queue, idedisk_prep_fn); 547 blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
548 } 548 }
549 } else 549 }
550 ordered = QUEUE_ORDERED_DRAIN;
551 550
552 blk_queue_ordered(drive->queue, ordered); 551 blk_queue_flush(drive->queue, flush);
553 } 552 }
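Taken together with idedisk_prep_fn() above, the conversion in update_flush() covers both halves of the new scheme: the driver advertises its capability once, and flushes later arrive as ordinary requests carrying REQ_FLUSH. A hedged sketch of that contract for a generic driver (the function name and predicate are placeholders, not code from the patch):

	static void my_setup_cache_control(struct request_queue *q, bool my_can_flush)
	{
		/* advertise REQ_FLUSH only if the write cache can really be flushed */
		blk_queue_flush(q, my_can_flush ? REQ_FLUSH : 0);
	}

Requests with REQ_FLUSH set are then rewritten by the prep hook into the hardware's cache-flush command, exactly as idedisk_prep_fn() turns them into ATA_CMD_FLUSH or ATA_CMD_FLUSH_EXT.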
554 553
555 ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); 554 ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
556 555
557 static int set_wcache(ide_drive_t *drive, int arg) 556 static int set_wcache(ide_drive_t *drive, int arg)
558 { 557 {
559 int err = 1; 558 int err = 1;
560 559
561 if (arg < 0 || arg > 1) 560 if (arg < 0 || arg > 1)
562 return -EINVAL; 561 return -EINVAL;
563 562
564 if (ata_id_flush_enabled(drive->id)) { 563 if (ata_id_flush_enabled(drive->id)) {
565 err = ide_do_setfeature(drive, 564 err = ide_do_setfeature(drive,
566 arg ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF, 0); 565 arg ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF, 0);
567 if (err == 0) { 566 if (err == 0) {
568 if (arg) 567 if (arg)
569 drive->dev_flags |= IDE_DFLAG_WCACHE; 568 drive->dev_flags |= IDE_DFLAG_WCACHE;
570 else 569 else
571 drive->dev_flags &= ~IDE_DFLAG_WCACHE; 570 drive->dev_flags &= ~IDE_DFLAG_WCACHE;
572 } 571 }
573 } 572 }
574 573
575 update_ordered(drive); 574 update_flush(drive);
576 575
577 return err; 576 return err;
578 } 577 }
579 578
580 static int do_idedisk_flushcache(ide_drive_t *drive) 579 static int do_idedisk_flushcache(ide_drive_t *drive)
581 { 580 {
582 struct ide_cmd cmd; 581 struct ide_cmd cmd;
583 582
584 memset(&cmd, 0, sizeof(cmd)); 583 memset(&cmd, 0, sizeof(cmd));
585 if (ata_id_flush_ext_enabled(drive->id)) 584 if (ata_id_flush_ext_enabled(drive->id))
586 cmd.tf.command = ATA_CMD_FLUSH_EXT; 585 cmd.tf.command = ATA_CMD_FLUSH_EXT;
587 else 586 else
588 cmd.tf.command = ATA_CMD_FLUSH; 587 cmd.tf.command = ATA_CMD_FLUSH;
589 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 588 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
590 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 589 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
591 590
592 return ide_no_data_taskfile(drive, &cmd); 591 return ide_no_data_taskfile(drive, &cmd);
593 } 592 }
594 593
595 ide_devset_get(acoustic, acoustic); 594 ide_devset_get(acoustic, acoustic);
596 595
597 static int set_acoustic(ide_drive_t *drive, int arg) 596 static int set_acoustic(ide_drive_t *drive, int arg)
598 { 597 {
599 if (arg < 0 || arg > 254) 598 if (arg < 0 || arg > 254)
600 return -EINVAL; 599 return -EINVAL;
601 600
602 ide_do_setfeature(drive, 601 ide_do_setfeature(drive,
603 arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF, arg); 602 arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF, arg);
604 603
605 drive->acoustic = arg; 604 drive->acoustic = arg;
606 605
607 return 0; 606 return 0;
608 } 607 }
609 608
610 ide_devset_get_flag(addressing, IDE_DFLAG_LBA48); 609 ide_devset_get_flag(addressing, IDE_DFLAG_LBA48);
611 610
612 /* 611 /*
613 * drive->addressing: 612 * drive->addressing:
614 * 0: 28-bit 613 * 0: 28-bit
615 * 1: 48-bit 614 * 1: 48-bit
616 * 2: 48-bit capable doing 28-bit 615 * 2: 48-bit capable doing 28-bit
617 */ 616 */
618 static int set_addressing(ide_drive_t *drive, int arg) 617 static int set_addressing(ide_drive_t *drive, int arg)
619 { 618 {
620 if (arg < 0 || arg > 2) 619 if (arg < 0 || arg > 2)
621 return -EINVAL; 620 return -EINVAL;
622 621
623 if (arg && ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) || 622 if (arg && ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
624 ata_id_lba48_enabled(drive->id) == 0)) 623 ata_id_lba48_enabled(drive->id) == 0))
625 return -EIO; 624 return -EIO;
626 625
627 if (arg == 2) 626 if (arg == 2)
628 arg = 0; 627 arg = 0;
629 628
630 if (arg) 629 if (arg)
631 drive->dev_flags |= IDE_DFLAG_LBA48; 630 drive->dev_flags |= IDE_DFLAG_LBA48;
632 else 631 else
633 drive->dev_flags &= ~IDE_DFLAG_LBA48; 632 drive->dev_flags &= ~IDE_DFLAG_LBA48;
634 633
635 return 0; 634 return 0;
636 } 635 }
637 636
638 ide_ext_devset_rw(acoustic, acoustic); 637 ide_ext_devset_rw(acoustic, acoustic);
639 ide_ext_devset_rw(address, addressing); 638 ide_ext_devset_rw(address, addressing);
640 ide_ext_devset_rw(multcount, multcount); 639 ide_ext_devset_rw(multcount, multcount);
641 ide_ext_devset_rw(wcache, wcache); 640 ide_ext_devset_rw(wcache, wcache);
642 641
643 ide_ext_devset_rw_sync(nowerr, nowerr); 642 ide_ext_devset_rw_sync(nowerr, nowerr);
644 643
645 static int ide_disk_check(ide_drive_t *drive, const char *s) 644 static int ide_disk_check(ide_drive_t *drive, const char *s)
646 { 645 {
647 return 1; 646 return 1;
648 } 647 }
649 648
650 static void ide_disk_setup(ide_drive_t *drive) 649 static void ide_disk_setup(ide_drive_t *drive)
651 { 650 {
652 struct ide_disk_obj *idkp = drive->driver_data; 651 struct ide_disk_obj *idkp = drive->driver_data;
653 struct request_queue *q = drive->queue; 652 struct request_queue *q = drive->queue;
654 ide_hwif_t *hwif = drive->hwif; 653 ide_hwif_t *hwif = drive->hwif;
655 u16 *id = drive->id; 654 u16 *id = drive->id;
656 char *m = (char *)&id[ATA_ID_PROD]; 655 char *m = (char *)&id[ATA_ID_PROD];
657 unsigned long long capacity; 656 unsigned long long capacity;
658 657
659 ide_proc_register_driver(drive, idkp->driver); 658 ide_proc_register_driver(drive, idkp->driver);
660 659
661 if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) 660 if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0)
662 return; 661 return;
663 662
664 if (drive->dev_flags & IDE_DFLAG_REMOVABLE) { 663 if (drive->dev_flags & IDE_DFLAG_REMOVABLE) {
665 /* 664 /*
666 * Removable disks (e.g. SyQuest); ignore 'WD' drives 665 * Removable disks (e.g. SyQuest); ignore 'WD' drives
667 */ 666 */
668 if (m[0] != 'W' || m[1] != 'D') 667 if (m[0] != 'W' || m[1] != 'D')
669 drive->dev_flags |= IDE_DFLAG_DOORLOCKING; 668 drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
670 } 669 }
671 670
672 (void)set_addressing(drive, 1); 671 (void)set_addressing(drive, 1);
673 672
674 if (drive->dev_flags & IDE_DFLAG_LBA48) { 673 if (drive->dev_flags & IDE_DFLAG_LBA48) {
675 int max_s = 2048; 674 int max_s = 2048;
676 675
677 if (max_s > hwif->rqsize) 676 if (max_s > hwif->rqsize)
678 max_s = hwif->rqsize; 677 max_s = hwif->rqsize;
679 678
680 blk_queue_max_hw_sectors(q, max_s); 679 blk_queue_max_hw_sectors(q, max_s);
681 } 680 }
682 681
683 printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name, 682 printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name,
684 queue_max_sectors(q) / 2); 683 queue_max_sectors(q) / 2);
685 684
686 if (ata_id_is_ssd(id)) 685 if (ata_id_is_ssd(id))
687 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 686 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
688 687
689 /* calculate drive capacity, and select LBA if possible */ 688 /* calculate drive capacity, and select LBA if possible */
690 ide_disk_get_capacity(drive); 689 ide_disk_get_capacity(drive);
691 690
692 /* 691 /*
693 * if possible, give fdisk access to more of the drive, 692 * if possible, give fdisk access to more of the drive,
694 * by correcting bios_cyls: 693 * by correcting bios_cyls:
695 */ 694 */
696 capacity = ide_gd_capacity(drive); 695 capacity = ide_gd_capacity(drive);
697 696
698 if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) { 697 if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
699 if (ata_id_lba48_enabled(drive->id)) { 698 if (ata_id_lba48_enabled(drive->id)) {
700 /* compatibility */ 699 /* compatibility */
701 drive->bios_sect = 63; 700 drive->bios_sect = 63;
702 drive->bios_head = 255; 701 drive->bios_head = 255;
703 } 702 }
704 703
705 if (drive->bios_sect && drive->bios_head) { 704 if (drive->bios_sect && drive->bios_head) {
706 unsigned int cap0 = capacity; /* truncate to 32 bits */ 705 unsigned int cap0 = capacity; /* truncate to 32 bits */
707 unsigned int cylsz, cyl; 706 unsigned int cylsz, cyl;
708 707
709 if (cap0 != capacity) 708 if (cap0 != capacity)
710 drive->bios_cyl = 65535; 709 drive->bios_cyl = 65535;
711 else { 710 else {
712 cylsz = drive->bios_sect * drive->bios_head; 711 cylsz = drive->bios_sect * drive->bios_head;
713 cyl = cap0 / cylsz; 712 cyl = cap0 / cylsz;
714 if (cyl > 65535) 713 if (cyl > 65535)
715 cyl = 65535; 714 cyl = 65535;
716 if (cyl > drive->bios_cyl) 715 if (cyl > drive->bios_cyl)
717 drive->bios_cyl = cyl; 716 drive->bios_cyl = cyl;
718 } 717 }
719 } 718 }
720 } 719 }
721 printk(KERN_INFO "%s: %llu sectors (%llu MB)", 720 printk(KERN_INFO "%s: %llu sectors (%llu MB)",
722 drive->name, capacity, sectors_to_MB(capacity)); 721 drive->name, capacity, sectors_to_MB(capacity));
723 722
724 /* Only print cache size when it was specified */ 723 /* Only print cache size when it was specified */
725 if (id[ATA_ID_BUF_SIZE]) 724 if (id[ATA_ID_BUF_SIZE])
726 printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2); 725 printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2);
727 726
728 printk(KERN_CONT ", CHS=%d/%d/%d\n", 727 printk(KERN_CONT ", CHS=%d/%d/%d\n",
729 drive->bios_cyl, drive->bios_head, drive->bios_sect); 728 drive->bios_cyl, drive->bios_head, drive->bios_sect);
730 729
731 /* write cache enabled? */ 730 /* write cache enabled? */
732 if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id)) 731 if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id))
733 drive->dev_flags |= IDE_DFLAG_WCACHE; 732 drive->dev_flags |= IDE_DFLAG_WCACHE;
734 733
735 set_wcache(drive, 1); 734 set_wcache(drive, 1);
736 735
737 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 && 736 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
738 (drive->head == 0 || drive->head > 16)) { 737 (drive->head == 0 || drive->head > 16)) {
739 printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n", 738 printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
740 drive->name, drive->head); 739 drive->name, drive->head);
741 drive->dev_flags &= ~IDE_DFLAG_ATTACH; 740 drive->dev_flags &= ~IDE_DFLAG_ATTACH;
742 } else 741 } else
743 drive->dev_flags |= IDE_DFLAG_ATTACH; 742 drive->dev_flags |= IDE_DFLAG_ATTACH;
744 } 743 }
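A worked example of the bios_cyl correction in ide_disk_setup() (numbers are illustrative): with the compatibility geometry of 255 heads and 63 sectors, cylsz = 255 × 63 = 16,065 sectors per cylinder, so a 4,194,304-sector disk gets cyl = 4,194,304 / 16,065 = 261 cylinders. Any capacity above 65,535 × 16,065 = 1,052,819,775 sectors, or one that no longer fits after the 32-bit cap0 truncation, is clamped to 65,535 cylinders; and the correction only ever raises the existing bios_cyl, never lowers it.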
745 744
746 static void ide_disk_flush(ide_drive_t *drive) 745 static void ide_disk_flush(ide_drive_t *drive)
747 { 746 {
748 if (ata_id_flush_enabled(drive->id) == 0 || 747 if (ata_id_flush_enabled(drive->id) == 0 ||
749 (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) 748 (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
750 return; 749 return;
751 750
752 if (do_idedisk_flushcache(drive)) 751 if (do_idedisk_flushcache(drive))
753 printk(KERN_INFO "%s: wcache flush failed!\n", drive->name); 752 printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
754 } 753 }
755 754
756 static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk) 755 static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
757 { 756 {
758 return 0; 757 return 0;
759 } 758 }
760 759
761 static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk, 760 static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
762 int on) 761 int on)
763 { 762 {
764 struct ide_cmd cmd; 763 struct ide_cmd cmd;
765 int ret; 764 int ret;
766 765
767 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) 766 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
768 return 0; 767 return 0;
769 768
770 memset(&cmd, 0, sizeof(cmd)); 769 memset(&cmd, 0, sizeof(cmd));
771 cmd.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; 770 cmd.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
772 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 771 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
773 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 772 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
774 773
775 ret = ide_no_data_taskfile(drive, &cmd); 774 ret = ide_no_data_taskfile(drive, &cmd);
776 775
777 if (ret) 776 if (ret)
778 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING; 777 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
779 778
780 return ret; 779 return ret;
781 } 780 }
782 781
783 const struct ide_disk_ops ide_ata_disk_ops = { 782 const struct ide_disk_ops ide_ata_disk_ops = {
784 .check = ide_disk_check, 783 .check = ide_disk_check,
785 .unlock_native_capacity = ide_disk_unlock_native_capacity, 784 .unlock_native_capacity = ide_disk_unlock_native_capacity,
786 .get_capacity = ide_disk_get_capacity, 785 .get_capacity = ide_disk_get_capacity,
787 .setup = ide_disk_setup, 786 .setup = ide_disk_setup,
788 .flush = ide_disk_flush, 787 .flush = ide_disk_flush,
789 .init_media = ide_disk_init_media, 788 .init_media = ide_disk_init_media,
790 .set_doorlock = ide_disk_set_doorlock, 789 .set_doorlock = ide_disk_set_doorlock,
791 .do_request = ide_do_rw_disk, 790 .do_request = ide_do_rw_disk,
792 .ioctl = ide_disk_ioctl, 791 .ioctl = ide_disk_ioctl,
793 }; 792 };
794 793
1 /* 1 /*
2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This file is released under the GPL. 5 * This file is released under the GPL.
6 */ 6 */
7 7
8 #include "dm.h" 8 #include "dm.h"
9 #include "dm-uevent.h" 9 #include "dm-uevent.h"
10 10
11 #include <linux/init.h> 11 #include <linux/init.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/mutex.h> 13 #include <linux/mutex.h>
14 #include <linux/moduleparam.h> 14 #include <linux/moduleparam.h>
15 #include <linux/blkpg.h> 15 #include <linux/blkpg.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/buffer_head.h> 17 #include <linux/buffer_head.h>
18 #include <linux/smp_lock.h> 18 #include <linux/smp_lock.h>
19 #include <linux/mempool.h> 19 #include <linux/mempool.h>
20 #include <linux/slab.h> 20 #include <linux/slab.h>
21 #include <linux/idr.h> 21 #include <linux/idr.h>
22 #include <linux/hdreg.h> 22 #include <linux/hdreg.h>
23 #include <linux/delay.h> 23 #include <linux/delay.h>
24 24
25 #include <trace/events/block.h> 25 #include <trace/events/block.h>
26 26
27 #define DM_MSG_PREFIX "core" 27 #define DM_MSG_PREFIX "core"
28 28
29 /* 29 /*
30 * Cookies are numeric values sent with CHANGE and REMOVE 30 * Cookies are numeric values sent with CHANGE and REMOVE
31 * uevents while resuming, removing or renaming the device. 31 * uevents while resuming, removing or renaming the device.
32 */ 32 */
33 #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" 33 #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
34 #define DM_COOKIE_LENGTH 24 34 #define DM_COOKIE_LENGTH 24
35 35
36 static const char *_name = DM_NAME; 36 static const char *_name = DM_NAME;
37 37
38 static unsigned int major = 0; 38 static unsigned int major = 0;
39 static unsigned int _major = 0; 39 static unsigned int _major = 0;
40 40
41 static DEFINE_SPINLOCK(_minor_lock); 41 static DEFINE_SPINLOCK(_minor_lock);
42 /* 42 /*
43 * For bio-based dm. 43 * For bio-based dm.
44 * One of these is allocated per bio. 44 * One of these is allocated per bio.
45 */ 45 */
46 struct dm_io { 46 struct dm_io {
47 struct mapped_device *md; 47 struct mapped_device *md;
48 int error; 48 int error;
49 atomic_t io_count; 49 atomic_t io_count;
50 struct bio *bio; 50 struct bio *bio;
51 unsigned long start_time; 51 unsigned long start_time;
52 spinlock_t endio_lock; 52 spinlock_t endio_lock;
53 }; 53 };
54 54
55 /* 55 /*
56 * For bio-based dm. 56 * For bio-based dm.
57 * One of these is allocated per target within a bio. Hopefully 57 * One of these is allocated per target within a bio. Hopefully
58 * this will be simplified out one day. 58 * this will be simplified out one day.
59 */ 59 */
60 struct dm_target_io { 60 struct dm_target_io {
61 struct dm_io *io; 61 struct dm_io *io;
62 struct dm_target *ti; 62 struct dm_target *ti;
63 union map_info info; 63 union map_info info;
64 }; 64 };
65 65
66 /* 66 /*
67 * For request-based dm. 67 * For request-based dm.
68 * One of these is allocated per request. 68 * One of these is allocated per request.
69 */ 69 */
70 struct dm_rq_target_io { 70 struct dm_rq_target_io {
71 struct mapped_device *md; 71 struct mapped_device *md;
72 struct dm_target *ti; 72 struct dm_target *ti;
73 struct request *orig, clone; 73 struct request *orig, clone;
74 int error; 74 int error;
75 union map_info info; 75 union map_info info;
76 }; 76 };
77 77
78 /* 78 /*
79 * For request-based dm. 79 * For request-based dm.
80 * One of these is allocated per bio. 80 * One of these is allocated per bio.
81 */ 81 */
82 struct dm_rq_clone_bio_info { 82 struct dm_rq_clone_bio_info {
83 struct bio *orig; 83 struct bio *orig;
84 struct dm_rq_target_io *tio; 84 struct dm_rq_target_io *tio;
85 }; 85 };
86 86
87 union map_info *dm_get_mapinfo(struct bio *bio) 87 union map_info *dm_get_mapinfo(struct bio *bio)
88 { 88 {
89 if (bio && bio->bi_private) 89 if (bio && bio->bi_private)
90 return &((struct dm_target_io *)bio->bi_private)->info; 90 return &((struct dm_target_io *)bio->bi_private)->info;
91 return NULL; 91 return NULL;
92 } 92 }
93 93
94 union map_info *dm_get_rq_mapinfo(struct request *rq) 94 union map_info *dm_get_rq_mapinfo(struct request *rq)
95 { 95 {
96 if (rq && rq->end_io_data) 96 if (rq && rq->end_io_data)
97 return &((struct dm_rq_target_io *)rq->end_io_data)->info; 97 return &((struct dm_rq_target_io *)rq->end_io_data)->info;
98 return NULL; 98 return NULL;
99 } 99 }
100 EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); 100 EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
101 101
102 #define MINOR_ALLOCED ((void *)-1) 102 #define MINOR_ALLOCED ((void *)-1)
103 103
104 /* 104 /*
105 * Bits for the md->flags field. 105 * Bits for the md->flags field.
106 */ 106 */
107 #define DMF_BLOCK_IO_FOR_SUSPEND 0 107 #define DMF_BLOCK_IO_FOR_SUSPEND 0
108 #define DMF_SUSPENDED 1 108 #define DMF_SUSPENDED 1
109 #define DMF_FROZEN 2 109 #define DMF_FROZEN 2
110 #define DMF_FREEING 3 110 #define DMF_FREEING 3
111 #define DMF_DELETING 4 111 #define DMF_DELETING 4
112 #define DMF_NOFLUSH_SUSPENDING 5 112 #define DMF_NOFLUSH_SUSPENDING 5
113 #define DMF_QUEUE_IO_TO_THREAD 6 113 #define DMF_QUEUE_IO_TO_THREAD 6
114 114
115 /* 115 /*
116 * Work processed by per-device workqueue. 116 * Work processed by per-device workqueue.
117 */ 117 */
118 struct mapped_device { 118 struct mapped_device {
119 struct rw_semaphore io_lock; 119 struct rw_semaphore io_lock;
120 struct mutex suspend_lock; 120 struct mutex suspend_lock;
121 rwlock_t map_lock; 121 rwlock_t map_lock;
122 atomic_t holders; 122 atomic_t holders;
123 atomic_t open_count; 123 atomic_t open_count;
124 124
125 unsigned long flags; 125 unsigned long flags;
126 126
127 struct request_queue *queue; 127 struct request_queue *queue;
128 unsigned type; 128 unsigned type;
129 /* Protect queue and type against concurrent access. */ 129 /* Protect queue and type against concurrent access. */
130 struct mutex type_lock; 130 struct mutex type_lock;
131 131
132 struct gendisk *disk; 132 struct gendisk *disk;
133 char name[16]; 133 char name[16];
134 134
135 void *interface_ptr; 135 void *interface_ptr;
136 136
137 /* 137 /*
138 * A list of ios that arrived while we were suspended. 138 * A list of ios that arrived while we were suspended.
139 */ 139 */
140 atomic_t pending[2]; 140 atomic_t pending[2];
141 wait_queue_head_t wait; 141 wait_queue_head_t wait;
142 struct work_struct work; 142 struct work_struct work;
143 struct bio_list deferred; 143 struct bio_list deferred;
144 spinlock_t deferred_lock; 144 spinlock_t deferred_lock;
145 145
146 /* 146 /*
147 * An error from the barrier request currently being processed. 147 * An error from the barrier request currently being processed.
148 */ 148 */
149 int barrier_error; 149 int barrier_error;
150 150
151 /* 151 /*
152 * Protect barrier_error from concurrent endio processing 152 * Protect barrier_error from concurrent endio processing
153 * in request-based dm. 153 * in request-based dm.
154 */ 154 */
155 spinlock_t barrier_error_lock; 155 spinlock_t barrier_error_lock;
156 156
157 /* 157 /*
158 * Processing queue (flush/barriers) 158 * Processing queue (flush/barriers)
159 */ 159 */
160 struct workqueue_struct *wq; 160 struct workqueue_struct *wq;
161 struct work_struct barrier_work; 161 struct work_struct barrier_work;
162 162
163 /* A pointer to the currently processing pre/post flush request */ 163 /* A pointer to the currently processing pre/post flush request */
164 struct request *flush_request; 164 struct request *flush_request;
165 165
166 /* 166 /*
167 * The current mapping. 167 * The current mapping.
168 */ 168 */
169 struct dm_table *map; 169 struct dm_table *map;
170 170
171 /* 171 /*
172 * io objects are allocated from here. 172 * io objects are allocated from here.
173 */ 173 */
174 mempool_t *io_pool; 174 mempool_t *io_pool;
175 mempool_t *tio_pool; 175 mempool_t *tio_pool;
176 176
177 struct bio_set *bs; 177 struct bio_set *bs;
178 178
179 /* 179 /*
180 * Event handling. 180 * Event handling.
181 */ 181 */
182 atomic_t event_nr; 182 atomic_t event_nr;
183 wait_queue_head_t eventq; 183 wait_queue_head_t eventq;
184 atomic_t uevent_seq; 184 atomic_t uevent_seq;
185 struct list_head uevent_list; 185 struct list_head uevent_list;
186 spinlock_t uevent_lock; /* Protect access to uevent_list */ 186 spinlock_t uevent_lock; /* Protect access to uevent_list */
187 187
188 /* 188 /*
189 * freeze/thaw support requires holding onto a super block 189 * freeze/thaw support requires holding onto a super block
190 */ 190 */
191 struct super_block *frozen_sb; 191 struct super_block *frozen_sb;
192 struct block_device *bdev; 192 struct block_device *bdev;
193 193
194 /* forced geometry settings */ 194 /* forced geometry settings */
195 struct hd_geometry geometry; 195 struct hd_geometry geometry;
196 196
197 /* For saving the address of __make_request for request based dm */ 197 /* For saving the address of __make_request for request based dm */
198 make_request_fn *saved_make_request_fn; 198 make_request_fn *saved_make_request_fn;
199 199
200 /* sysfs handle */ 200 /* sysfs handle */
201 struct kobject kobj; 201 struct kobject kobj;
202 202
203 /* zero-length barrier that will be cloned and submitted to targets */ 203 /* zero-length barrier that will be cloned and submitted to targets */
204 struct bio barrier_bio; 204 struct bio barrier_bio;
205 }; 205 };
206 206
207 /* 207 /*
208 * For mempool pre-allocation at table loading time. 208 * For mempool pre-allocation at table loading time.
209 */ 209 */
210 struct dm_md_mempools { 210 struct dm_md_mempools {
211 mempool_t *io_pool; 211 mempool_t *io_pool;
212 mempool_t *tio_pool; 212 mempool_t *tio_pool;
213 struct bio_set *bs; 213 struct bio_set *bs;
214 }; 214 };
215 215
216 #define MIN_IOS 256 216 #define MIN_IOS 256
217 static struct kmem_cache *_io_cache; 217 static struct kmem_cache *_io_cache;
218 static struct kmem_cache *_tio_cache; 218 static struct kmem_cache *_tio_cache;
219 static struct kmem_cache *_rq_tio_cache; 219 static struct kmem_cache *_rq_tio_cache;
220 static struct kmem_cache *_rq_bio_info_cache; 220 static struct kmem_cache *_rq_bio_info_cache;
221 221
222 static int __init local_init(void) 222 static int __init local_init(void)
223 { 223 {
224 int r = -ENOMEM; 224 int r = -ENOMEM;
225 225
226 /* allocate a slab for the dm_ios */ 226 /* allocate a slab for the dm_ios */
227 _io_cache = KMEM_CACHE(dm_io, 0); 227 _io_cache = KMEM_CACHE(dm_io, 0);
228 if (!_io_cache) 228 if (!_io_cache)
229 return r; 229 return r;
230 230
231 /* allocate a slab for the target ios */ 231 /* allocate a slab for the target ios */
232 _tio_cache = KMEM_CACHE(dm_target_io, 0); 232 _tio_cache = KMEM_CACHE(dm_target_io, 0);
233 if (!_tio_cache) 233 if (!_tio_cache)
234 goto out_free_io_cache; 234 goto out_free_io_cache;
235 235
236 _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); 236 _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
237 if (!_rq_tio_cache) 237 if (!_rq_tio_cache)
238 goto out_free_tio_cache; 238 goto out_free_tio_cache;
239 239
240 _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); 240 _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
241 if (!_rq_bio_info_cache) 241 if (!_rq_bio_info_cache)
242 goto out_free_rq_tio_cache; 242 goto out_free_rq_tio_cache;
243 243
244 r = dm_uevent_init(); 244 r = dm_uevent_init();
245 if (r) 245 if (r)
246 goto out_free_rq_bio_info_cache; 246 goto out_free_rq_bio_info_cache;
247 247
248 _major = major; 248 _major = major;
249 r = register_blkdev(_major, _name); 249 r = register_blkdev(_major, _name);
250 if (r < 0) 250 if (r < 0)
251 goto out_uevent_exit; 251 goto out_uevent_exit;
252 252
253 if (!_major) 253 if (!_major)
254 _major = r; 254 _major = r;
255 255
256 return 0; 256 return 0;
257 257
258 out_uevent_exit: 258 out_uevent_exit:
259 dm_uevent_exit(); 259 dm_uevent_exit();
260 out_free_rq_bio_info_cache: 260 out_free_rq_bio_info_cache:
261 kmem_cache_destroy(_rq_bio_info_cache); 261 kmem_cache_destroy(_rq_bio_info_cache);
262 out_free_rq_tio_cache: 262 out_free_rq_tio_cache:
263 kmem_cache_destroy(_rq_tio_cache); 263 kmem_cache_destroy(_rq_tio_cache);
264 out_free_tio_cache: 264 out_free_tio_cache:
265 kmem_cache_destroy(_tio_cache); 265 kmem_cache_destroy(_tio_cache);
266 out_free_io_cache: 266 out_free_io_cache:
267 kmem_cache_destroy(_io_cache); 267 kmem_cache_destroy(_io_cache);
268 268
269 return r; 269 return r;
270 } 270 }
271 271
272 static void local_exit(void) 272 static void local_exit(void)
273 { 273 {
274 kmem_cache_destroy(_rq_bio_info_cache); 274 kmem_cache_destroy(_rq_bio_info_cache);
275 kmem_cache_destroy(_rq_tio_cache); 275 kmem_cache_destroy(_rq_tio_cache);
276 kmem_cache_destroy(_tio_cache); 276 kmem_cache_destroy(_tio_cache);
277 kmem_cache_destroy(_io_cache); 277 kmem_cache_destroy(_io_cache);
278 unregister_blkdev(_major, _name); 278 unregister_blkdev(_major, _name);
279 dm_uevent_exit(); 279 dm_uevent_exit();
280 280
281 _major = 0; 281 _major = 0;
282 282
283 DMINFO("cleaned up"); 283 DMINFO("cleaned up");
284 } 284 }
285 285
286 static int (*_inits[])(void) __initdata = { 286 static int (*_inits[])(void) __initdata = {
287 local_init, 287 local_init,
288 dm_target_init, 288 dm_target_init,
289 dm_linear_init, 289 dm_linear_init,
290 dm_stripe_init, 290 dm_stripe_init,
291 dm_io_init, 291 dm_io_init,
292 dm_kcopyd_init, 292 dm_kcopyd_init,
293 dm_interface_init, 293 dm_interface_init,
294 }; 294 };
295 295
296 static void (*_exits[])(void) = { 296 static void (*_exits[])(void) = {
297 local_exit, 297 local_exit,
298 dm_target_exit, 298 dm_target_exit,
299 dm_linear_exit, 299 dm_linear_exit,
300 dm_stripe_exit, 300 dm_stripe_exit,
301 dm_io_exit, 301 dm_io_exit,
302 dm_kcopyd_exit, 302 dm_kcopyd_exit,
303 dm_interface_exit, 303 dm_interface_exit,
304 }; 304 };
305 305
306 static int __init dm_init(void) 306 static int __init dm_init(void)
307 { 307 {
308 const int count = ARRAY_SIZE(_inits); 308 const int count = ARRAY_SIZE(_inits);
309 309
310 int r, i; 310 int r, i;
311 311
312 for (i = 0; i < count; i++) { 312 for (i = 0; i < count; i++) {
313 r = _inits[i](); 313 r = _inits[i]();
314 if (r) 314 if (r)
315 goto bad; 315 goto bad;
316 } 316 }
317 317
318 return 0; 318 return 0;
319 319
320 bad: 320 bad:
321 while (i--) 321 while (i--)
322 _exits[i](); 322 _exits[i]();
323 323
324 return r; 324 return r;
325 } 325 }
326 326
327 static void __exit dm_exit(void) 327 static void __exit dm_exit(void)
328 { 328 {
329 int i = ARRAY_SIZE(_exits); 329 int i = ARRAY_SIZE(_exits);
330 330
331 while (i--) 331 while (i--)
332 _exits[i](); 332 _exits[i]();
333 } 333 }
334 334
335 /* 335 /*
336 * Block device functions 336 * Block device functions
337 */ 337 */
338 int dm_deleting_md(struct mapped_device *md) 338 int dm_deleting_md(struct mapped_device *md)
339 { 339 {
340 return test_bit(DMF_DELETING, &md->flags); 340 return test_bit(DMF_DELETING, &md->flags);
341 } 341 }
342 342
343 static int dm_blk_open(struct block_device *bdev, fmode_t mode) 343 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
344 { 344 {
345 struct mapped_device *md; 345 struct mapped_device *md;
346 346
347 lock_kernel(); 347 lock_kernel();
348 spin_lock(&_minor_lock); 348 spin_lock(&_minor_lock);
349 349
350 md = bdev->bd_disk->private_data; 350 md = bdev->bd_disk->private_data;
351 if (!md) 351 if (!md)
352 goto out; 352 goto out;
353 353
354 if (test_bit(DMF_FREEING, &md->flags) || 354 if (test_bit(DMF_FREEING, &md->flags) ||
355 dm_deleting_md(md)) { 355 dm_deleting_md(md)) {
356 md = NULL; 356 md = NULL;
357 goto out; 357 goto out;
358 } 358 }
359 359
360 dm_get(md); 360 dm_get(md);
361 atomic_inc(&md->open_count); 361 atomic_inc(&md->open_count);
362 362
363 out: 363 out:
364 spin_unlock(&_minor_lock); 364 spin_unlock(&_minor_lock);
365 unlock_kernel(); 365 unlock_kernel();
366 366
367 return md ? 0 : -ENXIO; 367 return md ? 0 : -ENXIO;
368 } 368 }
369 369
370 static int dm_blk_close(struct gendisk *disk, fmode_t mode) 370 static int dm_blk_close(struct gendisk *disk, fmode_t mode)
371 { 371 {
372 struct mapped_device *md = disk->private_data; 372 struct mapped_device *md = disk->private_data;
373 373
374 lock_kernel(); 374 lock_kernel();
375 atomic_dec(&md->open_count); 375 atomic_dec(&md->open_count);
376 dm_put(md); 376 dm_put(md);
377 unlock_kernel(); 377 unlock_kernel();
378 378
379 return 0; 379 return 0;
380 } 380 }
381 381
382 int dm_open_count(struct mapped_device *md) 382 int dm_open_count(struct mapped_device *md)
383 { 383 {
384 return atomic_read(&md->open_count); 384 return atomic_read(&md->open_count);
385 } 385 }
386 386
387 /* 387 /*
388 * Guarantees nothing is using the device before it's deleted. 388 * Guarantees nothing is using the device before it's deleted.
389 */ 389 */
390 int dm_lock_for_deletion(struct mapped_device *md) 390 int dm_lock_for_deletion(struct mapped_device *md)
391 { 391 {
392 int r = 0; 392 int r = 0;
393 393
394 spin_lock(&_minor_lock); 394 spin_lock(&_minor_lock);
395 395
396 if (dm_open_count(md)) 396 if (dm_open_count(md))
397 r = -EBUSY; 397 r = -EBUSY;
398 else 398 else
399 set_bit(DMF_DELETING, &md->flags); 399 set_bit(DMF_DELETING, &md->flags);
400 400
401 spin_unlock(&_minor_lock); 401 spin_unlock(&_minor_lock);
402 402
403 return r; 403 return r;
404 } 404 }
405 405
406 static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) 406 static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
407 { 407 {
408 struct mapped_device *md = bdev->bd_disk->private_data; 408 struct mapped_device *md = bdev->bd_disk->private_data;
409 409
410 return dm_get_geometry(md, geo); 410 return dm_get_geometry(md, geo);
411 } 411 }
412 412
413 static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, 413 static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
414 unsigned int cmd, unsigned long arg) 414 unsigned int cmd, unsigned long arg)
415 { 415 {
416 struct mapped_device *md = bdev->bd_disk->private_data; 416 struct mapped_device *md = bdev->bd_disk->private_data;
417 struct dm_table *map = dm_get_live_table(md); 417 struct dm_table *map = dm_get_live_table(md);
418 struct dm_target *tgt; 418 struct dm_target *tgt;
419 int r = -ENOTTY; 419 int r = -ENOTTY;
420 420
421 if (!map || !dm_table_get_size(map)) 421 if (!map || !dm_table_get_size(map))
422 goto out; 422 goto out;
423 423
424 /* We only support devices that have a single target */ 424 /* We only support devices that have a single target */
425 if (dm_table_get_num_targets(map) != 1) 425 if (dm_table_get_num_targets(map) != 1)
426 goto out; 426 goto out;
427 427
428 tgt = dm_table_get_target(map, 0); 428 tgt = dm_table_get_target(map, 0);
429 429
430 if (dm_suspended_md(md)) { 430 if (dm_suspended_md(md)) {
431 r = -EAGAIN; 431 r = -EAGAIN;
432 goto out; 432 goto out;
433 } 433 }
434 434
435 if (tgt->type->ioctl) 435 if (tgt->type->ioctl)
436 r = tgt->type->ioctl(tgt, cmd, arg); 436 r = tgt->type->ioctl(tgt, cmd, arg);
437 437
438 out: 438 out:
439 dm_table_put(map); 439 dm_table_put(map);
440 440
441 return r; 441 return r;
442 } 442 }
443 443
444 static struct dm_io *alloc_io(struct mapped_device *md) 444 static struct dm_io *alloc_io(struct mapped_device *md)
445 { 445 {
446 return mempool_alloc(md->io_pool, GFP_NOIO); 446 return mempool_alloc(md->io_pool, GFP_NOIO);
447 } 447 }
448 448
449 static void free_io(struct mapped_device *md, struct dm_io *io) 449 static void free_io(struct mapped_device *md, struct dm_io *io)
450 { 450 {
451 mempool_free(io, md->io_pool); 451 mempool_free(io, md->io_pool);
452 } 452 }
453 453
454 static void free_tio(struct mapped_device *md, struct dm_target_io *tio) 454 static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
455 { 455 {
456 mempool_free(tio, md->tio_pool); 456 mempool_free(tio, md->tio_pool);
457 } 457 }
458 458
459 static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, 459 static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
460 gfp_t gfp_mask) 460 gfp_t gfp_mask)
461 { 461 {
462 return mempool_alloc(md->tio_pool, gfp_mask); 462 return mempool_alloc(md->tio_pool, gfp_mask);
463 } 463 }
464 464
465 static void free_rq_tio(struct dm_rq_target_io *tio) 465 static void free_rq_tio(struct dm_rq_target_io *tio)
466 { 466 {
467 mempool_free(tio, tio->md->tio_pool); 467 mempool_free(tio, tio->md->tio_pool);
468 } 468 }
469 469
470 static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) 470 static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
471 { 471 {
472 return mempool_alloc(md->io_pool, GFP_ATOMIC); 472 return mempool_alloc(md->io_pool, GFP_ATOMIC);
473 } 473 }
474 474
475 static void free_bio_info(struct dm_rq_clone_bio_info *info) 475 static void free_bio_info(struct dm_rq_clone_bio_info *info)
476 { 476 {
477 mempool_free(info, info->tio->md->io_pool); 477 mempool_free(info, info->tio->md->io_pool);
478 } 478 }
479 479
480 static int md_in_flight(struct mapped_device *md) 480 static int md_in_flight(struct mapped_device *md)
481 { 481 {
482 return atomic_read(&md->pending[READ]) + 482 return atomic_read(&md->pending[READ]) +
483 atomic_read(&md->pending[WRITE]); 483 atomic_read(&md->pending[WRITE]);
484 } 484 }
485 485
486 static void start_io_acct(struct dm_io *io) 486 static void start_io_acct(struct dm_io *io)
487 { 487 {
488 struct mapped_device *md = io->md; 488 struct mapped_device *md = io->md;
489 int cpu; 489 int cpu;
490 int rw = bio_data_dir(io->bio); 490 int rw = bio_data_dir(io->bio);
491 491
492 io->start_time = jiffies; 492 io->start_time = jiffies;
493 493
494 cpu = part_stat_lock(); 494 cpu = part_stat_lock();
495 part_round_stats(cpu, &dm_disk(md)->part0); 495 part_round_stats(cpu, &dm_disk(md)->part0);
496 part_stat_unlock(); 496 part_stat_unlock();
497 dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); 497 dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
498 } 498 }
499 499
500 static void end_io_acct(struct dm_io *io) 500 static void end_io_acct(struct dm_io *io)
501 { 501 {
502 struct mapped_device *md = io->md; 502 struct mapped_device *md = io->md;
503 struct bio *bio = io->bio; 503 struct bio *bio = io->bio;
504 unsigned long duration = jiffies - io->start_time; 504 unsigned long duration = jiffies - io->start_time;
505 int pending, cpu; 505 int pending, cpu;
506 int rw = bio_data_dir(bio); 506 int rw = bio_data_dir(bio);
507 507
508 cpu = part_stat_lock(); 508 cpu = part_stat_lock();
509 part_round_stats(cpu, &dm_disk(md)->part0); 509 part_round_stats(cpu, &dm_disk(md)->part0);
510 part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); 510 part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
511 part_stat_unlock(); 511 part_stat_unlock();
512 512
513 /* 513 /*
514 * After this is decremented the bio must not be touched if it is 514 * After this is decremented the bio must not be touched if it is
515 * a barrier. 515 * a barrier.
516 */ 516 */
517 dm_disk(md)->part0.in_flight[rw] = pending = 517 dm_disk(md)->part0.in_flight[rw] = pending =
518 atomic_dec_return(&md->pending[rw]); 518 atomic_dec_return(&md->pending[rw]);
519 pending += atomic_read(&md->pending[rw^0x1]); 519 pending += atomic_read(&md->pending[rw^0x1]);
520 520
521 /* nudge anyone waiting on suspend queue */ 521 /* nudge anyone waiting on suspend queue */
522 if (!pending) 522 if (!pending)
523 wake_up(&md->wait); 523 wake_up(&md->wait);
524 } 524 }
525 525
526 /* 526 /*
527 * Add the bio to the list of deferred io. 527 * Add the bio to the list of deferred io.
528 */ 528 */
529 static void queue_io(struct mapped_device *md, struct bio *bio) 529 static void queue_io(struct mapped_device *md, struct bio *bio)
530 { 530 {
531 down_write(&md->io_lock); 531 down_write(&md->io_lock);
532 532
533 spin_lock_irq(&md->deferred_lock); 533 spin_lock_irq(&md->deferred_lock);
534 bio_list_add(&md->deferred, bio); 534 bio_list_add(&md->deferred, bio);
535 spin_unlock_irq(&md->deferred_lock); 535 spin_unlock_irq(&md->deferred_lock);
536 536
537 if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) 537 if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
538 queue_work(md->wq, &md->work); 538 queue_work(md->wq, &md->work);
539 539
540 up_write(&md->io_lock); 540 up_write(&md->io_lock);
541 } 541 }
542 542
543 /* 543 /*
544 * Everyone (including functions in this file) should use this 544 * Everyone (including functions in this file) should use this
545 * function to access the md->map field, and make sure they call 545 * function to access the md->map field, and make sure they call
546 * dm_table_put() when finished. 546 * dm_table_put() when finished.
547 */ 547 */
548 struct dm_table *dm_get_live_table(struct mapped_device *md) 548 struct dm_table *dm_get_live_table(struct mapped_device *md)
549 { 549 {
550 struct dm_table *t; 550 struct dm_table *t;
551 unsigned long flags; 551 unsigned long flags;
552 552
553 read_lock_irqsave(&md->map_lock, flags); 553 read_lock_irqsave(&md->map_lock, flags);
554 t = md->map; 554 t = md->map;
555 if (t) 555 if (t)
556 dm_table_get(t); 556 dm_table_get(t);
557 read_unlock_irqrestore(&md->map_lock, flags); 557 read_unlock_irqrestore(&md->map_lock, flags);
558 558
559 return t; 559 return t;
560 } 560 }
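A minimal sketch of the access pattern requested in the comment above dm_get_live_table(); the caller below is hypothetical and not part of the patch, with dm_table_get_size() standing in for any read of the table:

	static sector_t example_live_table_size(struct mapped_device *md)
	{
		struct dm_table *map = dm_get_live_table(md);	/* takes a reference on the live table */
		sector_t size = 0;

		if (map) {
			size = dm_table_get_size(map);		/* safe to use while the reference is held */
			dm_table_put(map);			/* drop the reference when finished */
		}

		return size;
	}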
561 561
562 /* 562 /*
563 * Get the geometry associated with a dm device 563 * Get the geometry associated with a dm device
564 */ 564 */
565 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) 565 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
566 { 566 {
567 *geo = md->geometry; 567 *geo = md->geometry;
568 568
569 return 0; 569 return 0;
570 } 570 }
571 571
572 /* 572 /*
573 * Set the geometry of a device. 573 * Set the geometry of a device.
574 */ 574 */
575 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) 575 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
576 { 576 {
577 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; 577 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
578 578
579 if (geo->start > sz) { 579 if (geo->start > sz) {
580 DMWARN("Start sector is beyond the geometry limits."); 580 DMWARN("Start sector is beyond the geometry limits.");
581 return -EINVAL; 581 return -EINVAL;
582 } 582 }
583 583
584 md->geometry = *geo; 584 md->geometry = *geo;
585 585
586 return 0; 586 return 0;
587 } 587 }
588 588
589 /*----------------------------------------------------------------- 589 /*-----------------------------------------------------------------
590 * CRUD START: 590 * CRUD START:
591 * A more elegant soln is in the works that uses the queue 591 * A more elegant soln is in the works that uses the queue
592 * merge fn, unfortunately there are a couple of changes to 592 * merge fn, unfortunately there are a couple of changes to
593 * the block layer that I want to make for this. So in the 593 * the block layer that I want to make for this. So in the
594 * interests of getting something for people to use I give 594 * interests of getting something for people to use I give
595 * you this clearly demarcated crap. 595 * you this clearly demarcated crap.
596 *---------------------------------------------------------------*/ 596 *---------------------------------------------------------------*/
597 597
598 static int __noflush_suspending(struct mapped_device *md) 598 static int __noflush_suspending(struct mapped_device *md)
599 { 599 {
600 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 600 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
601 } 601 }
602 602
603 /* 603 /*
604 * Decrements the number of outstanding ios that a bio has been 604 * Decrements the number of outstanding ios that a bio has been
605 * cloned into, completing the original io if necessary. 605 * cloned into, completing the original io if necessary.
606 */ 606 */
607 static void dec_pending(struct dm_io *io, int error) 607 static void dec_pending(struct dm_io *io, int error)
608 { 608 {
609 unsigned long flags; 609 unsigned long flags;
610 int io_error; 610 int io_error;
611 struct bio *bio; 611 struct bio *bio;
612 struct mapped_device *md = io->md; 612 struct mapped_device *md = io->md;
613 613
614 /* Push-back supersedes any I/O errors */ 614 /* Push-back supersedes any I/O errors */
615 if (unlikely(error)) { 615 if (unlikely(error)) {
616 spin_lock_irqsave(&io->endio_lock, flags); 616 spin_lock_irqsave(&io->endio_lock, flags);
617 if (!(io->error > 0 && __noflush_suspending(md))) 617 if (!(io->error > 0 && __noflush_suspending(md)))
618 io->error = error; 618 io->error = error;
619 spin_unlock_irqrestore(&io->endio_lock, flags); 619 spin_unlock_irqrestore(&io->endio_lock, flags);
620 } 620 }
621 621
622 if (atomic_dec_and_test(&io->io_count)) { 622 if (atomic_dec_and_test(&io->io_count)) {
623 if (io->error == DM_ENDIO_REQUEUE) { 623 if (io->error == DM_ENDIO_REQUEUE) {
624 /* 624 /*
625 * Target requested pushing back the I/O. 625 * Target requested pushing back the I/O.
626 */ 626 */
627 spin_lock_irqsave(&md->deferred_lock, flags); 627 spin_lock_irqsave(&md->deferred_lock, flags);
628 if (__noflush_suspending(md)) { 628 if (__noflush_suspending(md)) {
629 if (!(io->bio->bi_rw & REQ_HARDBARRIER)) 629 if (!(io->bio->bi_rw & REQ_HARDBARRIER))
630 bio_list_add_head(&md->deferred, 630 bio_list_add_head(&md->deferred,
631 io->bio); 631 io->bio);
632 } else 632 } else
633 /* noflush suspend was interrupted. */ 633 /* noflush suspend was interrupted. */
634 io->error = -EIO; 634 io->error = -EIO;
635 spin_unlock_irqrestore(&md->deferred_lock, flags); 635 spin_unlock_irqrestore(&md->deferred_lock, flags);
636 } 636 }
637 637
638 io_error = io->error; 638 io_error = io->error;
639 bio = io->bio; 639 bio = io->bio;
640 640
641 if (bio->bi_rw & REQ_HARDBARRIER) { 641 if (bio->bi_rw & REQ_HARDBARRIER) {
642 /* 642 /*
643 * There can be just one barrier request so we use 643 * There can be just one barrier request so we use
644 * a per-device variable for error reporting. 644 * a per-device variable for error reporting.
645 * Note that you can't touch the bio after end_io_acct 645 * Note that you can't touch the bio after end_io_acct
646 * 646 *
647 * We ignore -EOPNOTSUPP for empty flush reported by 647 * We ignore -EOPNOTSUPP for empty flush reported by
648 * underlying devices. We assume that if the device 648 * underlying devices. We assume that if the device
649 * doesn't support empty barriers, it doesn't need 649 * doesn't support empty barriers, it doesn't need
650 * cache flushing commands. 650 * cache flushing commands.
651 */ 651 */
652 if (!md->barrier_error && 652 if (!md->barrier_error &&
653 !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) 653 !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
654 md->barrier_error = io_error; 654 md->barrier_error = io_error;
655 end_io_acct(io); 655 end_io_acct(io);
656 free_io(md, io); 656 free_io(md, io);
657 } else { 657 } else {
658 end_io_acct(io); 658 end_io_acct(io);
659 free_io(md, io); 659 free_io(md, io);
660 660
661 if (io_error != DM_ENDIO_REQUEUE) { 661 if (io_error != DM_ENDIO_REQUEUE) {
662 trace_block_bio_complete(md->queue, bio); 662 trace_block_bio_complete(md->queue, bio);
663 663
664 bio_endio(bio, io_error); 664 bio_endio(bio, io_error);
665 } 665 }
666 } 666 }
667 } 667 }
668 } 668 }
669 669
670 static void clone_endio(struct bio *bio, int error) 670 static void clone_endio(struct bio *bio, int error)
671 { 671 {
672 int r = 0; 672 int r = 0;
673 struct dm_target_io *tio = bio->bi_private; 673 struct dm_target_io *tio = bio->bi_private;
674 struct dm_io *io = tio->io; 674 struct dm_io *io = tio->io;
675 struct mapped_device *md = tio->io->md; 675 struct mapped_device *md = tio->io->md;
676 dm_endio_fn endio = tio->ti->type->end_io; 676 dm_endio_fn endio = tio->ti->type->end_io;
677 677
678 if (!bio_flagged(bio, BIO_UPTODATE) && !error) 678 if (!bio_flagged(bio, BIO_UPTODATE) && !error)
679 error = -EIO; 679 error = -EIO;
680 680
681 if (endio) { 681 if (endio) {
682 r = endio(tio->ti, bio, error, &tio->info); 682 r = endio(tio->ti, bio, error, &tio->info);
683 if (r < 0 || r == DM_ENDIO_REQUEUE) 683 if (r < 0 || r == DM_ENDIO_REQUEUE)
684 /* 684 /*
685 * error and requeue request are handled 685 * error and requeue request are handled
686 * in dec_pending(). 686 * in dec_pending().
687 */ 687 */
688 error = r; 688 error = r;
689 else if (r == DM_ENDIO_INCOMPLETE) 689 else if (r == DM_ENDIO_INCOMPLETE)
690 /* The target will handle the io */ 690 /* The target will handle the io */
691 return; 691 return;
692 else if (r) { 692 else if (r) {
693 DMWARN("unimplemented target endio return value: %d", r); 693 DMWARN("unimplemented target endio return value: %d", r);
694 BUG(); 694 BUG();
695 } 695 }
696 } 696 }
697 697
698 /* 698 /*
699 * Store md for cleanup instead of tio which is about to get freed. 699 * Store md for cleanup instead of tio which is about to get freed.
700 */ 700 */
701 bio->bi_private = md->bs; 701 bio->bi_private = md->bs;
702 702
703 free_tio(md, tio); 703 free_tio(md, tio);
704 bio_put(bio); 704 bio_put(bio);
705 dec_pending(io, error); 705 dec_pending(io, error);
706 } 706 }
707 707
708 /* 708 /*
709 * Partial completion handling for request-based dm 709 * Partial completion handling for request-based dm
710 */ 710 */
711 static void end_clone_bio(struct bio *clone, int error) 711 static void end_clone_bio(struct bio *clone, int error)
712 { 712 {
713 struct dm_rq_clone_bio_info *info = clone->bi_private; 713 struct dm_rq_clone_bio_info *info = clone->bi_private;
714 struct dm_rq_target_io *tio = info->tio; 714 struct dm_rq_target_io *tio = info->tio;
715 struct bio *bio = info->orig; 715 struct bio *bio = info->orig;
716 unsigned int nr_bytes = info->orig->bi_size; 716 unsigned int nr_bytes = info->orig->bi_size;
717 717
718 bio_put(clone); 718 bio_put(clone);
719 719
720 if (tio->error) 720 if (tio->error)
721 /* 721 /*
722 * An error has already been detected on the request. 722 * An error has already been detected on the request.
723 * Once an error has occurred, just let clone->end_io() handle 723 * Once an error has occurred, just let clone->end_io() handle
724 * the remainder. 724 * the remainder.
725 */ 725 */
726 return; 726 return;
727 else if (error) { 727 else if (error) {
728 /* 728 /*
729 * Don't report the error to the upper layer yet. 729 * Don't report the error to the upper layer yet.
730 * The error handling decision is made by the target driver 730 * The error handling decision is made by the target driver
731 * when the request is completed. 731 * when the request is completed.
732 */ 732 */
733 tio->error = error; 733 tio->error = error;
734 return; 734 return;
735 } 735 }
736 736
737 /* 737 /*
738 * I/O for the bio successfully completed. 738 * I/O for the bio successfully completed.
739 * Report the data completion to the upper layer. 739 * Report the data completion to the upper layer.
740 */ 740 */
741 741
742 /* 742 /*
743 * bios are processed from the head of the list. 743 * bios are processed from the head of the list.
744 * So the completing bio should always be rq->bio. 744 * So the completing bio should always be rq->bio.
745 * If it's not, something is wrong. 745 * If it's not, something is wrong.
746 */ 746 */
747 if (tio->orig->bio != bio) 747 if (tio->orig->bio != bio)
748 DMERR("bio completion is going in the middle of the request"); 748 DMERR("bio completion is going in the middle of the request");
749 749
750 /* 750 /*
751 * Update the original request. 751 * Update the original request.
752 * Do not use blk_end_request() here, because it may complete 752 * Do not use blk_end_request() here, because it may complete
753 * the original request before the clone, and break the ordering. 753 * the original request before the clone, and break the ordering.
754 */ 754 */
755 blk_update_request(tio->orig, 0, nr_bytes); 755 blk_update_request(tio->orig, 0, nr_bytes);
756 } 756 }
757 757
758 static void store_barrier_error(struct mapped_device *md, int error) 758 static void store_barrier_error(struct mapped_device *md, int error)
759 { 759 {
760 unsigned long flags; 760 unsigned long flags;
761 761
762 spin_lock_irqsave(&md->barrier_error_lock, flags); 762 spin_lock_irqsave(&md->barrier_error_lock, flags);
763 /* 763 /*
764 * Basically, the first error is taken, but: 764 * Basically, the first error is taken, but:
765 * -EOPNOTSUPP supersedes any I/O error. 765 * -EOPNOTSUPP supersedes any I/O error.
766 * Requeue request supersedes any I/O error but -EOPNOTSUPP. 766 * Requeue request supersedes any I/O error but -EOPNOTSUPP.
767 */ 767 */
768 if (!md->barrier_error || error == -EOPNOTSUPP || 768 if (!md->barrier_error || error == -EOPNOTSUPP ||
769 (md->barrier_error != -EOPNOTSUPP && 769 (md->barrier_error != -EOPNOTSUPP &&
770 error == DM_ENDIO_REQUEUE)) 770 error == DM_ENDIO_REQUEUE))
771 md->barrier_error = error; 771 md->barrier_error = error;
772 spin_unlock_irqrestore(&md->barrier_error_lock, flags); 772 spin_unlock_irqrestore(&md->barrier_error_lock, flags);
773 } 773 }
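A worked trace of the precedence described in the comment above; the call sequence is illustrative only:

	store_barrier_error(md, -EIO);			/* first error recorded: barrier_error = -EIO */
	store_barrier_error(md, DM_ENDIO_REQUEUE);	/* requeue supersedes a plain I/O error */
	store_barrier_error(md, -EOPNOTSUPP);		/* -EOPNOTSUPP supersedes everything */

Once -EOPNOTSUPP has been stored, neither a later requeue nor a later I/O error replaces it.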
774 774
775 /* 775 /*
776 * Don't touch any member of the md after calling this function because 776 * Don't touch any member of the md after calling this function because
777 * the md may be freed in dm_put() at the end of this function. 777 * the md may be freed in dm_put() at the end of this function.
778 * Or do dm_get() before calling this function and dm_put() later. 778 * Or do dm_get() before calling this function and dm_put() later.
779 */ 779 */
780 static void rq_completed(struct mapped_device *md, int rw, int run_queue) 780 static void rq_completed(struct mapped_device *md, int rw, int run_queue)
781 { 781 {
782 atomic_dec(&md->pending[rw]); 782 atomic_dec(&md->pending[rw]);
783 783
784 /* nudge anyone waiting on suspend queue */ 784 /* nudge anyone waiting on suspend queue */
785 if (!md_in_flight(md)) 785 if (!md_in_flight(md))
786 wake_up(&md->wait); 786 wake_up(&md->wait);
787 787
788 if (run_queue) 788 if (run_queue)
789 blk_run_queue(md->queue); 789 blk_run_queue(md->queue);
790 790
791 /* 791 /*
792 * dm_put() must be at the end of this function. See the comment above 792 * dm_put() must be at the end of this function. See the comment above
793 */ 793 */
794 dm_put(md); 794 dm_put(md);
795 } 795 }
796 796
797 static void free_rq_clone(struct request *clone) 797 static void free_rq_clone(struct request *clone)
798 { 798 {
799 struct dm_rq_target_io *tio = clone->end_io_data; 799 struct dm_rq_target_io *tio = clone->end_io_data;
800 800
801 blk_rq_unprep_clone(clone); 801 blk_rq_unprep_clone(clone);
802 free_rq_tio(tio); 802 free_rq_tio(tio);
803 } 803 }
804 804
805 /* 805 /*
806 * Complete the clone and the original request. 806 * Complete the clone and the original request.
807 * Must be called without queue lock. 807 * Must be called without queue lock.
808 */ 808 */
809 static void dm_end_request(struct request *clone, int error) 809 static void dm_end_request(struct request *clone, int error)
810 { 810 {
811 int rw = rq_data_dir(clone); 811 int rw = rq_data_dir(clone);
812 int run_queue = 1; 812 int run_queue = 1;
813 bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; 813 bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
814 struct dm_rq_target_io *tio = clone->end_io_data; 814 struct dm_rq_target_io *tio = clone->end_io_data;
815 struct mapped_device *md = tio->md; 815 struct mapped_device *md = tio->md;
816 struct request *rq = tio->orig; 816 struct request *rq = tio->orig;
817 817
818 if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { 818 if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
819 rq->errors = clone->errors; 819 rq->errors = clone->errors;
820 rq->resid_len = clone->resid_len; 820 rq->resid_len = clone->resid_len;
821 821
822 if (rq->sense) 822 if (rq->sense)
823 /* 823 /*
824 * We are using the sense buffer of the original 824 * We are using the sense buffer of the original
825 * request. 825 * request.
826 * So setting the length of the sense data is enough. 826 * So setting the length of the sense data is enough.
827 */ 827 */
828 rq->sense_len = clone->sense_len; 828 rq->sense_len = clone->sense_len;
829 } 829 }
830 830
831 free_rq_clone(clone); 831 free_rq_clone(clone);
832 832
833 if (unlikely(is_barrier)) { 833 if (unlikely(is_barrier)) {
834 if (unlikely(error)) 834 if (unlikely(error))
835 store_barrier_error(md, error); 835 store_barrier_error(md, error);
836 run_queue = 0; 836 run_queue = 0;
837 } else 837 } else
838 blk_end_request_all(rq, error); 838 blk_end_request_all(rq, error);
839 839
840 rq_completed(md, rw, run_queue); 840 rq_completed(md, rw, run_queue);
841 } 841 }
842 842
843 static void dm_unprep_request(struct request *rq) 843 static void dm_unprep_request(struct request *rq)
844 { 844 {
845 struct request *clone = rq->special; 845 struct request *clone = rq->special;
846 846
847 rq->special = NULL; 847 rq->special = NULL;
848 rq->cmd_flags &= ~REQ_DONTPREP; 848 rq->cmd_flags &= ~REQ_DONTPREP;
849 849
850 free_rq_clone(clone); 850 free_rq_clone(clone);
851 } 851 }
852 852
853 /* 853 /*
854 * Requeue the original request of a clone. 854 * Requeue the original request of a clone.
855 */ 855 */
856 void dm_requeue_unmapped_request(struct request *clone) 856 void dm_requeue_unmapped_request(struct request *clone)
857 { 857 {
858 int rw = rq_data_dir(clone); 858 int rw = rq_data_dir(clone);
859 struct dm_rq_target_io *tio = clone->end_io_data; 859 struct dm_rq_target_io *tio = clone->end_io_data;
860 struct mapped_device *md = tio->md; 860 struct mapped_device *md = tio->md;
861 struct request *rq = tio->orig; 861 struct request *rq = tio->orig;
862 struct request_queue *q = rq->q; 862 struct request_queue *q = rq->q;
863 unsigned long flags; 863 unsigned long flags;
864 864
865 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { 865 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
866 /* 866 /*
867 * Barrier clones share an original request. 867 * Barrier clones share an original request.
868 * Leave it to dm_end_request(), which handles this special 868 * Leave it to dm_end_request(), which handles this special
869 * case. 869 * case.
870 */ 870 */
871 dm_end_request(clone, DM_ENDIO_REQUEUE); 871 dm_end_request(clone, DM_ENDIO_REQUEUE);
872 return; 872 return;
873 } 873 }
874 874
875 dm_unprep_request(rq); 875 dm_unprep_request(rq);
876 876
877 spin_lock_irqsave(q->queue_lock, flags); 877 spin_lock_irqsave(q->queue_lock, flags);
878 if (elv_queue_empty(q)) 878 if (elv_queue_empty(q))
879 blk_plug_device(q); 879 blk_plug_device(q);
880 blk_requeue_request(q, rq); 880 blk_requeue_request(q, rq);
881 spin_unlock_irqrestore(q->queue_lock, flags); 881 spin_unlock_irqrestore(q->queue_lock, flags);
882 882
883 rq_completed(md, rw, 0); 883 rq_completed(md, rw, 0);
884 } 884 }
885 EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); 885 EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
886 886
887 static void __stop_queue(struct request_queue *q) 887 static void __stop_queue(struct request_queue *q)
888 { 888 {
889 blk_stop_queue(q); 889 blk_stop_queue(q);
890 } 890 }
891 891
892 static void stop_queue(struct request_queue *q) 892 static void stop_queue(struct request_queue *q)
893 { 893 {
894 unsigned long flags; 894 unsigned long flags;
895 895
896 spin_lock_irqsave(q->queue_lock, flags); 896 spin_lock_irqsave(q->queue_lock, flags);
897 __stop_queue(q); 897 __stop_queue(q);
898 spin_unlock_irqrestore(q->queue_lock, flags); 898 spin_unlock_irqrestore(q->queue_lock, flags);
899 } 899 }
900 900
901 static void __start_queue(struct request_queue *q) 901 static void __start_queue(struct request_queue *q)
902 { 902 {
903 if (blk_queue_stopped(q)) 903 if (blk_queue_stopped(q))
904 blk_start_queue(q); 904 blk_start_queue(q);
905 } 905 }
906 906
907 static void start_queue(struct request_queue *q) 907 static void start_queue(struct request_queue *q)
908 { 908 {
909 unsigned long flags; 909 unsigned long flags;
910 910
911 spin_lock_irqsave(q->queue_lock, flags); 911 spin_lock_irqsave(q->queue_lock, flags);
912 __start_queue(q); 912 __start_queue(q);
913 spin_unlock_irqrestore(q->queue_lock, flags); 913 spin_unlock_irqrestore(q->queue_lock, flags);
914 } 914 }
915 915
916 static void dm_done(struct request *clone, int error, bool mapped) 916 static void dm_done(struct request *clone, int error, bool mapped)
917 { 917 {
918 int r = error; 918 int r = error;
919 struct dm_rq_target_io *tio = clone->end_io_data; 919 struct dm_rq_target_io *tio = clone->end_io_data;
920 dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; 920 dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
921 921
922 if (mapped && rq_end_io) 922 if (mapped && rq_end_io)
923 r = rq_end_io(tio->ti, clone, error, &tio->info); 923 r = rq_end_io(tio->ti, clone, error, &tio->info);
924 924
925 if (r <= 0) 925 if (r <= 0)
926 /* The target wants to complete the I/O */ 926 /* The target wants to complete the I/O */
927 dm_end_request(clone, r); 927 dm_end_request(clone, r);
928 else if (r == DM_ENDIO_INCOMPLETE) 928 else if (r == DM_ENDIO_INCOMPLETE)
929 /* The target will handle the I/O */ 929 /* The target will handle the I/O */
930 return; 930 return;
931 else if (r == DM_ENDIO_REQUEUE) 931 else if (r == DM_ENDIO_REQUEUE)
932 /* The target wants to requeue the I/O */ 932 /* The target wants to requeue the I/O */
933 dm_requeue_unmapped_request(clone); 933 dm_requeue_unmapped_request(clone);
934 else { 934 else {
935 DMWARN("unimplemented target endio return value: %d", r); 935 DMWARN("unimplemented target endio return value: %d", r);
936 BUG(); 936 BUG();
937 } 937 }
938 } 938 }
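A hedged sketch of a target rq_end_io hook exercising the branches above; the target and its requeue policy are hypothetical, and the parameter order simply mirrors the call made in dm_done():

	static int example_rq_end_io(struct dm_target *ti, struct request *clone,
				     int error, union map_info *info)
	{
		if (error == -EBUSY)
			return DM_ENDIO_REQUEUE;	/* ask dm to requeue the original request */

		return error;				/* <= 0: dm_done() completes the I/O with this status */
	}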
939 939
940 /* 940 /*
941 * Request completion handler for request-based dm 941 * Request completion handler for request-based dm
942 */ 942 */
943 static void dm_softirq_done(struct request *rq) 943 static void dm_softirq_done(struct request *rq)
944 { 944 {
945 bool mapped = true; 945 bool mapped = true;
946 struct request *clone = rq->completion_data; 946 struct request *clone = rq->completion_data;
947 struct dm_rq_target_io *tio = clone->end_io_data; 947 struct dm_rq_target_io *tio = clone->end_io_data;
948 948
949 if (rq->cmd_flags & REQ_FAILED) 949 if (rq->cmd_flags & REQ_FAILED)
950 mapped = false; 950 mapped = false;
951 951
952 dm_done(clone, tio->error, mapped); 952 dm_done(clone, tio->error, mapped);
953 } 953 }
954 954
955 /* 955 /*
956 * Complete the clone and the original request with the error status 956 * Complete the clone and the original request with the error status
957 * through softirq context. 957 * through softirq context.
958 */ 958 */
959 static void dm_complete_request(struct request *clone, int error) 959 static void dm_complete_request(struct request *clone, int error)
960 { 960 {
961 struct dm_rq_target_io *tio = clone->end_io_data; 961 struct dm_rq_target_io *tio = clone->end_io_data;
962 struct request *rq = tio->orig; 962 struct request *rq = tio->orig;
963 963
964 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { 964 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
965 /* 965 /*
966 * Barrier clones share an original request. So can't use 966 * Barrier clones share an original request. So can't use
967 * softirq_done with the original. 967 * softirq_done with the original.
968 * Pass the clone to dm_done() directly in this special case. 968 * Pass the clone to dm_done() directly in this special case.
969 * It is safe (even if clone->q->queue_lock is held here) 969 * It is safe (even if clone->q->queue_lock is held here)
970 * because there is no I/O dispatching during the completion 970 * because there is no I/O dispatching during the completion
971 * of barrier clone. 971 * of barrier clone.
972 */ 972 */
973 dm_done(clone, error, true); 973 dm_done(clone, error, true);
974 return; 974 return;
975 } 975 }
976 976
977 tio->error = error; 977 tio->error = error;
978 rq->completion_data = clone; 978 rq->completion_data = clone;
979 blk_complete_request(rq); 979 blk_complete_request(rq);
980 } 980 }
981 981
982 /* 982 /*
983 * Complete the not-mapped clone and the original request with the error status 983 * Complete the not-mapped clone and the original request with the error status
984 * through softirq context. 984 * through softirq context.
985 * Target's rq_end_io() function isn't called. 985 * Target's rq_end_io() function isn't called.
986 * This may be used when the target's map_rq() function fails. 986 * This may be used when the target's map_rq() function fails.
987 */ 987 */
988 void dm_kill_unmapped_request(struct request *clone, int error) 988 void dm_kill_unmapped_request(struct request *clone, int error)
989 { 989 {
990 struct dm_rq_target_io *tio = clone->end_io_data; 990 struct dm_rq_target_io *tio = clone->end_io_data;
991 struct request *rq = tio->orig; 991 struct request *rq = tio->orig;
992 992
993 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { 993 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
994 /* 994 /*
995 * Barrier clones share an original request. 995 * Barrier clones share an original request.
996 * Leave it to dm_end_request(), which handles this special 996 * Leave it to dm_end_request(), which handles this special
997 * case. 997 * case.
998 */ 998 */
999 BUG_ON(error > 0); 999 BUG_ON(error > 0);
1000 dm_end_request(clone, error); 1000 dm_end_request(clone, error);
1001 return; 1001 return;
1002 } 1002 }
1003 1003
1004 rq->cmd_flags |= REQ_FAILED; 1004 rq->cmd_flags |= REQ_FAILED;
1005 dm_complete_request(clone, error); 1005 dm_complete_request(clone, error);
1006 } 1006 }
1007 EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); 1007 EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
1008 1008
1009 /* 1009 /*
1010 * Called with the queue lock held 1010 * Called with the queue lock held
1011 */ 1011 */
1012 static void end_clone_request(struct request *clone, int error) 1012 static void end_clone_request(struct request *clone, int error)
1013 { 1013 {
1014 /* 1014 /*
1015 * This just cleans up the bookkeeping of the queue in which 1015 * This just cleans up the bookkeeping of the queue in which
1016 * the clone was dispatched. 1016 * the clone was dispatched.
1017 * The clone is *NOT* actually freed here because it is allocated from 1017 * The clone is *NOT* actually freed here because it is allocated from
1018 * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. 1018 * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
1019 */ 1019 */
1020 __blk_put_request(clone->q, clone); 1020 __blk_put_request(clone->q, clone);
1021 1021
1022 /* 1022 /*
1023 * Actual request completion is done in a softirq context which doesn't 1023 * Actual request completion is done in a softirq context which doesn't
1024 * hold the queue lock. Otherwise, deadlock could occur because: 1024 * hold the queue lock. Otherwise, deadlock could occur because:
1025 * - another request may be submitted by the upper-level driver 1025 * - another request may be submitted by the upper-level driver
1026 * of the stack during the completion 1026 * of the stack during the completion
1027 * - the submission which requires queue lock may be done 1027 * - the submission which requires queue lock may be done
1028 * against this queue 1028 * against this queue
1029 */ 1029 */
1030 dm_complete_request(clone, error); 1030 dm_complete_request(clone, error);
1031 } 1031 }
1032 1032
1033 /* 1033 /*
1034 * Return maximum size of I/O possible at the supplied sector up to the current 1034 * Return maximum size of I/O possible at the supplied sector up to the current
1035 * target boundary. 1035 * target boundary.
1036 */ 1036 */
1037 static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) 1037 static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
1038 { 1038 {
1039 sector_t target_offset = dm_target_offset(ti, sector); 1039 sector_t target_offset = dm_target_offset(ti, sector);
1040 1040
1041 return ti->len - target_offset; 1041 return ti->len - target_offset;
1042 } 1042 }
1043 1043
1044 static sector_t max_io_len(sector_t sector, struct dm_target *ti) 1044 static sector_t max_io_len(sector_t sector, struct dm_target *ti)
1045 { 1045 {
1046 sector_t len = max_io_len_target_boundary(sector, ti); 1046 sector_t len = max_io_len_target_boundary(sector, ti);
1047 1047
1048 /* 1048 /*
1049 * Does the target need to split even further? 1049 * Does the target need to split even further?
1050 */ 1050 */
1051 if (ti->split_io) { 1051 if (ti->split_io) {
1052 sector_t boundary; 1052 sector_t boundary;
1053 sector_t offset = dm_target_offset(ti, sector); 1053 sector_t offset = dm_target_offset(ti, sector);
1054 boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) 1054 boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
1055 - offset; 1055 - offset;
1056 if (len > boundary) 1056 if (len > boundary)
1057 len = boundary; 1057 len = boundary;
1058 } 1058 }
1059 1059
1060 return len; 1060 return len;
1061 } 1061 }
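A worked example of the split_io clamp above, assuming ti->split_io is a power of two (which the mask arithmetic relies on): with split_io = 8 sectors and offset = 5,

	boundary = ((5 + 8) & ~(8 - 1)) - 5 = (13 & ~7) - 5 = 8 - 5 = 3

so max_io_len() returns at most 3 sectors and the I/O is cut at the next 8-sector-aligned boundary within the target.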
1062 1062
1063 static void __map_bio(struct dm_target *ti, struct bio *clone, 1063 static void __map_bio(struct dm_target *ti, struct bio *clone,
1064 struct dm_target_io *tio) 1064 struct dm_target_io *tio)
1065 { 1065 {
1066 int r; 1066 int r;
1067 sector_t sector; 1067 sector_t sector;
1068 struct mapped_device *md; 1068 struct mapped_device *md;
1069 1069
1070 clone->bi_end_io = clone_endio; 1070 clone->bi_end_io = clone_endio;
1071 clone->bi_private = tio; 1071 clone->bi_private = tio;
1072 1072
1073 /* 1073 /*
1074 * Map the clone. If r == 0 we don't need to do 1074 * Map the clone. If r == 0 we don't need to do
1075 * anything; the target has assumed ownership of 1075 * anything; the target has assumed ownership of
1076 * this io. 1076 * this io.
1077 */ 1077 */
1078 atomic_inc(&tio->io->io_count); 1078 atomic_inc(&tio->io->io_count);
1079 sector = clone->bi_sector; 1079 sector = clone->bi_sector;
1080 r = ti->type->map(ti, clone, &tio->info); 1080 r = ti->type->map(ti, clone, &tio->info);
1081 if (r == DM_MAPIO_REMAPPED) { 1081 if (r == DM_MAPIO_REMAPPED) {
1082 /* the bio has been remapped so dispatch it */ 1082 /* the bio has been remapped so dispatch it */
1083 1083
1084 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, 1084 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
1085 tio->io->bio->bi_bdev->bd_dev, sector); 1085 tio->io->bio->bi_bdev->bd_dev, sector);
1086 1086
1087 generic_make_request(clone); 1087 generic_make_request(clone);
1088 } else if (r < 0 || r == DM_MAPIO_REQUEUE) { 1088 } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
1089 /* error the io and bail out, or requeue it if needed */ 1089 /* error the io and bail out, or requeue it if needed */
1090 md = tio->io->md; 1090 md = tio->io->md;
1091 dec_pending(tio->io, r); 1091 dec_pending(tio->io, r);
1092 /* 1092 /*
1093 * Store bio_set for cleanup. 1093 * Store bio_set for cleanup.
1094 */ 1094 */
1095 clone->bi_private = md->bs; 1095 clone->bi_private = md->bs;
1096 bio_put(clone); 1096 bio_put(clone);
1097 free_tio(md, tio); 1097 free_tio(md, tio);
1098 } else if (r) { 1098 } else if (r) {
1099 DMWARN("unimplemented target map return value: %d", r); 1099 DMWARN("unimplemented target map return value: %d", r);
1100 BUG(); 1100 BUG();
1101 } 1101 }
1102 } 1102 }
1103 1103
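For context on the DM_MAPIO_* return values handled in __map_bio() above, here is a hedged sketch of what a bio-based target's ->map method can look like, loosely modelled on a linear remapping target. struct my_linear_ctx and its fields are hypothetical; only the dm_target/map_info types, dm_target_offset() and DM_MAPIO_REMAPPED come from the device-mapper API.

#include <linux/device-mapper.h>

/* Hypothetical per-target context; not part of this commit. */
struct my_linear_ctx {
	struct dm_dev *dev;	/* underlying device */
	sector_t start;		/* offset into that device */
};

static int my_linear_map(struct dm_target *ti, struct bio *bio,
			 union map_info *map_context)
{
	struct my_linear_ctx *mc = ti->private;

	/* Point the clone at the underlying device ... */
	bio->bi_bdev = mc->dev->bdev;
	/* ... at the remapped sector within it. */
	bio->bi_sector = mc->start + dm_target_offset(ti, bio->bi_sector);

	/* Ask __map_bio() to dispatch the remapped clone for us. */
	return DM_MAPIO_REMAPPED;
}

Returning DM_MAPIO_SUBMITTED instead would mean the target has taken ownership of the clone and will submit it itself, and a negative value or DM_MAPIO_REQUEUE takes the error/requeue branch shown above.
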
1104 struct clone_info { 1104 struct clone_info {
1105 struct mapped_device *md; 1105 struct mapped_device *md;
1106 struct dm_table *map; 1106 struct dm_table *map;
1107 struct bio *bio; 1107 struct bio *bio;
1108 struct dm_io *io; 1108 struct dm_io *io;
1109 sector_t sector; 1109 sector_t sector;
1110 sector_t sector_count; 1110 sector_t sector_count;
1111 unsigned short idx; 1111 unsigned short idx;
1112 }; 1112 };
1113 1113
1114 static void dm_bio_destructor(struct bio *bio) 1114 static void dm_bio_destructor(struct bio *bio)
1115 { 1115 {
1116 struct bio_set *bs = bio->bi_private; 1116 struct bio_set *bs = bio->bi_private;
1117 1117
1118 bio_free(bio, bs); 1118 bio_free(bio, bs);
1119 } 1119 }
1120 1120
1121 /* 1121 /*
1122 * Creates a little bio that just does part of a bvec. 1122 * Creates a little bio that just does part of a bvec.
1123 */ 1123 */
1124 static struct bio *split_bvec(struct bio *bio, sector_t sector, 1124 static struct bio *split_bvec(struct bio *bio, sector_t sector,
1125 unsigned short idx, unsigned int offset, 1125 unsigned short idx, unsigned int offset,
1126 unsigned int len, struct bio_set *bs) 1126 unsigned int len, struct bio_set *bs)
1127 { 1127 {
1128 struct bio *clone; 1128 struct bio *clone;
1129 struct bio_vec *bv = bio->bi_io_vec + idx; 1129 struct bio_vec *bv = bio->bi_io_vec + idx;
1130 1130
1131 clone = bio_alloc_bioset(GFP_NOIO, 1, bs); 1131 clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
1132 clone->bi_destructor = dm_bio_destructor; 1132 clone->bi_destructor = dm_bio_destructor;
1133 *clone->bi_io_vec = *bv; 1133 *clone->bi_io_vec = *bv;
1134 1134
1135 clone->bi_sector = sector; 1135 clone->bi_sector = sector;
1136 clone->bi_bdev = bio->bi_bdev; 1136 clone->bi_bdev = bio->bi_bdev;
1137 clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; 1137 clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
1138 clone->bi_vcnt = 1; 1138 clone->bi_vcnt = 1;
1139 clone->bi_size = to_bytes(len); 1139 clone->bi_size = to_bytes(len);
1140 clone->bi_io_vec->bv_offset = offset; 1140 clone->bi_io_vec->bv_offset = offset;
1141 clone->bi_io_vec->bv_len = clone->bi_size; 1141 clone->bi_io_vec->bv_len = clone->bi_size;
1142 clone->bi_flags |= 1 << BIO_CLONED; 1142 clone->bi_flags |= 1 << BIO_CLONED;
1143 1143
1144 if (bio_integrity(bio)) { 1144 if (bio_integrity(bio)) {
1145 bio_integrity_clone(clone, bio, GFP_NOIO, bs); 1145 bio_integrity_clone(clone, bio, GFP_NOIO, bs);
1146 bio_integrity_trim(clone, 1146 bio_integrity_trim(clone,
1147 bio_sector_offset(bio, idx, offset), len); 1147 bio_sector_offset(bio, idx, offset), len);
1148 } 1148 }
1149 1149
1150 return clone; 1150 return clone;
1151 } 1151 }
1152 1152
1153 /* 1153 /*
1154 * Creates a bio that consists of a range of complete bvecs. 1154 * Creates a bio that consists of a range of complete bvecs.
1155 */ 1155 */
1156 static struct bio *clone_bio(struct bio *bio, sector_t sector, 1156 static struct bio *clone_bio(struct bio *bio, sector_t sector,
1157 unsigned short idx, unsigned short bv_count, 1157 unsigned short idx, unsigned short bv_count,
1158 unsigned int len, struct bio_set *bs) 1158 unsigned int len, struct bio_set *bs)
1159 { 1159 {
1160 struct bio *clone; 1160 struct bio *clone;
1161 1161
1162 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); 1162 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
1163 __bio_clone(clone, bio); 1163 __bio_clone(clone, bio);
1164 clone->bi_rw &= ~REQ_HARDBARRIER; 1164 clone->bi_rw &= ~REQ_HARDBARRIER;
1165 clone->bi_destructor = dm_bio_destructor; 1165 clone->bi_destructor = dm_bio_destructor;
1166 clone->bi_sector = sector; 1166 clone->bi_sector = sector;
1167 clone->bi_idx = idx; 1167 clone->bi_idx = idx;
1168 clone->bi_vcnt = idx + bv_count; 1168 clone->bi_vcnt = idx + bv_count;
1169 clone->bi_size = to_bytes(len); 1169 clone->bi_size = to_bytes(len);
1170 clone->bi_flags &= ~(1 << BIO_SEG_VALID); 1170 clone->bi_flags &= ~(1 << BIO_SEG_VALID);
1171 1171
1172 if (bio_integrity(bio)) { 1172 if (bio_integrity(bio)) {
1173 bio_integrity_clone(clone, bio, GFP_NOIO, bs); 1173 bio_integrity_clone(clone, bio, GFP_NOIO, bs);
1174 1174
1175 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) 1175 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
1176 bio_integrity_trim(clone, 1176 bio_integrity_trim(clone,
1177 bio_sector_offset(bio, idx, 0), len); 1177 bio_sector_offset(bio, idx, 0), len);
1178 } 1178 }
1179 1179
1180 return clone; 1180 return clone;
1181 } 1181 }
1182 1182
1183 static struct dm_target_io *alloc_tio(struct clone_info *ci, 1183 static struct dm_target_io *alloc_tio(struct clone_info *ci,
1184 struct dm_target *ti) 1184 struct dm_target *ti)
1185 { 1185 {
1186 struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); 1186 struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO);
1187 1187
1188 tio->io = ci->io; 1188 tio->io = ci->io;
1189 tio->ti = ti; 1189 tio->ti = ti;
1190 memset(&tio->info, 0, sizeof(tio->info)); 1190 memset(&tio->info, 0, sizeof(tio->info));
1191 1191
1192 return tio; 1192 return tio;
1193 } 1193 }
1194 1194
1195 static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, 1195 static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
1196 unsigned request_nr, sector_t len) 1196 unsigned request_nr, sector_t len)
1197 { 1197 {
1198 struct dm_target_io *tio = alloc_tio(ci, ti); 1198 struct dm_target_io *tio = alloc_tio(ci, ti);
1199 struct bio *clone; 1199 struct bio *clone;
1200 1200
1201 tio->info.target_request_nr = request_nr; 1201 tio->info.target_request_nr = request_nr;
1202 1202
1203 /* 1203 /*
1204 * Discard requests require the bio's inline iovecs be initialized. 1204 * Discard requests require the bio's inline iovecs be initialized.
1205 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush 1205 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
1206 * and discard, so no need for concern about wasted bvec allocations. 1206 * and discard, so no need for concern about wasted bvec allocations.
1207 */ 1207 */
1208 clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); 1208 clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs);
1209 __bio_clone(clone, ci->bio); 1209 __bio_clone(clone, ci->bio);
1210 clone->bi_destructor = dm_bio_destructor; 1210 clone->bi_destructor = dm_bio_destructor;
1211 if (len) { 1211 if (len) {
1212 clone->bi_sector = ci->sector; 1212 clone->bi_sector = ci->sector;
1213 clone->bi_size = to_bytes(len); 1213 clone->bi_size = to_bytes(len);
1214 } 1214 }
1215 1215
1216 __map_bio(ti, clone, tio); 1216 __map_bio(ti, clone, tio);
1217 } 1217 }
1218 1218
1219 static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, 1219 static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
1220 unsigned num_requests, sector_t len) 1220 unsigned num_requests, sector_t len)
1221 { 1221 {
1222 unsigned request_nr; 1222 unsigned request_nr;
1223 1223
1224 for (request_nr = 0; request_nr < num_requests; request_nr++) 1224 for (request_nr = 0; request_nr < num_requests; request_nr++)
1225 __issue_target_request(ci, ti, request_nr, len); 1225 __issue_target_request(ci, ti, request_nr, len);
1226 } 1226 }
1227 1227
1228 static int __clone_and_map_empty_barrier(struct clone_info *ci) 1228 static int __clone_and_map_empty_barrier(struct clone_info *ci)
1229 { 1229 {
1230 unsigned target_nr = 0; 1230 unsigned target_nr = 0;
1231 struct dm_target *ti; 1231 struct dm_target *ti;
1232 1232
1233 while ((ti = dm_table_get_target(ci->map, target_nr++))) 1233 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1234 __issue_target_requests(ci, ti, ti->num_flush_requests, 0); 1234 __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
1235 1235
1236 ci->sector_count = 0; 1236 ci->sector_count = 0;
1237 1237
1238 return 0; 1238 return 0;
1239 } 1239 }
1240 1240
1241 /* 1241 /*
1242 * Perform all io with a single clone. 1242 * Perform all io with a single clone.
1243 */ 1243 */
1244 static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) 1244 static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
1245 { 1245 {
1246 struct bio *clone, *bio = ci->bio; 1246 struct bio *clone, *bio = ci->bio;
1247 struct dm_target_io *tio; 1247 struct dm_target_io *tio;
1248 1248
1249 tio = alloc_tio(ci, ti); 1249 tio = alloc_tio(ci, ti);
1250 clone = clone_bio(bio, ci->sector, ci->idx, 1250 clone = clone_bio(bio, ci->sector, ci->idx,
1251 bio->bi_vcnt - ci->idx, ci->sector_count, 1251 bio->bi_vcnt - ci->idx, ci->sector_count,
1252 ci->md->bs); 1252 ci->md->bs);
1253 __map_bio(ti, clone, tio); 1253 __map_bio(ti, clone, tio);
1254 ci->sector_count = 0; 1254 ci->sector_count = 0;
1255 } 1255 }
1256 1256
1257 static int __clone_and_map_discard(struct clone_info *ci) 1257 static int __clone_and_map_discard(struct clone_info *ci)
1258 { 1258 {
1259 struct dm_target *ti; 1259 struct dm_target *ti;
1260 sector_t len; 1260 sector_t len;
1261 1261
1262 do { 1262 do {
1263 ti = dm_table_find_target(ci->map, ci->sector); 1263 ti = dm_table_find_target(ci->map, ci->sector);
1264 if (!dm_target_is_valid(ti)) 1264 if (!dm_target_is_valid(ti))
1265 return -EIO; 1265 return -EIO;
1266 1266
1267 /* 1267 /*
1268 * Even though the device advertised discard support, 1268 * Even though the device advertised discard support,
1269 * reconfiguration might have changed that since the 1269 * reconfiguration might have changed that since the
1270 * check was performed. 1270 * check was performed.
1271 */ 1271 */
1272 if (!ti->num_discard_requests) 1272 if (!ti->num_discard_requests)
1273 return -EOPNOTSUPP; 1273 return -EOPNOTSUPP;
1274 1274
1275 len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); 1275 len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1276 1276
1277 __issue_target_requests(ci, ti, ti->num_discard_requests, len); 1277 __issue_target_requests(ci, ti, ti->num_discard_requests, len);
1278 1278
1279 ci->sector += len; 1279 ci->sector += len;
1280 } while (ci->sector_count -= len); 1280 } while (ci->sector_count -= len);
1281 1281
1282 return 0; 1282 return 0;
1283 } 1283 }
1284 1284
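To make the do/while splitting in __clone_and_map_discard() above concrete, here is a user-space sketch of how a discard that crosses target boundaries gets chopped up. The 100-sector target size and the sample range are invented for illustration only.

#include <stdio.h>

typedef unsigned long long sector_t;

#define TARGET_LEN 100ULL	/* hypothetical: every target is 100 sectors */

/* Same idea as max_io_len_target_boundary(): sectors left in this target. */
static sector_t to_target_boundary(sector_t sector)
{
	return TARGET_LEN - (sector % TARGET_LEN);
}

int main(void)
{
	sector_t sector = 250, count = 180;	/* spans three targets */
	sector_t len;

	do {
		sector_t boundary = to_target_boundary(sector);

		len = count < boundary ? count : boundary;
		/* prints 250..299, 300..399, 400..429 */
		printf("discard %llu..%llu\n", sector, sector + len - 1);
		sector += len;
	} while (count -= len);

	return 0;
}
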
1285 static int __clone_and_map(struct clone_info *ci) 1285 static int __clone_and_map(struct clone_info *ci)
1286 { 1286 {
1287 struct bio *clone, *bio = ci->bio; 1287 struct bio *clone, *bio = ci->bio;
1288 struct dm_target *ti; 1288 struct dm_target *ti;
1289 sector_t len = 0, max; 1289 sector_t len = 0, max;
1290 struct dm_target_io *tio; 1290 struct dm_target_io *tio;
1291 1291
1292 if (unlikely(bio_empty_barrier(bio))) 1292 if (unlikely(bio_empty_barrier(bio)))
1293 return __clone_and_map_empty_barrier(ci); 1293 return __clone_and_map_empty_barrier(ci);
1294 1294
1295 if (unlikely(bio->bi_rw & REQ_DISCARD)) 1295 if (unlikely(bio->bi_rw & REQ_DISCARD))
1296 return __clone_and_map_discard(ci); 1296 return __clone_and_map_discard(ci);
1297 1297
1298 ti = dm_table_find_target(ci->map, ci->sector); 1298 ti = dm_table_find_target(ci->map, ci->sector);
1299 if (!dm_target_is_valid(ti)) 1299 if (!dm_target_is_valid(ti))
1300 return -EIO; 1300 return -EIO;
1301 1301
1302 max = max_io_len(ci->sector, ti); 1302 max = max_io_len(ci->sector, ti);
1303 1303
1304 if (ci->sector_count <= max) { 1304 if (ci->sector_count <= max) {
1305 /* 1305 /*
1306 * Optimise for the simple case where we can do all of 1306 * Optimise for the simple case where we can do all of
1307 * the remaining io with a single clone. 1307 * the remaining io with a single clone.
1308 */ 1308 */
1309 __clone_and_map_simple(ci, ti); 1309 __clone_and_map_simple(ci, ti);
1310 1310
1311 } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { 1311 } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
1312 /* 1312 /*
1313 * There are some bvecs that don't span targets. 1313 * There are some bvecs that don't span targets.
1314 * Do as many of these as possible. 1314 * Do as many of these as possible.
1315 */ 1315 */
1316 int i; 1316 int i;
1317 sector_t remaining = max; 1317 sector_t remaining = max;
1318 sector_t bv_len; 1318 sector_t bv_len;
1319 1319
1320 for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { 1320 for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
1321 bv_len = to_sector(bio->bi_io_vec[i].bv_len); 1321 bv_len = to_sector(bio->bi_io_vec[i].bv_len);
1322 1322
1323 if (bv_len > remaining) 1323 if (bv_len > remaining)
1324 break; 1324 break;
1325 1325
1326 remaining -= bv_len; 1326 remaining -= bv_len;
1327 len += bv_len; 1327 len += bv_len;
1328 } 1328 }
1329 1329
1330 tio = alloc_tio(ci, ti); 1330 tio = alloc_tio(ci, ti);
1331 clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, 1331 clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
1332 ci->md->bs); 1332 ci->md->bs);
1333 __map_bio(ti, clone, tio); 1333 __map_bio(ti, clone, tio);
1334 1334
1335 ci->sector += len; 1335 ci->sector += len;
1336 ci->sector_count -= len; 1336 ci->sector_count -= len;
1337 ci->idx = i; 1337 ci->idx = i;
1338 1338
1339 } else { 1339 } else {
1340 /* 1340 /*
1341 * Handle a bvec that must be split between two or more targets. 1341 * Handle a bvec that must be split between two or more targets.
1342 */ 1342 */
1343 struct bio_vec *bv = bio->bi_io_vec + ci->idx; 1343 struct bio_vec *bv = bio->bi_io_vec + ci->idx;
1344 sector_t remaining = to_sector(bv->bv_len); 1344 sector_t remaining = to_sector(bv->bv_len);
1345 unsigned int offset = 0; 1345 unsigned int offset = 0;
1346 1346
1347 do { 1347 do {
1348 if (offset) { 1348 if (offset) {
1349 ti = dm_table_find_target(ci->map, ci->sector); 1349 ti = dm_table_find_target(ci->map, ci->sector);
1350 if (!dm_target_is_valid(ti)) 1350 if (!dm_target_is_valid(ti))
1351 return -EIO; 1351 return -EIO;
1352 1352
1353 max = max_io_len(ci->sector, ti); 1353 max = max_io_len(ci->sector, ti);
1354 } 1354 }
1355 1355
1356 len = min(remaining, max); 1356 len = min(remaining, max);
1357 1357
1358 tio = alloc_tio(ci, ti); 1358 tio = alloc_tio(ci, ti);
1359 clone = split_bvec(bio, ci->sector, ci->idx, 1359 clone = split_bvec(bio, ci->sector, ci->idx,
1360 bv->bv_offset + offset, len, 1360 bv->bv_offset + offset, len,
1361 ci->md->bs); 1361 ci->md->bs);
1362 1362
1363 __map_bio(ti, clone, tio); 1363 __map_bio(ti, clone, tio);
1364 1364
1365 ci->sector += len; 1365 ci->sector += len;
1366 ci->sector_count -= len; 1366 ci->sector_count -= len;
1367 offset += to_bytes(len); 1367 offset += to_bytes(len);
1368 } while (remaining -= len); 1368 } while (remaining -= len);
1369 1369
1370 ci->idx++; 1370 ci->idx++;
1371 } 1371 }
1372 1372
1373 return 0; 1373 return 0;
1374 } 1374 }
1375 1375
1376 /* 1376 /*
1377 * Split the bio into several clones and submit it to targets. 1377 * Split the bio into several clones and submit it to targets.
1378 */ 1378 */
1379 static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) 1379 static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1380 { 1380 {
1381 struct clone_info ci; 1381 struct clone_info ci;
1382 int error = 0; 1382 int error = 0;
1383 1383
1384 ci.map = dm_get_live_table(md); 1384 ci.map = dm_get_live_table(md);
1385 if (unlikely(!ci.map)) { 1385 if (unlikely(!ci.map)) {
1386 if (!(bio->bi_rw & REQ_HARDBARRIER)) 1386 if (!(bio->bi_rw & REQ_HARDBARRIER))
1387 bio_io_error(bio); 1387 bio_io_error(bio);
1388 else 1388 else
1389 if (!md->barrier_error) 1389 if (!md->barrier_error)
1390 md->barrier_error = -EIO; 1390 md->barrier_error = -EIO;
1391 return; 1391 return;
1392 } 1392 }
1393 1393
1394 ci.md = md; 1394 ci.md = md;
1395 ci.bio = bio; 1395 ci.bio = bio;
1396 ci.io = alloc_io(md); 1396 ci.io = alloc_io(md);
1397 ci.io->error = 0; 1397 ci.io->error = 0;
1398 atomic_set(&ci.io->io_count, 1); 1398 atomic_set(&ci.io->io_count, 1);
1399 ci.io->bio = bio; 1399 ci.io->bio = bio;
1400 ci.io->md = md; 1400 ci.io->md = md;
1401 spin_lock_init(&ci.io->endio_lock); 1401 spin_lock_init(&ci.io->endio_lock);
1402 ci.sector = bio->bi_sector; 1402 ci.sector = bio->bi_sector;
1403 ci.sector_count = bio_sectors(bio); 1403 ci.sector_count = bio_sectors(bio);
1404 if (unlikely(bio_empty_barrier(bio))) 1404 if (unlikely(bio_empty_barrier(bio)))
1405 ci.sector_count = 1; 1405 ci.sector_count = 1;
1406 ci.idx = bio->bi_idx; 1406 ci.idx = bio->bi_idx;
1407 1407
1408 start_io_acct(ci.io); 1408 start_io_acct(ci.io);
1409 while (ci.sector_count && !error) 1409 while (ci.sector_count && !error)
1410 error = __clone_and_map(&ci); 1410 error = __clone_and_map(&ci);
1411 1411
1412 /* drop the extra reference count */ 1412 /* drop the extra reference count */
1413 dec_pending(ci.io, error); 1413 dec_pending(ci.io, error);
1414 dm_table_put(ci.map); 1414 dm_table_put(ci.map);
1415 } 1415 }
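The "extra reference count" dropped at the end of __split_and_process_bio() is the one taken by atomic_set(&ci.io->io_count, 1); each clone mapped by __map_bio() adds another, and the io only completes when the count falls to zero. A minimal user-space sketch of that pattern follows; all names here are ours.

#include <stdio.h>

struct demo_io {
	int io_count;
	int error;
};

static void demo_dec_pending(struct demo_io *io, int error)
{
	if (error)
		io->error = error;
	if (--io->io_count == 0)
		printf("all clones done, completing with error=%d\n", io->error);
}

int main(void)
{
	struct demo_io io = { .io_count = 1, .error = 0 };	/* submitter's ref */
	int clones = 3, i;

	for (i = 0; i < clones; i++)
		io.io_count++;			/* one ref per mapped clone */
	for (i = 0; i < clones; i++)
		demo_dec_pending(&io, 0);	/* each clone's endio drops one */

	demo_dec_pending(&io, 0);		/* drop the submitter's extra ref */
	return 0;
}
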
1416 /*----------------------------------------------------------------- 1416 /*-----------------------------------------------------------------
1417 * CRUD END 1417 * CRUD END
1418 *---------------------------------------------------------------*/ 1418 *---------------------------------------------------------------*/
1419 1419
1420 static int dm_merge_bvec(struct request_queue *q, 1420 static int dm_merge_bvec(struct request_queue *q,
1421 struct bvec_merge_data *bvm, 1421 struct bvec_merge_data *bvm,
1422 struct bio_vec *biovec) 1422 struct bio_vec *biovec)
1423 { 1423 {
1424 struct mapped_device *md = q->queuedata; 1424 struct mapped_device *md = q->queuedata;
1425 struct dm_table *map = dm_get_live_table(md); 1425 struct dm_table *map = dm_get_live_table(md);
1426 struct dm_target *ti; 1426 struct dm_target *ti;
1427 sector_t max_sectors; 1427 sector_t max_sectors;
1428 int max_size = 0; 1428 int max_size = 0;
1429 1429
1430 if (unlikely(!map)) 1430 if (unlikely(!map))
1431 goto out; 1431 goto out;
1432 1432
1433 ti = dm_table_find_target(map, bvm->bi_sector); 1433 ti = dm_table_find_target(map, bvm->bi_sector);
1434 if (!dm_target_is_valid(ti)) 1434 if (!dm_target_is_valid(ti))
1435 goto out_table; 1435 goto out_table;
1436 1436
1437 /* 1437 /*
1438 * Find maximum amount of I/O that won't need splitting 1438 * Find maximum amount of I/O that won't need splitting
1439 */ 1439 */
1440 max_sectors = min(max_io_len(bvm->bi_sector, ti), 1440 max_sectors = min(max_io_len(bvm->bi_sector, ti),
1441 (sector_t) BIO_MAX_SECTORS); 1441 (sector_t) BIO_MAX_SECTORS);
1442 max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; 1442 max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
1443 if (max_size < 0) 1443 if (max_size < 0)
1444 max_size = 0; 1444 max_size = 0;
1445 1445
1446 /* 1446 /*
1447 * merge_bvec_fn() returns the number of bytes 1447 * merge_bvec_fn() returns the number of bytes
1448 * it can accept at this offset. 1448 * it can accept at this offset.
1449 * max is the precomputed maximal io size. 1449 * max is the precomputed maximal io size.
1450 */ 1450 */
1451 if (max_size && ti->type->merge) 1451 if (max_size && ti->type->merge)
1452 max_size = ti->type->merge(ti, bvm, biovec, max_size); 1452 max_size = ti->type->merge(ti, bvm, biovec, max_size);
1453 /* 1453 /*
1454 * If the target doesn't support the merge method and some of the 1454 * If the target doesn't support the merge method and some of the
1455 * devices provide their own merge_bvec method (we know this by looking 1455 * devices provide their own merge_bvec method (we know this by looking
1456 * at queue_max_hw_sectors), then we can't allow bios with multiple 1456 * at queue_max_hw_sectors), then we can't allow bios with multiple
1457 * vector entries. So always set max_size to 0, and the code below 1457 * vector entries. So always set max_size to 0, and the code below
1458 * allows just one page. 1458 * allows just one page.
1459 */ 1459 */
1460 else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) 1460 else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
1461 1461
1462 max_size = 0; 1462 max_size = 0;
1463 1463
1464 out_table: 1464 out_table:
1465 dm_table_put(map); 1465 dm_table_put(map);
1466 1466
1467 out: 1467 out:
1468 /* 1468 /*
1469 * Always allow an entire first page 1469 * Always allow an entire first page
1470 */ 1470 */
1471 if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) 1471 if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
1472 max_size = biovec->bv_len; 1472 max_size = biovec->bv_len;
1473 1473
1474 return max_size; 1474 return max_size;
1475 } 1475 }
1476 1476
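The byte arithmetic in dm_merge_bvec() deserves a worked example: with hypothetical numbers, 8 mergeable sectors and 2048 bytes already in the bio leave room for exactly 2048 more bytes at this offset.

#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors */

int main(void)
{
	unsigned long long max_sectors = 8;	/* pretend max_io_len() said 8 */
	unsigned int bi_size = 2048;		/* bytes already in the bio */
	long long max_size;

	max_size = (long long)(max_sectors << SECTOR_SHIFT) - bi_size;
	if (max_size < 0)
		max_size = 0;

	printf("can merge %lld more bytes at this offset\n", max_size);	/* 2048 */
	return 0;
}
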
1477 /* 1477 /*
1478 * The request function that just remaps the bio built up by 1478 * The request function that just remaps the bio built up by
1479 * dm_merge_bvec. 1479 * dm_merge_bvec.
1480 */ 1480 */
1481 static int _dm_request(struct request_queue *q, struct bio *bio) 1481 static int _dm_request(struct request_queue *q, struct bio *bio)
1482 { 1482 {
1483 int rw = bio_data_dir(bio); 1483 int rw = bio_data_dir(bio);
1484 struct mapped_device *md = q->queuedata; 1484 struct mapped_device *md = q->queuedata;
1485 int cpu; 1485 int cpu;
1486 1486
1487 down_read(&md->io_lock); 1487 down_read(&md->io_lock);
1488 1488
1489 cpu = part_stat_lock(); 1489 cpu = part_stat_lock();
1490 part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); 1490 part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
1491 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); 1491 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
1492 part_stat_unlock(); 1492 part_stat_unlock();
1493 1493
1494 /* 1494 /*
1495 * If we're suspended or the thread is processing barriers, 1495 * If we're suspended or the thread is processing barriers,
1496 * we have to queue this io for later. 1496 * we have to queue this io for later.
1497 */ 1497 */
1498 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || 1498 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
1499 unlikely(bio->bi_rw & REQ_HARDBARRIER)) { 1499 unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
1500 up_read(&md->io_lock); 1500 up_read(&md->io_lock);
1501 1501
1502 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && 1502 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
1503 bio_rw(bio) == READA) { 1503 bio_rw(bio) == READA) {
1504 bio_io_error(bio); 1504 bio_io_error(bio);
1505 return 0; 1505 return 0;
1506 } 1506 }
1507 1507
1508 queue_io(md, bio); 1508 queue_io(md, bio);
1509 1509
1510 return 0; 1510 return 0;
1511 } 1511 }
1512 1512
1513 __split_and_process_bio(md, bio); 1513 __split_and_process_bio(md, bio);
1514 up_read(&md->io_lock); 1514 up_read(&md->io_lock);
1515 return 0; 1515 return 0;
1516 } 1516 }
1517 1517
1518 static int dm_make_request(struct request_queue *q, struct bio *bio) 1518 static int dm_make_request(struct request_queue *q, struct bio *bio)
1519 { 1519 {
1520 struct mapped_device *md = q->queuedata; 1520 struct mapped_device *md = q->queuedata;
1521 1521
1522 return md->saved_make_request_fn(q, bio); /* call __make_request() */ 1522 return md->saved_make_request_fn(q, bio); /* call __make_request() */
1523 } 1523 }
1524 1524
1525 static int dm_request_based(struct mapped_device *md) 1525 static int dm_request_based(struct mapped_device *md)
1526 { 1526 {
1527 return blk_queue_stackable(md->queue); 1527 return blk_queue_stackable(md->queue);
1528 } 1528 }
1529 1529
1530 static int dm_request(struct request_queue *q, struct bio *bio) 1530 static int dm_request(struct request_queue *q, struct bio *bio)
1531 { 1531 {
1532 struct mapped_device *md = q->queuedata; 1532 struct mapped_device *md = q->queuedata;
1533 1533
1534 if (dm_request_based(md)) 1534 if (dm_request_based(md))
1535 return dm_make_request(q, bio); 1535 return dm_make_request(q, bio);
1536 1536
1537 return _dm_request(q, bio); 1537 return _dm_request(q, bio);
1538 } 1538 }
1539 1539
1540 static bool dm_rq_is_flush_request(struct request *rq) 1540 static bool dm_rq_is_flush_request(struct request *rq)
1541 { 1541 {
1542 if (rq->cmd_flags & REQ_FLUSH) 1542 if (rq->cmd_flags & REQ_FLUSH)
1543 return true; 1543 return true;
1544 else 1544 else
1545 return false; 1545 return false;
1546 } 1546 }
1547 1547
1548 void dm_dispatch_request(struct request *rq) 1548 void dm_dispatch_request(struct request *rq)
1549 { 1549 {
1550 int r; 1550 int r;
1551 1551
1552 if (blk_queue_io_stat(rq->q)) 1552 if (blk_queue_io_stat(rq->q))
1553 rq->cmd_flags |= REQ_IO_STAT; 1553 rq->cmd_flags |= REQ_IO_STAT;
1554 1554
1555 rq->start_time = jiffies; 1555 rq->start_time = jiffies;
1556 r = blk_insert_cloned_request(rq->q, rq); 1556 r = blk_insert_cloned_request(rq->q, rq);
1557 if (r) 1557 if (r)
1558 dm_complete_request(rq, r); 1558 dm_complete_request(rq, r);
1559 } 1559 }
1560 EXPORT_SYMBOL_GPL(dm_dispatch_request); 1560 EXPORT_SYMBOL_GPL(dm_dispatch_request);
1561 1561
1562 static void dm_rq_bio_destructor(struct bio *bio) 1562 static void dm_rq_bio_destructor(struct bio *bio)
1563 { 1563 {
1564 struct dm_rq_clone_bio_info *info = bio->bi_private; 1564 struct dm_rq_clone_bio_info *info = bio->bi_private;
1565 struct mapped_device *md = info->tio->md; 1565 struct mapped_device *md = info->tio->md;
1566 1566
1567 free_bio_info(info); 1567 free_bio_info(info);
1568 bio_free(bio, md->bs); 1568 bio_free(bio, md->bs);
1569 } 1569 }
1570 1570
1571 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1571 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1572 void *data) 1572 void *data)
1573 { 1573 {
1574 struct dm_rq_target_io *tio = data; 1574 struct dm_rq_target_io *tio = data;
1575 struct mapped_device *md = tio->md; 1575 struct mapped_device *md = tio->md;
1576 struct dm_rq_clone_bio_info *info = alloc_bio_info(md); 1576 struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
1577 1577
1578 if (!info) 1578 if (!info)
1579 return -ENOMEM; 1579 return -ENOMEM;
1580 1580
1581 info->orig = bio_orig; 1581 info->orig = bio_orig;
1582 info->tio = tio; 1582 info->tio = tio;
1583 bio->bi_end_io = end_clone_bio; 1583 bio->bi_end_io = end_clone_bio;
1584 bio->bi_private = info; 1584 bio->bi_private = info;
1585 bio->bi_destructor = dm_rq_bio_destructor; 1585 bio->bi_destructor = dm_rq_bio_destructor;
1586 1586
1587 return 0; 1587 return 0;
1588 } 1588 }
1589 1589
1590 static int setup_clone(struct request *clone, struct request *rq, 1590 static int setup_clone(struct request *clone, struct request *rq,
1591 struct dm_rq_target_io *tio) 1591 struct dm_rq_target_io *tio)
1592 { 1592 {
1593 int r; 1593 int r;
1594 1594
1595 if (dm_rq_is_flush_request(rq)) { 1595 if (dm_rq_is_flush_request(rq)) {
1596 blk_rq_init(NULL, clone); 1596 blk_rq_init(NULL, clone);
1597 clone->cmd_type = REQ_TYPE_FS; 1597 clone->cmd_type = REQ_TYPE_FS;
1598 clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); 1598 clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
1599 } else { 1599 } else {
1600 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, 1600 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1601 dm_rq_bio_constructor, tio); 1601 dm_rq_bio_constructor, tio);
1602 if (r) 1602 if (r)
1603 return r; 1603 return r;
1604 1604
1605 clone->cmd = rq->cmd; 1605 clone->cmd = rq->cmd;
1606 clone->cmd_len = rq->cmd_len; 1606 clone->cmd_len = rq->cmd_len;
1607 clone->sense = rq->sense; 1607 clone->sense = rq->sense;
1608 clone->buffer = rq->buffer; 1608 clone->buffer = rq->buffer;
1609 } 1609 }
1610 1610
1611 clone->end_io = end_clone_request; 1611 clone->end_io = end_clone_request;
1612 clone->end_io_data = tio; 1612 clone->end_io_data = tio;
1613 1613
1614 return 0; 1614 return 0;
1615 } 1615 }
1616 1616
1617 static struct request *clone_rq(struct request *rq, struct mapped_device *md, 1617 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1618 gfp_t gfp_mask) 1618 gfp_t gfp_mask)
1619 { 1619 {
1620 struct request *clone; 1620 struct request *clone;
1621 struct dm_rq_target_io *tio; 1621 struct dm_rq_target_io *tio;
1622 1622
1623 tio = alloc_rq_tio(md, gfp_mask); 1623 tio = alloc_rq_tio(md, gfp_mask);
1624 if (!tio) 1624 if (!tio)
1625 return NULL; 1625 return NULL;
1626 1626
1627 tio->md = md; 1627 tio->md = md;
1628 tio->ti = NULL; 1628 tio->ti = NULL;
1629 tio->orig = rq; 1629 tio->orig = rq;
1630 tio->error = 0; 1630 tio->error = 0;
1631 memset(&tio->info, 0, sizeof(tio->info)); 1631 memset(&tio->info, 0, sizeof(tio->info));
1632 1632
1633 clone = &tio->clone; 1633 clone = &tio->clone;
1634 if (setup_clone(clone, rq, tio)) { 1634 if (setup_clone(clone, rq, tio)) {
1635 /* -ENOMEM */ 1635 /* -ENOMEM */
1636 free_rq_tio(tio); 1636 free_rq_tio(tio);
1637 return NULL; 1637 return NULL;
1638 } 1638 }
1639 1639
1640 return clone; 1640 return clone;
1641 } 1641 }
1642 1642
1643 /* 1643 /*
1644 * Called with the queue lock held. 1644 * Called with the queue lock held.
1645 */ 1645 */
1646 static int dm_prep_fn(struct request_queue *q, struct request *rq) 1646 static int dm_prep_fn(struct request_queue *q, struct request *rq)
1647 { 1647 {
1648 struct mapped_device *md = q->queuedata; 1648 struct mapped_device *md = q->queuedata;
1649 struct request *clone; 1649 struct request *clone;
1650 1650
1651 if (unlikely(dm_rq_is_flush_request(rq))) 1651 if (unlikely(dm_rq_is_flush_request(rq)))
1652 return BLKPREP_OK; 1652 return BLKPREP_OK;
1653 1653
1654 if (unlikely(rq->special)) { 1654 if (unlikely(rq->special)) {
1655 DMWARN("Already has something in rq->special."); 1655 DMWARN("Already has something in rq->special.");
1656 return BLKPREP_KILL; 1656 return BLKPREP_KILL;
1657 } 1657 }
1658 1658
1659 clone = clone_rq(rq, md, GFP_ATOMIC); 1659 clone = clone_rq(rq, md, GFP_ATOMIC);
1660 if (!clone) 1660 if (!clone)
1661 return BLKPREP_DEFER; 1661 return BLKPREP_DEFER;
1662 1662
1663 rq->special = clone; 1663 rq->special = clone;
1664 rq->cmd_flags |= REQ_DONTPREP; 1664 rq->cmd_flags |= REQ_DONTPREP;
1665 1665
1666 return BLKPREP_OK; 1666 return BLKPREP_OK;
1667 } 1667 }
1668 1668
1669 /* 1669 /*
1670 * Returns: 1670 * Returns:
1671 * 0 : the request has been processed (not requeued) 1671 * 0 : the request has been processed (not requeued)
1672 * !0 : the request has been requeued 1672 * !0 : the request has been requeued
1673 */ 1673 */
1674 static int map_request(struct dm_target *ti, struct request *clone, 1674 static int map_request(struct dm_target *ti, struct request *clone,
1675 struct mapped_device *md) 1675 struct mapped_device *md)
1676 { 1676 {
1677 int r, requeued = 0; 1677 int r, requeued = 0;
1678 struct dm_rq_target_io *tio = clone->end_io_data; 1678 struct dm_rq_target_io *tio = clone->end_io_data;
1679 1679
1680 /* 1680 /*
1681 * Hold the md reference here for the in-flight I/O. 1681 * Hold the md reference here for the in-flight I/O.
1682 * We can't rely on the reference count held by the device opener, 1682 * We can't rely on the reference count held by the device opener,
1683 * because the device may be closed during the request completion 1683 * because the device may be closed during the request completion
1684 * when all bios are completed. 1684 * when all bios are completed.
1685 * See the comment in rq_completed() too. 1685 * See the comment in rq_completed() too.
1686 */ 1686 */
1687 dm_get(md); 1687 dm_get(md);
1688 1688
1689 tio->ti = ti; 1689 tio->ti = ti;
1690 r = ti->type->map_rq(ti, clone, &tio->info); 1690 r = ti->type->map_rq(ti, clone, &tio->info);
1691 switch (r) { 1691 switch (r) {
1692 case DM_MAPIO_SUBMITTED: 1692 case DM_MAPIO_SUBMITTED:
1693 /* The target has taken the I/O to submit by itself later */ 1693 /* The target has taken the I/O to submit by itself later */
1694 break; 1694 break;
1695 case DM_MAPIO_REMAPPED: 1695 case DM_MAPIO_REMAPPED:
1696 /* The target has remapped the I/O so dispatch it */ 1696 /* The target has remapped the I/O so dispatch it */
1697 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), 1697 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
1698 blk_rq_pos(tio->orig)); 1698 blk_rq_pos(tio->orig));
1699 dm_dispatch_request(clone); 1699 dm_dispatch_request(clone);
1700 break; 1700 break;
1701 case DM_MAPIO_REQUEUE: 1701 case DM_MAPIO_REQUEUE:
1702 /* The target wants to requeue the I/O */ 1702 /* The target wants to requeue the I/O */
1703 dm_requeue_unmapped_request(clone); 1703 dm_requeue_unmapped_request(clone);
1704 requeued = 1; 1704 requeued = 1;
1705 break; 1705 break;
1706 default: 1706 default:
1707 if (r > 0) { 1707 if (r > 0) {
1708 DMWARN("unimplemented target map return value: %d", r); 1708 DMWARN("unimplemented target map return value: %d", r);
1709 BUG(); 1709 BUG();
1710 } 1710 }
1711 1711
1712 /* The target wants to complete the I/O */ 1712 /* The target wants to complete the I/O */
1713 dm_kill_unmapped_request(clone, r); 1713 dm_kill_unmapped_request(clone, r);
1714 break; 1714 break;
1715 } 1715 }
1716 1716
1717 return requeued; 1717 return requeued;
1718 } 1718 }
1719 1719
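As a hedged sketch of the request-based side, this is roughly what a ->map_rq method called by map_request() above might do, loosely modelled on a multipath-style target. my_choose_bdev() is a hypothetical path selector, not a real API; the request fields and DM_MAPIO_* values are from the block and device-mapper layers.

#include <linux/blkdev.h>
#include <linux/device-mapper.h>

/* Hypothetical helper picking an underlying device for this I/O. */
struct block_device *my_choose_bdev(struct dm_target *ti);

static int my_map_rq(struct dm_target *ti, struct request *clone,
		     union map_info *map_context)
{
	struct block_device *bdev = my_choose_bdev(ti);

	if (!bdev)
		return DM_MAPIO_REQUEUE;	/* no usable path right now */

	/* Redirect the clone to the chosen device's queue. */
	clone->q = bdev_get_queue(bdev);
	clone->rq_disk = bdev->bd_disk;

	return DM_MAPIO_REMAPPED;	/* map_request() dispatches the clone */
}
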
1720 /* 1720 /*
1721 * q->request_fn for request-based dm. 1721 * q->request_fn for request-based dm.
1722 * Called with the queue lock held. 1722 * Called with the queue lock held.
1723 */ 1723 */
1724 static void dm_request_fn(struct request_queue *q) 1724 static void dm_request_fn(struct request_queue *q)
1725 { 1725 {
1726 struct mapped_device *md = q->queuedata; 1726 struct mapped_device *md = q->queuedata;
1727 struct dm_table *map = dm_get_live_table(md); 1727 struct dm_table *map = dm_get_live_table(md);
1728 struct dm_target *ti; 1728 struct dm_target *ti;
1729 struct request *rq, *clone; 1729 struct request *rq, *clone;
1730 1730
1731 /* 1731 /*
1732 * For suspend, check blk_queue_stopped() and increment 1732 * For suspend, check blk_queue_stopped() and increment
1733 * ->pending within a single queue_lock so that the number of 1733 * ->pending within a single queue_lock so that the number of
1734 * in-flight I/Os is not incremented after the queue is stopped 1734 * in-flight I/Os is not incremented after the queue is stopped
1735 * in dm_suspend(). 1735 * in dm_suspend().
1736 */ 1736 */
1737 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { 1737 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
1738 rq = blk_peek_request(q); 1738 rq = blk_peek_request(q);
1739 if (!rq) 1739 if (!rq)
1740 goto plug_and_out; 1740 goto plug_and_out;
1741 1741
1742 if (unlikely(dm_rq_is_flush_request(rq))) { 1742 if (unlikely(dm_rq_is_flush_request(rq))) {
1743 BUG_ON(md->flush_request); 1743 BUG_ON(md->flush_request);
1744 md->flush_request = rq; 1744 md->flush_request = rq;
1745 blk_start_request(rq); 1745 blk_start_request(rq);
1746 queue_work(md->wq, &md->barrier_work); 1746 queue_work(md->wq, &md->barrier_work);
1747 goto out; 1747 goto out;
1748 } 1748 }
1749 1749
1750 ti = dm_table_find_target(map, blk_rq_pos(rq)); 1750 ti = dm_table_find_target(map, blk_rq_pos(rq));
1751 if (ti->type->busy && ti->type->busy(ti)) 1751 if (ti->type->busy && ti->type->busy(ti))
1752 goto plug_and_out; 1752 goto plug_and_out;
1753 1753
1754 blk_start_request(rq); 1754 blk_start_request(rq);
1755 clone = rq->special; 1755 clone = rq->special;
1756 atomic_inc(&md->pending[rq_data_dir(clone)]); 1756 atomic_inc(&md->pending[rq_data_dir(clone)]);
1757 1757
1758 spin_unlock(q->queue_lock); 1758 spin_unlock(q->queue_lock);
1759 if (map_request(ti, clone, md)) 1759 if (map_request(ti, clone, md))
1760 goto requeued; 1760 goto requeued;
1761 1761
1762 spin_lock_irq(q->queue_lock); 1762 spin_lock_irq(q->queue_lock);
1763 } 1763 }
1764 1764
1765 goto out; 1765 goto out;
1766 1766
1767 requeued: 1767 requeued:
1768 spin_lock_irq(q->queue_lock); 1768 spin_lock_irq(q->queue_lock);
1769 1769
1770 plug_and_out: 1770 plug_and_out:
1771 if (!elv_queue_empty(q)) 1771 if (!elv_queue_empty(q))
1772 /* Some requests still remain, retry later */ 1772 /* Some requests still remain, retry later */
1773 blk_plug_device(q); 1773 blk_plug_device(q);
1774 1774
1775 out: 1775 out:
1776 dm_table_put(map); 1776 dm_table_put(map);
1777 1777
1778 return; 1778 return;
1779 } 1779 }
1780 1780
1781 int dm_underlying_device_busy(struct request_queue *q) 1781 int dm_underlying_device_busy(struct request_queue *q)
1782 { 1782 {
1783 return blk_lld_busy(q); 1783 return blk_lld_busy(q);
1784 } 1784 }
1785 EXPORT_SYMBOL_GPL(dm_underlying_device_busy); 1785 EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
1786 1786
1787 static int dm_lld_busy(struct request_queue *q) 1787 static int dm_lld_busy(struct request_queue *q)
1788 { 1788 {
1789 int r; 1789 int r;
1790 struct mapped_device *md = q->queuedata; 1790 struct mapped_device *md = q->queuedata;
1791 struct dm_table *map = dm_get_live_table(md); 1791 struct dm_table *map = dm_get_live_table(md);
1792 1792
1793 if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) 1793 if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
1794 r = 1; 1794 r = 1;
1795 else 1795 else
1796 r = dm_table_any_busy_target(map); 1796 r = dm_table_any_busy_target(map);
1797 1797
1798 dm_table_put(map); 1798 dm_table_put(map);
1799 1799
1800 return r; 1800 return r;
1801 } 1801 }
1802 1802
1803 static void dm_unplug_all(struct request_queue *q) 1803 static void dm_unplug_all(struct request_queue *q)
1804 { 1804 {
1805 struct mapped_device *md = q->queuedata; 1805 struct mapped_device *md = q->queuedata;
1806 struct dm_table *map = dm_get_live_table(md); 1806 struct dm_table *map = dm_get_live_table(md);
1807 1807
1808 if (map) { 1808 if (map) {
1809 if (dm_request_based(md)) 1809 if (dm_request_based(md))
1810 generic_unplug_device(q); 1810 generic_unplug_device(q);
1811 1811
1812 dm_table_unplug_all(map); 1812 dm_table_unplug_all(map);
1813 dm_table_put(map); 1813 dm_table_put(map);
1814 } 1814 }
1815 } 1815 }
1816 1816
1817 static int dm_any_congested(void *congested_data, int bdi_bits) 1817 static int dm_any_congested(void *congested_data, int bdi_bits)
1818 { 1818 {
1819 int r = bdi_bits; 1819 int r = bdi_bits;
1820 struct mapped_device *md = congested_data; 1820 struct mapped_device *md = congested_data;
1821 struct dm_table *map; 1821 struct dm_table *map;
1822 1822
1823 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 1823 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
1824 map = dm_get_live_table(md); 1824 map = dm_get_live_table(md);
1825 if (map) { 1825 if (map) {
1826 /* 1826 /*
1827 * Request-based dm cares only about its own queue when 1827 * Request-based dm cares only about its own queue when
1828 * queried about the congestion status of the request_queue 1828 * queried about the congestion status of the request_queue
1829 */ 1829 */
1830 if (dm_request_based(md)) 1830 if (dm_request_based(md))
1831 r = md->queue->backing_dev_info.state & 1831 r = md->queue->backing_dev_info.state &
1832 bdi_bits; 1832 bdi_bits;
1833 else 1833 else
1834 r = dm_table_any_congested(map, bdi_bits); 1834 r = dm_table_any_congested(map, bdi_bits);
1835 1835
1836 dm_table_put(map); 1836 dm_table_put(map);
1837 } 1837 }
1838 } 1838 }
1839 1839
1840 return r; 1840 return r;
1841 } 1841 }
1842 1842
1843 /*----------------------------------------------------------------- 1843 /*-----------------------------------------------------------------
1844 * An IDR is used to keep track of allocated minor numbers. 1844 * An IDR is used to keep track of allocated minor numbers.
1845 *---------------------------------------------------------------*/ 1845 *---------------------------------------------------------------*/
1846 static DEFINE_IDR(_minor_idr); 1846 static DEFINE_IDR(_minor_idr);
1847 1847
1848 static void free_minor(int minor) 1848 static void free_minor(int minor)
1849 { 1849 {
1850 spin_lock(&_minor_lock); 1850 spin_lock(&_minor_lock);
1851 idr_remove(&_minor_idr, minor); 1851 idr_remove(&_minor_idr, minor);
1852 spin_unlock(&_minor_lock); 1852 spin_unlock(&_minor_lock);
1853 } 1853 }
1854 1854
1855 /* 1855 /*
1856 * See if the device with a specific minor # is free. 1856 * See if the device with a specific minor # is free.
1857 */ 1857 */
1858 static int specific_minor(int minor) 1858 static int specific_minor(int minor)
1859 { 1859 {
1860 int r, m; 1860 int r, m;
1861 1861
1862 if (minor >= (1 << MINORBITS)) 1862 if (minor >= (1 << MINORBITS))
1863 return -EINVAL; 1863 return -EINVAL;
1864 1864
1865 r = idr_pre_get(&_minor_idr, GFP_KERNEL); 1865 r = idr_pre_get(&_minor_idr, GFP_KERNEL);
1866 if (!r) 1866 if (!r)
1867 return -ENOMEM; 1867 return -ENOMEM;
1868 1868
1869 spin_lock(&_minor_lock); 1869 spin_lock(&_minor_lock);
1870 1870
1871 if (idr_find(&_minor_idr, minor)) { 1871 if (idr_find(&_minor_idr, minor)) {
1872 r = -EBUSY; 1872 r = -EBUSY;
1873 goto out; 1873 goto out;
1874 } 1874 }
1875 1875
1876 r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); 1876 r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
1877 if (r) 1877 if (r)
1878 goto out; 1878 goto out;
1879 1879
1880 if (m != minor) { 1880 if (m != minor) {
1881 idr_remove(&_minor_idr, m); 1881 idr_remove(&_minor_idr, m);
1882 r = -EBUSY; 1882 r = -EBUSY;
1883 goto out; 1883 goto out;
1884 } 1884 }
1885 1885
1886 out: 1886 out:
1887 spin_unlock(&_minor_lock); 1887 spin_unlock(&_minor_lock);
1888 return r; 1888 return r;
1889 } 1889 }
1890 1890
1891 static int next_free_minor(int *minor) 1891 static int next_free_minor(int *minor)
1892 { 1892 {
1893 int r, m; 1893 int r, m;
1894 1894
1895 r = idr_pre_get(&_minor_idr, GFP_KERNEL); 1895 r = idr_pre_get(&_minor_idr, GFP_KERNEL);
1896 if (!r) 1896 if (!r)
1897 return -ENOMEM; 1897 return -ENOMEM;
1898 1898
1899 spin_lock(&_minor_lock); 1899 spin_lock(&_minor_lock);
1900 1900
1901 r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); 1901 r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
1902 if (r) 1902 if (r)
1903 goto out; 1903 goto out;
1904 1904
1905 if (m >= (1 << MINORBITS)) { 1905 if (m >= (1 << MINORBITS)) {
1906 idr_remove(&_minor_idr, m); 1906 idr_remove(&_minor_idr, m);
1907 r = -ENOSPC; 1907 r = -ENOSPC;
1908 goto out; 1908 goto out;
1909 } 1909 }
1910 1910
1911 *minor = m; 1911 *minor = m;
1912 1912
1913 out: 1913 out:
1914 spin_unlock(&_minor_lock); 1914 spin_unlock(&_minor_lock);
1915 return r; 1915 return r;
1916 } 1916 }
1917 1917
1918 static const struct block_device_operations dm_blk_dops; 1918 static const struct block_device_operations dm_blk_dops;
1919 1919
1920 static void dm_wq_work(struct work_struct *work); 1920 static void dm_wq_work(struct work_struct *work);
1921 static void dm_rq_barrier_work(struct work_struct *work); 1921 static void dm_rq_barrier_work(struct work_struct *work);
1922 1922
1923 static void dm_init_md_queue(struct mapped_device *md) 1923 static void dm_init_md_queue(struct mapped_device *md)
1924 { 1924 {
1925 /* 1925 /*
1926 * Request-based dm devices cannot be stacked on top of bio-based dm 1926 * Request-based dm devices cannot be stacked on top of bio-based dm
1927 * devices. The type of this dm device has not been decided yet. 1927 * devices. The type of this dm device has not been decided yet.
1928 * The type is decided at the first table loading time. 1928 * The type is decided at the first table loading time.
1929 * To prevent problematic device stacking, clear the queue flag 1929 * To prevent problematic device stacking, clear the queue flag
1930 * for request stacking support until then. 1930 * for request stacking support until then.
1931 * 1931 *
1932 * This queue is new, so no concurrency on the queue_flags. 1932 * This queue is new, so no concurrency on the queue_flags.
1933 */ 1933 */
1934 queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); 1934 queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
1935 1935
1936 md->queue->queuedata = md; 1936 md->queue->queuedata = md;
1937 md->queue->backing_dev_info.congested_fn = dm_any_congested; 1937 md->queue->backing_dev_info.congested_fn = dm_any_congested;
1938 md->queue->backing_dev_info.congested_data = md; 1938 md->queue->backing_dev_info.congested_data = md;
1939 blk_queue_make_request(md->queue, dm_request); 1939 blk_queue_make_request(md->queue, dm_request);
1940 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1940 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1941 md->queue->unplug_fn = dm_unplug_all; 1941 md->queue->unplug_fn = dm_unplug_all;
1942 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1942 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1943 } 1943 }
1944 1944
1945 /* 1945 /*
1946 * Allocate and initialise a blank device with a given minor. 1946 * Allocate and initialise a blank device with a given minor.
1947 */ 1947 */
1948 static struct mapped_device *alloc_dev(int minor) 1948 static struct mapped_device *alloc_dev(int minor)
1949 { 1949 {
1950 int r; 1950 int r;
1951 struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); 1951 struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
1952 void *old_md; 1952 void *old_md;
1953 1953
1954 if (!md) { 1954 if (!md) {
1955 DMWARN("unable to allocate device, out of memory."); 1955 DMWARN("unable to allocate device, out of memory.");
1956 return NULL; 1956 return NULL;
1957 } 1957 }
1958 1958
1959 if (!try_module_get(THIS_MODULE)) 1959 if (!try_module_get(THIS_MODULE))
1960 goto bad_module_get; 1960 goto bad_module_get;
1961 1961
1962 /* get a minor number for the dev */ 1962 /* get a minor number for the dev */
1963 if (minor == DM_ANY_MINOR) 1963 if (minor == DM_ANY_MINOR)
1964 r = next_free_minor(&minor); 1964 r = next_free_minor(&minor);
1965 else 1965 else
1966 r = specific_minor(minor); 1966 r = specific_minor(minor);
1967 if (r < 0) 1967 if (r < 0)
1968 goto bad_minor; 1968 goto bad_minor;
1969 1969
1970 md->type = DM_TYPE_NONE; 1970 md->type = DM_TYPE_NONE;
1971 init_rwsem(&md->io_lock); 1971 init_rwsem(&md->io_lock);
1972 mutex_init(&md->suspend_lock); 1972 mutex_init(&md->suspend_lock);
1973 mutex_init(&md->type_lock); 1973 mutex_init(&md->type_lock);
1974 spin_lock_init(&md->deferred_lock); 1974 spin_lock_init(&md->deferred_lock);
1975 spin_lock_init(&md->barrier_error_lock); 1975 spin_lock_init(&md->barrier_error_lock);
1976 rwlock_init(&md->map_lock); 1976 rwlock_init(&md->map_lock);
1977 atomic_set(&md->holders, 1); 1977 atomic_set(&md->holders, 1);
1978 atomic_set(&md->open_count, 0); 1978 atomic_set(&md->open_count, 0);
1979 atomic_set(&md->event_nr, 0); 1979 atomic_set(&md->event_nr, 0);
1980 atomic_set(&md->uevent_seq, 0); 1980 atomic_set(&md->uevent_seq, 0);
1981 INIT_LIST_HEAD(&md->uevent_list); 1981 INIT_LIST_HEAD(&md->uevent_list);
1982 spin_lock_init(&md->uevent_lock); 1982 spin_lock_init(&md->uevent_lock);
1983 1983
1984 md->queue = blk_alloc_queue(GFP_KERNEL); 1984 md->queue = blk_alloc_queue(GFP_KERNEL);
1985 if (!md->queue) 1985 if (!md->queue)
1986 goto bad_queue; 1986 goto bad_queue;
1987 1987
1988 dm_init_md_queue(md); 1988 dm_init_md_queue(md);
1989 1989
1990 md->disk = alloc_disk(1); 1990 md->disk = alloc_disk(1);
1991 if (!md->disk) 1991 if (!md->disk)
1992 goto bad_disk; 1992 goto bad_disk;
1993 1993
1994 atomic_set(&md->pending[0], 0); 1994 atomic_set(&md->pending[0], 0);
1995 atomic_set(&md->pending[1], 0); 1995 atomic_set(&md->pending[1], 0);
1996 init_waitqueue_head(&md->wait); 1996 init_waitqueue_head(&md->wait);
1997 INIT_WORK(&md->work, dm_wq_work); 1997 INIT_WORK(&md->work, dm_wq_work);
1998 INIT_WORK(&md->barrier_work, dm_rq_barrier_work); 1998 INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
1999 init_waitqueue_head(&md->eventq); 1999 init_waitqueue_head(&md->eventq);
2000 2000
2001 md->disk->major = _major; 2001 md->disk->major = _major;
2002 md->disk->first_minor = minor; 2002 md->disk->first_minor = minor;
2003 md->disk->fops = &dm_blk_dops; 2003 md->disk->fops = &dm_blk_dops;
2004 md->disk->queue = md->queue; 2004 md->disk->queue = md->queue;
2005 md->disk->private_data = md; 2005 md->disk->private_data = md;
2006 sprintf(md->disk->disk_name, "dm-%d", minor); 2006 sprintf(md->disk->disk_name, "dm-%d", minor);
2007 add_disk(md->disk); 2007 add_disk(md->disk);
2008 format_dev_t(md->name, MKDEV(_major, minor)); 2008 format_dev_t(md->name, MKDEV(_major, minor));
2009 2009
2010 md->wq = create_singlethread_workqueue("kdmflush"); 2010 md->wq = create_singlethread_workqueue("kdmflush");
2011 if (!md->wq) 2011 if (!md->wq)
2012 goto bad_thread; 2012 goto bad_thread;
2013 2013
2014 md->bdev = bdget_disk(md->disk, 0); 2014 md->bdev = bdget_disk(md->disk, 0);
2015 if (!md->bdev) 2015 if (!md->bdev)
2016 goto bad_bdev; 2016 goto bad_bdev;
2017 2017
2018 /* Populate the mapping, nobody knows we exist yet */ 2018 /* Populate the mapping, nobody knows we exist yet */
2019 spin_lock(&_minor_lock); 2019 spin_lock(&_minor_lock);
2020 old_md = idr_replace(&_minor_idr, md, minor); 2020 old_md = idr_replace(&_minor_idr, md, minor);
2021 spin_unlock(&_minor_lock); 2021 spin_unlock(&_minor_lock);
2022 2022
2023 BUG_ON(old_md != MINOR_ALLOCED); 2023 BUG_ON(old_md != MINOR_ALLOCED);
2024 2024
2025 return md; 2025 return md;
2026 2026
2027 bad_bdev: 2027 bad_bdev:
2028 destroy_workqueue(md->wq); 2028 destroy_workqueue(md->wq);
2029 bad_thread: 2029 bad_thread:
2030 del_gendisk(md->disk); 2030 del_gendisk(md->disk);
2031 put_disk(md->disk); 2031 put_disk(md->disk);
2032 bad_disk: 2032 bad_disk:
2033 blk_cleanup_queue(md->queue); 2033 blk_cleanup_queue(md->queue);
2034 bad_queue: 2034 bad_queue:
2035 free_minor(minor); 2035 free_minor(minor);
2036 bad_minor: 2036 bad_minor:
2037 module_put(THIS_MODULE); 2037 module_put(THIS_MODULE);
2038 bad_module_get: 2038 bad_module_get:
2039 kfree(md); 2039 kfree(md);
2040 return NULL; 2040 return NULL;
2041 } 2041 }
2042 2042
2043 static void unlock_fs(struct mapped_device *md); 2043 static void unlock_fs(struct mapped_device *md);
2044 2044
2045 static void free_dev(struct mapped_device *md) 2045 static void free_dev(struct mapped_device *md)
2046 { 2046 {
2047 int minor = MINOR(disk_devt(md->disk)); 2047 int minor = MINOR(disk_devt(md->disk));
2048 2048
2049 unlock_fs(md); 2049 unlock_fs(md);
2050 bdput(md->bdev); 2050 bdput(md->bdev);
2051 destroy_workqueue(md->wq); 2051 destroy_workqueue(md->wq);
2052 if (md->tio_pool) 2052 if (md->tio_pool)
2053 mempool_destroy(md->tio_pool); 2053 mempool_destroy(md->tio_pool);
2054 if (md->io_pool) 2054 if (md->io_pool)
2055 mempool_destroy(md->io_pool); 2055 mempool_destroy(md->io_pool);
2056 if (md->bs) 2056 if (md->bs)
2057 bioset_free(md->bs); 2057 bioset_free(md->bs);
2058 blk_integrity_unregister(md->disk); 2058 blk_integrity_unregister(md->disk);
2059 del_gendisk(md->disk); 2059 del_gendisk(md->disk);
2060 free_minor(minor); 2060 free_minor(minor);
2061 2061
2062 spin_lock(&_minor_lock); 2062 spin_lock(&_minor_lock);
2063 md->disk->private_data = NULL; 2063 md->disk->private_data = NULL;
2064 spin_unlock(&_minor_lock); 2064 spin_unlock(&_minor_lock);
2065 2065
2066 put_disk(md->disk); 2066 put_disk(md->disk);
2067 blk_cleanup_queue(md->queue); 2067 blk_cleanup_queue(md->queue);
2068 module_put(THIS_MODULE); 2068 module_put(THIS_MODULE);
2069 kfree(md); 2069 kfree(md);
2070 } 2070 }
2071 2071
2072 static void __bind_mempools(struct mapped_device *md, struct dm_table *t) 2072 static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
2073 { 2073 {
2074 struct dm_md_mempools *p; 2074 struct dm_md_mempools *p;
2075 2075
2076 if (md->io_pool && md->tio_pool && md->bs) 2076 if (md->io_pool && md->tio_pool && md->bs)
2077 /* the md already has necessary mempools */ 2077 /* the md already has necessary mempools */
2078 goto out; 2078 goto out;
2079 2079
2080 p = dm_table_get_md_mempools(t); 2080 p = dm_table_get_md_mempools(t);
2081 BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); 2081 BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
2082 2082
2083 md->io_pool = p->io_pool; 2083 md->io_pool = p->io_pool;
2084 p->io_pool = NULL; 2084 p->io_pool = NULL;
2085 md->tio_pool = p->tio_pool; 2085 md->tio_pool = p->tio_pool;
2086 p->tio_pool = NULL; 2086 p->tio_pool = NULL;
2087 md->bs = p->bs; 2087 md->bs = p->bs;
2088 p->bs = NULL; 2088 p->bs = NULL;
2089 2089
2090 out: 2090 out:
2091 /* mempool bind completed; the table no longer needs its mempools */ 2091 /* mempool bind completed; the table no longer needs its mempools */
2092 dm_table_free_md_mempools(t); 2092 dm_table_free_md_mempools(t);
2093 } 2093 }
2094 2094
2095 /* 2095 /*
2096 * Bind a table to the device. 2096 * Bind a table to the device.
2097 */ 2097 */
2098 static void event_callback(void *context) 2098 static void event_callback(void *context)
2099 { 2099 {
2100 unsigned long flags; 2100 unsigned long flags;
2101 LIST_HEAD(uevents); 2101 LIST_HEAD(uevents);
2102 struct mapped_device *md = (struct mapped_device *) context; 2102 struct mapped_device *md = (struct mapped_device *) context;
2103 2103
2104 spin_lock_irqsave(&md->uevent_lock, flags); 2104 spin_lock_irqsave(&md->uevent_lock, flags);
2105 list_splice_init(&md->uevent_list, &uevents); 2105 list_splice_init(&md->uevent_list, &uevents);
2106 spin_unlock_irqrestore(&md->uevent_lock, flags); 2106 spin_unlock_irqrestore(&md->uevent_lock, flags);
2107 2107
2108 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); 2108 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
2109 2109
2110 atomic_inc(&md->event_nr); 2110 atomic_inc(&md->event_nr);
2111 wake_up(&md->eventq); 2111 wake_up(&md->eventq);
2112 } 2112 }
2113 2113
2114 static void __set_size(struct mapped_device *md, sector_t size) 2114 static void __set_size(struct mapped_device *md, sector_t size)
2115 { 2115 {
2116 set_capacity(md->disk, size); 2116 set_capacity(md->disk, size);
2117 2117
2118 mutex_lock(&md->bdev->bd_inode->i_mutex); 2118 mutex_lock(&md->bdev->bd_inode->i_mutex);
2119 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); 2119 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
2120 mutex_unlock(&md->bdev->bd_inode->i_mutex); 2120 mutex_unlock(&md->bdev->bd_inode->i_mutex);
2121 } 2121 }
2122 2122
2123 /* 2123 /*
2124 * Returns old map, which caller must destroy. 2124 * Returns old map, which caller must destroy.
2125 */ 2125 */
2126 static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, 2126 static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
2127 struct queue_limits *limits) 2127 struct queue_limits *limits)
2128 { 2128 {
2129 struct dm_table *old_map; 2129 struct dm_table *old_map;
2130 struct request_queue *q = md->queue; 2130 struct request_queue *q = md->queue;
2131 sector_t size; 2131 sector_t size;
2132 unsigned long flags; 2132 unsigned long flags;
2133 2133
2134 size = dm_table_get_size(t); 2134 size = dm_table_get_size(t);
2135 2135
2136 /* 2136 /*
2137 * Wipe any geometry if the size of the table changed. 2137 * Wipe any geometry if the size of the table changed.
2138 */ 2138 */
2139 if (size != get_capacity(md->disk)) 2139 if (size != get_capacity(md->disk))
2140 memset(&md->geometry, 0, sizeof(md->geometry)); 2140 memset(&md->geometry, 0, sizeof(md->geometry));
2141 2141
2142 __set_size(md, size); 2142 __set_size(md, size);
2143 2143
2144 dm_table_event_callback(t, event_callback, md); 2144 dm_table_event_callback(t, event_callback, md);
2145 2145
2146 /* 2146 /*
2147 * The queue hasn't been stopped yet if the old table type wasn't 2147 * The queue hasn't been stopped yet if the old table type wasn't
2148 * request-based during suspension, so stop it to prevent I/O 2148 * request-based during suspension, so stop it to prevent I/O
2149 * from being mapped before resume. 2149 * from being mapped before resume.
2150 * This must be done before setting the queue restrictions, 2150 * This must be done before setting the queue restrictions,
2151 * because request-based dm may be run just after the setting. 2151 * because request-based dm may be run just after the setting.
2152 */ 2152 */
2153 if (dm_table_request_based(t) && !blk_queue_stopped(q)) 2153 if (dm_table_request_based(t) && !blk_queue_stopped(q))
2154 stop_queue(q); 2154 stop_queue(q);
2155 2155
2156 __bind_mempools(md, t); 2156 __bind_mempools(md, t);
2157 2157
2158 write_lock_irqsave(&md->map_lock, flags); 2158 write_lock_irqsave(&md->map_lock, flags);
2159 old_map = md->map; 2159 old_map = md->map;
2160 md->map = t; 2160 md->map = t;
2161 dm_table_set_restrictions(t, q, limits); 2161 dm_table_set_restrictions(t, q, limits);
2162 write_unlock_irqrestore(&md->map_lock, flags); 2162 write_unlock_irqrestore(&md->map_lock, flags);
2163 2163
2164 return old_map; 2164 return old_map;
2165 } 2165 }
2166 2166
2167 /* 2167 /*
2168 * Returns unbound table for the caller to free. 2168 * Returns unbound table for the caller to free.
2169 */ 2169 */
2170 static struct dm_table *__unbind(struct mapped_device *md) 2170 static struct dm_table *__unbind(struct mapped_device *md)
2171 { 2171 {
2172 struct dm_table *map = md->map; 2172 struct dm_table *map = md->map;
2173 unsigned long flags; 2173 unsigned long flags;
2174 2174
2175 if (!map) 2175 if (!map)
2176 return NULL; 2176 return NULL;
2177 2177
2178 dm_table_event_callback(map, NULL, NULL); 2178 dm_table_event_callback(map, NULL, NULL);
2179 write_lock_irqsave(&md->map_lock, flags); 2179 write_lock_irqsave(&md->map_lock, flags);
2180 md->map = NULL; 2180 md->map = NULL;
2181 write_unlock_irqrestore(&md->map_lock, flags); 2181 write_unlock_irqrestore(&md->map_lock, flags);
2182 2182
2183 return map; 2183 return map;
2184 } 2184 }
2185 2185
2186 /* 2186 /*
2187 * Constructor for a new device. 2187 * Constructor for a new device.
2188 */ 2188 */
2189 int dm_create(int minor, struct mapped_device **result) 2189 int dm_create(int minor, struct mapped_device **result)
2190 { 2190 {
2191 struct mapped_device *md; 2191 struct mapped_device *md;
2192 2192
2193 md = alloc_dev(minor); 2193 md = alloc_dev(minor);
2194 if (!md) 2194 if (!md)
2195 return -ENXIO; 2195 return -ENXIO;
2196 2196
2197 dm_sysfs_init(md); 2197 dm_sysfs_init(md);
2198 2198
2199 *result = md; 2199 *result = md;
2200 return 0; 2200 return 0;
2201 } 2201 }
2202 2202
2203 /* 2203 /*
2204 * Functions to manage md->type. 2204 * Functions to manage md->type.
2205 * All are required to hold md->type_lock. 2205 * All are required to hold md->type_lock.
2206 */ 2206 */
2207 void dm_lock_md_type(struct mapped_device *md) 2207 void dm_lock_md_type(struct mapped_device *md)
2208 { 2208 {
2209 mutex_lock(&md->type_lock); 2209 mutex_lock(&md->type_lock);
2210 } 2210 }
2211 2211
2212 void dm_unlock_md_type(struct mapped_device *md) 2212 void dm_unlock_md_type(struct mapped_device *md)
2213 { 2213 {
2214 mutex_unlock(&md->type_lock); 2214 mutex_unlock(&md->type_lock);
2215 } 2215 }
2216 2216
2217 void dm_set_md_type(struct mapped_device *md, unsigned type) 2217 void dm_set_md_type(struct mapped_device *md, unsigned type)
2218 { 2218 {
2219 md->type = type; 2219 md->type = type;
2220 } 2220 }
2221 2221
2222 unsigned dm_get_md_type(struct mapped_device *md) 2222 unsigned dm_get_md_type(struct mapped_device *md)
2223 { 2223 {
2224 return md->type; 2224 return md->type;
2225 } 2225 }
2226 2226
2227 /* 2227 /*
2228 * Fully initialize a request-based queue (->elevator, ->request_fn, etc). 2228 * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
2229 */ 2229 */
2230 static int dm_init_request_based_queue(struct mapped_device *md) 2230 static int dm_init_request_based_queue(struct mapped_device *md)
2231 { 2231 {
2232 struct request_queue *q = NULL; 2232 struct request_queue *q = NULL;
2233 2233
2234 if (md->queue->elevator) 2234 if (md->queue->elevator)
2235 return 1; 2235 return 1;
2236 2236
2237 /* Fully initialize the queue */ 2237 /* Fully initialize the queue */
2238 q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); 2238 q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
2239 if (!q) 2239 if (!q)
2240 return 0; 2240 return 0;
2241 2241
2242 md->queue = q; 2242 md->queue = q;
2243 md->saved_make_request_fn = md->queue->make_request_fn; 2243 md->saved_make_request_fn = md->queue->make_request_fn;
2244 dm_init_md_queue(md); 2244 dm_init_md_queue(md);
2245 blk_queue_softirq_done(md->queue, dm_softirq_done); 2245 blk_queue_softirq_done(md->queue, dm_softirq_done);
2246 blk_queue_prep_rq(md->queue, dm_prep_fn); 2246 blk_queue_prep_rq(md->queue, dm_prep_fn);
2247 blk_queue_lld_busy(md->queue, dm_lld_busy); 2247 blk_queue_lld_busy(md->queue, dm_lld_busy);
2248 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); 2248 blk_queue_flush(md->queue, REQ_FLUSH);
2249 2249
2250 elv_register_queue(md->queue); 2250 elv_register_queue(md->queue);
2251 2251
2252 return 1; 2252 return 1;
2253 } 2253 }
2254 2254
2255 /* 2255 /*
2256 * Setup the DM device's queue based on md's type 2256 * Setup the DM device's queue based on md's type
2257 */ 2257 */
2258 int dm_setup_md_queue(struct mapped_device *md) 2258 int dm_setup_md_queue(struct mapped_device *md)
2259 { 2259 {
2260 if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && 2260 if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) &&
2261 !dm_init_request_based_queue(md)) { 2261 !dm_init_request_based_queue(md)) {
2262 DMWARN("Cannot initialize queue for request-based mapped device"); 2262 DMWARN("Cannot initialize queue for request-based mapped device");
2263 return -EINVAL; 2263 return -EINVAL;
2264 } 2264 }
2265 2265
2266 return 0; 2266 return 0;
2267 } 2267 }
2268 2268
2269 static struct mapped_device *dm_find_md(dev_t dev) 2269 static struct mapped_device *dm_find_md(dev_t dev)
2270 { 2270 {
2271 struct mapped_device *md; 2271 struct mapped_device *md;
2272 unsigned minor = MINOR(dev); 2272 unsigned minor = MINOR(dev);
2273 2273
2274 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) 2274 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
2275 return NULL; 2275 return NULL;
2276 2276
2277 spin_lock(&_minor_lock); 2277 spin_lock(&_minor_lock);
2278 2278
2279 md = idr_find(&_minor_idr, minor); 2279 md = idr_find(&_minor_idr, minor);
2280 if (md && (md == MINOR_ALLOCED || 2280 if (md && (md == MINOR_ALLOCED ||
2281 (MINOR(disk_devt(dm_disk(md))) != minor) || 2281 (MINOR(disk_devt(dm_disk(md))) != minor) ||
2282 dm_deleting_md(md) || 2282 dm_deleting_md(md) ||
2283 test_bit(DMF_FREEING, &md->flags))) { 2283 test_bit(DMF_FREEING, &md->flags))) {
2284 md = NULL; 2284 md = NULL;
2285 goto out; 2285 goto out;
2286 } 2286 }
2287 2287
2288 out: 2288 out:
2289 spin_unlock(&_minor_lock); 2289 spin_unlock(&_minor_lock);
2290 2290
2291 return md; 2291 return md;
2292 } 2292 }
2293 2293
2294 struct mapped_device *dm_get_md(dev_t dev) 2294 struct mapped_device *dm_get_md(dev_t dev)
2295 { 2295 {
2296 struct mapped_device *md = dm_find_md(dev); 2296 struct mapped_device *md = dm_find_md(dev);
2297 2297
2298 if (md) 2298 if (md)
2299 dm_get(md); 2299 dm_get(md);
2300 2300
2301 return md; 2301 return md;
2302 } 2302 }
2303 2303
2304 void *dm_get_mdptr(struct mapped_device *md) 2304 void *dm_get_mdptr(struct mapped_device *md)
2305 { 2305 {
2306 return md->interface_ptr; 2306 return md->interface_ptr;
2307 } 2307 }
2308 2308
2309 void dm_set_mdptr(struct mapped_device *md, void *ptr) 2309 void dm_set_mdptr(struct mapped_device *md, void *ptr)
2310 { 2310 {
2311 md->interface_ptr = ptr; 2311 md->interface_ptr = ptr;
2312 } 2312 }
2313 2313
2314 void dm_get(struct mapped_device *md) 2314 void dm_get(struct mapped_device *md)
2315 { 2315 {
2316 atomic_inc(&md->holders); 2316 atomic_inc(&md->holders);
2317 BUG_ON(test_bit(DMF_FREEING, &md->flags)); 2317 BUG_ON(test_bit(DMF_FREEING, &md->flags));
2318 } 2318 }
2319 2319
2320 const char *dm_device_name(struct mapped_device *md) 2320 const char *dm_device_name(struct mapped_device *md)
2321 { 2321 {
2322 return md->name; 2322 return md->name;
2323 } 2323 }
2324 EXPORT_SYMBOL_GPL(dm_device_name); 2324 EXPORT_SYMBOL_GPL(dm_device_name);
2325 2325
2326 static void __dm_destroy(struct mapped_device *md, bool wait) 2326 static void __dm_destroy(struct mapped_device *md, bool wait)
2327 { 2327 {
2328 struct dm_table *map; 2328 struct dm_table *map;
2329 2329
2330 might_sleep(); 2330 might_sleep();
2331 2331
2332 spin_lock(&_minor_lock); 2332 spin_lock(&_minor_lock);
2333 map = dm_get_live_table(md); 2333 map = dm_get_live_table(md);
2334 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); 2334 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
2335 set_bit(DMF_FREEING, &md->flags); 2335 set_bit(DMF_FREEING, &md->flags);
2336 spin_unlock(&_minor_lock); 2336 spin_unlock(&_minor_lock);
2337 2337
2338 if (!dm_suspended_md(md)) { 2338 if (!dm_suspended_md(md)) {
2339 dm_table_presuspend_targets(map); 2339 dm_table_presuspend_targets(map);
2340 dm_table_postsuspend_targets(map); 2340 dm_table_postsuspend_targets(map);
2341 } 2341 }
2342 2342
2343 /* 2343 /*
2344 * Rare, but there may still be I/O requests waiting to complete, 2344 * Rare, but there may still be I/O requests waiting to complete,
2345 * for example. Wait for all references to disappear. 2345 * for example. Wait for all references to disappear.
2346 * No one should increment the reference count of the mapped_device 2346 * No one should increment the reference count of the mapped_device
2347 * after its state becomes DMF_FREEING. 2347 * after its state becomes DMF_FREEING.
2348 */ 2348 */
2349 if (wait) 2349 if (wait)
2350 while (atomic_read(&md->holders)) 2350 while (atomic_read(&md->holders))
2351 msleep(1); 2351 msleep(1);
2352 else if (atomic_read(&md->holders)) 2352 else if (atomic_read(&md->holders))
2353 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)", 2353 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
2354 dm_device_name(md), atomic_read(&md->holders)); 2354 dm_device_name(md), atomic_read(&md->holders));
2355 2355
2356 dm_sysfs_exit(md); 2356 dm_sysfs_exit(md);
2357 dm_table_put(map); 2357 dm_table_put(map);
2358 dm_table_destroy(__unbind(md)); 2358 dm_table_destroy(__unbind(md));
2359 free_dev(md); 2359 free_dev(md);
2360 } 2360 }
2361 2361
2362 void dm_destroy(struct mapped_device *md) 2362 void dm_destroy(struct mapped_device *md)
2363 { 2363 {
2364 __dm_destroy(md, true); 2364 __dm_destroy(md, true);
2365 } 2365 }
2366 2366
2367 void dm_destroy_immediate(struct mapped_device *md) 2367 void dm_destroy_immediate(struct mapped_device *md)
2368 { 2368 {
2369 __dm_destroy(md, false); 2369 __dm_destroy(md, false);
2370 } 2370 }
2371 2371
2372 void dm_put(struct mapped_device *md) 2372 void dm_put(struct mapped_device *md)
2373 { 2373 {
2374 atomic_dec(&md->holders); 2374 atomic_dec(&md->holders);
2375 } 2375 }
2376 EXPORT_SYMBOL_GPL(dm_put); 2376 EXPORT_SYMBOL_GPL(dm_put);
2377 2377
2378 static int dm_wait_for_completion(struct mapped_device *md, int interruptible) 2378 static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2379 { 2379 {
2380 int r = 0; 2380 int r = 0;
2381 DECLARE_WAITQUEUE(wait, current); 2381 DECLARE_WAITQUEUE(wait, current);
2382 2382
2383 dm_unplug_all(md->queue); 2383 dm_unplug_all(md->queue);
2384 2384
2385 add_wait_queue(&md->wait, &wait); 2385 add_wait_queue(&md->wait, &wait);
2386 2386
2387 while (1) { 2387 while (1) {
2388 set_current_state(interruptible); 2388 set_current_state(interruptible);
2389 2389
2390 smp_mb(); 2390 smp_mb();
2391 if (!md_in_flight(md)) 2391 if (!md_in_flight(md))
2392 break; 2392 break;
2393 2393
2394 if (interruptible == TASK_INTERRUPTIBLE && 2394 if (interruptible == TASK_INTERRUPTIBLE &&
2395 signal_pending(current)) { 2395 signal_pending(current)) {
2396 r = -EINTR; 2396 r = -EINTR;
2397 break; 2397 break;
2398 } 2398 }
2399 2399
2400 io_schedule(); 2400 io_schedule();
2401 } 2401 }
2402 set_current_state(TASK_RUNNING); 2402 set_current_state(TASK_RUNNING);
2403 2403
2404 remove_wait_queue(&md->wait, &wait); 2404 remove_wait_queue(&md->wait, &wait);
2405 2405
2406 return r; 2406 return r;
2407 } 2407 }
2408 2408
2409 static void dm_flush(struct mapped_device *md) 2409 static void dm_flush(struct mapped_device *md)
2410 { 2410 {
2411 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2411 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2412 2412
2413 bio_init(&md->barrier_bio); 2413 bio_init(&md->barrier_bio);
2414 md->barrier_bio.bi_bdev = md->bdev; 2414 md->barrier_bio.bi_bdev = md->bdev;
2415 md->barrier_bio.bi_rw = WRITE_BARRIER; 2415 md->barrier_bio.bi_rw = WRITE_BARRIER;
2416 __split_and_process_bio(md, &md->barrier_bio); 2416 __split_and_process_bio(md, &md->barrier_bio);
2417 2417
2418 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2418 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2419 } 2419 }
2420 2420
2421 static void process_barrier(struct mapped_device *md, struct bio *bio) 2421 static void process_barrier(struct mapped_device *md, struct bio *bio)
2422 { 2422 {
2423 md->barrier_error = 0; 2423 md->barrier_error = 0;
2424 2424
2425 dm_flush(md); 2425 dm_flush(md);
2426 2426
2427 if (!bio_empty_barrier(bio)) { 2427 if (!bio_empty_barrier(bio)) {
2428 __split_and_process_bio(md, bio); 2428 __split_and_process_bio(md, bio);
2429 /* 2429 /*
2430 * If the request isn't supported, don't waste time with 2430 * If the request isn't supported, don't waste time with
2431 * the second flush. 2431 * the second flush.
2432 */ 2432 */
2433 if (md->barrier_error != -EOPNOTSUPP) 2433 if (md->barrier_error != -EOPNOTSUPP)
2434 dm_flush(md); 2434 dm_flush(md);
2435 } 2435 }
2436 2436
2437 if (md->barrier_error != DM_ENDIO_REQUEUE) 2437 if (md->barrier_error != DM_ENDIO_REQUEUE)
2438 bio_endio(bio, md->barrier_error); 2438 bio_endio(bio, md->barrier_error);
2439 else { 2439 else {
2440 spin_lock_irq(&md->deferred_lock); 2440 spin_lock_irq(&md->deferred_lock);
2441 bio_list_add_head(&md->deferred, bio); 2441 bio_list_add_head(&md->deferred, bio);
2442 spin_unlock_irq(&md->deferred_lock); 2442 spin_unlock_irq(&md->deferred_lock);
2443 } 2443 }
2444 } 2444 }
2445 2445
2446 /* 2446 /*
2447 * Process the deferred bios 2447 * Process the deferred bios
2448 */ 2448 */
2449 static void dm_wq_work(struct work_struct *work) 2449 static void dm_wq_work(struct work_struct *work)
2450 { 2450 {
2451 struct mapped_device *md = container_of(work, struct mapped_device, 2451 struct mapped_device *md = container_of(work, struct mapped_device,
2452 work); 2452 work);
2453 struct bio *c; 2453 struct bio *c;
2454 2454
2455 down_write(&md->io_lock); 2455 down_write(&md->io_lock);
2456 2456
2457 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 2457 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
2458 spin_lock_irq(&md->deferred_lock); 2458 spin_lock_irq(&md->deferred_lock);
2459 c = bio_list_pop(&md->deferred); 2459 c = bio_list_pop(&md->deferred);
2460 spin_unlock_irq(&md->deferred_lock); 2460 spin_unlock_irq(&md->deferred_lock);
2461 2461
2462 if (!c) { 2462 if (!c) {
2463 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); 2463 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2464 break; 2464 break;
2465 } 2465 }
2466 2466
2467 up_write(&md->io_lock); 2467 up_write(&md->io_lock);
2468 2468
2469 if (dm_request_based(md)) 2469 if (dm_request_based(md))
2470 generic_make_request(c); 2470 generic_make_request(c);
2471 else { 2471 else {
2472 if (c->bi_rw & REQ_HARDBARRIER) 2472 if (c->bi_rw & REQ_HARDBARRIER)
2473 process_barrier(md, c); 2473 process_barrier(md, c);
2474 else 2474 else
2475 __split_and_process_bio(md, c); 2475 __split_and_process_bio(md, c);
2476 } 2476 }
2477 2477
2478 down_write(&md->io_lock); 2478 down_write(&md->io_lock);
2479 } 2479 }
2480 2480
2481 up_write(&md->io_lock); 2481 up_write(&md->io_lock);
2482 } 2482 }
2483 2483
2484 static void dm_queue_flush(struct mapped_device *md) 2484 static void dm_queue_flush(struct mapped_device *md)
2485 { 2485 {
2486 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2486 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2487 smp_mb__after_clear_bit(); 2487 smp_mb__after_clear_bit();
2488 queue_work(md->wq, &md->work); 2488 queue_work(md->wq, &md->work);
2489 } 2489 }
2490 2490
2491 static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) 2491 static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
2492 { 2492 {
2493 struct dm_rq_target_io *tio = clone->end_io_data; 2493 struct dm_rq_target_io *tio = clone->end_io_data;
2494 2494
2495 tio->info.target_request_nr = request_nr; 2495 tio->info.target_request_nr = request_nr;
2496 } 2496 }
2497 2497
2498 /* Issue barrier requests to targets and wait for their completion. */ 2498 /* Issue barrier requests to targets and wait for their completion. */
2499 static int dm_rq_barrier(struct mapped_device *md) 2499 static int dm_rq_barrier(struct mapped_device *md)
2500 { 2500 {
2501 int i, j; 2501 int i, j;
2502 struct dm_table *map = dm_get_live_table(md); 2502 struct dm_table *map = dm_get_live_table(md);
2503 unsigned num_targets = dm_table_get_num_targets(map); 2503 unsigned num_targets = dm_table_get_num_targets(map);
2504 struct dm_target *ti; 2504 struct dm_target *ti;
2505 struct request *clone; 2505 struct request *clone;
2506 2506
2507 md->barrier_error = 0; 2507 md->barrier_error = 0;
2508 2508
2509 for (i = 0; i < num_targets; i++) { 2509 for (i = 0; i < num_targets; i++) {
2510 ti = dm_table_get_target(map, i); 2510 ti = dm_table_get_target(map, i);
2511 for (j = 0; j < ti->num_flush_requests; j++) { 2511 for (j = 0; j < ti->num_flush_requests; j++) {
2512 clone = clone_rq(md->flush_request, md, GFP_NOIO); 2512 clone = clone_rq(md->flush_request, md, GFP_NOIO);
2513 dm_rq_set_target_request_nr(clone, j); 2513 dm_rq_set_target_request_nr(clone, j);
2514 atomic_inc(&md->pending[rq_data_dir(clone)]); 2514 atomic_inc(&md->pending[rq_data_dir(clone)]);
2515 map_request(ti, clone, md); 2515 map_request(ti, clone, md);
2516 } 2516 }
2517 } 2517 }
2518 2518
2519 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2519 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2520 dm_table_put(map); 2520 dm_table_put(map);
2521 2521
2522 return md->barrier_error; 2522 return md->barrier_error;
2523 } 2523 }
2524 2524
2525 static void dm_rq_barrier_work(struct work_struct *work) 2525 static void dm_rq_barrier_work(struct work_struct *work)
2526 { 2526 {
2527 int error; 2527 int error;
2528 struct mapped_device *md = container_of(work, struct mapped_device, 2528 struct mapped_device *md = container_of(work, struct mapped_device,
2529 barrier_work); 2529 barrier_work);
2530 struct request_queue *q = md->queue; 2530 struct request_queue *q = md->queue;
2531 struct request *rq; 2531 struct request *rq;
2532 unsigned long flags; 2532 unsigned long flags;
2533 2533
2534 /* 2534 /*
2535 * Hold the md reference here and release it only at the end so that 2535 * Hold the md reference here and release it only at the end so that
2536 * the md can't be deleted by the device opener while the barrier 2536 * the md can't be deleted by the device opener while the barrier
2537 * request completes. 2537 * request completes.
2538 */ 2538 */
2539 dm_get(md); 2539 dm_get(md);
2540 2540
2541 error = dm_rq_barrier(md); 2541 error = dm_rq_barrier(md);
2542 2542
2543 rq = md->flush_request; 2543 rq = md->flush_request;
2544 md->flush_request = NULL; 2544 md->flush_request = NULL;
2545 2545
2546 if (error == DM_ENDIO_REQUEUE) { 2546 if (error == DM_ENDIO_REQUEUE) {
2547 spin_lock_irqsave(q->queue_lock, flags); 2547 spin_lock_irqsave(q->queue_lock, flags);
2548 blk_requeue_request(q, rq); 2548 blk_requeue_request(q, rq);
2549 spin_unlock_irqrestore(q->queue_lock, flags); 2549 spin_unlock_irqrestore(q->queue_lock, flags);
2550 } else 2550 } else
2551 blk_end_request_all(rq, error); 2551 blk_end_request_all(rq, error);
2552 2552
2553 blk_run_queue(q); 2553 blk_run_queue(q);
2554 2554
2555 dm_put(md); 2555 dm_put(md);
2556 } 2556 }
2557 2557
2558 /* 2558 /*
2559 * Swap in a new table, returning the old one for the caller to destroy. 2559 * Swap in a new table, returning the old one for the caller to destroy.
2560 */ 2560 */
2561 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) 2561 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
2562 { 2562 {
2563 struct dm_table *map = ERR_PTR(-EINVAL); 2563 struct dm_table *map = ERR_PTR(-EINVAL);
2564 struct queue_limits limits; 2564 struct queue_limits limits;
2565 int r; 2565 int r;
2566 2566
2567 mutex_lock(&md->suspend_lock); 2567 mutex_lock(&md->suspend_lock);
2568 2568
2569 /* device must be suspended */ 2569 /* device must be suspended */
2570 if (!dm_suspended_md(md)) 2570 if (!dm_suspended_md(md))
2571 goto out; 2571 goto out;
2572 2572
2573 r = dm_calculate_queue_limits(table, &limits); 2573 r = dm_calculate_queue_limits(table, &limits);
2574 if (r) { 2574 if (r) {
2575 map = ERR_PTR(r); 2575 map = ERR_PTR(r);
2576 goto out; 2576 goto out;
2577 } 2577 }
2578 2578
2579 map = __bind(md, table, &limits); 2579 map = __bind(md, table, &limits);
2580 2580
2581 out: 2581 out:
2582 mutex_unlock(&md->suspend_lock); 2582 mutex_unlock(&md->suspend_lock);
2583 return map; 2583 return map;
2584 } 2584 }
2585 2585
2586 /* 2586 /*
2587 * Functions to lock and unlock any filesystem running on the 2587 * Functions to lock and unlock any filesystem running on the
2588 * device. 2588 * device.
2589 */ 2589 */
2590 static int lock_fs(struct mapped_device *md) 2590 static int lock_fs(struct mapped_device *md)
2591 { 2591 {
2592 int r; 2592 int r;
2593 2593
2594 WARN_ON(md->frozen_sb); 2594 WARN_ON(md->frozen_sb);
2595 2595
2596 md->frozen_sb = freeze_bdev(md->bdev); 2596 md->frozen_sb = freeze_bdev(md->bdev);
2597 if (IS_ERR(md->frozen_sb)) { 2597 if (IS_ERR(md->frozen_sb)) {
2598 r = PTR_ERR(md->frozen_sb); 2598 r = PTR_ERR(md->frozen_sb);
2599 md->frozen_sb = NULL; 2599 md->frozen_sb = NULL;
2600 return r; 2600 return r;
2601 } 2601 }
2602 2602
2603 set_bit(DMF_FROZEN, &md->flags); 2603 set_bit(DMF_FROZEN, &md->flags);
2604 2604
2605 return 0; 2605 return 0;
2606 } 2606 }
2607 2607
2608 static void unlock_fs(struct mapped_device *md) 2608 static void unlock_fs(struct mapped_device *md)
2609 { 2609 {
2610 if (!test_bit(DMF_FROZEN, &md->flags)) 2610 if (!test_bit(DMF_FROZEN, &md->flags))
2611 return; 2611 return;
2612 2612
2613 thaw_bdev(md->bdev, md->frozen_sb); 2613 thaw_bdev(md->bdev, md->frozen_sb);
2614 md->frozen_sb = NULL; 2614 md->frozen_sb = NULL;
2615 clear_bit(DMF_FROZEN, &md->flags); 2615 clear_bit(DMF_FROZEN, &md->flags);
2616 } 2616 }
2617 2617
2618 /* 2618 /*
2619 * We need to be able to change a mapping table under a mounted 2619 * We need to be able to change a mapping table under a mounted
2620 * filesystem. For example we might want to move some data in 2620 * filesystem. For example we might want to move some data in
2621 * the background. Before the table can be swapped with 2621 * the background. Before the table can be swapped with
2622 * dm_bind_table, dm_suspend must be called to flush any in 2622 * dm_bind_table, dm_suspend must be called to flush any in
2623 * flight bios and ensure that any further io gets deferred. 2623 * flight bios and ensure that any further io gets deferred.
2624 */ 2624 */
2625 /* 2625 /*
2626 * Suspend mechanism in request-based dm. 2626 * Suspend mechanism in request-based dm.
2627 * 2627 *
2628 * 1. Flush all I/Os by lock_fs() if needed. 2628 * 1. Flush all I/Os by lock_fs() if needed.
2629 * 2. Stop dispatching any I/O by stopping the request_queue. 2629 * 2. Stop dispatching any I/O by stopping the request_queue.
2630 * 3. Wait for all in-flight I/Os to be completed or requeued. 2630 * 3. Wait for all in-flight I/Os to be completed or requeued.
2631 * 2631 *
2632 * To abort suspend, start the request_queue. 2632 * To abort suspend, start the request_queue.
2633 */ 2633 */
2634 int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 2634 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2635 { 2635 {
2636 struct dm_table *map = NULL; 2636 struct dm_table *map = NULL;
2637 int r = 0; 2637 int r = 0;
2638 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; 2638 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
2639 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; 2639 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
2640 2640
2641 mutex_lock(&md->suspend_lock); 2641 mutex_lock(&md->suspend_lock);
2642 2642
2643 if (dm_suspended_md(md)) { 2643 if (dm_suspended_md(md)) {
2644 r = -EINVAL; 2644 r = -EINVAL;
2645 goto out_unlock; 2645 goto out_unlock;
2646 } 2646 }
2647 2647
2648 map = dm_get_live_table(md); 2648 map = dm_get_live_table(md);
2649 2649
2650 /* 2650 /*
2651 * DMF_NOFLUSH_SUSPENDING must be set before presuspend. 2651 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
2652 * This flag is cleared before dm_suspend returns. 2652 * This flag is cleared before dm_suspend returns.
2653 */ 2653 */
2654 if (noflush) 2654 if (noflush)
2655 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 2655 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
2656 2656
2657 /* This does not get reverted if there's an error later. */ 2657 /* This does not get reverted if there's an error later. */
2658 dm_table_presuspend_targets(map); 2658 dm_table_presuspend_targets(map);
2659 2659
2660 /* 2660 /*
2661 * Flush I/O to the device. 2661 * Flush I/O to the device.
2662 * Any I/O submitted after lock_fs() may not be flushed. 2662 * Any I/O submitted after lock_fs() may not be flushed.
2663 * noflush takes precedence over do_lockfs. 2663 * noflush takes precedence over do_lockfs.
2664 * (lock_fs() flushes I/Os and waits for them to complete.) 2664 * (lock_fs() flushes I/Os and waits for them to complete.)
2665 */ 2665 */
2666 if (!noflush && do_lockfs) { 2666 if (!noflush && do_lockfs) {
2667 r = lock_fs(md); 2667 r = lock_fs(md);
2668 if (r) 2668 if (r)
2669 goto out; 2669 goto out;
2670 } 2670 }
2671 2671
2672 /* 2672 /*
2673 * Here we must make sure that no processes are submitting requests 2673 * Here we must make sure that no processes are submitting requests
2674 * to target drivers i.e. no one may be executing 2674 * to target drivers i.e. no one may be executing
2675 * __split_and_process_bio. This is called from dm_request and 2675 * __split_and_process_bio. This is called from dm_request and
2676 * dm_wq_work. 2676 * dm_wq_work.
2677 * 2677 *
2678 * To get all processes out of __split_and_process_bio in dm_request, 2678 * To get all processes out of __split_and_process_bio in dm_request,
2679 * we take the write lock. To prevent any process from reentering 2679 * we take the write lock. To prevent any process from reentering
2680 * __split_and_process_bio from dm_request, we set 2680 * __split_and_process_bio from dm_request, we set
2681 * DMF_QUEUE_IO_TO_THREAD. 2681 * DMF_QUEUE_IO_TO_THREAD.
2682 * 2682 *
2683 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND 2683 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
2684 * and call flush_workqueue(md->wq). flush_workqueue will wait until 2684 * and call flush_workqueue(md->wq). flush_workqueue will wait until
2685 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any 2685 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
2686 * further calls to __split_and_process_bio from dm_wq_work. 2686 * further calls to __split_and_process_bio from dm_wq_work.
2687 */ 2687 */
2688 down_write(&md->io_lock); 2688 down_write(&md->io_lock);
2689 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2689 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2690 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); 2690 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2691 up_write(&md->io_lock); 2691 up_write(&md->io_lock);
2692 2692
2693 /* 2693 /*
2694 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which 2694 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
2695 * can be kicked until md->queue is stopped. So stop md->queue before 2695 * can be kicked until md->queue is stopped. So stop md->queue before
2696 * flushing md->wq. 2696 * flushing md->wq.
2697 */ 2697 */
2698 if (dm_request_based(md)) 2698 if (dm_request_based(md))
2699 stop_queue(md->queue); 2699 stop_queue(md->queue);
2700 2700
2701 flush_workqueue(md->wq); 2701 flush_workqueue(md->wq);
2702 2702
2703 /* 2703 /*
2704 * At this point no more requests are entering target request routines. 2704 * At this point no more requests are entering target request routines.
2705 * We call dm_wait_for_completion to wait for all existing requests 2705 * We call dm_wait_for_completion to wait for all existing requests
2706 * to finish. 2706 * to finish.
2707 */ 2707 */
2708 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); 2708 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
2709 2709
2710 down_write(&md->io_lock); 2710 down_write(&md->io_lock);
2711 if (noflush) 2711 if (noflush)
2712 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 2712 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
2713 up_write(&md->io_lock); 2713 up_write(&md->io_lock);
2714 2714
2715 /* were we interrupted ? */ 2715 /* were we interrupted ? */
2716 if (r < 0) { 2716 if (r < 0) {
2717 dm_queue_flush(md); 2717 dm_queue_flush(md);
2718 2718
2719 if (dm_request_based(md)) 2719 if (dm_request_based(md))
2720 start_queue(md->queue); 2720 start_queue(md->queue);
2721 2721
2722 unlock_fs(md); 2722 unlock_fs(md);
2723 goto out; /* pushback list is already flushed, so skip flush */ 2723 goto out; /* pushback list is already flushed, so skip flush */
2724 } 2724 }
2725 2725
2726 /* 2726 /*
2727 * If dm_wait_for_completion returned 0, the device is completely 2727 * If dm_wait_for_completion returned 0, the device is completely
2728 * quiescent now. There is no request-processing activity. All new 2728 * quiescent now. There is no request-processing activity. All new
2729 * requests are being added to md->deferred list. 2729 * requests are being added to md->deferred list.
2730 */ 2730 */
2731 2731
2732 set_bit(DMF_SUSPENDED, &md->flags); 2732 set_bit(DMF_SUSPENDED, &md->flags);
2733 2733
2734 dm_table_postsuspend_targets(map); 2734 dm_table_postsuspend_targets(map);
2735 2735
2736 out: 2736 out:
2737 dm_table_put(map); 2737 dm_table_put(map);
2738 2738
2739 out_unlock: 2739 out_unlock:
2740 mutex_unlock(&md->suspend_lock); 2740 mutex_unlock(&md->suspend_lock);
2741 return r; 2741 return r;
2742 } 2742 }
2743 2743
2744 int dm_resume(struct mapped_device *md) 2744 int dm_resume(struct mapped_device *md)
2745 { 2745 {
2746 int r = -EINVAL; 2746 int r = -EINVAL;
2747 struct dm_table *map = NULL; 2747 struct dm_table *map = NULL;
2748 2748
2749 mutex_lock(&md->suspend_lock); 2749 mutex_lock(&md->suspend_lock);
2750 if (!dm_suspended_md(md)) 2750 if (!dm_suspended_md(md))
2751 goto out; 2751 goto out;
2752 2752
2753 map = dm_get_live_table(md); 2753 map = dm_get_live_table(md);
2754 if (!map || !dm_table_get_size(map)) 2754 if (!map || !dm_table_get_size(map))
2755 goto out; 2755 goto out;
2756 2756
2757 r = dm_table_resume_targets(map); 2757 r = dm_table_resume_targets(map);
2758 if (r) 2758 if (r)
2759 goto out; 2759 goto out;
2760 2760
2761 dm_queue_flush(md); 2761 dm_queue_flush(md);
2762 2762
2763 /* 2763 /*
2764 * Flushing deferred I/Os must be done after targets are resumed 2764 * Flushing deferred I/Os must be done after targets are resumed
2765 * so that mapping of targets can work correctly. 2765 * so that mapping of targets can work correctly.
2766 * Request-based dm is queueing the deferred I/Os in its request_queue. 2766 * Request-based dm is queueing the deferred I/Os in its request_queue.
2767 */ 2767 */
2768 if (dm_request_based(md)) 2768 if (dm_request_based(md))
2769 start_queue(md->queue); 2769 start_queue(md->queue);
2770 2770
2771 unlock_fs(md); 2771 unlock_fs(md);
2772 2772
2773 clear_bit(DMF_SUSPENDED, &md->flags); 2773 clear_bit(DMF_SUSPENDED, &md->flags);
2774 2774
2775 dm_table_unplug_all(map); 2775 dm_table_unplug_all(map);
2776 r = 0; 2776 r = 0;
2777 out: 2777 out:
2778 dm_table_put(map); 2778 dm_table_put(map);
2779 mutex_unlock(&md->suspend_lock); 2779 mutex_unlock(&md->suspend_lock);
2780 2780
2781 return r; 2781 return r;
2782 } 2782 }
2783 2783
2784 /*----------------------------------------------------------------- 2784 /*-----------------------------------------------------------------
2785 * Event notification. 2785 * Event notification.
2786 *---------------------------------------------------------------*/ 2786 *---------------------------------------------------------------*/
2787 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, 2787 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
2788 unsigned cookie) 2788 unsigned cookie)
2789 { 2789 {
2790 char udev_cookie[DM_COOKIE_LENGTH]; 2790 char udev_cookie[DM_COOKIE_LENGTH];
2791 char *envp[] = { udev_cookie, NULL }; 2791 char *envp[] = { udev_cookie, NULL };
2792 2792
2793 if (!cookie) 2793 if (!cookie)
2794 return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); 2794 return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
2795 else { 2795 else {
2796 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", 2796 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
2797 DM_COOKIE_ENV_VAR_NAME, cookie); 2797 DM_COOKIE_ENV_VAR_NAME, cookie);
2798 return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, 2798 return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
2799 action, envp); 2799 action, envp);
2800 } 2800 }
2801 } 2801 }
2802 2802
2803 uint32_t dm_next_uevent_seq(struct mapped_device *md) 2803 uint32_t dm_next_uevent_seq(struct mapped_device *md)
2804 { 2804 {
2805 return atomic_add_return(1, &md->uevent_seq); 2805 return atomic_add_return(1, &md->uevent_seq);
2806 } 2806 }
2807 2807
2808 uint32_t dm_get_event_nr(struct mapped_device *md) 2808 uint32_t dm_get_event_nr(struct mapped_device *md)
2809 { 2809 {
2810 return atomic_read(&md->event_nr); 2810 return atomic_read(&md->event_nr);
2811 } 2811 }
2812 2812
2813 int dm_wait_event(struct mapped_device *md, int event_nr) 2813 int dm_wait_event(struct mapped_device *md, int event_nr)
2814 { 2814 {
2815 return wait_event_interruptible(md->eventq, 2815 return wait_event_interruptible(md->eventq,
2816 (event_nr != atomic_read(&md->event_nr))); 2816 (event_nr != atomic_read(&md->event_nr)));
2817 } 2817 }
2818 2818
2819 void dm_uevent_add(struct mapped_device *md, struct list_head *elist) 2819 void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
2820 { 2820 {
2821 unsigned long flags; 2821 unsigned long flags;
2822 2822
2823 spin_lock_irqsave(&md->uevent_lock, flags); 2823 spin_lock_irqsave(&md->uevent_lock, flags);
2824 list_add(elist, &md->uevent_list); 2824 list_add(elist, &md->uevent_list);
2825 spin_unlock_irqrestore(&md->uevent_lock, flags); 2825 spin_unlock_irqrestore(&md->uevent_lock, flags);
2826 } 2826 }
2827 2827
2828 /* 2828 /*
2829 * The gendisk is only valid as long as you have a reference 2829 * The gendisk is only valid as long as you have a reference
2830 * count on 'md'. 2830 * count on 'md'.
2831 */ 2831 */
2832 struct gendisk *dm_disk(struct mapped_device *md) 2832 struct gendisk *dm_disk(struct mapped_device *md)
2833 { 2833 {
2834 return md->disk; 2834 return md->disk;
2835 } 2835 }
2836 2836
2837 struct kobject *dm_kobject(struct mapped_device *md) 2837 struct kobject *dm_kobject(struct mapped_device *md)
2838 { 2838 {
2839 return &md->kobj; 2839 return &md->kobj;
2840 } 2840 }
2841 2841
2842 /* 2842 /*
2843 * struct mapped_device should not be exported outside of dm.c 2843 * struct mapped_device should not be exported outside of dm.c
2844 * so use this check to verify that kobj is part of md structure 2844 * so use this check to verify that kobj is part of md structure
2845 */ 2845 */
2846 struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 2846 struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
2847 { 2847 {
2848 struct mapped_device *md; 2848 struct mapped_device *md;
2849 2849
2850 md = container_of(kobj, struct mapped_device, kobj); 2850 md = container_of(kobj, struct mapped_device, kobj);
2851 if (&md->kobj != kobj) 2851 if (&md->kobj != kobj)
2852 return NULL; 2852 return NULL;
2853 2853
2854 if (test_bit(DMF_FREEING, &md->flags) || 2854 if (test_bit(DMF_FREEING, &md->flags) ||
2855 dm_deleting_md(md)) 2855 dm_deleting_md(md))
2856 return NULL; 2856 return NULL;
2857 2857
2858 dm_get(md); 2858 dm_get(md);
2859 return md; 2859 return md;
2860 } 2860 }
2861 2861
2862 int dm_suspended_md(struct mapped_device *md) 2862 int dm_suspended_md(struct mapped_device *md)
2863 { 2863 {
2864 return test_bit(DMF_SUSPENDED, &md->flags); 2864 return test_bit(DMF_SUSPENDED, &md->flags);
2865 } 2865 }
2866 2866
2867 int dm_suspended(struct dm_target *ti) 2867 int dm_suspended(struct dm_target *ti)
2868 { 2868 {
2869 return dm_suspended_md(dm_table_get_md(ti->table)); 2869 return dm_suspended_md(dm_table_get_md(ti->table));
2870 } 2870 }
2871 EXPORT_SYMBOL_GPL(dm_suspended); 2871 EXPORT_SYMBOL_GPL(dm_suspended);
2872 2872
2873 int dm_noflush_suspending(struct dm_target *ti) 2873 int dm_noflush_suspending(struct dm_target *ti)
2874 { 2874 {
2875 return __noflush_suspending(dm_table_get_md(ti->table)); 2875 return __noflush_suspending(dm_table_get_md(ti->table));
2876 } 2876 }
2877 EXPORT_SYMBOL_GPL(dm_noflush_suspending); 2877 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
2878 2878
2879 struct dm_md_mempools *dm_alloc_md_mempools(unsigned type) 2879 struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
2880 { 2880 {
2881 struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); 2881 struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
2882 2882
2883 if (!pools) 2883 if (!pools)
2884 return NULL; 2884 return NULL;
2885 2885
2886 pools->io_pool = (type == DM_TYPE_BIO_BASED) ? 2886 pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
2887 mempool_create_slab_pool(MIN_IOS, _io_cache) : 2887 mempool_create_slab_pool(MIN_IOS, _io_cache) :
2888 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); 2888 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
2889 if (!pools->io_pool) 2889 if (!pools->io_pool)
2890 goto free_pools_and_out; 2890 goto free_pools_and_out;
2891 2891
2892 pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? 2892 pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
2893 mempool_create_slab_pool(MIN_IOS, _tio_cache) : 2893 mempool_create_slab_pool(MIN_IOS, _tio_cache) :
2894 mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); 2894 mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
2895 if (!pools->tio_pool) 2895 if (!pools->tio_pool)
2896 goto free_io_pool_and_out; 2896 goto free_io_pool_and_out;
2897 2897
2898 pools->bs = (type == DM_TYPE_BIO_BASED) ? 2898 pools->bs = (type == DM_TYPE_BIO_BASED) ?
2899 bioset_create(16, 0) : bioset_create(MIN_IOS, 0); 2899 bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
2900 if (!pools->bs) 2900 if (!pools->bs)
2901 goto free_tio_pool_and_out; 2901 goto free_tio_pool_and_out;
2902 2902
2903 return pools; 2903 return pools;
2904 2904
2905 free_tio_pool_and_out: 2905 free_tio_pool_and_out:
2906 mempool_destroy(pools->tio_pool); 2906 mempool_destroy(pools->tio_pool);
2907 2907
2908 free_io_pool_and_out: 2908 free_io_pool_and_out:
2909 mempool_destroy(pools->io_pool); 2909 mempool_destroy(pools->io_pool);
2910 2910
2911 free_pools_and_out: 2911 free_pools_and_out:
2912 kfree(pools); 2912 kfree(pools);
2913 2913
2914 return NULL; 2914 return NULL;
2915 } 2915 }
2916 2916
2917 void dm_free_md_mempools(struct dm_md_mempools *pools) 2917 void dm_free_md_mempools(struct dm_md_mempools *pools)
2918 { 2918 {
2919 if (!pools) 2919 if (!pools)
2920 return; 2920 return;
2921 2921
2922 if (pools->io_pool) 2922 if (pools->io_pool)
2923 mempool_destroy(pools->io_pool); 2923 mempool_destroy(pools->io_pool);
2924 2924
2925 if (pools->tio_pool) 2925 if (pools->tio_pool)
2926 mempool_destroy(pools->tio_pool); 2926 mempool_destroy(pools->tio_pool);
2927 2927
2928 if (pools->bs) 2928 if (pools->bs)
2929 bioset_free(pools->bs); 2929 bioset_free(pools->bs);
2930 2930
2931 kfree(pools); 2931 kfree(pools);
2932 } 2932 }
2933 2933
2934 static const struct block_device_operations dm_blk_dops = { 2934 static const struct block_device_operations dm_blk_dops = {
2935 .open = dm_blk_open, 2935 .open = dm_blk_open,
2936 .release = dm_blk_close, 2936 .release = dm_blk_close,
2937 .ioctl = dm_blk_ioctl, 2937 .ioctl = dm_blk_ioctl,
2938 .getgeo = dm_blk_getgeo, 2938 .getgeo = dm_blk_getgeo,
2939 .owner = THIS_MODULE 2939 .owner = THIS_MODULE
2940 }; 2940 };
2941 2941
2942 EXPORT_SYMBOL(dm_get_mapinfo); 2942 EXPORT_SYMBOL(dm_get_mapinfo);
2943 2943
2944 /* 2944 /*
2945 * module hooks 2945 * module hooks
2946 */ 2946 */
2947 module_init(dm_init); 2947 module_init(dm_init);
2948 module_exit(dm_exit); 2948 module_exit(dm_exit);
2949 2949
2950 module_param(major, uint, 0); 2950 module_param(major, uint, 0);
2951 MODULE_PARM_DESC(major, "The major number of the device mapper"); 2951 MODULE_PARM_DESC(major, "The major number of the device mapper");
2952 MODULE_DESCRIPTION(DM_NAME " driver"); 2952 MODULE_DESCRIPTION(DM_NAME " driver");
2953 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 2953 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
2954 MODULE_LICENSE("GPL"); 2954 MODULE_LICENSE("GPL");
2955 2955
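The dm.c hunk above (new line 2248) shows the conversion for a queue whose device has a flushable write cache: blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH) becomes blk_queue_flush(md->queue, REQ_FLUSH). As a minimal sketch of how a driver would build the flag word under the new interface (the example_* names are hypothetical and not part of this commit):

#include <linux/blkdev.h>

/*
 * Illustrative sketch only, not taken from this commit: a hypothetical
 * driver advertises its cache capabilities once at queue setup time.
 */
static void example_setup_flush(struct request_queue *q,
				bool has_write_cache, bool supports_fua)
{
	unsigned int flush = 0;

	if (has_write_cache)			/* cache present and flushable */
		flush |= REQ_FLUSH;
	if (has_write_cache && supports_fua)	/* FUA writes also handled */
		flush |= REQ_FUA;

	/* passing 0 presumably leaves the queue at its no-flush default */
	blk_queue_flush(q, flush);
}

A device with no volatile write cache advertises nothing, which is what the mmc queue below ends up doing.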
drivers/mmc/card/queue.c
1 /* 1 /*
2 * linux/drivers/mmc/card/queue.c 2 * linux/drivers/mmc/card/queue.c
3 * 3 *
4 * Copyright (C) 2003 Russell King, All Rights Reserved. 4 * Copyright (C) 2003 Russell King, All Rights Reserved.
5 * Copyright 2006-2007 Pierre Ossman 5 * Copyright 2006-2007 Pierre Ossman
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 * 10 *
11 */ 11 */
12 #include <linux/slab.h> 12 #include <linux/slab.h>
13 #include <linux/module.h> 13 #include <linux/module.h>
14 #include <linux/blkdev.h> 14 #include <linux/blkdev.h>
15 #include <linux/freezer.h> 15 #include <linux/freezer.h>
16 #include <linux/kthread.h> 16 #include <linux/kthread.h>
17 #include <linux/scatterlist.h> 17 #include <linux/scatterlist.h>
18 18
19 #include <linux/mmc/card.h> 19 #include <linux/mmc/card.h>
20 #include <linux/mmc/host.h> 20 #include <linux/mmc/host.h>
21 #include "queue.h" 21 #include "queue.h"
22 22
23 #define MMC_QUEUE_BOUNCESZ 65536 23 #define MMC_QUEUE_BOUNCESZ 65536
24 24
25 #define MMC_QUEUE_SUSPENDED (1 << 0) 25 #define MMC_QUEUE_SUSPENDED (1 << 0)
26 26
27 /* 27 /*
28 * Prepare a MMC request. This just filters out odd stuff. 28 * Prepare a MMC request. This just filters out odd stuff.
29 */ 29 */
30 static int mmc_prep_request(struct request_queue *q, struct request *req) 30 static int mmc_prep_request(struct request_queue *q, struct request *req)
31 { 31 {
32 /* 32 /*
33 * We only like normal block requests and discards. 33 * We only like normal block requests and discards.
34 */ 34 */
35 if (req->cmd_type != REQ_TYPE_FS && !(req->cmd_flags & REQ_DISCARD)) { 35 if (req->cmd_type != REQ_TYPE_FS && !(req->cmd_flags & REQ_DISCARD)) {
36 blk_dump_rq_flags(req, "MMC bad request"); 36 blk_dump_rq_flags(req, "MMC bad request");
37 return BLKPREP_KILL; 37 return BLKPREP_KILL;
38 } 38 }
39 39
40 req->cmd_flags |= REQ_DONTPREP; 40 req->cmd_flags |= REQ_DONTPREP;
41 41
42 return BLKPREP_OK; 42 return BLKPREP_OK;
43 } 43 }
44 44
45 static int mmc_queue_thread(void *d) 45 static int mmc_queue_thread(void *d)
46 { 46 {
47 struct mmc_queue *mq = d; 47 struct mmc_queue *mq = d;
48 struct request_queue *q = mq->queue; 48 struct request_queue *q = mq->queue;
49 49
50 current->flags |= PF_MEMALLOC; 50 current->flags |= PF_MEMALLOC;
51 51
52 down(&mq->thread_sem); 52 down(&mq->thread_sem);
53 do { 53 do {
54 struct request *req = NULL; 54 struct request *req = NULL;
55 55
56 spin_lock_irq(q->queue_lock); 56 spin_lock_irq(q->queue_lock);
57 set_current_state(TASK_INTERRUPTIBLE); 57 set_current_state(TASK_INTERRUPTIBLE);
58 if (!blk_queue_plugged(q)) 58 if (!blk_queue_plugged(q))
59 req = blk_fetch_request(q); 59 req = blk_fetch_request(q);
60 mq->req = req; 60 mq->req = req;
61 spin_unlock_irq(q->queue_lock); 61 spin_unlock_irq(q->queue_lock);
62 62
63 if (!req) { 63 if (!req) {
64 if (kthread_should_stop()) { 64 if (kthread_should_stop()) {
65 set_current_state(TASK_RUNNING); 65 set_current_state(TASK_RUNNING);
66 break; 66 break;
67 } 67 }
68 up(&mq->thread_sem); 68 up(&mq->thread_sem);
69 schedule(); 69 schedule();
70 down(&mq->thread_sem); 70 down(&mq->thread_sem);
71 continue; 71 continue;
72 } 72 }
73 set_current_state(TASK_RUNNING); 73 set_current_state(TASK_RUNNING);
74 74
75 mq->issue_fn(mq, req); 75 mq->issue_fn(mq, req);
76 } while (1); 76 } while (1);
77 up(&mq->thread_sem); 77 up(&mq->thread_sem);
78 78
79 return 0; 79 return 0;
80 } 80 }
81 81
82 /* 82 /*
83 * Generic MMC request handler. This is called for any queue on a 83 * Generic MMC request handler. This is called for any queue on a
84 * particular host. When the host is not busy, we look for a request 84 * particular host. When the host is not busy, we look for a request
85 * on any queue on this host, and attempt to issue it. This may 85 * on any queue on this host, and attempt to issue it. This may
86 * not be the queue we were asked to process. 86 * not be the queue we were asked to process.
87 */ 87 */
88 static void mmc_request(struct request_queue *q) 88 static void mmc_request(struct request_queue *q)
89 { 89 {
90 struct mmc_queue *mq = q->queuedata; 90 struct mmc_queue *mq = q->queuedata;
91 struct request *req; 91 struct request *req;
92 92
93 if (!mq) { 93 if (!mq) {
94 while ((req = blk_fetch_request(q)) != NULL) { 94 while ((req = blk_fetch_request(q)) != NULL) {
95 req->cmd_flags |= REQ_QUIET; 95 req->cmd_flags |= REQ_QUIET;
96 __blk_end_request_all(req, -EIO); 96 __blk_end_request_all(req, -EIO);
97 } 97 }
98 return; 98 return;
99 } 99 }
100 100
101 if (!mq->req) 101 if (!mq->req)
102 wake_up_process(mq->thread); 102 wake_up_process(mq->thread);
103 } 103 }
104 104
105 /** 105 /**
106 * mmc_init_queue - initialise a queue structure. 106 * mmc_init_queue - initialise a queue structure.
107 * @mq: mmc queue 107 * @mq: mmc queue
108 * @card: mmc card to attach this queue 108 * @card: mmc card to attach this queue
109 * @lock: queue lock 109 * @lock: queue lock
110 * 110 *
111 * Initialise a MMC card request queue. 111 * Initialise a MMC card request queue.
112 */ 112 */
113 int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock) 113 int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock)
114 { 114 {
115 struct mmc_host *host = card->host; 115 struct mmc_host *host = card->host;
116 u64 limit = BLK_BOUNCE_HIGH; 116 u64 limit = BLK_BOUNCE_HIGH;
117 int ret; 117 int ret;
118 118
119 if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) 119 if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
120 limit = *mmc_dev(host)->dma_mask; 120 limit = *mmc_dev(host)->dma_mask;
121 121
122 mq->card = card; 122 mq->card = card;
123 mq->queue = blk_init_queue(mmc_request, lock); 123 mq->queue = blk_init_queue(mmc_request, lock);
124 if (!mq->queue) 124 if (!mq->queue)
125 return -ENOMEM; 125 return -ENOMEM;
126 126
127 mq->queue->queuedata = mq; 127 mq->queue->queuedata = mq;
128 mq->req = NULL; 128 mq->req = NULL;
129 129
130 blk_queue_prep_rq(mq->queue, mmc_prep_request); 130 blk_queue_prep_rq(mq->queue, mmc_prep_request);
131 blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN);
132 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); 131 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
133 if (mmc_can_erase(card)) { 132 if (mmc_can_erase(card)) {
134 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); 133 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
135 mq->queue->limits.max_discard_sectors = UINT_MAX; 134 mq->queue->limits.max_discard_sectors = UINT_MAX;
136 if (card->erased_byte == 0) 135 if (card->erased_byte == 0)
137 mq->queue->limits.discard_zeroes_data = 1; 136 mq->queue->limits.discard_zeroes_data = 1;
138 if (!mmc_can_trim(card) && is_power_of_2(card->erase_size)) { 137 if (!mmc_can_trim(card) && is_power_of_2(card->erase_size)) {
139 mq->queue->limits.discard_granularity = 138 mq->queue->limits.discard_granularity =
140 card->erase_size << 9; 139 card->erase_size << 9;
141 mq->queue->limits.discard_alignment = 140 mq->queue->limits.discard_alignment =
142 card->erase_size << 9; 141 card->erase_size << 9;
143 } 142 }
144 if (mmc_can_secure_erase_trim(card)) 143 if (mmc_can_secure_erase_trim(card))
145 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, 144 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD,
146 mq->queue); 145 mq->queue);
147 } 146 }
148 147
149 #ifdef CONFIG_MMC_BLOCK_BOUNCE 148 #ifdef CONFIG_MMC_BLOCK_BOUNCE
150 if (host->max_hw_segs == 1) { 149 if (host->max_hw_segs == 1) {
151 unsigned int bouncesz; 150 unsigned int bouncesz;
152 151
153 bouncesz = MMC_QUEUE_BOUNCESZ; 152 bouncesz = MMC_QUEUE_BOUNCESZ;
154 153
155 if (bouncesz > host->max_req_size) 154 if (bouncesz > host->max_req_size)
156 bouncesz = host->max_req_size; 155 bouncesz = host->max_req_size;
157 if (bouncesz > host->max_seg_size) 156 if (bouncesz > host->max_seg_size)
158 bouncesz = host->max_seg_size; 157 bouncesz = host->max_seg_size;
159 if (bouncesz > (host->max_blk_count * 512)) 158 if (bouncesz > (host->max_blk_count * 512))
160 bouncesz = host->max_blk_count * 512; 159 bouncesz = host->max_blk_count * 512;
161 160
162 if (bouncesz > 512) { 161 if (bouncesz > 512) {
163 mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); 162 mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
164 if (!mq->bounce_buf) { 163 if (!mq->bounce_buf) {
165 printk(KERN_WARNING "%s: unable to " 164 printk(KERN_WARNING "%s: unable to "
166 "allocate bounce buffer\n", 165 "allocate bounce buffer\n",
167 mmc_card_name(card)); 166 mmc_card_name(card));
168 } 167 }
169 } 168 }
170 169
171 if (mq->bounce_buf) { 170 if (mq->bounce_buf) {
172 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY); 171 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
173 blk_queue_max_hw_sectors(mq->queue, bouncesz / 512); 172 blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
174 blk_queue_max_segments(mq->queue, bouncesz / 512); 173 blk_queue_max_segments(mq->queue, bouncesz / 512);
175 blk_queue_max_segment_size(mq->queue, bouncesz); 174 blk_queue_max_segment_size(mq->queue, bouncesz);
176 175
177 mq->sg = kmalloc(sizeof(struct scatterlist), 176 mq->sg = kmalloc(sizeof(struct scatterlist),
178 GFP_KERNEL); 177 GFP_KERNEL);
179 if (!mq->sg) { 178 if (!mq->sg) {
180 ret = -ENOMEM; 179 ret = -ENOMEM;
181 goto cleanup_queue; 180 goto cleanup_queue;
182 } 181 }
183 sg_init_table(mq->sg, 1); 182 sg_init_table(mq->sg, 1);
184 183
185 mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * 184 mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
186 bouncesz / 512, GFP_KERNEL); 185 bouncesz / 512, GFP_KERNEL);
187 if (!mq->bounce_sg) { 186 if (!mq->bounce_sg) {
188 ret = -ENOMEM; 187 ret = -ENOMEM;
189 goto cleanup_queue; 188 goto cleanup_queue;
190 } 189 }
191 sg_init_table(mq->bounce_sg, bouncesz / 512); 190 sg_init_table(mq->bounce_sg, bouncesz / 512);
192 } 191 }
193 } 192 }
194 #endif 193 #endif
195 194
196 if (!mq->bounce_buf) { 195 if (!mq->bounce_buf) {
197 blk_queue_bounce_limit(mq->queue, limit); 196 blk_queue_bounce_limit(mq->queue, limit);
198 blk_queue_max_hw_sectors(mq->queue, 197 blk_queue_max_hw_sectors(mq->queue,
199 min(host->max_blk_count, host->max_req_size / 512)); 198 min(host->max_blk_count, host->max_req_size / 512));
200 blk_queue_max_segments(mq->queue, host->max_hw_segs); 199 blk_queue_max_segments(mq->queue, host->max_hw_segs);
201 blk_queue_max_segment_size(mq->queue, host->max_seg_size); 200 blk_queue_max_segment_size(mq->queue, host->max_seg_size);
202 201
203 mq->sg = kmalloc(sizeof(struct scatterlist) * 202 mq->sg = kmalloc(sizeof(struct scatterlist) *
204 host->max_phys_segs, GFP_KERNEL); 203 host->max_phys_segs, GFP_KERNEL);
205 if (!mq->sg) { 204 if (!mq->sg) {
206 ret = -ENOMEM; 205 ret = -ENOMEM;
207 goto cleanup_queue; 206 goto cleanup_queue;
208 } 207 }
209 sg_init_table(mq->sg, host->max_phys_segs); 208 sg_init_table(mq->sg, host->max_phys_segs);
210 } 209 }
211 210
212 init_MUTEX(&mq->thread_sem); 211 init_MUTEX(&mq->thread_sem);
213 212
214 mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); 213 mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd");
215 if (IS_ERR(mq->thread)) { 214 if (IS_ERR(mq->thread)) {
216 ret = PTR_ERR(mq->thread); 215 ret = PTR_ERR(mq->thread);
217 goto free_bounce_sg; 216 goto free_bounce_sg;
218 } 217 }
219 218
220 return 0; 219 return 0;
221 free_bounce_sg: 220 free_bounce_sg:
222 if (mq->bounce_sg) 221 if (mq->bounce_sg)
223 kfree(mq->bounce_sg); 222 kfree(mq->bounce_sg);
224 mq->bounce_sg = NULL; 223 mq->bounce_sg = NULL;
225 cleanup_queue: 224 cleanup_queue:
226 if (mq->sg) 225 if (mq->sg)
227 kfree(mq->sg); 226 kfree(mq->sg);
228 mq->sg = NULL; 227 mq->sg = NULL;
229 if (mq->bounce_buf) 228 if (mq->bounce_buf)
230 kfree(mq->bounce_buf); 229 kfree(mq->bounce_buf);
231 mq->bounce_buf = NULL; 230 mq->bounce_buf = NULL;
232 blk_cleanup_queue(mq->queue); 231 blk_cleanup_queue(mq->queue);
233 return ret; 232 return ret;
234 } 233 }
235 234
236 void mmc_cleanup_queue(struct mmc_queue *mq) 235 void mmc_cleanup_queue(struct mmc_queue *mq)
237 { 236 {
238 struct request_queue *q = mq->queue; 237 struct request_queue *q = mq->queue;
239 unsigned long flags; 238 unsigned long flags;
240 239
241 /* Make sure the queue isn't suspended, as that will deadlock */ 240 /* Make sure the queue isn't suspended, as that will deadlock */
242 mmc_queue_resume(mq); 241 mmc_queue_resume(mq);
243 242
244 /* Then terminate our worker thread */ 243 /* Then terminate our worker thread */
245 kthread_stop(mq->thread); 244 kthread_stop(mq->thread);
246 245
247 /* Empty the queue */ 246 /* Empty the queue */
248 spin_lock_irqsave(q->queue_lock, flags); 247 spin_lock_irqsave(q->queue_lock, flags);
249 q->queuedata = NULL; 248 q->queuedata = NULL;
250 blk_start_queue(q); 249 blk_start_queue(q);
251 spin_unlock_irqrestore(q->queue_lock, flags); 250 spin_unlock_irqrestore(q->queue_lock, flags);
252 251
253 if (mq->bounce_sg) 252 if (mq->bounce_sg)
254 kfree(mq->bounce_sg); 253 kfree(mq->bounce_sg);
255 mq->bounce_sg = NULL; 254 mq->bounce_sg = NULL;
256 255
257 kfree(mq->sg); 256 kfree(mq->sg);
258 mq->sg = NULL; 257 mq->sg = NULL;
259 258
260 if (mq->bounce_buf) 259 if (mq->bounce_buf)
261 kfree(mq->bounce_buf); 260 kfree(mq->bounce_buf);
262 mq->bounce_buf = NULL; 261 mq->bounce_buf = NULL;
263 262
264 mq->card = NULL; 263 mq->card = NULL;
265 } 264 }
266 EXPORT_SYMBOL(mmc_cleanup_queue); 265 EXPORT_SYMBOL(mmc_cleanup_queue);
267 266
268 /** 267 /**
269 * mmc_queue_suspend - suspend a MMC request queue 268 * mmc_queue_suspend - suspend a MMC request queue
270 * @mq: MMC queue to suspend 269 * @mq: MMC queue to suspend
271 * 270 *
272 * Stop the block request queue, and wait for our thread to 271 * Stop the block request queue, and wait for our thread to
273 * complete any outstanding requests. This ensures that we 272 * complete any outstanding requests. This ensures that we
274 * won't suspend while a request is being processed. 273 * won't suspend while a request is being processed.
275 */ 274 */
276 void mmc_queue_suspend(struct mmc_queue *mq) 275 void mmc_queue_suspend(struct mmc_queue *mq)
277 { 276 {
278 struct request_queue *q = mq->queue; 277 struct request_queue *q = mq->queue;
279 unsigned long flags; 278 unsigned long flags;
280 279
281 if (!(mq->flags & MMC_QUEUE_SUSPENDED)) { 280 if (!(mq->flags & MMC_QUEUE_SUSPENDED)) {
282 mq->flags |= MMC_QUEUE_SUSPENDED; 281 mq->flags |= MMC_QUEUE_SUSPENDED;
283 282
284 spin_lock_irqsave(q->queue_lock, flags); 283 spin_lock_irqsave(q->queue_lock, flags);
285 blk_stop_queue(q); 284 blk_stop_queue(q);
286 spin_unlock_irqrestore(q->queue_lock, flags); 285 spin_unlock_irqrestore(q->queue_lock, flags);
287 286
288 down(&mq->thread_sem); 287 down(&mq->thread_sem);
289 } 288 }
290 } 289 }
291 290
292 /** 291 /**
293 * mmc_queue_resume - resume a previously suspended MMC request queue 292 * mmc_queue_resume - resume a previously suspended MMC request queue
294 * @mq: MMC queue to resume 293 * @mq: MMC queue to resume
295 */ 294 */
296 void mmc_queue_resume(struct mmc_queue *mq) 295 void mmc_queue_resume(struct mmc_queue *mq)
297 { 296 {
298 struct request_queue *q = mq->queue; 297 struct request_queue *q = mq->queue;
299 unsigned long flags; 298 unsigned long flags;
300 299
301 if (mq->flags & MMC_QUEUE_SUSPENDED) { 300 if (mq->flags & MMC_QUEUE_SUSPENDED) {
302 mq->flags &= ~MMC_QUEUE_SUSPENDED; 301 mq->flags &= ~MMC_QUEUE_SUSPENDED;
303 302
304 up(&mq->thread_sem); 303 up(&mq->thread_sem);
305 304
306 spin_lock_irqsave(q->queue_lock, flags); 305 spin_lock_irqsave(q->queue_lock, flags);
307 blk_start_queue(q); 306 blk_start_queue(q);
308 spin_unlock_irqrestore(q->queue_lock, flags); 307 spin_unlock_irqrestore(q->queue_lock, flags);
309 } 308 }
310 } 309 }
311 310
312 /* 311 /*
313 * Prepare the sg list(s) to be handed off to the host driver 312 * Prepare the sg list(s) to be handed off to the host driver
314 */ 313 */
315 unsigned int mmc_queue_map_sg(struct mmc_queue *mq) 314 unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
316 { 315 {
317 unsigned int sg_len; 316 unsigned int sg_len;
318 size_t buflen; 317 size_t buflen;
319 struct scatterlist *sg; 318 struct scatterlist *sg;
320 int i; 319 int i;
321 320
322 if (!mq->bounce_buf) 321 if (!mq->bounce_buf)
323 return blk_rq_map_sg(mq->queue, mq->req, mq->sg); 322 return blk_rq_map_sg(mq->queue, mq->req, mq->sg);
324 323
325 BUG_ON(!mq->bounce_sg); 324 BUG_ON(!mq->bounce_sg);
326 325
327 sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); 326 sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg);
328 327
329 mq->bounce_sg_len = sg_len; 328 mq->bounce_sg_len = sg_len;
330 329
331 buflen = 0; 330 buflen = 0;
332 for_each_sg(mq->bounce_sg, sg, sg_len, i) 331 for_each_sg(mq->bounce_sg, sg, sg_len, i)
333 buflen += sg->length; 332 buflen += sg->length;
334 333
335 sg_init_one(mq->sg, mq->bounce_buf, buflen); 334 sg_init_one(mq->sg, mq->bounce_buf, buflen);
336 335
337 return 1; 336 return 1;
338 } 337 }
339 338
340 /* 339 /*
341 * If writing, bounce the data to the buffer before the request 340 * If writing, bounce the data to the buffer before the request
342 * is sent to the host driver 341 * is sent to the host driver
343 */ 342 */
344 void mmc_queue_bounce_pre(struct mmc_queue *mq) 343 void mmc_queue_bounce_pre(struct mmc_queue *mq)
345 { 344 {
346 unsigned long flags; 345 unsigned long flags;
347 346
348 if (!mq->bounce_buf) 347 if (!mq->bounce_buf)
349 return; 348 return;
350 349
351 if (rq_data_dir(mq->req) != WRITE) 350 if (rq_data_dir(mq->req) != WRITE)
352 return; 351 return;
353 352
354 local_irq_save(flags); 353 local_irq_save(flags);
355 sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len, 354 sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len,
356 mq->bounce_buf, mq->sg[0].length); 355 mq->bounce_buf, mq->sg[0].length);
357 local_irq_restore(flags); 356 local_irq_restore(flags);
358 } 357 }
359 358
360 /* 359 /*
361 * If reading, bounce the data from the buffer after the request 360 * If reading, bounce the data from the buffer after the request
362 * has been handled by the host driver 361 * has been handled by the host driver
363 */ 362 */
364 void mmc_queue_bounce_post(struct mmc_queue *mq) 363 void mmc_queue_bounce_post(struct mmc_queue *mq)
365 { 364 {
366 unsigned long flags; 365 unsigned long flags;
367 366
368 if (!mq->bounce_buf) 367 if (!mq->bounce_buf)
369 return; 368 return;
370 369
371 if (rq_data_dir(mq->req) != READ) 370 if (rq_data_dir(mq->req) != READ)
372 return; 371 return;
373 372
374 local_irq_save(flags); 373 local_irq_save(flags);
375 sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len, 374 sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len,
376 mq->bounce_buf, mq->sg[0].length); 375 mq->bounce_buf, mq->sg[0].length);
377 local_irq_restore(flags); 376 local_irq_restore(flags);
378 } 377 }
379 378
380 379
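
The scatter/gather and bounce-buffer helpers above are normally called in a fixed order from the queue thread's issue path. A hedged sketch of that sequence follows; my_issue_data_request and the placement of the host-controller call are assumptions, not code from this file.

#include <linux/blkdev.h>
#include <linux/kernel.h>

#include "queue.h"

static int my_issue_data_request(struct mmc_queue *mq, struct request *req)
{
        unsigned int sg_len;

        mq->req = req;

        /* Map the request; collapses to a single entry when bouncing. */
        sg_len = mmc_queue_map_sg(mq);

        /* For writes, copy the payload into the bounce buffer first. */
        mmc_queue_bounce_pre(mq);

        /* A real driver programs the host controller with mq->sg here. */
        pr_debug("issuing request with %u sg segment(s)\n", sg_len);

        /* After the transfer: for reads, copy the data back out. */
        mmc_queue_bounce_post(mq);

        return 0;
}
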
drivers/s390/block/dasd.c
1 /* 1 /*
2 * File...........: linux/drivers/s390/block/dasd.c 2 * File...........: linux/drivers/s390/block/dasd.c
3 * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> 3 * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
4 * Horst Hummel <Horst.Hummel@de.ibm.com> 4 * Horst Hummel <Horst.Hummel@de.ibm.com>
5 * Carsten Otte <Cotte@de.ibm.com> 5 * Carsten Otte <Cotte@de.ibm.com>
6 * Martin Schwidefsky <schwidefsky@de.ibm.com> 6 * Martin Schwidefsky <schwidefsky@de.ibm.com>
7 * Bugreports.to..: <Linux390@de.ibm.com> 7 * Bugreports.to..: <Linux390@de.ibm.com>
8 * Copyright IBM Corp. 1999, 2009 8 * Copyright IBM Corp. 1999, 2009
9 */ 9 */
10 10
11 #define KMSG_COMPONENT "dasd" 11 #define KMSG_COMPONENT "dasd"
12 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 12 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
13 13
14 #include <linux/kmod.h> 14 #include <linux/kmod.h>
15 #include <linux/init.h> 15 #include <linux/init.h>
16 #include <linux/interrupt.h> 16 #include <linux/interrupt.h>
17 #include <linux/ctype.h> 17 #include <linux/ctype.h>
18 #include <linux/major.h> 18 #include <linux/major.h>
19 #include <linux/slab.h> 19 #include <linux/slab.h>
20 #include <linux/buffer_head.h> 20 #include <linux/buffer_head.h>
21 #include <linux/hdreg.h> 21 #include <linux/hdreg.h>
22 #include <linux/async.h> 22 #include <linux/async.h>
23 #include <linux/mutex.h> 23 #include <linux/mutex.h>
24 #include <linux/smp_lock.h> 24 #include <linux/smp_lock.h>
25 25
26 #include <asm/ccwdev.h> 26 #include <asm/ccwdev.h>
27 #include <asm/ebcdic.h> 27 #include <asm/ebcdic.h>
28 #include <asm/idals.h> 28 #include <asm/idals.h>
29 #include <asm/itcw.h> 29 #include <asm/itcw.h>
30 #include <asm/diag.h> 30 #include <asm/diag.h>
31 31
32 /* This is ugly... */ 32 /* This is ugly... */
33 #define PRINTK_HEADER "dasd:" 33 #define PRINTK_HEADER "dasd:"
34 34
35 #include "dasd_int.h" 35 #include "dasd_int.h"
36 /* 36 /*
37 * SECTION: Constant definitions to be used within this file 37 * SECTION: Constant definitions to be used within this file
38 */ 38 */
39 #define DASD_CHANQ_MAX_SIZE 4 39 #define DASD_CHANQ_MAX_SIZE 4
40 40
41 #define DASD_SLEEPON_START_TAG (void *) 1 41 #define DASD_SLEEPON_START_TAG (void *) 1
42 #define DASD_SLEEPON_END_TAG (void *) 2 42 #define DASD_SLEEPON_END_TAG (void *) 2
43 43
44 /* 44 /*
45 * SECTION: exported variables of dasd.c 45 * SECTION: exported variables of dasd.c
46 */ 46 */
47 debug_info_t *dasd_debug_area; 47 debug_info_t *dasd_debug_area;
48 struct dasd_discipline *dasd_diag_discipline_pointer; 48 struct dasd_discipline *dasd_diag_discipline_pointer;
49 void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *); 49 void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *);
50 50
51 MODULE_AUTHOR("Holger Smolinski <Holger.Smolinski@de.ibm.com>"); 51 MODULE_AUTHOR("Holger Smolinski <Holger.Smolinski@de.ibm.com>");
52 MODULE_DESCRIPTION("Linux on S/390 DASD device driver," 52 MODULE_DESCRIPTION("Linux on S/390 DASD device driver,"
53 " Copyright 2000 IBM Corporation"); 53 " Copyright 2000 IBM Corporation");
54 MODULE_SUPPORTED_DEVICE("dasd"); 54 MODULE_SUPPORTED_DEVICE("dasd");
55 MODULE_LICENSE("GPL"); 55 MODULE_LICENSE("GPL");
56 56
57 /* 57 /*
58 * SECTION: prototypes for static functions of dasd.c 58 * SECTION: prototypes for static functions of dasd.c
59 */ 59 */
60 static int dasd_alloc_queue(struct dasd_block *); 60 static int dasd_alloc_queue(struct dasd_block *);
61 static void dasd_setup_queue(struct dasd_block *); 61 static void dasd_setup_queue(struct dasd_block *);
62 static void dasd_free_queue(struct dasd_block *); 62 static void dasd_free_queue(struct dasd_block *);
63 static void dasd_flush_request_queue(struct dasd_block *); 63 static void dasd_flush_request_queue(struct dasd_block *);
64 static int dasd_flush_block_queue(struct dasd_block *); 64 static int dasd_flush_block_queue(struct dasd_block *);
65 static void dasd_device_tasklet(struct dasd_device *); 65 static void dasd_device_tasklet(struct dasd_device *);
66 static void dasd_block_tasklet(struct dasd_block *); 66 static void dasd_block_tasklet(struct dasd_block *);
67 static void do_kick_device(struct work_struct *); 67 static void do_kick_device(struct work_struct *);
68 static void do_restore_device(struct work_struct *); 68 static void do_restore_device(struct work_struct *);
69 static void do_reload_device(struct work_struct *); 69 static void do_reload_device(struct work_struct *);
70 static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); 70 static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *);
71 static void dasd_device_timeout(unsigned long); 71 static void dasd_device_timeout(unsigned long);
72 static void dasd_block_timeout(unsigned long); 72 static void dasd_block_timeout(unsigned long);
73 static void __dasd_process_erp(struct dasd_device *, struct dasd_ccw_req *); 73 static void __dasd_process_erp(struct dasd_device *, struct dasd_ccw_req *);
74 74
75 /* 75 /*
76 * SECTION: Operations on the device structure. 76 * SECTION: Operations on the device structure.
77 */ 77 */
78 static wait_queue_head_t dasd_init_waitq; 78 static wait_queue_head_t dasd_init_waitq;
79 static wait_queue_head_t dasd_flush_wq; 79 static wait_queue_head_t dasd_flush_wq;
80 static wait_queue_head_t generic_waitq; 80 static wait_queue_head_t generic_waitq;
81 81
82 /* 82 /*
83 * Allocate memory for a new device structure. 83 * Allocate memory for a new device structure.
84 */ 84 */
85 struct dasd_device *dasd_alloc_device(void) 85 struct dasd_device *dasd_alloc_device(void)
86 { 86 {
87 struct dasd_device *device; 87 struct dasd_device *device;
88 88
89 device = kzalloc(sizeof(struct dasd_device), GFP_ATOMIC); 89 device = kzalloc(sizeof(struct dasd_device), GFP_ATOMIC);
90 if (!device) 90 if (!device)
91 return ERR_PTR(-ENOMEM); 91 return ERR_PTR(-ENOMEM);
92 92
93 /* Get two pages for normal block device operations. */ 93 /* Get two pages for normal block device operations. */
94 device->ccw_mem = (void *) __get_free_pages(GFP_ATOMIC | GFP_DMA, 1); 94 device->ccw_mem = (void *) __get_free_pages(GFP_ATOMIC | GFP_DMA, 1);
95 if (!device->ccw_mem) { 95 if (!device->ccw_mem) {
96 kfree(device); 96 kfree(device);
97 return ERR_PTR(-ENOMEM); 97 return ERR_PTR(-ENOMEM);
98 } 98 }
99 /* Get one page for error recovery. */ 99 /* Get one page for error recovery. */
100 device->erp_mem = (void *) get_zeroed_page(GFP_ATOMIC | GFP_DMA); 100 device->erp_mem = (void *) get_zeroed_page(GFP_ATOMIC | GFP_DMA);
101 if (!device->erp_mem) { 101 if (!device->erp_mem) {
102 free_pages((unsigned long) device->ccw_mem, 1); 102 free_pages((unsigned long) device->ccw_mem, 1);
103 kfree(device); 103 kfree(device);
104 return ERR_PTR(-ENOMEM); 104 return ERR_PTR(-ENOMEM);
105 } 105 }
106 106
107 dasd_init_chunklist(&device->ccw_chunks, device->ccw_mem, PAGE_SIZE*2); 107 dasd_init_chunklist(&device->ccw_chunks, device->ccw_mem, PAGE_SIZE*2);
108 dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE); 108 dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE);
109 spin_lock_init(&device->mem_lock); 109 spin_lock_init(&device->mem_lock);
110 atomic_set(&device->tasklet_scheduled, 0); 110 atomic_set(&device->tasklet_scheduled, 0);
111 tasklet_init(&device->tasklet, 111 tasklet_init(&device->tasklet,
112 (void (*)(unsigned long)) dasd_device_tasklet, 112 (void (*)(unsigned long)) dasd_device_tasklet,
113 (unsigned long) device); 113 (unsigned long) device);
114 INIT_LIST_HEAD(&device->ccw_queue); 114 INIT_LIST_HEAD(&device->ccw_queue);
115 init_timer(&device->timer); 115 init_timer(&device->timer);
116 device->timer.function = dasd_device_timeout; 116 device->timer.function = dasd_device_timeout;
117 device->timer.data = (unsigned long) device; 117 device->timer.data = (unsigned long) device;
118 INIT_WORK(&device->kick_work, do_kick_device); 118 INIT_WORK(&device->kick_work, do_kick_device);
119 INIT_WORK(&device->restore_device, do_restore_device); 119 INIT_WORK(&device->restore_device, do_restore_device);
120 INIT_WORK(&device->reload_device, do_reload_device); 120 INIT_WORK(&device->reload_device, do_reload_device);
121 device->state = DASD_STATE_NEW; 121 device->state = DASD_STATE_NEW;
122 device->target = DASD_STATE_NEW; 122 device->target = DASD_STATE_NEW;
123 mutex_init(&device->state_mutex); 123 mutex_init(&device->state_mutex);
124 124
125 return device; 125 return device;
126 } 126 }
127 127
128 /* 128 /*
129 * Free memory of a device structure. 129 * Free memory of a device structure.
130 */ 130 */
131 void dasd_free_device(struct dasd_device *device) 131 void dasd_free_device(struct dasd_device *device)
132 { 132 {
133 kfree(device->private); 133 kfree(device->private);
134 free_page((unsigned long) device->erp_mem); 134 free_page((unsigned long) device->erp_mem);
135 free_pages((unsigned long) device->ccw_mem, 1); 135 free_pages((unsigned long) device->ccw_mem, 1);
136 kfree(device); 136 kfree(device);
137 } 137 }
138 138
139 /* 139 /*
140 * Allocate memory for a new device structure. 140 * Allocate memory for a new device structure.
141 */ 141 */
142 struct dasd_block *dasd_alloc_block(void) 142 struct dasd_block *dasd_alloc_block(void)
143 { 143 {
144 struct dasd_block *block; 144 struct dasd_block *block;
145 145
146 block = kzalloc(sizeof(*block), GFP_ATOMIC); 146 block = kzalloc(sizeof(*block), GFP_ATOMIC);
147 if (!block) 147 if (!block)
148 return ERR_PTR(-ENOMEM); 148 return ERR_PTR(-ENOMEM);
149 /* open_count = 0 means device online but not in use */ 149 /* open_count = 0 means device online but not in use */
150 atomic_set(&block->open_count, -1); 150 atomic_set(&block->open_count, -1);
151 151
152 spin_lock_init(&block->request_queue_lock); 152 spin_lock_init(&block->request_queue_lock);
153 atomic_set(&block->tasklet_scheduled, 0); 153 atomic_set(&block->tasklet_scheduled, 0);
154 tasklet_init(&block->tasklet, 154 tasklet_init(&block->tasklet,
155 (void (*)(unsigned long)) dasd_block_tasklet, 155 (void (*)(unsigned long)) dasd_block_tasklet,
156 (unsigned long) block); 156 (unsigned long) block);
157 INIT_LIST_HEAD(&block->ccw_queue); 157 INIT_LIST_HEAD(&block->ccw_queue);
158 spin_lock_init(&block->queue_lock); 158 spin_lock_init(&block->queue_lock);
159 init_timer(&block->timer); 159 init_timer(&block->timer);
160 block->timer.function = dasd_block_timeout; 160 block->timer.function = dasd_block_timeout;
161 block->timer.data = (unsigned long) block; 161 block->timer.data = (unsigned long) block;
162 162
163 return block; 163 return block;
164 } 164 }
165 165
166 /* 166 /*
167 * Free memory of a device structure. 167 * Free memory of a device structure.
168 */ 168 */
169 void dasd_free_block(struct dasd_block *block) 169 void dasd_free_block(struct dasd_block *block)
170 { 170 {
171 kfree(block); 171 kfree(block);
172 } 172 }
173 173
174 /* 174 /*
175 * Make a new device known to the system. 175 * Make a new device known to the system.
176 */ 176 */
177 static int dasd_state_new_to_known(struct dasd_device *device) 177 static int dasd_state_new_to_known(struct dasd_device *device)
178 { 178 {
179 int rc; 179 int rc;
180 180
181 /* 181 /*
182 * As long as the device is not in state DASD_STATE_NEW we want to 182 * As long as the device is not in state DASD_STATE_NEW we want to
183 * keep the reference count > 0. 183 * keep the reference count > 0.
184 */ 184 */
185 dasd_get_device(device); 185 dasd_get_device(device);
186 186
187 if (device->block) { 187 if (device->block) {
188 rc = dasd_alloc_queue(device->block); 188 rc = dasd_alloc_queue(device->block);
189 if (rc) { 189 if (rc) {
190 dasd_put_device(device); 190 dasd_put_device(device);
191 return rc; 191 return rc;
192 } 192 }
193 } 193 }
194 device->state = DASD_STATE_KNOWN; 194 device->state = DASD_STATE_KNOWN;
195 return 0; 195 return 0;
196 } 196 }
197 197
198 /* 198 /*
199 * Let the system forget about a device. 199 * Let the system forget about a device.
200 */ 200 */
201 static int dasd_state_known_to_new(struct dasd_device *device) 201 static int dasd_state_known_to_new(struct dasd_device *device)
202 { 202 {
203 /* Disable extended error reporting for this device. */ 203 /* Disable extended error reporting for this device. */
204 dasd_eer_disable(device); 204 dasd_eer_disable(device);
205 /* Forget the discipline information. */ 205 /* Forget the discipline information. */
206 if (device->discipline) { 206 if (device->discipline) {
207 if (device->discipline->uncheck_device) 207 if (device->discipline->uncheck_device)
208 device->discipline->uncheck_device(device); 208 device->discipline->uncheck_device(device);
209 module_put(device->discipline->owner); 209 module_put(device->discipline->owner);
210 } 210 }
211 device->discipline = NULL; 211 device->discipline = NULL;
212 if (device->base_discipline) 212 if (device->base_discipline)
213 module_put(device->base_discipline->owner); 213 module_put(device->base_discipline->owner);
214 device->base_discipline = NULL; 214 device->base_discipline = NULL;
215 device->state = DASD_STATE_NEW; 215 device->state = DASD_STATE_NEW;
216 216
217 if (device->block) 217 if (device->block)
218 dasd_free_queue(device->block); 218 dasd_free_queue(device->block);
219 219
220 /* Give up reference we took in dasd_state_new_to_known. */ 220 /* Give up reference we took in dasd_state_new_to_known. */
221 dasd_put_device(device); 221 dasd_put_device(device);
222 return 0; 222 return 0;
223 } 223 }
224 224
225 /* 225 /*
226 * Request the irq line for the device. 226 * Request the irq line for the device.
227 */ 227 */
228 static int dasd_state_known_to_basic(struct dasd_device *device) 228 static int dasd_state_known_to_basic(struct dasd_device *device)
229 { 229 {
230 int rc; 230 int rc;
231 231
232 /* Allocate and register gendisk structure. */ 232 /* Allocate and register gendisk structure. */
233 if (device->block) { 233 if (device->block) {
234 rc = dasd_gendisk_alloc(device->block); 234 rc = dasd_gendisk_alloc(device->block);
235 if (rc) 235 if (rc)
236 return rc; 236 return rc;
237 } 237 }
238 /* register 'device' debug area, used for all DBF_DEV_XXX calls */ 238 /* register 'device' debug area, used for all DBF_DEV_XXX calls */
239 device->debug_area = debug_register(dev_name(&device->cdev->dev), 4, 1, 239 device->debug_area = debug_register(dev_name(&device->cdev->dev), 4, 1,
240 8 * sizeof(long)); 240 8 * sizeof(long));
241 debug_register_view(device->debug_area, &debug_sprintf_view); 241 debug_register_view(device->debug_area, &debug_sprintf_view);
242 debug_set_level(device->debug_area, DBF_WARNING); 242 debug_set_level(device->debug_area, DBF_WARNING);
243 DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created"); 243 DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created");
244 244
245 device->state = DASD_STATE_BASIC; 245 device->state = DASD_STATE_BASIC;
246 return 0; 246 return 0;
247 } 247 }
248 248
249 /* 249 /*
250 * Release the irq line for the device. Terminate any running i/o. 250 * Release the irq line for the device. Terminate any running i/o.
251 */ 251 */
252 static int dasd_state_basic_to_known(struct dasd_device *device) 252 static int dasd_state_basic_to_known(struct dasd_device *device)
253 { 253 {
254 int rc; 254 int rc;
255 if (device->block) { 255 if (device->block) {
256 dasd_gendisk_free(device->block); 256 dasd_gendisk_free(device->block);
257 dasd_block_clear_timer(device->block); 257 dasd_block_clear_timer(device->block);
258 } 258 }
259 rc = dasd_flush_device_queue(device); 259 rc = dasd_flush_device_queue(device);
260 if (rc) 260 if (rc)
261 return rc; 261 return rc;
262 dasd_device_clear_timer(device); 262 dasd_device_clear_timer(device);
263 263
264 DBF_DEV_EVENT(DBF_EMERG, device, "%p debug area deleted", device); 264 DBF_DEV_EVENT(DBF_EMERG, device, "%p debug area deleted", device);
265 if (device->debug_area != NULL) { 265 if (device->debug_area != NULL) {
266 debug_unregister(device->debug_area); 266 debug_unregister(device->debug_area);
267 device->debug_area = NULL; 267 device->debug_area = NULL;
268 } 268 }
269 device->state = DASD_STATE_KNOWN; 269 device->state = DASD_STATE_KNOWN;
270 return 0; 270 return 0;
271 } 271 }
272 272
273 /* 273 /*
274 * Do the initial analysis. The do_analysis function may return 274 * Do the initial analysis. The do_analysis function may return
275 * -EAGAIN in which case the device keeps the state DASD_STATE_BASIC 275 * -EAGAIN in which case the device keeps the state DASD_STATE_BASIC
276 * until the discipline decides to continue the startup sequence 276 * until the discipline decides to continue the startup sequence
277 * by calling the function dasd_change_state. The eckd discipline 277 * by calling the function dasd_change_state. The eckd discipline
278 * uses this to start a ccw that detects the format. The completion 278 * uses this to start a ccw that detects the format. The completion
279 * interrupt for this detection ccw uses the kernel event daemon to 279 * interrupt for this detection ccw uses the kernel event daemon to
280 * trigger the call to dasd_change_state. All this is done in the 280 * trigger the call to dasd_change_state. All this is done in the
281 * discipline code, see dasd_eckd.c. 281 * discipline code, see dasd_eckd.c.
282 * After the analysis ccw is done (do_analysis returned 0) the block 282 * After the analysis ccw is done (do_analysis returned 0) the block
283 * device is setup. 283 * device is setup.
284 * In case the analysis returns an error, the device setup is stopped 284 * In case the analysis returns an error, the device setup is stopped
285 * (a fake disk was already added to allow formatting). 285 * (a fake disk was already added to allow formatting).
286 */ 286 */
287 static int dasd_state_basic_to_ready(struct dasd_device *device) 287 static int dasd_state_basic_to_ready(struct dasd_device *device)
288 { 288 {
289 int rc; 289 int rc;
290 struct dasd_block *block; 290 struct dasd_block *block;
291 291
292 rc = 0; 292 rc = 0;
293 block = device->block; 293 block = device->block;
294 /* make disk known with correct capacity */ 294 /* make disk known with correct capacity */
295 if (block) { 295 if (block) {
296 if (block->base->discipline->do_analysis != NULL) 296 if (block->base->discipline->do_analysis != NULL)
297 rc = block->base->discipline->do_analysis(block); 297 rc = block->base->discipline->do_analysis(block);
298 if (rc) { 298 if (rc) {
299 if (rc != -EAGAIN) 299 if (rc != -EAGAIN)
300 device->state = DASD_STATE_UNFMT; 300 device->state = DASD_STATE_UNFMT;
301 return rc; 301 return rc;
302 } 302 }
303 dasd_setup_queue(block); 303 dasd_setup_queue(block);
304 set_capacity(block->gdp, 304 set_capacity(block->gdp,
305 block->blocks << block->s2b_shift); 305 block->blocks << block->s2b_shift);
306 device->state = DASD_STATE_READY; 306 device->state = DASD_STATE_READY;
307 rc = dasd_scan_partitions(block); 307 rc = dasd_scan_partitions(block);
308 if (rc) 308 if (rc)
309 device->state = DASD_STATE_BASIC; 309 device->state = DASD_STATE_BASIC;
310 } else { 310 } else {
311 device->state = DASD_STATE_READY; 311 device->state = DASD_STATE_READY;
312 } 312 }
313 return rc; 313 return rc;
314 } 314 }
315 315
316 /* 316 /*
317 * Remove device from block device layer. Destroy dirty buffers. 317 * Remove device from block device layer. Destroy dirty buffers.
318 * Forget format information. Check if the target level is basic 318 * Forget format information. Check if the target level is basic
319 * and if it is create fake disk for formatting. 319 * and if it is create fake disk for formatting.
320 */ 320 */
321 static int dasd_state_ready_to_basic(struct dasd_device *device) 321 static int dasd_state_ready_to_basic(struct dasd_device *device)
322 { 322 {
323 int rc; 323 int rc;
324 324
325 device->state = DASD_STATE_BASIC; 325 device->state = DASD_STATE_BASIC;
326 if (device->block) { 326 if (device->block) {
327 struct dasd_block *block = device->block; 327 struct dasd_block *block = device->block;
328 rc = dasd_flush_block_queue(block); 328 rc = dasd_flush_block_queue(block);
329 if (rc) { 329 if (rc) {
330 device->state = DASD_STATE_READY; 330 device->state = DASD_STATE_READY;
331 return rc; 331 return rc;
332 } 332 }
333 dasd_flush_request_queue(block); 333 dasd_flush_request_queue(block);
334 dasd_destroy_partitions(block); 334 dasd_destroy_partitions(block);
335 block->blocks = 0; 335 block->blocks = 0;
336 block->bp_block = 0; 336 block->bp_block = 0;
337 block->s2b_shift = 0; 337 block->s2b_shift = 0;
338 } 338 }
339 return 0; 339 return 0;
340 } 340 }
341 341
342 /* 342 /*
343 * Back to basic. 343 * Back to basic.
344 */ 344 */
345 static int dasd_state_unfmt_to_basic(struct dasd_device *device) 345 static int dasd_state_unfmt_to_basic(struct dasd_device *device)
346 { 346 {
347 device->state = DASD_STATE_BASIC; 347 device->state = DASD_STATE_BASIC;
348 return 0; 348 return 0;
349 } 349 }
350 350
351 /* 351 /*
352 * Make the device online and schedule the bottom half to start 352 * Make the device online and schedule the bottom half to start
353 * the requeueing of requests from the linux request queue to the 353 * the requeueing of requests from the linux request queue to the
354 * ccw queue. 354 * ccw queue.
355 */ 355 */
356 static int 356 static int
357 dasd_state_ready_to_online(struct dasd_device * device) 357 dasd_state_ready_to_online(struct dasd_device * device)
358 { 358 {
359 int rc; 359 int rc;
360 struct gendisk *disk; 360 struct gendisk *disk;
361 struct disk_part_iter piter; 361 struct disk_part_iter piter;
362 struct hd_struct *part; 362 struct hd_struct *part;
363 363
364 if (device->discipline->ready_to_online) { 364 if (device->discipline->ready_to_online) {
365 rc = device->discipline->ready_to_online(device); 365 rc = device->discipline->ready_to_online(device);
366 if (rc) 366 if (rc)
367 return rc; 367 return rc;
368 } 368 }
369 device->state = DASD_STATE_ONLINE; 369 device->state = DASD_STATE_ONLINE;
370 if (device->block) { 370 if (device->block) {
371 dasd_schedule_block_bh(device->block); 371 dasd_schedule_block_bh(device->block);
372 disk = device->block->bdev->bd_disk; 372 disk = device->block->bdev->bd_disk;
373 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 373 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
374 while ((part = disk_part_iter_next(&piter))) 374 while ((part = disk_part_iter_next(&piter)))
375 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); 375 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE);
376 disk_part_iter_exit(&piter); 376 disk_part_iter_exit(&piter);
377 } 377 }
378 return 0; 378 return 0;
379 } 379 }
380 380
381 /* 381 /*
382 * Stop the requeueing of requests again. 382 * Stop the requeueing of requests again.
383 */ 383 */
384 static int dasd_state_online_to_ready(struct dasd_device *device) 384 static int dasd_state_online_to_ready(struct dasd_device *device)
385 { 385 {
386 int rc; 386 int rc;
387 struct gendisk *disk; 387 struct gendisk *disk;
388 struct disk_part_iter piter; 388 struct disk_part_iter piter;
389 struct hd_struct *part; 389 struct hd_struct *part;
390 390
391 if (device->discipline->online_to_ready) { 391 if (device->discipline->online_to_ready) {
392 rc = device->discipline->online_to_ready(device); 392 rc = device->discipline->online_to_ready(device);
393 if (rc) 393 if (rc)
394 return rc; 394 return rc;
395 } 395 }
396 device->state = DASD_STATE_READY; 396 device->state = DASD_STATE_READY;
397 if (device->block) { 397 if (device->block) {
398 disk = device->block->bdev->bd_disk; 398 disk = device->block->bdev->bd_disk;
399 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 399 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
400 while ((part = disk_part_iter_next(&piter))) 400 while ((part = disk_part_iter_next(&piter)))
401 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); 401 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE);
402 disk_part_iter_exit(&piter); 402 disk_part_iter_exit(&piter);
403 } 403 }
404 return 0; 404 return 0;
405 } 405 }
406 406
407 /* 407 /*
408 * Device startup state changes. 408 * Device startup state changes.
409 */ 409 */
410 static int dasd_increase_state(struct dasd_device *device) 410 static int dasd_increase_state(struct dasd_device *device)
411 { 411 {
412 int rc; 412 int rc;
413 413
414 rc = 0; 414 rc = 0;
415 if (device->state == DASD_STATE_NEW && 415 if (device->state == DASD_STATE_NEW &&
416 device->target >= DASD_STATE_KNOWN) 416 device->target >= DASD_STATE_KNOWN)
417 rc = dasd_state_new_to_known(device); 417 rc = dasd_state_new_to_known(device);
418 418
419 if (!rc && 419 if (!rc &&
420 device->state == DASD_STATE_KNOWN && 420 device->state == DASD_STATE_KNOWN &&
421 device->target >= DASD_STATE_BASIC) 421 device->target >= DASD_STATE_BASIC)
422 rc = dasd_state_known_to_basic(device); 422 rc = dasd_state_known_to_basic(device);
423 423
424 if (!rc && 424 if (!rc &&
425 device->state == DASD_STATE_BASIC && 425 device->state == DASD_STATE_BASIC &&
426 device->target >= DASD_STATE_READY) 426 device->target >= DASD_STATE_READY)
427 rc = dasd_state_basic_to_ready(device); 427 rc = dasd_state_basic_to_ready(device);
428 428
429 if (!rc && 429 if (!rc &&
430 device->state == DASD_STATE_UNFMT && 430 device->state == DASD_STATE_UNFMT &&
431 device->target > DASD_STATE_UNFMT) 431 device->target > DASD_STATE_UNFMT)
432 rc = -EPERM; 432 rc = -EPERM;
433 433
434 if (!rc && 434 if (!rc &&
435 device->state == DASD_STATE_READY && 435 device->state == DASD_STATE_READY &&
436 device->target >= DASD_STATE_ONLINE) 436 device->target >= DASD_STATE_ONLINE)
437 rc = dasd_state_ready_to_online(device); 437 rc = dasd_state_ready_to_online(device);
438 438
439 return rc; 439 return rc;
440 } 440 }
441 441
442 /* 442 /*
443 * Device shutdown state changes. 443 * Device shutdown state changes.
444 */ 444 */
445 static int dasd_decrease_state(struct dasd_device *device) 445 static int dasd_decrease_state(struct dasd_device *device)
446 { 446 {
447 int rc; 447 int rc;
448 448
449 rc = 0; 449 rc = 0;
450 if (device->state == DASD_STATE_ONLINE && 450 if (device->state == DASD_STATE_ONLINE &&
451 device->target <= DASD_STATE_READY) 451 device->target <= DASD_STATE_READY)
452 rc = dasd_state_online_to_ready(device); 452 rc = dasd_state_online_to_ready(device);
453 453
454 if (!rc && 454 if (!rc &&
455 device->state == DASD_STATE_READY && 455 device->state == DASD_STATE_READY &&
456 device->target <= DASD_STATE_BASIC) 456 device->target <= DASD_STATE_BASIC)
457 rc = dasd_state_ready_to_basic(device); 457 rc = dasd_state_ready_to_basic(device);
458 458
459 if (!rc && 459 if (!rc &&
460 device->state == DASD_STATE_UNFMT && 460 device->state == DASD_STATE_UNFMT &&
461 device->target <= DASD_STATE_BASIC) 461 device->target <= DASD_STATE_BASIC)
462 rc = dasd_state_unfmt_to_basic(device); 462 rc = dasd_state_unfmt_to_basic(device);
463 463
464 if (!rc && 464 if (!rc &&
465 device->state == DASD_STATE_BASIC && 465 device->state == DASD_STATE_BASIC &&
466 device->target <= DASD_STATE_KNOWN) 466 device->target <= DASD_STATE_KNOWN)
467 rc = dasd_state_basic_to_known(device); 467 rc = dasd_state_basic_to_known(device);
468 468
469 if (!rc && 469 if (!rc &&
470 device->state == DASD_STATE_KNOWN && 470 device->state == DASD_STATE_KNOWN &&
471 device->target <= DASD_STATE_NEW) 471 device->target <= DASD_STATE_NEW)
472 rc = dasd_state_known_to_new(device); 472 rc = dasd_state_known_to_new(device);
473 473
474 return rc; 474 return rc;
475 } 475 }
476 476
477 /* 477 /*
478 * This is the main startup/shutdown routine. 478 * This is the main startup/shutdown routine.
479 */ 479 */
480 static void dasd_change_state(struct dasd_device *device) 480 static void dasd_change_state(struct dasd_device *device)
481 { 481 {
482 int rc; 482 int rc;
483 483
484 if (device->state == device->target) 484 if (device->state == device->target)
485 /* Already where we want to go today... */ 485 /* Already where we want to go today... */
486 return; 486 return;
487 if (device->state < device->target) 487 if (device->state < device->target)
488 rc = dasd_increase_state(device); 488 rc = dasd_increase_state(device);
489 else 489 else
490 rc = dasd_decrease_state(device); 490 rc = dasd_decrease_state(device);
491 if (rc == -EAGAIN) 491 if (rc == -EAGAIN)
492 return; 492 return;
493 if (rc) 493 if (rc)
494 device->target = device->state; 494 device->target = device->state;
495 495
496 if (device->state == device->target) 496 if (device->state == device->target)
497 wake_up(&dasd_init_waitq); 497 wake_up(&dasd_init_waitq);
498 498
499 /* let user-space know that the device status changed */ 499 /* let user-space know that the device status changed */
500 kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); 500 kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE);
501 } 501 }
502 502
503 /* 503 /*
504 * Kick starter for devices that did not complete the startup/shutdown 504 * Kick starter for devices that did not complete the startup/shutdown
505 * procedure or were sleeping because of a pending state. 505 * procedure or were sleeping because of a pending state.
506 * dasd_kick_device will schedule a call to do_kick_device via the kernel 506 * dasd_kick_device will schedule a call to do_kick_device via the kernel
507 * event daemon. 507 * event daemon.
508 */ 508 */
509 static void do_kick_device(struct work_struct *work) 509 static void do_kick_device(struct work_struct *work)
510 { 510 {
511 struct dasd_device *device = container_of(work, struct dasd_device, kick_work); 511 struct dasd_device *device = container_of(work, struct dasd_device, kick_work);
512 mutex_lock(&device->state_mutex); 512 mutex_lock(&device->state_mutex);
513 dasd_change_state(device); 513 dasd_change_state(device);
514 mutex_unlock(&device->state_mutex); 514 mutex_unlock(&device->state_mutex);
515 dasd_schedule_device_bh(device); 515 dasd_schedule_device_bh(device);
516 dasd_put_device(device); 516 dasd_put_device(device);
517 } 517 }
518 518
519 void dasd_kick_device(struct dasd_device *device) 519 void dasd_kick_device(struct dasd_device *device)
520 { 520 {
521 dasd_get_device(device); 521 dasd_get_device(device);
522 /* queue call to dasd_kick_device to the kernel event daemon. */ 522 /* queue call to dasd_kick_device to the kernel event daemon. */
523 schedule_work(&device->kick_work); 523 schedule_work(&device->kick_work);
524 } 524 }
525 525
526 /* 526 /*
527 * dasd_reload_device will schedule a call to do_reload_device via the kernel 527 * dasd_reload_device will schedule a call to do_reload_device via the kernel
528 * event daemon. 528 * event daemon.
529 */ 529 */
530 static void do_reload_device(struct work_struct *work) 530 static void do_reload_device(struct work_struct *work)
531 { 531 {
532 struct dasd_device *device = container_of(work, struct dasd_device, 532 struct dasd_device *device = container_of(work, struct dasd_device,
533 reload_device); 533 reload_device);
534 device->discipline->reload(device); 534 device->discipline->reload(device);
535 dasd_put_device(device); 535 dasd_put_device(device);
536 } 536 }
537 537
538 void dasd_reload_device(struct dasd_device *device) 538 void dasd_reload_device(struct dasd_device *device)
539 { 539 {
540 dasd_get_device(device); 540 dasd_get_device(device);
541 /* queue call to dasd_reload_device to the kernel event daemon. */ 541 /* queue call to dasd_reload_device to the kernel event daemon. */
542 schedule_work(&device->reload_device); 542 schedule_work(&device->reload_device);
543 } 543 }
544 EXPORT_SYMBOL(dasd_reload_device); 544 EXPORT_SYMBOL(dasd_reload_device);
545 545
546 /* 546 /*
547 * dasd_restore_device will schedule a call to do_restore_device via the kernel 547 * dasd_restore_device will schedule a call to do_restore_device via the kernel
548 * event daemon. 548 * event daemon.
549 */ 549 */
550 static void do_restore_device(struct work_struct *work) 550 static void do_restore_device(struct work_struct *work)
551 { 551 {
552 struct dasd_device *device = container_of(work, struct dasd_device, 552 struct dasd_device *device = container_of(work, struct dasd_device,
553 restore_device); 553 restore_device);
554 device->cdev->drv->restore(device->cdev); 554 device->cdev->drv->restore(device->cdev);
555 dasd_put_device(device); 555 dasd_put_device(device);
556 } 556 }
557 557
558 void dasd_restore_device(struct dasd_device *device) 558 void dasd_restore_device(struct dasd_device *device)
559 { 559 {
560 dasd_get_device(device); 560 dasd_get_device(device);
561 /* queue call to dasd_restore_device to the kernel event daemon. */ 561 /* queue call to dasd_restore_device to the kernel event daemon. */
562 schedule_work(&device->restore_device); 562 schedule_work(&device->restore_device);
563 } 563 }
564 564
565 /* 565 /*
566 * Set the target state for a device and start the state change. 566 * Set the target state for a device and start the state change.
567 */ 567 */
568 void dasd_set_target_state(struct dasd_device *device, int target) 568 void dasd_set_target_state(struct dasd_device *device, int target)
569 { 569 {
570 dasd_get_device(device); 570 dasd_get_device(device);
571 mutex_lock(&device->state_mutex); 571 mutex_lock(&device->state_mutex);
572 /* If we are in probeonly mode stop at DASD_STATE_READY. */ 572 /* If we are in probeonly mode stop at DASD_STATE_READY. */
573 if (dasd_probeonly && target > DASD_STATE_READY) 573 if (dasd_probeonly && target > DASD_STATE_READY)
574 target = DASD_STATE_READY; 574 target = DASD_STATE_READY;
575 if (device->target != target) { 575 if (device->target != target) {
576 if (device->state == target) 576 if (device->state == target)
577 wake_up(&dasd_init_waitq); 577 wake_up(&dasd_init_waitq);
578 device->target = target; 578 device->target = target;
579 } 579 }
580 if (device->state != device->target) 580 if (device->state != device->target)
581 dasd_change_state(device); 581 dasd_change_state(device);
582 mutex_unlock(&device->state_mutex); 582 mutex_unlock(&device->state_mutex);
583 dasd_put_device(device); 583 dasd_put_device(device);
584 } 584 }
585 585
586 /* 586 /*
587 * Enable devices with device numbers in [from..to]. 587 * Enable devices with device numbers in [from..to].
588 */ 588 */
589 static inline int _wait_for_device(struct dasd_device *device) 589 static inline int _wait_for_device(struct dasd_device *device)
590 { 590 {
591 return (device->state == device->target); 591 return (device->state == device->target);
592 } 592 }
593 593
594 void dasd_enable_device(struct dasd_device *device) 594 void dasd_enable_device(struct dasd_device *device)
595 { 595 {
596 dasd_set_target_state(device, DASD_STATE_ONLINE); 596 dasd_set_target_state(device, DASD_STATE_ONLINE);
597 if (device->state <= DASD_STATE_KNOWN) 597 if (device->state <= DASD_STATE_KNOWN)
598 /* No discipline for device found. */ 598 /* No discipline for device found. */
599 dasd_set_target_state(device, DASD_STATE_NEW); 599 dasd_set_target_state(device, DASD_STATE_NEW);
600 /* Now wait for the devices to come up. */ 600 /* Now wait for the devices to come up. */
601 wait_event(dasd_init_waitq, _wait_for_device(device)); 601 wait_event(dasd_init_waitq, _wait_for_device(device));
602 } 602 }
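
dasd_set_target_state() and dasd_change_state() above walk the device through one state transition per step until device->state reaches device->target. A small illustrative sketch of driving the ladder in both directions; my_cycle_device is an assumed caller, not part of the driver.

#include "dasd_int.h"

static void my_cycle_device(struct dasd_device *device)
{
        /* Up:   NEW -> KNOWN -> BASIC -> READY -> ONLINE
         * (an analysis failure parks the device in UNFMT instead of READY). */
        dasd_set_target_state(device, DASD_STATE_ONLINE);

        /* Down: ONLINE -> READY -> BASIC -> KNOWN -> NEW */
        dasd_set_target_state(device, DASD_STATE_NEW);
}
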
603 603
604 /* 604 /*
605 * SECTION: device operation (interrupt handler, start i/o, term i/o ...) 605 * SECTION: device operation (interrupt handler, start i/o, term i/o ...)
606 */ 606 */
607 #ifdef CONFIG_DASD_PROFILE 607 #ifdef CONFIG_DASD_PROFILE
608 608
609 struct dasd_profile_info_t dasd_global_profile; 609 struct dasd_profile_info_t dasd_global_profile;
610 unsigned int dasd_profile_level = DASD_PROFILE_OFF; 610 unsigned int dasd_profile_level = DASD_PROFILE_OFF;
611 611
612 /* 612 /*
613 * Increments counter in global and local profiling structures. 613 * Increments counter in global and local profiling structures.
614 */ 614 */
615 #define dasd_profile_counter(value, counter, block) \ 615 #define dasd_profile_counter(value, counter, block) \
616 { \ 616 { \
617 int index; \ 617 int index; \
618 for (index = 0; index < 31 && value >> (2+index); index++); \ 618 for (index = 0; index < 31 && value >> (2+index); index++); \
619 dasd_global_profile.counter[index]++; \ 619 dasd_global_profile.counter[index]++; \
620 block->profile.counter[index]++; \ 620 block->profile.counter[index]++; \
621 } 621 }
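
The macro above drops a sample into a logarithmic histogram: the bucket index is the smallest index (capped at 31) for which value >> (2 + index) becomes zero, i.e. the buckets are [0..3], [4..7], [8..15] and so on. An equivalent standalone helper, for illustration only:

static inline int my_profile_bucket(unsigned long value)
{
        int index;

        /* Smallest index with value < (4UL << index), capped at 31. */
        for (index = 0; index < 31 && value >> (2 + index); index++)
                ;
        return index;   /* e.g. value 100 -> bucket 5, since 64 <= 100 < 128 */
}
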
622 622
623 /* 623 /*
624 * Add profiling information for cqr before execution. 624 * Add profiling information for cqr before execution.
625 */ 625 */
626 static void dasd_profile_start(struct dasd_block *block, 626 static void dasd_profile_start(struct dasd_block *block,
627 struct dasd_ccw_req *cqr, 627 struct dasd_ccw_req *cqr,
628 struct request *req) 628 struct request *req)
629 { 629 {
630 struct list_head *l; 630 struct list_head *l;
631 unsigned int counter; 631 unsigned int counter;
632 632
633 if (dasd_profile_level != DASD_PROFILE_ON) 633 if (dasd_profile_level != DASD_PROFILE_ON)
634 return; 634 return;
635 635
636 /* count the length of the chanq for statistics */ 636 /* count the length of the chanq for statistics */
637 counter = 0; 637 counter = 0;
638 list_for_each(l, &block->ccw_queue) 638 list_for_each(l, &block->ccw_queue)
639 if (++counter >= 31) 639 if (++counter >= 31)
640 break; 640 break;
641 dasd_global_profile.dasd_io_nr_req[counter]++; 641 dasd_global_profile.dasd_io_nr_req[counter]++;
642 block->profile.dasd_io_nr_req[counter]++; 642 block->profile.dasd_io_nr_req[counter]++;
643 } 643 }
644 644
645 /* 645 /*
646 * Add profiling information for cqr after execution. 646 * Add profiling information for cqr after execution.
647 */ 647 */
648 static void dasd_profile_end(struct dasd_block *block, 648 static void dasd_profile_end(struct dasd_block *block,
649 struct dasd_ccw_req *cqr, 649 struct dasd_ccw_req *cqr,
650 struct request *req) 650 struct request *req)
651 { 651 {
652 long strtime, irqtime, endtime, tottime; /* in microseconds */ 652 long strtime, irqtime, endtime, tottime; /* in microseconds */
653 long tottimeps, sectors; 653 long tottimeps, sectors;
654 654
655 if (dasd_profile_level != DASD_PROFILE_ON) 655 if (dasd_profile_level != DASD_PROFILE_ON)
656 return; 656 return;
657 657
658 sectors = blk_rq_sectors(req); 658 sectors = blk_rq_sectors(req);
659 if (!cqr->buildclk || !cqr->startclk || 659 if (!cqr->buildclk || !cqr->startclk ||
660 !cqr->stopclk || !cqr->endclk || 660 !cqr->stopclk || !cqr->endclk ||
661 !sectors) 661 !sectors)
662 return; 662 return;
663 663
664 strtime = ((cqr->startclk - cqr->buildclk) >> 12); 664 strtime = ((cqr->startclk - cqr->buildclk) >> 12);
665 irqtime = ((cqr->stopclk - cqr->startclk) >> 12); 665 irqtime = ((cqr->stopclk - cqr->startclk) >> 12);
666 endtime = ((cqr->endclk - cqr->stopclk) >> 12); 666 endtime = ((cqr->endclk - cqr->stopclk) >> 12);
667 tottime = ((cqr->endclk - cqr->buildclk) >> 12); 667 tottime = ((cqr->endclk - cqr->buildclk) >> 12);
668 tottimeps = tottime / sectors; 668 tottimeps = tottime / sectors;
669 669
670 if (!dasd_global_profile.dasd_io_reqs) 670 if (!dasd_global_profile.dasd_io_reqs)
671 memset(&dasd_global_profile, 0, 671 memset(&dasd_global_profile, 0,
672 sizeof(struct dasd_profile_info_t)); 672 sizeof(struct dasd_profile_info_t));
673 dasd_global_profile.dasd_io_reqs++; 673 dasd_global_profile.dasd_io_reqs++;
674 dasd_global_profile.dasd_io_sects += sectors; 674 dasd_global_profile.dasd_io_sects += sectors;
675 675
676 if (!block->profile.dasd_io_reqs) 676 if (!block->profile.dasd_io_reqs)
677 memset(&block->profile, 0, 677 memset(&block->profile, 0,
678 sizeof(struct dasd_profile_info_t)); 678 sizeof(struct dasd_profile_info_t));
679 block->profile.dasd_io_reqs++; 679 block->profile.dasd_io_reqs++;
680 block->profile.dasd_io_sects += sectors; 680 block->profile.dasd_io_sects += sectors;
681 681
682 dasd_profile_counter(sectors, dasd_io_secs, block); 682 dasd_profile_counter(sectors, dasd_io_secs, block);
683 dasd_profile_counter(tottime, dasd_io_times, block); 683 dasd_profile_counter(tottime, dasd_io_times, block);
684 dasd_profile_counter(tottimeps, dasd_io_timps, block); 684 dasd_profile_counter(tottimeps, dasd_io_timps, block);
685 dasd_profile_counter(strtime, dasd_io_time1, block); 685 dasd_profile_counter(strtime, dasd_io_time1, block);
686 dasd_profile_counter(irqtime, dasd_io_time2, block); 686 dasd_profile_counter(irqtime, dasd_io_time2, block);
687 dasd_profile_counter(irqtime / sectors, dasd_io_time2ps, block); 687 dasd_profile_counter(irqtime / sectors, dasd_io_time2ps, block);
688 dasd_profile_counter(endtime, dasd_io_time3, block); 688 dasd_profile_counter(endtime, dasd_io_time3, block);
689 } 689 }
690 #else 690 #else
691 #define dasd_profile_start(block, cqr, req) do {} while (0) 691 #define dasd_profile_start(block, cqr, req) do {} while (0)
692 #define dasd_profile_end(block, cqr, req) do {} while (0) 692 #define dasd_profile_end(block, cqr, req) do {} while (0)
693 #endif /* CONFIG_DASD_PROFILE */ 693 #endif /* CONFIG_DASD_PROFILE */
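
The microsecond values computed in dasd_profile_end() rely on the z/Architecture TOD clock format returned by get_clock(): bit 51 ticks once per microsecond, so one microsecond corresponds to 4096 (2^12) TOD units and a right shift by 12 converts a TOD delta to microseconds. A small illustrative helper:

static inline unsigned long long my_tod_to_usecs(unsigned long long tod_delta)
{
        /* 1 microsecond == 2^12 TOD clock units (TOD bit 51). */
        return tod_delta >> 12;
}
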
694 694
695 /* 695 /*
696 * Allocate memory for a channel program with 'cplength' channel 696 * Allocate memory for a channel program with 'cplength' channel
697 * command words and 'datasize' additional space. There are two 697 * command words and 'datasize' additional space. There are two
698 * variants: 1) dasd_kmalloc_request uses kmalloc to get the needed 698 * variants: 1) dasd_kmalloc_request uses kmalloc to get the needed
699 * memory and 2) dasd_smalloc_request uses the static ccw memory 699 * memory and 2) dasd_smalloc_request uses the static ccw memory
700 * that gets allocated for each device. 700 * that gets allocated for each device.
701 */ 701 */
702 struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength, 702 struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength,
703 int datasize, 703 int datasize,
704 struct dasd_device *device) 704 struct dasd_device *device)
705 { 705 {
706 struct dasd_ccw_req *cqr; 706 struct dasd_ccw_req *cqr;
707 707
708 /* Sanity checks */ 708 /* Sanity checks */
709 BUG_ON(datasize > PAGE_SIZE || 709 BUG_ON(datasize > PAGE_SIZE ||
710 (cplength*sizeof(struct ccw1)) > PAGE_SIZE); 710 (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
711 711
712 cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC); 712 cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC);
713 if (cqr == NULL) 713 if (cqr == NULL)
714 return ERR_PTR(-ENOMEM); 714 return ERR_PTR(-ENOMEM);
715 cqr->cpaddr = NULL; 715 cqr->cpaddr = NULL;
716 if (cplength > 0) { 716 if (cplength > 0) {
717 cqr->cpaddr = kcalloc(cplength, sizeof(struct ccw1), 717 cqr->cpaddr = kcalloc(cplength, sizeof(struct ccw1),
718 GFP_ATOMIC | GFP_DMA); 718 GFP_ATOMIC | GFP_DMA);
719 if (cqr->cpaddr == NULL) { 719 if (cqr->cpaddr == NULL) {
720 kfree(cqr); 720 kfree(cqr);
721 return ERR_PTR(-ENOMEM); 721 return ERR_PTR(-ENOMEM);
722 } 722 }
723 } 723 }
724 cqr->data = NULL; 724 cqr->data = NULL;
725 if (datasize > 0) { 725 if (datasize > 0) {
726 cqr->data = kzalloc(datasize, GFP_ATOMIC | GFP_DMA); 726 cqr->data = kzalloc(datasize, GFP_ATOMIC | GFP_DMA);
727 if (cqr->data == NULL) { 727 if (cqr->data == NULL) {
728 kfree(cqr->cpaddr); 728 kfree(cqr->cpaddr);
729 kfree(cqr); 729 kfree(cqr);
730 return ERR_PTR(-ENOMEM); 730 return ERR_PTR(-ENOMEM);
731 } 731 }
732 } 732 }
733 cqr->magic = magic; 733 cqr->magic = magic;
734 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); 734 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
735 dasd_get_device(device); 735 dasd_get_device(device);
736 return cqr; 736 return cqr;
737 } 737 }
738 738
739 struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, 739 struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength,
740 int datasize, 740 int datasize,
741 struct dasd_device *device) 741 struct dasd_device *device)
742 { 742 {
743 unsigned long flags; 743 unsigned long flags;
744 struct dasd_ccw_req *cqr; 744 struct dasd_ccw_req *cqr;
745 char *data; 745 char *data;
746 int size; 746 int size;
747 747
748 /* Sanity checks */ 748 /* Sanity checks */
749 BUG_ON(datasize > PAGE_SIZE || 749 BUG_ON(datasize > PAGE_SIZE ||
750 (cplength*sizeof(struct ccw1)) > PAGE_SIZE); 750 (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
751 751
752 size = (sizeof(struct dasd_ccw_req) + 7L) & -8L; 752 size = (sizeof(struct dasd_ccw_req) + 7L) & -8L;
753 if (cplength > 0) 753 if (cplength > 0)
754 size += cplength * sizeof(struct ccw1); 754 size += cplength * sizeof(struct ccw1);
755 if (datasize > 0) 755 if (datasize > 0)
756 size += datasize; 756 size += datasize;
757 spin_lock_irqsave(&device->mem_lock, flags); 757 spin_lock_irqsave(&device->mem_lock, flags);
758 cqr = (struct dasd_ccw_req *) 758 cqr = (struct dasd_ccw_req *)
759 dasd_alloc_chunk(&device->ccw_chunks, size); 759 dasd_alloc_chunk(&device->ccw_chunks, size);
760 spin_unlock_irqrestore(&device->mem_lock, flags); 760 spin_unlock_irqrestore(&device->mem_lock, flags);
761 if (cqr == NULL) 761 if (cqr == NULL)
762 return ERR_PTR(-ENOMEM); 762 return ERR_PTR(-ENOMEM);
763 memset(cqr, 0, sizeof(struct dasd_ccw_req)); 763 memset(cqr, 0, sizeof(struct dasd_ccw_req));
764 data = (char *) cqr + ((sizeof(struct dasd_ccw_req) + 7L) & -8L); 764 data = (char *) cqr + ((sizeof(struct dasd_ccw_req) + 7L) & -8L);
765 cqr->cpaddr = NULL; 765 cqr->cpaddr = NULL;
766 if (cplength > 0) { 766 if (cplength > 0) {
767 cqr->cpaddr = (struct ccw1 *) data; 767 cqr->cpaddr = (struct ccw1 *) data;
768 data += cplength*sizeof(struct ccw1); 768 data += cplength*sizeof(struct ccw1);
769 memset(cqr->cpaddr, 0, cplength*sizeof(struct ccw1)); 769 memset(cqr->cpaddr, 0, cplength*sizeof(struct ccw1));
770 } 770 }
771 cqr->data = NULL; 771 cqr->data = NULL;
772 if (datasize > 0) { 772 if (datasize > 0) {
773 cqr->data = data; 773 cqr->data = data;
774 memset(cqr->data, 0, datasize); 774 memset(cqr->data, 0, datasize);
775 } 775 }
776 cqr->magic = magic; 776 cqr->magic = magic;
777 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); 777 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
778 dasd_get_device(device); 778 dasd_get_device(device);
779 return cqr; 779 return cqr;
780 } 780 }
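
Both allocators above carve the ccw array and the data area out of one region behind struct dasd_ccw_req. The expression (sizeof(struct dasd_ccw_req) + 7L) & -8L rounds the header size up to the next multiple of 8 so the channel program starts doubleword aligned. A sketch of the arithmetic and the resulting layout, for illustration only:

static inline unsigned long my_round_up_8(unsigned long n)
{
        return (n + 7UL) & ~7UL;        /* identical to (n + 7L) & -8L */
}

/*
 * Chunk layout produced by dasd_smalloc_request():
 *
 *   | struct dasd_ccw_req (padded to 8) | cplength * struct ccw1 | datasize |
 *     ^cqr                                ^cqr->cpaddr             ^cqr->data
 */
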
781 781
782 /* 782 /*
783 * Free memory of a channel program. This function needs to free all the 783 * Free memory of a channel program. This function needs to free all the
784 * idal lists that might have been created by dasd_set_cda and the 784 * idal lists that might have been created by dasd_set_cda and the
785 * struct dasd_ccw_req itself. 785 * struct dasd_ccw_req itself.
786 */ 786 */
787 void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) 787 void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device)
788 { 788 {
789 #ifdef CONFIG_64BIT 789 #ifdef CONFIG_64BIT
790 struct ccw1 *ccw; 790 struct ccw1 *ccw;
791 791
792 /* Clear any idals used for the request. */ 792 /* Clear any idals used for the request. */
793 ccw = cqr->cpaddr; 793 ccw = cqr->cpaddr;
794 do { 794 do {
795 clear_normalized_cda(ccw); 795 clear_normalized_cda(ccw);
796 } while (ccw++->flags & (CCW_FLAG_CC | CCW_FLAG_DC)); 796 } while (ccw++->flags & (CCW_FLAG_CC | CCW_FLAG_DC));
797 #endif 797 #endif
798 kfree(cqr->cpaddr); 798 kfree(cqr->cpaddr);
799 kfree(cqr->data); 799 kfree(cqr->data);
800 kfree(cqr); 800 kfree(cqr);
801 dasd_put_device(device); 801 dasd_put_device(device);
802 } 802 }
803 803
804 void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) 804 void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device)
805 { 805 {
806 unsigned long flags; 806 unsigned long flags;
807 807
808 spin_lock_irqsave(&device->mem_lock, flags); 808 spin_lock_irqsave(&device->mem_lock, flags);
809 dasd_free_chunk(&device->ccw_chunks, cqr); 809 dasd_free_chunk(&device->ccw_chunks, cqr);
810 spin_unlock_irqrestore(&device->mem_lock, flags); 810 spin_unlock_irqrestore(&device->mem_lock, flags);
811 dasd_put_device(device); 811 dasd_put_device(device);
812 } 812 }
813 813
814 /* 814 /*
815 * Check discipline magic in cqr. 815 * Check discipline magic in cqr.
816 */ 816 */
817 static inline int dasd_check_cqr(struct dasd_ccw_req *cqr) 817 static inline int dasd_check_cqr(struct dasd_ccw_req *cqr)
818 { 818 {
819 struct dasd_device *device; 819 struct dasd_device *device;
820 820
821 if (cqr == NULL) 821 if (cqr == NULL)
822 return -EINVAL; 822 return -EINVAL;
823 device = cqr->startdev; 823 device = cqr->startdev;
824 if (strncmp((char *) &cqr->magic, device->discipline->ebcname, 4)) { 824 if (strncmp((char *) &cqr->magic, device->discipline->ebcname, 4)) {
825 DBF_DEV_EVENT(DBF_WARNING, device, 825 DBF_DEV_EVENT(DBF_WARNING, device,
826 " dasd_ccw_req 0x%08x magic doesn't match" 826 " dasd_ccw_req 0x%08x magic doesn't match"
827 " discipline 0x%08x", 827 " discipline 0x%08x",
828 cqr->magic, 828 cqr->magic,
829 *(unsigned int *) device->discipline->name); 829 *(unsigned int *) device->discipline->name);
830 return -EINVAL; 830 return -EINVAL;
831 } 831 }
832 return 0; 832 return 0;
833 } 833 }
834 834
835 /* 835 /*
836 * Terminate the current i/o and set the request to clear_pending. 836 * Terminate the current i/o and set the request to clear_pending.
837 * Timer keeps device running. 837 * Timer keeps device running.
838 * ccw_device_clear can fail if the i/o subsystem 838 * ccw_device_clear can fail if the i/o subsystem
839 * is in a bad mood. 839 * is in a bad mood.
840 */ 840 */
841 int dasd_term_IO(struct dasd_ccw_req *cqr) 841 int dasd_term_IO(struct dasd_ccw_req *cqr)
842 { 842 {
843 struct dasd_device *device; 843 struct dasd_device *device;
844 int retries, rc; 844 int retries, rc;
845 char errorstring[ERRORLENGTH]; 845 char errorstring[ERRORLENGTH];
846 846
847 /* Check the cqr */ 847 /* Check the cqr */
848 rc = dasd_check_cqr(cqr); 848 rc = dasd_check_cqr(cqr);
849 if (rc) 849 if (rc)
850 return rc; 850 return rc;
851 retries = 0; 851 retries = 0;
852 device = (struct dasd_device *) cqr->startdev; 852 device = (struct dasd_device *) cqr->startdev;
853 while ((retries < 5) && (cqr->status == DASD_CQR_IN_IO)) { 853 while ((retries < 5) && (cqr->status == DASD_CQR_IN_IO)) {
854 rc = ccw_device_clear(device->cdev, (long) cqr); 854 rc = ccw_device_clear(device->cdev, (long) cqr);
855 switch (rc) { 855 switch (rc) {
856 case 0: /* termination successful */ 856 case 0: /* termination successful */
857 cqr->retries--; 857 cqr->retries--;
858 cqr->status = DASD_CQR_CLEAR_PENDING; 858 cqr->status = DASD_CQR_CLEAR_PENDING;
859 cqr->stopclk = get_clock(); 859 cqr->stopclk = get_clock();
860 cqr->starttime = 0; 860 cqr->starttime = 0;
861 DBF_DEV_EVENT(DBF_DEBUG, device, 861 DBF_DEV_EVENT(DBF_DEBUG, device,
862 "terminate cqr %p successful", 862 "terminate cqr %p successful",
863 cqr); 863 cqr);
864 break; 864 break;
865 case -ENODEV: 865 case -ENODEV:
866 DBF_DEV_EVENT(DBF_ERR, device, "%s", 866 DBF_DEV_EVENT(DBF_ERR, device, "%s",
867 "device gone, retry"); 867 "device gone, retry");
868 break; 868 break;
869 case -EIO: 869 case -EIO:
870 DBF_DEV_EVENT(DBF_ERR, device, "%s", 870 DBF_DEV_EVENT(DBF_ERR, device, "%s",
871 "I/O error, retry"); 871 "I/O error, retry");
872 break; 872 break;
873 case -EINVAL: 873 case -EINVAL:
874 case -EBUSY: 874 case -EBUSY:
875 DBF_DEV_EVENT(DBF_ERR, device, "%s", 875 DBF_DEV_EVENT(DBF_ERR, device, "%s",
876 "device busy, retry later"); 876 "device busy, retry later");
877 break; 877 break;
878 default: 878 default:
879 /* internal error 10 - unknown rc*/ 879 /* internal error 10 - unknown rc*/
880 snprintf(errorstring, ERRORLENGTH, "10 %d", rc); 880 snprintf(errorstring, ERRORLENGTH, "10 %d", rc);
881 dev_err(&device->cdev->dev, "An error occurred in the " 881 dev_err(&device->cdev->dev, "An error occurred in the "
882 "DASD device driver, reason=%s\n", errorstring); 882 "DASD device driver, reason=%s\n", errorstring);
883 BUG(); 883 BUG();
884 break; 884 break;
885 } 885 }
886 retries++; 886 retries++;
887 } 887 }
888 dasd_schedule_device_bh(device); 888 dasd_schedule_device_bh(device);
889 return rc; 889 return rc;
890 } 890 }
891 891
892 /* 892 /*
893 * Start the i/o. This start_IO can fail if the channel is really busy. 893 * Start the i/o. This start_IO can fail if the channel is really busy.
894 * In that case set up a timer to start the request later. 894 * In that case set up a timer to start the request later.
895 */ 895 */
896 int dasd_start_IO(struct dasd_ccw_req *cqr) 896 int dasd_start_IO(struct dasd_ccw_req *cqr)
897 { 897 {
898 struct dasd_device *device; 898 struct dasd_device *device;
899 int rc; 899 int rc;
900 char errorstring[ERRORLENGTH]; 900 char errorstring[ERRORLENGTH];
901 901
902 /* Check the cqr */ 902 /* Check the cqr */
903 rc = dasd_check_cqr(cqr); 903 rc = dasd_check_cqr(cqr);
904 if (rc) { 904 if (rc) {
905 cqr->intrc = rc; 905 cqr->intrc = rc;
906 return rc; 906 return rc;
907 } 907 }
908 device = (struct dasd_device *) cqr->startdev; 908 device = (struct dasd_device *) cqr->startdev;
909 if (cqr->retries < 0) { 909 if (cqr->retries < 0) {
910 /* internal error 14 - start_IO run out of retries */ 910 /* internal error 14 - start_IO run out of retries */
911 sprintf(errorstring, "14 %p", cqr); 911 sprintf(errorstring, "14 %p", cqr);
912 dev_err(&device->cdev->dev, "An error occurred in the DASD " 912 dev_err(&device->cdev->dev, "An error occurred in the DASD "
913 "device driver, reason=%s\n", errorstring); 913 "device driver, reason=%s\n", errorstring);
914 cqr->status = DASD_CQR_ERROR; 914 cqr->status = DASD_CQR_ERROR;
915 return -EIO; 915 return -EIO;
916 } 916 }
917 cqr->startclk = get_clock(); 917 cqr->startclk = get_clock();
918 cqr->starttime = jiffies; 918 cqr->starttime = jiffies;
919 cqr->retries--; 919 cqr->retries--;
920 if (cqr->cpmode == 1) { 920 if (cqr->cpmode == 1) {
921 rc = ccw_device_tm_start(device->cdev, cqr->cpaddr, 921 rc = ccw_device_tm_start(device->cdev, cqr->cpaddr,
922 (long) cqr, cqr->lpm); 922 (long) cqr, cqr->lpm);
923 } else { 923 } else {
924 rc = ccw_device_start(device->cdev, cqr->cpaddr, 924 rc = ccw_device_start(device->cdev, cqr->cpaddr,
925 (long) cqr, cqr->lpm, 0); 925 (long) cqr, cqr->lpm, 0);
926 } 926 }
927 switch (rc) { 927 switch (rc) {
928 case 0: 928 case 0:
929 cqr->status = DASD_CQR_IN_IO; 929 cqr->status = DASD_CQR_IN_IO;
930 break; 930 break;
931 case -EBUSY: 931 case -EBUSY:
932 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 932 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
933 "start_IO: device busy, retry later"); 933 "start_IO: device busy, retry later");
934 break; 934 break;
935 case -ETIMEDOUT: 935 case -ETIMEDOUT:
936 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 936 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
937 "start_IO: request timeout, retry later"); 937 "start_IO: request timeout, retry later");
938 break; 938 break;
939 case -EACCES: 939 case -EACCES:
940 /* -EACCES indicates that the request used only a 940 /* -EACCES indicates that the request used only a
941 * subset of the available paths and all these 941 * subset of the available paths and all these
942 * paths are gone. 942 * paths are gone.
943 * Do a retry with all available paths. 943 * Do a retry with all available paths.
944 */ 944 */
945 cqr->lpm = LPM_ANYPATH; 945 cqr->lpm = LPM_ANYPATH;
946 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 946 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
947 "start_IO: selected pathes gone," 947 "start_IO: selected pathes gone,"
948 " retry on all pathes"); 948 " retry on all pathes");
949 break; 949 break;
950 case -ENODEV: 950 case -ENODEV:
951 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 951 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
952 "start_IO: -ENODEV device gone, retry"); 952 "start_IO: -ENODEV device gone, retry");
953 break; 953 break;
954 case -EIO: 954 case -EIO:
955 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 955 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
956 "start_IO: -EIO device gone, retry"); 956 "start_IO: -EIO device gone, retry");
957 break; 957 break;
958 case -EINVAL: 958 case -EINVAL:
959 /* most likely caused in power management context */ 959 /* most likely caused in power management context */
960 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 960 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
961 "start_IO: -EINVAL device currently " 961 "start_IO: -EINVAL device currently "
962 "not accessible"); 962 "not accessible");
963 break; 963 break;
964 default: 964 default:
965 /* internal error 11 - unknown rc */ 965 /* internal error 11 - unknown rc */
966 snprintf(errorstring, ERRORLENGTH, "11 %d", rc); 966 snprintf(errorstring, ERRORLENGTH, "11 %d", rc);
967 dev_err(&device->cdev->dev, 967 dev_err(&device->cdev->dev,
968 "An error occurred in the DASD device driver, " 968 "An error occurred in the DASD device driver, "
969 "reason=%s\n", errorstring); 969 "reason=%s\n", errorstring);
970 BUG(); 970 BUG();
971 break; 971 break;
972 } 972 }
973 cqr->intrc = rc; 973 cqr->intrc = rc;
974 return rc; 974 return rc;
975 } 975 }
976 976
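The comment above dasd_start_IO() promises that a busy channel leads to a delayed retry rather than a hard failure. For illustration only, the caller-side handling of the return code looks roughly like the sketch below; it mirrors __dasd_device_start_head() further down in this file, and the wrapper name example_try_start() is hypothetical.

/*
 * Editorial sketch, not part of the patch: react to dasd_start_IO().
 * On success the request's expiry timer is armed; on a temporary
 * failure the request simply stays queued and a short retry timer
 * is set instead.
 */
static void example_try_start(struct dasd_device *device,
                              struct dasd_ccw_req *cqr)
{
        int rc;

        rc = device->discipline->start_IO(cqr);
        if (rc == 0)
                /* started - guard against a missing interrupt */
                dasd_device_set_timer(device, cqr->expires);
        else
                /* e.g. -EBUSY or -ETIMEDOUT - try again in half a second */
                dasd_device_set_timer(device, HZ / 2);
}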
977 /* 977 /*
978 * Timeout function for dasd devices. This is used for different purposes 978 * Timeout function for dasd devices. This is used for different purposes
979 * 1) missing interrupt handler for normal operation 979 * 1) missing interrupt handler for normal operation
980 * 2) delayed start of request where start_IO failed with -EBUSY 980 * 2) delayed start of request where start_IO failed with -EBUSY
981 * 3) timeout for missing state change interrupts 981 * 3) timeout for missing state change interrupts
982 * The head of the ccw queue will have status DASD_CQR_IN_IO for 1), 982 * The head of the ccw queue will have status DASD_CQR_IN_IO for 1),
983 * DASD_CQR_QUEUED for 2) and 3). 983 * DASD_CQR_QUEUED for 2) and 3).
984 */ 984 */
985 static void dasd_device_timeout(unsigned long ptr) 985 static void dasd_device_timeout(unsigned long ptr)
986 { 986 {
987 unsigned long flags; 987 unsigned long flags;
988 struct dasd_device *device; 988 struct dasd_device *device;
989 989
990 device = (struct dasd_device *) ptr; 990 device = (struct dasd_device *) ptr;
991 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); 991 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
992 /* re-activate request queue */ 992 /* re-activate request queue */
993 dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); 993 dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING);
994 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); 994 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
995 dasd_schedule_device_bh(device); 995 dasd_schedule_device_bh(device);
996 } 996 }
997 997
998 /* 998 /*
999 * Setup timeout for a device in jiffies. 999 * Setup timeout for a device in jiffies.
1000 */ 1000 */
1001 void dasd_device_set_timer(struct dasd_device *device, int expires) 1001 void dasd_device_set_timer(struct dasd_device *device, int expires)
1002 { 1002 {
1003 if (expires == 0) 1003 if (expires == 0)
1004 del_timer(&device->timer); 1004 del_timer(&device->timer);
1005 else 1005 else
1006 mod_timer(&device->timer, jiffies + expires); 1006 mod_timer(&device->timer, jiffies + expires);
1007 } 1007 }
1008 1008
1009 /* 1009 /*
1010 * Clear timeout for a device. 1010 * Clear timeout for a device.
1011 */ 1011 */
1012 void dasd_device_clear_timer(struct dasd_device *device) 1012 void dasd_device_clear_timer(struct dasd_device *device)
1013 { 1013 {
1014 del_timer(&device->timer); 1014 del_timer(&device->timer);
1015 } 1015 }
1016 1016
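dasd_device_set_timer() and dasd_device_clear_timer() assume that device->timer has already been initialised to call dasd_device_timeout() with the device as its argument. A minimal sketch of that one-time wiring, assuming the pre-timer_setup() timer API of this kernel generation (the helper name is hypothetical):

#include <linux/timer.h>

/* Editorial sketch: one-time initialisation of the per-device timer. */
static void example_init_device_timer(struct dasd_device *device)
{
        init_timer(&device->timer);
        device->timer.function = dasd_device_timeout;
        device->timer.data = (unsigned long) device;
}

After that, dasd_device_set_timer(device, 5 * HZ) arms a five-second timeout, and passing 0 (or calling dasd_device_clear_timer()) cancels it again.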
1017 static void dasd_handle_killed_request(struct ccw_device *cdev, 1017 static void dasd_handle_killed_request(struct ccw_device *cdev,
1018 unsigned long intparm) 1018 unsigned long intparm)
1019 { 1019 {
1020 struct dasd_ccw_req *cqr; 1020 struct dasd_ccw_req *cqr;
1021 struct dasd_device *device; 1021 struct dasd_device *device;
1022 1022
1023 if (!intparm) 1023 if (!intparm)
1024 return; 1024 return;
1025 cqr = (struct dasd_ccw_req *) intparm; 1025 cqr = (struct dasd_ccw_req *) intparm;
1026 if (cqr->status != DASD_CQR_IN_IO) { 1026 if (cqr->status != DASD_CQR_IN_IO) {
1027 DBF_EVENT_DEVID(DBF_DEBUG, cdev, 1027 DBF_EVENT_DEVID(DBF_DEBUG, cdev,
1028 "invalid status in handle_killed_request: " 1028 "invalid status in handle_killed_request: "
1029 "%02x", cqr->status); 1029 "%02x", cqr->status);
1030 return; 1030 return;
1031 } 1031 }
1032 1032
1033 device = dasd_device_from_cdev_locked(cdev); 1033 device = dasd_device_from_cdev_locked(cdev);
1034 if (IS_ERR(device)) { 1034 if (IS_ERR(device)) {
1035 DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", 1035 DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s",
1036 "unable to get device from cdev"); 1036 "unable to get device from cdev");
1037 return; 1037 return;
1038 } 1038 }
1039 1039
1040 if (!cqr->startdev || 1040 if (!cqr->startdev ||
1041 device != cqr->startdev || 1041 device != cqr->startdev ||
1042 strncmp(cqr->startdev->discipline->ebcname, 1042 strncmp(cqr->startdev->discipline->ebcname,
1043 (char *) &cqr->magic, 4)) { 1043 (char *) &cqr->magic, 4)) {
1044 DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", 1044 DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s",
1045 "invalid device in request"); 1045 "invalid device in request");
1046 dasd_put_device(device); 1046 dasd_put_device(device);
1047 return; 1047 return;
1048 } 1048 }
1049 1049
1050 /* Schedule request to be retried. */ 1050 /* Schedule request to be retried. */
1051 cqr->status = DASD_CQR_QUEUED; 1051 cqr->status = DASD_CQR_QUEUED;
1052 1052
1053 dasd_device_clear_timer(device); 1053 dasd_device_clear_timer(device);
1054 dasd_schedule_device_bh(device); 1054 dasd_schedule_device_bh(device);
1055 dasd_put_device(device); 1055 dasd_put_device(device);
1056 } 1056 }
1057 1057
1058 void dasd_generic_handle_state_change(struct dasd_device *device) 1058 void dasd_generic_handle_state_change(struct dasd_device *device)
1059 { 1059 {
1060 /* First of all start sense subsystem status request. */ 1060 /* First of all start sense subsystem status request. */
1061 dasd_eer_snss(device); 1061 dasd_eer_snss(device);
1062 1062
1063 dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); 1063 dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING);
1064 dasd_schedule_device_bh(device); 1064 dasd_schedule_device_bh(device);
1065 if (device->block) 1065 if (device->block)
1066 dasd_schedule_block_bh(device->block); 1066 dasd_schedule_block_bh(device->block);
1067 } 1067 }
1068 1068
1069 /* 1069 /*
1070 * Interrupt handler for "normal" ssch-io based dasd devices. 1070 * Interrupt handler for "normal" ssch-io based dasd devices.
1071 */ 1071 */
1072 void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, 1072 void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
1073 struct irb *irb) 1073 struct irb *irb)
1074 { 1074 {
1075 struct dasd_ccw_req *cqr, *next; 1075 struct dasd_ccw_req *cqr, *next;
1076 struct dasd_device *device; 1076 struct dasd_device *device;
1077 unsigned long long now; 1077 unsigned long long now;
1078 int expires; 1078 int expires;
1079 1079
1080 if (IS_ERR(irb)) { 1080 if (IS_ERR(irb)) {
1081 switch (PTR_ERR(irb)) { 1081 switch (PTR_ERR(irb)) {
1082 case -EIO: 1082 case -EIO:
1083 break; 1083 break;
1084 case -ETIMEDOUT: 1084 case -ETIMEDOUT:
1085 DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " 1085 DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
1086 "request timed out\n", __func__); 1086 "request timed out\n", __func__);
1087 break; 1087 break;
1088 default: 1088 default:
1089 DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " 1089 DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
1090 "unknown error %ld\n", __func__, 1090 "unknown error %ld\n", __func__,
1091 PTR_ERR(irb)); 1091 PTR_ERR(irb));
1092 } 1092 }
1093 dasd_handle_killed_request(cdev, intparm); 1093 dasd_handle_killed_request(cdev, intparm);
1094 return; 1094 return;
1095 } 1095 }
1096 1096
1097 now = get_clock(); 1097 now = get_clock();
1098 1098
1099 /* check for unsolicited interrupts */ 1099 /* check for unsolicited interrupts */
1100 cqr = (struct dasd_ccw_req *) intparm; 1100 cqr = (struct dasd_ccw_req *) intparm;
1101 if (!cqr || ((scsw_cc(&irb->scsw) == 1) && 1101 if (!cqr || ((scsw_cc(&irb->scsw) == 1) &&
1102 (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC) && 1102 (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC) &&
1103 (scsw_stctl(&irb->scsw) & SCSW_STCTL_STATUS_PEND))) { 1103 (scsw_stctl(&irb->scsw) & SCSW_STCTL_STATUS_PEND))) {
1104 if (cqr && cqr->status == DASD_CQR_IN_IO) 1104 if (cqr && cqr->status == DASD_CQR_IN_IO)
1105 cqr->status = DASD_CQR_QUEUED; 1105 cqr->status = DASD_CQR_QUEUED;
1106 device = dasd_device_from_cdev_locked(cdev); 1106 device = dasd_device_from_cdev_locked(cdev);
1107 if (!IS_ERR(device)) { 1107 if (!IS_ERR(device)) {
1108 dasd_device_clear_timer(device); 1108 dasd_device_clear_timer(device);
1109 device->discipline->handle_unsolicited_interrupt(device, 1109 device->discipline->handle_unsolicited_interrupt(device,
1110 irb); 1110 irb);
1111 dasd_put_device(device); 1111 dasd_put_device(device);
1112 } 1112 }
1113 return; 1113 return;
1114 } 1114 }
1115 1115
1116 device = (struct dasd_device *) cqr->startdev; 1116 device = (struct dasd_device *) cqr->startdev;
1117 if (!device || 1117 if (!device ||
1118 strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) { 1118 strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) {
1119 DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", 1119 DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s",
1120 "invalid device in request"); 1120 "invalid device in request");
1121 return; 1121 return;
1122 } 1122 }
1123 1123
1124 /* Check for clear pending */ 1124 /* Check for clear pending */
1125 if (cqr->status == DASD_CQR_CLEAR_PENDING && 1125 if (cqr->status == DASD_CQR_CLEAR_PENDING &&
1126 scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) { 1126 scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) {
1127 cqr->status = DASD_CQR_CLEARED; 1127 cqr->status = DASD_CQR_CLEARED;
1128 dasd_device_clear_timer(device); 1128 dasd_device_clear_timer(device);
1129 wake_up(&dasd_flush_wq); 1129 wake_up(&dasd_flush_wq);
1130 dasd_schedule_device_bh(device); 1130 dasd_schedule_device_bh(device);
1131 return; 1131 return;
1132 } 1132 }
1133 1133
1134 /* check status - the request might have been killed by dyn detach */ 1134 /* check status - the request might have been killed by dyn detach */
1135 if (cqr->status != DASD_CQR_IN_IO) { 1135 if (cqr->status != DASD_CQR_IN_IO) {
1136 DBF_DEV_EVENT(DBF_DEBUG, device, "invalid status: bus_id %s, " 1136 DBF_DEV_EVENT(DBF_DEBUG, device, "invalid status: bus_id %s, "
1137 "status %02x", dev_name(&cdev->dev), cqr->status); 1137 "status %02x", dev_name(&cdev->dev), cqr->status);
1138 return; 1138 return;
1139 } 1139 }
1140 1140
1141 next = NULL; 1141 next = NULL;
1142 expires = 0; 1142 expires = 0;
1143 if (scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) && 1143 if (scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
1144 scsw_cstat(&irb->scsw) == 0) { 1144 scsw_cstat(&irb->scsw) == 0) {
1145 /* request was completed successfully */ 1145 /* request was completed successfully */
1146 cqr->status = DASD_CQR_SUCCESS; 1146 cqr->status = DASD_CQR_SUCCESS;
1147 cqr->stopclk = now; 1147 cqr->stopclk = now;
1148 /* Start first request on queue if possible -> fast_io. */ 1148 /* Start first request on queue if possible -> fast_io. */
1149 if (cqr->devlist.next != &device->ccw_queue) { 1149 if (cqr->devlist.next != &device->ccw_queue) {
1150 next = list_entry(cqr->devlist.next, 1150 next = list_entry(cqr->devlist.next,
1151 struct dasd_ccw_req, devlist); 1151 struct dasd_ccw_req, devlist);
1152 } 1152 }
1153 } else { /* error */ 1153 } else { /* error */
1154 memcpy(&cqr->irb, irb, sizeof(struct irb)); 1154 memcpy(&cqr->irb, irb, sizeof(struct irb));
1155 /* log sense for every failed I/O to s390 debugfeature */ 1155 /* log sense for every failed I/O to s390 debugfeature */
1156 dasd_log_sense_dbf(cqr, irb); 1156 dasd_log_sense_dbf(cqr, irb);
1157 if (device->features & DASD_FEATURE_ERPLOG) { 1157 if (device->features & DASD_FEATURE_ERPLOG) {
1158 dasd_log_sense(cqr, irb); 1158 dasd_log_sense(cqr, irb);
1159 } 1159 }
1160 1160
1161 /* 1161 /*
1162 * If we don't want complex ERP for this request, then just 1162 * If we don't want complex ERP for this request, then just
1163 * reset this and retry it in the fastpath 1163 * reset this and retry it in the fastpath
1164 */ 1164 */
1165 if (!test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags) && 1165 if (!test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags) &&
1166 cqr->retries > 0) { 1166 cqr->retries > 0) {
1167 if (cqr->lpm == LPM_ANYPATH) 1167 if (cqr->lpm == LPM_ANYPATH)
1168 DBF_DEV_EVENT(DBF_DEBUG, device, 1168 DBF_DEV_EVENT(DBF_DEBUG, device,
1169 "default ERP in fastpath " 1169 "default ERP in fastpath "
1170 "(%i retries left)", 1170 "(%i retries left)",
1171 cqr->retries); 1171 cqr->retries);
1172 cqr->lpm = LPM_ANYPATH; 1172 cqr->lpm = LPM_ANYPATH;
1173 cqr->status = DASD_CQR_QUEUED; 1173 cqr->status = DASD_CQR_QUEUED;
1174 next = cqr; 1174 next = cqr;
1175 } else 1175 } else
1176 cqr->status = DASD_CQR_ERROR; 1176 cqr->status = DASD_CQR_ERROR;
1177 } 1177 }
1178 if (next && (next->status == DASD_CQR_QUEUED) && 1178 if (next && (next->status == DASD_CQR_QUEUED) &&
1179 (!device->stopped)) { 1179 (!device->stopped)) {
1180 if (device->discipline->start_IO(next) == 0) 1180 if (device->discipline->start_IO(next) == 0)
1181 expires = next->expires; 1181 expires = next->expires;
1182 } 1182 }
1183 if (expires != 0) 1183 if (expires != 0)
1184 dasd_device_set_timer(device, expires); 1184 dasd_device_set_timer(device, expires);
1185 else 1185 else
1186 dasd_device_clear_timer(device); 1186 dasd_device_clear_timer(device);
1187 dasd_schedule_device_bh(device); 1187 dasd_schedule_device_bh(device);
1188 } 1188 }
1189 1189
1190 enum uc_todo dasd_generic_uc_handler(struct ccw_device *cdev, struct irb *irb) 1190 enum uc_todo dasd_generic_uc_handler(struct ccw_device *cdev, struct irb *irb)
1191 { 1191 {
1192 struct dasd_device *device; 1192 struct dasd_device *device;
1193 1193
1194 device = dasd_device_from_cdev_locked(cdev); 1194 device = dasd_device_from_cdev_locked(cdev);
1195 1195
1196 if (IS_ERR(device)) 1196 if (IS_ERR(device))
1197 goto out; 1197 goto out;
1198 if (test_bit(DASD_FLAG_OFFLINE, &device->flags) || 1198 if (test_bit(DASD_FLAG_OFFLINE, &device->flags) ||
1199 device->state != device->target || 1199 device->state != device->target ||
1200 !device->discipline->handle_unsolicited_interrupt){ 1200 !device->discipline->handle_unsolicited_interrupt){
1201 dasd_put_device(device); 1201 dasd_put_device(device);
1202 goto out; 1202 goto out;
1203 } 1203 }
1204 1204
1205 dasd_device_clear_timer(device); 1205 dasd_device_clear_timer(device);
1206 device->discipline->handle_unsolicited_interrupt(device, irb); 1206 device->discipline->handle_unsolicited_interrupt(device, irb);
1207 dasd_put_device(device); 1207 dasd_put_device(device);
1208 out: 1208 out:
1209 return UC_TODO_RETRY; 1209 return UC_TODO_RETRY;
1210 } 1210 }
1211 EXPORT_SYMBOL_GPL(dasd_generic_uc_handler); 1211 EXPORT_SYMBOL_GPL(dasd_generic_uc_handler);
1212 1212
1213 /* 1213 /*
1214 * If we have an error on a dasd_block layer request then we cancel 1214 * If we have an error on a dasd_block layer request then we cancel
1215 * and return all further requests from the same dasd_block as well. 1215 * and return all further requests from the same dasd_block as well.
1216 */ 1216 */
1217 static void __dasd_device_recovery(struct dasd_device *device, 1217 static void __dasd_device_recovery(struct dasd_device *device,
1218 struct dasd_ccw_req *ref_cqr) 1218 struct dasd_ccw_req *ref_cqr)
1219 { 1219 {
1220 struct list_head *l, *n; 1220 struct list_head *l, *n;
1221 struct dasd_ccw_req *cqr; 1221 struct dasd_ccw_req *cqr;
1222 1222
1223 /* 1223 /*
1224 * only requeue requests that came from the dasd_block layer 1224 * only requeue requests that came from the dasd_block layer
1225 */ 1225 */
1226 if (!ref_cqr->block) 1226 if (!ref_cqr->block)
1227 return; 1227 return;
1228 1228
1229 list_for_each_safe(l, n, &device->ccw_queue) { 1229 list_for_each_safe(l, n, &device->ccw_queue) {
1230 cqr = list_entry(l, struct dasd_ccw_req, devlist); 1230 cqr = list_entry(l, struct dasd_ccw_req, devlist);
1231 if (cqr->status == DASD_CQR_QUEUED && 1231 if (cqr->status == DASD_CQR_QUEUED &&
1232 ref_cqr->block == cqr->block) { 1232 ref_cqr->block == cqr->block) {
1233 cqr->status = DASD_CQR_CLEARED; 1233 cqr->status = DASD_CQR_CLEARED;
1234 } 1234 }
1235 } 1235 }
1236 } 1236 }
1237 1237
1238 /* 1238 /*
1239 * Remove those ccw requests from the queue that need to be returned 1239 * Remove those ccw requests from the queue that need to be returned
1240 * to the upper layer. 1240 * to the upper layer.
1241 */ 1241 */
1242 static void __dasd_device_process_ccw_queue(struct dasd_device *device, 1242 static void __dasd_device_process_ccw_queue(struct dasd_device *device,
1243 struct list_head *final_queue) 1243 struct list_head *final_queue)
1244 { 1244 {
1245 struct list_head *l, *n; 1245 struct list_head *l, *n;
1246 struct dasd_ccw_req *cqr; 1246 struct dasd_ccw_req *cqr;
1247 1247
1248 /* Process request with final status. */ 1248 /* Process request with final status. */
1249 list_for_each_safe(l, n, &device->ccw_queue) { 1249 list_for_each_safe(l, n, &device->ccw_queue) {
1250 cqr = list_entry(l, struct dasd_ccw_req, devlist); 1250 cqr = list_entry(l, struct dasd_ccw_req, devlist);
1251 1251
1252 /* Stop list processing at the first non-final request. */ 1252 /* Stop list processing at the first non-final request. */
1253 if (cqr->status == DASD_CQR_QUEUED || 1253 if (cqr->status == DASD_CQR_QUEUED ||
1254 cqr->status == DASD_CQR_IN_IO || 1254 cqr->status == DASD_CQR_IN_IO ||
1255 cqr->status == DASD_CQR_CLEAR_PENDING) 1255 cqr->status == DASD_CQR_CLEAR_PENDING)
1256 break; 1256 break;
1257 if (cqr->status == DASD_CQR_ERROR) { 1257 if (cqr->status == DASD_CQR_ERROR) {
1258 __dasd_device_recovery(device, cqr); 1258 __dasd_device_recovery(device, cqr);
1259 } 1259 }
1260 /* Rechain finished requests to final queue */ 1260 /* Rechain finished requests to final queue */
1261 list_move_tail(&cqr->devlist, final_queue); 1261 list_move_tail(&cqr->devlist, final_queue);
1262 } 1262 }
1263 } 1263 }
1264 1264
1265 /* 1265 /*
1266 * the cqrs from the final queue are returned to the upper layer 1266 * the cqrs from the final queue are returned to the upper layer
1267 * by setting a dasd_block state and calling the callback function 1267 * by setting a dasd_block state and calling the callback function
1268 */ 1268 */
1269 static void __dasd_device_process_final_queue(struct dasd_device *device, 1269 static void __dasd_device_process_final_queue(struct dasd_device *device,
1270 struct list_head *final_queue) 1270 struct list_head *final_queue)
1271 { 1271 {
1272 struct list_head *l, *n; 1272 struct list_head *l, *n;
1273 struct dasd_ccw_req *cqr; 1273 struct dasd_ccw_req *cqr;
1274 struct dasd_block *block; 1274 struct dasd_block *block;
1275 void (*callback)(struct dasd_ccw_req *, void *data); 1275 void (*callback)(struct dasd_ccw_req *, void *data);
1276 void *callback_data; 1276 void *callback_data;
1277 char errorstring[ERRORLENGTH]; 1277 char errorstring[ERRORLENGTH];
1278 1278
1279 list_for_each_safe(l, n, final_queue) { 1279 list_for_each_safe(l, n, final_queue) {
1280 cqr = list_entry(l, struct dasd_ccw_req, devlist); 1280 cqr = list_entry(l, struct dasd_ccw_req, devlist);
1281 list_del_init(&cqr->devlist); 1281 list_del_init(&cqr->devlist);
1282 block = cqr->block; 1282 block = cqr->block;
1283 callback = cqr->callback; 1283 callback = cqr->callback;
1284 callback_data = cqr->callback_data; 1284 callback_data = cqr->callback_data;
1285 if (block) 1285 if (block)
1286 spin_lock_bh(&block->queue_lock); 1286 spin_lock_bh(&block->queue_lock);
1287 switch (cqr->status) { 1287 switch (cqr->status) {
1288 case DASD_CQR_SUCCESS: 1288 case DASD_CQR_SUCCESS:
1289 cqr->status = DASD_CQR_DONE; 1289 cqr->status = DASD_CQR_DONE;
1290 break; 1290 break;
1291 case DASD_CQR_ERROR: 1291 case DASD_CQR_ERROR:
1292 cqr->status = DASD_CQR_NEED_ERP; 1292 cqr->status = DASD_CQR_NEED_ERP;
1293 break; 1293 break;
1294 case DASD_CQR_CLEARED: 1294 case DASD_CQR_CLEARED:
1295 cqr->status = DASD_CQR_TERMINATED; 1295 cqr->status = DASD_CQR_TERMINATED;
1296 break; 1296 break;
1297 default: 1297 default:
1298 /* internal error 12 - wrong cqr status*/ 1298 /* internal error 12 - wrong cqr status*/
1299 snprintf(errorstring, ERRORLENGTH, "12 %p %02x", cqr, cqr->status); 1299 snprintf(errorstring, ERRORLENGTH, "12 %p %02x", cqr, cqr->status);
1300 dev_err(&device->cdev->dev, 1300 dev_err(&device->cdev->dev,
1301 "An error occurred in the DASD device driver, " 1301 "An error occurred in the DASD device driver, "
1302 "reason=%s\n", errorstring); 1302 "reason=%s\n", errorstring);
1303 BUG(); 1303 BUG();
1304 } 1304 }
1305 if (cqr->callback != NULL) 1305 if (cqr->callback != NULL)
1306 (callback)(cqr, callback_data); 1306 (callback)(cqr, callback_data);
1307 if (block) 1307 if (block)
1308 spin_unlock_bh(&block->queue_lock); 1308 spin_unlock_bh(&block->queue_lock);
1309 } 1309 }
1310 } 1310 }
1311 1311
1312 /* 1312 /*
1313 * Take a look at the first request on the ccw queue and check 1313 * Take a look at the first request on the ccw queue and check
1314 * if it reached its expire time. If so, terminate the IO. 1314 * if it reached its expire time. If so, terminate the IO.
1315 */ 1315 */
1316 static void __dasd_device_check_expire(struct dasd_device *device) 1316 static void __dasd_device_check_expire(struct dasd_device *device)
1317 { 1317 {
1318 struct dasd_ccw_req *cqr; 1318 struct dasd_ccw_req *cqr;
1319 1319
1320 if (list_empty(&device->ccw_queue)) 1320 if (list_empty(&device->ccw_queue))
1321 return; 1321 return;
1322 cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); 1322 cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
1323 if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) && 1323 if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) &&
1324 (time_after_eq(jiffies, cqr->expires + cqr->starttime))) { 1324 (time_after_eq(jiffies, cqr->expires + cqr->starttime))) {
1325 if (device->discipline->term_IO(cqr) != 0) { 1325 if (device->discipline->term_IO(cqr) != 0) {
1326 /* Hmpf, try again in 5 sec */ 1326 /* Hmpf, try again in 5 sec */
1327 dev_err(&device->cdev->dev, 1327 dev_err(&device->cdev->dev,
1328 "cqr %p timed out (%lus) but cannot be " 1328 "cqr %p timed out (%lus) but cannot be "
1329 "ended, retrying in 5 s\n", 1329 "ended, retrying in 5 s\n",
1330 cqr, (cqr->expires/HZ)); 1330 cqr, (cqr->expires/HZ));
1331 cqr->expires += 5*HZ; 1331 cqr->expires += 5*HZ;
1332 dasd_device_set_timer(device, 5*HZ); 1332 dasd_device_set_timer(device, 5*HZ);
1333 } else { 1333 } else {
1334 dev_err(&device->cdev->dev, 1334 dev_err(&device->cdev->dev,
1335 "cqr %p timed out (%lus), %i retries " 1335 "cqr %p timed out (%lus), %i retries "
1336 "remaining\n", cqr, (cqr->expires/HZ), 1336 "remaining\n", cqr, (cqr->expires/HZ),
1337 cqr->retries); 1337 cqr->retries);
1338 } 1338 }
1339 } 1339 }
1340 } 1340 }
1341 1341
1342 /* 1342 /*
1343 * Take a look at the first request on the ccw queue and check 1343 * Take a look at the first request on the ccw queue and check
1344 * if it needs to be started. 1344 * if it needs to be started.
1345 */ 1345 */
1346 static void __dasd_device_start_head(struct dasd_device *device) 1346 static void __dasd_device_start_head(struct dasd_device *device)
1347 { 1347 {
1348 struct dasd_ccw_req *cqr; 1348 struct dasd_ccw_req *cqr;
1349 int rc; 1349 int rc;
1350 1350
1351 if (list_empty(&device->ccw_queue)) 1351 if (list_empty(&device->ccw_queue))
1352 return; 1352 return;
1353 cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); 1353 cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
1354 if (cqr->status != DASD_CQR_QUEUED) 1354 if (cqr->status != DASD_CQR_QUEUED)
1355 return; 1355 return;
1356 /* when device is stopped, return request to previous layer */ 1356 /* when device is stopped, return request to previous layer */
1357 if (device->stopped) { 1357 if (device->stopped) {
1358 cqr->status = DASD_CQR_CLEARED; 1358 cqr->status = DASD_CQR_CLEARED;
1359 dasd_schedule_device_bh(device); 1359 dasd_schedule_device_bh(device);
1360 return; 1360 return;
1361 } 1361 }
1362 1362
1363 rc = device->discipline->start_IO(cqr); 1363 rc = device->discipline->start_IO(cqr);
1364 if (rc == 0) 1364 if (rc == 0)
1365 dasd_device_set_timer(device, cqr->expires); 1365 dasd_device_set_timer(device, cqr->expires);
1366 else if (rc == -EACCES) { 1366 else if (rc == -EACCES) {
1367 dasd_schedule_device_bh(device); 1367 dasd_schedule_device_bh(device);
1368 } else 1368 } else
1369 /* Hmpf, try again in 1/2 sec */ 1369 /* Hmpf, try again in 1/2 sec */
1370 dasd_device_set_timer(device, 50); 1370 dasd_device_set_timer(device, 50);
1371 } 1371 }
1372 1372
1373 /* 1373 /*
1374 * Go through all requests on the dasd_device request queue, 1374 * Go through all requests on the dasd_device request queue,
1375 * terminate them on the cdev if necessary, and return them to the 1375 * terminate them on the cdev if necessary, and return them to the
1376 * submitting layer via callback. 1376 * submitting layer via callback.
1377 * Note: 1377 * Note:
1378 * Make sure that all 'submitting layers' still exist when 1378 * Make sure that all 'submitting layers' still exist when
1379 * this function is called! In other words, when 'device' is a base 1379 * this function is called! In other words, when 'device' is a base
1380 * device then all block layer requests must have been removed beforehand 1380 * device then all block layer requests must have been removed beforehand
1381 * via dasd_flush_block_queue. 1381 * via dasd_flush_block_queue.
1382 */ 1382 */
1383 int dasd_flush_device_queue(struct dasd_device *device) 1383 int dasd_flush_device_queue(struct dasd_device *device)
1384 { 1384 {
1385 struct dasd_ccw_req *cqr, *n; 1385 struct dasd_ccw_req *cqr, *n;
1386 int rc; 1386 int rc;
1387 struct list_head flush_queue; 1387 struct list_head flush_queue;
1388 1388
1389 INIT_LIST_HEAD(&flush_queue); 1389 INIT_LIST_HEAD(&flush_queue);
1390 spin_lock_irq(get_ccwdev_lock(device->cdev)); 1390 spin_lock_irq(get_ccwdev_lock(device->cdev));
1391 rc = 0; 1391 rc = 0;
1392 list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) { 1392 list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
1393 /* Check status and move request to flush_queue */ 1393 /* Check status and move request to flush_queue */
1394 switch (cqr->status) { 1394 switch (cqr->status) {
1395 case DASD_CQR_IN_IO: 1395 case DASD_CQR_IN_IO:
1396 rc = device->discipline->term_IO(cqr); 1396 rc = device->discipline->term_IO(cqr);
1397 if (rc) { 1397 if (rc) {
1398 /* unable to terminate request */ 1398 /* unable to terminate request */
1399 dev_err(&device->cdev->dev, 1399 dev_err(&device->cdev->dev,
1400 "Flushing the DASD request queue " 1400 "Flushing the DASD request queue "
1401 "failed for request %p\n", cqr); 1401 "failed for request %p\n", cqr);
1402 /* stop flush processing */ 1402 /* stop flush processing */
1403 goto finished; 1403 goto finished;
1404 } 1404 }
1405 break; 1405 break;
1406 case DASD_CQR_QUEUED: 1406 case DASD_CQR_QUEUED:
1407 cqr->stopclk = get_clock(); 1407 cqr->stopclk = get_clock();
1408 cqr->status = DASD_CQR_CLEARED; 1408 cqr->status = DASD_CQR_CLEARED;
1409 break; 1409 break;
1410 default: /* no need to modify the others */ 1410 default: /* no need to modify the others */
1411 break; 1411 break;
1412 } 1412 }
1413 list_move_tail(&cqr->devlist, &flush_queue); 1413 list_move_tail(&cqr->devlist, &flush_queue);
1414 } 1414 }
1415 finished: 1415 finished:
1416 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1416 spin_unlock_irq(get_ccwdev_lock(device->cdev));
1417 /* 1417 /*
1418 * After this point all requests must be in state CLEAR_PENDING, 1418 * After this point all requests must be in state CLEAR_PENDING,
1419 * CLEARED, SUCCESS or ERROR. Now wait for CLEAR_PENDING to become 1419 * CLEARED, SUCCESS or ERROR. Now wait for CLEAR_PENDING to become
1420 * one of the others. 1420 * one of the others.
1421 */ 1421 */
1422 list_for_each_entry_safe(cqr, n, &flush_queue, devlist) 1422 list_for_each_entry_safe(cqr, n, &flush_queue, devlist)
1423 wait_event(dasd_flush_wq, 1423 wait_event(dasd_flush_wq,
1424 (cqr->status != DASD_CQR_CLEAR_PENDING)); 1424 (cqr->status != DASD_CQR_CLEAR_PENDING));
1425 /* 1425 /*
1426 * Now set each request back to TERMINATED, DONE or NEED_ERP 1426 * Now set each request back to TERMINATED, DONE or NEED_ERP
1427 * and call the callback function of flushed requests 1427 * and call the callback function of flushed requests
1428 */ 1428 */
1429 __dasd_device_process_final_queue(device, &flush_queue); 1429 __dasd_device_process_final_queue(device, &flush_queue);
1430 return rc; 1430 return rc;
1431 } 1431 }
1432 1432
1433 /* 1433 /*
1434 * Acquire the device lock and process queues for the device. 1434 * Acquire the device lock and process queues for the device.
1435 */ 1435 */
1436 static void dasd_device_tasklet(struct dasd_device *device) 1436 static void dasd_device_tasklet(struct dasd_device *device)
1437 { 1437 {
1438 struct list_head final_queue; 1438 struct list_head final_queue;
1439 1439
1440 atomic_set (&device->tasklet_scheduled, 0); 1440 atomic_set (&device->tasklet_scheduled, 0);
1441 INIT_LIST_HEAD(&final_queue); 1441 INIT_LIST_HEAD(&final_queue);
1442 spin_lock_irq(get_ccwdev_lock(device->cdev)); 1442 spin_lock_irq(get_ccwdev_lock(device->cdev));
1443 /* Check expire time of first request on the ccw queue. */ 1443 /* Check expire time of first request on the ccw queue. */
1444 __dasd_device_check_expire(device); 1444 __dasd_device_check_expire(device);
1445 /* find final requests on ccw queue */ 1445 /* find final requests on ccw queue */
1446 __dasd_device_process_ccw_queue(device, &final_queue); 1446 __dasd_device_process_ccw_queue(device, &final_queue);
1447 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1447 spin_unlock_irq(get_ccwdev_lock(device->cdev));
1448 /* Now call the callback function of requests with final status */ 1448 /* Now call the callback function of requests with final status */
1449 __dasd_device_process_final_queue(device, &final_queue); 1449 __dasd_device_process_final_queue(device, &final_queue);
1450 spin_lock_irq(get_ccwdev_lock(device->cdev)); 1450 spin_lock_irq(get_ccwdev_lock(device->cdev));
1451 /* Now check if the head of the ccw queue needs to be started. */ 1451 /* Now check if the head of the ccw queue needs to be started. */
1452 __dasd_device_start_head(device); 1452 __dasd_device_start_head(device);
1453 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1453 spin_unlock_irq(get_ccwdev_lock(device->cdev));
1454 dasd_put_device(device); 1454 dasd_put_device(device);
1455 } 1455 }
1456 1456
1457 /* 1457 /*
1458 * Schedules a call to dasd_tasklet over the device tasklet. 1458 * Schedules a call to dasd_tasklet over the device tasklet.
1459 */ 1459 */
1460 void dasd_schedule_device_bh(struct dasd_device *device) 1460 void dasd_schedule_device_bh(struct dasd_device *device)
1461 { 1461 {
1462 /* Protect against rescheduling. */ 1462 /* Protect against rescheduling. */
1463 if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0) 1463 if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0)
1464 return; 1464 return;
1465 dasd_get_device(device); 1465 dasd_get_device(device);
1466 tasklet_hi_schedule(&device->tasklet); 1466 tasklet_hi_schedule(&device->tasklet);
1467 } 1467 }
1468 1468
1469 void dasd_device_set_stop_bits(struct dasd_device *device, int bits) 1469 void dasd_device_set_stop_bits(struct dasd_device *device, int bits)
1470 { 1470 {
1471 device->stopped |= bits; 1471 device->stopped |= bits;
1472 } 1472 }
1473 EXPORT_SYMBOL_GPL(dasd_device_set_stop_bits); 1473 EXPORT_SYMBOL_GPL(dasd_device_set_stop_bits);
1474 1474
1475 void dasd_device_remove_stop_bits(struct dasd_device *device, int bits) 1475 void dasd_device_remove_stop_bits(struct dasd_device *device, int bits)
1476 { 1476 {
1477 device->stopped &= ~bits; 1477 device->stopped &= ~bits;
1478 if (!device->stopped) 1478 if (!device->stopped)
1479 wake_up(&generic_waitq); 1479 wake_up(&generic_waitq);
1480 } 1480 }
1481 EXPORT_SYMBOL_GPL(dasd_device_remove_stop_bits); 1481 EXPORT_SYMBOL_GPL(dasd_device_remove_stop_bits);
1482 1482
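The stop bits above are always manipulated under the ccw device lock, and a temporary stop is usually paired with a timer so that dasd_device_timeout() lifts it again. A purely illustrative sketch of that pattern (analogous to the -EAGAIN handling in __dasd_process_request_queue() below, which uses the block-level timer; the helper name is hypothetical):

/* Editorial sketch: pause a device briefly and let the timeout resume it. */
static void example_pause_device(struct dasd_device *device)
{
        unsigned long flags;

        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        dasd_device_set_stop_bits(device, DASD_STOPPED_PENDING);
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
        /* dasd_device_timeout() removes the bit and reschedules the bh */
        dasd_device_set_timer(device, HZ / 2);
}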
1483 /* 1483 /*
1484 * Queue a request to the head of the device ccw_queue. 1484 * Queue a request to the head of the device ccw_queue.
1485 * Start the I/O if possible. 1485 * Start the I/O if possible.
1486 */ 1486 */
1487 void dasd_add_request_head(struct dasd_ccw_req *cqr) 1487 void dasd_add_request_head(struct dasd_ccw_req *cqr)
1488 { 1488 {
1489 struct dasd_device *device; 1489 struct dasd_device *device;
1490 unsigned long flags; 1490 unsigned long flags;
1491 1491
1492 device = cqr->startdev; 1492 device = cqr->startdev;
1493 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); 1493 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
1494 cqr->status = DASD_CQR_QUEUED; 1494 cqr->status = DASD_CQR_QUEUED;
1495 list_add(&cqr->devlist, &device->ccw_queue); 1495 list_add(&cqr->devlist, &device->ccw_queue);
1496 /* let the bh start the request to keep them in order */ 1496 /* let the bh start the request to keep them in order */
1497 dasd_schedule_device_bh(device); 1497 dasd_schedule_device_bh(device);
1498 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); 1498 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
1499 } 1499 }
1500 1500
1501 /* 1501 /*
1502 * Queue a request to the tail of the device ccw_queue. 1502 * Queue a request to the tail of the device ccw_queue.
1503 * Start the I/O if possible. 1503 * Start the I/O if possible.
1504 */ 1504 */
1505 void dasd_add_request_tail(struct dasd_ccw_req *cqr) 1505 void dasd_add_request_tail(struct dasd_ccw_req *cqr)
1506 { 1506 {
1507 struct dasd_device *device; 1507 struct dasd_device *device;
1508 unsigned long flags; 1508 unsigned long flags;
1509 1509
1510 device = cqr->startdev; 1510 device = cqr->startdev;
1511 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); 1511 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
1512 cqr->status = DASD_CQR_QUEUED; 1512 cqr->status = DASD_CQR_QUEUED;
1513 list_add_tail(&cqr->devlist, &device->ccw_queue); 1513 list_add_tail(&cqr->devlist, &device->ccw_queue);
1514 /* let the bh start the request to keep them in order */ 1514 /* let the bh start the request to keep them in order */
1515 dasd_schedule_device_bh(device); 1515 dasd_schedule_device_bh(device);
1516 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); 1516 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
1517 } 1517 }
1518 1518
1519 /* 1519 /*
1520 * Wakeup helper for the 'sleep_on' functions. 1520 * Wakeup helper for the 'sleep_on' functions.
1521 */ 1521 */
1522 static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) 1522 static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data)
1523 { 1523 {
1524 spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev)); 1524 spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev));
1525 cqr->callback_data = DASD_SLEEPON_END_TAG; 1525 cqr->callback_data = DASD_SLEEPON_END_TAG;
1526 spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev)); 1526 spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev));
1527 wake_up(&generic_waitq); 1527 wake_up(&generic_waitq);
1528 } 1528 }
1529 1529
1530 static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) 1530 static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr)
1531 { 1531 {
1532 struct dasd_device *device; 1532 struct dasd_device *device;
1533 int rc; 1533 int rc;
1534 1534
1535 device = cqr->startdev; 1535 device = cqr->startdev;
1536 spin_lock_irq(get_ccwdev_lock(device->cdev)); 1536 spin_lock_irq(get_ccwdev_lock(device->cdev));
1537 rc = (cqr->callback_data == DASD_SLEEPON_END_TAG); 1537 rc = (cqr->callback_data == DASD_SLEEPON_END_TAG);
1538 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1538 spin_unlock_irq(get_ccwdev_lock(device->cdev));
1539 return rc; 1539 return rc;
1540 } 1540 }
1541 1541
1542 /* 1542 /*
1543 * checks if error recovery is necessary, returns 1 if yes, 0 otherwise. 1543 * checks if error recovery is necessary, returns 1 if yes, 0 otherwise.
1544 */ 1544 */
1545 static int __dasd_sleep_on_erp(struct dasd_ccw_req *cqr) 1545 static int __dasd_sleep_on_erp(struct dasd_ccw_req *cqr)
1546 { 1546 {
1547 struct dasd_device *device; 1547 struct dasd_device *device;
1548 dasd_erp_fn_t erp_fn; 1548 dasd_erp_fn_t erp_fn;
1549 1549
1550 if (cqr->status == DASD_CQR_FILLED) 1550 if (cqr->status == DASD_CQR_FILLED)
1551 return 0; 1551 return 0;
1552 device = cqr->startdev; 1552 device = cqr->startdev;
1553 if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { 1553 if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) {
1554 if (cqr->status == DASD_CQR_TERMINATED) { 1554 if (cqr->status == DASD_CQR_TERMINATED) {
1555 device->discipline->handle_terminated_request(cqr); 1555 device->discipline->handle_terminated_request(cqr);
1556 return 1; 1556 return 1;
1557 } 1557 }
1558 if (cqr->status == DASD_CQR_NEED_ERP) { 1558 if (cqr->status == DASD_CQR_NEED_ERP) {
1559 erp_fn = device->discipline->erp_action(cqr); 1559 erp_fn = device->discipline->erp_action(cqr);
1560 erp_fn(cqr); 1560 erp_fn(cqr);
1561 return 1; 1561 return 1;
1562 } 1562 }
1563 if (cqr->status == DASD_CQR_FAILED) 1563 if (cqr->status == DASD_CQR_FAILED)
1564 dasd_log_sense(cqr, &cqr->irb); 1564 dasd_log_sense(cqr, &cqr->irb);
1565 if (cqr->refers) { 1565 if (cqr->refers) {
1566 __dasd_process_erp(device, cqr); 1566 __dasd_process_erp(device, cqr);
1567 return 1; 1567 return 1;
1568 } 1568 }
1569 } 1569 }
1570 return 0; 1570 return 0;
1571 } 1571 }
1572 1572
1573 static int __dasd_sleep_on_loop_condition(struct dasd_ccw_req *cqr) 1573 static int __dasd_sleep_on_loop_condition(struct dasd_ccw_req *cqr)
1574 { 1574 {
1575 if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { 1575 if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) {
1576 if (cqr->refers) /* erp is not done yet */ 1576 if (cqr->refers) /* erp is not done yet */
1577 return 1; 1577 return 1;
1578 return ((cqr->status != DASD_CQR_DONE) && 1578 return ((cqr->status != DASD_CQR_DONE) &&
1579 (cqr->status != DASD_CQR_FAILED)); 1579 (cqr->status != DASD_CQR_FAILED));
1580 } else 1580 } else
1581 return (cqr->status == DASD_CQR_FILLED); 1581 return (cqr->status == DASD_CQR_FILLED);
1582 } 1582 }
1583 1583
1584 static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) 1584 static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
1585 { 1585 {
1586 struct dasd_device *device; 1586 struct dasd_device *device;
1587 int rc; 1587 int rc;
1588 struct list_head ccw_queue; 1588 struct list_head ccw_queue;
1589 struct dasd_ccw_req *cqr; 1589 struct dasd_ccw_req *cqr;
1590 1590
1591 INIT_LIST_HEAD(&ccw_queue); 1591 INIT_LIST_HEAD(&ccw_queue);
1592 maincqr->status = DASD_CQR_FILLED; 1592 maincqr->status = DASD_CQR_FILLED;
1593 device = maincqr->startdev; 1593 device = maincqr->startdev;
1594 list_add(&maincqr->blocklist, &ccw_queue); 1594 list_add(&maincqr->blocklist, &ccw_queue);
1595 for (cqr = maincqr; __dasd_sleep_on_loop_condition(cqr); 1595 for (cqr = maincqr; __dasd_sleep_on_loop_condition(cqr);
1596 cqr = list_first_entry(&ccw_queue, 1596 cqr = list_first_entry(&ccw_queue,
1597 struct dasd_ccw_req, blocklist)) { 1597 struct dasd_ccw_req, blocklist)) {
1598 1598
1599 if (__dasd_sleep_on_erp(cqr)) 1599 if (__dasd_sleep_on_erp(cqr))
1600 continue; 1600 continue;
1601 if (cqr->status != DASD_CQR_FILLED) /* could be failed */ 1601 if (cqr->status != DASD_CQR_FILLED) /* could be failed */
1602 continue; 1602 continue;
1603 1603
1604 /* Non-temporary stop condition will trigger fail fast */ 1604 /* Non-temporary stop condition will trigger fail fast */
1605 if (device->stopped & ~DASD_STOPPED_PENDING && 1605 if (device->stopped & ~DASD_STOPPED_PENDING &&
1606 test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && 1606 test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
1607 (!dasd_eer_enabled(device))) { 1607 (!dasd_eer_enabled(device))) {
1608 cqr->status = DASD_CQR_FAILED; 1608 cqr->status = DASD_CQR_FAILED;
1609 continue; 1609 continue;
1610 } 1610 }
1611 1611
1612 /* Don't try to start requests if device is stopped */ 1612 /* Don't try to start requests if device is stopped */
1613 if (interruptible) { 1613 if (interruptible) {
1614 rc = wait_event_interruptible( 1614 rc = wait_event_interruptible(
1615 generic_waitq, !(device->stopped)); 1615 generic_waitq, !(device->stopped));
1616 if (rc == -ERESTARTSYS) { 1616 if (rc == -ERESTARTSYS) {
1617 cqr->status = DASD_CQR_FAILED; 1617 cqr->status = DASD_CQR_FAILED;
1618 maincqr->intrc = rc; 1618 maincqr->intrc = rc;
1619 continue; 1619 continue;
1620 } 1620 }
1621 } else 1621 } else
1622 wait_event(generic_waitq, !(device->stopped)); 1622 wait_event(generic_waitq, !(device->stopped));
1623 1623
1624 cqr->callback = dasd_wakeup_cb; 1624 cqr->callback = dasd_wakeup_cb;
1625 cqr->callback_data = DASD_SLEEPON_START_TAG; 1625 cqr->callback_data = DASD_SLEEPON_START_TAG;
1626 dasd_add_request_tail(cqr); 1626 dasd_add_request_tail(cqr);
1627 if (interruptible) { 1627 if (interruptible) {
1628 rc = wait_event_interruptible( 1628 rc = wait_event_interruptible(
1629 generic_waitq, _wait_for_wakeup(cqr)); 1629 generic_waitq, _wait_for_wakeup(cqr));
1630 if (rc == -ERESTARTSYS) { 1630 if (rc == -ERESTARTSYS) {
1631 dasd_cancel_req(cqr); 1631 dasd_cancel_req(cqr);
1632 /* wait (non-interruptible) for final status */ 1632 /* wait (non-interruptible) for final status */
1633 wait_event(generic_waitq, 1633 wait_event(generic_waitq,
1634 _wait_for_wakeup(cqr)); 1634 _wait_for_wakeup(cqr));
1635 cqr->status = DASD_CQR_FAILED; 1635 cqr->status = DASD_CQR_FAILED;
1636 maincqr->intrc = rc; 1636 maincqr->intrc = rc;
1637 continue; 1637 continue;
1638 } 1638 }
1639 } else 1639 } else
1640 wait_event(generic_waitq, _wait_for_wakeup(cqr)); 1640 wait_event(generic_waitq, _wait_for_wakeup(cqr));
1641 } 1641 }
1642 1642
1643 maincqr->endclk = get_clock(); 1643 maincqr->endclk = get_clock();
1644 if ((maincqr->status != DASD_CQR_DONE) && 1644 if ((maincqr->status != DASD_CQR_DONE) &&
1645 (maincqr->intrc != -ERESTARTSYS)) 1645 (maincqr->intrc != -ERESTARTSYS))
1646 dasd_log_sense(maincqr, &maincqr->irb); 1646 dasd_log_sense(maincqr, &maincqr->irb);
1647 if (maincqr->status == DASD_CQR_DONE) 1647 if (maincqr->status == DASD_CQR_DONE)
1648 rc = 0; 1648 rc = 0;
1649 else if (maincqr->intrc) 1649 else if (maincqr->intrc)
1650 rc = maincqr->intrc; 1650 rc = maincqr->intrc;
1651 else 1651 else
1652 rc = -EIO; 1652 rc = -EIO;
1653 return rc; 1653 return rc;
1654 } 1654 }
1655 1655
1656 /* 1656 /*
1657 * Queue a request to the tail of the device ccw_queue and wait for 1657 * Queue a request to the tail of the device ccw_queue and wait for
1658 * its completion. 1658 * its completion.
1659 */ 1659 */
1660 int dasd_sleep_on(struct dasd_ccw_req *cqr) 1660 int dasd_sleep_on(struct dasd_ccw_req *cqr)
1661 { 1661 {
1662 return _dasd_sleep_on(cqr, 0); 1662 return _dasd_sleep_on(cqr, 0);
1663 } 1663 }
1664 1664
1665 /* 1665 /*
1666 * Queue a request to the tail of the device ccw_queue and wait 1666 * Queue a request to the tail of the device ccw_queue and wait
1667 * interruptibly for its completion. 1667 * interruptibly for its completion.
1668 */ 1668 */
1669 int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr) 1669 int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr)
1670 { 1670 {
1671 return _dasd_sleep_on(cqr, 1); 1671 return _dasd_sleep_on(cqr, 1);
1672 } 1672 }
1673 1673
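For illustration, a caller-side sketch of synchronous submission: allocation and channel-program setup of the cqr are elided, only the fields that the sleep_on machinery above actually consumes are shown, and the function name is hypothetical.

/* Editorial sketch: submit an already built ccw request and wait for it. */
static int example_submit_sync(struct dasd_device *device,
                               struct dasd_ccw_req *cqr)
{
        cqr->startdev = device;   /* device that will execute the I/O */
        cqr->retries = 2;         /* allows the fastpath retry in dasd_int_handler() */
        cqr->expires = 10 * HZ;   /* honoured by __dasd_device_check_expire() */

        /* _dasd_sleep_on() sets DASD_CQR_FILLED, queues the request at the
         * tail, runs ERP if needed and blocks until a final status. */
        return dasd_sleep_on(cqr);
}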
1674 /* 1674 /*
1675 * Whoa nelly now it gets really hairy. For some functions (e.g. steal lock 1675 * Whoa nelly now it gets really hairy. For some functions (e.g. steal lock
1676 * for eckd devices) the currently running request has to be terminated 1676 * for eckd devices) the currently running request has to be terminated
1677 * and be put back to status queued, before the special request is added 1677 * and be put back to status queued, before the special request is added
1678 * to the head of the queue. Then the special request is waited on normally. 1678 * to the head of the queue. Then the special request is waited on normally.
1679 */ 1679 */
1680 static inline int _dasd_term_running_cqr(struct dasd_device *device) 1680 static inline int _dasd_term_running_cqr(struct dasd_device *device)
1681 { 1681 {
1682 struct dasd_ccw_req *cqr; 1682 struct dasd_ccw_req *cqr;
1683 1683
1684 if (list_empty(&device->ccw_queue)) 1684 if (list_empty(&device->ccw_queue))
1685 return 0; 1685 return 0;
1686 cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); 1686 cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
1687 return device->discipline->term_IO(cqr); 1687 return device->discipline->term_IO(cqr);
1688 } 1688 }
1689 1689
1690 int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) 1690 int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr)
1691 { 1691 {
1692 struct dasd_device *device; 1692 struct dasd_device *device;
1693 int rc; 1693 int rc;
1694 1694
1695 device = cqr->startdev; 1695 device = cqr->startdev;
1696 spin_lock_irq(get_ccwdev_lock(device->cdev)); 1696 spin_lock_irq(get_ccwdev_lock(device->cdev));
1697 rc = _dasd_term_running_cqr(device); 1697 rc = _dasd_term_running_cqr(device);
1698 if (rc) { 1698 if (rc) {
1699 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1699 spin_unlock_irq(get_ccwdev_lock(device->cdev));
1700 return rc; 1700 return rc;
1701 } 1701 }
1702 1702
1703 cqr->callback = dasd_wakeup_cb; 1703 cqr->callback = dasd_wakeup_cb;
1704 cqr->callback_data = DASD_SLEEPON_START_TAG; 1704 cqr->callback_data = DASD_SLEEPON_START_TAG;
1705 cqr->status = DASD_CQR_QUEUED; 1705 cqr->status = DASD_CQR_QUEUED;
1706 list_add(&cqr->devlist, &device->ccw_queue); 1706 list_add(&cqr->devlist, &device->ccw_queue);
1707 1707
1708 /* let the bh start the request to keep them in order */ 1708 /* let the bh start the request to keep them in order */
1709 dasd_schedule_device_bh(device); 1709 dasd_schedule_device_bh(device);
1710 1710
1711 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1711 spin_unlock_irq(get_ccwdev_lock(device->cdev));
1712 1712
1713 wait_event(generic_waitq, _wait_for_wakeup(cqr)); 1713 wait_event(generic_waitq, _wait_for_wakeup(cqr));
1714 1714
1715 if (cqr->status == DASD_CQR_DONE) 1715 if (cqr->status == DASD_CQR_DONE)
1716 rc = 0; 1716 rc = 0;
1717 else if (cqr->intrc) 1717 else if (cqr->intrc)
1718 rc = cqr->intrc; 1718 rc = cqr->intrc;
1719 else 1719 else
1720 rc = -EIO; 1720 rc = -EIO;
1721 return rc; 1721 return rc;
1722 } 1722 }
1723 1723
1724 /* 1724 /*
1725 * Cancels a request that was started with dasd_sleep_on_req. 1725 * Cancels a request that was started with dasd_sleep_on_req.
1726 * This is useful for timing out requests. The request will be 1726 * This is useful for timing out requests. The request will be
1727 * terminated if it is currently in i/o. 1727 * terminated if it is currently in i/o.
1728 * Returns 1 if the request has been terminated. 1728 * Returns 1 if the request has been terminated.
1729 * 0 if there was no need to terminate the request (not started yet) 1729 * 0 if there was no need to terminate the request (not started yet)
1730 * negative error code if termination failed 1730 * negative error code if termination failed
1731 * Cancellation of a request is an asynchronous operation! The calling 1731 * Cancellation of a request is an asynchronous operation! The calling
1732 * function has to wait until the request is properly returned via callback. 1732 * function has to wait until the request is properly returned via callback.
1733 */ 1733 */
1734 int dasd_cancel_req(struct dasd_ccw_req *cqr) 1734 int dasd_cancel_req(struct dasd_ccw_req *cqr)
1735 { 1735 {
1736 struct dasd_device *device = cqr->startdev; 1736 struct dasd_device *device = cqr->startdev;
1737 unsigned long flags; 1737 unsigned long flags;
1738 int rc; 1738 int rc;
1739 1739
1740 rc = 0; 1740 rc = 0;
1741 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); 1741 spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
1742 switch (cqr->status) { 1742 switch (cqr->status) {
1743 case DASD_CQR_QUEUED: 1743 case DASD_CQR_QUEUED:
1744 /* request was not started - just set to cleared */ 1744 /* request was not started - just set to cleared */
1745 cqr->status = DASD_CQR_CLEARED; 1745 cqr->status = DASD_CQR_CLEARED;
1746 break; 1746 break;
1747 case DASD_CQR_IN_IO: 1747 case DASD_CQR_IN_IO:
1748 /* request in IO - terminate IO and release again */ 1748 /* request in IO - terminate IO and release again */
1749 rc = device->discipline->term_IO(cqr); 1749 rc = device->discipline->term_IO(cqr);
1750 if (rc) { 1750 if (rc) {
1751 dev_err(&device->cdev->dev, 1751 dev_err(&device->cdev->dev,
1752 "Cancelling request %p failed with rc=%d\n", 1752 "Cancelling request %p failed with rc=%d\n",
1753 cqr, rc); 1753 cqr, rc);
1754 } else { 1754 } else {
1755 cqr->stopclk = get_clock(); 1755 cqr->stopclk = get_clock();
1756 } 1756 }
1757 break; 1757 break;
1758 default: /* already finished or clear pending - do nothing */ 1758 default: /* already finished or clear pending - do nothing */
1759 break; 1759 break;
1760 } 1760 }
1761 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); 1761 spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
1762 dasd_schedule_device_bh(device); 1762 dasd_schedule_device_bh(device);
1763 return rc; 1763 return rc;
1764 } 1764 }
1765 1765
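Because cancellation is asynchronous, a timeout path must still wait for the request's final callback after calling dasd_cancel_req(), exactly as the -ERESTARTSYS branch of _dasd_sleep_on() does above. A purely illustrative sketch (the function name is hypothetical, cqr->startdev is assumed to be set already, and error returns from dasd_cancel_req() are ignored for brevity):

/* Editorial sketch: bounded wait for a queued request, cancelling on timeout. */
static int example_sleep_on_timeout(struct dasd_ccw_req *cqr, long timeout)
{
        cqr->callback = dasd_wakeup_cb;
        cqr->callback_data = DASD_SLEEPON_START_TAG;
        dasd_add_request_tail(cqr);

        if (!wait_event_timeout(generic_waitq, _wait_for_wakeup(cqr), timeout)) {
                dasd_cancel_req(cqr);
                /* cancellation is asynchronous - wait for the final callback */
                wait_event(generic_waitq, _wait_for_wakeup(cqr));
        }
        return (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
}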
1766 1766
1767 /* 1767 /*
1768 * SECTION: Operations of the dasd_block layer. 1768 * SECTION: Operations of the dasd_block layer.
1769 */ 1769 */
1770 1770
1771 /* 1771 /*
1772 * Timeout function for dasd_block. This is used when the block layer 1772 * Timeout function for dasd_block. This is used when the block layer
1773 * is waiting for something that may not come reliably, (e.g. a state 1773 * is waiting for something that may not come reliably, (e.g. a state
1774 * change interrupt) 1774 * change interrupt)
1775 */ 1775 */
1776 static void dasd_block_timeout(unsigned long ptr) 1776 static void dasd_block_timeout(unsigned long ptr)
1777 { 1777 {
1778 unsigned long flags; 1778 unsigned long flags;
1779 struct dasd_block *block; 1779 struct dasd_block *block;
1780 1780
1781 block = (struct dasd_block *) ptr; 1781 block = (struct dasd_block *) ptr;
1782 spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags); 1782 spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags);
1783 /* re-activate request queue */ 1783 /* re-activate request queue */
1784 dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING); 1784 dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING);
1785 spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags); 1785 spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags);
1786 dasd_schedule_block_bh(block); 1786 dasd_schedule_block_bh(block);
1787 } 1787 }
1788 1788
1789 /* 1789 /*
1790 * Setup timeout for a dasd_block in jiffies. 1790 * Setup timeout for a dasd_block in jiffies.
1791 */ 1791 */
1792 void dasd_block_set_timer(struct dasd_block *block, int expires) 1792 void dasd_block_set_timer(struct dasd_block *block, int expires)
1793 { 1793 {
1794 if (expires == 0) 1794 if (expires == 0)
1795 del_timer(&block->timer); 1795 del_timer(&block->timer);
1796 else 1796 else
1797 mod_timer(&block->timer, jiffies + expires); 1797 mod_timer(&block->timer, jiffies + expires);
1798 } 1798 }
1799 1799
1800 /* 1800 /*
1801 * Clear timeout for a dasd_block. 1801 * Clear timeout for a dasd_block.
1802 */ 1802 */
1803 void dasd_block_clear_timer(struct dasd_block *block) 1803 void dasd_block_clear_timer(struct dasd_block *block)
1804 { 1804 {
1805 del_timer(&block->timer); 1805 del_timer(&block->timer);
1806 } 1806 }
1807 1807
1808 /* 1808 /*
1809 * Process finished error recovery ccw. 1809 * Process finished error recovery ccw.
1810 */ 1810 */
1811 static void __dasd_process_erp(struct dasd_device *device, 1811 static void __dasd_process_erp(struct dasd_device *device,
1812 struct dasd_ccw_req *cqr) 1812 struct dasd_ccw_req *cqr)
1813 { 1813 {
1814 dasd_erp_fn_t erp_fn; 1814 dasd_erp_fn_t erp_fn;
1815 1815
1816 if (cqr->status == DASD_CQR_DONE) 1816 if (cqr->status == DASD_CQR_DONE)
1817 DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful"); 1817 DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful");
1818 else 1818 else
1819 dev_err(&device->cdev->dev, "ERP failed for the DASD\n"); 1819 dev_err(&device->cdev->dev, "ERP failed for the DASD\n");
1820 erp_fn = device->discipline->erp_postaction(cqr); 1820 erp_fn = device->discipline->erp_postaction(cqr);
1821 erp_fn(cqr); 1821 erp_fn(cqr);
1822 } 1822 }
1823 1823
1824 /* 1824 /*
1825 * Fetch requests from the block device queue. 1825 * Fetch requests from the block device queue.
1826 */ 1826 */
1827 static void __dasd_process_request_queue(struct dasd_block *block) 1827 static void __dasd_process_request_queue(struct dasd_block *block)
1828 { 1828 {
1829 struct request_queue *queue; 1829 struct request_queue *queue;
1830 struct request *req; 1830 struct request *req;
1831 struct dasd_ccw_req *cqr; 1831 struct dasd_ccw_req *cqr;
1832 struct dasd_device *basedev; 1832 struct dasd_device *basedev;
1833 unsigned long flags; 1833 unsigned long flags;
1834 queue = block->request_queue; 1834 queue = block->request_queue;
1835 basedev = block->base; 1835 basedev = block->base;
1836 /* No queue? Then there is nothing to do. */ 1836 /* No queue? Then there is nothing to do. */
1837 if (queue == NULL) 1837 if (queue == NULL)
1838 return; 1838 return;
1839 1839
1840 /* 1840 /*
1841 * We requeue requests from the block device queue to the ccw 1841 * We requeue requests from the block device queue to the ccw
1842 * queue only in two states. In state DASD_STATE_READY the 1842 * queue only in two states. In state DASD_STATE_READY the
1843 * partition detection is done and we need to requeue requests 1843 * partition detection is done and we need to requeue requests
1844 * for that. State DASD_STATE_ONLINE is normal block device 1844 * for that. State DASD_STATE_ONLINE is normal block device
1845 * operation. 1845 * operation.
1846 */ 1846 */
1847 if (basedev->state < DASD_STATE_READY) { 1847 if (basedev->state < DASD_STATE_READY) {
1848 while ((req = blk_fetch_request(block->request_queue))) 1848 while ((req = blk_fetch_request(block->request_queue)))
1849 __blk_end_request_all(req, -EIO); 1849 __blk_end_request_all(req, -EIO);
1850 return; 1850 return;
1851 } 1851 }
1852 /* Now we try to fetch requests from the request queue */ 1852 /* Now we try to fetch requests from the request queue */
1853 while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) { 1853 while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
1854 if (basedev->features & DASD_FEATURE_READONLY && 1854 if (basedev->features & DASD_FEATURE_READONLY &&
1855 rq_data_dir(req) == WRITE) { 1855 rq_data_dir(req) == WRITE) {
1856 DBF_DEV_EVENT(DBF_ERR, basedev, 1856 DBF_DEV_EVENT(DBF_ERR, basedev,
1857 "Rejecting write request %p", 1857 "Rejecting write request %p",
1858 req); 1858 req);
1859 blk_start_request(req); 1859 blk_start_request(req);
1860 __blk_end_request_all(req, -EIO); 1860 __blk_end_request_all(req, -EIO);
1861 continue; 1861 continue;
1862 } 1862 }
1863 cqr = basedev->discipline->build_cp(basedev, block, req); 1863 cqr = basedev->discipline->build_cp(basedev, block, req);
1864 if (IS_ERR(cqr)) { 1864 if (IS_ERR(cqr)) {
1865 if (PTR_ERR(cqr) == -EBUSY) 1865 if (PTR_ERR(cqr) == -EBUSY)
1866 break; /* normal end condition */ 1866 break; /* normal end condition */
1867 if (PTR_ERR(cqr) == -ENOMEM) 1867 if (PTR_ERR(cqr) == -ENOMEM)
1868 break; /* terminate request queue loop */ 1868 break; /* terminate request queue loop */
1869 if (PTR_ERR(cqr) == -EAGAIN) { 1869 if (PTR_ERR(cqr) == -EAGAIN) {
1870 /* 1870 /*
1871 * The current request cannot be built right 1871 * The current request cannot be built right
1872 * now; we have to try later. If this request 1872 * now; we have to try later. If this request
1873 * is the head-of-queue we stop the device 1873 * is the head-of-queue we stop the device
1874 * for 1/2 second. 1874 * for 1/2 second.
1875 */ 1875 */
1876 if (!list_empty(&block->ccw_queue)) 1876 if (!list_empty(&block->ccw_queue))
1877 break; 1877 break;
1878 spin_lock_irqsave( 1878 spin_lock_irqsave(
1879 get_ccwdev_lock(basedev->cdev), flags); 1879 get_ccwdev_lock(basedev->cdev), flags);
1880 dasd_device_set_stop_bits(basedev, 1880 dasd_device_set_stop_bits(basedev,
1881 DASD_STOPPED_PENDING); 1881 DASD_STOPPED_PENDING);
1882 spin_unlock_irqrestore( 1882 spin_unlock_irqrestore(
1883 get_ccwdev_lock(basedev->cdev), flags); 1883 get_ccwdev_lock(basedev->cdev), flags);
1884 dasd_block_set_timer(block, HZ/2); 1884 dasd_block_set_timer(block, HZ/2);
1885 break; 1885 break;
1886 } 1886 }
1887 DBF_DEV_EVENT(DBF_ERR, basedev, 1887 DBF_DEV_EVENT(DBF_ERR, basedev,
1888 "CCW creation failed (rc=%ld) " 1888 "CCW creation failed (rc=%ld) "
1889 "on request %p", 1889 "on request %p",
1890 PTR_ERR(cqr), req); 1890 PTR_ERR(cqr), req);
1891 blk_start_request(req); 1891 blk_start_request(req);
1892 __blk_end_request_all(req, -EIO); 1892 __blk_end_request_all(req, -EIO);
1893 continue; 1893 continue;
1894 } 1894 }
1895 /* 1895 /*
1896 * Note: callback is set to dasd_return_cqr_cb in 1896 * Note: callback is set to dasd_return_cqr_cb in
1897 * __dasd_block_start_head to cover erp requests as well 1897 * __dasd_block_start_head to cover erp requests as well
1898 */ 1898 */
1899 cqr->callback_data = (void *) req; 1899 cqr->callback_data = (void *) req;
1900 cqr->status = DASD_CQR_FILLED; 1900 cqr->status = DASD_CQR_FILLED;
1901 blk_start_request(req); 1901 blk_start_request(req);
1902 list_add_tail(&cqr->blocklist, &block->ccw_queue); 1902 list_add_tail(&cqr->blocklist, &block->ccw_queue);
1903 dasd_profile_start(block, cqr, req); 1903 dasd_profile_start(block, cqr, req);
1904 } 1904 }
1905 } 1905 }
1906 1906
1907 static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) 1907 static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
1908 { 1908 {
1909 struct request *req; 1909 struct request *req;
1910 int status; 1910 int status;
1911 int error = 0; 1911 int error = 0;
1912 1912
1913 req = (struct request *) cqr->callback_data; 1913 req = (struct request *) cqr->callback_data;
1914 dasd_profile_end(cqr->block, cqr, req); 1914 dasd_profile_end(cqr->block, cqr, req);
1915 status = cqr->block->base->discipline->free_cp(cqr, req); 1915 status = cqr->block->base->discipline->free_cp(cqr, req);
1916 if (status <= 0) 1916 if (status <= 0)
1917 error = status ? status : -EIO; 1917 error = status ? status : -EIO;
1918 __blk_end_request_all(req, error); 1918 __blk_end_request_all(req, error);
1919 } 1919 }
1920 1920
1921 /* 1921 /*
1922 * Process ccw request queue. 1922 * Process ccw request queue.
1923 */ 1923 */
1924 static void __dasd_process_block_ccw_queue(struct dasd_block *block, 1924 static void __dasd_process_block_ccw_queue(struct dasd_block *block,
1925 struct list_head *final_queue) 1925 struct list_head *final_queue)
1926 { 1926 {
1927 struct list_head *l, *n; 1927 struct list_head *l, *n;
1928 struct dasd_ccw_req *cqr; 1928 struct dasd_ccw_req *cqr;
1929 dasd_erp_fn_t erp_fn; 1929 dasd_erp_fn_t erp_fn;
1930 unsigned long flags; 1930 unsigned long flags;
1931 struct dasd_device *base = block->base; 1931 struct dasd_device *base = block->base;
1932 1932
1933 restart: 1933 restart:
1934 /* Process requests with final status. */ 1934 /* Process requests with final status. */
1935 list_for_each_safe(l, n, &block->ccw_queue) { 1935 list_for_each_safe(l, n, &block->ccw_queue) {
1936 cqr = list_entry(l, struct dasd_ccw_req, blocklist); 1936 cqr = list_entry(l, struct dasd_ccw_req, blocklist);
1937 if (cqr->status != DASD_CQR_DONE && 1937 if (cqr->status != DASD_CQR_DONE &&
1938 cqr->status != DASD_CQR_FAILED && 1938 cqr->status != DASD_CQR_FAILED &&
1939 cqr->status != DASD_CQR_NEED_ERP && 1939 cqr->status != DASD_CQR_NEED_ERP &&
1940 cqr->status != DASD_CQR_TERMINATED) 1940 cqr->status != DASD_CQR_TERMINATED)
1941 continue; 1941 continue;
1942 1942
1943 if (cqr->status == DASD_CQR_TERMINATED) { 1943 if (cqr->status == DASD_CQR_TERMINATED) {
1944 base->discipline->handle_terminated_request(cqr); 1944 base->discipline->handle_terminated_request(cqr);
1945 goto restart; 1945 goto restart;
1946 } 1946 }
1947 1947
1948 /* Process requests that may be recovered */ 1948 /* Process requests that may be recovered */
1949 if (cqr->status == DASD_CQR_NEED_ERP) { 1949 if (cqr->status == DASD_CQR_NEED_ERP) {
1950 erp_fn = base->discipline->erp_action(cqr); 1950 erp_fn = base->discipline->erp_action(cqr);
1951 if (IS_ERR(erp_fn(cqr))) 1951 if (IS_ERR(erp_fn(cqr)))
1952 continue; 1952 continue;
1953 goto restart; 1953 goto restart;
1954 } 1954 }
1955 1955
1956 /* log sense for fatal error */ 1956 /* log sense for fatal error */
1957 if (cqr->status == DASD_CQR_FAILED) { 1957 if (cqr->status == DASD_CQR_FAILED) {
1958 dasd_log_sense(cqr, &cqr->irb); 1958 dasd_log_sense(cqr, &cqr->irb);
1959 } 1959 }
1960 1960
1961 /* First of all call extended error reporting. */ 1961 /* First of all call extended error reporting. */
1962 if (dasd_eer_enabled(base) && 1962 if (dasd_eer_enabled(base) &&
1963 cqr->status == DASD_CQR_FAILED) { 1963 cqr->status == DASD_CQR_FAILED) {
1964 dasd_eer_write(base, cqr, DASD_EER_FATALERROR); 1964 dasd_eer_write(base, cqr, DASD_EER_FATALERROR);
1965 1965
1966 /* restart request */ 1966 /* restart request */
1967 cqr->status = DASD_CQR_FILLED; 1967 cqr->status = DASD_CQR_FILLED;
1968 cqr->retries = 255; 1968 cqr->retries = 255;
1969 spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); 1969 spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
1970 dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); 1970 dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE);
1971 spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), 1971 spin_unlock_irqrestore(get_ccwdev_lock(base->cdev),
1972 flags); 1972 flags);
1973 goto restart; 1973 goto restart;
1974 } 1974 }
1975 1975
1976 /* Process finished ERP request. */ 1976 /* Process finished ERP request. */
1977 if (cqr->refers) { 1977 if (cqr->refers) {
1978 __dasd_process_erp(base, cqr); 1978 __dasd_process_erp(base, cqr);
1979 goto restart; 1979 goto restart;
1980 } 1980 }
1981 1981
1982 /* Rechain finished requests to final queue */ 1982 /* Rechain finished requests to final queue */
1983 cqr->endclk = get_clock(); 1983 cqr->endclk = get_clock();
1984 list_move_tail(&cqr->blocklist, final_queue); 1984 list_move_tail(&cqr->blocklist, final_queue);
1985 } 1985 }
1986 } 1986 }
1987 1987
1988 static void dasd_return_cqr_cb(struct dasd_ccw_req *cqr, void *data) 1988 static void dasd_return_cqr_cb(struct dasd_ccw_req *cqr, void *data)
1989 { 1989 {
1990 dasd_schedule_block_bh(cqr->block); 1990 dasd_schedule_block_bh(cqr->block);
1991 } 1991 }
1992 1992
1993 static void __dasd_block_start_head(struct dasd_block *block) 1993 static void __dasd_block_start_head(struct dasd_block *block)
1994 { 1994 {
1995 struct dasd_ccw_req *cqr; 1995 struct dasd_ccw_req *cqr;
1996 1996
1997 if (list_empty(&block->ccw_queue)) 1997 if (list_empty(&block->ccw_queue))
1998 return; 1998 return;
1999 /* We always begin with the first requests on the queue, as some 1999 /* We always begin with the first requests on the queue, as some
2000 * of the previously started requests have to be enqueued on a 2000 * of the previously started requests have to be enqueued on a
2001 * dasd_device again for error recovery. 2001 * dasd_device again for error recovery.
2002 */ 2002 */
2003 list_for_each_entry(cqr, &block->ccw_queue, blocklist) { 2003 list_for_each_entry(cqr, &block->ccw_queue, blocklist) {
2004 if (cqr->status != DASD_CQR_FILLED) 2004 if (cqr->status != DASD_CQR_FILLED)
2005 continue; 2005 continue;
2006 /* Non-temporary stop condition will trigger fail fast */ 2006 /* Non-temporary stop condition will trigger fail fast */
2007 if (block->base->stopped & ~DASD_STOPPED_PENDING && 2007 if (block->base->stopped & ~DASD_STOPPED_PENDING &&
2008 test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && 2008 test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
2009 (!dasd_eer_enabled(block->base))) { 2009 (!dasd_eer_enabled(block->base))) {
2010 cqr->status = DASD_CQR_FAILED; 2010 cqr->status = DASD_CQR_FAILED;
2011 dasd_schedule_block_bh(block); 2011 dasd_schedule_block_bh(block);
2012 continue; 2012 continue;
2013 } 2013 }
2014 /* Don't try to start requests if device is stopped */ 2014 /* Don't try to start requests if device is stopped */
2015 if (block->base->stopped) 2015 if (block->base->stopped)
2016 return; 2016 return;
2017 2017
2018 /* just a fail safe check, should not happen */ 2018 /* just a fail safe check, should not happen */
2019 if (!cqr->startdev) 2019 if (!cqr->startdev)
2020 cqr->startdev = block->base; 2020 cqr->startdev = block->base;
2021 2021
2022 /* make sure that the requests we submit find their way back */ 2022 /* make sure that the requests we submit find their way back */
2023 cqr->callback = dasd_return_cqr_cb; 2023 cqr->callback = dasd_return_cqr_cb;
2024 2024
2025 dasd_add_request_tail(cqr); 2025 dasd_add_request_tail(cqr);
2026 } 2026 }
2027 } 2027 }
2028 2028
2029 /* 2029 /*
2030 * Central dasd_block layer routine. Takes requests from the generic 2030 * Central dasd_block layer routine. Takes requests from the generic
2031 * block layer request queue, creates ccw requests, enqueues them on 2031 * block layer request queue, creates ccw requests, enqueues them on
2032 * a dasd_device and processes ccw requests that have been returned. 2032 * a dasd_device and processes ccw requests that have been returned.
2033 */ 2033 */
2034 static void dasd_block_tasklet(struct dasd_block *block) 2034 static void dasd_block_tasklet(struct dasd_block *block)
2035 { 2035 {
2036 struct list_head final_queue; 2036 struct list_head final_queue;
2037 struct list_head *l, *n; 2037 struct list_head *l, *n;
2038 struct dasd_ccw_req *cqr; 2038 struct dasd_ccw_req *cqr;
2039 2039
2040 atomic_set(&block->tasklet_scheduled, 0); 2040 atomic_set(&block->tasklet_scheduled, 0);
2041 INIT_LIST_HEAD(&final_queue); 2041 INIT_LIST_HEAD(&final_queue);
2042 spin_lock(&block->queue_lock); 2042 spin_lock(&block->queue_lock);
2043 /* Finish off requests on ccw queue */ 2043 /* Finish off requests on ccw queue */
2044 __dasd_process_block_ccw_queue(block, &final_queue); 2044 __dasd_process_block_ccw_queue(block, &final_queue);
2045 spin_unlock(&block->queue_lock); 2045 spin_unlock(&block->queue_lock);
2046 /* Now call the callback function of requests with final status */ 2046 /* Now call the callback function of requests with final status */
2047 spin_lock_irq(&block->request_queue_lock); 2047 spin_lock_irq(&block->request_queue_lock);
2048 list_for_each_safe(l, n, &final_queue) { 2048 list_for_each_safe(l, n, &final_queue) {
2049 cqr = list_entry(l, struct dasd_ccw_req, blocklist); 2049 cqr = list_entry(l, struct dasd_ccw_req, blocklist);
2050 list_del_init(&cqr->blocklist); 2050 list_del_init(&cqr->blocklist);
2051 __dasd_cleanup_cqr(cqr); 2051 __dasd_cleanup_cqr(cqr);
2052 } 2052 }
2053 spin_lock(&block->queue_lock); 2053 spin_lock(&block->queue_lock);
2054 /* Get new request from the block device request queue */ 2054 /* Get new request from the block device request queue */
2055 __dasd_process_request_queue(block); 2055 __dasd_process_request_queue(block);
2056 /* Now check if the head of the ccw queue needs to be started. */ 2056 /* Now check if the head of the ccw queue needs to be started. */
2057 __dasd_block_start_head(block); 2057 __dasd_block_start_head(block);
2058 spin_unlock(&block->queue_lock); 2058 spin_unlock(&block->queue_lock);
2059 spin_unlock_irq(&block->request_queue_lock); 2059 spin_unlock_irq(&block->request_queue_lock);
2060 dasd_put_device(block->base); 2060 dasd_put_device(block->base);
2061 } 2061 }
2062 2062
2063 static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data) 2063 static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
2064 { 2064 {
2065 wake_up(&dasd_flush_wq); 2065 wake_up(&dasd_flush_wq);
2066 } 2066 }
2067 2067
2068 /* 2068 /*
2069 * Go through all requests on the dasd_block request queue, cancel them 2069 * Go through all requests on the dasd_block request queue, cancel them
2070 * on the respective dasd_device, and return them to the generic 2070 * on the respective dasd_device, and return them to the generic
2071 * block layer. 2071 * block layer.
2072 */ 2072 */
2073 static int dasd_flush_block_queue(struct dasd_block *block) 2073 static int dasd_flush_block_queue(struct dasd_block *block)
2074 { 2074 {
2075 struct dasd_ccw_req *cqr, *n; 2075 struct dasd_ccw_req *cqr, *n;
2076 int rc, i; 2076 int rc, i;
2077 struct list_head flush_queue; 2077 struct list_head flush_queue;
2078 2078
2079 INIT_LIST_HEAD(&flush_queue); 2079 INIT_LIST_HEAD(&flush_queue);
2080 spin_lock_bh(&block->queue_lock); 2080 spin_lock_bh(&block->queue_lock);
2081 rc = 0; 2081 rc = 0;
2082 restart: 2082 restart:
2083 list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) { 2083 list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
2084 /* if this request is currently owned by a dasd_device, cancel it */ 2084 /* if this request is currently owned by a dasd_device, cancel it */
2085 if (cqr->status >= DASD_CQR_QUEUED) 2085 if (cqr->status >= DASD_CQR_QUEUED)
2086 rc = dasd_cancel_req(cqr); 2086 rc = dasd_cancel_req(cqr);
2087 if (rc < 0) 2087 if (rc < 0)
2088 break; 2088 break;
2089 /* Rechain request (including erp chain) so it won't be 2089 /* Rechain request (including erp chain) so it won't be
2090 * touched by the dasd_block_tasklet anymore. 2090 * touched by the dasd_block_tasklet anymore.
2091 * Replace the callback so we notice when the request 2091 * Replace the callback so we notice when the request
2092 * is returned from the dasd_device layer. 2092 * is returned from the dasd_device layer.
2093 */ 2093 */
2094 cqr->callback = _dasd_wake_block_flush_cb; 2094 cqr->callback = _dasd_wake_block_flush_cb;
2095 for (i = 0; cqr != NULL; cqr = cqr->refers, i++) 2095 for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
2096 list_move_tail(&cqr->blocklist, &flush_queue); 2096 list_move_tail(&cqr->blocklist, &flush_queue);
2097 if (i > 1) 2097 if (i > 1)
2098 /* moved more than one request - need to restart */ 2098 /* moved more than one request - need to restart */
2099 goto restart; 2099 goto restart;
2100 } 2100 }
2101 spin_unlock_bh(&block->queue_lock); 2101 spin_unlock_bh(&block->queue_lock);
2102 /* Now call the callback function of flushed requests */ 2102 /* Now call the callback function of flushed requests */
2103 restart_cb: 2103 restart_cb:
2104 list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) { 2104 list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
2105 wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED)); 2105 wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
2106 /* Process finished ERP request. */ 2106 /* Process finished ERP request. */
2107 if (cqr->refers) { 2107 if (cqr->refers) {
2108 spin_lock_bh(&block->queue_lock); 2108 spin_lock_bh(&block->queue_lock);
2109 __dasd_process_erp(block->base, cqr); 2109 __dasd_process_erp(block->base, cqr);
2110 spin_unlock_bh(&block->queue_lock); 2110 spin_unlock_bh(&block->queue_lock);
2111 /* restart list_for_xx loop since dasd_process_erp 2111 /* restart list_for_xx loop since dasd_process_erp
2112 * might remove multiple elements */ 2112 * might remove multiple elements */
2113 goto restart_cb; 2113 goto restart_cb;
2114 } 2114 }
2115 /* call the callback function */ 2115 /* call the callback function */
2116 spin_lock_irq(&block->request_queue_lock); 2116 spin_lock_irq(&block->request_queue_lock);
2117 cqr->endclk = get_clock(); 2117 cqr->endclk = get_clock();
2118 list_del_init(&cqr->blocklist); 2118 list_del_init(&cqr->blocklist);
2119 __dasd_cleanup_cqr(cqr); 2119 __dasd_cleanup_cqr(cqr);
2120 spin_unlock_irq(&block->request_queue_lock); 2120 spin_unlock_irq(&block->request_queue_lock);
2121 } 2121 }
2122 return rc; 2122 return rc;
2123 } 2123 }
2124 2124
2125 /* 2125 /*
2126 * Schedules a call to dasd_block_tasklet over the block's tasklet. 2126 * Schedules a call to dasd_block_tasklet over the block's tasklet.
2127 */ 2127 */
2128 void dasd_schedule_block_bh(struct dasd_block *block) 2128 void dasd_schedule_block_bh(struct dasd_block *block)
2129 { 2129 {
2130 /* Protect against rescheduling. */ 2130 /* Protect against rescheduling. */
2131 if (atomic_cmpxchg(&block->tasklet_scheduled, 0, 1) != 0) 2131 if (atomic_cmpxchg(&block->tasklet_scheduled, 0, 1) != 0)
2132 return; 2132 return;
2133 /* life cycle of block is bound to its base device */ 2133 /* life cycle of block is bound to its base device */
2134 dasd_get_device(block->base); 2134 dasd_get_device(block->base);
2135 tasklet_hi_schedule(&block->tasklet); 2135 tasklet_hi_schedule(&block->tasklet);
2136 } 2136 }
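/*
 * Hedged sketch (assumed wiring, not shown in this hunk): the tasklet that
 * dasd_schedule_block_bh() raises is expected to have been initialized when
 * the dasd_block was set up, roughly along these lines:
 */
tasklet_init(&block->tasklet,
	     (void (*)(unsigned long)) dasd_block_tasklet,
	     (unsigned long) block);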
2137 2137
2138 2138
2139 /* 2139 /*
2140 * SECTION: external block device operations 2140 * SECTION: external block device operations
2141 * (request queue handling, open, release, etc.) 2141 * (request queue handling, open, release, etc.)
2142 */ 2142 */
2143 2143
2144 /* 2144 /*
2145 * Dasd request queue function. Called from ll_rw_blk.c 2145 * Dasd request queue function. Called from ll_rw_blk.c
2146 */ 2146 */
2147 static void do_dasd_request(struct request_queue *queue) 2147 static void do_dasd_request(struct request_queue *queue)
2148 { 2148 {
2149 struct dasd_block *block; 2149 struct dasd_block *block;
2150 2150
2151 block = queue->queuedata; 2151 block = queue->queuedata;
2152 spin_lock(&block->queue_lock); 2152 spin_lock(&block->queue_lock);
2153 /* Get new request from the block device request queue */ 2153 /* Get new request from the block device request queue */
2154 __dasd_process_request_queue(block); 2154 __dasd_process_request_queue(block);
2155 /* Now check if the head of the ccw queue needs to be started. */ 2155 /* Now check if the head of the ccw queue needs to be started. */
2156 __dasd_block_start_head(block); 2156 __dasd_block_start_head(block);
2157 spin_unlock(&block->queue_lock); 2157 spin_unlock(&block->queue_lock);
2158 } 2158 }
2159 2159
2160 /* 2160 /*
2161 * Allocate and initialize request queue and default I/O scheduler. 2161 * Allocate and initialize request queue and default I/O scheduler.
2162 */ 2162 */
2163 static int dasd_alloc_queue(struct dasd_block *block) 2163 static int dasd_alloc_queue(struct dasd_block *block)
2164 { 2164 {
2165 int rc; 2165 int rc;
2166 2166
2167 block->request_queue = blk_init_queue(do_dasd_request, 2167 block->request_queue = blk_init_queue(do_dasd_request,
2168 &block->request_queue_lock); 2168 &block->request_queue_lock);
2169 if (block->request_queue == NULL) 2169 if (block->request_queue == NULL)
2170 return -ENOMEM; 2170 return -ENOMEM;
2171 2171
2172 block->request_queue->queuedata = block; 2172 block->request_queue->queuedata = block;
2173 2173
2174 elevator_exit(block->request_queue->elevator); 2174 elevator_exit(block->request_queue->elevator);
2175 block->request_queue->elevator = NULL; 2175 block->request_queue->elevator = NULL;
2176 rc = elevator_init(block->request_queue, "deadline"); 2176 rc = elevator_init(block->request_queue, "deadline");
2177 if (rc) { 2177 if (rc) {
2178 blk_cleanup_queue(block->request_queue); 2178 blk_cleanup_queue(block->request_queue);
2179 return rc; 2179 return rc;
2180 } 2180 }
2181 return 0; 2181 return 0;
2182 } 2182 }
2183 2183
2184 /* 2184 /*
2185 * Allocate and initialize request queue. 2185 * Allocate and initialize request queue.
2186 */ 2186 */
2187 static void dasd_setup_queue(struct dasd_block *block) 2187 static void dasd_setup_queue(struct dasd_block *block)
2188 { 2188 {
2189 int max; 2189 int max;
2190 2190
2191 blk_queue_logical_block_size(block->request_queue, block->bp_block); 2191 blk_queue_logical_block_size(block->request_queue, block->bp_block);
2192 max = block->base->discipline->max_blocks << block->s2b_shift; 2192 max = block->base->discipline->max_blocks << block->s2b_shift;
2193 blk_queue_max_hw_sectors(block->request_queue, max); 2193 blk_queue_max_hw_sectors(block->request_queue, max);
2194 blk_queue_max_segments(block->request_queue, -1L); 2194 blk_queue_max_segments(block->request_queue, -1L);
2195 /* with page-sized segments we can translate each segment into 2195 /* with page-sized segments we can translate each segment into
2196 * one idaw/tidaw 2196 * one idaw/tidaw
2197 */ 2197 */
2198 blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); 2198 blk_queue_max_segment_size(block->request_queue, PAGE_SIZE);
2199 blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); 2199 blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1);
2200 blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN);
2201 } 2200 }
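/*
 * Hedged sketch (illustrative only, not part of this commit): DASD does not
 * advertise a volatile write cache, so the blk_queue_ordered() call removed
 * above needs no counterpart in dasd_setup_queue().  A driver whose device
 * does cache writes would now describe its capabilities with the replacement
 * helper on its own request queue q (name assumed here), e.g.:
 */
blk_queue_flush(q, REQ_FLUSH);			/* flush-capable write cache */
blk_queue_flush(q, REQ_FLUSH | REQ_FUA);	/* or: flush-capable and FUA-capable */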
2202 2201
2203 /* 2202 /*
2204 * Deactivate and free request queue. 2203 * Deactivate and free request queue.
2205 */ 2204 */
2206 static void dasd_free_queue(struct dasd_block *block) 2205 static void dasd_free_queue(struct dasd_block *block)
2207 { 2206 {
2208 if (block->request_queue) { 2207 if (block->request_queue) {
2209 blk_cleanup_queue(block->request_queue); 2208 blk_cleanup_queue(block->request_queue);
2210 block->request_queue = NULL; 2209 block->request_queue = NULL;
2211 } 2210 }
2212 } 2211 }
2213 2212
2214 /* 2213 /*
2215 * Flush request on the request queue. 2214 * Flush request on the request queue.
2216 */ 2215 */
2217 static void dasd_flush_request_queue(struct dasd_block *block) 2216 static void dasd_flush_request_queue(struct dasd_block *block)
2218 { 2217 {
2219 struct request *req; 2218 struct request *req;
2220 2219
2221 if (!block->request_queue) 2220 if (!block->request_queue)
2222 return; 2221 return;
2223 2222
2224 spin_lock_irq(&block->request_queue_lock); 2223 spin_lock_irq(&block->request_queue_lock);
2225 while ((req = blk_fetch_request(block->request_queue))) 2224 while ((req = blk_fetch_request(block->request_queue)))
2226 __blk_end_request_all(req, -EIO); 2225 __blk_end_request_all(req, -EIO);
2227 spin_unlock_irq(&block->request_queue_lock); 2226 spin_unlock_irq(&block->request_queue_lock);
2228 } 2227 }
2229 2228
2230 static int dasd_open(struct block_device *bdev, fmode_t mode) 2229 static int dasd_open(struct block_device *bdev, fmode_t mode)
2231 { 2230 {
2232 struct dasd_block *block = bdev->bd_disk->private_data; 2231 struct dasd_block *block = bdev->bd_disk->private_data;
2233 struct dasd_device *base; 2232 struct dasd_device *base;
2234 int rc; 2233 int rc;
2235 2234
2236 if (!block) 2235 if (!block)
2237 return -ENODEV; 2236 return -ENODEV;
2238 2237
2239 lock_kernel(); 2238 lock_kernel();
2240 base = block->base; 2239 base = block->base;
2241 atomic_inc(&block->open_count); 2240 atomic_inc(&block->open_count);
2242 if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) { 2241 if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) {
2243 rc = -ENODEV; 2242 rc = -ENODEV;
2244 goto unlock; 2243 goto unlock;
2245 } 2244 }
2246 2245
2247 if (!try_module_get(base->discipline->owner)) { 2246 if (!try_module_get(base->discipline->owner)) {
2248 rc = -EINVAL; 2247 rc = -EINVAL;
2249 goto unlock; 2248 goto unlock;
2250 } 2249 }
2251 2250
2252 if (dasd_probeonly) { 2251 if (dasd_probeonly) {
2253 dev_info(&base->cdev->dev, 2252 dev_info(&base->cdev->dev,
2254 "Accessing the DASD failed because it is in " 2253 "Accessing the DASD failed because it is in "
2255 "probeonly mode\n"); 2254 "probeonly mode\n");
2256 rc = -EPERM; 2255 rc = -EPERM;
2257 goto out; 2256 goto out;
2258 } 2257 }
2259 2258
2260 if (base->state <= DASD_STATE_BASIC) { 2259 if (base->state <= DASD_STATE_BASIC) {
2261 DBF_DEV_EVENT(DBF_ERR, base, " %s", 2260 DBF_DEV_EVENT(DBF_ERR, base, " %s",
2262 " Cannot open unrecognized device"); 2261 " Cannot open unrecognized device");
2263 rc = -ENODEV; 2262 rc = -ENODEV;
2264 goto out; 2263 goto out;
2265 } 2264 }
2266 2265
2267 if ((mode & FMODE_WRITE) && 2266 if ((mode & FMODE_WRITE) &&
2268 (test_bit(DASD_FLAG_DEVICE_RO, &base->flags) || 2267 (test_bit(DASD_FLAG_DEVICE_RO, &base->flags) ||
2269 (base->features & DASD_FEATURE_READONLY))) { 2268 (base->features & DASD_FEATURE_READONLY))) {
2270 rc = -EROFS; 2269 rc = -EROFS;
2271 goto out; 2270 goto out;
2272 } 2271 }
2273 2272
2274 unlock_kernel(); 2273 unlock_kernel();
2275 return 0; 2274 return 0;
2276 2275
2277 out: 2276 out:
2278 module_put(base->discipline->owner); 2277 module_put(base->discipline->owner);
2279 unlock: 2278 unlock:
2280 atomic_dec(&block->open_count); 2279 atomic_dec(&block->open_count);
2281 unlock_kernel(); 2280 unlock_kernel();
2282 return rc; 2281 return rc;
2283 } 2282 }
2284 2283
2285 static int dasd_release(struct gendisk *disk, fmode_t mode) 2284 static int dasd_release(struct gendisk *disk, fmode_t mode)
2286 { 2285 {
2287 struct dasd_block *block = disk->private_data; 2286 struct dasd_block *block = disk->private_data;
2288 2287
2289 lock_kernel(); 2288 lock_kernel();
2290 atomic_dec(&block->open_count); 2289 atomic_dec(&block->open_count);
2291 module_put(block->base->discipline->owner); 2290 module_put(block->base->discipline->owner);
2292 unlock_kernel(); 2291 unlock_kernel();
2293 return 0; 2292 return 0;
2294 } 2293 }
2295 2294
2296 /* 2295 /*
2297 * Return disk geometry. 2296 * Return disk geometry.
2298 */ 2297 */
2299 static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 2298 static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
2300 { 2299 {
2301 struct dasd_block *block; 2300 struct dasd_block *block;
2302 struct dasd_device *base; 2301 struct dasd_device *base;
2303 2302
2304 block = bdev->bd_disk->private_data; 2303 block = bdev->bd_disk->private_data;
2305 if (!block) 2304 if (!block)
2306 return -ENODEV; 2305 return -ENODEV;
2307 base = block->base; 2306 base = block->base;
2308 2307
2309 if (!base->discipline || 2308 if (!base->discipline ||
2310 !base->discipline->fill_geometry) 2309 !base->discipline->fill_geometry)
2311 return -EINVAL; 2310 return -EINVAL;
2312 2311
2313 base->discipline->fill_geometry(block, geo); 2312 base->discipline->fill_geometry(block, geo);
2314 geo->start = get_start_sect(bdev) >> block->s2b_shift; 2313 geo->start = get_start_sect(bdev) >> block->s2b_shift;
2315 return 0; 2314 return 0;
2316 } 2315 }
2317 2316
2318 const struct block_device_operations 2317 const struct block_device_operations
2319 dasd_device_operations = { 2318 dasd_device_operations = {
2320 .owner = THIS_MODULE, 2319 .owner = THIS_MODULE,
2321 .open = dasd_open, 2320 .open = dasd_open,
2322 .release = dasd_release, 2321 .release = dasd_release,
2323 .ioctl = dasd_ioctl, 2322 .ioctl = dasd_ioctl,
2324 .compat_ioctl = dasd_ioctl, 2323 .compat_ioctl = dasd_ioctl,
2325 .getgeo = dasd_getgeo, 2324 .getgeo = dasd_getgeo,
2326 }; 2325 };
2327 2326
2328 /******************************************************************************* 2327 /*******************************************************************************
2329 * end of block device operations 2328 * end of block device operations
2330 */ 2329 */
2331 2330
2332 static void 2331 static void
2333 dasd_exit(void) 2332 dasd_exit(void)
2334 { 2333 {
2335 #ifdef CONFIG_PROC_FS 2334 #ifdef CONFIG_PROC_FS
2336 dasd_proc_exit(); 2335 dasd_proc_exit();
2337 #endif 2336 #endif
2338 dasd_eer_exit(); 2337 dasd_eer_exit();
2339 if (dasd_page_cache != NULL) { 2338 if (dasd_page_cache != NULL) {
2340 kmem_cache_destroy(dasd_page_cache); 2339 kmem_cache_destroy(dasd_page_cache);
2341 dasd_page_cache = NULL; 2340 dasd_page_cache = NULL;
2342 } 2341 }
2343 dasd_gendisk_exit(); 2342 dasd_gendisk_exit();
2344 dasd_devmap_exit(); 2343 dasd_devmap_exit();
2345 if (dasd_debug_area != NULL) { 2344 if (dasd_debug_area != NULL) {
2346 debug_unregister(dasd_debug_area); 2345 debug_unregister(dasd_debug_area);
2347 dasd_debug_area = NULL; 2346 dasd_debug_area = NULL;
2348 } 2347 }
2349 } 2348 }
2350 2349
2351 /* 2350 /*
2352 * SECTION: common functions for ccw_driver use 2351 * SECTION: common functions for ccw_driver use
2353 */ 2352 */
2354 2353
2355 /* 2354 /*
2356 * Is the device read-only? 2355 * Is the device read-only?
2357 * Note that this function does not report the setting of the 2356 * Note that this function does not report the setting of the
2358 * readonly device attribute, but how it is configured in z/VM. 2357 * readonly device attribute, but how it is configured in z/VM.
2359 */ 2358 */
2360 int dasd_device_is_ro(struct dasd_device *device) 2359 int dasd_device_is_ro(struct dasd_device *device)
2361 { 2360 {
2362 struct ccw_dev_id dev_id; 2361 struct ccw_dev_id dev_id;
2363 struct diag210 diag_data; 2362 struct diag210 diag_data;
2364 int rc; 2363 int rc;
2365 2364
2366 if (!MACHINE_IS_VM) 2365 if (!MACHINE_IS_VM)
2367 return 0; 2366 return 0;
2368 ccw_device_get_id(device->cdev, &dev_id); 2367 ccw_device_get_id(device->cdev, &dev_id);
2369 memset(&diag_data, 0, sizeof(diag_data)); 2368 memset(&diag_data, 0, sizeof(diag_data));
2370 diag_data.vrdcdvno = dev_id.devno; 2369 diag_data.vrdcdvno = dev_id.devno;
2371 diag_data.vrdclen = sizeof(diag_data); 2370 diag_data.vrdclen = sizeof(diag_data);
2372 rc = diag210(&diag_data); 2371 rc = diag210(&diag_data);
2373 if (rc == 0 || rc == 2) { 2372 if (rc == 0 || rc == 2) {
2374 return diag_data.vrdcvfla & 0x80; 2373 return diag_data.vrdcvfla & 0x80;
2375 } else { 2374 } else {
2376 DBF_EVENT(DBF_WARNING, "diag210 failed for dev=%04x with rc=%d", 2375 DBF_EVENT(DBF_WARNING, "diag210 failed for dev=%04x with rc=%d",
2377 dev_id.devno, rc); 2376 dev_id.devno, rc);
2378 return 0; 2377 return 0;
2379 } 2378 }
2380 } 2379 }
2381 EXPORT_SYMBOL_GPL(dasd_device_is_ro); 2380 EXPORT_SYMBOL_GPL(dasd_device_is_ro);
2382 2381
2383 static void dasd_generic_auto_online(void *data, async_cookie_t cookie) 2382 static void dasd_generic_auto_online(void *data, async_cookie_t cookie)
2384 { 2383 {
2385 struct ccw_device *cdev = data; 2384 struct ccw_device *cdev = data;
2386 int ret; 2385 int ret;
2387 2386
2388 ret = ccw_device_set_online(cdev); 2387 ret = ccw_device_set_online(cdev);
2389 if (ret) 2388 if (ret)
2390 pr_warning("%s: Setting the DASD online failed with rc=%d\n", 2389 pr_warning("%s: Setting the DASD online failed with rc=%d\n",
2391 dev_name(&cdev->dev), ret); 2390 dev_name(&cdev->dev), ret);
2392 } 2391 }
2393 2392
2394 /* 2393 /*
2395 * Initial attempt at a probe function. This can be simplified once 2394 * Initial attempt at a probe function. This can be simplified once
2396 * the other detection code is gone. 2395 * the other detection code is gone.
2397 */ 2396 */
2398 int dasd_generic_probe(struct ccw_device *cdev, 2397 int dasd_generic_probe(struct ccw_device *cdev,
2399 struct dasd_discipline *discipline) 2398 struct dasd_discipline *discipline)
2400 { 2399 {
2401 int ret; 2400 int ret;
2402 2401
2403 ret = dasd_add_sysfs_files(cdev); 2402 ret = dasd_add_sysfs_files(cdev);
2404 if (ret) { 2403 if (ret) {
2405 DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s", 2404 DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s",
2406 "dasd_generic_probe: could not add " 2405 "dasd_generic_probe: could not add "
2407 "sysfs entries"); 2406 "sysfs entries");
2408 return ret; 2407 return ret;
2409 } 2408 }
2410 cdev->handler = &dasd_int_handler; 2409 cdev->handler = &dasd_int_handler;
2411 2410
2412 /* 2411 /*
2413 * Automatically online either all dasd devices (dasd_autodetect) 2412 * Automatically online either all dasd devices (dasd_autodetect)
2414 * or all devices specified with dasd= parameters during 2413 * or all devices specified with dasd= parameters during
2415 * initial probe. 2414 * initial probe.
2416 */ 2415 */
2417 if ((dasd_get_feature(cdev, DASD_FEATURE_INITIAL_ONLINE) > 0 ) || 2416 if ((dasd_get_feature(cdev, DASD_FEATURE_INITIAL_ONLINE) > 0 ) ||
2418 (dasd_autodetect && dasd_busid_known(dev_name(&cdev->dev)) != 0)) 2417 (dasd_autodetect && dasd_busid_known(dev_name(&cdev->dev)) != 0))
2419 async_schedule(dasd_generic_auto_online, cdev); 2418 async_schedule(dasd_generic_auto_online, cdev);
2420 return 0; 2419 return 0;
2421 } 2420 }
2422 2421
2423 /* 2422 /*
2424 * This will one day be called from a global not_oper handler. 2423 * This will one day be called from a global not_oper handler.
2425 * It is also used by driver_unregister during module unload. 2424 * It is also used by driver_unregister during module unload.
2426 */ 2425 */
2427 void dasd_generic_remove(struct ccw_device *cdev) 2426 void dasd_generic_remove(struct ccw_device *cdev)
2428 { 2427 {
2429 struct dasd_device *device; 2428 struct dasd_device *device;
2430 struct dasd_block *block; 2429 struct dasd_block *block;
2431 2430
2432 cdev->handler = NULL; 2431 cdev->handler = NULL;
2433 2432
2434 dasd_remove_sysfs_files(cdev); 2433 dasd_remove_sysfs_files(cdev);
2435 device = dasd_device_from_cdev(cdev); 2434 device = dasd_device_from_cdev(cdev);
2436 if (IS_ERR(device)) 2435 if (IS_ERR(device))
2437 return; 2436 return;
2438 if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) { 2437 if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) {
2439 /* Already doing offline processing */ 2438 /* Already doing offline processing */
2440 dasd_put_device(device); 2439 dasd_put_device(device);
2441 return; 2440 return;
2442 } 2441 }
2443 /* 2442 /*
2444 * This device is removed unconditionally. Set offline 2443 * This device is removed unconditionally. Set offline
2445 * flag to prevent dasd_open from opening it while it is 2444 * flag to prevent dasd_open from opening it while it is
2446 * not quite down yet. 2445 * not quite down yet.
2447 */ 2446 */
2448 dasd_set_target_state(device, DASD_STATE_NEW); 2447 dasd_set_target_state(device, DASD_STATE_NEW);
2449 /* dasd_delete_device destroys the device reference. */ 2448 /* dasd_delete_device destroys the device reference. */
2450 block = device->block; 2449 block = device->block;
2451 device->block = NULL; 2450 device->block = NULL;
2452 dasd_delete_device(device); 2451 dasd_delete_device(device);
2453 /* 2452 /*
2454 * life cycle of block is bound to device, so delete it after 2453 * life cycle of block is bound to device, so delete it after
2455 * device was safely removed 2454 * device was safely removed
2456 */ 2455 */
2457 if (block) 2456 if (block)
2458 dasd_free_block(block); 2457 dasd_free_block(block);
2459 } 2458 }
2460 2459
2461 /* 2460 /*
2462 * Activate a device. This is called from dasd_{eckd,fba}_probe() when either 2461 * Activate a device. This is called from dasd_{eckd,fba}_probe() when either
2463 * the device is detected for the first time and is supposed to be used 2462 * the device is detected for the first time and is supposed to be used
2464 * or the user has started activation through sysfs. 2463 * or the user has started activation through sysfs.
2465 */ 2464 */
2466 int dasd_generic_set_online(struct ccw_device *cdev, 2465 int dasd_generic_set_online(struct ccw_device *cdev,
2467 struct dasd_discipline *base_discipline) 2466 struct dasd_discipline *base_discipline)
2468 { 2467 {
2469 struct dasd_discipline *discipline; 2468 struct dasd_discipline *discipline;
2470 struct dasd_device *device; 2469 struct dasd_device *device;
2471 int rc; 2470 int rc;
2472 2471
2473 /* first online clears initial online feature flag */ 2472 /* first online clears initial online feature flag */
2474 dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); 2473 dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0);
2475 device = dasd_create_device(cdev); 2474 device = dasd_create_device(cdev);
2476 if (IS_ERR(device)) 2475 if (IS_ERR(device))
2477 return PTR_ERR(device); 2476 return PTR_ERR(device);
2478 2477
2479 discipline = base_discipline; 2478 discipline = base_discipline;
2480 if (device->features & DASD_FEATURE_USEDIAG) { 2479 if (device->features & DASD_FEATURE_USEDIAG) {
2481 if (!dasd_diag_discipline_pointer) { 2480 if (!dasd_diag_discipline_pointer) {
2482 pr_warning("%s Setting the DASD online failed because " 2481 pr_warning("%s Setting the DASD online failed because "
2483 "of missing DIAG discipline\n", 2482 "of missing DIAG discipline\n",
2484 dev_name(&cdev->dev)); 2483 dev_name(&cdev->dev));
2485 dasd_delete_device(device); 2484 dasd_delete_device(device);
2486 return -ENODEV; 2485 return -ENODEV;
2487 } 2486 }
2488 discipline = dasd_diag_discipline_pointer; 2487 discipline = dasd_diag_discipline_pointer;
2489 } 2488 }
2490 if (!try_module_get(base_discipline->owner)) { 2489 if (!try_module_get(base_discipline->owner)) {
2491 dasd_delete_device(device); 2490 dasd_delete_device(device);
2492 return -EINVAL; 2491 return -EINVAL;
2493 } 2492 }
2494 if (!try_module_get(discipline->owner)) { 2493 if (!try_module_get(discipline->owner)) {
2495 module_put(base_discipline->owner); 2494 module_put(base_discipline->owner);
2496 dasd_delete_device(device); 2495 dasd_delete_device(device);
2497 return -EINVAL; 2496 return -EINVAL;
2498 } 2497 }
2499 device->base_discipline = base_discipline; 2498 device->base_discipline = base_discipline;
2500 device->discipline = discipline; 2499 device->discipline = discipline;
2501 2500
2502 /* check_device will allocate block device if necessary */ 2501 /* check_device will allocate block device if necessary */
2503 rc = discipline->check_device(device); 2502 rc = discipline->check_device(device);
2504 if (rc) { 2503 if (rc) {
2505 pr_warning("%s Setting the DASD online with discipline %s " 2504 pr_warning("%s Setting the DASD online with discipline %s "
2506 "failed with rc=%i\n", 2505 "failed with rc=%i\n",
2507 dev_name(&cdev->dev), discipline->name, rc); 2506 dev_name(&cdev->dev), discipline->name, rc);
2508 module_put(discipline->owner); 2507 module_put(discipline->owner);
2509 module_put(base_discipline->owner); 2508 module_put(base_discipline->owner);
2510 dasd_delete_device(device); 2509 dasd_delete_device(device);
2511 return rc; 2510 return rc;
2512 } 2511 }
2513 2512
2514 dasd_set_target_state(device, DASD_STATE_ONLINE); 2513 dasd_set_target_state(device, DASD_STATE_ONLINE);
2515 if (device->state <= DASD_STATE_KNOWN) { 2514 if (device->state <= DASD_STATE_KNOWN) {
2516 pr_warning("%s Setting the DASD online failed because of a " 2515 pr_warning("%s Setting the DASD online failed because of a "
2517 "missing discipline\n", dev_name(&cdev->dev)); 2516 "missing discipline\n", dev_name(&cdev->dev));
2518 rc = -ENODEV; 2517 rc = -ENODEV;
2519 dasd_set_target_state(device, DASD_STATE_NEW); 2518 dasd_set_target_state(device, DASD_STATE_NEW);
2520 if (device->block) 2519 if (device->block)
2521 dasd_free_block(device->block); 2520 dasd_free_block(device->block);
2522 dasd_delete_device(device); 2521 dasd_delete_device(device);
2523 } else 2522 } else
2524 pr_debug("dasd_generic device %s found\n", 2523 pr_debug("dasd_generic device %s found\n",
2525 dev_name(&cdev->dev)); 2524 dev_name(&cdev->dev));
2526 2525
2527 wait_event(dasd_init_waitq, _wait_for_device(device)); 2526 wait_event(dasd_init_waitq, _wait_for_device(device));
2528 2527
2529 dasd_put_device(device); 2528 dasd_put_device(device);
2530 return rc; 2529 return rc;
2531 } 2530 }
2532 2531
2533 int dasd_generic_set_offline(struct ccw_device *cdev) 2532 int dasd_generic_set_offline(struct ccw_device *cdev)
2534 { 2533 {
2535 struct dasd_device *device; 2534 struct dasd_device *device;
2536 struct dasd_block *block; 2535 struct dasd_block *block;
2537 int max_count, open_count; 2536 int max_count, open_count;
2538 2537
2539 device = dasd_device_from_cdev(cdev); 2538 device = dasd_device_from_cdev(cdev);
2540 if (IS_ERR(device)) 2539 if (IS_ERR(device))
2541 return PTR_ERR(device); 2540 return PTR_ERR(device);
2542 if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) { 2541 if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) {
2543 /* Already doing offline processing */ 2542 /* Already doing offline processing */
2544 dasd_put_device(device); 2543 dasd_put_device(device);
2545 return 0; 2544 return 0;
2546 } 2545 }
2547 /* 2546 /*
2548 * We must make sure that this device is currently not in use. 2547 * We must make sure that this device is currently not in use.
2549 * The open_count is increased for every opener; that includes 2548 * The open_count is increased for every opener; that includes
2550 * the blkdev_get in dasd_scan_partitions. We are only interested 2549 * the blkdev_get in dasd_scan_partitions. We are only interested
2551 * in the other openers. 2550 * in the other openers.
2552 */ 2551 */
2553 if (device->block) { 2552 if (device->block) {
2554 max_count = device->block->bdev ? 0 : -1; 2553 max_count = device->block->bdev ? 0 : -1;
2555 open_count = atomic_read(&device->block->open_count); 2554 open_count = atomic_read(&device->block->open_count);
2556 if (open_count > max_count) { 2555 if (open_count > max_count) {
2557 if (open_count > 0) 2556 if (open_count > 0)
2558 pr_warning("%s: The DASD cannot be set offline " 2557 pr_warning("%s: The DASD cannot be set offline "
2559 "with open count %i\n", 2558 "with open count %i\n",
2560 dev_name(&cdev->dev), open_count); 2559 dev_name(&cdev->dev), open_count);
2561 else 2560 else
2562 pr_warning("%s: The DASD cannot be set offline " 2561 pr_warning("%s: The DASD cannot be set offline "
2563 "while it is in use\n", 2562 "while it is in use\n",
2564 dev_name(&cdev->dev)); 2563 dev_name(&cdev->dev));
2565 clear_bit(DASD_FLAG_OFFLINE, &device->flags); 2564 clear_bit(DASD_FLAG_OFFLINE, &device->flags);
2566 dasd_put_device(device); 2565 dasd_put_device(device);
2567 return -EBUSY; 2566 return -EBUSY;
2568 } 2567 }
2569 } 2568 }
2570 dasd_set_target_state(device, DASD_STATE_NEW); 2569 dasd_set_target_state(device, DASD_STATE_NEW);
2571 /* dasd_delete_device destroys the device reference. */ 2570 /* dasd_delete_device destroys the device reference. */
2572 block = device->block; 2571 block = device->block;
2573 device->block = NULL; 2572 device->block = NULL;
2574 dasd_delete_device(device); 2573 dasd_delete_device(device);
2575 /* 2574 /*
2576 * life cycle of block is bound to device, so delete it after 2575 * life cycle of block is bound to device, so delete it after
2577 * device was safely removed 2576 * device was safely removed
2578 */ 2577 */
2579 if (block) 2578 if (block)
2580 dasd_free_block(block); 2579 dasd_free_block(block);
2581 return 0; 2580 return 0;
2582 } 2581 }
2583 2582
2584 int dasd_generic_notify(struct ccw_device *cdev, int event) 2583 int dasd_generic_notify(struct ccw_device *cdev, int event)
2585 { 2584 {
2586 struct dasd_device *device; 2585 struct dasd_device *device;
2587 struct dasd_ccw_req *cqr; 2586 struct dasd_ccw_req *cqr;
2588 int ret; 2587 int ret;
2589 2588
2590 device = dasd_device_from_cdev_locked(cdev); 2589 device = dasd_device_from_cdev_locked(cdev);
2591 if (IS_ERR(device)) 2590 if (IS_ERR(device))
2592 return 0; 2591 return 0;
2593 ret = 0; 2592 ret = 0;
2594 switch (event) { 2593 switch (event) {
2595 case CIO_GONE: 2594 case CIO_GONE:
2596 case CIO_BOXED: 2595 case CIO_BOXED:
2597 case CIO_NO_PATH: 2596 case CIO_NO_PATH:
2598 /* First of all call extended error reporting. */ 2597 /* First of all call extended error reporting. */
2599 dasd_eer_write(device, NULL, DASD_EER_NOPATH); 2598 dasd_eer_write(device, NULL, DASD_EER_NOPATH);
2600 2599
2601 if (device->state < DASD_STATE_BASIC) 2600 if (device->state < DASD_STATE_BASIC)
2602 break; 2601 break;
2603 /* Device is active. We want to keep it. */ 2602 /* Device is active. We want to keep it. */
2604 list_for_each_entry(cqr, &device->ccw_queue, devlist) 2603 list_for_each_entry(cqr, &device->ccw_queue, devlist)
2605 if (cqr->status == DASD_CQR_IN_IO) { 2604 if (cqr->status == DASD_CQR_IN_IO) {
2606 cqr->status = DASD_CQR_QUEUED; 2605 cqr->status = DASD_CQR_QUEUED;
2607 cqr->retries++; 2606 cqr->retries++;
2608 } 2607 }
2609 dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); 2608 dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT);
2610 dasd_device_clear_timer(device); 2609 dasd_device_clear_timer(device);
2611 dasd_schedule_device_bh(device); 2610 dasd_schedule_device_bh(device);
2612 ret = 1; 2611 ret = 1;
2613 break; 2612 break;
2614 case CIO_OPER: 2613 case CIO_OPER:
2615 /* FIXME: add a sanity check. */ 2614 /* FIXME: add a sanity check. */
2616 dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT); 2615 dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT);
2617 if (device->stopped & DASD_UNRESUMED_PM) { 2616 if (device->stopped & DASD_UNRESUMED_PM) {
2618 dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM); 2617 dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM);
2619 dasd_restore_device(device); 2618 dasd_restore_device(device);
2620 ret = 1; 2619 ret = 1;
2621 break; 2620 break;
2622 } 2621 }
2623 dasd_schedule_device_bh(device); 2622 dasd_schedule_device_bh(device);
2624 if (device->block) 2623 if (device->block)
2625 dasd_schedule_block_bh(device->block); 2624 dasd_schedule_block_bh(device->block);
2626 ret = 1; 2625 ret = 1;
2627 break; 2626 break;
2628 } 2627 }
2629 dasd_put_device(device); 2628 dasd_put_device(device);
2630 return ret; 2629 return ret;
2631 } 2630 }
2632 2631
2633 int dasd_generic_pm_freeze(struct ccw_device *cdev) 2632 int dasd_generic_pm_freeze(struct ccw_device *cdev)
2634 { 2633 {
2635 struct dasd_ccw_req *cqr, *n; 2634 struct dasd_ccw_req *cqr, *n;
2636 int rc; 2635 int rc;
2637 struct list_head freeze_queue; 2636 struct list_head freeze_queue;
2638 struct dasd_device *device = dasd_device_from_cdev(cdev); 2637 struct dasd_device *device = dasd_device_from_cdev(cdev);
2639 2638
2640 if (IS_ERR(device)) 2639 if (IS_ERR(device))
2641 return PTR_ERR(device); 2640 return PTR_ERR(device);
2642 /* disallow new I/O */ 2641 /* disallow new I/O */
2643 dasd_device_set_stop_bits(device, DASD_STOPPED_PM); 2642 dasd_device_set_stop_bits(device, DASD_STOPPED_PM);
2644 /* clear active requests */ 2643 /* clear active requests */
2645 INIT_LIST_HEAD(&freeze_queue); 2644 INIT_LIST_HEAD(&freeze_queue);
2646 spin_lock_irq(get_ccwdev_lock(cdev)); 2645 spin_lock_irq(get_ccwdev_lock(cdev));
2647 rc = 0; 2646 rc = 0;
2648 list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) { 2647 list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
2649 /* Check status and move request to flush_queue */ 2648 /* Check status and move request to flush_queue */
2650 if (cqr->status == DASD_CQR_IN_IO) { 2649 if (cqr->status == DASD_CQR_IN_IO) {
2651 rc = device->discipline->term_IO(cqr); 2650 rc = device->discipline->term_IO(cqr);
2652 if (rc) { 2651 if (rc) {
2653 /* unable to terminate request */ 2652 /* unable to terminate request */
2654 dev_err(&device->cdev->dev, 2653 dev_err(&device->cdev->dev,
2655 "Unable to terminate request %p " 2654 "Unable to terminate request %p "
2656 "on suspend\n", cqr); 2655 "on suspend\n", cqr);
2657 spin_unlock_irq(get_ccwdev_lock(cdev)); 2656 spin_unlock_irq(get_ccwdev_lock(cdev));
2658 dasd_put_device(device); 2657 dasd_put_device(device);
2659 return rc; 2658 return rc;
2660 } 2659 }
2661 } 2660 }
2662 list_move_tail(&cqr->devlist, &freeze_queue); 2661 list_move_tail(&cqr->devlist, &freeze_queue);
2663 } 2662 }
2664 2663
2665 spin_unlock_irq(get_ccwdev_lock(cdev)); 2664 spin_unlock_irq(get_ccwdev_lock(cdev));
2666 2665
2667 list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) { 2666 list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) {
2668 wait_event(dasd_flush_wq, 2667 wait_event(dasd_flush_wq,
2669 (cqr->status != DASD_CQR_CLEAR_PENDING)); 2668 (cqr->status != DASD_CQR_CLEAR_PENDING));
2670 if (cqr->status == DASD_CQR_CLEARED) 2669 if (cqr->status == DASD_CQR_CLEARED)
2671 cqr->status = DASD_CQR_QUEUED; 2670 cqr->status = DASD_CQR_QUEUED;
2672 } 2671 }
2673 /* move freeze_queue to start of the ccw_queue */ 2672 /* move freeze_queue to start of the ccw_queue */
2674 spin_lock_irq(get_ccwdev_lock(cdev)); 2673 spin_lock_irq(get_ccwdev_lock(cdev));
2675 list_splice_tail(&freeze_queue, &device->ccw_queue); 2674 list_splice_tail(&freeze_queue, &device->ccw_queue);
2676 spin_unlock_irq(get_ccwdev_lock(cdev)); 2675 spin_unlock_irq(get_ccwdev_lock(cdev));
2677 2676
2678 if (device->discipline->freeze) 2677 if (device->discipline->freeze)
2679 rc = device->discipline->freeze(device); 2678 rc = device->discipline->freeze(device);
2680 2679
2681 dasd_put_device(device); 2680 dasd_put_device(device);
2682 return rc; 2681 return rc;
2683 } 2682 }
2684 EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze); 2683 EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze);
2685 2684
2686 int dasd_generic_restore_device(struct ccw_device *cdev) 2685 int dasd_generic_restore_device(struct ccw_device *cdev)
2687 { 2686 {
2688 struct dasd_device *device = dasd_device_from_cdev(cdev); 2687 struct dasd_device *device = dasd_device_from_cdev(cdev);
2689 int rc = 0; 2688 int rc = 0;
2690 2689
2691 if (IS_ERR(device)) 2690 if (IS_ERR(device))
2692 return PTR_ERR(device); 2691 return PTR_ERR(device);
2693 2692
2694 /* allow new IO again */ 2693 /* allow new IO again */
2695 dasd_device_remove_stop_bits(device, 2694 dasd_device_remove_stop_bits(device,
2696 (DASD_STOPPED_PM | DASD_UNRESUMED_PM)); 2695 (DASD_STOPPED_PM | DASD_UNRESUMED_PM));
2697 2696
2698 dasd_schedule_device_bh(device); 2697 dasd_schedule_device_bh(device);
2699 2698
2700 /* 2699 /*
2701 * call the discipline restore function; 2700 * call the discipline restore function;
2702 * if the device is stopped, do nothing (e.g. for disconnected devices) 2701 * if the device is stopped, do nothing (e.g. for disconnected devices)
2703 */ 2702 */
2704 if (device->discipline->restore && !(device->stopped)) 2703 if (device->discipline->restore && !(device->stopped))
2705 rc = device->discipline->restore(device); 2704 rc = device->discipline->restore(device);
2706 if (rc || device->stopped) 2705 if (rc || device->stopped)
2707 /* 2706 /*
2708 * if the resume failed for the DASD we put it in 2707 * if the resume failed for the DASD we put it in
2709 * an UNRESUMED stop state 2708 * an UNRESUMED stop state
2710 */ 2709 */
2711 device->stopped |= DASD_UNRESUMED_PM; 2710 device->stopped |= DASD_UNRESUMED_PM;
2712 2711
2713 if (device->block) 2712 if (device->block)
2714 dasd_schedule_block_bh(device->block); 2713 dasd_schedule_block_bh(device->block);
2715 2714
2716 dasd_put_device(device); 2715 dasd_put_device(device);
2717 return 0; 2716 return 0;
2718 } 2717 }
2719 EXPORT_SYMBOL_GPL(dasd_generic_restore_device); 2718 EXPORT_SYMBOL_GPL(dasd_generic_restore_device);
2720 2719
2721 static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, 2720 static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
2722 void *rdc_buffer, 2721 void *rdc_buffer,
2723 int rdc_buffer_size, 2722 int rdc_buffer_size,
2724 int magic) 2723 int magic)
2725 { 2724 {
2726 struct dasd_ccw_req *cqr; 2725 struct dasd_ccw_req *cqr;
2727 struct ccw1 *ccw; 2726 struct ccw1 *ccw;
2728 unsigned long *idaw; 2727 unsigned long *idaw;
2729 2728
2730 cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device); 2729 cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device);
2731 2730
2732 if (IS_ERR(cqr)) { 2731 if (IS_ERR(cqr)) {
2733 /* internal error 13 - Allocating the RDC request failed */ 2732 /* internal error 13 - Allocating the RDC request failed */
2734 dev_err(&device->cdev->dev, 2733 dev_err(&device->cdev->dev,
2735 "An error occurred in the DASD device driver, " 2734 "An error occurred in the DASD device driver, "
2736 "reason=%s\n", "13"); 2735 "reason=%s\n", "13");
2737 return cqr; 2736 return cqr;
2738 } 2737 }
2739 2738
2740 ccw = cqr->cpaddr; 2739 ccw = cqr->cpaddr;
2741 ccw->cmd_code = CCW_CMD_RDC; 2740 ccw->cmd_code = CCW_CMD_RDC;
2742 if (idal_is_needed(rdc_buffer, rdc_buffer_size)) { 2741 if (idal_is_needed(rdc_buffer, rdc_buffer_size)) {
2743 idaw = (unsigned long *) (cqr->data); 2742 idaw = (unsigned long *) (cqr->data);
2744 ccw->cda = (__u32)(addr_t) idaw; 2743 ccw->cda = (__u32)(addr_t) idaw;
2745 ccw->flags = CCW_FLAG_IDA; 2744 ccw->flags = CCW_FLAG_IDA;
2746 idaw = idal_create_words(idaw, rdc_buffer, rdc_buffer_size); 2745 idaw = idal_create_words(idaw, rdc_buffer, rdc_buffer_size);
2747 } else { 2746 } else {
2748 ccw->cda = (__u32)(addr_t) rdc_buffer; 2747 ccw->cda = (__u32)(addr_t) rdc_buffer;
2749 ccw->flags = 0; 2748 ccw->flags = 0;
2750 } 2749 }
2751 2750
2752 ccw->count = rdc_buffer_size; 2751 ccw->count = rdc_buffer_size;
2753 cqr->startdev = device; 2752 cqr->startdev = device;
2754 cqr->memdev = device; 2753 cqr->memdev = device;
2755 cqr->expires = 10*HZ; 2754 cqr->expires = 10*HZ;
2756 cqr->retries = 256; 2755 cqr->retries = 256;
2757 cqr->buildclk = get_clock(); 2756 cqr->buildclk = get_clock();
2758 cqr->status = DASD_CQR_FILLED; 2757 cqr->status = DASD_CQR_FILLED;
2759 return cqr; 2758 return cqr;
2760 } 2759 }
2761 2760
2762 2761
2763 int dasd_generic_read_dev_chars(struct dasd_device *device, int magic, 2762 int dasd_generic_read_dev_chars(struct dasd_device *device, int magic,
2764 void *rdc_buffer, int rdc_buffer_size) 2763 void *rdc_buffer, int rdc_buffer_size)
2765 { 2764 {
2766 int ret; 2765 int ret;
2767 struct dasd_ccw_req *cqr; 2766 struct dasd_ccw_req *cqr;
2768 2767
2769 cqr = dasd_generic_build_rdc(device, rdc_buffer, rdc_buffer_size, 2768 cqr = dasd_generic_build_rdc(device, rdc_buffer, rdc_buffer_size,
2770 magic); 2769 magic);
2771 if (IS_ERR(cqr)) 2770 if (IS_ERR(cqr))
2772 return PTR_ERR(cqr); 2771 return PTR_ERR(cqr);
2773 2772
2774 ret = dasd_sleep_on(cqr); 2773 ret = dasd_sleep_on(cqr);
2775 dasd_sfree_request(cqr, cqr->memdev); 2774 dasd_sfree_request(cqr, cqr->memdev);
2776 return ret; 2775 return ret;
2777 } 2776 }
2778 EXPORT_SYMBOL_GPL(dasd_generic_read_dev_chars); 2777 EXPORT_SYMBOL_GPL(dasd_generic_read_dev_chars);
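/*
 * Hedged usage sketch (names below are illustrative assumptions, not this
 * commit's code): a discipline would typically pull the device
 * characteristics into its private data while checking the device:
 */
struct my_private *private = device->private;	/* hypothetical private struct */
int rc;

rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
				 &private->rdc_data,
				 sizeof(private->rdc_data));
if (rc)
	DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
			"Reading device characteristics failed, rc=%d", rc);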
2779 2778
2780 /* 2779 /*
2781 * In command mode and transport mode we need to look for sense 2780 * In command mode and transport mode we need to look for sense
2782 * data in different places. The sense data itself is always 2781 * data in different places. The sense data itself is always
2783 * an array of 32 bytes, so we can unify the sense data access 2782 * an array of 32 bytes, so we can unify the sense data access
2784 * for both modes. 2783 * for both modes.
2785 */ 2784 */
2786 char *dasd_get_sense(struct irb *irb) 2785 char *dasd_get_sense(struct irb *irb)
2787 { 2786 {
2788 struct tsb *tsb = NULL; 2787 struct tsb *tsb = NULL;
2789 char *sense = NULL; 2788 char *sense = NULL;
2790 2789
2791 if (scsw_is_tm(&irb->scsw) && (irb->scsw.tm.fcxs == 0x01)) { 2790 if (scsw_is_tm(&irb->scsw) && (irb->scsw.tm.fcxs == 0x01)) {
2792 if (irb->scsw.tm.tcw) 2791 if (irb->scsw.tm.tcw)
2793 tsb = tcw_get_tsb((struct tcw *)(unsigned long) 2792 tsb = tcw_get_tsb((struct tcw *)(unsigned long)
2794 irb->scsw.tm.tcw); 2793 irb->scsw.tm.tcw);
2795 if (tsb && tsb->length == 64 && tsb->flags) 2794 if (tsb && tsb->length == 64 && tsb->flags)
2796 switch (tsb->flags & 0x07) { 2795 switch (tsb->flags & 0x07) {
2797 case 1: /* tsa_iostat */ 2796 case 1: /* tsa_iostat */
2798 sense = tsb->tsa.iostat.sense; 2797 sense = tsb->tsa.iostat.sense;
2799 break; 2798 break;
2800 case 2: /* tsa_ddpc */ 2799 case 2: /* tsa_ddpc */
2801 sense = tsb->tsa.ddpc.sense; 2800 sense = tsb->tsa.ddpc.sense;
2802 break; 2801 break;
2803 default: 2802 default:
2804 /* currently we don't use interrogate data */ 2803 /* currently we don't use interrogate data */
2805 break; 2804 break;
2806 } 2805 }
2807 } else if (irb->esw.esw0.erw.cons) { 2806 } else if (irb->esw.esw0.erw.cons) {
2808 sense = irb->ecw; 2807 sense = irb->ecw;
2809 } 2808 }
2810 return sense; 2809 return sense;
2811 } 2810 }
2812 EXPORT_SYMBOL_GPL(dasd_get_sense); 2811 EXPORT_SYMBOL_GPL(dasd_get_sense);
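/*
 * Hedged usage sketch (assumed caller, not part of this commit): an interrupt
 * handler can consume the unified sense pointer without caring whether the
 * interrupt came in via command or transport mode:
 */
char *sense = dasd_get_sense(irb);
if (sense)
	/* all 32 sense bytes are valid here, for both modes */
	handle_sense(device, sense);	/* handle_sense() is a hypothetical helper */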
2813 2812
2814 static int __init dasd_init(void) 2813 static int __init dasd_init(void)
2815 { 2814 {
2816 int rc; 2815 int rc;
2817 2816
2818 init_waitqueue_head(&dasd_init_waitq); 2817 init_waitqueue_head(&dasd_init_waitq);
2819 init_waitqueue_head(&dasd_flush_wq); 2818 init_waitqueue_head(&dasd_flush_wq);
2820 init_waitqueue_head(&generic_waitq); 2819 init_waitqueue_head(&generic_waitq);
2821 2820
2822 /* register 'common' DASD debug area, used for all DBF_XXX calls */ 2821 /* register 'common' DASD debug area, used for all DBF_XXX calls */
2823 dasd_debug_area = debug_register("dasd", 1, 1, 8 * sizeof(long)); 2822 dasd_debug_area = debug_register("dasd", 1, 1, 8 * sizeof(long));
2824 if (dasd_debug_area == NULL) { 2823 if (dasd_debug_area == NULL) {
2825 rc = -ENOMEM; 2824 rc = -ENOMEM;
2826 goto failed; 2825 goto failed;
2827 } 2826 }
2828 debug_register_view(dasd_debug_area, &debug_sprintf_view); 2827 debug_register_view(dasd_debug_area, &debug_sprintf_view);
2829 debug_set_level(dasd_debug_area, DBF_WARNING); 2828 debug_set_level(dasd_debug_area, DBF_WARNING);
2830 2829
2831 DBF_EVENT(DBF_EMERG, "%s", "debug area created"); 2830 DBF_EVENT(DBF_EMERG, "%s", "debug area created");
2832 2831
2833 dasd_diag_discipline_pointer = NULL; 2832 dasd_diag_discipline_pointer = NULL;
2834 2833
2835 rc = dasd_devmap_init(); 2834 rc = dasd_devmap_init();
2836 if (rc) 2835 if (rc)
2837 goto failed; 2836 goto failed;
2838 rc = dasd_gendisk_init(); 2837 rc = dasd_gendisk_init();
2839 if (rc) 2838 if (rc)
2840 goto failed; 2839 goto failed;
2841 rc = dasd_parse(); 2840 rc = dasd_parse();
2842 if (rc) 2841 if (rc)
2843 goto failed; 2842 goto failed;
2844 rc = dasd_eer_init(); 2843 rc = dasd_eer_init();
2845 if (rc) 2844 if (rc)
2846 goto failed; 2845 goto failed;
2847 #ifdef CONFIG_PROC_FS 2846 #ifdef CONFIG_PROC_FS
2848 rc = dasd_proc_init(); 2847 rc = dasd_proc_init();
2849 if (rc) 2848 if (rc)
2850 goto failed; 2849 goto failed;
2851 #endif 2850 #endif
2852 2851
2853 return 0; 2852 return 0;
2854 failed: 2853 failed:
2855 pr_info("The DASD device driver could not be initialized\n"); 2854 pr_info("The DASD device driver could not be initialized\n");
2856 dasd_exit(); 2855 dasd_exit();
2857 return rc; 2856 return rc;
2858 } 2857 }
2859 2858
2860 module_init(dasd_init); 2859 module_init(dasd_init);
2861 module_exit(dasd_exit); 2860 module_exit(dasd_exit);
2862 2861
2863 EXPORT_SYMBOL(dasd_debug_area); 2862 EXPORT_SYMBOL(dasd_debug_area);
2864 EXPORT_SYMBOL(dasd_diag_discipline_pointer); 2863 EXPORT_SYMBOL(dasd_diag_discipline_pointer);
2865 2864
2866 EXPORT_SYMBOL(dasd_add_request_head); 2865 EXPORT_SYMBOL(dasd_add_request_head);
2867 EXPORT_SYMBOL(dasd_add_request_tail); 2866 EXPORT_SYMBOL(dasd_add_request_tail);
2868 EXPORT_SYMBOL(dasd_cancel_req); 2867 EXPORT_SYMBOL(dasd_cancel_req);
2869 EXPORT_SYMBOL(dasd_device_clear_timer); 2868 EXPORT_SYMBOL(dasd_device_clear_timer);
2870 EXPORT_SYMBOL(dasd_block_clear_timer); 2869 EXPORT_SYMBOL(dasd_block_clear_timer);
2871 EXPORT_SYMBOL(dasd_enable_device); 2870 EXPORT_SYMBOL(dasd_enable_device);
2872 EXPORT_SYMBOL(dasd_int_handler); 2871 EXPORT_SYMBOL(dasd_int_handler);
2873 EXPORT_SYMBOL(dasd_kfree_request); 2872 EXPORT_SYMBOL(dasd_kfree_request);
2874 EXPORT_SYMBOL(dasd_kick_device); 2873 EXPORT_SYMBOL(dasd_kick_device);
2875 EXPORT_SYMBOL(dasd_kmalloc_request); 2874 EXPORT_SYMBOL(dasd_kmalloc_request);
2876 EXPORT_SYMBOL(dasd_schedule_device_bh); 2875 EXPORT_SYMBOL(dasd_schedule_device_bh);
2877 EXPORT_SYMBOL(dasd_schedule_block_bh); 2876 EXPORT_SYMBOL(dasd_schedule_block_bh);
2878 EXPORT_SYMBOL(dasd_set_target_state); 2877 EXPORT_SYMBOL(dasd_set_target_state);
2879 EXPORT_SYMBOL(dasd_device_set_timer); 2878 EXPORT_SYMBOL(dasd_device_set_timer);
2880 EXPORT_SYMBOL(dasd_block_set_timer); 2879 EXPORT_SYMBOL(dasd_block_set_timer);
2881 EXPORT_SYMBOL(dasd_sfree_request); 2880 EXPORT_SYMBOL(dasd_sfree_request);
2882 EXPORT_SYMBOL(dasd_sleep_on); 2881 EXPORT_SYMBOL(dasd_sleep_on);
2883 EXPORT_SYMBOL(dasd_sleep_on_immediatly); 2882 EXPORT_SYMBOL(dasd_sleep_on_immediatly);
2884 EXPORT_SYMBOL(dasd_sleep_on_interruptible); 2883 EXPORT_SYMBOL(dasd_sleep_on_interruptible);
2885 EXPORT_SYMBOL(dasd_smalloc_request); 2884 EXPORT_SYMBOL(dasd_smalloc_request);
2886 EXPORT_SYMBOL(dasd_start_IO); 2885 EXPORT_SYMBOL(dasd_start_IO);
2887 EXPORT_SYMBOL(dasd_term_IO); 2886 EXPORT_SYMBOL(dasd_term_IO);
2888 2887
2889 EXPORT_SYMBOL_GPL(dasd_generic_probe); 2888 EXPORT_SYMBOL_GPL(dasd_generic_probe);
2890 EXPORT_SYMBOL_GPL(dasd_generic_remove); 2889 EXPORT_SYMBOL_GPL(dasd_generic_remove);
2891 EXPORT_SYMBOL_GPL(dasd_generic_notify); 2890 EXPORT_SYMBOL_GPL(dasd_generic_notify);
2892 EXPORT_SYMBOL_GPL(dasd_generic_set_online); 2891 EXPORT_SYMBOL_GPL(dasd_generic_set_online);
2893 EXPORT_SYMBOL_GPL(dasd_generic_set_offline); 2892 EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
2894 EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); 2893 EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
2895 EXPORT_SYMBOL_GPL(dasd_flush_device_queue); 2894 EXPORT_SYMBOL_GPL(dasd_flush_device_queue);
2896 EXPORT_SYMBOL_GPL(dasd_alloc_block); 2895 EXPORT_SYMBOL_GPL(dasd_alloc_block);
2897 EXPORT_SYMBOL_GPL(dasd_free_block); 2896 EXPORT_SYMBOL_GPL(dasd_free_block);
2898 2897
1 /* 1 /*
2 * sd.c Copyright (C) 1992 Drew Eckhardt 2 * sd.c Copyright (C) 1992 Drew Eckhardt
3 * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale 3 * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
4 * 4 *
5 * Linux scsi disk driver 5 * Linux scsi disk driver
6 * Initial versions: Drew Eckhardt 6 * Initial versions: Drew Eckhardt
7 * Subsequent revisions: Eric Youngdale 7 * Subsequent revisions: Eric Youngdale
8 * Modification history: 8 * Modification history:
9 * - Drew Eckhardt <drew@colorado.edu> original 9 * - Drew Eckhardt <drew@colorado.edu> original
10 * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple 10 * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple
11 * outstanding request, and other enhancements. 11 * outstanding request, and other enhancements.
12 * Support loadable low-level scsi drivers. 12 * Support loadable low-level scsi drivers.
13 * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using 13 * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using
14 * eight major numbers. 14 * eight major numbers.
15 * - Richard Gooch <rgooch@atnf.csiro.au> support devfs. 15 * - Richard Gooch <rgooch@atnf.csiro.au> support devfs.
16 * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in 16 * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in
17 * sd_init and cleanups. 17 * sd_init and cleanups.
18 * - Alex Davis <letmein@erols.com> Fix problem where partition info 18 * - Alex Davis <letmein@erols.com> Fix problem where partition info
19 * not being read in sd_open. Fix problem where removable media 19 * not being read in sd_open. Fix problem where removable media
20 * could be ejected after sd_open. 20 * could be ejected after sd_open.
21 * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x 21 * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x
22 * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox 22 * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox
23 * <willy@debian.org>, Kurt Garloff <garloff@suse.de>: 23 * <willy@debian.org>, Kurt Garloff <garloff@suse.de>:
24 * Support 32k/1M disks. 24 * Support 32k/1M disks.
25 * 25 *
26 * Logging policy (needs CONFIG_SCSI_LOGGING defined): 26 * Logging policy (needs CONFIG_SCSI_LOGGING defined):
27 * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2 27 * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2
28 * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1 28 * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1
29 * - entering sd_ioctl: SCSI_LOG_IOCTL level 1 29 * - entering sd_ioctl: SCSI_LOG_IOCTL level 1
30 * - entering other commands: SCSI_LOG_HLQUEUE level 3 30 * - entering other commands: SCSI_LOG_HLQUEUE level 3
31 * Note: when the logging level is set by the user, it must be greater 31 * Note: when the logging level is set by the user, it must be greater
32 * than the level indicated above to trigger output. 32 * than the level indicated above to trigger output.
33 */ 33 */
34 34
35 #include <linux/module.h> 35 #include <linux/module.h>
36 #include <linux/fs.h> 36 #include <linux/fs.h>
37 #include <linux/kernel.h> 37 #include <linux/kernel.h>
38 #include <linux/mm.h> 38 #include <linux/mm.h>
39 #include <linux/bio.h> 39 #include <linux/bio.h>
40 #include <linux/genhd.h> 40 #include <linux/genhd.h>
41 #include <linux/hdreg.h> 41 #include <linux/hdreg.h>
42 #include <linux/errno.h> 42 #include <linux/errno.h>
43 #include <linux/idr.h> 43 #include <linux/idr.h>
44 #include <linux/interrupt.h> 44 #include <linux/interrupt.h>
45 #include <linux/init.h> 45 #include <linux/init.h>
46 #include <linux/blkdev.h> 46 #include <linux/blkdev.h>
47 #include <linux/blkpg.h> 47 #include <linux/blkpg.h>
48 #include <linux/delay.h> 48 #include <linux/delay.h>
49 #include <linux/smp_lock.h> 49 #include <linux/smp_lock.h>
50 #include <linux/mutex.h> 50 #include <linux/mutex.h>
51 #include <linux/string_helpers.h> 51 #include <linux/string_helpers.h>
52 #include <linux/async.h> 52 #include <linux/async.h>
53 #include <linux/slab.h> 53 #include <linux/slab.h>
54 #include <asm/uaccess.h> 54 #include <asm/uaccess.h>
55 #include <asm/unaligned.h> 55 #include <asm/unaligned.h>
56 56
57 #include <scsi/scsi.h> 57 #include <scsi/scsi.h>
58 #include <scsi/scsi_cmnd.h> 58 #include <scsi/scsi_cmnd.h>
59 #include <scsi/scsi_dbg.h> 59 #include <scsi/scsi_dbg.h>
60 #include <scsi/scsi_device.h> 60 #include <scsi/scsi_device.h>
61 #include <scsi/scsi_driver.h> 61 #include <scsi/scsi_driver.h>
62 #include <scsi/scsi_eh.h> 62 #include <scsi/scsi_eh.h>
63 #include <scsi/scsi_host.h> 63 #include <scsi/scsi_host.h>
64 #include <scsi/scsi_ioctl.h> 64 #include <scsi/scsi_ioctl.h>
65 #include <scsi/scsicam.h> 65 #include <scsi/scsicam.h>
66 66
67 #include "sd.h" 67 #include "sd.h"
68 #include "scsi_logging.h" 68 #include "scsi_logging.h"
69 69
70 MODULE_AUTHOR("Eric Youngdale"); 70 MODULE_AUTHOR("Eric Youngdale");
71 MODULE_DESCRIPTION("SCSI disk (sd) driver"); 71 MODULE_DESCRIPTION("SCSI disk (sd) driver");
72 MODULE_LICENSE("GPL"); 72 MODULE_LICENSE("GPL");
73 73
74 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR); 74 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR);
75 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR); 75 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR);
76 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR); 76 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR);
77 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR); 77 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR);
78 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR); 78 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR);
79 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR); 79 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR);
80 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR); 80 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR);
81 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR); 81 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR);
82 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR); 82 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR);
83 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR); 83 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR);
84 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR); 84 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR);
85 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR); 85 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR);
86 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR); 86 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR);
87 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR); 87 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR);
88 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); 88 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR);
89 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); 89 MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR);
90 MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); 90 MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK);
91 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); 91 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
92 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); 92 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
93 93
94 #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) 94 #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
95 #define SD_MINORS 16 95 #define SD_MINORS 16
96 #else 96 #else
97 #define SD_MINORS 0 97 #define SD_MINORS 0
98 #endif 98 #endif
99 99
100 static int sd_revalidate_disk(struct gendisk *); 100 static int sd_revalidate_disk(struct gendisk *);
101 static void sd_unlock_native_capacity(struct gendisk *disk); 101 static void sd_unlock_native_capacity(struct gendisk *disk);
102 static int sd_probe(struct device *); 102 static int sd_probe(struct device *);
103 static int sd_remove(struct device *); 103 static int sd_remove(struct device *);
104 static void sd_shutdown(struct device *); 104 static void sd_shutdown(struct device *);
105 static int sd_suspend(struct device *, pm_message_t state); 105 static int sd_suspend(struct device *, pm_message_t state);
106 static int sd_resume(struct device *); 106 static int sd_resume(struct device *);
107 static void sd_rescan(struct device *); 107 static void sd_rescan(struct device *);
108 static int sd_done(struct scsi_cmnd *); 108 static int sd_done(struct scsi_cmnd *);
109 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); 109 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
110 static void scsi_disk_release(struct device *cdev); 110 static void scsi_disk_release(struct device *cdev);
111 static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *); 111 static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *);
112 static void sd_print_result(struct scsi_disk *, int); 112 static void sd_print_result(struct scsi_disk *, int);
113 113
114 static DEFINE_SPINLOCK(sd_index_lock); 114 static DEFINE_SPINLOCK(sd_index_lock);
115 static DEFINE_IDA(sd_index_ida); 115 static DEFINE_IDA(sd_index_ida);
116 116
117 /* This mutex is used to mediate the 0->1 reference get in the 117 /* This mutex is used to mediate the 0->1 reference get in the
118 * face of object destruction (i.e. we can't allow a get on an 118 * face of object destruction (i.e. we can't allow a get on an
119 * object after last put) */ 119 * object after last put) */
120 static DEFINE_MUTEX(sd_ref_mutex); 120 static DEFINE_MUTEX(sd_ref_mutex);
121 121
122 static struct kmem_cache *sd_cdb_cache; 122 static struct kmem_cache *sd_cdb_cache;
123 static mempool_t *sd_cdb_pool; 123 static mempool_t *sd_cdb_pool;
124 124
125 static const char *sd_cache_types[] = { 125 static const char *sd_cache_types[] = {
126 "write through", "none", "write back", 126 "write through", "none", "write back",
127 "write back, no read (daft)" 127 "write back, no read (daft)"
128 }; 128 };
129 129
130 static ssize_t 130 static ssize_t
131 sd_store_cache_type(struct device *dev, struct device_attribute *attr, 131 sd_store_cache_type(struct device *dev, struct device_attribute *attr,
132 const char *buf, size_t count) 132 const char *buf, size_t count)
133 { 133 {
134 int i, ct = -1, rcd, wce, sp; 134 int i, ct = -1, rcd, wce, sp;
135 struct scsi_disk *sdkp = to_scsi_disk(dev); 135 struct scsi_disk *sdkp = to_scsi_disk(dev);
136 struct scsi_device *sdp = sdkp->device; 136 struct scsi_device *sdp = sdkp->device;
137 char buffer[64]; 137 char buffer[64];
138 char *buffer_data; 138 char *buffer_data;
139 struct scsi_mode_data data; 139 struct scsi_mode_data data;
140 struct scsi_sense_hdr sshdr; 140 struct scsi_sense_hdr sshdr;
141 int len; 141 int len;
142 142
143 if (sdp->type != TYPE_DISK) 143 if (sdp->type != TYPE_DISK)
144 /* no cache control on RBC devices; theoretically they 144 /* no cache control on RBC devices; theoretically they
145 * can do it, but there are probably so many exceptions 145 * can do it, but there are probably so many exceptions
146 * it's not worth the risk */ 146 * it's not worth the risk */
147 return -EINVAL; 147 return -EINVAL;
148 148
149 for (i = 0; i < ARRAY_SIZE(sd_cache_types); i++) { 149 for (i = 0; i < ARRAY_SIZE(sd_cache_types); i++) {
150 len = strlen(sd_cache_types[i]); 150 len = strlen(sd_cache_types[i]);
151 if (strncmp(sd_cache_types[i], buf, len) == 0 && 151 if (strncmp(sd_cache_types[i], buf, len) == 0 &&
152 buf[len] == '\n') { 152 buf[len] == '\n') {
153 ct = i; 153 ct = i;
154 break; 154 break;
155 } 155 }
156 } 156 }
157 if (ct < 0) 157 if (ct < 0)
158 return -EINVAL; 158 return -EINVAL;
159 rcd = ct & 0x01 ? 1 : 0; 159 rcd = ct & 0x01 ? 1 : 0;
160 wce = ct & 0x02 ? 1 : 0; 160 wce = ct & 0x02 ? 1 : 0;
161 if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT, 161 if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT,
162 SD_MAX_RETRIES, &data, NULL)) 162 SD_MAX_RETRIES, &data, NULL))
163 return -EINVAL; 163 return -EINVAL;
164 len = min_t(size_t, sizeof(buffer), data.length - data.header_length - 164 len = min_t(size_t, sizeof(buffer), data.length - data.header_length -
165 data.block_descriptor_length); 165 data.block_descriptor_length);
166 buffer_data = buffer + data.header_length + 166 buffer_data = buffer + data.header_length +
167 data.block_descriptor_length; 167 data.block_descriptor_length;
168 buffer_data[2] &= ~0x05; 168 buffer_data[2] &= ~0x05;
169 buffer_data[2] |= wce << 2 | rcd; 169 buffer_data[2] |= wce << 2 | rcd;
170 sp = buffer_data[0] & 0x80 ? 1 : 0; 170 sp = buffer_data[0] & 0x80 ? 1 : 0;
171 171
172 if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, 172 if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT,
173 SD_MAX_RETRIES, &data, &sshdr)) { 173 SD_MAX_RETRIES, &data, &sshdr)) {
174 if (scsi_sense_valid(&sshdr)) 174 if (scsi_sense_valid(&sshdr))
175 sd_print_sense_hdr(sdkp, &sshdr); 175 sd_print_sense_hdr(sdkp, &sshdr);
176 return -EINVAL; 176 return -EINVAL;
177 } 177 }
178 revalidate_disk(sdkp->disk); 178 revalidate_disk(sdkp->disk);
179 return count; 179 return count;
180 } 180 }
181 181
182 static ssize_t 182 static ssize_t
183 sd_store_manage_start_stop(struct device *dev, struct device_attribute *attr, 183 sd_store_manage_start_stop(struct device *dev, struct device_attribute *attr,
184 const char *buf, size_t count) 184 const char *buf, size_t count)
185 { 185 {
186 struct scsi_disk *sdkp = to_scsi_disk(dev); 186 struct scsi_disk *sdkp = to_scsi_disk(dev);
187 struct scsi_device *sdp = sdkp->device; 187 struct scsi_device *sdp = sdkp->device;
188 188
189 if (!capable(CAP_SYS_ADMIN)) 189 if (!capable(CAP_SYS_ADMIN))
190 return -EACCES; 190 return -EACCES;
191 191
192 sdp->manage_start_stop = simple_strtoul(buf, NULL, 10); 192 sdp->manage_start_stop = simple_strtoul(buf, NULL, 10);
193 193
194 return count; 194 return count;
195 } 195 }
196 196
197 static ssize_t 197 static ssize_t
198 sd_store_allow_restart(struct device *dev, struct device_attribute *attr, 198 sd_store_allow_restart(struct device *dev, struct device_attribute *attr,
199 const char *buf, size_t count) 199 const char *buf, size_t count)
200 { 200 {
201 struct scsi_disk *sdkp = to_scsi_disk(dev); 201 struct scsi_disk *sdkp = to_scsi_disk(dev);
202 struct scsi_device *sdp = sdkp->device; 202 struct scsi_device *sdp = sdkp->device;
203 203
204 if (!capable(CAP_SYS_ADMIN)) 204 if (!capable(CAP_SYS_ADMIN))
205 return -EACCES; 205 return -EACCES;
206 206
207 if (sdp->type != TYPE_DISK) 207 if (sdp->type != TYPE_DISK)
208 return -EINVAL; 208 return -EINVAL;
209 209
210 sdp->allow_restart = simple_strtoul(buf, NULL, 10); 210 sdp->allow_restart = simple_strtoul(buf, NULL, 10);
211 211
212 return count; 212 return count;
213 } 213 }
214 214
215 static ssize_t 215 static ssize_t
216 sd_show_cache_type(struct device *dev, struct device_attribute *attr, 216 sd_show_cache_type(struct device *dev, struct device_attribute *attr,
217 char *buf) 217 char *buf)
218 { 218 {
219 struct scsi_disk *sdkp = to_scsi_disk(dev); 219 struct scsi_disk *sdkp = to_scsi_disk(dev);
220 int ct = sdkp->RCD + 2*sdkp->WCE; 220 int ct = sdkp->RCD + 2*sdkp->WCE;
221 221
222 return snprintf(buf, 40, "%s\n", sd_cache_types[ct]); 222 return snprintf(buf, 40, "%s\n", sd_cache_types[ct]);
223 } 223 }
224 224
225 static ssize_t 225 static ssize_t
226 sd_show_fua(struct device *dev, struct device_attribute *attr, char *buf) 226 sd_show_fua(struct device *dev, struct device_attribute *attr, char *buf)
227 { 227 {
228 struct scsi_disk *sdkp = to_scsi_disk(dev); 228 struct scsi_disk *sdkp = to_scsi_disk(dev);
229 229
230 return snprintf(buf, 20, "%u\n", sdkp->DPOFUA); 230 return snprintf(buf, 20, "%u\n", sdkp->DPOFUA);
231 } 231 }
232 232
233 static ssize_t 233 static ssize_t
234 sd_show_manage_start_stop(struct device *dev, struct device_attribute *attr, 234 sd_show_manage_start_stop(struct device *dev, struct device_attribute *attr,
235 char *buf) 235 char *buf)
236 { 236 {
237 struct scsi_disk *sdkp = to_scsi_disk(dev); 237 struct scsi_disk *sdkp = to_scsi_disk(dev);
238 struct scsi_device *sdp = sdkp->device; 238 struct scsi_device *sdp = sdkp->device;
239 239
240 return snprintf(buf, 20, "%u\n", sdp->manage_start_stop); 240 return snprintf(buf, 20, "%u\n", sdp->manage_start_stop);
241 } 241 }
242 242
243 static ssize_t 243 static ssize_t
244 sd_show_allow_restart(struct device *dev, struct device_attribute *attr, 244 sd_show_allow_restart(struct device *dev, struct device_attribute *attr,
245 char *buf) 245 char *buf)
246 { 246 {
247 struct scsi_disk *sdkp = to_scsi_disk(dev); 247 struct scsi_disk *sdkp = to_scsi_disk(dev);
248 248
249 return snprintf(buf, 40, "%d\n", sdkp->device->allow_restart); 249 return snprintf(buf, 40, "%d\n", sdkp->device->allow_restart);
250 } 250 }
251 251
252 static ssize_t 252 static ssize_t
253 sd_show_protection_type(struct device *dev, struct device_attribute *attr, 253 sd_show_protection_type(struct device *dev, struct device_attribute *attr,
254 char *buf) 254 char *buf)
255 { 255 {
256 struct scsi_disk *sdkp = to_scsi_disk(dev); 256 struct scsi_disk *sdkp = to_scsi_disk(dev);
257 257
258 return snprintf(buf, 20, "%u\n", sdkp->protection_type); 258 return snprintf(buf, 20, "%u\n", sdkp->protection_type);
259 } 259 }
260 260
261 static ssize_t 261 static ssize_t
262 sd_show_app_tag_own(struct device *dev, struct device_attribute *attr, 262 sd_show_app_tag_own(struct device *dev, struct device_attribute *attr,
263 char *buf) 263 char *buf)
264 { 264 {
265 struct scsi_disk *sdkp = to_scsi_disk(dev); 265 struct scsi_disk *sdkp = to_scsi_disk(dev);
266 266
267 return snprintf(buf, 20, "%u\n", sdkp->ATO); 267 return snprintf(buf, 20, "%u\n", sdkp->ATO);
268 } 268 }
269 269
270 static ssize_t 270 static ssize_t
271 sd_show_thin_provisioning(struct device *dev, struct device_attribute *attr, 271 sd_show_thin_provisioning(struct device *dev, struct device_attribute *attr,
272 char *buf) 272 char *buf)
273 { 273 {
274 struct scsi_disk *sdkp = to_scsi_disk(dev); 274 struct scsi_disk *sdkp = to_scsi_disk(dev);
275 275
276 return snprintf(buf, 20, "%u\n", sdkp->thin_provisioning); 276 return snprintf(buf, 20, "%u\n", sdkp->thin_provisioning);
277 } 277 }
278 278
279 static struct device_attribute sd_disk_attrs[] = { 279 static struct device_attribute sd_disk_attrs[] = {
280 __ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type, 280 __ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type,
281 sd_store_cache_type), 281 sd_store_cache_type),
282 __ATTR(FUA, S_IRUGO, sd_show_fua, NULL), 282 __ATTR(FUA, S_IRUGO, sd_show_fua, NULL),
283 __ATTR(allow_restart, S_IRUGO|S_IWUSR, sd_show_allow_restart, 283 __ATTR(allow_restart, S_IRUGO|S_IWUSR, sd_show_allow_restart,
284 sd_store_allow_restart), 284 sd_store_allow_restart),
285 __ATTR(manage_start_stop, S_IRUGO|S_IWUSR, sd_show_manage_start_stop, 285 __ATTR(manage_start_stop, S_IRUGO|S_IWUSR, sd_show_manage_start_stop,
286 sd_store_manage_start_stop), 286 sd_store_manage_start_stop),
287 __ATTR(protection_type, S_IRUGO, sd_show_protection_type, NULL), 287 __ATTR(protection_type, S_IRUGO, sd_show_protection_type, NULL),
288 __ATTR(app_tag_own, S_IRUGO, sd_show_app_tag_own, NULL), 288 __ATTR(app_tag_own, S_IRUGO, sd_show_app_tag_own, NULL),
289 __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL), 289 __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL),
290 __ATTR_NULL, 290 __ATTR_NULL,
291 }; 291 };
292 292
293 static struct class sd_disk_class = { 293 static struct class sd_disk_class = {
294 .name = "scsi_disk", 294 .name = "scsi_disk",
295 .owner = THIS_MODULE, 295 .owner = THIS_MODULE,
296 .dev_release = scsi_disk_release, 296 .dev_release = scsi_disk_release,
297 .dev_attrs = sd_disk_attrs, 297 .dev_attrs = sd_disk_attrs,
298 }; 298 };
299 299
300 static struct scsi_driver sd_template = { 300 static struct scsi_driver sd_template = {
301 .owner = THIS_MODULE, 301 .owner = THIS_MODULE,
302 .gendrv = { 302 .gendrv = {
303 .name = "sd", 303 .name = "sd",
304 .probe = sd_probe, 304 .probe = sd_probe,
305 .remove = sd_remove, 305 .remove = sd_remove,
306 .suspend = sd_suspend, 306 .suspend = sd_suspend,
307 .resume = sd_resume, 307 .resume = sd_resume,
308 .shutdown = sd_shutdown, 308 .shutdown = sd_shutdown,
309 }, 309 },
310 .rescan = sd_rescan, 310 .rescan = sd_rescan,
311 .done = sd_done, 311 .done = sd_done,
312 }; 312 };
313 313
314 /* 314 /*
315 * Device no to disk mapping: 315 * Device no to disk mapping:
316 * 316 *
317 * major disc2 disc p1 317 * major disc2 disc p1
318 * |............|.............|....|....| <- dev_t 318 * |............|.............|....|....| <- dev_t
319 * 31 20 19 8 7 4 3 0 319 * 31 20 19 8 7 4 3 0
320 * 320 *
321 * Inside a major, we have 16k disks, however mapped non- 321 * Inside a major, we have 16k disks, however mapped non-
322 * contiguously. The first 16 disks are for major0, the next 322 * contiguously. The first 16 disks are for major0, the next
323 * ones with major1, ... Disk 256 is for major0 again, disk 272 323 * ones with major1, ... Disk 256 is for major0 again, disk 272
324 * for major1, ... 324 * for major1, ...
325 * As we stay compatible with our numbering scheme, we can reuse 325 * As we stay compatible with our numbering scheme, we can reuse
326 * the well-known SCSI majors 8, 65--71, 136--143. 326 * the well-known SCSI majors 8, 65--71, 136--143.
327 */ 327 */
328 static int sd_major(int major_idx) 328 static int sd_major(int major_idx)
329 { 329 {
330 switch (major_idx) { 330 switch (major_idx) {
331 case 0: 331 case 0:
332 return SCSI_DISK0_MAJOR; 332 return SCSI_DISK0_MAJOR;
333 case 1 ... 7: 333 case 1 ... 7:
334 return SCSI_DISK1_MAJOR + major_idx - 1; 334 return SCSI_DISK1_MAJOR + major_idx - 1;
335 case 8 ... 15: 335 case 8 ... 15:
336 return SCSI_DISK8_MAJOR + major_idx - 8; 336 return SCSI_DISK8_MAJOR + major_idx - 8;
337 default: 337 default:
338 BUG(); 338 BUG();
339 return 0; /* shut up gcc */ 339 return 0; /* shut up gcc */
340 } 340 }
341 } 341 }
342 342
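As a rough sketch of the numbering scheme described in the comment above (valid only for the first 256 disks, which stay out of the extended disc2 range), the forward mapping from a zero-based disk index to major index and first minor looks like this; it is an illustration, not the driver's probe code.

/* Illustration only: 16 disks per major before wrapping back to major 0,
 * 16 minors (partitions) per disk. Valid for disk indexes 0..255. */
static void example_map_index(int index, int *major_idx, int *first_minor)
{
	*major_idx = (index >> 4) & 0xf;	/* 0..15 -> major0, 16..31 -> major1, ... */
	*first_minor = (index & 0xf) << 4;	/* slot inside that major, partitions follow */
}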
343 static struct scsi_disk *__scsi_disk_get(struct gendisk *disk) 343 static struct scsi_disk *__scsi_disk_get(struct gendisk *disk)
344 { 344 {
345 struct scsi_disk *sdkp = NULL; 345 struct scsi_disk *sdkp = NULL;
346 346
347 if (disk->private_data) { 347 if (disk->private_data) {
348 sdkp = scsi_disk(disk); 348 sdkp = scsi_disk(disk);
349 if (scsi_device_get(sdkp->device) == 0) 349 if (scsi_device_get(sdkp->device) == 0)
350 get_device(&sdkp->dev); 350 get_device(&sdkp->dev);
351 else 351 else
352 sdkp = NULL; 352 sdkp = NULL;
353 } 353 }
354 return sdkp; 354 return sdkp;
355 } 355 }
356 356
357 static struct scsi_disk *scsi_disk_get(struct gendisk *disk) 357 static struct scsi_disk *scsi_disk_get(struct gendisk *disk)
358 { 358 {
359 struct scsi_disk *sdkp; 359 struct scsi_disk *sdkp;
360 360
361 mutex_lock(&sd_ref_mutex); 361 mutex_lock(&sd_ref_mutex);
362 sdkp = __scsi_disk_get(disk); 362 sdkp = __scsi_disk_get(disk);
363 mutex_unlock(&sd_ref_mutex); 363 mutex_unlock(&sd_ref_mutex);
364 return sdkp; 364 return sdkp;
365 } 365 }
366 366
367 static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev) 367 static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev)
368 { 368 {
369 struct scsi_disk *sdkp; 369 struct scsi_disk *sdkp;
370 370
371 mutex_lock(&sd_ref_mutex); 371 mutex_lock(&sd_ref_mutex);
372 sdkp = dev_get_drvdata(dev); 372 sdkp = dev_get_drvdata(dev);
373 if (sdkp) 373 if (sdkp)
374 sdkp = __scsi_disk_get(sdkp->disk); 374 sdkp = __scsi_disk_get(sdkp->disk);
375 mutex_unlock(&sd_ref_mutex); 375 mutex_unlock(&sd_ref_mutex);
376 return sdkp; 376 return sdkp;
377 } 377 }
378 378
379 static void scsi_disk_put(struct scsi_disk *sdkp) 379 static void scsi_disk_put(struct scsi_disk *sdkp)
380 { 380 {
381 struct scsi_device *sdev = sdkp->device; 381 struct scsi_device *sdev = sdkp->device;
382 382
383 mutex_lock(&sd_ref_mutex); 383 mutex_lock(&sd_ref_mutex);
384 put_device(&sdkp->dev); 384 put_device(&sdkp->dev);
385 scsi_device_put(sdev); 385 scsi_device_put(sdev);
386 mutex_unlock(&sd_ref_mutex); 386 mutex_unlock(&sd_ref_mutex);
387 } 387 }
388 388
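A minimal usage sketch of the reference helpers above (hypothetical caller within this file): every successful scsi_disk_get() must be balanced by a scsi_disk_put(), which drops both the scsi_device and the sdkp->dev reference under sd_ref_mutex.

/* Hypothetical caller: pin the scsi_disk behind a gendisk while using it. */
static void example_use_disk(struct gendisk *disk)
{
	struct scsi_disk *sdkp = scsi_disk_get(disk);

	if (!sdkp)
		return;		/* device is already going away */

	/* ... sdkp and sdkp->device may be dereferenced safely here ... */

	scsi_disk_put(sdkp);
}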
389 static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif) 389 static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif)
390 { 390 {
391 unsigned int prot_op = SCSI_PROT_NORMAL; 391 unsigned int prot_op = SCSI_PROT_NORMAL;
392 unsigned int dix = scsi_prot_sg_count(scmd); 392 unsigned int dix = scsi_prot_sg_count(scmd);
393 393
394 if (scmd->sc_data_direction == DMA_FROM_DEVICE) { 394 if (scmd->sc_data_direction == DMA_FROM_DEVICE) {
395 if (dif && dix) 395 if (dif && dix)
396 prot_op = SCSI_PROT_READ_PASS; 396 prot_op = SCSI_PROT_READ_PASS;
397 else if (dif && !dix) 397 else if (dif && !dix)
398 prot_op = SCSI_PROT_READ_STRIP; 398 prot_op = SCSI_PROT_READ_STRIP;
399 else if (!dif && dix) 399 else if (!dif && dix)
400 prot_op = SCSI_PROT_READ_INSERT; 400 prot_op = SCSI_PROT_READ_INSERT;
401 } else { 401 } else {
402 if (dif && dix) 402 if (dif && dix)
403 prot_op = SCSI_PROT_WRITE_PASS; 403 prot_op = SCSI_PROT_WRITE_PASS;
404 else if (dif && !dix) 404 else if (dif && !dix)
405 prot_op = SCSI_PROT_WRITE_INSERT; 405 prot_op = SCSI_PROT_WRITE_INSERT;
406 else if (!dif && dix) 406 else if (!dif && dix)
407 prot_op = SCSI_PROT_WRITE_STRIP; 407 prot_op = SCSI_PROT_WRITE_STRIP;
408 } 408 }
409 409
410 scsi_set_prot_op(scmd, prot_op); 410 scsi_set_prot_op(scmd, prot_op);
411 scsi_set_prot_type(scmd, dif); 411 scsi_set_prot_type(scmd, dif);
412 } 412 }
413 413
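For reference, the dif/dix combinations handled above reduce to the following table; it is a restatement of the code, with short descriptions that follow common DIF/DIX usage and are explanatory only.

/*
 * sd_prot_op() summary:
 *
 *   direction  dif dix  prot_op
 *   READ        1   1   SCSI_PROT_READ_PASS     PI verified and passed to host
 *   READ        1   0   SCSI_PROT_READ_STRIP    target PI stripped by the HBA
 *   READ        0   1   SCSI_PROT_READ_INSERT   HBA generates PI for the host
 *   WRITE       1   1   SCSI_PROT_WRITE_PASS    PI passed through to the target
 *   WRITE       1   0   SCSI_PROT_WRITE_INSERT  HBA generates PI for the target
 *   WRITE       0   1   SCSI_PROT_WRITE_STRIP   host PI stripped before the wire
 *   any         0   0   SCSI_PROT_NORMAL        no protection information
 */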
414 /** 414 /**
415 * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device 415 * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device
416 * @sdp: scsi device to operate on 416 * @sdp: scsi device to operate on
417 * @rq: Request to prepare 417 * @rq: Request to prepare
418 * 418 *
419 * Will issue either UNMAP or WRITE SAME(16) depending on preference 419 * Will issue either UNMAP or WRITE SAME(16) depending on preference
420 * indicated by target device. 420 * indicated by target device.
421 **/ 421 **/
422 static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) 422 static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
423 { 423 {
424 struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); 424 struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
425 struct bio *bio = rq->bio; 425 struct bio *bio = rq->bio;
426 sector_t sector = bio->bi_sector; 426 sector_t sector = bio->bi_sector;
427 unsigned int nr_sectors = bio_sectors(bio); 427 unsigned int nr_sectors = bio_sectors(bio);
428 unsigned int len; 428 unsigned int len;
429 int ret; 429 int ret;
430 struct page *page; 430 struct page *page;
431 431
432 if (sdkp->device->sector_size == 4096) { 432 if (sdkp->device->sector_size == 4096) {
433 sector >>= 3; 433 sector >>= 3;
434 nr_sectors >>= 3; 434 nr_sectors >>= 3;
435 } 435 }
436 436
437 rq->timeout = SD_TIMEOUT; 437 rq->timeout = SD_TIMEOUT;
438 438
439 memset(rq->cmd, 0, rq->cmd_len); 439 memset(rq->cmd, 0, rq->cmd_len);
440 440
441 page = alloc_page(GFP_ATOMIC | __GFP_ZERO); 441 page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
442 if (!page) 442 if (!page)
443 return BLKPREP_DEFER; 443 return BLKPREP_DEFER;
444 444
445 if (sdkp->unmap) { 445 if (sdkp->unmap) {
446 char *buf = page_address(page); 446 char *buf = page_address(page);
447 447
448 rq->cmd_len = 10; 448 rq->cmd_len = 10;
449 rq->cmd[0] = UNMAP; 449 rq->cmd[0] = UNMAP;
450 rq->cmd[8] = 24; 450 rq->cmd[8] = 24;
451 451
452 put_unaligned_be16(6 + 16, &buf[0]); 452 put_unaligned_be16(6 + 16, &buf[0]);
453 put_unaligned_be16(16, &buf[2]); 453 put_unaligned_be16(16, &buf[2]);
454 put_unaligned_be64(sector, &buf[8]); 454 put_unaligned_be64(sector, &buf[8]);
455 put_unaligned_be32(nr_sectors, &buf[16]); 455 put_unaligned_be32(nr_sectors, &buf[16]);
456 456
457 len = 24; 457 len = 24;
458 } else { 458 } else {
459 rq->cmd_len = 16; 459 rq->cmd_len = 16;
460 rq->cmd[0] = WRITE_SAME_16; 460 rq->cmd[0] = WRITE_SAME_16;
461 rq->cmd[1] = 0x8; /* UNMAP */ 461 rq->cmd[1] = 0x8; /* UNMAP */
462 put_unaligned_be64(sector, &rq->cmd[2]); 462 put_unaligned_be64(sector, &rq->cmd[2]);
463 put_unaligned_be32(nr_sectors, &rq->cmd[10]); 463 put_unaligned_be32(nr_sectors, &rq->cmd[10]);
464 464
465 len = sdkp->device->sector_size; 465 len = sdkp->device->sector_size;
466 } 466 }
467 467
468 blk_add_request_payload(rq, page, len); 468 blk_add_request_payload(rq, page, len);
469 ret = scsi_setup_blk_pc_cmnd(sdp, rq); 469 ret = scsi_setup_blk_pc_cmnd(sdp, rq);
470 rq->buffer = page_address(page); 470 rq->buffer = page_address(page);
471 if (ret != BLKPREP_OK) { 471 if (ret != BLKPREP_OK) {
472 __free_page(page); 472 __free_page(page);
473 rq->buffer = NULL; 473 rq->buffer = NULL;
474 } 474 }
475 return ret; 475 return ret;
476 } 476 }
477 477
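For reference, the 24-byte UNMAP parameter list assembled in the branch above lays out as follows; the offsets come straight from the put_unaligned_be*() calls, while the field names follow common SBC terminology and are descriptive only.

/*
 * UNMAP payload built above (one block descriptor):
 *
 *   buf[0..1]   = 22   UNMAP data length (6 header bytes + one 16-byte descriptor)
 *   buf[2..3]   = 16   block descriptor data length
 *   buf[4..7]   = 0    reserved
 *   buf[8..15]  = starting LBA      (big-endian 64 bit)
 *   buf[16..19] = number of LBAs    (big-endian 32 bit)
 *   buf[20..23] = 0    reserved
 *
 * rq->cmd[8] = 24 carries the same parameter list length in the CDB.
 */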
478 static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) 478 static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
479 { 479 {
480 rq->timeout = SD_TIMEOUT; 480 rq->timeout = SD_TIMEOUT;
481 rq->retries = SD_MAX_RETRIES; 481 rq->retries = SD_MAX_RETRIES;
482 rq->cmd[0] = SYNCHRONIZE_CACHE; 482 rq->cmd[0] = SYNCHRONIZE_CACHE;
483 rq->cmd_len = 10; 483 rq->cmd_len = 10;
484 484
485 return scsi_setup_blk_pc_cmnd(sdp, rq); 485 return scsi_setup_blk_pc_cmnd(sdp, rq);
486 } 486 }
487 487
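The resulting flush CDB is a bare SYNCHRONIZE CACHE(10), sketched below under the assumption that the request's cmd[] array starts out zero-filled, so every field other than the opcode stays 0.

/*
 * CDB emitted by scsi_setup_flush_cmnd():
 *
 *   cmd[0]    = SYNCHRONIZE_CACHE (0x35)
 *   cmd[1..9] = 0   -> LBA 0, number of blocks 0: flush the whole cache
 *
 * The request itself runs with SD_TIMEOUT and SD_MAX_RETRIES.
 */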
488 static void sd_unprep_fn(struct request_queue *q, struct request *rq) 488 static void sd_unprep_fn(struct request_queue *q, struct request *rq)
489 { 489 {
490 if (rq->cmd_flags & REQ_DISCARD) { 490 if (rq->cmd_flags & REQ_DISCARD) {
491 free_page((unsigned long)rq->buffer); 491 free_page((unsigned long)rq->buffer);
492 rq->buffer = NULL; 492 rq->buffer = NULL;
493 } 493 }
494 } 494 }
495 495
496 /** 496 /**
497 * sd_prep_fn - build a scsi (read or write) command from 497 * sd_prep_fn - build a scsi (read or write) command from
498 * information in the request structure. 498 * information in the request structure.
499 * @q: request queue the request has been queued on 499 * @q: request queue the request has been queued on
500 * @rq: request to prepare a command for 500 * @rq: request to prepare a command for
501 * 501 *
502 * Returns a BLKPREP_* value (BLKPREP_OK when the command is ready). 502 * Returns a BLKPREP_* value (BLKPREP_OK when the command is ready).
503 **/ 503 **/
504 static int sd_prep_fn(struct request_queue *q, struct request *rq) 504 static int sd_prep_fn(struct request_queue *q, struct request *rq)
505 { 505 {
506 struct scsi_cmnd *SCpnt; 506 struct scsi_cmnd *SCpnt;
507 struct scsi_device *sdp = q->queuedata; 507 struct scsi_device *sdp = q->queuedata;
508 struct gendisk *disk = rq->rq_disk; 508 struct gendisk *disk = rq->rq_disk;
509 struct scsi_disk *sdkp; 509 struct scsi_disk *sdkp;
510 sector_t block = blk_rq_pos(rq); 510 sector_t block = blk_rq_pos(rq);
511 sector_t threshold; 511 sector_t threshold;
512 unsigned int this_count = blk_rq_sectors(rq); 512 unsigned int this_count = blk_rq_sectors(rq);
513 int ret, host_dif; 513 int ret, host_dif;
514 unsigned char protect; 514 unsigned char protect;
515 515
516 /* 516 /*
517 * Discard requests come in as REQ_TYPE_FS but we turn them into 517 * Discard requests come in as REQ_TYPE_FS but we turn them into
518 * block PC requests to make life easier. 518 * block PC requests to make life easier.
519 */ 519 */
520 if (rq->cmd_flags & REQ_DISCARD) { 520 if (rq->cmd_flags & REQ_DISCARD) {
521 ret = scsi_setup_discard_cmnd(sdp, rq); 521 ret = scsi_setup_discard_cmnd(sdp, rq);
522 goto out; 522 goto out;
523 } else if (rq->cmd_flags & REQ_FLUSH) { 523 } else if (rq->cmd_flags & REQ_FLUSH) {
524 ret = scsi_setup_flush_cmnd(sdp, rq); 524 ret = scsi_setup_flush_cmnd(sdp, rq);
525 goto out; 525 goto out;
526 } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 526 } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
527 ret = scsi_setup_blk_pc_cmnd(sdp, rq); 527 ret = scsi_setup_blk_pc_cmnd(sdp, rq);
528 goto out; 528 goto out;
529 } else if (rq->cmd_type != REQ_TYPE_FS) { 529 } else if (rq->cmd_type != REQ_TYPE_FS) {
530 ret = BLKPREP_KILL; 530 ret = BLKPREP_KILL;
531 goto out; 531 goto out;
532 } 532 }
533 ret = scsi_setup_fs_cmnd(sdp, rq); 533 ret = scsi_setup_fs_cmnd(sdp, rq);
534 if (ret != BLKPREP_OK) 534 if (ret != BLKPREP_OK)
535 goto out; 535 goto out;
536 SCpnt = rq->special; 536 SCpnt = rq->special;
537 sdkp = scsi_disk(disk); 537 sdkp = scsi_disk(disk);
538 538
539 /* from here on until we're complete, any goto out 539 /* from here on until we're complete, any goto out
540 * is used for a killable error condition */ 540 * is used for a killable error condition */
541 ret = BLKPREP_KILL; 541 ret = BLKPREP_KILL;
542 542
543 SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, 543 SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt,
544 "sd_init_command: block=%llu, " 544 "sd_init_command: block=%llu, "
545 "count=%d\n", 545 "count=%d\n",
546 (unsigned long long)block, 546 (unsigned long long)block,
547 this_count)); 547 this_count));
548 548
549 if (!sdp || !scsi_device_online(sdp) || 549 if (!sdp || !scsi_device_online(sdp) ||
550 block + blk_rq_sectors(rq) > get_capacity(disk)) { 550 block + blk_rq_sectors(rq) > get_capacity(disk)) {
551 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, 551 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
552 "Finishing %u sectors\n", 552 "Finishing %u sectors\n",
553 blk_rq_sectors(rq))); 553 blk_rq_sectors(rq)));
554 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, 554 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
555 "Retry with 0x%p\n", SCpnt)); 555 "Retry with 0x%p\n", SCpnt));
556 goto out; 556 goto out;
557 } 557 }
558 558
559 if (sdp->changed) { 559 if (sdp->changed) {
560 /* 560 /*
561 * quietly refuse to do anything to a changed disc until 561 * quietly refuse to do anything to a changed disc until
562 * the changed bit has been reset 562 * the changed bit has been reset
563 */ 563 */
564 /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */ 564 /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
565 goto out; 565 goto out;
566 } 566 }
567 567
568 /* 568 /*
569 * Some SD card readers can't handle multi-sector accesses which touch 569 * Some SD card readers can't handle multi-sector accesses which touch
570 * the last one or two hardware sectors. Split accesses as needed. 570 * the last one or two hardware sectors. Split accesses as needed.
571 */ 571 */
572 threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS * 572 threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
573 (sdp->sector_size / 512); 573 (sdp->sector_size / 512);
574 574
575 if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) { 575 if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
576 if (block < threshold) { 576 if (block < threshold) {
577 /* Access up to the threshold but not beyond */ 577 /* Access up to the threshold but not beyond */
578 this_count = threshold - block; 578 this_count = threshold - block;
579 } else { 579 } else {
580 /* Access only a single hardware sector */ 580 /* Access only a single hardware sector */
581 this_count = sdp->sector_size / 512; 581 this_count = sdp->sector_size / 512;
582 } 582 }
583 } 583 }
584 584
585 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n", 585 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
586 (unsigned long long)block)); 586 (unsigned long long)block));
587 587
588 /* 588 /*
589 * If we have a 1K hardware sectorsize, prevent access to single 589 * If we have a 1K hardware sectorsize, prevent access to single
590 * 512 byte sectors. In theory we could handle this - in fact 590 * 512 byte sectors. In theory we could handle this - in fact
591 * the scsi cdrom driver must be able to handle this because 591 * the scsi cdrom driver must be able to handle this because
592 * we typically use 1K blocksizes, and cdroms typically have 592 * we typically use 1K blocksizes, and cdroms typically have
593 * 2K hardware sectorsizes. Of course, things are simpler 593 * 2K hardware sectorsizes. Of course, things are simpler
594 * with the cdrom, since it is read-only. For performance 594 * with the cdrom, since it is read-only. For performance
595 * reasons, the filesystems should be able to handle this 595 * reasons, the filesystems should be able to handle this
596 * and not force the scsi disk driver to use bounce buffers 596 * and not force the scsi disk driver to use bounce buffers
597 * for this. 597 * for this.
598 */ 598 */
599 if (sdp->sector_size == 1024) { 599 if (sdp->sector_size == 1024) {
600 if ((block & 1) || (blk_rq_sectors(rq) & 1)) { 600 if ((block & 1) || (blk_rq_sectors(rq) & 1)) {
601 scmd_printk(KERN_ERR, SCpnt, 601 scmd_printk(KERN_ERR, SCpnt,
602 "Bad block number requested\n"); 602 "Bad block number requested\n");
603 goto out; 603 goto out;
604 } else { 604 } else {
605 block = block >> 1; 605 block = block >> 1;
606 this_count = this_count >> 1; 606 this_count = this_count >> 1;
607 } 607 }
608 } 608 }
609 if (sdp->sector_size == 2048) { 609 if (sdp->sector_size == 2048) {
610 if ((block & 3) || (blk_rq_sectors(rq) & 3)) { 610 if ((block & 3) || (blk_rq_sectors(rq) & 3)) {
611 scmd_printk(KERN_ERR, SCpnt, 611 scmd_printk(KERN_ERR, SCpnt,
612 "Bad block number requested\n"); 612 "Bad block number requested\n");
613 goto out; 613 goto out;
614 } else { 614 } else {
615 block = block >> 2; 615 block = block >> 2;
616 this_count = this_count >> 2; 616 this_count = this_count >> 2;
617 } 617 }
618 } 618 }
619 if (sdp->sector_size == 4096) { 619 if (sdp->sector_size == 4096) {
620 if ((block & 7) || (blk_rq_sectors(rq) & 7)) { 620 if ((block & 7) || (blk_rq_sectors(rq) & 7)) {
621 scmd_printk(KERN_ERR, SCpnt, 621 scmd_printk(KERN_ERR, SCpnt,
622 "Bad block number requested\n"); 622 "Bad block number requested\n");
623 goto out; 623 goto out;
624 } else { 624 } else {
625 block = block >> 3; 625 block = block >> 3;
626 this_count = this_count >> 3; 626 this_count = this_count >> 3;
627 } 627 }
628 } 628 }
629 if (rq_data_dir(rq) == WRITE) { 629 if (rq_data_dir(rq) == WRITE) {
630 if (!sdp->writeable) { 630 if (!sdp->writeable) {
631 goto out; 631 goto out;
632 } 632 }
633 SCpnt->cmnd[0] = WRITE_6; 633 SCpnt->cmnd[0] = WRITE_6;
634 SCpnt->sc_data_direction = DMA_TO_DEVICE; 634 SCpnt->sc_data_direction = DMA_TO_DEVICE;
635 635
636 if (blk_integrity_rq(rq) && 636 if (blk_integrity_rq(rq) &&
637 sd_dif_prepare(rq, block, sdp->sector_size) == -EIO) 637 sd_dif_prepare(rq, block, sdp->sector_size) == -EIO)
638 goto out; 638 goto out;
639 639
640 } else if (rq_data_dir(rq) == READ) { 640 } else if (rq_data_dir(rq) == READ) {
641 SCpnt->cmnd[0] = READ_6; 641 SCpnt->cmnd[0] = READ_6;
642 SCpnt->sc_data_direction = DMA_FROM_DEVICE; 642 SCpnt->sc_data_direction = DMA_FROM_DEVICE;
643 } else { 643 } else {
644 scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags); 644 scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags);
645 goto out; 645 goto out;
646 } 646 }
647 647
648 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, 648 SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
649 "%s %d/%u 512 byte blocks.\n", 649 "%s %d/%u 512 byte blocks.\n",
650 (rq_data_dir(rq) == WRITE) ? 650 (rq_data_dir(rq) == WRITE) ?
651 "writing" : "reading", this_count, 651 "writing" : "reading", this_count,
652 blk_rq_sectors(rq))); 652 blk_rq_sectors(rq)));
653 653
654 /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ 654 /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
655 host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); 655 host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
656 if (host_dif) 656 if (host_dif)
657 protect = 1 << 5; 657 protect = 1 << 5;
658 else 658 else
659 protect = 0; 659 protect = 0;
660 660
661 if (host_dif == SD_DIF_TYPE2_PROTECTION) { 661 if (host_dif == SD_DIF_TYPE2_PROTECTION) {
662 SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); 662 SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC);
663 663
664 if (unlikely(SCpnt->cmnd == NULL)) { 664 if (unlikely(SCpnt->cmnd == NULL)) {
665 ret = BLKPREP_DEFER; 665 ret = BLKPREP_DEFER;
666 goto out; 666 goto out;
667 } 667 }
668 668
669 SCpnt->cmd_len = SD_EXT_CDB_SIZE; 669 SCpnt->cmd_len = SD_EXT_CDB_SIZE;
670 memset(SCpnt->cmnd, 0, SCpnt->cmd_len); 670 memset(SCpnt->cmnd, 0, SCpnt->cmd_len);
671 SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD; 671 SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD;
672 SCpnt->cmnd[7] = 0x18; 672 SCpnt->cmnd[7] = 0x18;
673 SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32; 673 SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
674 SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); 674 SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
675 675
676 /* LBA */ 676 /* LBA */
677 SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; 677 SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
678 SCpnt->cmnd[13] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; 678 SCpnt->cmnd[13] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
679 SCpnt->cmnd[14] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; 679 SCpnt->cmnd[14] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
680 SCpnt->cmnd[15] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0; 680 SCpnt->cmnd[15] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0;
681 SCpnt->cmnd[16] = (unsigned char) (block >> 24) & 0xff; 681 SCpnt->cmnd[16] = (unsigned char) (block >> 24) & 0xff;
682 SCpnt->cmnd[17] = (unsigned char) (block >> 16) & 0xff; 682 SCpnt->cmnd[17] = (unsigned char) (block >> 16) & 0xff;
683 SCpnt->cmnd[18] = (unsigned char) (block >> 8) & 0xff; 683 SCpnt->cmnd[18] = (unsigned char) (block >> 8) & 0xff;
684 SCpnt->cmnd[19] = (unsigned char) block & 0xff; 684 SCpnt->cmnd[19] = (unsigned char) block & 0xff;
685 685
686 /* Expected Indirect LBA */ 686 /* Expected Indirect LBA */
687 SCpnt->cmnd[20] = (unsigned char) (block >> 24) & 0xff; 687 SCpnt->cmnd[20] = (unsigned char) (block >> 24) & 0xff;
688 SCpnt->cmnd[21] = (unsigned char) (block >> 16) & 0xff; 688 SCpnt->cmnd[21] = (unsigned char) (block >> 16) & 0xff;
689 SCpnt->cmnd[22] = (unsigned char) (block >> 8) & 0xff; 689 SCpnt->cmnd[22] = (unsigned char) (block >> 8) & 0xff;
690 SCpnt->cmnd[23] = (unsigned char) block & 0xff; 690 SCpnt->cmnd[23] = (unsigned char) block & 0xff;
691 691
692 /* Transfer length */ 692 /* Transfer length */
693 SCpnt->cmnd[28] = (unsigned char) (this_count >> 24) & 0xff; 693 SCpnt->cmnd[28] = (unsigned char) (this_count >> 24) & 0xff;
694 SCpnt->cmnd[29] = (unsigned char) (this_count >> 16) & 0xff; 694 SCpnt->cmnd[29] = (unsigned char) (this_count >> 16) & 0xff;
695 SCpnt->cmnd[30] = (unsigned char) (this_count >> 8) & 0xff; 695 SCpnt->cmnd[30] = (unsigned char) (this_count >> 8) & 0xff;
696 SCpnt->cmnd[31] = (unsigned char) this_count & 0xff; 696 SCpnt->cmnd[31] = (unsigned char) this_count & 0xff;
697 } else if (block > 0xffffffff) { 697 } else if (block > 0xffffffff) {
698 SCpnt->cmnd[0] += READ_16 - READ_6; 698 SCpnt->cmnd[0] += READ_16 - READ_6;
699 SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); 699 SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
700 SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; 700 SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
701 SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; 701 SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
702 SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; 702 SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
703 SCpnt->cmnd[5] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0; 703 SCpnt->cmnd[5] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0;
704 SCpnt->cmnd[6] = (unsigned char) (block >> 24) & 0xff; 704 SCpnt->cmnd[6] = (unsigned char) (block >> 24) & 0xff;
705 SCpnt->cmnd[7] = (unsigned char) (block >> 16) & 0xff; 705 SCpnt->cmnd[7] = (unsigned char) (block >> 16) & 0xff;
706 SCpnt->cmnd[8] = (unsigned char) (block >> 8) & 0xff; 706 SCpnt->cmnd[8] = (unsigned char) (block >> 8) & 0xff;
707 SCpnt->cmnd[9] = (unsigned char) block & 0xff; 707 SCpnt->cmnd[9] = (unsigned char) block & 0xff;
708 SCpnt->cmnd[10] = (unsigned char) (this_count >> 24) & 0xff; 708 SCpnt->cmnd[10] = (unsigned char) (this_count >> 24) & 0xff;
709 SCpnt->cmnd[11] = (unsigned char) (this_count >> 16) & 0xff; 709 SCpnt->cmnd[11] = (unsigned char) (this_count >> 16) & 0xff;
710 SCpnt->cmnd[12] = (unsigned char) (this_count >> 8) & 0xff; 710 SCpnt->cmnd[12] = (unsigned char) (this_count >> 8) & 0xff;
711 SCpnt->cmnd[13] = (unsigned char) this_count & 0xff; 711 SCpnt->cmnd[13] = (unsigned char) this_count & 0xff;
712 SCpnt->cmnd[14] = SCpnt->cmnd[15] = 0; 712 SCpnt->cmnd[14] = SCpnt->cmnd[15] = 0;
713 } else if ((this_count > 0xff) || (block > 0x1fffff) || 713 } else if ((this_count > 0xff) || (block > 0x1fffff) ||
714 scsi_device_protection(SCpnt->device) || 714 scsi_device_protection(SCpnt->device) ||
715 SCpnt->device->use_10_for_rw) { 715 SCpnt->device->use_10_for_rw) {
716 if (this_count > 0xffff) 716 if (this_count > 0xffff)
717 this_count = 0xffff; 717 this_count = 0xffff;
718 718
719 SCpnt->cmnd[0] += READ_10 - READ_6; 719 SCpnt->cmnd[0] += READ_10 - READ_6;
720 SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); 720 SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
721 SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; 721 SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
722 SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; 722 SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
723 SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; 723 SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
724 SCpnt->cmnd[5] = (unsigned char) block & 0xff; 724 SCpnt->cmnd[5] = (unsigned char) block & 0xff;
725 SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0; 725 SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0;
726 SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; 726 SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
727 SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; 727 SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
728 } else { 728 } else {
729 if (unlikely(rq->cmd_flags & REQ_FUA)) { 729 if (unlikely(rq->cmd_flags & REQ_FUA)) {
730 /* 730 /*
731 * This happens only if this drive failed 731 * This happens only if this drive failed
732 * 10byte rw command with ILLEGAL_REQUEST 732 * 10byte rw command with ILLEGAL_REQUEST
733 * during operation and thus turned off 733 * during operation and thus turned off
734 * use_10_for_rw. 734 * use_10_for_rw.
735 */ 735 */
736 scmd_printk(KERN_ERR, SCpnt, 736 scmd_printk(KERN_ERR, SCpnt,
737 "FUA write on READ/WRITE(6) drive\n"); 737 "FUA write on READ/WRITE(6) drive\n");
738 goto out; 738 goto out;
739 } 739 }
740 740
741 SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); 741 SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
742 SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff); 742 SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
743 SCpnt->cmnd[3] = (unsigned char) block & 0xff; 743 SCpnt->cmnd[3] = (unsigned char) block & 0xff;
744 SCpnt->cmnd[4] = (unsigned char) this_count; 744 SCpnt->cmnd[4] = (unsigned char) this_count;
745 SCpnt->cmnd[5] = 0; 745 SCpnt->cmnd[5] = 0;
746 } 746 }
747 SCpnt->sdb.length = this_count * sdp->sector_size; 747 SCpnt->sdb.length = this_count * sdp->sector_size;
748 748
749 /* If DIF or DIX is enabled, tell HBA how to handle request */ 749 /* If DIF or DIX is enabled, tell HBA how to handle request */
750 if (host_dif || scsi_prot_sg_count(SCpnt)) 750 if (host_dif || scsi_prot_sg_count(SCpnt))
751 sd_prot_op(SCpnt, host_dif); 751 sd_prot_op(SCpnt, host_dif);
752 752
753 /* 753 /*
754 * We shouldn't disconnect in the middle of a sector, so with a dumb 754 * We shouldn't disconnect in the middle of a sector, so with a dumb
755 * host adapter, it's safe to assume that we can at least transfer 755 * host adapter, it's safe to assume that we can at least transfer
756 * this many bytes between each connect / disconnect. 756 * this many bytes between each connect / disconnect.
757 */ 757 */
758 SCpnt->transfersize = sdp->sector_size; 758 SCpnt->transfersize = sdp->sector_size;
759 SCpnt->underflow = this_count << 9; 759 SCpnt->underflow = this_count << 9;
760 SCpnt->allowed = SD_MAX_RETRIES; 760 SCpnt->allowed = SD_MAX_RETRIES;
761 761
762 /* 762 /*
763 * This indicates that the command is ready from our end to be 763 * This indicates that the command is ready from our end to be
764 * queued. 764 * queued.
765 */ 765 */
766 ret = BLKPREP_OK; 766 ret = BLKPREP_OK;
767 out: 767 out:
768 return scsi_prep_return(q, rq, ret); 768 return scsi_prep_return(q, rq, ret);
769 } 769 }
770 770
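To keep the read/write branch above readable at a glance, the CDB variant is selected as summarized below; this is only a restatement of the if/else ladder, not new behaviour.

/*
 * CDB selection in sd_prep_fn(), in the order the checks are made:
 *
 *   host_dif == SD_DIF_TYPE2_PROTECTION            -> READ(32)/WRITE(32)
 *   block > 0xffffffff                             -> READ(16)/WRITE(16)
 *   this_count > 0xff || block > 0x1fffff ||
 *     device protection || use_10_for_rw           -> READ(10)/WRITE(10)
 *   otherwise                                      -> READ(6)/WRITE(6)
 *                                                     (REQ_FUA is rejected here)
 */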
771 /** 771 /**
772 * sd_open - open a scsi disk device 772 * sd_open - open a scsi disk device
773 * @inode: only i_rdev member may be used 773 * @inode: only i_rdev member may be used
774 * @filp: only f_mode and f_flags may be used 774 * @filp: only f_mode and f_flags may be used
775 * 775 *
776 * Returns 0 if successful. Returns a negated errno value in case 776 * Returns 0 if successful. Returns a negated errno value in case
777 * of error. 777 * of error.
778 * 778 *
779 * Note: This can be called from a user context (e.g. fsck(1) ) 779 * Note: This can be called from a user context (e.g. fsck(1) )
780 * or from within the kernel (e.g. as a result of a mount(1) ). 780 * or from within the kernel (e.g. as a result of a mount(1) ).
781 * In the latter case @inode and @filp carry an abridged amount 781 * In the latter case @inode and @filp carry an abridged amount
782 * of information as noted above. 782 * of information as noted above.
783 * 783 *
784 * Locking: called with bdev->bd_mutex held. 784 * Locking: called with bdev->bd_mutex held.
785 **/ 785 **/
786 static int sd_open(struct block_device *bdev, fmode_t mode) 786 static int sd_open(struct block_device *bdev, fmode_t mode)
787 { 787 {
788 struct scsi_disk *sdkp = scsi_disk_get(bdev->bd_disk); 788 struct scsi_disk *sdkp = scsi_disk_get(bdev->bd_disk);
789 struct scsi_device *sdev; 789 struct scsi_device *sdev;
790 int retval; 790 int retval;
791 791
792 if (!sdkp) 792 if (!sdkp)
793 return -ENXIO; 793 return -ENXIO;
794 794
795 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n")); 795 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n"));
796 796
797 sdev = sdkp->device; 797 sdev = sdkp->device;
798 798
799 retval = scsi_autopm_get_device(sdev); 799 retval = scsi_autopm_get_device(sdev);
800 if (retval) 800 if (retval)
801 goto error_autopm; 801 goto error_autopm;
802 802
803 /* 803 /*
804 * If the device is in error recovery, wait until it is done. 804 * If the device is in error recovery, wait until it is done.
805 * If the device is offline, then disallow any access to it. 805 * If the device is offline, then disallow any access to it.
806 */ 806 */
807 retval = -ENXIO; 807 retval = -ENXIO;
808 if (!scsi_block_when_processing_errors(sdev)) 808 if (!scsi_block_when_processing_errors(sdev))
809 goto error_out; 809 goto error_out;
810 810
811 if (sdev->removable || sdkp->write_prot) 811 if (sdev->removable || sdkp->write_prot)
812 check_disk_change(bdev); 812 check_disk_change(bdev);
813 813
814 /* 814 /*
815 * If the drive is empty, just let the open fail. 815 * If the drive is empty, just let the open fail.
816 */ 816 */
817 retval = -ENOMEDIUM; 817 retval = -ENOMEDIUM;
818 if (sdev->removable && !sdkp->media_present && !(mode & FMODE_NDELAY)) 818 if (sdev->removable && !sdkp->media_present && !(mode & FMODE_NDELAY))
819 goto error_out; 819 goto error_out;
820 820
821 /* 821 /*
822 * If the device has the write protect tab set, have the open fail 822 * If the device has the write protect tab set, have the open fail
823 * if the user expects to be able to write to the thing. 823 * if the user expects to be able to write to the thing.
824 */ 824 */
825 retval = -EROFS; 825 retval = -EROFS;
826 if (sdkp->write_prot && (mode & FMODE_WRITE)) 826 if (sdkp->write_prot && (mode & FMODE_WRITE))
827 goto error_out; 827 goto error_out;
828 828
829 /* 829 /*
830 * It is possible that the disk changing stuff resulted in 830 * It is possible that the disk changing stuff resulted in
831 * the device being taken offline. If this is the case, 831 * the device being taken offline. If this is the case,
832 * report this to the user, and don't pretend that the 832 * report this to the user, and don't pretend that the
833 * open actually succeeded. 833 * open actually succeeded.
834 */ 834 */
835 retval = -ENXIO; 835 retval = -ENXIO;
836 if (!scsi_device_online(sdev)) 836 if (!scsi_device_online(sdev))
837 goto error_out; 837 goto error_out;
838 838
839 if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) { 839 if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) {
840 if (scsi_block_when_processing_errors(sdev)) 840 if (scsi_block_when_processing_errors(sdev))
841 scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); 841 scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
842 } 842 }
843 843
844 return 0; 844 return 0;
845 845
846 error_out: 846 error_out:
847 scsi_autopm_put_device(sdev); 847 scsi_autopm_put_device(sdev);
848 error_autopm: 848 error_autopm:
849 scsi_disk_put(sdkp); 849 scsi_disk_put(sdkp);
850 return retval; 850 return retval;
851 } 851 }
852 852
853 /** 853 /**
854 * sd_release - invoked when the (last) close(2) is called on this 854 * sd_release - invoked when the (last) close(2) is called on this
855 * scsi disk. 855 * scsi disk.
856 * @inode: only i_rdev member may be used 856 * @inode: only i_rdev member may be used
857 * @filp: only f_mode and f_flags may be used 857 * @filp: only f_mode and f_flags may be used
858 * 858 *
859 * Returns 0. 859 * Returns 0.
860 * 860 *
861 * Note: may block (uninterruptible) if error recovery is underway 861 * Note: may block (uninterruptible) if error recovery is underway
862 * on this disk. 862 * on this disk.
863 * 863 *
864 * Locking: called with bdev->bd_mutex held. 864 * Locking: called with bdev->bd_mutex held.
865 **/ 865 **/
866 static int sd_release(struct gendisk *disk, fmode_t mode) 866 static int sd_release(struct gendisk *disk, fmode_t mode)
867 { 867 {
868 struct scsi_disk *sdkp = scsi_disk(disk); 868 struct scsi_disk *sdkp = scsi_disk(disk);
869 struct scsi_device *sdev = sdkp->device; 869 struct scsi_device *sdev = sdkp->device;
870 870
871 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n")); 871 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
872 872
873 if (atomic_dec_return(&sdkp->openers) && sdev->removable) { 873 if (atomic_dec_return(&sdkp->openers) && sdev->removable) {
874 if (scsi_block_when_processing_errors(sdev)) 874 if (scsi_block_when_processing_errors(sdev))
875 scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); 875 scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
876 } 876 }
877 877
878 /* 878 /*
879 * XXX and what if there are packets in flight and this close() 879 * XXX and what if there are packets in flight and this close()
880 * XXX is followed by a "rmmod sd_mod"? 880 * XXX is followed by a "rmmod sd_mod"?
881 */ 881 */
882 882
883 scsi_autopm_put_device(sdev); 883 scsi_autopm_put_device(sdev);
884 scsi_disk_put(sdkp); 884 scsi_disk_put(sdkp);
885 return 0; 885 return 0;
886 } 886 }
887 887
888 static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 888 static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
889 { 889 {
890 struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); 890 struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
891 struct scsi_device *sdp = sdkp->device; 891 struct scsi_device *sdp = sdkp->device;
892 struct Scsi_Host *host = sdp->host; 892 struct Scsi_Host *host = sdp->host;
893 int diskinfo[4]; 893 int diskinfo[4];
894 894
895 /* default to most commonly used values */ 895 /* default to most commonly used values */
896 diskinfo[0] = 0x40; /* 1 << 6 */ 896 diskinfo[0] = 0x40; /* 1 << 6 */
897 diskinfo[1] = 0x20; /* 1 << 5 */ 897 diskinfo[1] = 0x20; /* 1 << 5 */
898 diskinfo[2] = sdkp->capacity >> 11; 898 diskinfo[2] = sdkp->capacity >> 11;
899 899
900 /* override with calculated, extended default, or driver values */ 900 /* override with calculated, extended default, or driver values */
901 if (host->hostt->bios_param) 901 if (host->hostt->bios_param)
902 host->hostt->bios_param(sdp, bdev, sdkp->capacity, diskinfo); 902 host->hostt->bios_param(sdp, bdev, sdkp->capacity, diskinfo);
903 else 903 else
904 scsicam_bios_param(bdev, sdkp->capacity, diskinfo); 904 scsicam_bios_param(bdev, sdkp->capacity, diskinfo);
905 905
906 geo->heads = diskinfo[0]; 906 geo->heads = diskinfo[0];
907 geo->sectors = diskinfo[1]; 907 geo->sectors = diskinfo[1];
908 geo->cylinders = diskinfo[2]; 908 geo->cylinders = diskinfo[2];
909 return 0; 909 return 0;
910 } 910 }
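
A note on the defaults above: 64 heads (0x40) and 32 sectors per track (0x20) give 2048 sectors per cylinder, which is why the fallback cylinder count is capacity >> 11. A minimal userspace sketch of that arithmetic, with an arbitrary example capacity:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t capacity = 156301488;       /* 512-byte sectors, example value only */
        unsigned heads = 0x40;               /* 64 */
        unsigned sectors = 0x20;             /* 32 per track */
        uint64_t cylinders = capacity >> 11; /* 64 * 32 = 2048 sectors per cylinder */

        printf("C/H/S = %llu/%u/%u\n",
               (unsigned long long)cylinders, heads, sectors);
        return 0;
}
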
911 911
912 /** 912 /**
913 * sd_ioctl - process an ioctl 913 * sd_ioctl - process an ioctl
914 * @inode: only i_rdev/i_bdev members may be used 914 * @inode: only i_rdev/i_bdev members may be used
915 * @filp: only f_mode and f_flags may be used 915 * @filp: only f_mode and f_flags may be used
916 * @cmd: ioctl command number 916 * @cmd: ioctl command number
917 * @arg: this is third argument given to ioctl(2) system call. 917 * @arg: this is third argument given to ioctl(2) system call.
918 * Often contains a pointer. 918 * Often contains a pointer.
919 * 919 *
920 * Returns 0 if successful (some ioctls return positive numbers on 920 * Returns 0 if successful (some ioctls return positive numbers on
921 * success as well). Returns a negated errno value in case of error. 921 * success as well). Returns a negated errno value in case of error.
922 * 922 *
923 * Note: most ioctls are forwarded onto the block subsystem or further 923 * Note: most ioctls are forwarded onto the block subsystem or further
924 * down in the scsi subsystem. 924 * down in the scsi subsystem.
925 **/ 925 **/
926 static int sd_ioctl(struct block_device *bdev, fmode_t mode, 926 static int sd_ioctl(struct block_device *bdev, fmode_t mode,
927 unsigned int cmd, unsigned long arg) 927 unsigned int cmd, unsigned long arg)
928 { 928 {
929 struct gendisk *disk = bdev->bd_disk; 929 struct gendisk *disk = bdev->bd_disk;
930 struct scsi_device *sdp = scsi_disk(disk)->device; 930 struct scsi_device *sdp = scsi_disk(disk)->device;
931 void __user *p = (void __user *)arg; 931 void __user *p = (void __user *)arg;
932 int error; 932 int error;
933 933
934 SCSI_LOG_IOCTL(1, printk("sd_ioctl: disk=%s, cmd=0x%x\n", 934 SCSI_LOG_IOCTL(1, printk("sd_ioctl: disk=%s, cmd=0x%x\n",
935 disk->disk_name, cmd)); 935 disk->disk_name, cmd));
936 936
937 /* 937 /*
938 * If we are in the middle of error recovery, don't let anyone 938 * If we are in the middle of error recovery, don't let anyone
939 * else try and use this device. Also, if error recovery fails, it 939 * else try and use this device. Also, if error recovery fails, it
940 * may try and take the device offline, in which case all further 940 * may try and take the device offline, in which case all further
941 * access to the device is prohibited. 941 * access to the device is prohibited.
942 */ 942 */
943 error = scsi_nonblockable_ioctl(sdp, cmd, p, 943 error = scsi_nonblockable_ioctl(sdp, cmd, p,
944 (mode & FMODE_NDELAY) != 0); 944 (mode & FMODE_NDELAY) != 0);
945 if (!scsi_block_when_processing_errors(sdp) || !error) 945 if (!scsi_block_when_processing_errors(sdp) || !error)
946 goto out; 946 goto out;
947 947
948 /* 948 /*
949 * Send SCSI addressing ioctls directly to mid level, send other 949 * Send SCSI addressing ioctls directly to mid level, send other
950 * ioctls to block level and then onto mid level if they can't be 950 * ioctls to block level and then onto mid level if they can't be
951 * resolved. 951 * resolved.
952 */ 952 */
953 switch (cmd) { 953 switch (cmd) {
954 case SCSI_IOCTL_GET_IDLUN: 954 case SCSI_IOCTL_GET_IDLUN:
955 case SCSI_IOCTL_GET_BUS_NUMBER: 955 case SCSI_IOCTL_GET_BUS_NUMBER:
956 error = scsi_ioctl(sdp, cmd, p); 956 error = scsi_ioctl(sdp, cmd, p);
957 break; 957 break;
958 default: 958 default:
959 error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p); 959 error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p);
960 if (error != -ENOTTY) 960 if (error != -ENOTTY)
961 break; 961 break;
962 error = scsi_ioctl(sdp, cmd, p); 962 error = scsi_ioctl(sdp, cmd, p);
963 break; 963 break;
964 } 964 }
965 out: 965 out:
966 return error; 966 return error;
967 } 967 }
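
The switch above sends the two SCSI addressing ioctls straight to the mid layer and routes everything else through scsi_cmd_ioctl() first, falling back to scsi_ioctl() only when the block layer answers -ENOTTY. A tiny userspace sketch of that fall-through dispatch shape (the handlers and the command value are stand-ins, not kernel APIs):

#include <errno.h>
#include <stdio.h>

static int block_layer_ioctl(unsigned int cmd)
{
        return cmd == 0x1234 ? 0 : -ENOTTY;  /* pretend one command is generic */
}

static int mid_layer_ioctl(unsigned int cmd)
{
        (void)cmd;
        return 0;                            /* pretend the mid layer handles the rest */
}

static int dispatch(unsigned int cmd)
{
        int error = block_layer_ioctl(cmd);  /* try the generic layer first */
        if (error != -ENOTTY)
                return error;                /* handled (or genuinely failed) there */
        return mid_layer_ioctl(cmd);         /* otherwise pass it further down */
}

int main(void)
{
        printf("%d %d\n", dispatch(0x1234), dispatch(0x9999));
        return 0;
}
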
968 968
969 static void set_media_not_present(struct scsi_disk *sdkp) 969 static void set_media_not_present(struct scsi_disk *sdkp)
970 { 970 {
971 sdkp->media_present = 0; 971 sdkp->media_present = 0;
972 sdkp->capacity = 0; 972 sdkp->capacity = 0;
973 sdkp->device->changed = 1; 973 sdkp->device->changed = 1;
974 } 974 }
975 975
976 /** 976 /**
977 * sd_media_changed - check if our medium changed 977 * sd_media_changed - check if our medium changed
978 * @disk: kernel device descriptor 978 * @disk: kernel device descriptor
979 * 979 *
980 * Returns 0 if not applicable or no change; 1 if change 980 * Returns 0 if not applicable or no change; 1 if change
981 * 981 *
982 * Note: this function is invoked from the block subsystem. 982 * Note: this function is invoked from the block subsystem.
983 **/ 983 **/
984 static int sd_media_changed(struct gendisk *disk) 984 static int sd_media_changed(struct gendisk *disk)
985 { 985 {
986 struct scsi_disk *sdkp = scsi_disk(disk); 986 struct scsi_disk *sdkp = scsi_disk(disk);
987 struct scsi_device *sdp = sdkp->device; 987 struct scsi_device *sdp = sdkp->device;
988 struct scsi_sense_hdr *sshdr = NULL; 988 struct scsi_sense_hdr *sshdr = NULL;
989 int retval; 989 int retval;
990 990
991 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_media_changed\n")); 991 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_media_changed\n"));
992 992
993 if (!sdp->removable) 993 if (!sdp->removable)
994 return 0; 994 return 0;
995 995
996 /* 996 /*
997 * If the device is offline, don't send any commands - just pretend as 997 * If the device is offline, don't send any commands - just pretend as
998 * if the command failed. If the device ever comes back online, we 998 * if the command failed. If the device ever comes back online, we
999 * can deal with it then. It is only because of unrecoverable errors 999 * can deal with it then. It is only because of unrecoverable errors
1000 * that we would ever take a device offline in the first place. 1000 * that we would ever take a device offline in the first place.
1001 */ 1001 */
1002 if (!scsi_device_online(sdp)) { 1002 if (!scsi_device_online(sdp)) {
1003 set_media_not_present(sdkp); 1003 set_media_not_present(sdkp);
1004 retval = 1; 1004 retval = 1;
1005 goto out; 1005 goto out;
1006 } 1006 }
1007 1007
1008 /* 1008 /*
1009 * Using TEST_UNIT_READY enables differentiation among a drive with 1009 * Using TEST_UNIT_READY enables differentiation among a drive with
1010 * no cartridge loaded - NOT READY, a drive with a changed cartridge - 1010 * no cartridge loaded - NOT READY, a drive with a changed cartridge -
1011 * UNIT ATTENTION, and one with the same cartridge - GOOD STATUS. 1011 * UNIT ATTENTION, and one with the same cartridge - GOOD STATUS.
1012 * 1012 *
1013 * Drives that auto spin down, e.g. the iomega jaz 1G, will be started 1013 * Drives that auto spin down, e.g. the iomega jaz 1G, will be started
1014 * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever 1014 * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever
1015 * the disk is revalidated. 1015 * the disk is revalidated.
1016 */ 1016 */
1017 retval = -ENODEV; 1017 retval = -ENODEV;
1018 1018
1019 if (scsi_block_when_processing_errors(sdp)) { 1019 if (scsi_block_when_processing_errors(sdp)) {
1020 sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); 1020 sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL);
1021 retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES, 1021 retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES,
1022 sshdr); 1022 sshdr);
1023 } 1023 }
1024 1024
1025 /* 1025 /*
1026 * Unable to test, unit probably not ready. This usually 1026 * Unable to test, unit probably not ready. This usually
1027 * means there is no disc in the drive. Mark as changed, 1027 * means there is no disc in the drive. Mark as changed,
1028 * and we will figure it out later once the drive is 1028 * and we will figure it out later once the drive is
1029 * available again. 1029 * available again.
1030 */ 1030 */
1031 if (retval || (scsi_sense_valid(sshdr) && 1031 if (retval || (scsi_sense_valid(sshdr) &&
1032 /* 0x3a is medium not present */ 1032 /* 0x3a is medium not present */
1033 sshdr->asc == 0x3a)) { 1033 sshdr->asc == 0x3a)) {
1034 set_media_not_present(sdkp); 1034 set_media_not_present(sdkp);
1035 retval = 1; 1035 retval = 1;
1036 goto out; 1036 goto out;
1037 } 1037 }
1038 1038
1039 /* 1039 /*
1040 * For removable scsi disks we have to recognise the presence 1040 * For removable scsi disks we have to recognise the presence
1041 * of a disk in the drive. This is kept in struct scsi_disk 1041 * of a disk in the drive. This is kept in struct scsi_disk
1042 * and tested at open! Daniel Roche (dan@lectra.fr) 1042 * and tested at open! Daniel Roche (dan@lectra.fr)
1043 */ 1043 */
1044 sdkp->media_present = 1; 1044 sdkp->media_present = 1;
1045 1045
1046 retval = sdp->changed; 1046 retval = sdp->changed;
1047 sdp->changed = 0; 1047 sdp->changed = 0;
1048 out: 1048 out:
1049 if (retval != sdkp->previous_state) 1049 if (retval != sdkp->previous_state)
1050 sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL); 1050 sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL);
1051 sdkp->previous_state = retval; 1051 sdkp->previous_state = retval;
1052 kfree(sshdr); 1052 kfree(sshdr);
1053 return retval; 1053 return retval;
1054 } 1054 }
1055 1055
1056 static int sd_sync_cache(struct scsi_disk *sdkp) 1056 static int sd_sync_cache(struct scsi_disk *sdkp)
1057 { 1057 {
1058 int retries, res; 1058 int retries, res;
1059 struct scsi_device *sdp = sdkp->device; 1059 struct scsi_device *sdp = sdkp->device;
1060 struct scsi_sense_hdr sshdr; 1060 struct scsi_sense_hdr sshdr;
1061 1061
1062 if (!scsi_device_online(sdp)) 1062 if (!scsi_device_online(sdp))
1063 return -ENODEV; 1063 return -ENODEV;
1064 1064
1065 1065
1066 for (retries = 3; retries > 0; --retries) { 1066 for (retries = 3; retries > 0; --retries) {
1067 unsigned char cmd[10] = { 0 }; 1067 unsigned char cmd[10] = { 0 };
1068 1068
1069 cmd[0] = SYNCHRONIZE_CACHE; 1069 cmd[0] = SYNCHRONIZE_CACHE;
1070 /* 1070 /*
1071 * Leave the rest of the command zero to indicate 1071 * Leave the rest of the command zero to indicate
1072 * flush everything. 1072 * flush everything.
1073 */ 1073 */
1074 res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, 1074 res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
1075 SD_TIMEOUT, SD_MAX_RETRIES, NULL); 1075 SD_TIMEOUT, SD_MAX_RETRIES, NULL);
1076 if (res == 0) 1076 if (res == 0)
1077 break; 1077 break;
1078 } 1078 }
1079 1079
1080 if (res) { 1080 if (res) {
1081 sd_print_result(sdkp, res); 1081 sd_print_result(sdkp, res);
1082 if (driver_byte(res) & DRIVER_SENSE) 1082 if (driver_byte(res) & DRIVER_SENSE)
1083 sd_print_sense_hdr(sdkp, &sshdr); 1083 sd_print_sense_hdr(sdkp, &sshdr);
1084 } 1084 }
1085 1085
1086 if (res) 1086 if (res)
1087 return -EIO; 1087 return -EIO;
1088 return 0; 1088 return 0;
1089 } 1089 }
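
sd_sync_cache() above builds a 10-byte SYNCHRONIZE CACHE CDB with every field after the opcode left zero, which asks the target to flush its entire cache, and retries the command up to three times. A small userspace sketch of that CDB layout (0x35 is the standard SYNCHRONIZE CACHE(10) opcode; the rest is purely illustrative):

#include <stdio.h>
#include <string.h>

#define SYNCHRONIZE_CACHE 0x35   /* SYNCHRONIZE CACHE(10) opcode */

int main(void)
{
        unsigned char cdb[10];

        memset(cdb, 0, sizeof(cdb)); /* LBA and block count 0: flush everything */
        cdb[0] = SYNCHRONIZE_CACHE;

        for (int i = 0; i < (int)sizeof(cdb); i++)
                printf("%02x ", cdb[i]);
        printf("\n");
        return 0;
}
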
1090 1090
1091 static void sd_rescan(struct device *dev) 1091 static void sd_rescan(struct device *dev)
1092 { 1092 {
1093 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); 1093 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
1094 1094
1095 if (sdkp) { 1095 if (sdkp) {
1096 revalidate_disk(sdkp->disk); 1096 revalidate_disk(sdkp->disk);
1097 scsi_disk_put(sdkp); 1097 scsi_disk_put(sdkp);
1098 } 1098 }
1099 } 1099 }
1100 1100
1101 1101
1102 #ifdef CONFIG_COMPAT 1102 #ifdef CONFIG_COMPAT
1103 /* 1103 /*
1104 * This gets directly called from VFS. When the ioctl 1104 * This gets directly called from VFS. When the ioctl
1105 * is not recognized we go back to the other translation paths. 1105 * is not recognized we go back to the other translation paths.
1106 */ 1106 */
1107 static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode, 1107 static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
1108 unsigned int cmd, unsigned long arg) 1108 unsigned int cmd, unsigned long arg)
1109 { 1109 {
1110 struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device; 1110 struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
1111 1111
1112 /* 1112 /*
1113 * If we are in the middle of error recovery, don't let anyone 1113 * If we are in the middle of error recovery, don't let anyone
1114 * else try and use this device. Also, if error recovery fails, it 1114 * else try and use this device. Also, if error recovery fails, it
1115 * may try and take the device offline, in which case all further 1115 * may try and take the device offline, in which case all further
1116 * access to the device is prohibited. 1116 * access to the device is prohibited.
1117 */ 1117 */
1118 if (!scsi_block_when_processing_errors(sdev)) 1118 if (!scsi_block_when_processing_errors(sdev))
1119 return -ENODEV; 1119 return -ENODEV;
1120 1120
1121 if (sdev->host->hostt->compat_ioctl) { 1121 if (sdev->host->hostt->compat_ioctl) {
1122 int ret; 1122 int ret;
1123 1123
1124 ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg); 1124 ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg);
1125 1125
1126 return ret; 1126 return ret;
1127 } 1127 }
1128 1128
1129 /* 1129 /*
1130 * Let the static ioctl translation table take care of it. 1130 * Let the static ioctl translation table take care of it.
1131 */ 1131 */
1132 return -ENOIOCTLCMD; 1132 return -ENOIOCTLCMD;
1133 } 1133 }
1134 #endif 1134 #endif
1135 1135
1136 static const struct block_device_operations sd_fops = { 1136 static const struct block_device_operations sd_fops = {
1137 .owner = THIS_MODULE, 1137 .owner = THIS_MODULE,
1138 .open = sd_open, 1138 .open = sd_open,
1139 .release = sd_release, 1139 .release = sd_release,
1140 .ioctl = sd_ioctl, 1140 .ioctl = sd_ioctl,
1141 .getgeo = sd_getgeo, 1141 .getgeo = sd_getgeo,
1142 #ifdef CONFIG_COMPAT 1142 #ifdef CONFIG_COMPAT
1143 .compat_ioctl = sd_compat_ioctl, 1143 .compat_ioctl = sd_compat_ioctl,
1144 #endif 1144 #endif
1145 .media_changed = sd_media_changed, 1145 .media_changed = sd_media_changed,
1146 .revalidate_disk = sd_revalidate_disk, 1146 .revalidate_disk = sd_revalidate_disk,
1147 .unlock_native_capacity = sd_unlock_native_capacity, 1147 .unlock_native_capacity = sd_unlock_native_capacity,
1148 }; 1148 };
1149 1149
1150 static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) 1150 static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
1151 { 1151 {
1152 u64 start_lba = blk_rq_pos(scmd->request); 1152 u64 start_lba = blk_rq_pos(scmd->request);
1153 u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512); 1153 u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
1154 u64 bad_lba; 1154 u64 bad_lba;
1155 int info_valid; 1155 int info_valid;
1156 1156
1157 if (scmd->request->cmd_type != REQ_TYPE_FS) 1157 if (scmd->request->cmd_type != REQ_TYPE_FS)
1158 return 0; 1158 return 0;
1159 1159
1160 info_valid = scsi_get_sense_info_fld(scmd->sense_buffer, 1160 info_valid = scsi_get_sense_info_fld(scmd->sense_buffer,
1161 SCSI_SENSE_BUFFERSIZE, 1161 SCSI_SENSE_BUFFERSIZE,
1162 &bad_lba); 1162 &bad_lba);
1163 if (!info_valid) 1163 if (!info_valid)
1164 return 0; 1164 return 0;
1165 1165
1166 if (scsi_bufflen(scmd) <= scmd->device->sector_size) 1166 if (scsi_bufflen(scmd) <= scmd->device->sector_size)
1167 return 0; 1167 return 0;
1168 1168
1169 if (scmd->device->sector_size < 512) { 1169 if (scmd->device->sector_size < 512) {
1170 /* only legitimate sector_size here is 256 */ 1170 /* only legitimate sector_size here is 256 */
1171 start_lba <<= 1; 1171 start_lba <<= 1;
1172 end_lba <<= 1; 1172 end_lba <<= 1;
1173 } else { 1173 } else {
1174 /* be careful ... don't want any overflows */ 1174 /* be careful ... don't want any overflows */
1175 u64 factor = scmd->device->sector_size / 512; 1175 u64 factor = scmd->device->sector_size / 512;
1176 do_div(start_lba, factor); 1176 do_div(start_lba, factor);
1177 do_div(end_lba, factor); 1177 do_div(end_lba, factor);
1178 } 1178 }
1179 1179
1180 /* The bad lba was reported incorrectly; we have no idea where 1180 /* The bad lba was reported incorrectly; we have no idea where
1181 * the error is. 1181 * the error is.
1182 */ 1182 */
1183 if (bad_lba < start_lba || bad_lba >= end_lba) 1183 if (bad_lba < start_lba || bad_lba >= end_lba)
1184 return 0; 1184 return 0;
1185 1185
1186 /* This computation should always be done in terms of 1186 /* This computation should always be done in terms of
1187 * the resolution of the device's medium. 1187 * the resolution of the device's medium.
1188 */ 1188 */
1189 return (bad_lba - start_lba) * scmd->device->sector_size; 1189 return (bad_lba - start_lba) * scmd->device->sector_size;
1190 } 1190 }
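
sd_completed_bytes() above converts the first bad LBA reported in the sense data into the number of bytes that completed before the error, after rescaling the request's 512-byte start/end positions to the device's logical block size. A runnable userspace sketch of the same arithmetic for a hypothetical 4096-byte-sector device (all values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned sector_size = 4096;      /* device logical block size */
        uint64_t start_lba = 1000;        /* request start, in 512-byte units */
        unsigned bufflen = 32 * 4096;     /* bytes in the transfer */
        uint64_t end_lba = start_lba + bufflen / 512;
        uint64_t bad_lba = 135;           /* sense INFORMATION field, device blocks */

        /* bring the request range into the medium's resolution, as the driver does */
        uint64_t factor = sector_size / 512;
        start_lba /= factor;              /* 1000 / 8 = 125 */
        end_lba /= factor;                /* 1256 / 8 = 157 */

        if (bad_lba < start_lba || bad_lba >= end_lba) {
                printf("bad LBA outside the request: 0 good bytes\n");
                return 0;
        }
        /* (135 - 125) * 4096 = 40960 bytes completed before the error */
        printf("good bytes = %llu\n",
               (unsigned long long)((bad_lba - start_lba) * sector_size));
        return 0;
}
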
1191 1191
1192 /** 1192 /**
1193 * sd_done - bottom half handler: called when the lower level 1193 * sd_done - bottom half handler: called when the lower level
1194 * driver has completed (successfully or otherwise) a scsi command. 1194 * driver has completed (successfully or otherwise) a scsi command.
1195 * @SCpnt: mid-level's per command structure. 1195 * @SCpnt: mid-level's per command structure.
1196 * 1196 *
1197 * Note: potentially run from within an ISR. Must not block. 1197 * Note: potentially run from within an ISR. Must not block.
1198 **/ 1198 **/
1199 static int sd_done(struct scsi_cmnd *SCpnt) 1199 static int sd_done(struct scsi_cmnd *SCpnt)
1200 { 1200 {
1201 int result = SCpnt->result; 1201 int result = SCpnt->result;
1202 unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); 1202 unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
1203 struct scsi_sense_hdr sshdr; 1203 struct scsi_sense_hdr sshdr;
1204 struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); 1204 struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
1205 int sense_valid = 0; 1205 int sense_valid = 0;
1206 int sense_deferred = 0; 1206 int sense_deferred = 0;
1207 1207
1208 if (SCpnt->request->cmd_flags & REQ_DISCARD) { 1208 if (SCpnt->request->cmd_flags & REQ_DISCARD) {
1209 if (!result) 1209 if (!result)
1210 scsi_set_resid(SCpnt, 0); 1210 scsi_set_resid(SCpnt, 0);
1211 return good_bytes; 1211 return good_bytes;
1212 } 1212 }
1213 1213
1214 if (result) { 1214 if (result) {
1215 sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); 1215 sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
1216 if (sense_valid) 1216 if (sense_valid)
1217 sense_deferred = scsi_sense_is_deferred(&sshdr); 1217 sense_deferred = scsi_sense_is_deferred(&sshdr);
1218 } 1218 }
1219 #ifdef CONFIG_SCSI_LOGGING 1219 #ifdef CONFIG_SCSI_LOGGING
1220 SCSI_LOG_HLCOMPLETE(1, scsi_print_result(SCpnt)); 1220 SCSI_LOG_HLCOMPLETE(1, scsi_print_result(SCpnt));
1221 if (sense_valid) { 1221 if (sense_valid) {
1222 SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, 1222 SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
1223 "sd_done: sb[respc,sk,asc," 1223 "sd_done: sb[respc,sk,asc,"
1224 "ascq]=%x,%x,%x,%x\n", 1224 "ascq]=%x,%x,%x,%x\n",
1225 sshdr.response_code, 1225 sshdr.response_code,
1226 sshdr.sense_key, sshdr.asc, 1226 sshdr.sense_key, sshdr.asc,
1227 sshdr.ascq)); 1227 sshdr.ascq));
1228 } 1228 }
1229 #endif 1229 #endif
1230 if (driver_byte(result) != DRIVER_SENSE && 1230 if (driver_byte(result) != DRIVER_SENSE &&
1231 (!sense_valid || sense_deferred)) 1231 (!sense_valid || sense_deferred))
1232 goto out; 1232 goto out;
1233 1233
1234 switch (sshdr.sense_key) { 1234 switch (sshdr.sense_key) {
1235 case HARDWARE_ERROR: 1235 case HARDWARE_ERROR:
1236 case MEDIUM_ERROR: 1236 case MEDIUM_ERROR:
1237 good_bytes = sd_completed_bytes(SCpnt); 1237 good_bytes = sd_completed_bytes(SCpnt);
1238 break; 1238 break;
1239 case RECOVERED_ERROR: 1239 case RECOVERED_ERROR:
1240 good_bytes = scsi_bufflen(SCpnt); 1240 good_bytes = scsi_bufflen(SCpnt);
1241 break; 1241 break;
1242 case NO_SENSE: 1242 case NO_SENSE:
1243 /* This indicates a false check condition, so ignore it. An 1243 /* This indicates a false check condition, so ignore it. An
1244 * unknown amount of data was transferred so treat it as an 1244 * unknown amount of data was transferred so treat it as an
1245 * error. 1245 * error.
1246 */ 1246 */
1247 scsi_print_sense("sd", SCpnt); 1247 scsi_print_sense("sd", SCpnt);
1248 SCpnt->result = 0; 1248 SCpnt->result = 0;
1249 memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); 1249 memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
1250 break; 1250 break;
1251 case ABORTED_COMMAND: /* DIF: Target detected corruption */ 1251 case ABORTED_COMMAND: /* DIF: Target detected corruption */
1252 case ILLEGAL_REQUEST: /* DIX: Host detected corruption */ 1252 case ILLEGAL_REQUEST: /* DIX: Host detected corruption */
1253 if (sshdr.asc == 0x10) 1253 if (sshdr.asc == 0x10)
1254 good_bytes = sd_completed_bytes(SCpnt); 1254 good_bytes = sd_completed_bytes(SCpnt);
1255 break; 1255 break;
1256 default: 1256 default:
1257 break; 1257 break;
1258 } 1258 }
1259 out: 1259 out:
1260 if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt)) 1260 if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
1261 sd_dif_complete(SCpnt, good_bytes); 1261 sd_dif_complete(SCpnt, good_bytes);
1262 1262
1263 if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type) 1263 if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type)
1264 == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) { 1264 == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) {
1265 1265
1266 /* We have to print a failed command here as the 1266 /* We have to print a failed command here as the
1267 * extended CDB gets freed before scsi_io_completion() 1267 * extended CDB gets freed before scsi_io_completion()
1268 * is called. 1268 * is called.
1269 */ 1269 */
1270 if (result) 1270 if (result)
1271 scsi_print_command(SCpnt); 1271 scsi_print_command(SCpnt);
1272 1272
1273 mempool_free(SCpnt->cmnd, sd_cdb_pool); 1273 mempool_free(SCpnt->cmnd, sd_cdb_pool);
1274 SCpnt->cmnd = NULL; 1274 SCpnt->cmnd = NULL;
1275 SCpnt->cmd_len = 0; 1275 SCpnt->cmd_len = 0;
1276 } 1276 }
1277 1277
1278 return good_bytes; 1278 return good_bytes;
1279 } 1279 }
1280 1280
1281 static int media_not_present(struct scsi_disk *sdkp, 1281 static int media_not_present(struct scsi_disk *sdkp,
1282 struct scsi_sense_hdr *sshdr) 1282 struct scsi_sense_hdr *sshdr)
1283 { 1283 {
1284 1284
1285 if (!scsi_sense_valid(sshdr)) 1285 if (!scsi_sense_valid(sshdr))
1286 return 0; 1286 return 0;
1287 /* not invoked for commands that could return deferred errors */ 1287 /* not invoked for commands that could return deferred errors */
1288 if (sshdr->sense_key != NOT_READY && 1288 if (sshdr->sense_key != NOT_READY &&
1289 sshdr->sense_key != UNIT_ATTENTION) 1289 sshdr->sense_key != UNIT_ATTENTION)
1290 return 0; 1290 return 0;
1291 if (sshdr->asc != 0x3A) /* medium not present */ 1291 if (sshdr->asc != 0x3A) /* medium not present */
1292 return 0; 1292 return 0;
1293 1293
1294 set_media_not_present(sdkp); 1294 set_media_not_present(sdkp);
1295 return 1; 1295 return 1;
1296 } 1296 }
1297 1297
1298 /* 1298 /*
1299 * spinup disk - called only in sd_revalidate_disk() 1299 * spinup disk - called only in sd_revalidate_disk()
1300 */ 1300 */
1301 static void 1301 static void
1302 sd_spinup_disk(struct scsi_disk *sdkp) 1302 sd_spinup_disk(struct scsi_disk *sdkp)
1303 { 1303 {
1304 unsigned char cmd[10]; 1304 unsigned char cmd[10];
1305 unsigned long spintime_expire = 0; 1305 unsigned long spintime_expire = 0;
1306 int retries, spintime; 1306 int retries, spintime;
1307 unsigned int the_result; 1307 unsigned int the_result;
1308 struct scsi_sense_hdr sshdr; 1308 struct scsi_sense_hdr sshdr;
1309 int sense_valid = 0; 1309 int sense_valid = 0;
1310 1310
1311 spintime = 0; 1311 spintime = 0;
1312 1312
1313 /* Spin up drives, as required. Only do this at boot time */ 1313 /* Spin up drives, as required. Only do this at boot time */
1314 /* Spinup needs to be done for module loads too. */ 1314 /* Spinup needs to be done for module loads too. */
1315 do { 1315 do {
1316 retries = 0; 1316 retries = 0;
1317 1317
1318 do { 1318 do {
1319 cmd[0] = TEST_UNIT_READY; 1319 cmd[0] = TEST_UNIT_READY;
1320 memset((void *) &cmd[1], 0, 9); 1320 memset((void *) &cmd[1], 0, 9);
1321 1321
1322 the_result = scsi_execute_req(sdkp->device, cmd, 1322 the_result = scsi_execute_req(sdkp->device, cmd,
1323 DMA_NONE, NULL, 0, 1323 DMA_NONE, NULL, 0,
1324 &sshdr, SD_TIMEOUT, 1324 &sshdr, SD_TIMEOUT,
1325 SD_MAX_RETRIES, NULL); 1325 SD_MAX_RETRIES, NULL);
1326 1326
1327 /* 1327 /*
1328 * If the drive has indicated to us that it 1328 * If the drive has indicated to us that it
1329 * doesn't have any media in it, don't bother 1329 * doesn't have any media in it, don't bother
1330 * with any more polling. 1330 * with any more polling.
1331 */ 1331 */
1332 if (media_not_present(sdkp, &sshdr)) 1332 if (media_not_present(sdkp, &sshdr))
1333 return; 1333 return;
1334 1334
1335 if (the_result) 1335 if (the_result)
1336 sense_valid = scsi_sense_valid(&sshdr); 1336 sense_valid = scsi_sense_valid(&sshdr);
1337 retries++; 1337 retries++;
1338 } while (retries < 3 && 1338 } while (retries < 3 &&
1339 (!scsi_status_is_good(the_result) || 1339 (!scsi_status_is_good(the_result) ||
1340 ((driver_byte(the_result) & DRIVER_SENSE) && 1340 ((driver_byte(the_result) & DRIVER_SENSE) &&
1341 sense_valid && sshdr.sense_key == UNIT_ATTENTION))); 1341 sense_valid && sshdr.sense_key == UNIT_ATTENTION)));
1342 1342
1343 if ((driver_byte(the_result) & DRIVER_SENSE) == 0) { 1343 if ((driver_byte(the_result) & DRIVER_SENSE) == 0) {
1344 /* no sense, TUR either succeeded or failed 1344 /* no sense, TUR either succeeded or failed
1345 * with a status error */ 1345 * with a status error */
1346 if (!spintime && !scsi_status_is_good(the_result)) { 1346 if (!spintime && !scsi_status_is_good(the_result)) {
1347 sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); 1347 sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n");
1348 sd_print_result(sdkp, the_result); 1348 sd_print_result(sdkp, the_result);
1349 } 1349 }
1350 break; 1350 break;
1351 } 1351 }
1352 1352
1353 /* 1353 /*
1354 * The device does not want the automatic start to be issued. 1354 * The device does not want the automatic start to be issued.
1355 */ 1355 */
1356 if (sdkp->device->no_start_on_add) 1356 if (sdkp->device->no_start_on_add)
1357 break; 1357 break;
1358 1358
1359 if (sense_valid && sshdr.sense_key == NOT_READY) { 1359 if (sense_valid && sshdr.sense_key == NOT_READY) {
1360 if (sshdr.asc == 4 && sshdr.ascq == 3) 1360 if (sshdr.asc == 4 && sshdr.ascq == 3)
1361 break; /* manual intervention required */ 1361 break; /* manual intervention required */
1362 if (sshdr.asc == 4 && sshdr.ascq == 0xb) 1362 if (sshdr.asc == 4 && sshdr.ascq == 0xb)
1363 break; /* standby */ 1363 break; /* standby */
1364 if (sshdr.asc == 4 && sshdr.ascq == 0xc) 1364 if (sshdr.asc == 4 && sshdr.ascq == 0xc)
1365 break; /* unavailable */ 1365 break; /* unavailable */
1366 /* 1366 /*
1367 * Issue command to spin up drive when not ready 1367 * Issue command to spin up drive when not ready
1368 */ 1368 */
1369 if (!spintime) { 1369 if (!spintime) {
1370 sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); 1370 sd_printk(KERN_NOTICE, sdkp, "Spinning up disk...");
1371 cmd[0] = START_STOP; 1371 cmd[0] = START_STOP;
1372 cmd[1] = 1; /* Return immediately */ 1372 cmd[1] = 1; /* Return immediately */
1373 memset((void *) &cmd[2], 0, 8); 1373 memset((void *) &cmd[2], 0, 8);
1374 cmd[4] = 1; /* Start spin cycle */ 1374 cmd[4] = 1; /* Start spin cycle */
1375 if (sdkp->device->start_stop_pwr_cond) 1375 if (sdkp->device->start_stop_pwr_cond)
1376 cmd[4] |= 1 << 4; 1376 cmd[4] |= 1 << 4;
1377 scsi_execute_req(sdkp->device, cmd, DMA_NONE, 1377 scsi_execute_req(sdkp->device, cmd, DMA_NONE,
1378 NULL, 0, &sshdr, 1378 NULL, 0, &sshdr,
1379 SD_TIMEOUT, SD_MAX_RETRIES, 1379 SD_TIMEOUT, SD_MAX_RETRIES,
1380 NULL); 1380 NULL);
1381 spintime_expire = jiffies + 100 * HZ; 1381 spintime_expire = jiffies + 100 * HZ;
1382 spintime = 1; 1382 spintime = 1;
1383 } 1383 }
1384 /* Wait 1 second for next try */ 1384 /* Wait 1 second for next try */
1385 msleep(1000); 1385 msleep(1000);
1386 printk("."); 1386 printk(".");
1387 1387
1388 /* 1388 /*
1389 * Wait for USB flash devices with slow firmware. 1389 * Wait for USB flash devices with slow firmware.
1390 * Yes, this sense key/ASC combination shouldn't 1390 * Yes, this sense key/ASC combination shouldn't
1391 * occur here. It's characteristic of these devices. 1391 * occur here. It's characteristic of these devices.
1392 */ 1392 */
1393 } else if (sense_valid && 1393 } else if (sense_valid &&
1394 sshdr.sense_key == UNIT_ATTENTION && 1394 sshdr.sense_key == UNIT_ATTENTION &&
1395 sshdr.asc == 0x28) { 1395 sshdr.asc == 0x28) {
1396 if (!spintime) { 1396 if (!spintime) {
1397 spintime_expire = jiffies + 5 * HZ; 1397 spintime_expire = jiffies + 5 * HZ;
1398 spintime = 1; 1398 spintime = 1;
1399 } 1399 }
1400 /* Wait 1 second for next try */ 1400 /* Wait 1 second for next try */
1401 msleep(1000); 1401 msleep(1000);
1402 } else { 1402 } else {
1403 /* we don't understand the sense code, so it's 1403 /* we don't understand the sense code, so it's
1404 * probably pointless to loop */ 1404 * probably pointless to loop */
1405 if (!spintime) { 1405 if (!spintime) {
1406 sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); 1406 sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n");
1407 sd_print_sense_hdr(sdkp, &sshdr); 1407 sd_print_sense_hdr(sdkp, &sshdr);
1408 } 1408 }
1409 break; 1409 break;
1410 } 1410 }
1411 1411
1412 } while (spintime && time_before_eq(jiffies, spintime_expire)); 1412 } while (spintime && time_before_eq(jiffies, spintime_expire));
1413 1413
1414 if (spintime) { 1414 if (spintime) {
1415 if (scsi_status_is_good(the_result)) 1415 if (scsi_status_is_good(the_result))
1416 printk("ready\n"); 1416 printk("ready\n");
1417 else 1417 else
1418 printk("not responding...\n"); 1418 printk("not responding...\n");
1419 } 1419 }
1420 } 1420 }
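
The spin-up path above issues START STOP UNIT with the "return immediately" bit in byte 1 and the start bit in byte 4, additionally ORing 1 << 4 into byte 4 when the device sets start_stop_pwr_cond. A minimal userspace sketch of that 6-byte CDB (0x1b is the standard START STOP UNIT opcode; the flag variable is a stand-in for the device field):

#include <stdio.h>
#include <string.h>

#define START_STOP 0x1b          /* START STOP UNIT opcode */

int main(void)
{
        int start_stop_pwr_cond = 0;   /* stand-in for the device's start_stop_pwr_cond */
        unsigned char cdb[6];

        memset(cdb, 0, sizeof(cdb));
        cdb[0] = START_STOP;
        cdb[1] = 1;                    /* return immediately, don't wait for spin-up */
        cdb[4] = 1;                    /* start the media */
        if (start_stop_pwr_cond)
                cdb[4] |= 1 << 4;      /* power-condition variant used by the driver */

        for (int i = 0; i < (int)sizeof(cdb); i++)
                printf("%02x ", cdb[i]);
        printf("\n");
        return 0;
}
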
1421 1421
1422 1422
1423 /* 1423 /*
1424 * Determine whether disk supports Data Integrity Field. 1424 * Determine whether disk supports Data Integrity Field.
1425 */ 1425 */
1426 static void sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer) 1426 static void sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer)
1427 { 1427 {
1428 struct scsi_device *sdp = sdkp->device; 1428 struct scsi_device *sdp = sdkp->device;
1429 u8 type; 1429 u8 type;
1430 1430
1431 if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) 1431 if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0)
1432 return; 1432 return;
1433 1433
1434 type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ 1434 type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */
1435 1435
1436 if (type == sdkp->protection_type || !sdkp->first_scan) 1436 if (type == sdkp->protection_type || !sdkp->first_scan)
1437 return; 1437 return;
1438 1438
1439 sdkp->protection_type = type; 1439 sdkp->protection_type = type;
1440 1440
1441 if (type > SD_DIF_TYPE3_PROTECTION) { 1441 if (type > SD_DIF_TYPE3_PROTECTION) {
1442 sd_printk(KERN_ERR, sdkp, "formatted with unsupported " \ 1442 sd_printk(KERN_ERR, sdkp, "formatted with unsupported " \
1443 "protection type %u. Disabling disk!\n", type); 1443 "protection type %u. Disabling disk!\n", type);
1444 sdkp->capacity = 0; 1444 sdkp->capacity = 0;
1445 return; 1445 return;
1446 } 1446 }
1447 1447
1448 if (scsi_host_dif_capable(sdp->host, type)) 1448 if (scsi_host_dif_capable(sdp->host, type))
1449 sd_printk(KERN_NOTICE, sdkp, 1449 sd_printk(KERN_NOTICE, sdkp,
1450 "Enabling DIF Type %u protection\n", type); 1450 "Enabling DIF Type %u protection\n", type);
1451 else 1451 else
1452 sd_printk(KERN_NOTICE, sdkp, 1452 sd_printk(KERN_NOTICE, sdkp,
1453 "Disabling DIF Type %u protection\n", type); 1453 "Disabling DIF Type %u protection\n", type);
1454 } 1454 }
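
The check above reads byte 12 of the READ CAPACITY(16) data: bit 0 says whether protection is enabled at all, and the driver derives the DIF type by adding one to the P_TYPE field in bits 3-1. A runnable userspace sketch of that decoding (the sample byte is invented):

#include <stdio.h>

int main(void)
{
        unsigned char byte12 = 0x03;              /* example: enabled, P_TYPE = 1 */

        if ((byte12 & 1) == 0) {
                printf("protection not enabled\n");
                return 0;
        }
        unsigned type = ((byte12 >> 1) & 7) + 1;  /* P_TYPE 0 maps to Type 1 */
        printf("DIF type %u\n", type);            /* prints 2 for this sample byte */
        return 0;
}
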
1455 1455
1456 static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, 1456 static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
1457 struct scsi_sense_hdr *sshdr, int sense_valid, 1457 struct scsi_sense_hdr *sshdr, int sense_valid,
1458 int the_result) 1458 int the_result)
1459 { 1459 {
1460 sd_print_result(sdkp, the_result); 1460 sd_print_result(sdkp, the_result);
1461 if (driver_byte(the_result) & DRIVER_SENSE) 1461 if (driver_byte(the_result) & DRIVER_SENSE)
1462 sd_print_sense_hdr(sdkp, sshdr); 1462 sd_print_sense_hdr(sdkp, sshdr);
1463 else 1463 else
1464 sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n"); 1464 sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n");
1465 1465
1466 /* 1466 /*
1467 * Set dirty bit for removable devices if not ready - 1467 * Set dirty bit for removable devices if not ready -
1468 * sometimes drives will not report this properly. 1468 * sometimes drives will not report this properly.
1469 */ 1469 */
1470 if (sdp->removable && 1470 if (sdp->removable &&
1471 sense_valid && sshdr->sense_key == NOT_READY) 1471 sense_valid && sshdr->sense_key == NOT_READY)
1472 sdp->changed = 1; 1472 sdp->changed = 1;
1473 1473
1474 /* 1474 /*
1475 * We used to set media_present to 0 here to indicate no media 1475 * We used to set media_present to 0 here to indicate no media
1476 * in the drive, but some drives fail read capacity even with 1476 * in the drive, but some drives fail read capacity even with
1477 * media present, so we can't do that. 1477 * media present, so we can't do that.
1478 */ 1478 */
1479 sdkp->capacity = 0; /* unknown mapped to zero - as usual */ 1479 sdkp->capacity = 0; /* unknown mapped to zero - as usual */
1480 } 1480 }
1481 1481
1482 #define RC16_LEN 32 1482 #define RC16_LEN 32
1483 #if RC16_LEN > SD_BUF_SIZE 1483 #if RC16_LEN > SD_BUF_SIZE
1484 #error RC16_LEN must not be more than SD_BUF_SIZE 1484 #error RC16_LEN must not be more than SD_BUF_SIZE
1485 #endif 1485 #endif
1486 1486
1487 #define READ_CAPACITY_RETRIES_ON_RESET 10 1487 #define READ_CAPACITY_RETRIES_ON_RESET 10
1488 1488
1489 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, 1489 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
1490 unsigned char *buffer) 1490 unsigned char *buffer)
1491 { 1491 {
1492 unsigned char cmd[16]; 1492 unsigned char cmd[16];
1493 struct scsi_sense_hdr sshdr; 1493 struct scsi_sense_hdr sshdr;
1494 int sense_valid = 0; 1494 int sense_valid = 0;
1495 int the_result; 1495 int the_result;
1496 int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; 1496 int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET;
1497 unsigned int alignment; 1497 unsigned int alignment;
1498 unsigned long long lba; 1498 unsigned long long lba;
1499 unsigned sector_size; 1499 unsigned sector_size;
1500 1500
1501 do { 1501 do {
1502 memset(cmd, 0, 16); 1502 memset(cmd, 0, 16);
1503 cmd[0] = SERVICE_ACTION_IN; 1503 cmd[0] = SERVICE_ACTION_IN;
1504 cmd[1] = SAI_READ_CAPACITY_16; 1504 cmd[1] = SAI_READ_CAPACITY_16;
1505 cmd[13] = RC16_LEN; 1505 cmd[13] = RC16_LEN;
1506 memset(buffer, 0, RC16_LEN); 1506 memset(buffer, 0, RC16_LEN);
1507 1507
1508 the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, 1508 the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
1509 buffer, RC16_LEN, &sshdr, 1509 buffer, RC16_LEN, &sshdr,
1510 SD_TIMEOUT, SD_MAX_RETRIES, NULL); 1510 SD_TIMEOUT, SD_MAX_RETRIES, NULL);
1511 1511
1512 if (media_not_present(sdkp, &sshdr)) 1512 if (media_not_present(sdkp, &sshdr))
1513 return -ENODEV; 1513 return -ENODEV;
1514 1514
1515 if (the_result) { 1515 if (the_result) {
1516 sense_valid = scsi_sense_valid(&sshdr); 1516 sense_valid = scsi_sense_valid(&sshdr);
1517 if (sense_valid && 1517 if (sense_valid &&
1518 sshdr.sense_key == ILLEGAL_REQUEST && 1518 sshdr.sense_key == ILLEGAL_REQUEST &&
1519 (sshdr.asc == 0x20 || sshdr.asc == 0x24) && 1519 (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
1520 sshdr.ascq == 0x00) 1520 sshdr.ascq == 0x00)
1521 /* Invalid Command Operation Code or 1521 /* Invalid Command Operation Code or
1522 * Invalid Field in CDB, just retry 1522 * Invalid Field in CDB, just retry
1523 * silently with RC10 */ 1523 * silently with RC10 */
1524 return -EINVAL; 1524 return -EINVAL;
1525 if (sense_valid && 1525 if (sense_valid &&
1526 sshdr.sense_key == UNIT_ATTENTION && 1526 sshdr.sense_key == UNIT_ATTENTION &&
1527 sshdr.asc == 0x29 && sshdr.ascq == 0x00) 1527 sshdr.asc == 0x29 && sshdr.ascq == 0x00)
1528 /* Device reset might occur several times, 1528 /* Device reset might occur several times,
1529 * give it one more chance */ 1529 * give it one more chance */
1530 if (--reset_retries > 0) 1530 if (--reset_retries > 0)
1531 continue; 1531 continue;
1532 } 1532 }
1533 retries--; 1533 retries--;
1534 1534
1535 } while (the_result && retries); 1535 } while (the_result && retries);
1536 1536
1537 if (the_result) { 1537 if (the_result) {
1538 sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY(16) failed\n"); 1538 sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY(16) failed\n");
1539 read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); 1539 read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result);
1540 return -EINVAL; 1540 return -EINVAL;
1541 } 1541 }
1542 1542
1543 sector_size = get_unaligned_be32(&buffer[8]); 1543 sector_size = get_unaligned_be32(&buffer[8]);
1544 lba = get_unaligned_be64(&buffer[0]); 1544 lba = get_unaligned_be64(&buffer[0]);
1545 1545
1546 sd_read_protection_type(sdkp, buffer); 1546 sd_read_protection_type(sdkp, buffer);
1547 1547
1548 if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) { 1548 if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) {
1549 sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " 1549 sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
1550 "kernel compiled with support for large block " 1550 "kernel compiled with support for large block "
1551 "devices.\n"); 1551 "devices.\n");
1552 sdkp->capacity = 0; 1552 sdkp->capacity = 0;
1553 return -EOVERFLOW; 1553 return -EOVERFLOW;
1554 } 1554 }
1555 1555
1556 /* Logical blocks per physical block exponent */ 1556 /* Logical blocks per physical block exponent */
1557 sdkp->hw_sector_size = (1 << (buffer[13] & 0xf)) * sector_size; 1557 sdkp->hw_sector_size = (1 << (buffer[13] & 0xf)) * sector_size;
1558 1558
1559 /* Lowest aligned logical block */ 1559 /* Lowest aligned logical block */
1560 alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; 1560 alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size;
1561 blk_queue_alignment_offset(sdp->request_queue, alignment); 1561 blk_queue_alignment_offset(sdp->request_queue, alignment);
1562 if (alignment && sdkp->first_scan) 1562 if (alignment && sdkp->first_scan)
1563 sd_printk(KERN_NOTICE, sdkp, 1563 sd_printk(KERN_NOTICE, sdkp,
1564 "physical block alignment offset: %u\n", alignment); 1564 "physical block alignment offset: %u\n", alignment);
1565 1565
1566 if (buffer[14] & 0x80) { /* TPE */ 1566 if (buffer[14] & 0x80) { /* TPE */
1567 struct request_queue *q = sdp->request_queue; 1567 struct request_queue *q = sdp->request_queue;
1568 1568
1569 sdkp->thin_provisioning = 1; 1569 sdkp->thin_provisioning = 1;
1570 q->limits.discard_granularity = sdkp->hw_sector_size; 1570 q->limits.discard_granularity = sdkp->hw_sector_size;
1571 q->limits.max_discard_sectors = 0xffffffff; 1571 q->limits.max_discard_sectors = 0xffffffff;
1572 1572
1573 if (buffer[14] & 0x40) /* TPRZ */ 1573 if (buffer[14] & 0x40) /* TPRZ */
1574 q->limits.discard_zeroes_data = 1; 1574 q->limits.discard_zeroes_data = 1;
1575 1575
1576 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 1576 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1577 } 1577 }
1578 1578
1579 sdkp->capacity = lba + 1; 1579 sdkp->capacity = lba + 1;
1580 return sector_size; 1580 return sector_size;
1581 } 1581 }
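
read_capacity_16() above decodes the response as: last LBA in bytes 0-7 (big-endian), logical block length in bytes 8-11, the logical-blocks-per-physical-block exponent in the low nibble of byte 13, and the lowest aligned LBA in the low 14 bits of bytes 14-15. A standalone userspace sketch of that decoding on a fabricated buffer (the helpers stand in for get_unaligned_be64()/be32()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t be64(const unsigned char *p)
{
        uint64_t v = 0;
        for (int i = 0; i < 8; i++)
                v = (v << 8) | p[i];
        return v;
}

static uint32_t be32(const unsigned char *p)
{
        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8) | p[3];
}

int main(void)
{
        unsigned char buf[32];

        memset(buf, 0, sizeof(buf));
        /* fabricated response: last LBA 0x1d1c5bff, 512-byte logical blocks,
         * 8 logical blocks per physical block (2^3), alignment offset 0 */
        buf[4] = 0x1d; buf[5] = 0x1c; buf[6] = 0x5b; buf[7] = 0xff;
        buf[10] = 0x02;                    /* block length 0x200 = 512 */
        buf[13] = 0x03;                    /* exponent nibble */

        uint64_t lba = be64(&buf[0]);
        uint32_t sector_size = be32(&buf[8]);
        unsigned physical = (1u << (buf[13] & 0xf)) * sector_size;
        unsigned alignment = (((buf[14] & 0x3f) << 8) | buf[15]) * sector_size;

        printf("capacity=%llu blocks, logical=%u, physical=%u, align=%u\n",
               (unsigned long long)(lba + 1), (unsigned)sector_size,
               physical, alignment);
        return 0;
}
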
1582 1582
1583 static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, 1583 static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp,
1584 unsigned char *buffer) 1584 unsigned char *buffer)
1585 { 1585 {
1586 unsigned char cmd[16]; 1586 unsigned char cmd[16];
1587 struct scsi_sense_hdr sshdr; 1587 struct scsi_sense_hdr sshdr;
1588 int sense_valid = 0; 1588 int sense_valid = 0;
1589 int the_result; 1589 int the_result;
1590 int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; 1590 int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET;
1591 sector_t lba; 1591 sector_t lba;
1592 unsigned sector_size; 1592 unsigned sector_size;
1593 1593
1594 do { 1594 do {
1595 cmd[0] = READ_CAPACITY; 1595 cmd[0] = READ_CAPACITY;
1596 memset(&cmd[1], 0, 9); 1596 memset(&cmd[1], 0, 9);
1597 memset(buffer, 0, 8); 1597 memset(buffer, 0, 8);
1598 1598
1599 the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, 1599 the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
1600 buffer, 8, &sshdr, 1600 buffer, 8, &sshdr,
1601 SD_TIMEOUT, SD_MAX_RETRIES, NULL); 1601 SD_TIMEOUT, SD_MAX_RETRIES, NULL);
1602 1602
1603 if (media_not_present(sdkp, &sshdr)) 1603 if (media_not_present(sdkp, &sshdr))
1604 return -ENODEV; 1604 return -ENODEV;
1605 1605
1606 if (the_result) { 1606 if (the_result) {
1607 sense_valid = scsi_sense_valid(&sshdr); 1607 sense_valid = scsi_sense_valid(&sshdr);
1608 if (sense_valid && 1608 if (sense_valid &&
1609 sshdr.sense_key == UNIT_ATTENTION && 1609 sshdr.sense_key == UNIT_ATTENTION &&
1610 sshdr.asc == 0x29 && sshdr.ascq == 0x00) 1610 sshdr.asc == 0x29 && sshdr.ascq == 0x00)
1611 /* Device reset might occur several times, 1611 /* Device reset might occur several times,
1612 * give it one more chance */ 1612 * give it one more chance */
1613 if (--reset_retries > 0) 1613 if (--reset_retries > 0)
1614 continue; 1614 continue;
1615 } 1615 }
1616 retries--; 1616 retries--;
1617 1617
1618 } while (the_result && retries); 1618 } while (the_result && retries);
1619 1619
1620 if (the_result) { 1620 if (the_result) {
1621 sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY failed\n"); 1621 sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY failed\n");
1622 read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); 1622 read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result);
1623 return -EINVAL; 1623 return -EINVAL;
1624 } 1624 }
1625 1625
1626 sector_size = get_unaligned_be32(&buffer[4]); 1626 sector_size = get_unaligned_be32(&buffer[4]);
1627 lba = get_unaligned_be32(&buffer[0]); 1627 lba = get_unaligned_be32(&buffer[0]);
1628 1628
1629 if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) { 1629 if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) {
1630 sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " 1630 sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
1631 "kernel compiled with support for large block " 1631 "kernel compiled with support for large block "
1632 "devices.\n"); 1632 "devices.\n");
1633 sdkp->capacity = 0; 1633 sdkp->capacity = 0;
1634 return -EOVERFLOW; 1634 return -EOVERFLOW;
1635 } 1635 }
1636 1636
1637 sdkp->capacity = lba + 1; 1637 sdkp->capacity = lba + 1;
1638 sdkp->hw_sector_size = sector_size; 1638 sdkp->hw_sector_size = sector_size;
1639 return sector_size; 1639 return sector_size;
1640 } 1640 }
1641 1641
1642 static int sd_try_rc16_first(struct scsi_device *sdp) 1642 static int sd_try_rc16_first(struct scsi_device *sdp)
1643 { 1643 {
1644 if (sdp->host->max_cmd_len < 16) 1644 if (sdp->host->max_cmd_len < 16)
1645 return 0; 1645 return 0;
1646 if (sdp->scsi_level > SCSI_SPC_2) 1646 if (sdp->scsi_level > SCSI_SPC_2)
1647 return 1; 1647 return 1;
1648 if (scsi_device_protection(sdp)) 1648 if (scsi_device_protection(sdp))
1649 return 1; 1649 return 1;
1650 return 0; 1650 return 0;
1651 } 1651 }
1652 1652
1653 /* 1653 /*
1654 * read disk capacity 1654 * read disk capacity
1655 */ 1655 */
1656 static void 1656 static void
1657 sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) 1657 sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
1658 { 1658 {
1659 int sector_size; 1659 int sector_size;
1660 struct scsi_device *sdp = sdkp->device; 1660 struct scsi_device *sdp = sdkp->device;
1661 sector_t old_capacity = sdkp->capacity; 1661 sector_t old_capacity = sdkp->capacity;
1662 1662
1663 if (sd_try_rc16_first(sdp)) { 1663 if (sd_try_rc16_first(sdp)) {
1664 sector_size = read_capacity_16(sdkp, sdp, buffer); 1664 sector_size = read_capacity_16(sdkp, sdp, buffer);
1665 if (sector_size == -EOVERFLOW) 1665 if (sector_size == -EOVERFLOW)
1666 goto got_data; 1666 goto got_data;
1667 if (sector_size == -ENODEV) 1667 if (sector_size == -ENODEV)
1668 return; 1668 return;
1669 if (sector_size < 0) 1669 if (sector_size < 0)
1670 sector_size = read_capacity_10(sdkp, sdp, buffer); 1670 sector_size = read_capacity_10(sdkp, sdp, buffer);
1671 if (sector_size < 0) 1671 if (sector_size < 0)
1672 return; 1672 return;
1673 } else { 1673 } else {
1674 sector_size = read_capacity_10(sdkp, sdp, buffer); 1674 sector_size = read_capacity_10(sdkp, sdp, buffer);
1675 if (sector_size == -EOVERFLOW) 1675 if (sector_size == -EOVERFLOW)
1676 goto got_data; 1676 goto got_data;
1677 if (sector_size < 0) 1677 if (sector_size < 0)
1678 return; 1678 return;
1679 if ((sizeof(sdkp->capacity) > 4) && 1679 if ((sizeof(sdkp->capacity) > 4) &&
1680 (sdkp->capacity > 0xffffffffULL)) { 1680 (sdkp->capacity > 0xffffffffULL)) {
1681 int old_sector_size = sector_size; 1681 int old_sector_size = sector_size;
1682 sd_printk(KERN_NOTICE, sdkp, "Very big device. " 1682 sd_printk(KERN_NOTICE, sdkp, "Very big device. "
1683 "Trying to use READ CAPACITY(16).\n"); 1683 "Trying to use READ CAPACITY(16).\n");
1684 sector_size = read_capacity_16(sdkp, sdp, buffer); 1684 sector_size = read_capacity_16(sdkp, sdp, buffer);
1685 if (sector_size < 0) { 1685 if (sector_size < 0) {
1686 sd_printk(KERN_NOTICE, sdkp, 1686 sd_printk(KERN_NOTICE, sdkp,
1687 "Using 0xffffffff as device size\n"); 1687 "Using 0xffffffff as device size\n");
1688 sdkp->capacity = 1 + (sector_t) 0xffffffff; 1688 sdkp->capacity = 1 + (sector_t) 0xffffffff;
1689 sector_size = old_sector_size; 1689 sector_size = old_sector_size;
1690 goto got_data; 1690 goto got_data;
1691 } 1691 }
1692 } 1692 }
1693 } 1693 }
1694 1694
1695 /* Some devices are known to return the total number of blocks, 1695 /* Some devices are known to return the total number of blocks,
1696 * not the highest block number. Some devices have versions 1696 * not the highest block number. Some devices have versions
1697 * which do this and others which do not. Some devices we might 1697 * which do this and others which do not. Some devices we might
1698 * suspect of doing this but we don't know for certain. 1698 * suspect of doing this but we don't know for certain.
1699 * 1699 *
1700 * If we know the reported capacity is wrong, decrement it. If 1700 * If we know the reported capacity is wrong, decrement it. If
1701 * we can only guess, then assume the number of blocks is even 1701 * we can only guess, then assume the number of blocks is even
1702 * (usually true but not always) and err on the side of lowering 1702 * (usually true but not always) and err on the side of lowering
1703 * the capacity. 1703 * the capacity.
1704 */ 1704 */
1705 if (sdp->fix_capacity || 1705 if (sdp->fix_capacity ||
1706 (sdp->guess_capacity && (sdkp->capacity & 0x01))) { 1706 (sdp->guess_capacity && (sdkp->capacity & 0x01))) {
1707 sd_printk(KERN_INFO, sdkp, "Adjusting the sector count " 1707 sd_printk(KERN_INFO, sdkp, "Adjusting the sector count "
1708 "from its reported value: %llu\n", 1708 "from its reported value: %llu\n",
1709 (unsigned long long) sdkp->capacity); 1709 (unsigned long long) sdkp->capacity);
1710 --sdkp->capacity; 1710 --sdkp->capacity;
1711 } 1711 }
1712 1712
1713 got_data: 1713 got_data:
1714 if (sector_size == 0) { 1714 if (sector_size == 0) {
1715 sector_size = 512; 1715 sector_size = 512;
1716 sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, " 1716 sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, "
1717 "assuming 512.\n"); 1717 "assuming 512.\n");
1718 } 1718 }
1719 1719
1720 if (sector_size != 512 && 1720 if (sector_size != 512 &&
1721 sector_size != 1024 && 1721 sector_size != 1024 &&
1722 sector_size != 2048 && 1722 sector_size != 2048 &&
1723 sector_size != 4096 && 1723 sector_size != 4096 &&
1724 sector_size != 256) { 1724 sector_size != 256) {
1725 sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n", 1725 sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n",
1726 sector_size); 1726 sector_size);
1727 /* 1727 /*
1728 * The user might want to re-format the drive with 1728 * The user might want to re-format the drive with
1729 * a supported sector size. Once this happens, it 1729 * a supported sector size. Once this happens, it
1730 * would be relatively trivial to set the thing up. 1730 * would be relatively trivial to set the thing up.
1731 * For this reason, we leave the thing in the table. 1731 * For this reason, we leave the thing in the table.
1732 */ 1732 */
1733 sdkp->capacity = 0; 1733 sdkp->capacity = 0;
1734 /* 1734 /*
1735 * set a bogus sector size so the normal read/write 1735 * set a bogus sector size so the normal read/write
1736 * logic in the block layer will eventually refuse any 1736 * logic in the block layer will eventually refuse any
1737 * request on this device without tripping over power 1737 * request on this device without tripping over power
1738 * of two sector size assumptions 1738 * of two sector size assumptions
1739 */ 1739 */
1740 sector_size = 512; 1740 sector_size = 512;
1741 } 1741 }
1742 blk_queue_logical_block_size(sdp->request_queue, sector_size); 1742 blk_queue_logical_block_size(sdp->request_queue, sector_size);
1743 1743
1744 { 1744 {
1745 char cap_str_2[10], cap_str_10[10]; 1745 char cap_str_2[10], cap_str_10[10];
1746 u64 sz = (u64)sdkp->capacity << ilog2(sector_size); 1746 u64 sz = (u64)sdkp->capacity << ilog2(sector_size);
1747 1747
1748 string_get_size(sz, STRING_UNITS_2, cap_str_2, 1748 string_get_size(sz, STRING_UNITS_2, cap_str_2,
1749 sizeof(cap_str_2)); 1749 sizeof(cap_str_2));
1750 string_get_size(sz, STRING_UNITS_10, cap_str_10, 1750 string_get_size(sz, STRING_UNITS_10, cap_str_10,
1751 sizeof(cap_str_10)); 1751 sizeof(cap_str_10));
1752 1752
1753 if (sdkp->first_scan || old_capacity != sdkp->capacity) { 1753 if (sdkp->first_scan || old_capacity != sdkp->capacity) {
1754 sd_printk(KERN_NOTICE, sdkp, 1754 sd_printk(KERN_NOTICE, sdkp,
1755 "%llu %d-byte logical blocks: (%s/%s)\n", 1755 "%llu %d-byte logical blocks: (%s/%s)\n",
1756 (unsigned long long)sdkp->capacity, 1756 (unsigned long long)sdkp->capacity,
1757 sector_size, cap_str_10, cap_str_2); 1757 sector_size, cap_str_10, cap_str_2);
1758 1758
1759 if (sdkp->hw_sector_size != sector_size) 1759 if (sdkp->hw_sector_size != sector_size)
1760 sd_printk(KERN_NOTICE, sdkp, 1760 sd_printk(KERN_NOTICE, sdkp,
1761 "%u-byte physical blocks\n", 1761 "%u-byte physical blocks\n",
1762 sdkp->hw_sector_size); 1762 sdkp->hw_sector_size);
1763 } 1763 }
1764 } 1764 }
1765 1765
1766 /* Rescale capacity to 512-byte units */ 1766 /* Rescale capacity to 512-byte units */
1767 if (sector_size == 4096) 1767 if (sector_size == 4096)
1768 sdkp->capacity <<= 3; 1768 sdkp->capacity <<= 3;
1769 else if (sector_size == 2048) 1769 else if (sector_size == 2048)
1770 sdkp->capacity <<= 2; 1770 sdkp->capacity <<= 2;
1771 else if (sector_size == 1024) 1771 else if (sector_size == 1024)
1772 sdkp->capacity <<= 1; 1772 sdkp->capacity <<= 1;
1773 else if (sector_size == 256) 1773 else if (sector_size == 256)
1774 sdkp->capacity >>= 1; 1774 sdkp->capacity >>= 1;
1775 1775
1776 blk_queue_physical_block_size(sdp->request_queue, sdkp->hw_sector_size); 1776 blk_queue_physical_block_size(sdp->request_queue, sdkp->hw_sector_size);
1777 sdkp->device->sector_size = sector_size; 1777 sdkp->device->sector_size = sector_size;
1778 } 1778 }
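The rescale above converts the capacity reported in native logical blocks into 512-byte units, which is what set_capacity() expects further down. As a minimal, self-contained sketch of the same arithmetic (userspace C; the helper name is hypothetical), the shifts are equivalent to multiplying by sector_size/512 for the power-of-two sizes the driver accepts:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the shift-based rescale in the driver above. */
static uint64_t capacity_in_512b_units(uint64_t blocks, unsigned sector_size)
{
	switch (sector_size) {
	case 4096: return blocks << 3;
	case 2048: return blocks << 2;
	case 1024: return blocks << 1;
	case  512: return blocks;
	case  256: return blocks >> 1;
	default:   return 0;	/* unsupported, as in the driver */
	}
}

int main(void)
{
	/* 1 GiB worth of 4096-byte blocks -> 2097152 sectors of 512 bytes. */
	uint64_t blocks = (1ULL << 30) / 4096;

	assert(capacity_in_512b_units(blocks, 4096) == blocks * (4096 / 512));
	printf("%llu 512-byte sectors\n",
	       (unsigned long long)capacity_in_512b_units(blocks, 4096));
	return 0;
}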
1779 1779
1780 /* called with buffer of length 512 */ 1780 /* called with buffer of length 512 */
1781 static inline int 1781 static inline int
1782 sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage, 1782 sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage,
1783 unsigned char *buffer, int len, struct scsi_mode_data *data, 1783 unsigned char *buffer, int len, struct scsi_mode_data *data,
1784 struct scsi_sense_hdr *sshdr) 1784 struct scsi_sense_hdr *sshdr)
1785 { 1785 {
1786 return scsi_mode_sense(sdp, dbd, modepage, buffer, len, 1786 return scsi_mode_sense(sdp, dbd, modepage, buffer, len,
1787 SD_TIMEOUT, SD_MAX_RETRIES, data, 1787 SD_TIMEOUT, SD_MAX_RETRIES, data,
1788 sshdr); 1788 sshdr);
1789 } 1789 }
1790 1790
1791 /* 1791 /*
1792 * read write protect setting, if possible - called only in sd_revalidate_disk() 1792 * read write protect setting, if possible - called only in sd_revalidate_disk()
1793 * called with buffer of length SD_BUF_SIZE 1793 * called with buffer of length SD_BUF_SIZE
1794 */ 1794 */
1795 static void 1795 static void
1796 sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) 1796 sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
1797 { 1797 {
1798 int res; 1798 int res;
1799 struct scsi_device *sdp = sdkp->device; 1799 struct scsi_device *sdp = sdkp->device;
1800 struct scsi_mode_data data; 1800 struct scsi_mode_data data;
1801 int old_wp = sdkp->write_prot; 1801 int old_wp = sdkp->write_prot;
1802 1802
1803 set_disk_ro(sdkp->disk, 0); 1803 set_disk_ro(sdkp->disk, 0);
1804 if (sdp->skip_ms_page_3f) { 1804 if (sdp->skip_ms_page_3f) {
1805 sd_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); 1805 sd_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n");
1806 return; 1806 return;
1807 } 1807 }
1808 1808
1809 if (sdp->use_192_bytes_for_3f) { 1809 if (sdp->use_192_bytes_for_3f) {
1810 res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 192, &data, NULL); 1810 res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 192, &data, NULL);
1811 } else { 1811 } else {
1812 /* 1812 /*
1813 * First attempt: ask for all pages (0x3F), but only 4 bytes. 1813 * First attempt: ask for all pages (0x3F), but only 4 bytes.
1814 * We have to start carefully: some devices hang if we ask 1814 * We have to start carefully: some devices hang if we ask
1815 * for more than is available. 1815 * for more than is available.
1816 */ 1816 */
1817 res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 4, &data, NULL); 1817 res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 4, &data, NULL);
1818 1818
1819 /* 1819 /*
1820 * Second attempt: ask for page 0. When only page 0 is 1820 * Second attempt: ask for page 0. When only page 0 is
1821 * implemented, a request for page 3F may return Sense Key 1821 * implemented, a request for page 3F may return Sense Key
1822 * 5: Illegal Request, Sense Code 24: Invalid field in 1822 * 5: Illegal Request, Sense Code 24: Invalid field in
1823 * CDB. 1823 * CDB.
1824 */ 1824 */
1825 if (!scsi_status_is_good(res)) 1825 if (!scsi_status_is_good(res))
1826 res = sd_do_mode_sense(sdp, 0, 0, buffer, 4, &data, NULL); 1826 res = sd_do_mode_sense(sdp, 0, 0, buffer, 4, &data, NULL);
1827 1827
1828 /* 1828 /*
1829 * Third attempt: ask for 255 bytes, as we did earlier. 1829 * Third attempt: ask for 255 bytes, as we did earlier.
1830 */ 1830 */
1831 if (!scsi_status_is_good(res)) 1831 if (!scsi_status_is_good(res))
1832 res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 255, 1832 res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 255,
1833 &data, NULL); 1833 &data, NULL);
1834 } 1834 }
1835 1835
1836 if (!scsi_status_is_good(res)) { 1836 if (!scsi_status_is_good(res)) {
1837 sd_printk(KERN_WARNING, sdkp, 1837 sd_printk(KERN_WARNING, sdkp,
1838 "Test WP failed, assume Write Enabled\n"); 1838 "Test WP failed, assume Write Enabled\n");
1839 } else { 1839 } else {
1840 sdkp->write_prot = ((data.device_specific & 0x80) != 0); 1840 sdkp->write_prot = ((data.device_specific & 0x80) != 0);
1841 set_disk_ro(sdkp->disk, sdkp->write_prot); 1841 set_disk_ro(sdkp->disk, sdkp->write_prot);
1842 if (sdkp->first_scan || old_wp != sdkp->write_prot) { 1842 if (sdkp->first_scan || old_wp != sdkp->write_prot) {
1843 sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", 1843 sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n",
1844 sdkp->write_prot ? "on" : "off"); 1844 sdkp->write_prot ? "on" : "off");
1845 sd_printk(KERN_DEBUG, sdkp, 1845 sd_printk(KERN_DEBUG, sdkp,
1846 "Mode Sense: %02x %02x %02x %02x\n", 1846 "Mode Sense: %02x %02x %02x %02x\n",
1847 buffer[0], buffer[1], buffer[2], buffer[3]); 1847 buffer[0], buffer[1], buffer[2], buffer[3]);
1848 } 1848 }
1849 } 1849 }
1850 } 1850 }
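The probing strategy above starts with the smallest possible MODE SENSE and only widens the request if the device rejects it, because some devices hang when asked for more data than they have. A rough userspace sketch of that escalation pattern follows; the probe callback is a stand-in for scsi_mode_sense(), not the real thing:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a MODE SENSE issuer: page code and length in, success out. */
typedef bool (*mode_probe_fn)(unsigned page, unsigned len);

/* Try the cheapest request first, then fall back, mirroring the driver. */
static bool probe_write_protect(mode_probe_fn probe)
{
	if (probe(0x3f, 4))		/* all pages, 4 bytes only */
		return true;
	if (probe(0x00, 4))		/* page 0 only */
		return true;
	return probe(0x3f, 255);	/* last resort: ask for everything */
}

static bool fake_probe(unsigned page, unsigned len)
{
	(void)len;
	/* Pretend the device only answers the page-0 form. */
	return page == 0x00;
}

int main(void)
{
	printf("probe %s\n",
	       probe_write_protect(fake_probe) ? "succeeded" : "failed");
	return 0;
}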
1851 1851
1852 /* 1852 /*
1853 * sd_read_cache_type - called only from sd_revalidate_disk() 1853 * sd_read_cache_type - called only from sd_revalidate_disk()
1854 * called with buffer of length SD_BUF_SIZE 1854 * called with buffer of length SD_BUF_SIZE
1855 */ 1855 */
1856 static void 1856 static void
1857 sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) 1857 sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
1858 { 1858 {
1859 int len = 0, res; 1859 int len = 0, res;
1860 struct scsi_device *sdp = sdkp->device; 1860 struct scsi_device *sdp = sdkp->device;
1861 1861
1862 int dbd; 1862 int dbd;
1863 int modepage; 1863 int modepage;
1864 struct scsi_mode_data data; 1864 struct scsi_mode_data data;
1865 struct scsi_sense_hdr sshdr; 1865 struct scsi_sense_hdr sshdr;
1866 int old_wce = sdkp->WCE; 1866 int old_wce = sdkp->WCE;
1867 int old_rcd = sdkp->RCD; 1867 int old_rcd = sdkp->RCD;
1868 int old_dpofua = sdkp->DPOFUA; 1868 int old_dpofua = sdkp->DPOFUA;
1869 1869
1870 if (sdp->skip_ms_page_8) 1870 if (sdp->skip_ms_page_8)
1871 goto defaults; 1871 goto defaults;
1872 1872
1873 if (sdp->type == TYPE_RBC) { 1873 if (sdp->type == TYPE_RBC) {
1874 modepage = 6; 1874 modepage = 6;
1875 dbd = 8; 1875 dbd = 8;
1876 } else { 1876 } else {
1877 modepage = 8; 1877 modepage = 8;
1878 dbd = 0; 1878 dbd = 0;
1879 } 1879 }
1880 1880
1881 /* cautiously ask */ 1881 /* cautiously ask */
1882 res = sd_do_mode_sense(sdp, dbd, modepage, buffer, 4, &data, &sshdr); 1882 res = sd_do_mode_sense(sdp, dbd, modepage, buffer, 4, &data, &sshdr);
1883 1883
1884 if (!scsi_status_is_good(res)) 1884 if (!scsi_status_is_good(res))
1885 goto bad_sense; 1885 goto bad_sense;
1886 1886
1887 if (!data.header_length) { 1887 if (!data.header_length) {
1888 modepage = 6; 1888 modepage = 6;
1889 sd_printk(KERN_ERR, sdkp, "Missing header in MODE_SENSE response\n"); 1889 sd_printk(KERN_ERR, sdkp, "Missing header in MODE_SENSE response\n");
1890 } 1890 }
1891 1891
1892 /* that went OK, now ask for the proper length */ 1892 /* that went OK, now ask for the proper length */
1893 len = data.length; 1893 len = data.length;
1894 1894
1895 /* 1895 /*
1896 * We're only interested in the first three bytes, actually. 1896 * We're only interested in the first three bytes, actually.
1897 * But the data cache page is defined for the first 20. 1897 * But the data cache page is defined for the first 20.
1898 */ 1898 */
1899 if (len < 3) 1899 if (len < 3)
1900 goto bad_sense; 1900 goto bad_sense;
1901 if (len > 20) 1901 if (len > 20)
1902 len = 20; 1902 len = 20;
1903 1903
1904 /* Take headers and block descriptors into account */ 1904 /* Take headers and block descriptors into account */
1905 len += data.header_length + data.block_descriptor_length; 1905 len += data.header_length + data.block_descriptor_length;
1906 if (len > SD_BUF_SIZE) 1906 if (len > SD_BUF_SIZE)
1907 goto bad_sense; 1907 goto bad_sense;
1908 1908
1909 /* Get the data */ 1909 /* Get the data */
1910 res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr); 1910 res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
1911 1911
1912 if (scsi_status_is_good(res)) { 1912 if (scsi_status_is_good(res)) {
1913 int offset = data.header_length + data.block_descriptor_length; 1913 int offset = data.header_length + data.block_descriptor_length;
1914 1914
1915 if (offset >= SD_BUF_SIZE - 2) { 1915 if (offset >= SD_BUF_SIZE - 2) {
1916 sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n"); 1916 sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n");
1917 goto defaults; 1917 goto defaults;
1918 } 1918 }
1919 1919
1920 if ((buffer[offset] & 0x3f) != modepage) { 1920 if ((buffer[offset] & 0x3f) != modepage) {
1921 sd_printk(KERN_ERR, sdkp, "Got wrong page\n"); 1921 sd_printk(KERN_ERR, sdkp, "Got wrong page\n");
1922 goto defaults; 1922 goto defaults;
1923 } 1923 }
1924 1924
1925 if (modepage == 8) { 1925 if (modepage == 8) {
1926 sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); 1926 sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
1927 sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0); 1927 sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);
1928 } else { 1928 } else {
1929 sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0); 1929 sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0);
1930 sdkp->RCD = 0; 1930 sdkp->RCD = 0;
1931 } 1931 }
1932 1932
1933 sdkp->DPOFUA = (data.device_specific & 0x10) != 0; 1933 sdkp->DPOFUA = (data.device_specific & 0x10) != 0;
1934 if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) { 1934 if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
1935 sd_printk(KERN_NOTICE, sdkp, 1935 sd_printk(KERN_NOTICE, sdkp,
1936 "Uses READ/WRITE(6), disabling FUA\n"); 1936 "Uses READ/WRITE(6), disabling FUA\n");
1937 sdkp->DPOFUA = 0; 1937 sdkp->DPOFUA = 0;
1938 } 1938 }
1939 1939
1940 if (sdkp->first_scan || old_wce != sdkp->WCE || 1940 if (sdkp->first_scan || old_wce != sdkp->WCE ||
1941 old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA) 1941 old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA)
1942 sd_printk(KERN_NOTICE, sdkp, 1942 sd_printk(KERN_NOTICE, sdkp,
1943 "Write cache: %s, read cache: %s, %s\n", 1943 "Write cache: %s, read cache: %s, %s\n",
1944 sdkp->WCE ? "enabled" : "disabled", 1944 sdkp->WCE ? "enabled" : "disabled",
1945 sdkp->RCD ? "disabled" : "enabled", 1945 sdkp->RCD ? "disabled" : "enabled",
1946 sdkp->DPOFUA ? "supports DPO and FUA" 1946 sdkp->DPOFUA ? "supports DPO and FUA"
1947 : "doesn't support DPO or FUA"); 1947 : "doesn't support DPO or FUA");
1948 1948
1949 return; 1949 return;
1950 } 1950 }
1951 1951
1952 bad_sense: 1952 bad_sense:
1953 if (scsi_sense_valid(&sshdr) && 1953 if (scsi_sense_valid(&sshdr) &&
1954 sshdr.sense_key == ILLEGAL_REQUEST && 1954 sshdr.sense_key == ILLEGAL_REQUEST &&
1955 sshdr.asc == 0x24 && sshdr.ascq == 0x0) 1955 sshdr.asc == 0x24 && sshdr.ascq == 0x0)
1956 /* Invalid field in CDB */ 1956 /* Invalid field in CDB */
1957 sd_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n"); 1957 sd_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n");
1958 else 1958 else
1959 sd_printk(KERN_ERR, sdkp, "Asking for cache data failed\n"); 1959 sd_printk(KERN_ERR, sdkp, "Asking for cache data failed\n");
1960 1960
1961 defaults: 1961 defaults:
1962 sd_printk(KERN_ERR, sdkp, "Assuming drive cache: write through\n"); 1962 sd_printk(KERN_ERR, sdkp, "Assuming drive cache: write through\n");
1963 sdkp->WCE = 0; 1963 sdkp->WCE = 0;
1964 sdkp->RCD = 0; 1964 sdkp->RCD = 0;
1965 sdkp->DPOFUA = 0; 1965 sdkp->DPOFUA = 0;
1966 } 1966 }
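After the second MODE SENSE, the interesting bits live at a variable offset: the mode parameter header plus any block descriptors precede the caching page itself. A small standalone sketch of that decode is below; the buffer contents are fabricated for illustration, only the bit positions match the driver code above:

#include <stdbool.h>
#include <stdio.h>

struct cache_flags { bool wce, rcd, dpofua; };

/*
 * Decode write-cache-enable (WCE), read-cache-disable (RCD) and the
 * DPOFUA capability bit the same way sd_read_cache_type() does above.
 */
static struct cache_flags decode_caching_page(const unsigned char *buf,
					      unsigned header_len,
					      unsigned blk_desc_len,
					      unsigned char device_specific)
{
	unsigned off = header_len + blk_desc_len;
	struct cache_flags f = {
		.wce    = (buf[off + 2] & 0x04) != 0,
		.rcd    = (buf[off + 2] & 0x01) != 0,
		.dpofua = (device_specific & 0x10) != 0,
	};
	return f;
}

int main(void)
{
	/* 4-byte header, no block descriptors, then a caching page (0x08). */
	unsigned char buf[32] = { 0 };
	buf[4] = 0x08;		/* page code */
	buf[6] = 0x04;		/* WCE set, RCD clear */

	struct cache_flags f = decode_caching_page(buf, 4, 0, 0x10);
	printf("WCE=%d RCD=%d DPOFUA=%d\n", f.wce, f.rcd, f.dpofua);
	return 0;
}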
1967 1967
1968 /* 1968 /*
1969 * The ATO bit indicates whether the DIF application tag is available 1969 * The ATO bit indicates whether the DIF application tag is available
1970 * for use by the operating system. 1970 * for use by the operating system.
1971 */ 1971 */
1972 static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) 1972 static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
1973 { 1973 {
1974 int res, offset; 1974 int res, offset;
1975 struct scsi_device *sdp = sdkp->device; 1975 struct scsi_device *sdp = sdkp->device;
1976 struct scsi_mode_data data; 1976 struct scsi_mode_data data;
1977 struct scsi_sense_hdr sshdr; 1977 struct scsi_sense_hdr sshdr;
1978 1978
1979 if (sdp->type != TYPE_DISK) 1979 if (sdp->type != TYPE_DISK)
1980 return; 1980 return;
1981 1981
1982 if (sdkp->protection_type == 0) 1982 if (sdkp->protection_type == 0)
1983 return; 1983 return;
1984 1984
1985 res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT, 1985 res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT,
1986 SD_MAX_RETRIES, &data, &sshdr); 1986 SD_MAX_RETRIES, &data, &sshdr);
1987 1987
1988 if (!scsi_status_is_good(res) || !data.header_length || 1988 if (!scsi_status_is_good(res) || !data.header_length ||
1989 data.length < 6) { 1989 data.length < 6) {
1990 sd_printk(KERN_WARNING, sdkp, 1990 sd_printk(KERN_WARNING, sdkp,
1991 "getting Control mode page failed, assume no ATO\n"); 1991 "getting Control mode page failed, assume no ATO\n");
1992 1992
1993 if (scsi_sense_valid(&sshdr)) 1993 if (scsi_sense_valid(&sshdr))
1994 sd_print_sense_hdr(sdkp, &sshdr); 1994 sd_print_sense_hdr(sdkp, &sshdr);
1995 1995
1996 return; 1996 return;
1997 } 1997 }
1998 1998
1999 offset = data.header_length + data.block_descriptor_length; 1999 offset = data.header_length + data.block_descriptor_length;
2000 2000
2001 if ((buffer[offset] & 0x3f) != 0x0a) { 2001 if ((buffer[offset] & 0x3f) != 0x0a) {
2002 sd_printk(KERN_ERR, sdkp, "ATO Got wrong page\n"); 2002 sd_printk(KERN_ERR, sdkp, "ATO Got wrong page\n");
2003 return; 2003 return;
2004 } 2004 }
2005 2005
2006 if ((buffer[offset + 5] & 0x80) == 0) 2006 if ((buffer[offset + 5] & 0x80) == 0)
2007 return; 2007 return;
2008 2008
2009 sdkp->ATO = 1; 2009 sdkp->ATO = 1;
2010 2010
2011 return; 2011 return;
2012 } 2012 }
2013 2013
2014 /** 2014 /**
2015 * sd_read_block_limits - Query disk device for preferred I/O sizes. 2015 * sd_read_block_limits - Query disk device for preferred I/O sizes.
2016 * @disk: disk to query 2016 * @disk: disk to query
2017 */ 2017 */
2018 static void sd_read_block_limits(struct scsi_disk *sdkp) 2018 static void sd_read_block_limits(struct scsi_disk *sdkp)
2019 { 2019 {
2020 struct request_queue *q = sdkp->disk->queue; 2020 struct request_queue *q = sdkp->disk->queue;
2021 unsigned int sector_sz = sdkp->device->sector_size; 2021 unsigned int sector_sz = sdkp->device->sector_size;
2022 const int vpd_len = 64; 2022 const int vpd_len = 64;
2023 unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL); 2023 unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL);
2024 2024
2025 if (!buffer || 2025 if (!buffer ||
2026 /* Block Limits VPD */ 2026 /* Block Limits VPD */
2027 scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len)) 2027 scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len))
2028 goto out; 2028 goto out;
2029 2029
2030 blk_queue_io_min(sdkp->disk->queue, 2030 blk_queue_io_min(sdkp->disk->queue,
2031 get_unaligned_be16(&buffer[6]) * sector_sz); 2031 get_unaligned_be16(&buffer[6]) * sector_sz);
2032 blk_queue_io_opt(sdkp->disk->queue, 2032 blk_queue_io_opt(sdkp->disk->queue,
2033 get_unaligned_be32(&buffer[12]) * sector_sz); 2033 get_unaligned_be32(&buffer[12]) * sector_sz);
2034 2034
2035 /* Thin provisioning enabled and page length indicates TP support */ 2035 /* Thin provisioning enabled and page length indicates TP support */
2036 if (sdkp->thin_provisioning && buffer[3] == 0x3c) { 2036 if (sdkp->thin_provisioning && buffer[3] == 0x3c) {
2037 unsigned int lba_count, desc_count, granularity; 2037 unsigned int lba_count, desc_count, granularity;
2038 2038
2039 lba_count = get_unaligned_be32(&buffer[20]); 2039 lba_count = get_unaligned_be32(&buffer[20]);
2040 desc_count = get_unaligned_be32(&buffer[24]); 2040 desc_count = get_unaligned_be32(&buffer[24]);
2041 2041
2042 if (lba_count) { 2042 if (lba_count) {
2043 q->limits.max_discard_sectors = 2043 q->limits.max_discard_sectors =
2044 lba_count * sector_sz >> 9; 2044 lba_count * sector_sz >> 9;
2045 2045
2046 if (desc_count) 2046 if (desc_count)
2047 sdkp->unmap = 1; 2047 sdkp->unmap = 1;
2048 } 2048 }
2049 2049
2050 granularity = get_unaligned_be32(&buffer[28]); 2050 granularity = get_unaligned_be32(&buffer[28]);
2051 2051
2052 if (granularity) 2052 if (granularity)
2053 q->limits.discard_granularity = granularity * sector_sz; 2053 q->limits.discard_granularity = granularity * sector_sz;
2054 2054
2055 if (buffer[32] & 0x80) 2055 if (buffer[32] & 0x80)
2056 q->limits.discard_alignment = 2056 q->limits.discard_alignment =
2057 get_unaligned_be32(&buffer[32]) & ~(1 << 31); 2057 get_unaligned_be32(&buffer[32]) & ~(1 << 31);
2058 } 2058 }
2059 2059
2060 out: 2060 out:
2061 kfree(buffer); 2061 kfree(buffer);
2062 } 2062 }
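The Block Limits VPD page is a big-endian byte blob, so the driver pulls each field out with get_unaligned_be16/be32 and scales it by the logical block size before handing it to the queue limits. A self-contained sketch of the same extraction, using plain userspace reimplementations of the byte helpers (not the kernel's) and made-up page contents:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel's get_unaligned_be{16,32}(). */
static uint16_t be16(const unsigned char *p) { return (p[0] << 8) | p[1]; }
static uint32_t be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}

int main(void)
{
	unsigned sector_sz = 512;
	unsigned char vpd[64] = { 0 };

	/* Fabricated page contents: granularity at offset 6, optimal length at 12. */
	vpd[6]  = 0x00; vpd[7]  = 0x08;				/* io_min = 8 blocks */
	vpd[12] = 0x00; vpd[13] = 0x00; vpd[14] = 0x04; vpd[15] = 0x00; /* io_opt = 1024 blocks */

	printf("io_min = %u bytes\n", be16(&vpd[6]) * sector_sz);
	printf("io_opt = %u bytes\n", be32(&vpd[12]) * sector_sz);
	return 0;
}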
2063 2063
2064 /** 2064 /**
2065 * sd_read_block_characteristics - Query block dev. characteristics 2065 * sd_read_block_characteristics - Query block dev. characteristics
2066 * @disk: disk to query 2066 * @disk: disk to query
2067 */ 2067 */
2068 static void sd_read_block_characteristics(struct scsi_disk *sdkp) 2068 static void sd_read_block_characteristics(struct scsi_disk *sdkp)
2069 { 2069 {
2070 unsigned char *buffer; 2070 unsigned char *buffer;
2071 u16 rot; 2071 u16 rot;
2072 const int vpd_len = 64; 2072 const int vpd_len = 64;
2073 2073
2074 buffer = kmalloc(vpd_len, GFP_KERNEL); 2074 buffer = kmalloc(vpd_len, GFP_KERNEL);
2075 2075
2076 if (!buffer || 2076 if (!buffer ||
2077 /* Block Device Characteristics VPD */ 2077 /* Block Device Characteristics VPD */
2078 scsi_get_vpd_page(sdkp->device, 0xb1, buffer, vpd_len)) 2078 scsi_get_vpd_page(sdkp->device, 0xb1, buffer, vpd_len))
2079 goto out; 2079 goto out;
2080 2080
2081 rot = get_unaligned_be16(&buffer[4]); 2081 rot = get_unaligned_be16(&buffer[4]);
2082 2082
2083 if (rot == 1) 2083 if (rot == 1)
2084 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, sdkp->disk->queue); 2084 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, sdkp->disk->queue);
2085 2085
2086 out: 2086 out:
2087 kfree(buffer); 2087 kfree(buffer);
2088 } 2088 }
2089 2089
2090 static int sd_try_extended_inquiry(struct scsi_device *sdp) 2090 static int sd_try_extended_inquiry(struct scsi_device *sdp)
2091 { 2091 {
2092 /* 2092 /*
2093 * Although VPD inquiries can go to SCSI-2 type devices, 2093 * Although VPD inquiries can go to SCSI-2 type devices,
2094 * some USB ones crash on receiving them, and the pages 2094 * some USB ones crash on receiving them, and the pages
2095 * we currently ask for are for SPC-3 and beyond 2095 * we currently ask for are for SPC-3 and beyond
2096 */ 2096 */
2097 if (sdp->scsi_level > SCSI_SPC_2) 2097 if (sdp->scsi_level > SCSI_SPC_2)
2098 return 1; 2098 return 1;
2099 return 0; 2099 return 0;
2100 } 2100 }
2101 2101
2102 /** 2102 /**
2103 * sd_revalidate_disk - called the first time a new disk is seen, 2103 * sd_revalidate_disk - called the first time a new disk is seen,
2104 * performs disk spin up, read_capacity, etc. 2104 * performs disk spin up, read_capacity, etc.
2105 * @disk: struct gendisk we care about 2105 * @disk: struct gendisk we care about
2106 **/ 2106 **/
2107 static int sd_revalidate_disk(struct gendisk *disk) 2107 static int sd_revalidate_disk(struct gendisk *disk)
2108 { 2108 {
2109 struct scsi_disk *sdkp = scsi_disk(disk); 2109 struct scsi_disk *sdkp = scsi_disk(disk);
2110 struct scsi_device *sdp = sdkp->device; 2110 struct scsi_device *sdp = sdkp->device;
2111 unsigned char *buffer; 2111 unsigned char *buffer;
2112 unsigned ordered; 2112 unsigned flush = 0;
2113 2113
2114 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, 2114 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
2115 "sd_revalidate_disk\n")); 2115 "sd_revalidate_disk\n"));
2116 2116
2117 /* 2117 /*
2118 * If the device is offline, don't try and read capacity or any 2118 * If the device is offline, don't try and read capacity or any
2119 * of the other niceties. 2119 * of the other niceties.
2120 */ 2120 */
2121 if (!scsi_device_online(sdp)) 2121 if (!scsi_device_online(sdp))
2122 goto out; 2122 goto out;
2123 2123
2124 buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); 2124 buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL);
2125 if (!buffer) { 2125 if (!buffer) {
2126 sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " 2126 sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory "
2127 "allocation failure.\n"); 2127 "allocation failure.\n");
2128 goto out; 2128 goto out;
2129 } 2129 }
2130 2130
2131 sd_spinup_disk(sdkp); 2131 sd_spinup_disk(sdkp);
2132 2132
2133 /* 2133 /*
2134 * Without media there is no reason to ask; moreover, some devices 2134 * Without media there is no reason to ask; moreover, some devices
2135 * react badly if we do. 2135 * react badly if we do.
2136 */ 2136 */
2137 if (sdkp->media_present) { 2137 if (sdkp->media_present) {
2138 sd_read_capacity(sdkp, buffer); 2138 sd_read_capacity(sdkp, buffer);
2139 2139
2140 if (sd_try_extended_inquiry(sdp)) { 2140 if (sd_try_extended_inquiry(sdp)) {
2141 sd_read_block_limits(sdkp); 2141 sd_read_block_limits(sdkp);
2142 sd_read_block_characteristics(sdkp); 2142 sd_read_block_characteristics(sdkp);
2143 } 2143 }
2144 2144
2145 sd_read_write_protect_flag(sdkp, buffer); 2145 sd_read_write_protect_flag(sdkp, buffer);
2146 sd_read_cache_type(sdkp, buffer); 2146 sd_read_cache_type(sdkp, buffer);
2147 sd_read_app_tag_own(sdkp, buffer); 2147 sd_read_app_tag_own(sdkp, buffer);
2148 } 2148 }
2149 2149
2150 sdkp->first_scan = 0; 2150 sdkp->first_scan = 0;
2151 2151
2152 /* 2152 /*
2153 * We now have all cache related info, determine how we deal 2153 * We now have all cache related info, determine how we deal
2154 * with ordered requests. 2154 * with flush requests.
2155 */ 2155 */
2156 if (sdkp->WCE) 2156 if (sdkp->WCE) {
2157 ordered = sdkp->DPOFUA 2157 flush |= REQ_FLUSH;
2158 ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; 2158 if (sdkp->DPOFUA)
2159 else 2159 flush |= REQ_FUA;
2160 ordered = QUEUE_ORDERED_DRAIN; 2160 }
2161 2161
2162 blk_queue_ordered(sdkp->disk->queue, ordered); 2162 blk_queue_flush(sdkp->disk->queue, flush);
2163 2163
2164 set_capacity(disk, sdkp->capacity); 2164 set_capacity(disk, sdkp->capacity);
2165 kfree(buffer); 2165 kfree(buffer);
2166 2166
2167 out: 2167 out:
2168 return 0; 2168 return 0;
2169 } 2169 }
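This hunk is where the sd conversion lands: instead of picking a QUEUE_ORDERED_* mode, the driver now reports only which of REQ_FLUSH and REQ_FUA the device can make use of. A toy sketch of that decision table follows; the flag values are illustrative placeholders, not the block layer's:

#include <stdio.h>

/* Illustrative flag values; the real ones come from the block layer. */
#define EX_REQ_FLUSH (1u << 0)
#define EX_REQ_FUA   (1u << 1)

/* WCE: volatile write cache present; DPOFUA: device honours FUA writes. */
static unsigned flush_flags(int wce, int dpofua)
{
	unsigned flush = 0;

	if (wce) {
		flush |= EX_REQ_FLUSH;
		if (dpofua)
			flush |= EX_REQ_FUA;
	}
	return flush;
}

int main(void)
{
	printf("no cache      -> 0x%x\n", flush_flags(0, 0));
	printf("cache, no FUA -> 0x%x\n", flush_flags(1, 0));
	printf("cache and FUA -> 0x%x\n", flush_flags(1, 1));
	return 0;
}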
2170 2170
2171 /** 2171 /**
2172 * sd_unlock_native_capacity - unlock native capacity 2172 * sd_unlock_native_capacity - unlock native capacity
2173 * @disk: struct gendisk to set capacity for 2173 * @disk: struct gendisk to set capacity for
2174 * 2174 *
2175 * Block layer calls this function if it detects that partitions 2175 * Block layer calls this function if it detects that partitions
2176 * on @disk reach beyond the end of the device. If the SCSI host 2176 * on @disk reach beyond the end of the device. If the SCSI host
2177 * implements ->unlock_native_capacity() method, it's invoked to 2177 * implements ->unlock_native_capacity() method, it's invoked to
2178 * give it a chance to adjust the device capacity. 2178 * give it a chance to adjust the device capacity.
2179 * 2179 *
2180 * CONTEXT: 2180 * CONTEXT:
2181 * Defined by block layer. Might sleep. 2181 * Defined by block layer. Might sleep.
2182 */ 2182 */
2183 static void sd_unlock_native_capacity(struct gendisk *disk) 2183 static void sd_unlock_native_capacity(struct gendisk *disk)
2184 { 2184 {
2185 struct scsi_device *sdev = scsi_disk(disk)->device; 2185 struct scsi_device *sdev = scsi_disk(disk)->device;
2186 2186
2187 if (sdev->host->hostt->unlock_native_capacity) 2187 if (sdev->host->hostt->unlock_native_capacity)
2188 sdev->host->hostt->unlock_native_capacity(sdev); 2188 sdev->host->hostt->unlock_native_capacity(sdev);
2189 } 2189 }
2190 2190
2191 /** 2191 /**
2192 * sd_format_disk_name - format disk name 2192 * sd_format_disk_name - format disk name
2193 * @prefix: name prefix - ie. "sd" for SCSI disks 2193 * @prefix: name prefix - ie. "sd" for SCSI disks
2194 * @index: index of the disk to format name for 2194 * @index: index of the disk to format name for
2195 * @buf: output buffer 2195 * @buf: output buffer
2196 * @buflen: length of the output buffer 2196 * @buflen: length of the output buffer
2197 * 2197 *
2198 * SCSI disk names start at sda. The 26th device is sdz and the 2198 * SCSI disk names start at sda. The 26th device is sdz and the
2199 * 27th is sdaa. The last one for two lettered suffix is sdzz 2199 * 27th is sdaa. The last one for two lettered suffix is sdzz
2200 * which is followed by sdaaa. 2200 * which is followed by sdaaa.
2201 * 2201 *
2202 * This is basically base-26 counting with one extra 'nil' entry 2202 * This is basically base-26 counting with one extra 'nil' entry
2203 * at the beginning from the second digit on, and can be 2203 * at the beginning from the second digit on, and can be
2204 * computed using the same method as a base-26 conversion with 2204 * computed using the same method as a base-26 conversion with
2205 * the index shifted by -1 after each digit is computed. 2205 * the index shifted by -1 after each digit is computed.
2206 * 2206 *
2207 * CONTEXT: 2207 * CONTEXT:
2208 * Don't care. 2208 * Don't care.
2209 * 2209 *
2210 * RETURNS: 2210 * RETURNS:
2211 * 0 on success, -errno on failure. 2211 * 0 on success, -errno on failure.
2212 */ 2212 */
2213 static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) 2213 static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
2214 { 2214 {
2215 const int base = 'z' - 'a' + 1; 2215 const int base = 'z' - 'a' + 1;
2216 char *begin = buf + strlen(prefix); 2216 char *begin = buf + strlen(prefix);
2217 char *end = buf + buflen; 2217 char *end = buf + buflen;
2218 char *p; 2218 char *p;
2219 int unit; 2219 int unit;
2220 2220
2221 p = end - 1; 2221 p = end - 1;
2222 *p = '\0'; 2222 *p = '\0';
2223 unit = base; 2223 unit = base;
2224 do { 2224 do {
2225 if (p == begin) 2225 if (p == begin)
2226 return -EINVAL; 2226 return -EINVAL;
2227 *--p = 'a' + (index % unit); 2227 *--p = 'a' + (index % unit);
2228 index = (index / unit) - 1; 2228 index = (index / unit) - 1;
2229 } while (index >= 0); 2229 } while (index >= 0);
2230 2230
2231 memmove(begin, p, end - p); 2231 memmove(begin, p, end - p);
2232 memcpy(buf, prefix, strlen(prefix)); 2232 memcpy(buf, prefix, strlen(prefix));
2233 2233
2234 return 0; 2234 return 0;
2235 } 2235 }
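The comment above describes the naming scheme as base-26 counting with a phantom 'nil' digit, and the loop implements it by shifting the index down by one after each digit. The routine is self-contained enough that a lightly adapted copy runs in userspace, which makes the scheme easy to check (sda, sdz, sdaa, sdzz, sdaaa):

#include <stdio.h>
#include <string.h>

/* Adapted copy of sd_format_disk_name() for illustration only. */
static int format_disk_name(const char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p = end - 1;

	*p = '\0';
	do {
		if (p == begin)
			return -1;
		*--p = 'a' + (index % base);
		index = (index / base) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));
	return 0;
}

int main(void)
{
	int samples[] = { 0, 25, 26, 701, 702 };	/* sda, sdz, sdaa, sdzz, sdaaa */
	char name[32];

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		format_disk_name("sd", samples[i], name, sizeof(name));
		printf("%d -> %s\n", samples[i], name);
	}
	return 0;
}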
2236 2236
2237 /* 2237 /*
2238 * The asynchronous part of sd_probe 2238 * The asynchronous part of sd_probe
2239 */ 2239 */
2240 static void sd_probe_async(void *data, async_cookie_t cookie) 2240 static void sd_probe_async(void *data, async_cookie_t cookie)
2241 { 2241 {
2242 struct scsi_disk *sdkp = data; 2242 struct scsi_disk *sdkp = data;
2243 struct scsi_device *sdp; 2243 struct scsi_device *sdp;
2244 struct gendisk *gd; 2244 struct gendisk *gd;
2245 u32 index; 2245 u32 index;
2246 struct device *dev; 2246 struct device *dev;
2247 2247
2248 sdp = sdkp->device; 2248 sdp = sdkp->device;
2249 gd = sdkp->disk; 2249 gd = sdkp->disk;
2250 index = sdkp->index; 2250 index = sdkp->index;
2251 dev = &sdp->sdev_gendev; 2251 dev = &sdp->sdev_gendev;
2252 2252
2253 if (index < SD_MAX_DISKS) { 2253 if (index < SD_MAX_DISKS) {
2254 gd->major = sd_major((index & 0xf0) >> 4); 2254 gd->major = sd_major((index & 0xf0) >> 4);
2255 gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); 2255 gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
2256 gd->minors = SD_MINORS; 2256 gd->minors = SD_MINORS;
2257 } 2257 }
2258 gd->fops = &sd_fops; 2258 gd->fops = &sd_fops;
2259 gd->private_data = &sdkp->driver; 2259 gd->private_data = &sdkp->driver;
2260 gd->queue = sdkp->device->request_queue; 2260 gd->queue = sdkp->device->request_queue;
2261 2261
2262 /* defaults, until the device tells us otherwise */ 2262 /* defaults, until the device tells us otherwise */
2263 sdp->sector_size = 512; 2263 sdp->sector_size = 512;
2264 sdkp->capacity = 0; 2264 sdkp->capacity = 0;
2265 sdkp->media_present = 1; 2265 sdkp->media_present = 1;
2266 sdkp->write_prot = 0; 2266 sdkp->write_prot = 0;
2267 sdkp->WCE = 0; 2267 sdkp->WCE = 0;
2268 sdkp->RCD = 0; 2268 sdkp->RCD = 0;
2269 sdkp->ATO = 0; 2269 sdkp->ATO = 0;
2270 sdkp->first_scan = 1; 2270 sdkp->first_scan = 1;
2271 2271
2272 sd_revalidate_disk(gd); 2272 sd_revalidate_disk(gd);
2273 2273
2274 blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); 2274 blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
2275 blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn); 2275 blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
2276 2276
2277 gd->driverfs_dev = &sdp->sdev_gendev; 2277 gd->driverfs_dev = &sdp->sdev_gendev;
2278 gd->flags = GENHD_FL_EXT_DEVT; 2278 gd->flags = GENHD_FL_EXT_DEVT;
2279 if (sdp->removable) 2279 if (sdp->removable)
2280 gd->flags |= GENHD_FL_REMOVABLE; 2280 gd->flags |= GENHD_FL_REMOVABLE;
2281 2281
2282 add_disk(gd); 2282 add_disk(gd);
2283 sd_dif_config_host(sdkp); 2283 sd_dif_config_host(sdkp);
2284 2284
2285 sd_revalidate_disk(gd); 2285 sd_revalidate_disk(gd);
2286 2286
2287 sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", 2287 sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
2288 sdp->removable ? "removable " : ""); 2288 sdp->removable ? "removable " : "");
2289 scsi_autopm_put_device(sdp); 2289 scsi_autopm_put_device(sdp);
2290 put_device(&sdkp->dev); 2290 put_device(&sdkp->dev);
2291 } 2291 }
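The index-to-device-number mapping in sd_probe_async() above is pure bit slicing: bits 4-7 of the index pick one of the sd block majors (through sd_major(), whose mapping is not shown here), bits 0-3 pick the slot within that major, shifted left by four because each disk owns a block of 16 minors, and anything above bit 7 is folded into the minor number for the extended dev_t range enabled by GENHD_FL_EXT_DEVT. A small sketch of just the arithmetic, with major_slot standing in for the sd_major() argument:

#include <stdio.h>

struct placement { unsigned major_slot; unsigned first_minor; };

/* Mirror the bit slicing used in sd_probe_async() above. */
static struct placement place_disk(unsigned index)
{
	struct placement p = {
		.major_slot  = (index & 0xf0) >> 4,
		.first_minor = ((index & 0xf) << 4) | (index & 0xfff00),
	};
	return p;
}

int main(void)
{
	unsigned samples[] = { 0, 1, 15, 16, 255, 256 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		struct placement p = place_disk(samples[i]);
		printf("index %3u -> major slot %2u, first minor %u\n",
		       samples[i], p.major_slot, p.first_minor);
	}
	return 0;
}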
2292 2292
2293 /** 2293 /**
2294 * sd_probe - called during driver initialization and whenever a 2294 * sd_probe - called during driver initialization and whenever a
2295 * new scsi device is attached to the system. It is called once 2295 * new scsi device is attached to the system. It is called once
2296 * for each scsi device (not just disks) present. 2296 * for each scsi device (not just disks) present.
2297 * @dev: pointer to device object 2297 * @dev: pointer to device object
2298 * 2298 *
2299 * Returns 0 if successful (or not interested in this scsi device 2299 * Returns 0 if successful (or not interested in this scsi device
2300 * (e.g. scanner)); 1 when there is an error. 2300 * (e.g. scanner)); 1 when there is an error.
2301 * 2301 *
2302 * Note: this function is invoked from the scsi mid-level. 2302 * Note: this function is invoked from the scsi mid-level.
2303 * This function sets up the mapping between a given 2303 * This function sets up the mapping between a given
2304 * <host,channel,id,lun> (found in sdp) and new device name 2304 * <host,channel,id,lun> (found in sdp) and new device name
2305 * (e.g. /dev/sda). More precisely it is the block device major 2305 * (e.g. /dev/sda). More precisely it is the block device major
2306 * and minor number that is chosen here. 2306 * and minor number that is chosen here.
2307 * 2307 *
2308 * Assume sd_attach is not re-entrant (for the time being). 2308 * Assume sd_attach is not re-entrant (for the time being).
2309 * Also think about sd_attach() and sd_remove() running concurrently. 2309 * Also think about sd_attach() and sd_remove() running concurrently.
2310 **/ 2310 **/
2311 static int sd_probe(struct device *dev) 2311 static int sd_probe(struct device *dev)
2312 { 2312 {
2313 struct scsi_device *sdp = to_scsi_device(dev); 2313 struct scsi_device *sdp = to_scsi_device(dev);
2314 struct scsi_disk *sdkp; 2314 struct scsi_disk *sdkp;
2315 struct gendisk *gd; 2315 struct gendisk *gd;
2316 int index; 2316 int index;
2317 int error; 2317 int error;
2318 2318
2319 error = -ENODEV; 2319 error = -ENODEV;
2320 if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC) 2320 if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
2321 goto out; 2321 goto out;
2322 2322
2323 SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, 2323 SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
2324 "sd_attach\n")); 2324 "sd_attach\n"));
2325 2325
2326 error = -ENOMEM; 2326 error = -ENOMEM;
2327 sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL); 2327 sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
2328 if (!sdkp) 2328 if (!sdkp)
2329 goto out; 2329 goto out;
2330 2330
2331 gd = alloc_disk(SD_MINORS); 2331 gd = alloc_disk(SD_MINORS);
2332 if (!gd) 2332 if (!gd)
2333 goto out_free; 2333 goto out_free;
2334 2334
2335 do { 2335 do {
2336 if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) 2336 if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
2337 goto out_put; 2337 goto out_put;
2338 2338
2339 spin_lock(&sd_index_lock); 2339 spin_lock(&sd_index_lock);
2340 error = ida_get_new(&sd_index_ida, &index); 2340 error = ida_get_new(&sd_index_ida, &index);
2341 spin_unlock(&sd_index_lock); 2341 spin_unlock(&sd_index_lock);
2342 } while (error == -EAGAIN); 2342 } while (error == -EAGAIN);
2343 2343
2344 if (error) 2344 if (error)
2345 goto out_put; 2345 goto out_put;
2346 2346
2347 error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); 2347 error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
2348 if (error) 2348 if (error)
2349 goto out_free_index; 2349 goto out_free_index;
2350 2350
2351 sdkp->device = sdp; 2351 sdkp->device = sdp;
2352 sdkp->driver = &sd_template; 2352 sdkp->driver = &sd_template;
2353 sdkp->disk = gd; 2353 sdkp->disk = gd;
2354 sdkp->index = index; 2354 sdkp->index = index;
2355 atomic_set(&sdkp->openers, 0); 2355 atomic_set(&sdkp->openers, 0);
2356 sdkp->previous_state = 1; 2356 sdkp->previous_state = 1;
2357 2357
2358 if (!sdp->request_queue->rq_timeout) { 2358 if (!sdp->request_queue->rq_timeout) {
2359 if (sdp->type != TYPE_MOD) 2359 if (sdp->type != TYPE_MOD)
2360 blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); 2360 blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
2361 else 2361 else
2362 blk_queue_rq_timeout(sdp->request_queue, 2362 blk_queue_rq_timeout(sdp->request_queue,
2363 SD_MOD_TIMEOUT); 2363 SD_MOD_TIMEOUT);
2364 } 2364 }
2365 2365
2366 device_initialize(&sdkp->dev); 2366 device_initialize(&sdkp->dev);
2367 sdkp->dev.parent = dev; 2367 sdkp->dev.parent = dev;
2368 sdkp->dev.class = &sd_disk_class; 2368 sdkp->dev.class = &sd_disk_class;
2369 dev_set_name(&sdkp->dev, dev_name(dev)); 2369 dev_set_name(&sdkp->dev, dev_name(dev));
2370 2370
2371 if (device_add(&sdkp->dev)) 2371 if (device_add(&sdkp->dev))
2372 goto out_free_index; 2372 goto out_free_index;
2373 2373
2374 get_device(dev); 2374 get_device(dev);
2375 dev_set_drvdata(dev, sdkp); 2375 dev_set_drvdata(dev, sdkp);
2376 2376
2377 get_device(&sdkp->dev); /* prevent release before async_schedule */ 2377 get_device(&sdkp->dev); /* prevent release before async_schedule */
2378 async_schedule(sd_probe_async, sdkp); 2378 async_schedule(sd_probe_async, sdkp);
2379 2379
2380 return 0; 2380 return 0;
2381 2381
2382 out_free_index: 2382 out_free_index:
2383 spin_lock(&sd_index_lock); 2383 spin_lock(&sd_index_lock);
2384 ida_remove(&sd_index_ida, index); 2384 ida_remove(&sd_index_ida, index);
2385 spin_unlock(&sd_index_lock); 2385 spin_unlock(&sd_index_lock);
2386 out_put: 2386 out_put:
2387 put_disk(gd); 2387 put_disk(gd);
2388 out_free: 2388 out_free:
2389 kfree(sdkp); 2389 kfree(sdkp);
2390 out: 2390 out:
2391 return error; 2391 return error;
2392 } 2392 }
2393 2393
2394 /** 2394 /**
2395 * sd_remove - called whenever a scsi disk (previously recognized by 2395 * sd_remove - called whenever a scsi disk (previously recognized by
2396 * sd_probe) is detached from the system. It is called (potentially 2396 * sd_probe) is detached from the system. It is called (potentially
2397 * multiple times) during sd module unload. 2397 * multiple times) during sd module unload.
2398 * @sdp: pointer to mid level scsi device object 2398 * @sdp: pointer to mid level scsi device object
2399 * 2399 *
2400 * Note: this function is invoked from the scsi mid-level. 2400 * Note: this function is invoked from the scsi mid-level.
2401 * This function potentially frees up a device name (e.g. /dev/sdc) 2401 * This function potentially frees up a device name (e.g. /dev/sdc)
2402 * that could be re-used by a subsequent sd_probe(). 2402 * that could be re-used by a subsequent sd_probe().
2403 * This function is not called when the built-in sd driver is "exit-ed". 2403 * This function is not called when the built-in sd driver is "exit-ed".
2404 **/ 2404 **/
2405 static int sd_remove(struct device *dev) 2405 static int sd_remove(struct device *dev)
2406 { 2406 {
2407 struct scsi_disk *sdkp; 2407 struct scsi_disk *sdkp;
2408 2408
2409 sdkp = dev_get_drvdata(dev); 2409 sdkp = dev_get_drvdata(dev);
2410 scsi_autopm_get_device(sdkp->device); 2410 scsi_autopm_get_device(sdkp->device);
2411 2411
2412 async_synchronize_full(); 2412 async_synchronize_full();
2413 blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn); 2413 blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
2414 blk_queue_unprep_rq(sdkp->device->request_queue, NULL); 2414 blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
2415 device_del(&sdkp->dev); 2415 device_del(&sdkp->dev);
2416 del_gendisk(sdkp->disk); 2416 del_gendisk(sdkp->disk);
2417 sd_shutdown(dev); 2417 sd_shutdown(dev);
2418 2418
2419 mutex_lock(&sd_ref_mutex); 2419 mutex_lock(&sd_ref_mutex);
2420 dev_set_drvdata(dev, NULL); 2420 dev_set_drvdata(dev, NULL);
2421 put_device(&sdkp->dev); 2421 put_device(&sdkp->dev);
2422 mutex_unlock(&sd_ref_mutex); 2422 mutex_unlock(&sd_ref_mutex);
2423 2423
2424 return 0; 2424 return 0;
2425 } 2425 }
2426 2426
2427 /** 2427 /**
2428 * scsi_disk_release - Called to free the scsi_disk structure 2428 * scsi_disk_release - Called to free the scsi_disk structure
2429 * @dev: pointer to embedded class device 2429 * @dev: pointer to embedded class device
2430 * 2430 *
2431 * sd_ref_mutex must be held entering this routine. Because it is 2431 * sd_ref_mutex must be held entering this routine. Because it is
2432 * called on last put, you should always use the scsi_disk_get() 2432 * called on last put, you should always use the scsi_disk_get()
2433 * scsi_disk_put() helpers which manipulate the semaphore directly 2433 * scsi_disk_put() helpers which manipulate the semaphore directly
2434 * and never do a direct put_device. 2434 * and never do a direct put_device.
2435 **/ 2435 **/
2436 static void scsi_disk_release(struct device *dev) 2436 static void scsi_disk_release(struct device *dev)
2437 { 2437 {
2438 struct scsi_disk *sdkp = to_scsi_disk(dev); 2438 struct scsi_disk *sdkp = to_scsi_disk(dev);
2439 struct gendisk *disk = sdkp->disk; 2439 struct gendisk *disk = sdkp->disk;
2440 2440
2441 spin_lock(&sd_index_lock); 2441 spin_lock(&sd_index_lock);
2442 ida_remove(&sd_index_ida, sdkp->index); 2442 ida_remove(&sd_index_ida, sdkp->index);
2443 spin_unlock(&sd_index_lock); 2443 spin_unlock(&sd_index_lock);
2444 2444
2445 disk->private_data = NULL; 2445 disk->private_data = NULL;
2446 put_disk(disk); 2446 put_disk(disk);
2447 put_device(&sdkp->device->sdev_gendev); 2447 put_device(&sdkp->device->sdev_gendev);
2448 2448
2449 kfree(sdkp); 2449 kfree(sdkp);
2450 } 2450 }
2451 2451
2452 static int sd_start_stop_device(struct scsi_disk *sdkp, int start) 2452 static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
2453 { 2453 {
2454 unsigned char cmd[6] = { START_STOP }; /* START_VALID */ 2454 unsigned char cmd[6] = { START_STOP }; /* START_VALID */
2455 struct scsi_sense_hdr sshdr; 2455 struct scsi_sense_hdr sshdr;
2456 struct scsi_device *sdp = sdkp->device; 2456 struct scsi_device *sdp = sdkp->device;
2457 int res; 2457 int res;
2458 2458
2459 if (start) 2459 if (start)
2460 cmd[4] |= 1; /* START */ 2460 cmd[4] |= 1; /* START */
2461 2461
2462 if (sdp->start_stop_pwr_cond) 2462 if (sdp->start_stop_pwr_cond)
2463 cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */ 2463 cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */
2464 2464
2465 if (!scsi_device_online(sdp)) 2465 if (!scsi_device_online(sdp))
2466 return -ENODEV; 2466 return -ENODEV;
2467 2467
2468 res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, 2468 res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
2469 SD_TIMEOUT, SD_MAX_RETRIES, NULL); 2469 SD_TIMEOUT, SD_MAX_RETRIES, NULL);
2470 if (res) { 2470 if (res) {
2471 sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n"); 2471 sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n");
2472 sd_print_result(sdkp, res); 2472 sd_print_result(sdkp, res);
2473 if (driver_byte(res) & DRIVER_SENSE) 2473 if (driver_byte(res) & DRIVER_SENSE)
2474 sd_print_sense_hdr(sdkp, &sshdr); 2474 sd_print_sense_hdr(sdkp, &sshdr);
2475 } 2475 }
2476 2476
2477 return res; 2477 return res;
2478 } 2478 }
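sd_start_stop_device() builds a six-byte START STOP UNIT CDB: bit 0 of byte 4 is the START bit, and when the device supports power conditions the same byte carries ACTIVE (0x1) or STANDBY (0x3) in its upper nibble. A sketch of just the CDB construction (opcode value taken from SPC; no I/O is issued):

#include <stdio.h>

#define START_STOP_OPCODE 0x1b	/* START STOP UNIT (SPC) */

/* Build the CDB the same way the driver does; does not issue any command. */
static void build_start_stop_cdb(unsigned char cmd[6], int start, int use_power_cond)
{
	for (int i = 0; i < 6; i++)
		cmd[i] = 0;
	cmd[0] = START_STOP_OPCODE;
	if (start)
		cmd[4] |= 1;				/* START bit */
	if (use_power_cond)
		cmd[4] |= start ? 1 << 4 : 3 << 4;	/* ACTIVE or STANDBY */
}

int main(void)
{
	unsigned char cdb[6];

	build_start_stop_cdb(cdb, 1, 1);
	printf("start, power cond: byte4=0x%02x\n", cdb[4]);
	build_start_stop_cdb(cdb, 0, 1);
	printf("stop,  power cond: byte4=0x%02x\n", cdb[4]);
	return 0;
}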
2479 2479
2480 /* 2480 /*
2481 * Send a SYNCHRONIZE CACHE instruction down to the device through 2481 * Send a SYNCHRONIZE CACHE instruction down to the device through
2482 * the normal SCSI command structure. Wait for the command to 2482 * the normal SCSI command structure. Wait for the command to
2483 * complete. 2483 * complete.
2484 */ 2484 */
2485 static void sd_shutdown(struct device *dev) 2485 static void sd_shutdown(struct device *dev)
2486 { 2486 {
2487 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); 2487 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
2488 2488
2489 if (!sdkp) 2489 if (!sdkp)
2490 return; /* this can happen */ 2490 return; /* this can happen */
2491 2491
2492 if (sdkp->WCE) { 2492 if (sdkp->WCE) {
2493 sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); 2493 sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
2494 sd_sync_cache(sdkp); 2494 sd_sync_cache(sdkp);
2495 } 2495 }
2496 2496
2497 if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { 2497 if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
2498 sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); 2498 sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
2499 sd_start_stop_device(sdkp, 0); 2499 sd_start_stop_device(sdkp, 0);
2500 } 2500 }
2501 2501
2502 scsi_disk_put(sdkp); 2502 scsi_disk_put(sdkp);
2503 } 2503 }
2504 2504
2505 static int sd_suspend(struct device *dev, pm_message_t mesg) 2505 static int sd_suspend(struct device *dev, pm_message_t mesg)
2506 { 2506 {
2507 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); 2507 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
2508 int ret = 0; 2508 int ret = 0;
2509 2509
2510 if (!sdkp) 2510 if (!sdkp)
2511 return 0; /* this can happen */ 2511 return 0; /* this can happen */
2512 2512
2513 if (sdkp->WCE) { 2513 if (sdkp->WCE) {
2514 sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); 2514 sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
2515 ret = sd_sync_cache(sdkp); 2515 ret = sd_sync_cache(sdkp);
2516 if (ret) 2516 if (ret)
2517 goto done; 2517 goto done;
2518 } 2518 }
2519 2519
2520 if ((mesg.event & PM_EVENT_SLEEP) && sdkp->device->manage_start_stop) { 2520 if ((mesg.event & PM_EVENT_SLEEP) && sdkp->device->manage_start_stop) {
2521 sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); 2521 sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
2522 ret = sd_start_stop_device(sdkp, 0); 2522 ret = sd_start_stop_device(sdkp, 0);
2523 } 2523 }
2524 2524
2525 done: 2525 done:
2526 scsi_disk_put(sdkp); 2526 scsi_disk_put(sdkp);
2527 return ret; 2527 return ret;
2528 } 2528 }
2529 2529
2530 static int sd_resume(struct device *dev) 2530 static int sd_resume(struct device *dev)
2531 { 2531 {
2532 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); 2532 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
2533 int ret = 0; 2533 int ret = 0;
2534 2534
2535 if (!sdkp->device->manage_start_stop) 2535 if (!sdkp->device->manage_start_stop)
2536 goto done; 2536 goto done;
2537 2537
2538 sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 2538 sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
2539 ret = sd_start_stop_device(sdkp, 1); 2539 ret = sd_start_stop_device(sdkp, 1);
2540 2540
2541 done: 2541 done:
2542 scsi_disk_put(sdkp); 2542 scsi_disk_put(sdkp);
2543 return ret; 2543 return ret;
2544 } 2544 }
2545 2545
2546 /** 2546 /**
2547 * init_sd - entry point for this driver (both when built in or when 2547 * init_sd - entry point for this driver (both when built in or when
2548 * a module). 2548 * a module).
2549 * 2549 *
2550 * Note: this function registers this driver with the scsi mid-level. 2550 * Note: this function registers this driver with the scsi mid-level.
2551 **/ 2551 **/
2552 static int __init init_sd(void) 2552 static int __init init_sd(void)
2553 { 2553 {
2554 int majors = 0, i, err; 2554 int majors = 0, i, err;
2555 2555
2556 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); 2556 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
2557 2557
2558 for (i = 0; i < SD_MAJORS; i++) 2558 for (i = 0; i < SD_MAJORS; i++)
2559 if (register_blkdev(sd_major(i), "sd") == 0) 2559 if (register_blkdev(sd_major(i), "sd") == 0)
2560 majors++; 2560 majors++;
2561 2561
2562 if (!majors) 2562 if (!majors)
2563 return -ENODEV; 2563 return -ENODEV;
2564 2564
2565 err = class_register(&sd_disk_class); 2565 err = class_register(&sd_disk_class);
2566 if (err) 2566 if (err)
2567 goto err_out; 2567 goto err_out;
2568 2568
2569 err = scsi_register_driver(&sd_template.gendrv); 2569 err = scsi_register_driver(&sd_template.gendrv);
2570 if (err) 2570 if (err)
2571 goto err_out_class; 2571 goto err_out_class;
2572 2572
2573 sd_cdb_cache = kmem_cache_create("sd_ext_cdb", SD_EXT_CDB_SIZE, 2573 sd_cdb_cache = kmem_cache_create("sd_ext_cdb", SD_EXT_CDB_SIZE,
2574 0, 0, NULL); 2574 0, 0, NULL);
2575 if (!sd_cdb_cache) { 2575 if (!sd_cdb_cache) {
2576 printk(KERN_ERR "sd: can't init extended cdb cache\n"); 2576 printk(KERN_ERR "sd: can't init extended cdb cache\n");
2577 goto err_out_class; 2577 goto err_out_class;
2578 } 2578 }
2579 2579
2580 sd_cdb_pool = mempool_create_slab_pool(SD_MEMPOOL_SIZE, sd_cdb_cache); 2580 sd_cdb_pool = mempool_create_slab_pool(SD_MEMPOOL_SIZE, sd_cdb_cache);
2581 if (!sd_cdb_pool) { 2581 if (!sd_cdb_pool) {
2582 printk(KERN_ERR "sd: can't init extended cdb pool\n"); 2582 printk(KERN_ERR "sd: can't init extended cdb pool\n");
2583 goto err_out_cache; 2583 goto err_out_cache;
2584 } 2584 }
2585 2585
2586 return 0; 2586 return 0;
2587 2587
2588 err_out_cache: 2588 err_out_cache:
2589 kmem_cache_destroy(sd_cdb_cache); 2589 kmem_cache_destroy(sd_cdb_cache);
2590 2590
2591 err_out_class: 2591 err_out_class:
2592 class_unregister(&sd_disk_class); 2592 class_unregister(&sd_disk_class);
2593 err_out: 2593 err_out:
2594 for (i = 0; i < SD_MAJORS; i++) 2594 for (i = 0; i < SD_MAJORS; i++)
2595 unregister_blkdev(sd_major(i), "sd"); 2595 unregister_blkdev(sd_major(i), "sd");
2596 return err; 2596 return err;
2597 } 2597 }
2598 2598
2599 /** 2599 /**
2600 * exit_sd - exit point for this driver (when it is a module). 2600 * exit_sd - exit point for this driver (when it is a module).
2601 * 2601 *
2602 * Note: this function unregisters this driver from the scsi mid-level. 2602 * Note: this function unregisters this driver from the scsi mid-level.
2603 **/ 2603 **/
2604 static void __exit exit_sd(void) 2604 static void __exit exit_sd(void)
2605 { 2605 {
2606 int i; 2606 int i;
2607 2607
2608 SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); 2608 SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
2609 2609
2610 mempool_destroy(sd_cdb_pool); 2610 mempool_destroy(sd_cdb_pool);
2611 kmem_cache_destroy(sd_cdb_cache); 2611 kmem_cache_destroy(sd_cdb_cache);
2612 2612
2613 scsi_unregister_driver(&sd_template.gendrv); 2613 scsi_unregister_driver(&sd_template.gendrv);
2614 class_unregister(&sd_disk_class); 2614 class_unregister(&sd_disk_class);
2615 2615
2616 for (i = 0; i < SD_MAJORS; i++) 2616 for (i = 0; i < SD_MAJORS; i++)
2617 unregister_blkdev(sd_major(i), "sd"); 2617 unregister_blkdev(sd_major(i), "sd");
2618 } 2618 }
2619 2619
2620 module_init(init_sd); 2620 module_init(init_sd);
2621 module_exit(exit_sd); 2621 module_exit(exit_sd);
2622 2622
2623 static void sd_print_sense_hdr(struct scsi_disk *sdkp, 2623 static void sd_print_sense_hdr(struct scsi_disk *sdkp,
2624 struct scsi_sense_hdr *sshdr) 2624 struct scsi_sense_hdr *sshdr)
2625 { 2625 {
2626 sd_printk(KERN_INFO, sdkp, ""); 2626 sd_printk(KERN_INFO, sdkp, "");
2627 scsi_show_sense_hdr(sshdr); 2627 scsi_show_sense_hdr(sshdr);
2628 sd_printk(KERN_INFO, sdkp, ""); 2628 sd_printk(KERN_INFO, sdkp, "");
2629 scsi_show_extd_sense(sshdr->asc, sshdr->ascq); 2629 scsi_show_extd_sense(sshdr->asc, sshdr->ascq);
2630 } 2630 }
2631 2631
2632 static void sd_print_result(struct scsi_disk *sdkp, int result) 2632 static void sd_print_result(struct scsi_disk *sdkp, int result)
2633 { 2633 {
2634 sd_printk(KERN_INFO, sdkp, ""); 2634 sd_printk(KERN_INFO, sdkp, "");
2635 scsi_show_result(result); 2635 scsi_show_result(result);
2636 } 2636 }
2637 2637
2638 2638
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #ifdef CONFIG_BLOCK 4 #ifdef CONFIG_BLOCK
5 5
6 #include <linux/sched.h> 6 #include <linux/sched.h>
7 #include <linux/major.h> 7 #include <linux/major.h>
8 #include <linux/genhd.h> 8 #include <linux/genhd.h>
9 #include <linux/list.h> 9 #include <linux/list.h>
10 #include <linux/timer.h> 10 #include <linux/timer.h>
11 #include <linux/workqueue.h> 11 #include <linux/workqueue.h>
12 #include <linux/pagemap.h> 12 #include <linux/pagemap.h>
13 #include <linux/backing-dev.h> 13 #include <linux/backing-dev.h>
14 #include <linux/wait.h> 14 #include <linux/wait.h>
15 #include <linux/mempool.h> 15 #include <linux/mempool.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/stringify.h> 18 #include <linux/stringify.h>
19 #include <linux/gfp.h> 19 #include <linux/gfp.h>
20 #include <linux/bsg.h> 20 #include <linux/bsg.h>
21 #include <linux/smp.h> 21 #include <linux/smp.h>
22 22
23 #include <asm/scatterlist.h> 23 #include <asm/scatterlist.h>
24 24
25 struct scsi_ioctl_command; 25 struct scsi_ioctl_command;
26 26
27 struct request_queue; 27 struct request_queue;
28 struct elevator_queue; 28 struct elevator_queue;
29 struct request_pm_state; 29 struct request_pm_state;
30 struct blk_trace; 30 struct blk_trace;
31 struct request; 31 struct request;
32 struct sg_io_hdr; 32 struct sg_io_hdr;
33 33
34 #define BLKDEV_MIN_RQ 4 34 #define BLKDEV_MIN_RQ 4
35 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 35 #define BLKDEV_MAX_RQ 128 /* Default maximum */
36 36
37 struct request; 37 struct request;
38 typedef void (rq_end_io_fn)(struct request *, int); 38 typedef void (rq_end_io_fn)(struct request *, int);
39 39
40 struct request_list { 40 struct request_list {
41 /* 41 /*
42 * count[], starved[], and wait[] are indexed by 42 * count[], starved[], and wait[] are indexed by
43 * BLK_RW_SYNC/BLK_RW_ASYNC 43 * BLK_RW_SYNC/BLK_RW_ASYNC
44 */ 44 */
45 int count[2]; 45 int count[2];
46 int starved[2]; 46 int starved[2];
47 int elvpriv; 47 int elvpriv;
48 mempool_t *rq_pool; 48 mempool_t *rq_pool;
49 wait_queue_head_t wait[2]; 49 wait_queue_head_t wait[2];
50 }; 50 };
51 51
52 /* 52 /*
53 * request command types 53 * request command types
54 */ 54 */
55 enum rq_cmd_type_bits { 55 enum rq_cmd_type_bits {
56 REQ_TYPE_FS = 1, /* fs request */ 56 REQ_TYPE_FS = 1, /* fs request */
57 REQ_TYPE_BLOCK_PC, /* scsi command */ 57 REQ_TYPE_BLOCK_PC, /* scsi command */
58 REQ_TYPE_SENSE, /* sense request */ 58 REQ_TYPE_SENSE, /* sense request */
59 REQ_TYPE_PM_SUSPEND, /* suspend request */ 59 REQ_TYPE_PM_SUSPEND, /* suspend request */
60 REQ_TYPE_PM_RESUME, /* resume request */ 60 REQ_TYPE_PM_RESUME, /* resume request */
61 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 61 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
62 REQ_TYPE_SPECIAL, /* driver defined type */ 62 REQ_TYPE_SPECIAL, /* driver defined type */
63 /* 63 /*
64 * for ATA/ATAPI devices. this really doesn't belong here, ide should 64 * for ATA/ATAPI devices. this really doesn't belong here, ide should
65 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 65 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
66 * private REQ_LB opcodes to differentiate what type of request this is 66 * private REQ_LB opcodes to differentiate what type of request this is
67 */ 67 */
68 REQ_TYPE_ATA_TASKFILE, 68 REQ_TYPE_ATA_TASKFILE,
69 REQ_TYPE_ATA_PC, 69 REQ_TYPE_ATA_PC,
70 }; 70 };
71 71
72 #define BLK_MAX_CDB 16 72 #define BLK_MAX_CDB 16
73 73
74 /* 74 /*
75 * try to put the fields that are referenced together in the same cacheline. 75 * try to put the fields that are referenced together in the same cacheline.
76 * if you modify this structure, be sure to check block/blk-core.c:rq_init() 76 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
77 * as well! 77 * as well!
78 */ 78 */
79 struct request { 79 struct request {
80 struct list_head queuelist; 80 struct list_head queuelist;
81 struct call_single_data csd; 81 struct call_single_data csd;
82 82
83 struct request_queue *q; 83 struct request_queue *q;
84 84
85 unsigned int cmd_flags; 85 unsigned int cmd_flags;
86 enum rq_cmd_type_bits cmd_type; 86 enum rq_cmd_type_bits cmd_type;
87 unsigned long atomic_flags; 87 unsigned long atomic_flags;
88 88
89 int cpu; 89 int cpu;
90 90
91 /* the following two fields are internal, NEVER access directly */ 91 /* the following two fields are internal, NEVER access directly */
92 unsigned int __data_len; /* total data len */ 92 unsigned int __data_len; /* total data len */
93 sector_t __sector; /* sector cursor */ 93 sector_t __sector; /* sector cursor */
94 94
95 struct bio *bio; 95 struct bio *bio;
96 struct bio *biotail; 96 struct bio *biotail;
97 97
98 struct hlist_node hash; /* merge hash */ 98 struct hlist_node hash; /* merge hash */
99 /* 99 /*
100 * The rb_node is only used inside the io scheduler, requests 100 * The rb_node is only used inside the io scheduler, requests
101 * are pruned when moved to the dispatch queue. So let the 101 * are pruned when moved to the dispatch queue. So let the
102 * completion_data share space with the rb_node. 102 * completion_data share space with the rb_node.
103 */ 103 */
104 union { 104 union {
105 struct rb_node rb_node; /* sort/lookup */ 105 struct rb_node rb_node; /* sort/lookup */
106 void *completion_data; 106 void *completion_data;
107 }; 107 };
108 108
109 /* 109 /*
110 * Three pointers are available for the IO schedulers, if they need 110 * Three pointers are available for the IO schedulers, if they need
111 * more they have to dynamically allocate it. 111 * more they have to dynamically allocate it.
112 */ 112 */
113 void *elevator_private; 113 void *elevator_private;
114 void *elevator_private2; 114 void *elevator_private2;
115 void *elevator_private3; 115 void *elevator_private3;
116 116
117 struct gendisk *rq_disk; 117 struct gendisk *rq_disk;
118 unsigned long start_time; 118 unsigned long start_time;
119 #ifdef CONFIG_BLK_CGROUP 119 #ifdef CONFIG_BLK_CGROUP
120 unsigned long long start_time_ns; 120 unsigned long long start_time_ns;
121 unsigned long long io_start_time_ns; /* when passed to hardware */ 121 unsigned long long io_start_time_ns; /* when passed to hardware */
122 #endif 122 #endif
123 /* Number of scatter-gather DMA addr+len pairs after 123 /* Number of scatter-gather DMA addr+len pairs after
124 * physical address coalescing is performed. 124 * physical address coalescing is performed.
125 */ 125 */
126 unsigned short nr_phys_segments; 126 unsigned short nr_phys_segments;
127 127
128 unsigned short ioprio; 128 unsigned short ioprio;
129 129
130 int ref_count; 130 int ref_count;
131 131
132 void *special; /* opaque pointer available for LLD use */ 132 void *special; /* opaque pointer available for LLD use */
133 char *buffer; /* kaddr of the current segment if available */ 133 char *buffer; /* kaddr of the current segment if available */
134 134
135 int tag; 135 int tag;
136 int errors; 136 int errors;
137 137
138 /* 138 /*
139 * when request is used as a packet command carrier 139 * when request is used as a packet command carrier
140 */ 140 */
141 unsigned char __cmd[BLK_MAX_CDB]; 141 unsigned char __cmd[BLK_MAX_CDB];
142 unsigned char *cmd; 142 unsigned char *cmd;
143 unsigned short cmd_len; 143 unsigned short cmd_len;
144 144
145 unsigned int extra_len; /* length of alignment and padding */ 145 unsigned int extra_len; /* length of alignment and padding */
146 unsigned int sense_len; 146 unsigned int sense_len;
147 unsigned int resid_len; /* residual count */ 147 unsigned int resid_len; /* residual count */
148 void *sense; 148 void *sense;
149 149
150 unsigned long deadline; 150 unsigned long deadline;
151 struct list_head timeout_list; 151 struct list_head timeout_list;
152 unsigned int timeout; 152 unsigned int timeout;
153 int retries; 153 int retries;
154 154
155 /* 155 /*
156 * completion callback. 156 * completion callback.
157 */ 157 */
158 rq_end_io_fn *end_io; 158 rq_end_io_fn *end_io;
159 void *end_io_data; 159 void *end_io_data;
160 160
161 /* for bidi */ 161 /* for bidi */
162 struct request *next_rq; 162 struct request *next_rq;
163 }; 163 };
164 164
165 static inline unsigned short req_get_ioprio(struct request *req) 165 static inline unsigned short req_get_ioprio(struct request *req)
166 { 166 {
167 return req->ioprio; 167 return req->ioprio;
168 } 168 }
169 169
170 /* 170 /*
171 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 171 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
172 * requests. Some step values could eventually be made generic. 172 * requests. Some step values could eventually be made generic.
173 */ 173 */
174 struct request_pm_state 174 struct request_pm_state
175 { 175 {
176 /* PM state machine step value, currently driver specific */ 176 /* PM state machine step value, currently driver specific */
177 int pm_step; 177 int pm_step;
178 /* requested PM state value (S1, S2, S3, S4, ...) */ 178 /* requested PM state value (S1, S2, S3, S4, ...) */
179 u32 pm_state; 179 u32 pm_state;
180 void* data; /* for driver use */ 180 void* data; /* for driver use */
181 }; 181 };
182 182
183 #include <linux/elevator.h> 183 #include <linux/elevator.h>
184 184
185 typedef void (request_fn_proc) (struct request_queue *q); 185 typedef void (request_fn_proc) (struct request_queue *q);
186 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 186 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
187 typedef int (prep_rq_fn) (struct request_queue *, struct request *); 187 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
188 typedef void (unprep_rq_fn) (struct request_queue *, struct request *); 188 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
189 typedef void (unplug_fn) (struct request_queue *); 189 typedef void (unplug_fn) (struct request_queue *);
190 190
191 struct bio_vec; 191 struct bio_vec;
192 struct bvec_merge_data { 192 struct bvec_merge_data {
193 struct block_device *bi_bdev; 193 struct block_device *bi_bdev;
194 sector_t bi_sector; 194 sector_t bi_sector;
195 unsigned bi_size; 195 unsigned bi_size;
196 unsigned long bi_rw; 196 unsigned long bi_rw;
197 }; 197 };
198 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, 198 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
199 struct bio_vec *); 199 struct bio_vec *);
200 typedef void (softirq_done_fn)(struct request *); 200 typedef void (softirq_done_fn)(struct request *);
201 typedef int (dma_drain_needed_fn)(struct request *); 201 typedef int (dma_drain_needed_fn)(struct request *);
202 typedef int (lld_busy_fn) (struct request_queue *q); 202 typedef int (lld_busy_fn) (struct request_queue *q);
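Of these hooks, merge_bvec_fn is the one stacking and odd-geometry drivers use to stop bio_add_page() from growing a bio across a boundary they cannot service in a single request. A minimal sketch for a hypothetical driver limited to 64 KiB windows (TOY_WINDOW_SECTORS and toy_merge_bvec are invented; the convention that returning fewer bytes than bvec->bv_len rejects the page follows existing users such as dm and md):

#include <linux/blkdev.h>
#include <linux/kernel.h>

#define TOY_WINDOW_SECTORS	128	/* hypothetical 64 KiB per-request window */

static int toy_merge_bvec(struct request_queue *q,
			  struct bvec_merge_data *bvm,
			  struct bio_vec *bvec)
{
	sector_t boundary = round_up(bvm->bi_sector + 1, TOY_WINDOW_SECTORS);
	unsigned int avail = (boundary - bvm->bi_sector) << 9;	/* bytes to boundary */

	if (bvm->bi_size >= avail)
		return 0;			/* bio already fills the window */

	/* returning less than bvec->bv_len tells bio_add_page() to stop here */
	return min_t(unsigned int, avail - bvm->bi_size, bvec->bv_len);
}

/* registered once at init time with blk_queue_merge_bvec(q, toy_merge_bvec) */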
203 203
204 enum blk_eh_timer_return { 204 enum blk_eh_timer_return {
205 BLK_EH_NOT_HANDLED, 205 BLK_EH_NOT_HANDLED,
206 BLK_EH_HANDLED, 206 BLK_EH_HANDLED,
207 BLK_EH_RESET_TIMER, 207 BLK_EH_RESET_TIMER,
208 }; 208 };
209 209
210 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); 210 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
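A driver installs such a handler with blk_queue_rq_timed_out() and picks the deadline with blk_queue_rq_timeout() (both declared further down); the handler decides whether a request that blew its deadline should be given more time, completed as failed by the block layer, or kept by the driver for its own recovery. A minimal sketch, assuming a hypothetical toy_dev hung off q->queuedata:

#include <linux/blkdev.h>

struct toy_dev {				/* hypothetical per-device state */
	int hw_busy;
};

static enum blk_eh_timer_return toy_timed_out(struct request *rq)
{
	struct toy_dev *dev = rq->q->queuedata;

	if (dev->hw_busy)
		return BLK_EH_RESET_TIMER;	/* re-arm the timer, keep waiting */

	/* give up: the block layer will now complete the request for us */
	return BLK_EH_HANDLED;
}

/* at init:
 *	blk_queue_rq_timed_out(q, toy_timed_out);
 *	blk_queue_rq_timeout(q, 30 * HZ);
 */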
211 211
212 enum blk_queue_state { 212 enum blk_queue_state {
213 Queue_down, 213 Queue_down,
214 Queue_up, 214 Queue_up,
215 }; 215 };
216 216
217 struct blk_queue_tag { 217 struct blk_queue_tag {
218 struct request **tag_index; /* map of busy tags */ 218 struct request **tag_index; /* map of busy tags */
219 unsigned long *tag_map; /* bit map of free/busy tags */ 219 unsigned long *tag_map; /* bit map of free/busy tags */
220 int busy; /* current depth */ 220 int busy; /* current depth */
221 int max_depth; /* what we will send to device */ 221 int max_depth; /* what we will send to device */
222 int real_max_depth; /* what the array can hold */ 222 int real_max_depth; /* what the array can hold */
223 atomic_t refcnt; /* map can be shared */ 223 atomic_t refcnt; /* map can be shared */
224 }; 224 };
225 225
226 #define BLK_SCSI_MAX_CMDS (256) 226 #define BLK_SCSI_MAX_CMDS (256)
227 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 227 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
228 228
229 struct queue_limits { 229 struct queue_limits {
230 unsigned long bounce_pfn; 230 unsigned long bounce_pfn;
231 unsigned long seg_boundary_mask; 231 unsigned long seg_boundary_mask;
232 232
233 unsigned int max_hw_sectors; 233 unsigned int max_hw_sectors;
234 unsigned int max_sectors; 234 unsigned int max_sectors;
235 unsigned int max_segment_size; 235 unsigned int max_segment_size;
236 unsigned int physical_block_size; 236 unsigned int physical_block_size;
237 unsigned int alignment_offset; 237 unsigned int alignment_offset;
238 unsigned int io_min; 238 unsigned int io_min;
239 unsigned int io_opt; 239 unsigned int io_opt;
240 unsigned int max_discard_sectors; 240 unsigned int max_discard_sectors;
241 unsigned int discard_granularity; 241 unsigned int discard_granularity;
242 unsigned int discard_alignment; 242 unsigned int discard_alignment;
243 243
244 unsigned short logical_block_size; 244 unsigned short logical_block_size;
245 unsigned short max_segments; 245 unsigned short max_segments;
246 246
247 unsigned char misaligned; 247 unsigned char misaligned;
248 unsigned char discard_misaligned; 248 unsigned char discard_misaligned;
249 unsigned char no_cluster; 249 unsigned char no_cluster;
250 signed char discard_zeroes_data; 250 signed char discard_zeroes_data;
251 }; 251 };
252 252
253 struct request_queue 253 struct request_queue
254 { 254 {
255 /* 255 /*
256 * Together with queue_head for cacheline sharing 256 * Together with queue_head for cacheline sharing
257 */ 257 */
258 struct list_head queue_head; 258 struct list_head queue_head;
259 struct request *last_merge; 259 struct request *last_merge;
260 struct elevator_queue *elevator; 260 struct elevator_queue *elevator;
261 261
262 /* 262 /*
263 * the queue request freelist, one for reads and one for writes 263 * the queue request freelist, one for reads and one for writes
264 */ 264 */
265 struct request_list rq; 265 struct request_list rq;
266 266
267 request_fn_proc *request_fn; 267 request_fn_proc *request_fn;
268 make_request_fn *make_request_fn; 268 make_request_fn *make_request_fn;
269 prep_rq_fn *prep_rq_fn; 269 prep_rq_fn *prep_rq_fn;
270 unprep_rq_fn *unprep_rq_fn; 270 unprep_rq_fn *unprep_rq_fn;
271 unplug_fn *unplug_fn; 271 unplug_fn *unplug_fn;
272 merge_bvec_fn *merge_bvec_fn; 272 merge_bvec_fn *merge_bvec_fn;
273 softirq_done_fn *softirq_done_fn; 273 softirq_done_fn *softirq_done_fn;
274 rq_timed_out_fn *rq_timed_out_fn; 274 rq_timed_out_fn *rq_timed_out_fn;
275 dma_drain_needed_fn *dma_drain_needed; 275 dma_drain_needed_fn *dma_drain_needed;
276 lld_busy_fn *lld_busy_fn; 276 lld_busy_fn *lld_busy_fn;
277 277
278 /* 278 /*
279 * Dispatch queue sorting 279 * Dispatch queue sorting
280 */ 280 */
281 sector_t end_sector; 281 sector_t end_sector;
282 struct request *boundary_rq; 282 struct request *boundary_rq;
283 283
284 /* 284 /*
285 * Auto-unplugging state 285 * Auto-unplugging state
286 */ 286 */
287 struct timer_list unplug_timer; 287 struct timer_list unplug_timer;
288 int unplug_thresh; /* After this many requests */ 288 int unplug_thresh; /* After this many requests */
289 unsigned long unplug_delay; /* After this many jiffies */ 289 unsigned long unplug_delay; /* After this many jiffies */
290 struct work_struct unplug_work; 290 struct work_struct unplug_work;
291 291
292 struct backing_dev_info backing_dev_info; 292 struct backing_dev_info backing_dev_info;
293 293
294 /* 294 /*
295 * The queue owner gets to use this for whatever they like. 295 * The queue owner gets to use this for whatever they like.
296 * ll_rw_blk doesn't touch it. 296 * ll_rw_blk doesn't touch it.
297 */ 297 */
298 void *queuedata; 298 void *queuedata;
299 299
300 /* 300 /*
301 * queue needs bounce pages for pages above this limit 301 * queue needs bounce pages for pages above this limit
302 */ 302 */
303 gfp_t bounce_gfp; 303 gfp_t bounce_gfp;
304 304
305 /* 305 /*
306 * various queue flags, see QUEUE_* below 306 * various queue flags, see QUEUE_* below
307 */ 307 */
308 unsigned long queue_flags; 308 unsigned long queue_flags;
309 309
310 /* 310 /*
311 * protects queue structures from reentrancy. ->__queue_lock should 311 * protects queue structures from reentrancy. ->__queue_lock should
312 * _never_ be used directly, it is queue private. always use 312 * _never_ be used directly, it is queue private. always use
313 * ->queue_lock. 313 * ->queue_lock.
314 */ 314 */
315 spinlock_t __queue_lock; 315 spinlock_t __queue_lock;
316 spinlock_t *queue_lock; 316 spinlock_t *queue_lock;
317 317
318 /* 318 /*
319 * queue kobject 319 * queue kobject
320 */ 320 */
321 struct kobject kobj; 321 struct kobject kobj;
322 322
323 /* 323 /*
324 * queue settings 324 * queue settings
325 */ 325 */
326 unsigned long nr_requests; /* Max # of requests */ 326 unsigned long nr_requests; /* Max # of requests */
327 unsigned int nr_congestion_on; 327 unsigned int nr_congestion_on;
328 unsigned int nr_congestion_off; 328 unsigned int nr_congestion_off;
329 unsigned int nr_batching; 329 unsigned int nr_batching;
330 330
331 void *dma_drain_buffer; 331 void *dma_drain_buffer;
332 unsigned int dma_drain_size; 332 unsigned int dma_drain_size;
333 unsigned int dma_pad_mask; 333 unsigned int dma_pad_mask;
334 unsigned int dma_alignment; 334 unsigned int dma_alignment;
335 335
336 struct blk_queue_tag *queue_tags; 336 struct blk_queue_tag *queue_tags;
337 struct list_head tag_busy_list; 337 struct list_head tag_busy_list;
338 338
339 unsigned int nr_sorted; 339 unsigned int nr_sorted;
340 unsigned int in_flight[2]; 340 unsigned int in_flight[2];
341 341
342 unsigned int rq_timeout; 342 unsigned int rq_timeout;
343 struct timer_list timeout; 343 struct timer_list timeout;
344 struct list_head timeout_list; 344 struct list_head timeout_list;
345 345
346 struct queue_limits limits; 346 struct queue_limits limits;
347 347
348 /* 348 /*
349 * sg stuff 349 * sg stuff
350 */ 350 */
351 unsigned int sg_timeout; 351 unsigned int sg_timeout;
352 unsigned int sg_reserved_size; 352 unsigned int sg_reserved_size;
353 int node; 353 int node;
354 #ifdef CONFIG_BLK_DEV_IO_TRACE 354 #ifdef CONFIG_BLK_DEV_IO_TRACE
355 struct blk_trace *blk_trace; 355 struct blk_trace *blk_trace;
356 #endif 356 #endif
357 /* 357 /*
358 * reserved for flush operations 358 * for flush operations
359 */ 359 */
360 unsigned int flush_flags;
361
360 unsigned int ordered, next_ordered, ordseq; 362 unsigned int ordered, next_ordered, ordseq;
361 int orderr, ordcolor; 363 int orderr, ordcolor;
362 struct request pre_flush_rq, bar_rq, post_flush_rq; 364 struct request pre_flush_rq, bar_rq, post_flush_rq;
363 struct request *orig_bar_rq; 365 struct request *orig_bar_rq;
364 366
365 struct mutex sysfs_lock; 367 struct mutex sysfs_lock;
366 368
367 #if defined(CONFIG_BLK_DEV_BSG) 369 #if defined(CONFIG_BLK_DEV_BSG)
368 struct bsg_class_device bsg_dev; 370 struct bsg_class_device bsg_dev;
369 #endif 371 #endif
370 }; 372 };
371 373
372 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 374 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
373 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 375 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
374 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 376 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
375 #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ 377 #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
376 #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ 378 #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
377 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 379 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
378 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 380 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
379 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 381 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
380 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 382 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
381 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 383 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
382 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 384 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
383 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ 385 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
384 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 386 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
385 #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ 387 #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */
386 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ 388 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
387 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ 389 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
388 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ 390 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
389 #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ 391 #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */
390 #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ 392 #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */
391 #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ 393 #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */
392 #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ 394 #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */
393 395
394 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 396 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
395 (1 << QUEUE_FLAG_CLUSTER) | \ 397 (1 << QUEUE_FLAG_CLUSTER) | \
396 (1 << QUEUE_FLAG_STACKABLE) | \ 398 (1 << QUEUE_FLAG_STACKABLE) | \
397 (1 << QUEUE_FLAG_SAME_COMP) | \ 399 (1 << QUEUE_FLAG_SAME_COMP) | \
398 (1 << QUEUE_FLAG_ADD_RANDOM)) 400 (1 << QUEUE_FLAG_ADD_RANDOM))
399 401
400 static inline int queue_is_locked(struct request_queue *q) 402 static inline int queue_is_locked(struct request_queue *q)
401 { 403 {
402 #ifdef CONFIG_SMP 404 #ifdef CONFIG_SMP
403 spinlock_t *lock = q->queue_lock; 405 spinlock_t *lock = q->queue_lock;
404 return lock && spin_is_locked(lock); 406 return lock && spin_is_locked(lock);
405 #else 407 #else
406 return 1; 408 return 1;
407 #endif 409 #endif
408 } 410 }
409 411
410 static inline void queue_flag_set_unlocked(unsigned int flag, 412 static inline void queue_flag_set_unlocked(unsigned int flag,
411 struct request_queue *q) 413 struct request_queue *q)
412 { 414 {
413 __set_bit(flag, &q->queue_flags); 415 __set_bit(flag, &q->queue_flags);
414 } 416 }
415 417
416 static inline int queue_flag_test_and_clear(unsigned int flag, 418 static inline int queue_flag_test_and_clear(unsigned int flag,
417 struct request_queue *q) 419 struct request_queue *q)
418 { 420 {
419 WARN_ON_ONCE(!queue_is_locked(q)); 421 WARN_ON_ONCE(!queue_is_locked(q));
420 422
421 if (test_bit(flag, &q->queue_flags)) { 423 if (test_bit(flag, &q->queue_flags)) {
422 __clear_bit(flag, &q->queue_flags); 424 __clear_bit(flag, &q->queue_flags);
423 return 1; 425 return 1;
424 } 426 }
425 427
426 return 0; 428 return 0;
427 } 429 }
428 430
429 static inline int queue_flag_test_and_set(unsigned int flag, 431 static inline int queue_flag_test_and_set(unsigned int flag,
430 struct request_queue *q) 432 struct request_queue *q)
431 { 433 {
432 WARN_ON_ONCE(!queue_is_locked(q)); 434 WARN_ON_ONCE(!queue_is_locked(q));
433 435
434 if (!test_bit(flag, &q->queue_flags)) { 436 if (!test_bit(flag, &q->queue_flags)) {
435 __set_bit(flag, &q->queue_flags); 437 __set_bit(flag, &q->queue_flags);
436 return 0; 438 return 0;
437 } 439 }
438 440
439 return 1; 441 return 1;
440 } 442 }
441 443
442 static inline void queue_flag_set(unsigned int flag, struct request_queue *q) 444 static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
443 { 445 {
444 WARN_ON_ONCE(!queue_is_locked(q)); 446 WARN_ON_ONCE(!queue_is_locked(q));
445 __set_bit(flag, &q->queue_flags); 447 __set_bit(flag, &q->queue_flags);
446 } 448 }
447 449
448 static inline void queue_flag_clear_unlocked(unsigned int flag, 450 static inline void queue_flag_clear_unlocked(unsigned int flag,
449 struct request_queue *q) 451 struct request_queue *q)
450 { 452 {
451 __clear_bit(flag, &q->queue_flags); 453 __clear_bit(flag, &q->queue_flags);
452 } 454 }
453 455
454 static inline int queue_in_flight(struct request_queue *q) 456 static inline int queue_in_flight(struct request_queue *q)
455 { 457 {
456 return q->in_flight[0] + q->in_flight[1]; 458 return q->in_flight[0] + q->in_flight[1];
457 } 459 }
458 460
459 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) 461 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
460 { 462 {
461 WARN_ON_ONCE(!queue_is_locked(q)); 463 WARN_ON_ONCE(!queue_is_locked(q));
462 __clear_bit(flag, &q->queue_flags); 464 __clear_bit(flag, &q->queue_flags);
463 } 465 }
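The only difference between the plain and *_unlocked flag helpers above is the WARN_ON_ONCE(!queue_is_locked(q)) check: the plain ones expect q->queue_lock to be held, the unlocked ones are for setup paths where nobody else can see the queue yet. A small usage sketch (toy_* names and the error-recovery policy are invented):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

/* probe time, queue not visible yet: mark a hypothetical SSD non-rotational */
static void toy_tune_queue(struct request_queue *q)
{
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
}

/* run time: the locked variants are used under q->queue_lock */
static void toy_disable_merging(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	queue_flag_set(QUEUE_FLAG_NOMERGES, q);	/* e.g. while recovering errors */
	spin_unlock_irqrestore(q->queue_lock, flags);
}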
464 466
465 enum { 467 enum {
466 /* 468 /*
467 * Hardbarrier is supported with one of the following methods. 469 * Hardbarrier is supported with one of the following methods.
468 * 470 *
469 * NONE : hardbarrier unsupported 471 * NONE : hardbarrier unsupported
470 * DRAIN : ordering by draining is enough 472 * DRAIN : ordering by draining is enough
471 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes 473 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
472 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write 474 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
473 */ 475 */
474 QUEUE_ORDERED_DO_PREFLUSH = 0x10, 476 QUEUE_ORDERED_DO_PREFLUSH = 0x10,
475 QUEUE_ORDERED_DO_BAR = 0x20, 477 QUEUE_ORDERED_DO_BAR = 0x20,
476 QUEUE_ORDERED_DO_POSTFLUSH = 0x40, 478 QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
477 QUEUE_ORDERED_DO_FUA = 0x80, 479 QUEUE_ORDERED_DO_FUA = 0x80,
478 480
479 QUEUE_ORDERED_NONE = 0x00, 481 QUEUE_ORDERED_NONE = 0x00,
480 482
481 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, 483 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR,
482 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 484 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
483 QUEUE_ORDERED_DO_PREFLUSH | 485 QUEUE_ORDERED_DO_PREFLUSH |
484 QUEUE_ORDERED_DO_POSTFLUSH, 486 QUEUE_ORDERED_DO_POSTFLUSH,
485 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 487 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
486 QUEUE_ORDERED_DO_PREFLUSH | 488 QUEUE_ORDERED_DO_PREFLUSH |
487 QUEUE_ORDERED_DO_FUA, 489 QUEUE_ORDERED_DO_FUA,
488 490
489 /* 491 /*
490 * Ordered operation sequence 492 * Ordered operation sequence
491 */ 493 */
492 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ 494 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
493 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ 495 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
494 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ 496 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
495 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ 497 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
496 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ 498 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
497 QUEUE_ORDSEQ_DONE = 0x20, 499 QUEUE_ORDSEQ_DONE = 0x20,
498 }; 500 };
499 501
500 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 502 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
501 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 503 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
502 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 504 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
503 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 505 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
504 #define blk_queue_noxmerges(q) \ 506 #define blk_queue_noxmerges(q) \
505 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) 507 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
506 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 508 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
507 #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) 509 #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
508 #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) 510 #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
509 #define blk_queue_stackable(q) \ 511 #define blk_queue_stackable(q) \
510 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) 512 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
511 #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) 513 #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
512 #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ 514 #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \
513 test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) 515 test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
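These predicates are how upper layers discover queue capabilities before relying on them; blk_queue_discard(), for instance, gates whether issuing a discard makes sense at all. A hedged sketch (toy_trim_range is an invented helper; bdev_get_queue() and blkdev_issue_discard() are declared elsewhere in this header):

#include <linux/blkdev.h>

static int toy_trim_range(struct block_device *bdev, sector_t start,
			  sector_t nr_sects)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q || !blk_queue_discard(q))
		return -EOPNOTSUPP;		/* device never advertised discard */

	return blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL,
				    BLKDEV_IFL_WAIT);
}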
514 516
515 #define blk_noretry_request(rq) \ 517 #define blk_noretry_request(rq) \
516 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ 518 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
517 REQ_FAILFAST_DRIVER)) 519 REQ_FAILFAST_DRIVER))
518 520
519 #define blk_account_rq(rq) \ 521 #define blk_account_rq(rq) \
520 (((rq)->cmd_flags & REQ_STARTED) && \ 522 (((rq)->cmd_flags & REQ_STARTED) && \
521 ((rq)->cmd_type == REQ_TYPE_FS || \ 523 ((rq)->cmd_type == REQ_TYPE_FS || \
522 ((rq)->cmd_flags & REQ_DISCARD))) 524 ((rq)->cmd_flags & REQ_DISCARD)))
523 525
524 #define blk_pm_request(rq) \ 526 #define blk_pm_request(rq) \
525 ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ 527 ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
526 (rq)->cmd_type == REQ_TYPE_PM_RESUME) 528 (rq)->cmd_type == REQ_TYPE_PM_RESUME)
527 529
528 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 530 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
529 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 531 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
530 /* rq->queuelist of dequeued request must be list_empty() */ 532 /* rq->queuelist of dequeued request must be list_empty() */
531 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) 533 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
532 534
533 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 535 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
534 536
535 #define rq_data_dir(rq) ((rq)->cmd_flags & 1) 537 #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
536 538
537 /* 539 /*
538 * We regard a request as sync if it is either a read or a sync write 540 * We regard a request as sync if it is either a read or a sync write
539 */ 541 */
540 static inline bool rw_is_sync(unsigned int rw_flags) 542 static inline bool rw_is_sync(unsigned int rw_flags)
541 { 543 {
542 return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); 544 return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC);
543 } 545 }
544 546
545 static inline bool rq_is_sync(struct request *rq) 547 static inline bool rq_is_sync(struct request *rq)
546 { 548 {
547 return rw_is_sync(rq->cmd_flags); 549 return rw_is_sync(rq->cmd_flags);
548 } 550 }
549 551
550 static inline int blk_queue_full(struct request_queue *q, int sync) 552 static inline int blk_queue_full(struct request_queue *q, int sync)
551 { 553 {
552 if (sync) 554 if (sync)
553 return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); 555 return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
554 return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); 556 return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
555 } 557 }
556 558
557 static inline void blk_set_queue_full(struct request_queue *q, int sync) 559 static inline void blk_set_queue_full(struct request_queue *q, int sync)
558 { 560 {
559 if (sync) 561 if (sync)
560 queue_flag_set(QUEUE_FLAG_SYNCFULL, q); 562 queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
561 else 563 else
562 queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); 564 queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
563 } 565 }
564 566
565 static inline void blk_clear_queue_full(struct request_queue *q, int sync) 567 static inline void blk_clear_queue_full(struct request_queue *q, int sync)
566 { 568 {
567 if (sync) 569 if (sync)
568 queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); 570 queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
569 else 571 else
570 queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); 572 queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
571 } 573 }
572 574
573 575
574 /* 576 /*
575 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 577 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
576 * it already be started by driver. 578 * it already be started by driver.
577 */ 579 */
578 #define RQ_NOMERGE_FLAGS \ 580 #define RQ_NOMERGE_FLAGS \
579 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 581 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
580 #define rq_mergeable(rq) \ 582 #define rq_mergeable(rq) \
581 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 583 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
582 (((rq)->cmd_flags & REQ_DISCARD) || \ 584 (((rq)->cmd_flags & REQ_DISCARD) || \
583 (rq)->cmd_type == REQ_TYPE_FS)) 585 (rq)->cmd_type == REQ_TYPE_FS))
584 586
585 /* 587 /*
586 * q->prep_rq_fn return values 588 * q->prep_rq_fn return values
587 */ 589 */
588 #define BLKPREP_OK 0 /* serve it */ 590 #define BLKPREP_OK 0 /* serve it */
589 #define BLKPREP_KILL 1 /* fatal error, kill */ 591 #define BLKPREP_KILL 1 /* fatal error, kill */
590 #define BLKPREP_DEFER 2 /* leave on queue */ 592 #define BLKPREP_DEFER 2 /* leave on queue */
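A prep_rq_fn (installed with blk_queue_prep_rq(), declared further down) runs just before a request is handed to the driver and answers with one of the three codes above. A sketch for a hypothetical device that attaches a command descriptor to each filesystem request (struct toy_cmd, toy_cmd_cache and toy_build_cmd are invented):

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/slab.h>

struct toy_cmd { u8 cdb[16]; };			/* hypothetical descriptor */
static struct kmem_cache *toy_cmd_cache;	/* created at module init */

static void toy_build_cmd(struct toy_cmd *cmd, struct request *rq)
{
	/* fill in the hardware-specific descriptor from rq (omitted) */
}

static int toy_prep_rq(struct request_queue *q, struct request *rq)
{
	struct toy_cmd *cmd;

	if (rq->cmd_type != REQ_TYPE_FS)
		return BLKPREP_OK;		/* only prepare fs requests here */
	if (rq->special)
		return BLKPREP_OK;		/* already prepared on an earlier pass */

	if (blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->rq_disk))
		return BLKPREP_KILL;		/* past end of device: fail the request */

	cmd = kmem_cache_alloc(toy_cmd_cache, GFP_ATOMIC);
	if (!cmd)
		return BLKPREP_DEFER;		/* out of resources, leave it queued */

	toy_build_cmd(cmd, rq);
	rq->special = cmd;
	return BLKPREP_OK;
}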
591 593
592 extern unsigned long blk_max_low_pfn, blk_max_pfn; 594 extern unsigned long blk_max_low_pfn, blk_max_pfn;
593 595
594 /* 596 /*
595 * standard bounce addresses: 597 * standard bounce addresses:
596 * 598 *
597 * BLK_BOUNCE_HIGH : bounce all highmem pages 599 * BLK_BOUNCE_HIGH : bounce all highmem pages
598 * BLK_BOUNCE_ANY : don't bounce anything 600 * BLK_BOUNCE_ANY : don't bounce anything
599 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 601 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
600 */ 602 */
601 603
602 #if BITS_PER_LONG == 32 604 #if BITS_PER_LONG == 32
603 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 605 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
604 #else 606 #else
605 #define BLK_BOUNCE_HIGH -1ULL 607 #define BLK_BOUNCE_HIGH -1ULL
606 #endif 608 #endif
607 #define BLK_BOUNCE_ANY (-1ULL) 609 #define BLK_BOUNCE_ANY (-1ULL)
608 #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) 610 #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24))
609 611
610 /* 612 /*
611 * default timeout for SG_IO if none specified 613 * default timeout for SG_IO if none specified
612 */ 614 */
613 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 615 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
614 #define BLK_MIN_SG_TIMEOUT (7 * HZ) 616 #define BLK_MIN_SG_TIMEOUT (7 * HZ)
615 617
616 #ifdef CONFIG_BOUNCE 618 #ifdef CONFIG_BOUNCE
617 extern int init_emergency_isa_pool(void); 619 extern int init_emergency_isa_pool(void);
618 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); 620 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
619 #else 621 #else
620 static inline int init_emergency_isa_pool(void) 622 static inline int init_emergency_isa_pool(void)
621 { 623 {
622 return 0; 624 return 0;
623 } 625 }
624 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) 626 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
625 { 627 {
626 } 628 }
627 #endif /* CONFIG_BOUNCE */ 629 #endif /* CONFIG_BOUNCE */
628 630
629 struct rq_map_data { 631 struct rq_map_data {
630 struct page **pages; 632 struct page **pages;
631 int page_order; 633 int page_order;
632 int nr_entries; 634 int nr_entries;
633 unsigned long offset; 635 unsigned long offset;
634 int null_mapped; 636 int null_mapped;
635 int from_user; 637 int from_user;
636 }; 638 };
637 639
638 struct req_iterator { 640 struct req_iterator {
639 int i; 641 int i;
640 struct bio *bio; 642 struct bio *bio;
641 }; 643 };
642 644
643 /* This should not be used directly - use rq_for_each_segment */ 645 /* This should not be used directly - use rq_for_each_segment */
644 #define for_each_bio(_bio) \ 646 #define for_each_bio(_bio) \
645 for (; _bio; _bio = _bio->bi_next) 647 for (; _bio; _bio = _bio->bi_next)
646 #define __rq_for_each_bio(_bio, rq) \ 648 #define __rq_for_each_bio(_bio, rq) \
647 if ((rq->bio)) \ 649 if ((rq->bio)) \
648 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 650 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
649 651
650 #define rq_for_each_segment(bvl, _rq, _iter) \ 652 #define rq_for_each_segment(bvl, _rq, _iter) \
651 __rq_for_each_bio(_iter.bio, _rq) \ 653 __rq_for_each_bio(_iter.bio, _rq) \
652 bio_for_each_segment(bvl, _iter.bio, _iter.i) 654 bio_for_each_segment(bvl, _iter.bio, _iter.i)
653 655
654 #define rq_iter_last(rq, _iter) \ 656 #define rq_iter_last(rq, _iter) \
655 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) 657 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
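rq_for_each_segment() visits every bio_vec of every bio attached to a request, which is what drivers that program data transfers by hand (instead of calling blk_rq_map_sg()) iterate over. A trivial sketch that just sums the payload:

#include <linux/bio.h>
#include <linux/blkdev.h>

static unsigned int toy_count_payload(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned int bytes = 0;

	rq_for_each_segment(bvec, rq, iter)
		bytes += bvec->bv_len;

	return bytes;	/* for a filesystem request this matches blk_rq_bytes(rq) */
}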
656 658
657 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 659 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
658 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" 660 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
659 #endif 661 #endif
660 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 662 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
661 extern void rq_flush_dcache_pages(struct request *rq); 663 extern void rq_flush_dcache_pages(struct request *rq);
662 #else 664 #else
663 static inline void rq_flush_dcache_pages(struct request *rq) 665 static inline void rq_flush_dcache_pages(struct request *rq)
664 { 666 {
665 } 667 }
666 #endif 668 #endif
667 669
668 extern int blk_register_queue(struct gendisk *disk); 670 extern int blk_register_queue(struct gendisk *disk);
669 extern void blk_unregister_queue(struct gendisk *disk); 671 extern void blk_unregister_queue(struct gendisk *disk);
670 extern void register_disk(struct gendisk *dev); 672 extern void register_disk(struct gendisk *dev);
671 extern void generic_make_request(struct bio *bio); 673 extern void generic_make_request(struct bio *bio);
672 extern void blk_rq_init(struct request_queue *q, struct request *rq); 674 extern void blk_rq_init(struct request_queue *q, struct request *rq);
673 extern void blk_put_request(struct request *); 675 extern void blk_put_request(struct request *);
674 extern void __blk_put_request(struct request_queue *, struct request *); 676 extern void __blk_put_request(struct request_queue *, struct request *);
675 extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 677 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
676 extern struct request *blk_make_request(struct request_queue *, struct bio *, 678 extern struct request *blk_make_request(struct request_queue *, struct bio *,
677 gfp_t); 679 gfp_t);
678 extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 680 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
679 extern void blk_requeue_request(struct request_queue *, struct request *); 681 extern void blk_requeue_request(struct request_queue *, struct request *);
680 extern void blk_add_request_payload(struct request *rq, struct page *page, 682 extern void blk_add_request_payload(struct request *rq, struct page *page,
681 unsigned int len); 683 unsigned int len);
682 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); 684 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
683 extern int blk_lld_busy(struct request_queue *q); 685 extern int blk_lld_busy(struct request_queue *q);
684 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 686 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
685 struct bio_set *bs, gfp_t gfp_mask, 687 struct bio_set *bs, gfp_t gfp_mask,
686 int (*bio_ctr)(struct bio *, struct bio *, void *), 688 int (*bio_ctr)(struct bio *, struct bio *, void *),
687 void *data); 689 void *data);
688 extern void blk_rq_unprep_clone(struct request *rq); 690 extern void blk_rq_unprep_clone(struct request *rq);
689 extern int blk_insert_cloned_request(struct request_queue *q, 691 extern int blk_insert_cloned_request(struct request_queue *q,
690 struct request *rq); 692 struct request *rq);
691 extern void blk_plug_device(struct request_queue *); 693 extern void blk_plug_device(struct request_queue *);
692 extern void blk_plug_device_unlocked(struct request_queue *); 694 extern void blk_plug_device_unlocked(struct request_queue *);
693 extern int blk_remove_plug(struct request_queue *); 695 extern int blk_remove_plug(struct request_queue *);
694 extern void blk_recount_segments(struct request_queue *, struct bio *); 696 extern void blk_recount_segments(struct request_queue *, struct bio *);
695 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, 697 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
696 unsigned int, void __user *); 698 unsigned int, void __user *);
697 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, 699 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
698 struct scsi_ioctl_command __user *); 700 struct scsi_ioctl_command __user *);
699 701
700 /* 702 /*
701 * A queue has just exited congestion. Note this in the global counter of 703 * A queue has just exited congestion. Note this in the global counter of
702 * congested queues, and wake up anyone who was waiting for requests to be 704 * congested queues, and wake up anyone who was waiting for requests to be
703 * put back. 705 * put back.
704 */ 706 */
705 static inline void blk_clear_queue_congested(struct request_queue *q, int sync) 707 static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
706 { 708 {
707 clear_bdi_congested(&q->backing_dev_info, sync); 709 clear_bdi_congested(&q->backing_dev_info, sync);
708 } 710 }
709 711
710 /* 712 /*
711 * A queue has just entered congestion. Flag that in the queue's VM-visible 713 * A queue has just entered congestion. Flag that in the queue's VM-visible
712 * state flags and increment the global counter of congested queues. 714 * state flags and increment the global counter of congested queues.
713 */ 715 */
714 static inline void blk_set_queue_congested(struct request_queue *q, int sync) 716 static inline void blk_set_queue_congested(struct request_queue *q, int sync)
715 { 717 {
716 set_bdi_congested(&q->backing_dev_info, sync); 718 set_bdi_congested(&q->backing_dev_info, sync);
717 } 719 }
718 720
719 extern void blk_start_queue(struct request_queue *q); 721 extern void blk_start_queue(struct request_queue *q);
720 extern void blk_stop_queue(struct request_queue *q); 722 extern void blk_stop_queue(struct request_queue *q);
721 extern void blk_sync_queue(struct request_queue *q); 723 extern void blk_sync_queue(struct request_queue *q);
722 extern void __blk_stop_queue(struct request_queue *q); 724 extern void __blk_stop_queue(struct request_queue *q);
723 extern void __blk_run_queue(struct request_queue *); 725 extern void __blk_run_queue(struct request_queue *);
724 extern void blk_run_queue(struct request_queue *); 726 extern void blk_run_queue(struct request_queue *);
725 extern int blk_rq_map_user(struct request_queue *, struct request *, 727 extern int blk_rq_map_user(struct request_queue *, struct request *,
726 struct rq_map_data *, void __user *, unsigned long, 728 struct rq_map_data *, void __user *, unsigned long,
727 gfp_t); 729 gfp_t);
728 extern int blk_rq_unmap_user(struct bio *); 730 extern int blk_rq_unmap_user(struct bio *);
729 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 731 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
730 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 732 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
731 struct rq_map_data *, struct sg_iovec *, int, 733 struct rq_map_data *, struct sg_iovec *, int,
732 unsigned int, gfp_t); 734 unsigned int, gfp_t);
733 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 735 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
734 struct request *, int); 736 struct request *, int);
735 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 737 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
736 struct request *, int, rq_end_io_fn *); 738 struct request *, int, rq_end_io_fn *);
737 extern void blk_unplug(struct request_queue *q); 739 extern void blk_unplug(struct request_queue *q);
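blk_get_request(), blk_rq_map_kern() and blk_execute_rq() together are the usual way for a driver to push its own command through the queue and wait for it. A hedged sketch sending a made-up 6-byte packet command with a small kernel buffer (the opcode, lengths and error handling are purely illustrative):

#include <linux/blkdev.h>

static int toy_send_cmd(struct request_queue *q, struct gendisk *disk,
			void *buf, unsigned int len)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd_len  = 6;
	rq->cmd[0]   = 0x12;			/* hypothetical opcode */
	rq->timeout  = BLK_DEFAULT_SG_TIMEOUT;

	err = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, disk, rq, 0);	/* sleeps until completion */

	blk_put_request(rq);
	return err;
}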
738 740
739 static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 741 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
740 { 742 {
741 return bdev->bd_disk->queue; 743 return bdev->bd_disk->queue;
742 } 744 }
743 745
744 /* 746 /*
745 * blk_rq_pos() : the current sector 747 * blk_rq_pos() : the current sector
746 * blk_rq_bytes() : bytes left in the entire request 748 * blk_rq_bytes() : bytes left in the entire request
747 * blk_rq_cur_bytes() : bytes left in the current segment 749 * blk_rq_cur_bytes() : bytes left in the current segment
748 * blk_rq_err_bytes() : bytes left till the next error boundary 750 * blk_rq_err_bytes() : bytes left till the next error boundary
749 * blk_rq_sectors() : sectors left in the entire request 751 * blk_rq_sectors() : sectors left in the entire request
750 * blk_rq_cur_sectors() : sectors left in the current segment 752 * blk_rq_cur_sectors() : sectors left in the current segment
751 */ 753 */
752 static inline sector_t blk_rq_pos(const struct request *rq) 754 static inline sector_t blk_rq_pos(const struct request *rq)
753 { 755 {
754 return rq->__sector; 756 return rq->__sector;
755 } 757 }
756 758
757 static inline unsigned int blk_rq_bytes(const struct request *rq) 759 static inline unsigned int blk_rq_bytes(const struct request *rq)
758 { 760 {
759 return rq->__data_len; 761 return rq->__data_len;
760 } 762 }
761 763
762 static inline int blk_rq_cur_bytes(const struct request *rq) 764 static inline int blk_rq_cur_bytes(const struct request *rq)
763 { 765 {
764 return rq->bio ? bio_cur_bytes(rq->bio) : 0; 766 return rq->bio ? bio_cur_bytes(rq->bio) : 0;
765 } 767 }
766 768
767 extern unsigned int blk_rq_err_bytes(const struct request *rq); 769 extern unsigned int blk_rq_err_bytes(const struct request *rq);
768 770
769 static inline unsigned int blk_rq_sectors(const struct request *rq) 771 static inline unsigned int blk_rq_sectors(const struct request *rq)
770 { 772 {
771 return blk_rq_bytes(rq) >> 9; 773 return blk_rq_bytes(rq) >> 9;
772 } 774 }
773 775
774 static inline unsigned int blk_rq_cur_sectors(const struct request *rq) 776 static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
775 { 777 {
776 return blk_rq_cur_bytes(rq) >> 9; 778 return blk_rq_cur_bytes(rq) >> 9;
777 } 779 }
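As a concrete reading of the helpers above: a 4 KiB write at byte offset 1 MiB starts out with blk_rq_pos() == 2048, blk_rq_sectors() == 8 and blk_rq_bytes() == 4096; after the driver completes the first 1 KiB, the position has advanced to 2050 and 3072 bytes remain, because the accessors always describe what is left, not the original request. A minimal request_fn fetch loop using them (toy_request_fn is invented; requests are assumed to be completed later from the interrupt handler):

#include <linux/blkdev.h>

static void toy_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		pr_debug("toy: %s sector %llu, %u sectors (%u bytes)\n",
			 rq_data_dir(rq) ? "write" : "read",
			 (unsigned long long)blk_rq_pos(rq),
			 blk_rq_sectors(rq), blk_rq_bytes(rq));
		/* hand rq to the hardware here and complete it on interrupt */
	}
}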
778 780
779 /* 781 /*
780 * Request issue related functions. 782 * Request issue related functions.
781 */ 783 */
782 extern struct request *blk_peek_request(struct request_queue *q); 784 extern struct request *blk_peek_request(struct request_queue *q);
783 extern void blk_start_request(struct request *rq); 785 extern void blk_start_request(struct request *rq);
784 extern struct request *blk_fetch_request(struct request_queue *q); 786 extern struct request *blk_fetch_request(struct request_queue *q);
785 787
786 /* 788 /*
787 * Request completion related functions. 789 * Request completion related functions.
788 * 790 *
789 * blk_update_request() completes given number of bytes and updates 791 * blk_update_request() completes given number of bytes and updates
790 * the request without completing it. 792 * the request without completing it.
791 * 793 *
792 * blk_end_request() and friends. __blk_end_request() must be called 794 * blk_end_request() and friends. __blk_end_request() must be called
793 * with the request queue spinlock acquired. 795 * with the request queue spinlock acquired.
794 * 796 *
795 * Several drivers define their own end_request and call 797 * Several drivers define their own end_request and call
796 * blk_end_request() for parts of the original function. 798 * blk_end_request() for parts of the original function.
797 * This prevents code duplication in drivers. 799 * This prevents code duplication in drivers.
798 */ 800 */
799 extern bool blk_update_request(struct request *rq, int error, 801 extern bool blk_update_request(struct request *rq, int error,
800 unsigned int nr_bytes); 802 unsigned int nr_bytes);
801 extern bool blk_end_request(struct request *rq, int error, 803 extern bool blk_end_request(struct request *rq, int error,
802 unsigned int nr_bytes); 804 unsigned int nr_bytes);
803 extern void blk_end_request_all(struct request *rq, int error); 805 extern void blk_end_request_all(struct request *rq, int error);
804 extern bool blk_end_request_cur(struct request *rq, int error); 806 extern bool blk_end_request_cur(struct request *rq, int error);
805 extern bool blk_end_request_err(struct request *rq, int error); 807 extern bool blk_end_request_err(struct request *rq, int error);
806 extern bool __blk_end_request(struct request *rq, int error, 808 extern bool __blk_end_request(struct request *rq, int error,
807 unsigned int nr_bytes); 809 unsigned int nr_bytes);
808 extern void __blk_end_request_all(struct request *rq, int error); 810 extern void __blk_end_request_all(struct request *rq, int error);
809 extern bool __blk_end_request_cur(struct request *rq, int error); 811 extern bool __blk_end_request_cur(struct request *rq, int error);
810 extern bool __blk_end_request_err(struct request *rq, int error); 812 extern bool __blk_end_request_err(struct request *rq, int error);
811 813
812 extern void blk_complete_request(struct request *); 814 extern void blk_complete_request(struct request *);
813 extern void __blk_complete_request(struct request *); 815 extern void __blk_complete_request(struct request *);
814 extern void blk_abort_request(struct request *); 816 extern void blk_abort_request(struct request *);
815 extern void blk_abort_queue(struct request_queue *); 817 extern void blk_abort_queue(struct request_queue *);
816 extern void blk_unprep_request(struct request *); 818 extern void blk_unprep_request(struct request *);
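The leading-underscore completion variants exist purely for locking: they assume the caller already holds q->queue_lock (typically an interrupt path that took it anyway), while the plain ones acquire it themselves. A sketch of a per-chunk completion path under those assumptions (toy_segment_done is an invented name):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

/* hardware finished 'bytes' of the current request */
static void toy_segment_done(struct request_queue *q, struct request *rq,
			     int error, unsigned int bytes)
{
	unsigned long flags;
	bool more;

	spin_lock_irqsave(q->queue_lock, flags);
	more = __blk_end_request(rq, error, bytes);	/* true: data still pending */
	if (!more)
		__blk_run_queue(q);	/* request done, kick the next one */
	spin_unlock_irqrestore(q->queue_lock, flags);
}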
817 819
818 /* 820 /*
819 * Access functions for manipulating queue properties 821 * Access functions for manipulating queue properties
820 */ 822 */
821 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 823 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
822 spinlock_t *lock, int node_id); 824 spinlock_t *lock, int node_id);
823 extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, 825 extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *,
824 request_fn_proc *, 826 request_fn_proc *,
825 spinlock_t *, int node_id); 827 spinlock_t *, int node_id);
826 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 828 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
827 extern struct request_queue *blk_init_allocated_queue(struct request_queue *, 829 extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
828 request_fn_proc *, spinlock_t *); 830 request_fn_proc *, spinlock_t *);
829 extern void blk_cleanup_queue(struct request_queue *); 831 extern void blk_cleanup_queue(struct request_queue *);
830 extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 832 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
831 extern void blk_queue_bounce_limit(struct request_queue *, u64); 833 extern void blk_queue_bounce_limit(struct request_queue *, u64);
832 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); 834 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
833 extern void blk_queue_max_segments(struct request_queue *, unsigned short); 835 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
834 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 836 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
835 extern void blk_queue_max_discard_sectors(struct request_queue *q, 837 extern void blk_queue_max_discard_sectors(struct request_queue *q,
836 unsigned int max_discard_sectors); 838 unsigned int max_discard_sectors);
837 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); 839 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
838 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); 840 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
839 extern void blk_queue_alignment_offset(struct request_queue *q, 841 extern void blk_queue_alignment_offset(struct request_queue *q,
840 unsigned int alignment); 842 unsigned int alignment);
841 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); 843 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
842 extern void blk_queue_io_min(struct request_queue *q, unsigned int min); 844 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
843 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); 845 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
844 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); 846 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
845 extern void blk_set_default_limits(struct queue_limits *lim); 847 extern void blk_set_default_limits(struct queue_limits *lim);
846 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 848 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
847 sector_t offset); 849 sector_t offset);
848 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, 850 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
849 sector_t offset); 851 sector_t offset);
850 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, 852 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
851 sector_t offset); 853 sector_t offset);
852 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); 854 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
853 extern void blk_queue_dma_pad(struct request_queue *, unsigned int); 855 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
854 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); 856 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
855 extern int blk_queue_dma_drain(struct request_queue *q, 857 extern int blk_queue_dma_drain(struct request_queue *q,
856 dma_drain_needed_fn *dma_drain_needed, 858 dma_drain_needed_fn *dma_drain_needed,
857 void *buf, unsigned int size); 859 void *buf, unsigned int size);
858 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); 860 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
859 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 861 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
860 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 862 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
861 extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); 863 extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
862 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 864 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
863 extern void blk_queue_dma_alignment(struct request_queue *, int); 865 extern void blk_queue_dma_alignment(struct request_queue *, int);
864 extern void blk_queue_update_dma_alignment(struct request_queue *, int); 866 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
865 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 867 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
866 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); 868 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
867 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 869 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
870 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
868 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 871 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
869 extern int blk_queue_ordered(struct request_queue *, unsigned);
870 extern bool blk_do_ordered(struct request_queue *, struct request **); 872 extern bool blk_do_ordered(struct request_queue *, struct request **);
871 extern unsigned blk_ordered_cur_seq(struct request_queue *); 873 extern unsigned blk_ordered_cur_seq(struct request_queue *);
872 extern unsigned blk_ordered_req_seq(struct request *); 874 extern unsigned blk_ordered_req_seq(struct request *);
873 extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); 875 extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
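The newly added blk_queue_flush() replaces the removed blk_queue_ordered() extern shown above: instead of picking a QUEUE_ORDERED_* drain mode, a driver now passes the combination of REQ_FLUSH and REQ_FUA that matches what its hardware can actually do (a flushable volatile write cache, and optionally FUA writes). A plausible driver-side conversion sketch (toy_setup_flush and the two capability flags are invented):

#include <linux/blkdev.h>

static void toy_setup_flush(struct request_queue *q, bool has_wcache,
			    bool has_fua)
{
	unsigned int flush = 0;

	if (has_wcache)
		flush |= REQ_FLUSH;		/* cache exists and can be flushed */
	if (has_wcache && has_fua)
		flush |= REQ_FUA;		/* FUA writes are honoured */

	/* previously something like blk_queue_ordered(q, QUEUE_ORDERED_...) */
	blk_queue_flush(q, flush);
}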
874 876
875 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 877 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
876 extern void blk_dump_rq_flags(struct request *, char *); 878 extern void blk_dump_rq_flags(struct request *, char *);
877 extern void generic_unplug_device(struct request_queue *); 879 extern void generic_unplug_device(struct request_queue *);
878 extern long nr_blockdev_pages(void); 880 extern long nr_blockdev_pages(void);
879 881
880 int blk_get_queue(struct request_queue *); 882 int blk_get_queue(struct request_queue *);
881 struct request_queue *blk_alloc_queue(gfp_t); 883 struct request_queue *blk_alloc_queue(gfp_t);
882 struct request_queue *blk_alloc_queue_node(gfp_t, int); 884 struct request_queue *blk_alloc_queue_node(gfp_t, int);
883 extern void blk_put_queue(struct request_queue *); 885 extern void blk_put_queue(struct request_queue *);
884 886
885 /* 887 /*
886 * tag stuff 888 * tag stuff
887 */ 889 */
888 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) 890 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
889 extern int blk_queue_start_tag(struct request_queue *, struct request *); 891 extern int blk_queue_start_tag(struct request_queue *, struct request *);
890 extern struct request *blk_queue_find_tag(struct request_queue *, int); 892 extern struct request *blk_queue_find_tag(struct request_queue *, int);
891 extern void blk_queue_end_tag(struct request_queue *, struct request *); 893 extern void blk_queue_end_tag(struct request_queue *, struct request *);
892 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); 894 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
893 extern void blk_queue_free_tags(struct request_queue *); 895 extern void blk_queue_free_tags(struct request_queue *);
894 extern int blk_queue_resize_tags(struct request_queue *, int); 896 extern int blk_queue_resize_tags(struct request_queue *, int);
895 extern void blk_queue_invalidate_tags(struct request_queue *); 897 extern void blk_queue_invalidate_tags(struct request_queue *);
896 extern struct blk_queue_tag *blk_init_tags(int); 898 extern struct blk_queue_tag *blk_init_tags(int);
897 extern void blk_free_tags(struct blk_queue_tag *); 899 extern void blk_free_tags(struct blk_queue_tag *);
898 900
899 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, 901 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
900 int tag) 902 int tag)
901 { 903 {
902 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 904 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
903 return NULL; 905 return NULL;
904 return bqt->tag_index[tag]; 906 return bqt->tag_index[tag];
905 } 907 }
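For controllers with their own command slots, the tag helpers above map requests onto slot numbers: blk_queue_start_tag() picks a free tag (and starts the request), blk_queue_end_tag() releases it, and blk_queue_find_tag() turns a completed hardware tag back into its request. A rough pairing of the two halves, assuming a hypothetical controller with 32 slots (toy_hw_submit is invented; both paths run under q->queue_lock):

#include <linux/blkdev.h>

static void toy_hw_submit(void *hw, int tag, struct request *rq);	/* invented */

static void toy_tagged_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_peek_request(q)) != NULL) {
		if (blk_queue_start_tag(q, rq))
			break;			/* all 32 tags busy, retry later */
		toy_hw_submit(q->queuedata, rq->tag, rq);
	}
}

/* interrupt side: 'tag' reported by the hardware */
static void toy_complete_tag(struct request_queue *q, int tag, int error)
{
	struct request *rq = blk_queue_find_tag(q, tag);

	if (rq) {
		blk_queue_end_tag(q, rq);	/* frees the slot */
		__blk_end_request_all(rq, error);
	}
}

/* at init: blk_queue_init_tags(q, 32, NULL); */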
906 enum{ 908 enum{
907 BLKDEV_WAIT, /* wait for completion */ 909 BLKDEV_WAIT, /* wait for completion */
908 BLKDEV_BARRIER, /* issue request with barrier */ 910 BLKDEV_BARRIER, /* issue request with barrier */
909 BLKDEV_SECURE, /* secure discard */ 911 BLKDEV_SECURE, /* secure discard */
910 }; 912 };
911 #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) 913 #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT)
912 #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) 914 #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER)
913 #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) 915 #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE)
914 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, 916 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
915 unsigned long); 917 unsigned long);
916 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 918 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
917 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 919 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
918 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 920 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
919 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 921 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
920 static inline int sb_issue_discard(struct super_block *sb, 922 static inline int sb_issue_discard(struct super_block *sb,
921 sector_t block, sector_t nr_blocks) 923 sector_t block, sector_t nr_blocks)
922 { 924 {
923 block <<= (sb->s_blocksize_bits - 9); 925 block <<= (sb->s_blocksize_bits - 9);
924 nr_blocks <<= (sb->s_blocksize_bits - 9); 926 nr_blocks <<= (sb->s_blocksize_bits - 9);
925 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, 927 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
926 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); 928 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
927 } 929 }
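blkdev_issue_flush() is the wrapper filesystems use to push the device's volatile write cache out at commit time; with BLKDEV_IFL_WAIT it only returns once the flush has completed (compare sb_issue_discard() just above, which uses the same flag convention for discards). A short sketch (toy_commit_done is an invented caller):

#include <linux/blkdev.h>
#include <linux/fs.h>

static int toy_commit_done(struct super_block *sb)
{
	/* the commit block has already been written and waited on ... */

	/* ... now make sure everything is on stable storage */
	return blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL,
				  BLKDEV_IFL_WAIT);
}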
928 930
929 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 931 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
930 932
931 enum blk_default_limits { 933 enum blk_default_limits {
932 BLK_MAX_SEGMENTS = 128, 934 BLK_MAX_SEGMENTS = 128,
933 BLK_SAFE_MAX_SECTORS = 255, 935 BLK_SAFE_MAX_SECTORS = 255,
934 BLK_DEF_MAX_SECTORS = 1024, 936 BLK_DEF_MAX_SECTORS = 1024,
935 BLK_MAX_SEGMENT_SIZE = 65536, 937 BLK_MAX_SEGMENT_SIZE = 65536,
936 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, 938 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
937 }; 939 };
938 940
939 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 941 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
940 942
941 static inline unsigned long queue_bounce_pfn(struct request_queue *q) 943 static inline unsigned long queue_bounce_pfn(struct request_queue *q)
942 { 944 {
943 return q->limits.bounce_pfn; 945 return q->limits.bounce_pfn;
944 } 946 }
945 947
946 static inline unsigned long queue_segment_boundary(struct request_queue *q) 948 static inline unsigned long queue_segment_boundary(struct request_queue *q)
947 { 949 {
948 return q->limits.seg_boundary_mask; 950 return q->limits.seg_boundary_mask;
949 } 951 }
950 952
951 static inline unsigned int queue_max_sectors(struct request_queue *q) 953 static inline unsigned int queue_max_sectors(struct request_queue *q)
952 { 954 {
953 return q->limits.max_sectors; 955 return q->limits.max_sectors;
954 } 956 }
955 957
956 static inline unsigned int queue_max_hw_sectors(struct request_queue *q) 958 static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
957 { 959 {
958 return q->limits.max_hw_sectors; 960 return q->limits.max_hw_sectors;
959 } 961 }
960 962
961 static inline unsigned short queue_max_segments(struct request_queue *q) 963 static inline unsigned short queue_max_segments(struct request_queue *q)
962 { 964 {
963 return q->limits.max_segments; 965 return q->limits.max_segments;
964 } 966 }
965 967
966 static inline unsigned int queue_max_segment_size(struct request_queue *q) 968 static inline unsigned int queue_max_segment_size(struct request_queue *q)
967 { 969 {
968 return q->limits.max_segment_size; 970 return q->limits.max_segment_size;
969 } 971 }
970 972
971 static inline unsigned short queue_logical_block_size(struct request_queue *q) 973 static inline unsigned short queue_logical_block_size(struct request_queue *q)
972 { 974 {
973 int retval = 512; 975 int retval = 512;
974 976
975 if (q && q->limits.logical_block_size) 977 if (q && q->limits.logical_block_size)
976 retval = q->limits.logical_block_size; 978 retval = q->limits.logical_block_size;
977 979
978 return retval; 980 return retval;
979 } 981 }
980 982
981 static inline unsigned short bdev_logical_block_size(struct block_device *bdev) 983 static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
982 { 984 {
983 return queue_logical_block_size(bdev_get_queue(bdev)); 985 return queue_logical_block_size(bdev_get_queue(bdev));
984 } 986 }
985 987
986 static inline unsigned int queue_physical_block_size(struct request_queue *q) 988 static inline unsigned int queue_physical_block_size(struct request_queue *q)
987 { 989 {
988 return q->limits.physical_block_size; 990 return q->limits.physical_block_size;
989 } 991 }
990 992
991 static inline int bdev_physical_block_size(struct block_device *bdev) 993 static inline int bdev_physical_block_size(struct block_device *bdev)
992 { 994 {
993 return queue_physical_block_size(bdev_get_queue(bdev)); 995 return queue_physical_block_size(bdev_get_queue(bdev));
994 } 996 }
995 997
996 static inline unsigned int queue_io_min(struct request_queue *q) 998 static inline unsigned int queue_io_min(struct request_queue *q)
997 { 999 {
998 return q->limits.io_min; 1000 return q->limits.io_min;
999 } 1001 }
1000 1002
1001 static inline int bdev_io_min(struct block_device *bdev) 1003 static inline int bdev_io_min(struct block_device *bdev)
1002 { 1004 {
1003 return queue_io_min(bdev_get_queue(bdev)); 1005 return queue_io_min(bdev_get_queue(bdev));
1004 } 1006 }
1005 1007
1006 static inline unsigned int queue_io_opt(struct request_queue *q) 1008 static inline unsigned int queue_io_opt(struct request_queue *q)
1007 { 1009 {
1008 return q->limits.io_opt; 1010 return q->limits.io_opt;
1009 } 1011 }
1010 1012
1011 static inline int bdev_io_opt(struct block_device *bdev) 1013 static inline int bdev_io_opt(struct block_device *bdev)
1012 { 1014 {
1013 return queue_io_opt(bdev_get_queue(bdev)); 1015 return queue_io_opt(bdev_get_queue(bdev));
1014 } 1016 }
1015 1017
1016 static inline int queue_alignment_offset(struct request_queue *q) 1018 static inline int queue_alignment_offset(struct request_queue *q)
1017 { 1019 {
1018 if (q->limits.misaligned) 1020 if (q->limits.misaligned)
1019 return -1; 1021 return -1;
1020 1022
1021 return q->limits.alignment_offset; 1023 return q->limits.alignment_offset;
1022 } 1024 }
1023 1025
1024 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) 1026 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1025 { 1027 {
1026 unsigned int granularity = max(lim->physical_block_size, lim->io_min); 1028 unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1027 unsigned int alignment = (sector << 9) & (granularity - 1); 1029 unsigned int alignment = (sector << 9) & (granularity - 1);
1028 1030
1029 return (granularity + lim->alignment_offset - alignment) 1031 return (granularity + lim->alignment_offset - alignment)
1030 & (granularity - 1); 1032 & (granularity - 1);
1031 } 1033 }
1032 1034
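To make the formula above concrete, a worked example with illustrative values (not taken from this commit):

/*
 * physical_block_size = io_min = 4096, alignment_offset = 0, sector = 7:
 *   granularity = max(4096, 4096)           = 4096
 *   alignment   = (7 << 9) & (4096 - 1)     = 3584
 *   result      = (4096 + 0 - 3584) & 4095  = 512
 * i.e. an I/O starting at sector 7 (byte 3584) falls 512 bytes short of
 * the next naturally aligned boundary at byte 4096.
 */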
1033 static inline int bdev_alignment_offset(struct block_device *bdev) 1035 static inline int bdev_alignment_offset(struct block_device *bdev)
1034 { 1036 {
1035 struct request_queue *q = bdev_get_queue(bdev); 1037 struct request_queue *q = bdev_get_queue(bdev);
1036 1038
1037 if (q->limits.misaligned) 1039 if (q->limits.misaligned)
1038 return -1; 1040 return -1;
1039 1041
1040 if (bdev != bdev->bd_contains) 1042 if (bdev != bdev->bd_contains)
1041 return bdev->bd_part->alignment_offset; 1043 return bdev->bd_part->alignment_offset;
1042 1044
1043 return q->limits.alignment_offset; 1045 return q->limits.alignment_offset;
1044 } 1046 }
1045 1047
1046 static inline int queue_discard_alignment(struct request_queue *q) 1048 static inline int queue_discard_alignment(struct request_queue *q)
1047 { 1049 {
1048 if (q->limits.discard_misaligned) 1050 if (q->limits.discard_misaligned)
1049 return -1; 1051 return -1;
1050 1052
1051 return q->limits.discard_alignment; 1053 return q->limits.discard_alignment;
1052 } 1054 }
1053 1055
1054 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) 1056 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
1055 { 1057 {
1056 unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); 1058 unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
1057 1059
1058 return (lim->discard_granularity + lim->discard_alignment - alignment) 1060 return (lim->discard_granularity + lim->discard_alignment - alignment)
1059 & (lim->discard_granularity - 1); 1061 & (lim->discard_granularity - 1);
1060 } 1062 }
1061 1063
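The same arithmetic applies to the discard limits; again with illustrative values only:

/*
 * discard_granularity = 4096, discard_alignment = 0, sector = 9:
 *   alignment = (9 << 9) & 4095          = 512
 *   result    = (4096 + 0 - 512) & 4095  = 3584
 * i.e. a discard starting at sector 9 (byte 4608) is 3584 bytes short of
 * the next discard granule boundary at byte 8192.
 */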
1062 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) 1064 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
1063 { 1065 {
1064 if (q->limits.discard_zeroes_data == 1) 1066 if (q->limits.discard_zeroes_data == 1)
1065 return 1; 1067 return 1;
1066 1068
1067 return 0; 1069 return 0;
1068 } 1070 }
1069 1071
1070 static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) 1072 static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
1071 { 1073 {
1072 return queue_discard_zeroes_data(bdev_get_queue(bdev)); 1074 return queue_discard_zeroes_data(bdev_get_queue(bdev));
1073 } 1075 }
1074 1076
1075 static inline int queue_dma_alignment(struct request_queue *q) 1077 static inline int queue_dma_alignment(struct request_queue *q)
1076 { 1078 {
1077 return q ? q->dma_alignment : 511; 1079 return q ? q->dma_alignment : 511;
1078 } 1080 }
1079 1081
1080 static inline int blk_rq_aligned(struct request_queue *q, void *addr, 1082 static inline int blk_rq_aligned(struct request_queue *q, void *addr,
1081 unsigned int len) 1083 unsigned int len)
1082 { 1084 {
1083 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; 1085 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
1084 return !((unsigned long)addr & alignment) && !(len & alignment); 1086 return !((unsigned long)addr & alignment) && !(len & alignment);
1085 } 1087 }
1086 1088
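An illustrative reading of the check above (the values are examples, not from this commit):

/*
 * With queue_dma_alignment(q) == 511 (the default) and dma_pad_mask == 0,
 * the combined mask is 511: a buffer starting on a 512-byte boundary with
 * a 512-byte-multiple length passes, while shifting the start by 4 bytes
 * (or using a length of 4100) makes blk_rq_aligned() return 0.
 */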
1087 /* assumes size > 256 */ 1089 /* assumes size > 256 */
1088 static inline unsigned int blksize_bits(unsigned int size) 1090 static inline unsigned int blksize_bits(unsigned int size)
1089 { 1091 {
1090 unsigned int bits = 8; 1092 unsigned int bits = 8;
1091 do { 1093 do {
1092 bits++; 1094 bits++;
1093 size >>= 1; 1095 size >>= 1;
1094 } while (size > 256); 1096 } while (size > 256);
1095 return bits; 1097 return bits;
1096 } 1098 }
1097 1099
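For reference, a few sample values of the helper above, easy to verify by hand (sizes must be powers of two greater than 256, per the comment):

/*
 * blksize_bits(512)  ==  9
 * blksize_bits(1024) == 10
 * blksize_bits(4096) == 12
 */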
1098 static inline unsigned int block_size(struct block_device *bdev) 1100 static inline unsigned int block_size(struct block_device *bdev)
1099 { 1101 {
1100 return bdev->bd_block_size; 1102 return bdev->bd_block_size;
1101 } 1103 }
1102 1104
1103 typedef struct {struct page *v;} Sector; 1105 typedef struct {struct page *v;} Sector;
1104 1106
1105 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 1107 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
1106 1108
1107 static inline void put_dev_sector(Sector p) 1109 static inline void put_dev_sector(Sector p)
1108 { 1110 {
1109 page_cache_release(p.v); 1111 page_cache_release(p.v);
1110 } 1112 }
1111 1113
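A hedged usage sketch for the pair above (the function name is hypothetical): read one 512-byte sector, inspect it, then drop the page reference.

static int example_peek_sector(struct block_device *bdev, sector_t n)
{
	Sector sect;
	unsigned char *data;

	data = read_dev_sector(bdev, n, &sect);
	if (!data)
		return -EIO;

	/* ... examine data[0..511] here ... */

	put_dev_sector(sect);	/* releases the page taken by read_dev_sector() */
	return 0;
}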
1112 struct work_struct; 1114 struct work_struct;
1113 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 1115 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1114 1116
1115 #ifdef CONFIG_BLK_CGROUP 1117 #ifdef CONFIG_BLK_CGROUP
1116 /* 1118 /*
1117 * This should not be using sched_clock(). A real patch is in progress 1119 * This should not be using sched_clock(). A real patch is in progress
1118 * to fix this up, until that is in place we need to disable preemption 1120 * to fix this up, until that is in place we need to disable preemption
1119 * around sched_clock() in this function and set_io_start_time_ns(). 1121 * around sched_clock() in this function and set_io_start_time_ns().
1120 */ 1122 */
1121 static inline void set_start_time_ns(struct request *req) 1123 static inline void set_start_time_ns(struct request *req)
1122 { 1124 {
1123 preempt_disable(); 1125 preempt_disable();
1124 req->start_time_ns = sched_clock(); 1126 req->start_time_ns = sched_clock();
1125 preempt_enable(); 1127 preempt_enable();
1126 } 1128 }
1127 1129
1128 static inline void set_io_start_time_ns(struct request *req) 1130 static inline void set_io_start_time_ns(struct request *req)
1129 { 1131 {
1130 preempt_disable(); 1132 preempt_disable();
1131 req->io_start_time_ns = sched_clock(); 1133 req->io_start_time_ns = sched_clock();
1132 preempt_enable(); 1134 preempt_enable();
1133 } 1135 }
1134 1136
1135 static inline uint64_t rq_start_time_ns(struct request *req) 1137 static inline uint64_t rq_start_time_ns(struct request *req)
1136 { 1138 {
1137 return req->start_time_ns; 1139 return req->start_time_ns;
1138 } 1140 }
1139 1141
1140 static inline uint64_t rq_io_start_time_ns(struct request *req) 1142 static inline uint64_t rq_io_start_time_ns(struct request *req)
1141 { 1143 {
1142 return req->io_start_time_ns; 1144 return req->io_start_time_ns;
1143 } 1145 }
1144 #else 1146 #else
1145 static inline void set_start_time_ns(struct request *req) {} 1147 static inline void set_start_time_ns(struct request *req) {}
1146 static inline void set_io_start_time_ns(struct request *req) {} 1148 static inline void set_io_start_time_ns(struct request *req) {}
1147 static inline uint64_t rq_start_time_ns(struct request *req) 1149 static inline uint64_t rq_start_time_ns(struct request *req)
1148 { 1150 {
1149 return 0; 1151 return 0;
1150 } 1152 }
1151 static inline uint64_t rq_io_start_time_ns(struct request *req) 1153 static inline uint64_t rq_io_start_time_ns(struct request *req)
1152 { 1154 {
1153 return 0; 1155 return 0;
1154 } 1156 }
1155 #endif 1157 #endif
1156 1158
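Illustrative only: the two accessors compose into a queue-wait measurement; with CONFIG_BLK_CGROUP disabled both return 0, so the difference harmlessly degrades to 0 as well. The helper name is hypothetical.

static inline uint64_t example_rq_queue_wait_ns(struct request *req)
{
	/* Time between entering the block layer and being dispatched. */
	return rq_io_start_time_ns(req) - rq_start_time_ns(req);
}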
1157 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 1159 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
1158 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 1160 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
1159 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 1161 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
1160 MODULE_ALIAS("block-major-" __stringify(major) "-*") 1162 MODULE_ALIAS("block-major-" __stringify(major) "-*")
1161 1163
1162 #if defined(CONFIG_BLK_DEV_INTEGRITY) 1164 #if defined(CONFIG_BLK_DEV_INTEGRITY)
1163 1165
1164 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ 1166 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */
1165 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ 1167 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */
1166 1168
1167 struct blk_integrity_exchg { 1169 struct blk_integrity_exchg {
1168 void *prot_buf; 1170 void *prot_buf;
1169 void *data_buf; 1171 void *data_buf;
1170 sector_t sector; 1172 sector_t sector;
1171 unsigned int data_size; 1173 unsigned int data_size;
1172 unsigned short sector_size; 1174 unsigned short sector_size;
1173 const char *disk_name; 1175 const char *disk_name;
1174 }; 1176 };
1175 1177
1176 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); 1178 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
1177 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); 1179 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
1178 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); 1180 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
1179 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); 1181 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
1180 1182
1181 struct blk_integrity { 1183 struct blk_integrity {
1182 integrity_gen_fn *generate_fn; 1184 integrity_gen_fn *generate_fn;
1183 integrity_vrfy_fn *verify_fn; 1185 integrity_vrfy_fn *verify_fn;
1184 integrity_set_tag_fn *set_tag_fn; 1186 integrity_set_tag_fn *set_tag_fn;
1185 integrity_get_tag_fn *get_tag_fn; 1187 integrity_get_tag_fn *get_tag_fn;
1186 1188
1187 unsigned short flags; 1189 unsigned short flags;
1188 unsigned short tuple_size; 1190 unsigned short tuple_size;
1189 unsigned short sector_size; 1191 unsigned short sector_size;
1190 unsigned short tag_size; 1192 unsigned short tag_size;
1191 1193
1192 const char *name; 1194 const char *name;
1193 1195
1194 struct kobject kobj; 1196 struct kobject kobj;
1195 }; 1197 };
1196 1198
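A hedged driver-side sketch of filling in and registering the profile above; the callback functions, the profile name, and the 8-byte tuple size are assumptions for illustration, not taken from this commit.

static struct blk_integrity example_integrity = {
	.name		= "EXAMPLE-DIF",
	.generate_fn	= example_generate_fn,	/* fill prot_buf from data_buf */
	.verify_fn	= example_verify_fn,	/* check prot_buf, 0 or -EIO */
	.tuple_size	= 8,			/* metadata bytes per sector */
	.sector_size	= 512,
};

static int example_enable_integrity(struct gendisk *disk)
{
	return blk_integrity_register(disk, &example_integrity);
}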
1197 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); 1199 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
1198 extern void blk_integrity_unregister(struct gendisk *); 1200 extern void blk_integrity_unregister(struct gendisk *);
1199 extern int blk_integrity_compare(struct gendisk *, struct gendisk *); 1201 extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
1200 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); 1202 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
1201 extern int blk_rq_count_integrity_sg(struct request *); 1203 extern int blk_rq_count_integrity_sg(struct request *);
1202 1204
1203 static inline 1205 static inline
1204 struct blk_integrity *bdev_get_integrity(struct block_device *bdev) 1206 struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
1205 { 1207 {
1206 return bdev->bd_disk->integrity; 1208 return bdev->bd_disk->integrity;
1207 } 1209 }
1208 1210
1209 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) 1211 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
1210 { 1212 {
1211 return disk->integrity; 1213 return disk->integrity;
1212 } 1214 }
1213 1215
1214 static inline int blk_integrity_rq(struct request *rq) 1216 static inline int blk_integrity_rq(struct request *rq)
1215 { 1217 {
1216 if (rq->bio == NULL) 1218 if (rq->bio == NULL)
1217 return 0; 1219 return 0;
1218 1220
1219 return bio_integrity(rq->bio); 1221 return bio_integrity(rq->bio);
1220 } 1222 }
1221 1223
1222 #else /* CONFIG_BLK_DEV_INTEGRITY */ 1224 #else /* CONFIG_BLK_DEV_INTEGRITY */
1223 1225
1224 #define blk_integrity_rq(rq) (0) 1226 #define blk_integrity_rq(rq) (0)
1225 #define blk_rq_count_integrity_sg(a) (0) 1227 #define blk_rq_count_integrity_sg(a) (0)
1226 #define blk_rq_map_integrity_sg(a, b) (0) 1228 #define blk_rq_map_integrity_sg(a, b) (0)
1227 #define bdev_get_integrity(a) (0) 1229 #define bdev_get_integrity(a) (0)
1228 #define blk_get_integrity(a) (0) 1230 #define blk_get_integrity(a) (0)
1229 #define blk_integrity_compare(a, b) (0) 1231 #define blk_integrity_compare(a, b) (0)
1230 #define blk_integrity_register(a, b) (0) 1232 #define blk_integrity_register(a, b) (0)
1231 #define blk_integrity_unregister(a) do { } while (0); 1233 #define blk_integrity_unregister(a) do { } while (0);
1232 1234
1233 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 1235 #endif /* CONFIG_BLK_DEV_INTEGRITY */
1234 1236
1235 struct block_device_operations { 1237 struct block_device_operations {
1236 int (*open) (struct block_device *, fmode_t); 1238 int (*open) (struct block_device *, fmode_t);
1237 int (*release) (struct gendisk *, fmode_t); 1239 int (*release) (struct gendisk *, fmode_t);
1238 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1240 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1239 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1241 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1240 int (*direct_access) (struct block_device *, sector_t, 1242 int (*direct_access) (struct block_device *, sector_t,
1241 void **, unsigned long *); 1243 void **, unsigned long *);
1242 int (*media_changed) (struct gendisk *); 1244 int (*media_changed) (struct gendisk *);
1243 void (*unlock_native_capacity) (struct gendisk *); 1245 void (*unlock_native_capacity) (struct gendisk *);
1244 int (*revalidate_disk) (struct gendisk *); 1246 int (*revalidate_disk) (struct gendisk *);
1245 int (*getgeo)(struct block_device *, struct hd_geometry *); 1247 int (*getgeo)(struct block_device *, struct hd_geometry *);
1246 /* this callback is with swap_lock and sometimes page table lock held */ 1248 /* this callback is with swap_lock and sometimes page table lock held */
1247 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1249 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
1248 struct module *owner; 1250 struct module *owner;
1249 }; 1251 };
1250 1252
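Illustrative only: a simple driver typically populates just a few of the methods above and points disk->fops at the table before add_disk(); the example_* handlers are assumed to exist elsewhere in such a driver.

static const struct block_device_operations example_fops = {
	.owner		= THIS_MODULE,
	.open		= example_open,
	.release	= example_release,
	.getgeo		= example_getgeo,	/* report a synthetic CHS geometry */
};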
1251 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, 1253 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
1252 unsigned long); 1254 unsigned long);
1253 #else /* CONFIG_BLOCK */ 1255 #else /* CONFIG_BLOCK */
1254 /* 1256 /*
1255 * stubs for when the block layer is configured out 1257 * stubs for when the block layer is configured out
1256 */ 1258 */
1257 #define buffer_heads_over_limit 0 1259 #define buffer_heads_over_limit 0
1258 1260
1259 static inline long nr_blockdev_pages(void) 1261 static inline long nr_blockdev_pages(void)
1260 { 1262 {
1261 return 0; 1263 return 0;
1262 } 1264 }
1263 1265
1264 #endif /* CONFIG_BLOCK */ 1266 #endif /* CONFIG_BLOCK */
1265 1267
1266 #endif 1268 #endif