Commit 4913efe456c987057e5d36a3f0a55422a9072cae
Committed by: Jens Axboe
1 parent: 6958f14545
Exists in: master and 39 other branches
block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush()
Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with the simpler blk_queue_flush().

blk_queue_flush() takes combinations of REQ_FLUSH and REQ_FUA. If a device has a write cache and can flush it, it should set REQ_FLUSH. If the device can also handle FUA writes, it should set REQ_FUA as well.

All blk_queue_ordered() users are converted:

* ORDERED_DRAIN is mapped to 0, which is the default value.
* ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH.
* ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
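For drivers, the conversion is mechanical. A minimal sketch of the new call, assuming a hypothetical driver ("exampledrv") whose hardware has a flushable write cache; the function name and capability choice are illustrative and not taken from this commit's diff:

#include <linux/blkdev.h>

/* Hypothetical driver hook, for illustration only. */
static void exampledrv_setup_flush(struct request_queue *q)
{
	/*
	 * Old interface, removed by this commit:
	 *	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
	 *
	 * New interface: declare what the hardware can do.
	 *   no write cache              -> 0 (the default, no call needed)
	 *   flushable write cache       -> REQ_FLUSH
	 *   write cache plus FUA writes -> REQ_FLUSH | REQ_FUA
	 */
	blk_queue_flush(q, REQ_FLUSH);
}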
Showing 15 changed files with 67 additions and 102 deletions
- block/blk-barrier.c
- block/blk-core.c
- block/blk-settings.c
- drivers/block/brd.c
- drivers/block/loop.c
- drivers/block/osdblk.c
- drivers/block/ps3disk.c
- drivers/block/virtio_blk.c
- drivers/block/xen-blkfront.c
- drivers/ide/ide-disk.c
- drivers/md/dm.c
- drivers/mmc/card/queue.c
- drivers/s390/block/dasd.c
- drivers/scsi/sd.c
- include/linux/blkdev.h
block/blk-barrier.c
1 | /* | 1 | /* |
2 | * Functions related to barrier IO handling | 2 | * Functions related to barrier IO handling |
3 | */ | 3 | */ |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/bio.h> | 6 | #include <linux/bio.h> |
7 | #include <linux/blkdev.h> | 7 | #include <linux/blkdev.h> |
8 | #include <linux/gfp.h> | 8 | #include <linux/gfp.h> |
9 | 9 | ||
10 | #include "blk.h" | 10 | #include "blk.h" |
11 | 11 | ||
12 | /** | ||
13 | * blk_queue_ordered - does this queue support ordered writes | ||
14 | * @q: the request queue | ||
15 | * @ordered: one of QUEUE_ORDERED_* | ||
16 | * | ||
17 | * Description: | ||
18 | * For journalled file systems, doing ordered writes on a commit | ||
19 | * block instead of explicitly doing wait_on_buffer (which is bad | ||
20 | * for performance) can be a big win. Block drivers supporting this | ||
21 | * feature should call this function and indicate so. | ||
22 | * | ||
23 | **/ | ||
24 | int blk_queue_ordered(struct request_queue *q, unsigned ordered) | ||
25 | { | ||
26 | if (ordered != QUEUE_ORDERED_NONE && | ||
27 | ordered != QUEUE_ORDERED_DRAIN && | ||
28 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | ||
29 | ordered != QUEUE_ORDERED_DRAIN_FUA) { | ||
30 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | ||
31 | return -EINVAL; | ||
32 | } | ||
33 | |||
34 | q->ordered = ordered; | ||
35 | q->next_ordered = ordered; | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | EXPORT_SYMBOL(blk_queue_ordered); | ||
40 | |||
41 | /* | 12 | /* |
42 | * Cache flushing for ordered writes handling | 13 | * Cache flushing for ordered writes handling |
43 | */ | 14 | */ |
44 | unsigned blk_ordered_cur_seq(struct request_queue *q) | 15 | unsigned blk_ordered_cur_seq(struct request_queue *q) |
45 | { | 16 | { |
46 | if (!q->ordseq) | 17 | if (!q->ordseq) |
47 | return 0; | 18 | return 0; |
48 | return 1 << ffz(q->ordseq); | 19 | return 1 << ffz(q->ordseq); |
49 | } | 20 | } |
50 | 21 | ||
51 | unsigned blk_ordered_req_seq(struct request *rq) | 22 | unsigned blk_ordered_req_seq(struct request *rq) |
52 | { | 23 | { |
53 | struct request_queue *q = rq->q; | 24 | struct request_queue *q = rq->q; |
54 | 25 | ||
55 | BUG_ON(q->ordseq == 0); | 26 | BUG_ON(q->ordseq == 0); |
56 | 27 | ||
57 | if (rq == &q->pre_flush_rq) | 28 | if (rq == &q->pre_flush_rq) |
58 | return QUEUE_ORDSEQ_PREFLUSH; | 29 | return QUEUE_ORDSEQ_PREFLUSH; |
59 | if (rq == &q->bar_rq) | 30 | if (rq == &q->bar_rq) |
60 | return QUEUE_ORDSEQ_BAR; | 31 | return QUEUE_ORDSEQ_BAR; |
61 | if (rq == &q->post_flush_rq) | 32 | if (rq == &q->post_flush_rq) |
62 | return QUEUE_ORDSEQ_POSTFLUSH; | 33 | return QUEUE_ORDSEQ_POSTFLUSH; |
63 | 34 | ||
64 | /* | 35 | /* |
65 | * !fs requests don't need to follow barrier ordering. Always | 36 | * !fs requests don't need to follow barrier ordering. Always |
66 | * put them at the front. This fixes the following deadlock. | 37 | * put them at the front. This fixes the following deadlock. |
67 | * | 38 | * |
68 | * http://thread.gmane.org/gmane.linux.kernel/537473 | 39 | * http://thread.gmane.org/gmane.linux.kernel/537473 |
69 | */ | 40 | */ |
70 | if (rq->cmd_type != REQ_TYPE_FS) | 41 | if (rq->cmd_type != REQ_TYPE_FS) |
71 | return QUEUE_ORDSEQ_DRAIN; | 42 | return QUEUE_ORDSEQ_DRAIN; |
72 | 43 | ||
73 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == | 44 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == |
74 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) | 45 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) |
75 | return QUEUE_ORDSEQ_DRAIN; | 46 | return QUEUE_ORDSEQ_DRAIN; |
76 | else | 47 | else |
77 | return QUEUE_ORDSEQ_DONE; | 48 | return QUEUE_ORDSEQ_DONE; |
78 | } | 49 | } |
79 | 50 | ||
80 | bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) | 51 | bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) |
81 | { | 52 | { |
82 | struct request *rq; | 53 | struct request *rq; |
83 | 54 | ||
84 | if (error && !q->orderr) | 55 | if (error && !q->orderr) |
85 | q->orderr = error; | 56 | q->orderr = error; |
86 | 57 | ||
87 | BUG_ON(q->ordseq & seq); | 58 | BUG_ON(q->ordseq & seq); |
88 | q->ordseq |= seq; | 59 | q->ordseq |= seq; |
89 | 60 | ||
90 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) | 61 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) |
91 | return false; | 62 | return false; |
92 | 63 | ||
93 | /* | 64 | /* |
94 | * Okay, sequence complete. | 65 | * Okay, sequence complete. |
95 | */ | 66 | */ |
96 | q->ordseq = 0; | 67 | q->ordseq = 0; |
97 | rq = q->orig_bar_rq; | 68 | rq = q->orig_bar_rq; |
98 | __blk_end_request_all(rq, q->orderr); | 69 | __blk_end_request_all(rq, q->orderr); |
99 | return true; | 70 | return true; |
100 | } | 71 | } |
101 | 72 | ||
102 | static void pre_flush_end_io(struct request *rq, int error) | 73 | static void pre_flush_end_io(struct request *rq, int error) |
103 | { | 74 | { |
104 | elv_completed_request(rq->q, rq); | 75 | elv_completed_request(rq->q, rq); |
105 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | 76 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); |
106 | } | 77 | } |
107 | 78 | ||
108 | static void bar_end_io(struct request *rq, int error) | 79 | static void bar_end_io(struct request *rq, int error) |
109 | { | 80 | { |
110 | elv_completed_request(rq->q, rq); | 81 | elv_completed_request(rq->q, rq); |
111 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | 82 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); |
112 | } | 83 | } |
113 | 84 | ||
114 | static void post_flush_end_io(struct request *rq, int error) | 85 | static void post_flush_end_io(struct request *rq, int error) |
115 | { | 86 | { |
116 | elv_completed_request(rq->q, rq); | 87 | elv_completed_request(rq->q, rq); |
117 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); | 88 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); |
118 | } | 89 | } |
119 | 90 | ||
120 | static void queue_flush(struct request_queue *q, unsigned which) | 91 | static void queue_flush(struct request_queue *q, unsigned which) |
121 | { | 92 | { |
122 | struct request *rq; | 93 | struct request *rq; |
123 | rq_end_io_fn *end_io; | 94 | rq_end_io_fn *end_io; |
124 | 95 | ||
125 | if (which == QUEUE_ORDERED_DO_PREFLUSH) { | 96 | if (which == QUEUE_ORDERED_DO_PREFLUSH) { |
126 | rq = &q->pre_flush_rq; | 97 | rq = &q->pre_flush_rq; |
127 | end_io = pre_flush_end_io; | 98 | end_io = pre_flush_end_io; |
128 | } else { | 99 | } else { |
129 | rq = &q->post_flush_rq; | 100 | rq = &q->post_flush_rq; |
130 | end_io = post_flush_end_io; | 101 | end_io = post_flush_end_io; |
131 | } | 102 | } |
132 | 103 | ||
133 | blk_rq_init(q, rq); | 104 | blk_rq_init(q, rq); |
134 | rq->cmd_type = REQ_TYPE_FS; | 105 | rq->cmd_type = REQ_TYPE_FS; |
135 | rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; | 106 | rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; |
136 | rq->rq_disk = q->orig_bar_rq->rq_disk; | 107 | rq->rq_disk = q->orig_bar_rq->rq_disk; |
137 | rq->end_io = end_io; | 108 | rq->end_io = end_io; |
138 | 109 | ||
139 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 110 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
140 | } | 111 | } |
141 | 112 | ||
142 | static inline bool start_ordered(struct request_queue *q, struct request **rqp) | 113 | static inline bool start_ordered(struct request_queue *q, struct request **rqp) |
143 | { | 114 | { |
144 | struct request *rq = *rqp; | 115 | struct request *rq = *rqp; |
145 | unsigned skip = 0; | 116 | unsigned skip = 0; |
146 | 117 | ||
147 | q->orderr = 0; | 118 | q->orderr = 0; |
148 | q->ordered = q->next_ordered; | 119 | q->ordered = q->next_ordered; |
149 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | 120 | q->ordseq |= QUEUE_ORDSEQ_STARTED; |
150 | 121 | ||
151 | /* | 122 | /* |
152 | * For an empty barrier, there's no actual BAR request, which | 123 | * For an empty barrier, there's no actual BAR request, which |
153 | * in turn makes POSTFLUSH unnecessary. Mask them off. | 124 | * in turn makes POSTFLUSH unnecessary. Mask them off. |
154 | */ | 125 | */ |
155 | if (!blk_rq_sectors(rq)) | 126 | if (!blk_rq_sectors(rq)) |
156 | q->ordered &= ~(QUEUE_ORDERED_DO_BAR | | 127 | q->ordered &= ~(QUEUE_ORDERED_DO_BAR | |
157 | QUEUE_ORDERED_DO_POSTFLUSH); | 128 | QUEUE_ORDERED_DO_POSTFLUSH); |
158 | 129 | ||
159 | /* stash away the original request */ | 130 | /* stash away the original request */ |
160 | blk_dequeue_request(rq); | 131 | blk_dequeue_request(rq); |
161 | q->orig_bar_rq = rq; | 132 | q->orig_bar_rq = rq; |
162 | rq = NULL; | 133 | rq = NULL; |
163 | 134 | ||
164 | /* | 135 | /* |
165 | * Queue ordered sequence. As we stack them at the head, we | 136 | * Queue ordered sequence. As we stack them at the head, we |
166 | * need to queue in reverse order. Note that we rely on that | 137 | * need to queue in reverse order. Note that we rely on that |
167 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | 138 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs |
168 | * request gets inbetween ordered sequence. | 139 | * request gets inbetween ordered sequence. |
169 | */ | 140 | */ |
170 | if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { | 141 | if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { |
171 | queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); | 142 | queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); |
172 | rq = &q->post_flush_rq; | 143 | rq = &q->post_flush_rq; |
173 | } else | 144 | } else |
174 | skip |= QUEUE_ORDSEQ_POSTFLUSH; | 145 | skip |= QUEUE_ORDSEQ_POSTFLUSH; |
175 | 146 | ||
176 | if (q->ordered & QUEUE_ORDERED_DO_BAR) { | 147 | if (q->ordered & QUEUE_ORDERED_DO_BAR) { |
177 | rq = &q->bar_rq; | 148 | rq = &q->bar_rq; |
178 | 149 | ||
179 | /* initialize proxy request and queue it */ | 150 | /* initialize proxy request and queue it */ |
180 | blk_rq_init(q, rq); | 151 | blk_rq_init(q, rq); |
181 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) | 152 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) |
182 | rq->cmd_flags |= REQ_WRITE; | 153 | rq->cmd_flags |= REQ_WRITE; |
183 | if (q->ordered & QUEUE_ORDERED_DO_FUA) | 154 | if (q->ordered & QUEUE_ORDERED_DO_FUA) |
184 | rq->cmd_flags |= REQ_FUA; | 155 | rq->cmd_flags |= REQ_FUA; |
185 | init_request_from_bio(rq, q->orig_bar_rq->bio); | 156 | init_request_from_bio(rq, q->orig_bar_rq->bio); |
186 | rq->end_io = bar_end_io; | 157 | rq->end_io = bar_end_io; |
187 | 158 | ||
188 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 159 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
189 | } else | 160 | } else |
190 | skip |= QUEUE_ORDSEQ_BAR; | 161 | skip |= QUEUE_ORDSEQ_BAR; |
191 | 162 | ||
192 | if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { | 163 | if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { |
193 | queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); | 164 | queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); |
194 | rq = &q->pre_flush_rq; | 165 | rq = &q->pre_flush_rq; |
195 | } else | 166 | } else |
196 | skip |= QUEUE_ORDSEQ_PREFLUSH; | 167 | skip |= QUEUE_ORDSEQ_PREFLUSH; |
197 | 168 | ||
198 | if (queue_in_flight(q)) | 169 | if (queue_in_flight(q)) |
199 | rq = NULL; | 170 | rq = NULL; |
200 | else | 171 | else |
201 | skip |= QUEUE_ORDSEQ_DRAIN; | 172 | skip |= QUEUE_ORDSEQ_DRAIN; |
202 | 173 | ||
203 | *rqp = rq; | 174 | *rqp = rq; |
204 | 175 | ||
205 | /* | 176 | /* |
206 | * Complete skipped sequences. If whole sequence is complete, | 177 | * Complete skipped sequences. If whole sequence is complete, |
207 | * return false to tell elevator that this request is gone. | 178 | * return false to tell elevator that this request is gone. |
208 | */ | 179 | */ |
209 | return !blk_ordered_complete_seq(q, skip, 0); | 180 | return !blk_ordered_complete_seq(q, skip, 0); |
210 | } | 181 | } |
211 | 182 | ||
212 | bool blk_do_ordered(struct request_queue *q, struct request **rqp) | 183 | bool blk_do_ordered(struct request_queue *q, struct request **rqp) |
213 | { | 184 | { |
214 | struct request *rq = *rqp; | 185 | struct request *rq = *rqp; |
215 | const int is_barrier = rq->cmd_type == REQ_TYPE_FS && | 186 | const int is_barrier = rq->cmd_type == REQ_TYPE_FS && |
216 | (rq->cmd_flags & REQ_HARDBARRIER); | 187 | (rq->cmd_flags & REQ_HARDBARRIER); |
217 | 188 | ||
218 | if (!q->ordseq) { | 189 | if (!q->ordseq) { |
219 | if (!is_barrier) | 190 | if (!is_barrier) |
220 | return true; | 191 | return true; |
221 | 192 | ||
222 | if (q->next_ordered != QUEUE_ORDERED_NONE) | 193 | if (q->next_ordered != QUEUE_ORDERED_NONE) |
223 | return start_ordered(q, rqp); | 194 | return start_ordered(q, rqp); |
224 | else { | 195 | else { |
225 | /* | 196 | /* |
226 | * Queue ordering not supported. Terminate | 197 | * Queue ordering not supported. Terminate |
227 | * with prejudice. | 198 | * with prejudice. |
228 | */ | 199 | */ |
229 | blk_dequeue_request(rq); | 200 | blk_dequeue_request(rq); |
230 | __blk_end_request_all(rq, -EOPNOTSUPP); | 201 | __blk_end_request_all(rq, -EOPNOTSUPP); |
231 | *rqp = NULL; | 202 | *rqp = NULL; |
232 | return false; | 203 | return false; |
233 | } | 204 | } |
234 | } | 205 | } |
235 | 206 | ||
236 | /* | 207 | /* |
237 | * Ordered sequence in progress | 208 | * Ordered sequence in progress |
238 | */ | 209 | */ |
239 | 210 | ||
240 | /* Special requests are not subject to ordering rules. */ | 211 | /* Special requests are not subject to ordering rules. */ |
241 | if (rq->cmd_type != REQ_TYPE_FS && | 212 | if (rq->cmd_type != REQ_TYPE_FS && |
242 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) | 213 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) |
243 | return true; | 214 | return true; |
244 | 215 | ||
245 | /* Ordered by draining. Wait for turn. */ | 216 | /* Ordered by draining. Wait for turn. */ |
246 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); | 217 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); |
247 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) | 218 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) |
248 | *rqp = NULL; | 219 | *rqp = NULL; |
249 | 220 | ||
250 | return true; | 221 | return true; |
251 | } | 222 | } |
252 | 223 | ||
253 | static void bio_end_empty_barrier(struct bio *bio, int err) | 224 | static void bio_end_empty_barrier(struct bio *bio, int err) |
254 | { | 225 | { |
255 | if (err) { | 226 | if (err) { |
256 | if (err == -EOPNOTSUPP) | 227 | if (err == -EOPNOTSUPP) |
257 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | 228 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); |
258 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 229 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
259 | } | 230 | } |
260 | if (bio->bi_private) | 231 | if (bio->bi_private) |
261 | complete(bio->bi_private); | 232 | complete(bio->bi_private); |
262 | bio_put(bio); | 233 | bio_put(bio); |
263 | } | 234 | } |
264 | 235 | ||
265 | /** | 236 | /** |
266 | * blkdev_issue_flush - queue a flush | 237 | * blkdev_issue_flush - queue a flush |
267 | * @bdev: blockdev to issue flush for | 238 | * @bdev: blockdev to issue flush for |
268 | * @gfp_mask: memory allocation flags (for bio_alloc) | 239 | * @gfp_mask: memory allocation flags (for bio_alloc) |
269 | * @error_sector: error sector | 240 | * @error_sector: error sector |
270 | * @flags: BLKDEV_IFL_* flags to control behaviour | 241 | * @flags: BLKDEV_IFL_* flags to control behaviour |
271 | * | 242 | * |
272 | * Description: | 243 | * Description: |
273 | * Issue a flush for the block device in question. Caller can supply | 244 | * Issue a flush for the block device in question. Caller can supply |
274 | * room for storing the error offset in case of a flush error, if they | 245 | * room for storing the error offset in case of a flush error, if they |
275 | * wish to. If WAIT flag is not passed then caller may check only what | 246 | * wish to. If WAIT flag is not passed then caller may check only what |
276 | * request was pushed in some internal queue for later handling. | 247 | * request was pushed in some internal queue for later handling. |
277 | */ | 248 | */ |
278 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, | 249 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, |
279 | sector_t *error_sector, unsigned long flags) | 250 | sector_t *error_sector, unsigned long flags) |
280 | { | 251 | { |
281 | DECLARE_COMPLETION_ONSTACK(wait); | 252 | DECLARE_COMPLETION_ONSTACK(wait); |
282 | struct request_queue *q; | 253 | struct request_queue *q; |
283 | struct bio *bio; | 254 | struct bio *bio; |
284 | int ret = 0; | 255 | int ret = 0; |
285 | 256 | ||
286 | if (bdev->bd_disk == NULL) | 257 | if (bdev->bd_disk == NULL) |
287 | return -ENXIO; | 258 | return -ENXIO; |
288 | 259 | ||
289 | q = bdev_get_queue(bdev); | 260 | q = bdev_get_queue(bdev); |
290 | if (!q) | 261 | if (!q) |
291 | return -ENXIO; | 262 | return -ENXIO; |
292 | 263 | ||
293 | /* | 264 | /* |
294 | * some block devices may not have their queue correctly set up here | 265 | * some block devices may not have their queue correctly set up here |
295 | * (e.g. loop device without a backing file) and so issuing a flush | 266 | * (e.g. loop device without a backing file) and so issuing a flush |
296 | * here will panic. Ensure there is a request function before issuing | 267 | * here will panic. Ensure there is a request function before issuing |
297 | * the barrier. | 268 | * the barrier. |
298 | */ | 269 | */ |
299 | if (!q->make_request_fn) | 270 | if (!q->make_request_fn) |
300 | return -ENXIO; | 271 | return -ENXIO; |
301 | 272 | ||
302 | bio = bio_alloc(gfp_mask, 0); | 273 | bio = bio_alloc(gfp_mask, 0); |
303 | bio->bi_end_io = bio_end_empty_barrier; | 274 | bio->bi_end_io = bio_end_empty_barrier; |
304 | bio->bi_bdev = bdev; | 275 | bio->bi_bdev = bdev; |
305 | if (test_bit(BLKDEV_WAIT, &flags)) | 276 | if (test_bit(BLKDEV_WAIT, &flags)) |
306 | bio->bi_private = &wait; | 277 | bio->bi_private = &wait; |
307 | 278 | ||
308 | bio_get(bio); | 279 | bio_get(bio); |
309 | submit_bio(WRITE_BARRIER, bio); | 280 | submit_bio(WRITE_BARRIER, bio); |
310 | if (test_bit(BLKDEV_WAIT, &flags)) { | 281 | if (test_bit(BLKDEV_WAIT, &flags)) { |
311 | wait_for_completion(&wait); | 282 | wait_for_completion(&wait); |
312 | /* | 283 | /* |
313 | * The driver must store the error location in ->bi_sector, if | 284 | * The driver must store the error location in ->bi_sector, if |
314 | * it supports it. For non-stacked drivers, this should be | 285 | * it supports it. For non-stacked drivers, this should be |
315 | * copied from blk_rq_pos(rq). | 286 | * copied from blk_rq_pos(rq). |
316 | */ | 287 | */ |
317 | if (error_sector) | 288 | if (error_sector) |
318 | *error_sector = bio->bi_sector; | 289 | *error_sector = bio->bi_sector; |
319 | } | 290 | } |
320 | 291 | ||
321 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 292 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
322 | ret = -EOPNOTSUPP; | 293 | ret = -EOPNOTSUPP; |
323 | else if (!bio_flagged(bio, BIO_UPTODATE)) | 294 | else if (!bio_flagged(bio, BIO_UPTODATE)) |
324 | ret = -EIO; | 295 | ret = -EIO; |
325 | 296 | ||
326 | bio_put(bio); | 297 | bio_put(bio); |
327 | return ret; | 298 | return ret; |
328 | } | 299 | } |
329 | EXPORT_SYMBOL(blkdev_issue_flush); | 300 | EXPORT_SYMBOL(blkdev_issue_flush); |
330 | 301 |
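For context, a usage sketch of the blkdev_issue_flush() interface shown above; the caller name is hypothetical, the block device is assumed to be already opened, and the error-sector slot is left NULL:

#include <linux/blkdev.h>

/* Illustrative caller (not from this commit): issue a synchronous cache
 * flush and treat "flush not supported" as success, since there is then
 * no write cache to flush. */
static int exampledrv_sync_cache(struct block_device *bdev)
{
	int ret;

	ret = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}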
block/blk-core.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> |
7 | * - July2000 | 7 | * - July2000 |
8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * This handles all read/write requests to block devices | 12 | * This handles all read/write requests to block devices |
13 | */ | 13 | */ |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/kernel_stat.h> | 21 | #include <linux/kernel_stat.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | 30 | ||
31 | #define CREATE_TRACE_POINTS | 31 | #define CREATE_TRACE_POINTS |
32 | #include <trace/events/block.h> | 32 | #include <trace/events/block.h> |
33 | 33 | ||
34 | #include "blk.h" | 34 | #include "blk.h" |
35 | 35 | ||
36 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); | 36 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); |
37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | 37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
39 | 39 | ||
40 | static int __make_request(struct request_queue *q, struct bio *bio); | 40 | static int __make_request(struct request_queue *q, struct bio *bio); |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * For the allocated request tables | 43 | * For the allocated request tables |
44 | */ | 44 | */ |
45 | static struct kmem_cache *request_cachep; | 45 | static struct kmem_cache *request_cachep; |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * For queue allocation | 48 | * For queue allocation |
49 | */ | 49 | */ |
50 | struct kmem_cache *blk_requestq_cachep; | 50 | struct kmem_cache *blk_requestq_cachep; |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Controlling structure to kblockd | 53 | * Controlling structure to kblockd |
54 | */ | 54 | */ |
55 | static struct workqueue_struct *kblockd_workqueue; | 55 | static struct workqueue_struct *kblockd_workqueue; |
56 | 56 | ||
57 | static void drive_stat_acct(struct request *rq, int new_io) | 57 | static void drive_stat_acct(struct request *rq, int new_io) |
58 | { | 58 | { |
59 | struct hd_struct *part; | 59 | struct hd_struct *part; |
60 | int rw = rq_data_dir(rq); | 60 | int rw = rq_data_dir(rq); |
61 | int cpu; | 61 | int cpu; |
62 | 62 | ||
63 | if (!blk_do_io_stat(rq)) | 63 | if (!blk_do_io_stat(rq)) |
64 | return; | 64 | return; |
65 | 65 | ||
66 | cpu = part_stat_lock(); | 66 | cpu = part_stat_lock(); |
67 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | 67 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
68 | 68 | ||
69 | if (!new_io) | 69 | if (!new_io) |
70 | part_stat_inc(cpu, part, merges[rw]); | 70 | part_stat_inc(cpu, part, merges[rw]); |
71 | else { | 71 | else { |
72 | part_round_stats(cpu, part); | 72 | part_round_stats(cpu, part); |
73 | part_inc_in_flight(part, rw); | 73 | part_inc_in_flight(part, rw); |
74 | } | 74 | } |
75 | 75 | ||
76 | part_stat_unlock(); | 76 | part_stat_unlock(); |
77 | } | 77 | } |
78 | 78 | ||
79 | void blk_queue_congestion_threshold(struct request_queue *q) | 79 | void blk_queue_congestion_threshold(struct request_queue *q) |
80 | { | 80 | { |
81 | int nr; | 81 | int nr; |
82 | 82 | ||
83 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 83 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
84 | if (nr > q->nr_requests) | 84 | if (nr > q->nr_requests) |
85 | nr = q->nr_requests; | 85 | nr = q->nr_requests; |
86 | q->nr_congestion_on = nr; | 86 | q->nr_congestion_on = nr; |
87 | 87 | ||
88 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 88 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
89 | if (nr < 1) | 89 | if (nr < 1) |
90 | nr = 1; | 90 | nr = 1; |
91 | q->nr_congestion_off = nr; | 91 | q->nr_congestion_off = nr; |
92 | } | 92 | } |
93 | 93 | ||
94 | /** | 94 | /** |
95 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 95 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
96 | * @bdev: device | 96 | * @bdev: device |
97 | * | 97 | * |
98 | * Locates the passed device's request queue and returns the address of its | 98 | * Locates the passed device's request queue and returns the address of its |
99 | * backing_dev_info | 99 | * backing_dev_info |
100 | * | 100 | * |
101 | * Will return NULL if the request queue cannot be located. | 101 | * Will return NULL if the request queue cannot be located. |
102 | */ | 102 | */ |
103 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 103 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
104 | { | 104 | { |
105 | struct backing_dev_info *ret = NULL; | 105 | struct backing_dev_info *ret = NULL; |
106 | struct request_queue *q = bdev_get_queue(bdev); | 106 | struct request_queue *q = bdev_get_queue(bdev); |
107 | 107 | ||
108 | if (q) | 108 | if (q) |
109 | ret = &q->backing_dev_info; | 109 | ret = &q->backing_dev_info; |
110 | return ret; | 110 | return ret; |
111 | } | 111 | } |
112 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 112 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
113 | 113 | ||
114 | void blk_rq_init(struct request_queue *q, struct request *rq) | 114 | void blk_rq_init(struct request_queue *q, struct request *rq) |
115 | { | 115 | { |
116 | memset(rq, 0, sizeof(*rq)); | 116 | memset(rq, 0, sizeof(*rq)); |
117 | 117 | ||
118 | INIT_LIST_HEAD(&rq->queuelist); | 118 | INIT_LIST_HEAD(&rq->queuelist); |
119 | INIT_LIST_HEAD(&rq->timeout_list); | 119 | INIT_LIST_HEAD(&rq->timeout_list); |
120 | rq->cpu = -1; | 120 | rq->cpu = -1; |
121 | rq->q = q; | 121 | rq->q = q; |
122 | rq->__sector = (sector_t) -1; | 122 | rq->__sector = (sector_t) -1; |
123 | INIT_HLIST_NODE(&rq->hash); | 123 | INIT_HLIST_NODE(&rq->hash); |
124 | RB_CLEAR_NODE(&rq->rb_node); | 124 | RB_CLEAR_NODE(&rq->rb_node); |
125 | rq->cmd = rq->__cmd; | 125 | rq->cmd = rq->__cmd; |
126 | rq->cmd_len = BLK_MAX_CDB; | 126 | rq->cmd_len = BLK_MAX_CDB; |
127 | rq->tag = -1; | 127 | rq->tag = -1; |
128 | rq->ref_count = 1; | 128 | rq->ref_count = 1; |
129 | rq->start_time = jiffies; | 129 | rq->start_time = jiffies; |
130 | set_start_time_ns(rq); | 130 | set_start_time_ns(rq); |
131 | } | 131 | } |
132 | EXPORT_SYMBOL(blk_rq_init); | 132 | EXPORT_SYMBOL(blk_rq_init); |
133 | 133 | ||
134 | static void req_bio_endio(struct request *rq, struct bio *bio, | 134 | static void req_bio_endio(struct request *rq, struct bio *bio, |
135 | unsigned int nbytes, int error) | 135 | unsigned int nbytes, int error) |
136 | { | 136 | { |
137 | struct request_queue *q = rq->q; | 137 | struct request_queue *q = rq->q; |
138 | 138 | ||
139 | if (&q->bar_rq != rq) { | 139 | if (&q->bar_rq != rq) { |
140 | if (error) | 140 | if (error) |
141 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 141 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
142 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 142 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
143 | error = -EIO; | 143 | error = -EIO; |
144 | 144 | ||
145 | if (unlikely(nbytes > bio->bi_size)) { | 145 | if (unlikely(nbytes > bio->bi_size)) { |
146 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", | 146 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", |
147 | __func__, nbytes, bio->bi_size); | 147 | __func__, nbytes, bio->bi_size); |
148 | nbytes = bio->bi_size; | 148 | nbytes = bio->bi_size; |
149 | } | 149 | } |
150 | 150 | ||
151 | if (unlikely(rq->cmd_flags & REQ_QUIET)) | 151 | if (unlikely(rq->cmd_flags & REQ_QUIET)) |
152 | set_bit(BIO_QUIET, &bio->bi_flags); | 152 | set_bit(BIO_QUIET, &bio->bi_flags); |
153 | 153 | ||
154 | bio->bi_size -= nbytes; | 154 | bio->bi_size -= nbytes; |
155 | bio->bi_sector += (nbytes >> 9); | 155 | bio->bi_sector += (nbytes >> 9); |
156 | 156 | ||
157 | if (bio_integrity(bio)) | 157 | if (bio_integrity(bio)) |
158 | bio_integrity_advance(bio, nbytes); | 158 | bio_integrity_advance(bio, nbytes); |
159 | 159 | ||
160 | if (bio->bi_size == 0) | 160 | if (bio->bi_size == 0) |
161 | bio_endio(bio, error); | 161 | bio_endio(bio, error); |
162 | } else { | 162 | } else { |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Okay, this is the barrier request in progress, just | 165 | * Okay, this is the barrier request in progress, just |
166 | * record the error; | 166 | * record the error; |
167 | */ | 167 | */ |
168 | if (error && !q->orderr) | 168 | if (error && !q->orderr) |
169 | q->orderr = error; | 169 | q->orderr = error; |
170 | } | 170 | } |
171 | } | 171 | } |
172 | 172 | ||
173 | void blk_dump_rq_flags(struct request *rq, char *msg) | 173 | void blk_dump_rq_flags(struct request *rq, char *msg) |
174 | { | 174 | { |
175 | int bit; | 175 | int bit; |
176 | 176 | ||
177 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, | 177 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, |
178 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 178 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
179 | rq->cmd_flags); | 179 | rq->cmd_flags); |
180 | 180 | ||
181 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", | 181 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", |
182 | (unsigned long long)blk_rq_pos(rq), | 182 | (unsigned long long)blk_rq_pos(rq), |
183 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); | 183 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); |
184 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", | 184 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", |
185 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); | 185 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); |
186 | 186 | ||
187 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 187 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
188 | printk(KERN_INFO " cdb: "); | 188 | printk(KERN_INFO " cdb: "); |
189 | for (bit = 0; bit < BLK_MAX_CDB; bit++) | 189 | for (bit = 0; bit < BLK_MAX_CDB; bit++) |
190 | printk("%02x ", rq->cmd[bit]); | 190 | printk("%02x ", rq->cmd[bit]); |
191 | printk("\n"); | 191 | printk("\n"); |
192 | } | 192 | } |
193 | } | 193 | } |
194 | EXPORT_SYMBOL(blk_dump_rq_flags); | 194 | EXPORT_SYMBOL(blk_dump_rq_flags); |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * "plug" the device if there are no outstanding requests: this will | 197 | * "plug" the device if there are no outstanding requests: this will |
198 | * force the transfer to start only after we have put all the requests | 198 | * force the transfer to start only after we have put all the requests |
199 | * on the list. | 199 | * on the list. |
200 | * | 200 | * |
201 | * This is called with interrupts off and no requests on the queue and | 201 | * This is called with interrupts off and no requests on the queue and |
202 | * with the queue lock held. | 202 | * with the queue lock held. |
203 | */ | 203 | */ |
204 | void blk_plug_device(struct request_queue *q) | 204 | void blk_plug_device(struct request_queue *q) |
205 | { | 205 | { |
206 | WARN_ON(!irqs_disabled()); | 206 | WARN_ON(!irqs_disabled()); |
207 | 207 | ||
208 | /* | 208 | /* |
209 | * don't plug a stopped queue, it must be paired with blk_start_queue() | 209 | * don't plug a stopped queue, it must be paired with blk_start_queue() |
210 | * which will restart the queueing | 210 | * which will restart the queueing |
211 | */ | 211 | */ |
212 | if (blk_queue_stopped(q)) | 212 | if (blk_queue_stopped(q)) |
213 | return; | 213 | return; |
214 | 214 | ||
215 | if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { | 215 | if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { |
216 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | 216 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); |
217 | trace_block_plug(q); | 217 | trace_block_plug(q); |
218 | } | 218 | } |
219 | } | 219 | } |
220 | EXPORT_SYMBOL(blk_plug_device); | 220 | EXPORT_SYMBOL(blk_plug_device); |
221 | 221 | ||
222 | /** | 222 | /** |
223 | * blk_plug_device_unlocked - plug a device without queue lock held | 223 | * blk_plug_device_unlocked - plug a device without queue lock held |
224 | * @q: The &struct request_queue to plug | 224 | * @q: The &struct request_queue to plug |
225 | * | 225 | * |
226 | * Description: | 226 | * Description: |
227 | * Like @blk_plug_device(), but grabs the queue lock and disables | 227 | * Like @blk_plug_device(), but grabs the queue lock and disables |
228 | * interrupts. | 228 | * interrupts. |
229 | **/ | 229 | **/ |
230 | void blk_plug_device_unlocked(struct request_queue *q) | 230 | void blk_plug_device_unlocked(struct request_queue *q) |
231 | { | 231 | { |
232 | unsigned long flags; | 232 | unsigned long flags; |
233 | 233 | ||
234 | spin_lock_irqsave(q->queue_lock, flags); | 234 | spin_lock_irqsave(q->queue_lock, flags); |
235 | blk_plug_device(q); | 235 | blk_plug_device(q); |
236 | spin_unlock_irqrestore(q->queue_lock, flags); | 236 | spin_unlock_irqrestore(q->queue_lock, flags); |
237 | } | 237 | } |
238 | EXPORT_SYMBOL(blk_plug_device_unlocked); | 238 | EXPORT_SYMBOL(blk_plug_device_unlocked); |
239 | 239 | ||
240 | /* | 240 | /* |
241 | * remove the queue from the plugged list, if present. called with | 241 | * remove the queue from the plugged list, if present. called with |
242 | * queue lock held and interrupts disabled. | 242 | * queue lock held and interrupts disabled. |
243 | */ | 243 | */ |
244 | int blk_remove_plug(struct request_queue *q) | 244 | int blk_remove_plug(struct request_queue *q) |
245 | { | 245 | { |
246 | WARN_ON(!irqs_disabled()); | 246 | WARN_ON(!irqs_disabled()); |
247 | 247 | ||
248 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) | 248 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) |
249 | return 0; | 249 | return 0; |
250 | 250 | ||
251 | del_timer(&q->unplug_timer); | 251 | del_timer(&q->unplug_timer); |
252 | return 1; | 252 | return 1; |
253 | } | 253 | } |
254 | EXPORT_SYMBOL(blk_remove_plug); | 254 | EXPORT_SYMBOL(blk_remove_plug); |
255 | 255 | ||
256 | /* | 256 | /* |
257 | * remove the plug and let it rip.. | 257 | * remove the plug and let it rip.. |
258 | */ | 258 | */ |
259 | void __generic_unplug_device(struct request_queue *q) | 259 | void __generic_unplug_device(struct request_queue *q) |
260 | { | 260 | { |
261 | if (unlikely(blk_queue_stopped(q))) | 261 | if (unlikely(blk_queue_stopped(q))) |
262 | return; | 262 | return; |
263 | if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) | 263 | if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) |
264 | return; | 264 | return; |
265 | 265 | ||
266 | q->request_fn(q); | 266 | q->request_fn(q); |
267 | } | 267 | } |
268 | 268 | ||
269 | /** | 269 | /** |
270 | * generic_unplug_device - fire a request queue | 270 | * generic_unplug_device - fire a request queue |
271 | * @q: The &struct request_queue in question | 271 | * @q: The &struct request_queue in question |
272 | * | 272 | * |
273 | * Description: | 273 | * Description: |
274 | * Linux uses plugging to build bigger requests queues before letting | 274 | * Linux uses plugging to build bigger requests queues before letting |
275 | * the device have at them. If a queue is plugged, the I/O scheduler | 275 | * the device have at them. If a queue is plugged, the I/O scheduler |
276 | * is still adding and merging requests on the queue. Once the queue | 276 | * is still adding and merging requests on the queue. Once the queue |
277 | * gets unplugged, the request_fn defined for the queue is invoked and | 277 | * gets unplugged, the request_fn defined for the queue is invoked and |
278 | * transfers started. | 278 | * transfers started. |
279 | **/ | 279 | **/ |
280 | void generic_unplug_device(struct request_queue *q) | 280 | void generic_unplug_device(struct request_queue *q) |
281 | { | 281 | { |
282 | if (blk_queue_plugged(q)) { | 282 | if (blk_queue_plugged(q)) { |
283 | spin_lock_irq(q->queue_lock); | 283 | spin_lock_irq(q->queue_lock); |
284 | __generic_unplug_device(q); | 284 | __generic_unplug_device(q); |
285 | spin_unlock_irq(q->queue_lock); | 285 | spin_unlock_irq(q->queue_lock); |
286 | } | 286 | } |
287 | } | 287 | } |
288 | EXPORT_SYMBOL(generic_unplug_device); | 288 | EXPORT_SYMBOL(generic_unplug_device); |
289 | 289 | ||
290 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | 290 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, |
291 | struct page *page) | 291 | struct page *page) |
292 | { | 292 | { |
293 | struct request_queue *q = bdi->unplug_io_data; | 293 | struct request_queue *q = bdi->unplug_io_data; |
294 | 294 | ||
295 | blk_unplug(q); | 295 | blk_unplug(q); |
296 | } | 296 | } |
297 | 297 | ||
298 | void blk_unplug_work(struct work_struct *work) | 298 | void blk_unplug_work(struct work_struct *work) |
299 | { | 299 | { |
300 | struct request_queue *q = | 300 | struct request_queue *q = |
301 | container_of(work, struct request_queue, unplug_work); | 301 | container_of(work, struct request_queue, unplug_work); |
302 | 302 | ||
303 | trace_block_unplug_io(q); | 303 | trace_block_unplug_io(q); |
304 | q->unplug_fn(q); | 304 | q->unplug_fn(q); |
305 | } | 305 | } |
306 | 306 | ||
307 | void blk_unplug_timeout(unsigned long data) | 307 | void blk_unplug_timeout(unsigned long data) |
308 | { | 308 | { |
309 | struct request_queue *q = (struct request_queue *)data; | 309 | struct request_queue *q = (struct request_queue *)data; |
310 | 310 | ||
311 | trace_block_unplug_timer(q); | 311 | trace_block_unplug_timer(q); |
312 | kblockd_schedule_work(q, &q->unplug_work); | 312 | kblockd_schedule_work(q, &q->unplug_work); |
313 | } | 313 | } |
314 | 314 | ||
315 | void blk_unplug(struct request_queue *q) | 315 | void blk_unplug(struct request_queue *q) |
316 | { | 316 | { |
317 | /* | 317 | /* |
318 | * devices don't necessarily have an ->unplug_fn defined | 318 | * devices don't necessarily have an ->unplug_fn defined |
319 | */ | 319 | */ |
320 | if (q->unplug_fn) { | 320 | if (q->unplug_fn) { |
321 | trace_block_unplug_io(q); | 321 | trace_block_unplug_io(q); |
322 | q->unplug_fn(q); | 322 | q->unplug_fn(q); |
323 | } | 323 | } |
324 | } | 324 | } |
325 | EXPORT_SYMBOL(blk_unplug); | 325 | EXPORT_SYMBOL(blk_unplug); |
326 | 326 | ||
327 | /** | 327 | /** |
328 | * blk_start_queue - restart a previously stopped queue | 328 | * blk_start_queue - restart a previously stopped queue |
329 | * @q: The &struct request_queue in question | 329 | * @q: The &struct request_queue in question |
330 | * | 330 | * |
331 | * Description: | 331 | * Description: |
332 | * blk_start_queue() will clear the stop flag on the queue, and call | 332 | * blk_start_queue() will clear the stop flag on the queue, and call |
333 | * the request_fn for the queue if it was in a stopped state when | 333 | * the request_fn for the queue if it was in a stopped state when |
334 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 334 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
335 | **/ | 335 | **/ |
336 | void blk_start_queue(struct request_queue *q) | 336 | void blk_start_queue(struct request_queue *q) |
337 | { | 337 | { |
338 | WARN_ON(!irqs_disabled()); | 338 | WARN_ON(!irqs_disabled()); |
339 | 339 | ||
340 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | 340 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
341 | __blk_run_queue(q); | 341 | __blk_run_queue(q); |
342 | } | 342 | } |
343 | EXPORT_SYMBOL(blk_start_queue); | 343 | EXPORT_SYMBOL(blk_start_queue); |
344 | 344 | ||
345 | /** | 345 | /** |
346 | * blk_stop_queue - stop a queue | 346 | * blk_stop_queue - stop a queue |
347 | * @q: The &struct request_queue in question | 347 | * @q: The &struct request_queue in question |
348 | * | 348 | * |
349 | * Description: | 349 | * Description: |
350 | * The Linux block layer assumes that a block driver will consume all | 350 | * The Linux block layer assumes that a block driver will consume all |
351 | * entries on the request queue when the request_fn strategy is called. | 351 | * entries on the request queue when the request_fn strategy is called. |
352 | * Often this will not happen, because of hardware limitations (queue | 352 | * Often this will not happen, because of hardware limitations (queue |
353 | * depth settings). If a device driver gets a 'queue full' response, | 353 | * depth settings). If a device driver gets a 'queue full' response, |
354 | * or if it simply chooses not to queue more I/O at one point, it can | 354 | * or if it simply chooses not to queue more I/O at one point, it can |
355 | * call this function to prevent the request_fn from being called until | 355 | * call this function to prevent the request_fn from being called until |
356 | * the driver has signalled it's ready to go again. This happens by calling | 356 | * the driver has signalled it's ready to go again. This happens by calling |
357 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 357 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
358 | **/ | 358 | **/ |
359 | void blk_stop_queue(struct request_queue *q) | 359 | void blk_stop_queue(struct request_queue *q) |
360 | { | 360 | { |
361 | blk_remove_plug(q); | 361 | blk_remove_plug(q); |
362 | queue_flag_set(QUEUE_FLAG_STOPPED, q); | 362 | queue_flag_set(QUEUE_FLAG_STOPPED, q); |
363 | } | 363 | } |
364 | EXPORT_SYMBOL(blk_stop_queue); | 364 | EXPORT_SYMBOL(blk_stop_queue); |
365 | 365 | ||
366 | /** | 366 | /** |
367 | * blk_sync_queue - cancel any pending callbacks on a queue | 367 | * blk_sync_queue - cancel any pending callbacks on a queue |
368 | * @q: the queue | 368 | * @q: the queue |
369 | * | 369 | * |
370 | * Description: | 370 | * Description: |
371 | * The block layer may perform asynchronous callback activity | 371 | * The block layer may perform asynchronous callback activity |
372 | * on a queue, such as calling the unplug function after a timeout. | 372 | * on a queue, such as calling the unplug function after a timeout. |
373 | * A block device may call blk_sync_queue to ensure that any | 373 | * A block device may call blk_sync_queue to ensure that any |
374 | * such activity is cancelled, thus allowing it to release resources | 374 | * such activity is cancelled, thus allowing it to release resources |
375 | * that the callbacks might use. The caller must already have made sure | 375 | * that the callbacks might use. The caller must already have made sure |
376 | * that its ->make_request_fn will not re-add plugging prior to calling | 376 | * that its ->make_request_fn will not re-add plugging prior to calling |
377 | * this function. | 377 | * this function. |
378 | * | 378 | * |
379 | */ | 379 | */ |
380 | void blk_sync_queue(struct request_queue *q) | 380 | void blk_sync_queue(struct request_queue *q) |
381 | { | 381 | { |
382 | del_timer_sync(&q->unplug_timer); | 382 | del_timer_sync(&q->unplug_timer); |
383 | del_timer_sync(&q->timeout); | 383 | del_timer_sync(&q->timeout); |
384 | cancel_work_sync(&q->unplug_work); | 384 | cancel_work_sync(&q->unplug_work); |
385 | } | 385 | } |
386 | EXPORT_SYMBOL(blk_sync_queue); | 386 | EXPORT_SYMBOL(blk_sync_queue); |
387 | 387 | ||
388 | /** | 388 | /** |
389 | * __blk_run_queue - run a single device queue | 389 | * __blk_run_queue - run a single device queue |
390 | * @q: The queue to run | 390 | * @q: The queue to run |
391 | * | 391 | * |
392 | * Description: | 392 | * Description: |
393 | * See @blk_run_queue. This variant must be called with the queue lock | 393 | * See @blk_run_queue. This variant must be called with the queue lock |
394 | * held and interrupts disabled. | 394 | * held and interrupts disabled. |
395 | * | 395 | * |
396 | */ | 396 | */ |
397 | void __blk_run_queue(struct request_queue *q) | 397 | void __blk_run_queue(struct request_queue *q) |
398 | { | 398 | { |
399 | blk_remove_plug(q); | 399 | blk_remove_plug(q); |
400 | 400 | ||
401 | if (unlikely(blk_queue_stopped(q))) | 401 | if (unlikely(blk_queue_stopped(q))) |
402 | return; | 402 | return; |
403 | 403 | ||
404 | if (elv_queue_empty(q)) | 404 | if (elv_queue_empty(q)) |
405 | return; | 405 | return; |
406 | 406 | ||
407 | /* | 407 | /* |
408 | * Only recurse once to avoid overrunning the stack, let the unplug | 408 | * Only recurse once to avoid overrunning the stack, let the unplug |
409 | * handling reinvoke the handler shortly if we already got there. | 409 | * handling reinvoke the handler shortly if we already got there. |
410 | */ | 410 | */ |
411 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | 411 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { |
412 | q->request_fn(q); | 412 | q->request_fn(q); |
413 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | 413 | queue_flag_clear(QUEUE_FLAG_REENTER, q); |
414 | } else { | 414 | } else { |
415 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); | 415 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); |
416 | kblockd_schedule_work(q, &q->unplug_work); | 416 | kblockd_schedule_work(q, &q->unplug_work); |
417 | } | 417 | } |
418 | } | 418 | } |
419 | EXPORT_SYMBOL(__blk_run_queue); | 419 | EXPORT_SYMBOL(__blk_run_queue); |
420 | 420 | ||
421 | /** | 421 | /** |
422 | * blk_run_queue - run a single device queue | 422 | * blk_run_queue - run a single device queue |
423 | * @q: The queue to run | 423 | * @q: The queue to run |
424 | * | 424 | * |
425 | * Description: | 425 | * Description: |
426 | * Invoke request handling on this queue, if it has pending work to do. | 426 | * Invoke request handling on this queue, if it has pending work to do. |
427 | * May be used to restart queueing when a request has completed. | 427 | * May be used to restart queueing when a request has completed. |
428 | */ | 428 | */ |
429 | void blk_run_queue(struct request_queue *q) | 429 | void blk_run_queue(struct request_queue *q) |
430 | { | 430 | { |
431 | unsigned long flags; | 431 | unsigned long flags; |
432 | 432 | ||
433 | spin_lock_irqsave(q->queue_lock, flags); | 433 | spin_lock_irqsave(q->queue_lock, flags); |
434 | __blk_run_queue(q); | 434 | __blk_run_queue(q); |
435 | spin_unlock_irqrestore(q->queue_lock, flags); | 435 | spin_unlock_irqrestore(q->queue_lock, flags); |
436 | } | 436 | } |
437 | EXPORT_SYMBOL(blk_run_queue); | 437 | EXPORT_SYMBOL(blk_run_queue); |
438 | 438 | ||
439 | void blk_put_queue(struct request_queue *q) | 439 | void blk_put_queue(struct request_queue *q) |
440 | { | 440 | { |
441 | kobject_put(&q->kobj); | 441 | kobject_put(&q->kobj); |
442 | } | 442 | } |
443 | 443 | ||
444 | void blk_cleanup_queue(struct request_queue *q) | 444 | void blk_cleanup_queue(struct request_queue *q) |
445 | { | 445 | { |
446 | /* | 446 | /* |
447 | * We know we have process context here, so we can be a little | 447 | * We know we have process context here, so we can be a little |
448 | * cautious and ensure that pending block actions on this device | 448 | * cautious and ensure that pending block actions on this device |
449 | * are done before moving on. Going into this function, we should | 449 | * are done before moving on. Going into this function, we should |
450 | * not have processes doing IO to this device. | 450 | * not have processes doing IO to this device. |
451 | */ | 451 | */ |
452 | blk_sync_queue(q); | 452 | blk_sync_queue(q); |
453 | 453 | ||
454 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | 454 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); |
455 | mutex_lock(&q->sysfs_lock); | 455 | mutex_lock(&q->sysfs_lock); |
456 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 456 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
457 | mutex_unlock(&q->sysfs_lock); | 457 | mutex_unlock(&q->sysfs_lock); |
458 | 458 | ||
459 | if (q->elevator) | 459 | if (q->elevator) |
460 | elevator_exit(q->elevator); | 460 | elevator_exit(q->elevator); |
461 | 461 | ||
462 | blk_put_queue(q); | 462 | blk_put_queue(q); |
463 | } | 463 | } |
464 | EXPORT_SYMBOL(blk_cleanup_queue); | 464 | EXPORT_SYMBOL(blk_cleanup_queue); |
465 | 465 | ||
466 | static int blk_init_free_list(struct request_queue *q) | 466 | static int blk_init_free_list(struct request_queue *q) |
467 | { | 467 | { |
468 | struct request_list *rl = &q->rq; | 468 | struct request_list *rl = &q->rq; |
469 | 469 | ||
470 | if (unlikely(rl->rq_pool)) | 470 | if (unlikely(rl->rq_pool)) |
471 | return 0; | 471 | return 0; |
472 | 472 | ||
473 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; | 473 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
474 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; | 474 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
475 | rl->elvpriv = 0; | 475 | rl->elvpriv = 0; |
476 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); | 476 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
477 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); | 477 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
478 | 478 | ||
479 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 479 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
480 | mempool_free_slab, request_cachep, q->node); | 480 | mempool_free_slab, request_cachep, q->node); |
481 | 481 | ||
482 | if (!rl->rq_pool) | 482 | if (!rl->rq_pool) |
483 | return -ENOMEM; | 483 | return -ENOMEM; |
484 | 484 | ||
485 | return 0; | 485 | return 0; |
486 | } | 486 | } |
487 | 487 | ||
488 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 488 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
489 | { | 489 | { |
490 | return blk_alloc_queue_node(gfp_mask, -1); | 490 | return blk_alloc_queue_node(gfp_mask, -1); |
491 | } | 491 | } |
492 | EXPORT_SYMBOL(blk_alloc_queue); | 492 | EXPORT_SYMBOL(blk_alloc_queue); |
493 | 493 | ||
494 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 494 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
495 | { | 495 | { |
496 | struct request_queue *q; | 496 | struct request_queue *q; |
497 | int err; | 497 | int err; |
498 | 498 | ||
499 | q = kmem_cache_alloc_node(blk_requestq_cachep, | 499 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
500 | gfp_mask | __GFP_ZERO, node_id); | 500 | gfp_mask | __GFP_ZERO, node_id); |
501 | if (!q) | 501 | if (!q) |
502 | return NULL; | 502 | return NULL; |
503 | 503 | ||
504 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | 504 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; |
505 | q->backing_dev_info.unplug_io_data = q; | 505 | q->backing_dev_info.unplug_io_data = q; |
506 | q->backing_dev_info.ra_pages = | 506 | q->backing_dev_info.ra_pages = |
507 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 507 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
508 | q->backing_dev_info.state = 0; | 508 | q->backing_dev_info.state = 0; |
509 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 509 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
510 | q->backing_dev_info.name = "block"; | 510 | q->backing_dev_info.name = "block"; |
511 | 511 | ||
512 | err = bdi_init(&q->backing_dev_info); | 512 | err = bdi_init(&q->backing_dev_info); |
513 | if (err) { | 513 | if (err) { |
514 | kmem_cache_free(blk_requestq_cachep, q); | 514 | kmem_cache_free(blk_requestq_cachep, q); |
515 | return NULL; | 515 | return NULL; |
516 | } | 516 | } |
517 | 517 | ||
518 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | 518 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
519 | laptop_mode_timer_fn, (unsigned long) q); | 519 | laptop_mode_timer_fn, (unsigned long) q); |
520 | init_timer(&q->unplug_timer); | 520 | init_timer(&q->unplug_timer); |
521 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 521 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
522 | INIT_LIST_HEAD(&q->timeout_list); | 522 | INIT_LIST_HEAD(&q->timeout_list); |
523 | INIT_WORK(&q->unplug_work, blk_unplug_work); | 523 | INIT_WORK(&q->unplug_work, blk_unplug_work); |
524 | 524 | ||
525 | kobject_init(&q->kobj, &blk_queue_ktype); | 525 | kobject_init(&q->kobj, &blk_queue_ktype); |
526 | 526 | ||
527 | mutex_init(&q->sysfs_lock); | 527 | mutex_init(&q->sysfs_lock); |
528 | spin_lock_init(&q->__queue_lock); | 528 | spin_lock_init(&q->__queue_lock); |
529 | 529 | ||
530 | return q; | 530 | return q; |
531 | } | 531 | } |
532 | EXPORT_SYMBOL(blk_alloc_queue_node); | 532 | EXPORT_SYMBOL(blk_alloc_queue_node); |
533 | 533 | ||
534 | /** | 534 | /** |
535 | * blk_init_queue - prepare a request queue for use with a block device | 535 | * blk_init_queue - prepare a request queue for use with a block device |
536 | * @rfn: The function to be called to process requests that have been | 536 | * @rfn: The function to be called to process requests that have been |
537 | * placed on the queue. | 537 | * placed on the queue. |
538 | * @lock: Request queue spin lock | 538 | * @lock: Request queue spin lock |
539 | * | 539 | * |
540 | * Description: | 540 | * Description: |
541 | * If a block device wishes to use the standard request handling procedures, | 541 | * If a block device wishes to use the standard request handling procedures, |
542 | * which sorts requests and coalesces adjacent requests, then it must | 542 | * which sorts requests and coalesces adjacent requests, then it must |
543 | * call blk_init_queue(). The function @rfn will be called when there | 543 | * call blk_init_queue(). The function @rfn will be called when there |
544 | * are requests on the queue that need to be processed. If the device | 544 | * are requests on the queue that need to be processed. If the device |
545 | * supports plugging, then @rfn may not be called immediately when requests | 545 | * supports plugging, then @rfn may not be called immediately when requests |
546 | * are available on the queue, but may be called at some time later instead. | 546 | * are available on the queue, but may be called at some time later instead. |
547 | * Plugged queues are generally unplugged when a buffer belonging to one | 547 | * Plugged queues are generally unplugged when a buffer belonging to one |
548 | * of the requests on the queue is needed, or due to memory pressure. | 548 | * of the requests on the queue is needed, or due to memory pressure. |
549 | * | 549 | * |
550 | * @rfn is not required, or even expected, to remove all requests off the | 550 | * @rfn is not required, or even expected, to remove all requests off the |
551 | * queue, but only as many as it can handle at a time. If it does leave | 551 | * queue, but only as many as it can handle at a time. If it does leave |
552 | * requests on the queue, it is responsible for arranging that the requests | 552 | * requests on the queue, it is responsible for arranging that the requests |
553 | * get dealt with eventually. | 553 | * get dealt with eventually. |
554 | * | 554 | * |
555 | * The queue spin lock must be held while manipulating the requests on the | 555 | * The queue spin lock must be held while manipulating the requests on the |
556 | * request queue; this lock will be taken also from interrupt context, so irq | 556 | * request queue; this lock will be taken also from interrupt context, so irq |
557 | * disabling is needed for it. | 557 | * disabling is needed for it. |
558 | * | 558 | * |
559 | * Function returns a pointer to the initialized request queue, or %NULL if | 559 | * Function returns a pointer to the initialized request queue, or %NULL if |
560 | * it didn't succeed. | 560 | * it didn't succeed. |
561 | * | 561 | * |
562 | * Note: | 562 | * Note: |
563 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 563 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
564 | * when the block device is deactivated (such as at module unload). | 564 | * when the block device is deactivated (such as at module unload). |
565 | **/ | 565 | **/ |
566 | 566 | ||
567 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 567 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
568 | { | 568 | { |
569 | return blk_init_queue_node(rfn, lock, -1); | 569 | return blk_init_queue_node(rfn, lock, -1); |
570 | } | 570 | } |
571 | EXPORT_SYMBOL(blk_init_queue); | 571 | EXPORT_SYMBOL(blk_init_queue); |
572 | 572 | ||
573 | struct request_queue * | 573 | struct request_queue * |
574 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 574 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
575 | { | 575 | { |
576 | struct request_queue *uninit_q, *q; | 576 | struct request_queue *uninit_q, *q; |
577 | 577 | ||
578 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 578 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
579 | if (!uninit_q) | 579 | if (!uninit_q) |
580 | return NULL; | 580 | return NULL; |
581 | 581 | ||
582 | q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); | 582 | q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); |
583 | if (!q) | 583 | if (!q) |
584 | blk_cleanup_queue(uninit_q); | 584 | blk_cleanup_queue(uninit_q); |
585 | 585 | ||
586 | return q; | 586 | return q; |
587 | } | 587 | } |
588 | EXPORT_SYMBOL(blk_init_queue_node); | 588 | EXPORT_SYMBOL(blk_init_queue_node); |
589 | 589 | ||
590 | struct request_queue * | 590 | struct request_queue * |
591 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | 591 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, |
592 | spinlock_t *lock) | 592 | spinlock_t *lock) |
593 | { | 593 | { |
594 | return blk_init_allocated_queue_node(q, rfn, lock, -1); | 594 | return blk_init_allocated_queue_node(q, rfn, lock, -1); |
595 | } | 595 | } |
596 | EXPORT_SYMBOL(blk_init_allocated_queue); | 596 | EXPORT_SYMBOL(blk_init_allocated_queue); |
597 | 597 | ||
598 | struct request_queue * | 598 | struct request_queue * |
599 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | 599 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, |
600 | spinlock_t *lock, int node_id) | 600 | spinlock_t *lock, int node_id) |
601 | { | 601 | { |
602 | if (!q) | 602 | if (!q) |
603 | return NULL; | 603 | return NULL; |
604 | 604 | ||
605 | q->node = node_id; | 605 | q->node = node_id; |
606 | if (blk_init_free_list(q)) | 606 | if (blk_init_free_list(q)) |
607 | return NULL; | 607 | return NULL; |
608 | 608 | ||
609 | q->request_fn = rfn; | 609 | q->request_fn = rfn; |
610 | q->prep_rq_fn = NULL; | 610 | q->prep_rq_fn = NULL; |
611 | q->unprep_rq_fn = NULL; | 611 | q->unprep_rq_fn = NULL; |
612 | q->unplug_fn = generic_unplug_device; | 612 | q->unplug_fn = generic_unplug_device; |
613 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 613 | q->queue_flags = QUEUE_FLAG_DEFAULT; |
614 | q->queue_lock = lock; | 614 | q->queue_lock = lock; |
615 | 615 | ||
616 | /* | 616 | /* |
617 | * This also sets hw/phys segments, boundary and size | 617 | * This also sets hw/phys segments, boundary and size |
618 | */ | 618 | */ |
619 | blk_queue_make_request(q, __make_request); | 619 | blk_queue_make_request(q, __make_request); |
620 | 620 | ||
621 | q->sg_reserved_size = INT_MAX; | 621 | q->sg_reserved_size = INT_MAX; |
622 | 622 | ||
623 | /* | 623 | /* |
624 | * all done | 624 | * all done |
625 | */ | 625 | */ |
626 | if (!elevator_init(q, NULL)) { | 626 | if (!elevator_init(q, NULL)) { |
627 | blk_queue_congestion_threshold(q); | 627 | blk_queue_congestion_threshold(q); |
628 | return q; | 628 | return q; |
629 | } | 629 | } |
630 | 630 | ||
631 | return NULL; | 631 | return NULL; |
632 | } | 632 | } |
633 | EXPORT_SYMBOL(blk_init_allocated_queue_node); | 633 | EXPORT_SYMBOL(blk_init_allocated_queue_node); |
634 | 634 | ||
635 | int blk_get_queue(struct request_queue *q) | 635 | int blk_get_queue(struct request_queue *q) |
636 | { | 636 | { |
637 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 637 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
638 | kobject_get(&q->kobj); | 638 | kobject_get(&q->kobj); |
639 | return 0; | 639 | return 0; |
640 | } | 640 | } |
641 | 641 | ||
642 | return 1; | 642 | return 1; |
643 | } | 643 | } |
644 | 644 | ||
645 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 645 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
646 | { | 646 | { |
647 | if (rq->cmd_flags & REQ_ELVPRIV) | 647 | if (rq->cmd_flags & REQ_ELVPRIV) |
648 | elv_put_request(q, rq); | 648 | elv_put_request(q, rq); |
649 | mempool_free(rq, q->rq.rq_pool); | 649 | mempool_free(rq, q->rq.rq_pool); |
650 | } | 650 | } |
651 | 651 | ||
652 | static struct request * | 652 | static struct request * |
653 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) | 653 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) |
654 | { | 654 | { |
655 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 655 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
656 | 656 | ||
657 | if (!rq) | 657 | if (!rq) |
658 | return NULL; | 658 | return NULL; |
659 | 659 | ||
660 | blk_rq_init(q, rq); | 660 | blk_rq_init(q, rq); |
661 | 661 | ||
662 | rq->cmd_flags = flags | REQ_ALLOCED; | 662 | rq->cmd_flags = flags | REQ_ALLOCED; |
663 | 663 | ||
664 | if (priv) { | 664 | if (priv) { |
665 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { | 665 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { |
666 | mempool_free(rq, q->rq.rq_pool); | 666 | mempool_free(rq, q->rq.rq_pool); |
667 | return NULL; | 667 | return NULL; |
668 | } | 668 | } |
669 | rq->cmd_flags |= REQ_ELVPRIV; | 669 | rq->cmd_flags |= REQ_ELVPRIV; |
670 | } | 670 | } |
671 | 671 | ||
672 | return rq; | 672 | return rq; |
673 | } | 673 | } |
674 | 674 | ||
675 | /* | 675 | /* |
676 | * ioc_batching returns true if the ioc is a valid batching request and | 676 | * ioc_batching returns true if the ioc is a valid batching request and |
677 | * should be given priority access to a request. | 677 | * should be given priority access to a request. |
678 | */ | 678 | */ |
679 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) | 679 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
680 | { | 680 | { |
681 | if (!ioc) | 681 | if (!ioc) |
682 | return 0; | 682 | return 0; |
683 | 683 | ||
684 | /* | 684 | /* |
685 | * Make sure the process is able to allocate at least 1 request | 685 | * Make sure the process is able to allocate at least 1 request |
686 | * even if the batch times out, otherwise we could theoretically | 686 | * even if the batch times out, otherwise we could theoretically |
687 | * lose wakeups. | 687 | * lose wakeups. |
688 | */ | 688 | */ |
689 | return ioc->nr_batch_requests == q->nr_batching || | 689 | return ioc->nr_batch_requests == q->nr_batching || |
690 | (ioc->nr_batch_requests > 0 | 690 | (ioc->nr_batch_requests > 0 |
691 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 691 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
692 | } | 692 | } |
693 | 693 | ||
694 | /* | 694 | /* |
695 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 695 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
696 | * will cause the process to be a "batcher" on all queues in the system. This | 696 | * will cause the process to be a "batcher" on all queues in the system. This |
697 | * is the behaviour we want though - once it gets a wakeup it should be given | 697 | * is the behaviour we want though - once it gets a wakeup it should be given |
698 | * a nice run. | 698 | * a nice run. |
699 | */ | 699 | */ |
700 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | 700 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
701 | { | 701 | { |
702 | if (!ioc || ioc_batching(q, ioc)) | 702 | if (!ioc || ioc_batching(q, ioc)) |
703 | return; | 703 | return; |
704 | 704 | ||
705 | ioc->nr_batch_requests = q->nr_batching; | 705 | ioc->nr_batch_requests = q->nr_batching; |
706 | ioc->last_waited = jiffies; | 706 | ioc->last_waited = jiffies; |
707 | } | 707 | } |
708 | 708 | ||
709 | static void __freed_request(struct request_queue *q, int sync) | 709 | static void __freed_request(struct request_queue *q, int sync) |
710 | { | 710 | { |
711 | struct request_list *rl = &q->rq; | 711 | struct request_list *rl = &q->rq; |
712 | 712 | ||
713 | if (rl->count[sync] < queue_congestion_off_threshold(q)) | 713 | if (rl->count[sync] < queue_congestion_off_threshold(q)) |
714 | blk_clear_queue_congested(q, sync); | 714 | blk_clear_queue_congested(q, sync); |
715 | 715 | ||
716 | if (rl->count[sync] + 1 <= q->nr_requests) { | 716 | if (rl->count[sync] + 1 <= q->nr_requests) { |
717 | if (waitqueue_active(&rl->wait[sync])) | 717 | if (waitqueue_active(&rl->wait[sync])) |
718 | wake_up(&rl->wait[sync]); | 718 | wake_up(&rl->wait[sync]); |
719 | 719 | ||
720 | blk_clear_queue_full(q, sync); | 720 | blk_clear_queue_full(q, sync); |
721 | } | 721 | } |
722 | } | 722 | } |
723 | 723 | ||
724 | /* | 724 | /* |
725 | * A request has just been released. Account for it, update the full and | 725 | * A request has just been released. Account for it, update the full and |
726 | * congestion status, wake up any waiters. Called under q->queue_lock. | 726 | * congestion status, wake up any waiters. Called under q->queue_lock. |
727 | */ | 727 | */ |
728 | static void freed_request(struct request_queue *q, int sync, int priv) | 728 | static void freed_request(struct request_queue *q, int sync, int priv) |
729 | { | 729 | { |
730 | struct request_list *rl = &q->rq; | 730 | struct request_list *rl = &q->rq; |
731 | 731 | ||
732 | rl->count[sync]--; | 732 | rl->count[sync]--; |
733 | if (priv) | 733 | if (priv) |
734 | rl->elvpriv--; | 734 | rl->elvpriv--; |
735 | 735 | ||
736 | __freed_request(q, sync); | 736 | __freed_request(q, sync); |
737 | 737 | ||
738 | if (unlikely(rl->starved[sync ^ 1])) | 738 | if (unlikely(rl->starved[sync ^ 1])) |
739 | __freed_request(q, sync ^ 1); | 739 | __freed_request(q, sync ^ 1); |
740 | } | 740 | } |
741 | 741 | ||
742 | /* | 742 | /* |
743 | * Get a free request, queue_lock must be held. | 743 | * Get a free request, queue_lock must be held. |
744 | * Returns NULL on failure, with queue_lock held. | 744 | * Returns NULL on failure, with queue_lock held. |
745 | * Returns !NULL on success, with queue_lock *not held*. | 745 | * Returns !NULL on success, with queue_lock *not held*. |
746 | */ | 746 | */ |
747 | static struct request *get_request(struct request_queue *q, int rw_flags, | 747 | static struct request *get_request(struct request_queue *q, int rw_flags, |
748 | struct bio *bio, gfp_t gfp_mask) | 748 | struct bio *bio, gfp_t gfp_mask) |
749 | { | 749 | { |
750 | struct request *rq = NULL; | 750 | struct request *rq = NULL; |
751 | struct request_list *rl = &q->rq; | 751 | struct request_list *rl = &q->rq; |
752 | struct io_context *ioc = NULL; | 752 | struct io_context *ioc = NULL; |
753 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 753 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
754 | int may_queue, priv; | 754 | int may_queue, priv; |
755 | 755 | ||
756 | may_queue = elv_may_queue(q, rw_flags); | 756 | may_queue = elv_may_queue(q, rw_flags); |
757 | if (may_queue == ELV_MQUEUE_NO) | 757 | if (may_queue == ELV_MQUEUE_NO) |
758 | goto rq_starved; | 758 | goto rq_starved; |
759 | 759 | ||
760 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { | 760 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
761 | if (rl->count[is_sync]+1 >= q->nr_requests) { | 761 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
762 | ioc = current_io_context(GFP_ATOMIC, q->node); | 762 | ioc = current_io_context(GFP_ATOMIC, q->node); |
763 | /* | 763 | /* |
764 | * The queue will fill after this allocation, so set | 764 | * The queue will fill after this allocation, so set |
765 | * it as full, and mark this process as "batching". | 765 | * it as full, and mark this process as "batching". |
766 | * This process will be allowed to complete a batch of | 766 | * This process will be allowed to complete a batch of |
767 | * requests, others will be blocked. | 767 | * requests, others will be blocked. |
768 | */ | 768 | */ |
769 | if (!blk_queue_full(q, is_sync)) { | 769 | if (!blk_queue_full(q, is_sync)) { |
770 | ioc_set_batching(q, ioc); | 770 | ioc_set_batching(q, ioc); |
771 | blk_set_queue_full(q, is_sync); | 771 | blk_set_queue_full(q, is_sync); |
772 | } else { | 772 | } else { |
773 | if (may_queue != ELV_MQUEUE_MUST | 773 | if (may_queue != ELV_MQUEUE_MUST |
774 | && !ioc_batching(q, ioc)) { | 774 | && !ioc_batching(q, ioc)) { |
775 | /* | 775 | /* |
776 | * The queue is full and the allocating | 776 | * The queue is full and the allocating |
777 | * process is not a "batcher", and not | 777 | * process is not a "batcher", and not |
778 | * exempted by the IO scheduler | 778 | * exempted by the IO scheduler |
779 | */ | 779 | */ |
780 | goto out; | 780 | goto out; |
781 | } | 781 | } |
782 | } | 782 | } |
783 | } | 783 | } |
784 | blk_set_queue_congested(q, is_sync); | 784 | blk_set_queue_congested(q, is_sync); |
785 | } | 785 | } |
786 | 786 | ||
787 | /* | 787 | /* |
788 | * Only allow batching queuers to allocate up to 50% over the defined | 788 | * Only allow batching queuers to allocate up to 50% over the defined |
789 | * limit of requests, otherwise we could have thousands of requests | 789 | * limit of requests, otherwise we could have thousands of requests |
790 | * allocated with any setting of ->nr_requests | 790 | * allocated with any setting of ->nr_requests |
791 | */ | 791 | */ |
792 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) | 792 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
793 | goto out; | 793 | goto out; |
794 | 794 | ||
795 | rl->count[is_sync]++; | 795 | rl->count[is_sync]++; |
796 | rl->starved[is_sync] = 0; | 796 | rl->starved[is_sync] = 0; |
797 | 797 | ||
798 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 798 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
799 | if (priv) | 799 | if (priv) |
800 | rl->elvpriv++; | 800 | rl->elvpriv++; |
801 | 801 | ||
802 | if (blk_queue_io_stat(q)) | 802 | if (blk_queue_io_stat(q)) |
803 | rw_flags |= REQ_IO_STAT; | 803 | rw_flags |= REQ_IO_STAT; |
804 | spin_unlock_irq(q->queue_lock); | 804 | spin_unlock_irq(q->queue_lock); |
805 | 805 | ||
806 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); | 806 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); |
807 | if (unlikely(!rq)) { | 807 | if (unlikely(!rq)) { |
808 | /* | 808 | /* |
809 | * Allocation failed presumably due to memory. Undo anything | 809 | * Allocation failed presumably due to memory. Undo anything |
810 | * we might have messed up. | 810 | * we might have messed up. |
811 | * | 811 | * |
812 | * Allocating task should really be put onto the front of the | 812 | * Allocating task should really be put onto the front of the |
813 | * wait queue, but this is pretty rare. | 813 | * wait queue, but this is pretty rare. |
814 | */ | 814 | */ |
815 | spin_lock_irq(q->queue_lock); | 815 | spin_lock_irq(q->queue_lock); |
816 | freed_request(q, is_sync, priv); | 816 | freed_request(q, is_sync, priv); |
817 | 817 | ||
818 | /* | 818 | /* |
819 | * in the very unlikely event that allocation failed and no | 819 | * in the very unlikely event that allocation failed and no |
820 | * requests for this direction were pending, mark us starved | 820 | * requests for this direction were pending, mark us starved |
821 | * so that freeing of a request in the other direction will | 821 | * so that freeing of a request in the other direction will |
822 | * notice us. Another possible fix would be to split the | 822 | * notice us. Another possible fix would be to split the |
823 | * rq mempool into READ and WRITE | 823 | * rq mempool into READ and WRITE |
824 | */ | 824 | */ |
825 | rq_starved: | 825 | rq_starved: |
826 | if (unlikely(rl->count[is_sync] == 0)) | 826 | if (unlikely(rl->count[is_sync] == 0)) |
827 | rl->starved[is_sync] = 1; | 827 | rl->starved[is_sync] = 1; |
828 | 828 | ||
829 | goto out; | 829 | goto out; |
830 | } | 830 | } |
831 | 831 | ||
832 | /* | 832 | /* |
833 | * ioc may be NULL here, and ioc_batching will be false. That's | 833 | * ioc may be NULL here, and ioc_batching will be false. That's |
834 | * OK, if the queue is under the request limit then requests need | 834 | * OK, if the queue is under the request limit then requests need |
835 | * not count toward the nr_batch_requests limit. There will always | 835 | * not count toward the nr_batch_requests limit. There will always |
836 | * be some limit enforced by BLK_BATCH_TIME. | 836 | * be some limit enforced by BLK_BATCH_TIME. |
837 | */ | 837 | */ |
838 | if (ioc_batching(q, ioc)) | 838 | if (ioc_batching(q, ioc)) |
839 | ioc->nr_batch_requests--; | 839 | ioc->nr_batch_requests--; |
840 | 840 | ||
841 | trace_block_getrq(q, bio, rw_flags & 1); | 841 | trace_block_getrq(q, bio, rw_flags & 1); |
842 | out: | 842 | out: |
843 | return rq; | 843 | return rq; |
844 | } | 844 | } |
845 | 845 | ||
846 | /* | 846 | /* |
847 | * No available requests for this queue, unplug the device and wait for some | 847 | * No available requests for this queue, unplug the device and wait for some |
848 | * requests to become available. | 848 | * requests to become available. |
849 | * | 849 | * |
850 | * Called with q->queue_lock held, and returns with it unlocked. | 850 | * Called with q->queue_lock held, and returns with it unlocked. |
851 | */ | 851 | */ |
852 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 852 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, |
853 | struct bio *bio) | 853 | struct bio *bio) |
854 | { | 854 | { |
855 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 855 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
856 | struct request *rq; | 856 | struct request *rq; |
857 | 857 | ||
858 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 858 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
859 | while (!rq) { | 859 | while (!rq) { |
860 | DEFINE_WAIT(wait); | 860 | DEFINE_WAIT(wait); |
861 | struct io_context *ioc; | 861 | struct io_context *ioc; |
862 | struct request_list *rl = &q->rq; | 862 | struct request_list *rl = &q->rq; |
863 | 863 | ||
864 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 864 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
865 | TASK_UNINTERRUPTIBLE); | 865 | TASK_UNINTERRUPTIBLE); |
866 | 866 | ||
867 | trace_block_sleeprq(q, bio, rw_flags & 1); | 867 | trace_block_sleeprq(q, bio, rw_flags & 1); |
868 | 868 | ||
869 | __generic_unplug_device(q); | 869 | __generic_unplug_device(q); |
870 | spin_unlock_irq(q->queue_lock); | 870 | spin_unlock_irq(q->queue_lock); |
871 | io_schedule(); | 871 | io_schedule(); |
872 | 872 | ||
873 | /* | 873 | /* |
874 | * After sleeping, we become a "batching" process and | 874 | * After sleeping, we become a "batching" process and |
875 | * will be able to allocate at least one request, and | 875 | * will be able to allocate at least one request, and |
876 | * up to a big batch of them for a small period of time. | 876 | * up to a big batch of them for a small period of time. |
877 | * See ioc_batching, ioc_set_batching | 877 | * See ioc_batching, ioc_set_batching |
878 | */ | 878 | */ |
879 | ioc = current_io_context(GFP_NOIO, q->node); | 879 | ioc = current_io_context(GFP_NOIO, q->node); |
880 | ioc_set_batching(q, ioc); | 880 | ioc_set_batching(q, ioc); |
881 | 881 | ||
882 | spin_lock_irq(q->queue_lock); | 882 | spin_lock_irq(q->queue_lock); |
883 | finish_wait(&rl->wait[is_sync], &wait); | 883 | finish_wait(&rl->wait[is_sync], &wait); |
884 | 884 | ||
885 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 885 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
886 | }; | 886 | }; |
887 | 887 | ||
888 | return rq; | 888 | return rq; |
889 | } | 889 | } |
890 | 890 | ||
891 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 891 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
892 | { | 892 | { |
893 | struct request *rq; | 893 | struct request *rq; |
894 | 894 | ||
895 | BUG_ON(rw != READ && rw != WRITE); | 895 | BUG_ON(rw != READ && rw != WRITE); |
896 | 896 | ||
897 | spin_lock_irq(q->queue_lock); | 897 | spin_lock_irq(q->queue_lock); |
898 | if (gfp_mask & __GFP_WAIT) { | 898 | if (gfp_mask & __GFP_WAIT) { |
899 | rq = get_request_wait(q, rw, NULL); | 899 | rq = get_request_wait(q, rw, NULL); |
900 | } else { | 900 | } else { |
901 | rq = get_request(q, rw, NULL, gfp_mask); | 901 | rq = get_request(q, rw, NULL, gfp_mask); |
902 | if (!rq) | 902 | if (!rq) |
903 | spin_unlock_irq(q->queue_lock); | 903 | spin_unlock_irq(q->queue_lock); |
904 | } | 904 | } |
905 | /* q->queue_lock is unlocked at this point */ | 905 | /* q->queue_lock is unlocked at this point */ |
906 | 906 | ||
907 | return rq; | 907 | return rq; |
908 | } | 908 | } |
909 | EXPORT_SYMBOL(blk_get_request); | 909 | EXPORT_SYMBOL(blk_get_request); |
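/*
 * Editor's note: a hedged sketch of how a driver might use blk_get_request()
 * with blk_execute_rq() for a driver-private command; <linux/blkdev.h> is
 * assumed to be included, and the function name and REQ_TYPE_SPECIAL setup
 * are illustrative, not something this commit mandates.
 */
static int mydrv_send_private_cmd(struct request_queue *q)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);	/* __GFP_WAIT: may sleep */
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(q, NULL, rq, 0);		/* insert at tail and wait */
	blk_put_request(rq);
	return err;
}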
910 | 910 | ||
911 | /** | 911 | /** |
912 | * blk_make_request - given a bio, allocate a corresponding struct request. | 912 | * blk_make_request - given a bio, allocate a corresponding struct request. |
913 | * @q: target request queue | 913 | * @q: target request queue |
914 | * @bio: The bio describing the memory mappings that will be submitted for IO. | 914 | * @bio: The bio describing the memory mappings that will be submitted for IO. |
915 | * It may be a chained-bio properly constructed by block/bio layer. | 915 | * It may be a chained-bio properly constructed by block/bio layer. |
916 | * @gfp_mask: gfp flags to be used for memory allocation | 916 | * @gfp_mask: gfp flags to be used for memory allocation |
917 | * | 917 | * |
918 | * blk_make_request is the parallel of generic_make_request for BLOCK_PC | 918 | * blk_make_request is the parallel of generic_make_request for BLOCK_PC |
919 | * type commands, where the struct request needs to be further initialized by | 919 | * type commands, where the struct request needs to be further initialized by |
920 | * the caller. It is passed a &struct bio, which describes the memory info of | 920 | * the caller. It is passed a &struct bio, which describes the memory info of |
921 | * the I/O transfer. | 921 | * the I/O transfer. |
922 | * | 922 | * |
923 | * The caller of blk_make_request must make sure that bi_io_vec | 923 | * The caller of blk_make_request must make sure that bi_io_vec |
924 | * are set to describe the memory buffers, and that bio_data_dir() will return | 924 | * are set to describe the memory buffers, and that bio_data_dir() will return |
925 | * the needed direction of the request (all bios in the passed bio-chain | 925 | * the needed direction of the request (all bios in the passed bio-chain |
926 | * must be set up accordingly). | 926 | * must be set up accordingly). |
927 | * | 927 | * |
928 | * If called under non-sleepable conditions, the mapped bio buffers must not | 928 | * If called under non-sleepable conditions, the mapped bio buffers must not |
929 | * need bouncing: allocate them with a mask or flags suitable for the | 929 | * need bouncing: allocate them with a mask or flags suitable for the |
930 | * target device. Otherwise the call to blk_queue_bounce will | 930 | * target device. Otherwise the call to blk_queue_bounce will |
931 | * BUG. | 931 | * BUG. |
932 | * | 932 | * |
933 | * WARNING: When allocating/cloning a bio-chain, careful consideration should be | 933 | * WARNING: When allocating/cloning a bio-chain, careful consideration should be |
934 | * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for | 934 | * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for |
935 | * anything but the first bio in the chain. Otherwise you risk waiting for IO | 935 | * anything but the first bio in the chain. Otherwise you risk waiting for IO |
936 | * completion of a bio that hasn't been submitted yet, thus resulting in a | 936 | * completion of a bio that hasn't been submitted yet, thus resulting in a |
937 | * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead | 937 | * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead |
938 | * of bio_alloc(), as that avoids the mempool deadlock. | 938 | * of bio_alloc(), as that avoids the mempool deadlock. |
939 | * If possible a big IO should be split into smaller parts when allocation | 939 | * If possible a big IO should be split into smaller parts when allocation |
940 | * fails. Partial allocation should not be an error, or you risk a live-lock. | 940 | * fails. Partial allocation should not be an error, or you risk a live-lock. |
941 | */ | 941 | */ |
942 | struct request *blk_make_request(struct request_queue *q, struct bio *bio, | 942 | struct request *blk_make_request(struct request_queue *q, struct bio *bio, |
943 | gfp_t gfp_mask) | 943 | gfp_t gfp_mask) |
944 | { | 944 | { |
945 | struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); | 945 | struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); |
946 | 946 | ||
947 | if (unlikely(!rq)) | 947 | if (unlikely(!rq)) |
948 | return ERR_PTR(-ENOMEM); | 948 | return ERR_PTR(-ENOMEM); |
949 | 949 | ||
950 | for_each_bio(bio) { | 950 | for_each_bio(bio) { |
951 | struct bio *bounce_bio = bio; | 951 | struct bio *bounce_bio = bio; |
952 | int ret; | 952 | int ret; |
953 | 953 | ||
954 | blk_queue_bounce(q, &bounce_bio); | 954 | blk_queue_bounce(q, &bounce_bio); |
955 | ret = blk_rq_append_bio(q, rq, bounce_bio); | 955 | ret = blk_rq_append_bio(q, rq, bounce_bio); |
956 | if (unlikely(ret)) { | 956 | if (unlikely(ret)) { |
957 | blk_put_request(rq); | 957 | blk_put_request(rq); |
958 | return ERR_PTR(ret); | 958 | return ERR_PTR(ret); |
959 | } | 959 | } |
960 | } | 960 | } |
961 | 961 | ||
962 | return rq; | 962 | return rq; |
963 | } | 963 | } |
964 | EXPORT_SYMBOL(blk_make_request); | 964 | EXPORT_SYMBOL(blk_make_request); |
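/*
 * Editor's note: a sketch of the calling convention documented above; the
 * caller builds the bio (bio_map_kern() is one option for a kernel buffer),
 * then finishes initializing the returned request before executing it.
 * The names and the BLOCK_PC setup are illustrative assumptions.
 */
#include <linux/bio.h>
#include <linux/err.h>

static int mydrv_issue_pc(struct request_queue *q, void *buf, unsigned int len)
{
	struct bio *bio;
	struct request *rq;
	int err;

	bio = bio_map_kern(q, buf, len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	rq = blk_make_request(q, bio, GFP_KERNEL);
	if (IS_ERR(rq)) {
		bio_put(bio);
		return PTR_ERR(rq);
	}

	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* caller finishes initialization */
	err = blk_execute_rq(q, NULL, rq, 0);
	blk_put_request(rq);
	return err;
}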
965 | 965 | ||
966 | /** | 966 | /** |
967 | * blk_requeue_request - put a request back on queue | 967 | * blk_requeue_request - put a request back on queue |
968 | * @q: request queue where request should be inserted | 968 | * @q: request queue where request should be inserted |
969 | * @rq: request to be inserted | 969 | * @rq: request to be inserted |
970 | * | 970 | * |
971 | * Description: | 971 | * Description: |
972 | * Drivers often keep queueing requests until the hardware cannot accept | 972 | * Drivers often keep queueing requests until the hardware cannot accept |
973 | * more, when that condition happens we need to put the request back | 973 | * more, when that condition happens we need to put the request back |
974 | * on the queue. Must be called with queue lock held. | 974 | * on the queue. Must be called with queue lock held. |
975 | */ | 975 | */ |
976 | void blk_requeue_request(struct request_queue *q, struct request *rq) | 976 | void blk_requeue_request(struct request_queue *q, struct request *rq) |
977 | { | 977 | { |
978 | blk_delete_timer(rq); | 978 | blk_delete_timer(rq); |
979 | blk_clear_rq_complete(rq); | 979 | blk_clear_rq_complete(rq); |
980 | trace_block_rq_requeue(q, rq); | 980 | trace_block_rq_requeue(q, rq); |
981 | 981 | ||
982 | if (blk_rq_tagged(rq)) | 982 | if (blk_rq_tagged(rq)) |
983 | blk_queue_end_tag(q, rq); | 983 | blk_queue_end_tag(q, rq); |
984 | 984 | ||
985 | BUG_ON(blk_queued_rq(rq)); | 985 | BUG_ON(blk_queued_rq(rq)); |
986 | 986 | ||
987 | elv_requeue_request(q, rq); | 987 | elv_requeue_request(q, rq); |
988 | } | 988 | } |
989 | EXPORT_SYMBOL(blk_requeue_request); | 989 | EXPORT_SYMBOL(blk_requeue_request); |
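/*
 * Editor's note: the typical requeue pattern in a ->request_fn when the
 * hardware cannot accept more work, matching the description above.  The
 * mydrv_hw_busy() stub is hypothetical; the queue lock is already held
 * when a request_fn runs, as blk_requeue_request() requires.
 */
static bool mydrv_hw_busy(void)
{
	return false;	/* placeholder for a real controller-busy check */
}

static void mydrv_busy_aware_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		if (mydrv_hw_busy()) {
			blk_requeue_request(q, rq);	/* put it back, lock held */
			blk_stop_queue(q);	/* restart with blk_start_queue() later */
			break;
		}
		/* ... issue rq to the hardware ... */
	}
}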
990 | 990 | ||
991 | /** | 991 | /** |
992 | * blk_insert_request - insert a special request into a request queue | 992 | * blk_insert_request - insert a special request into a request queue |
993 | * @q: request queue where request should be inserted | 993 | * @q: request queue where request should be inserted |
994 | * @rq: request to be inserted | 994 | * @rq: request to be inserted |
995 | * @at_head: insert request at head or tail of queue | 995 | * @at_head: insert request at head or tail of queue |
996 | * @data: private data | 996 | * @data: private data |
997 | * | 997 | * |
998 | * Description: | 998 | * Description: |
999 | * Many block devices need to execute commands asynchronously, so they don't | 999 | * Many block devices need to execute commands asynchronously, so they don't |
1000 | * block the whole kernel from preemption during request execution. This is | 1000 | * block the whole kernel from preemption during request execution. This is |
1001 | * accomplished normally by inserting artificial requests tagged as | 1001 | * accomplished normally by inserting artificial requests tagged as |
1002 | * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them | 1002 | * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them |
1003 | * be scheduled for actual execution by the request queue. | 1003 | * be scheduled for actual execution by the request queue. |
1004 | * | 1004 | * |
1005 | * We have the option of inserting at the head or the tail of the queue. | 1005 | * We have the option of inserting at the head or the tail of the queue. |
1006 | * Typically we use the tail for new ioctls and so forth. We use the head | 1006 | * Typically we use the tail for new ioctls and so forth. We use the head |
1007 | * of the queue for things like a QUEUE_FULL message from a device, or a | 1007 | * of the queue for things like a QUEUE_FULL message from a device, or a |
1008 | * host that is unable to accept a particular command. | 1008 | * host that is unable to accept a particular command. |
1009 | */ | 1009 | */ |
1010 | void blk_insert_request(struct request_queue *q, struct request *rq, | 1010 | void blk_insert_request(struct request_queue *q, struct request *rq, |
1011 | int at_head, void *data) | 1011 | int at_head, void *data) |
1012 | { | 1012 | { |
1013 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 1013 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
1014 | unsigned long flags; | 1014 | unsigned long flags; |
1015 | 1015 | ||
1016 | /* | 1016 | /* |
1017 | * tell I/O scheduler that this isn't a regular read/write (ie it | 1017 | * tell I/O scheduler that this isn't a regular read/write (ie it |
1018 | * must not attempt merges on this) and that it acts as a soft | 1018 | * must not attempt merges on this) and that it acts as a soft |
1019 | * barrier | 1019 | * barrier |
1020 | */ | 1020 | */ |
1021 | rq->cmd_type = REQ_TYPE_SPECIAL; | 1021 | rq->cmd_type = REQ_TYPE_SPECIAL; |
1022 | 1022 | ||
1023 | rq->special = data; | 1023 | rq->special = data; |
1024 | 1024 | ||
1025 | spin_lock_irqsave(q->queue_lock, flags); | 1025 | spin_lock_irqsave(q->queue_lock, flags); |
1026 | 1026 | ||
1027 | /* | 1027 | /* |
1028 | * If command is tagged, release the tag | 1028 | * If command is tagged, release the tag |
1029 | */ | 1029 | */ |
1030 | if (blk_rq_tagged(rq)) | 1030 | if (blk_rq_tagged(rq)) |
1031 | blk_queue_end_tag(q, rq); | 1031 | blk_queue_end_tag(q, rq); |
1032 | 1032 | ||
1033 | drive_stat_acct(rq, 1); | 1033 | drive_stat_acct(rq, 1); |
1034 | __elv_add_request(q, rq, where, 0); | 1034 | __elv_add_request(q, rq, where, 0); |
1035 | __blk_run_queue(q); | 1035 | __blk_run_queue(q); |
1036 | spin_unlock_irqrestore(q->queue_lock, flags); | 1036 | spin_unlock_irqrestore(q->queue_lock, flags); |
1037 | } | 1037 | } |
1038 | EXPORT_SYMBOL(blk_insert_request); | 1038 | EXPORT_SYMBOL(blk_insert_request); |
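/*
 * Editor's note: a hedged sketch of blk_insert_request() pushing a prepared
 * driver-private request to the head of the queue, e.g. to recover from a
 * QUEUE_FULL condition as mentioned above.  mydrv_cmd_data is illustrative.
 */
static void mydrv_push_private_cmd(struct request_queue *q, struct request *rq,
				   void *mydrv_cmd_data)
{
	/* at_head = 1: run before anything already queued */
	blk_insert_request(q, rq, 1, mydrv_cmd_data);
}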
1039 | 1039 | ||
1040 | /* | 1040 | /* |
1041 | * add-request adds a request to the linked list. | 1041 | * add-request adds a request to the linked list. |
1042 | * queue lock is held and interrupts disabled, as we muck with the | 1042 | * queue lock is held and interrupts disabled, as we muck with the |
1043 | * request queue list. | 1043 | * request queue list. |
1044 | */ | 1044 | */ |
1045 | static inline void add_request(struct request_queue *q, struct request *req) | 1045 | static inline void add_request(struct request_queue *q, struct request *req) |
1046 | { | 1046 | { |
1047 | drive_stat_acct(req, 1); | 1047 | drive_stat_acct(req, 1); |
1048 | 1048 | ||
1049 | /* | 1049 | /* |
1050 | * elevator indicated where it wants this request to be | 1050 | * elevator indicated where it wants this request to be |
1051 | * inserted at elevator_merge time | 1051 | * inserted at elevator_merge time |
1052 | */ | 1052 | */ |
1053 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | 1053 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); |
1054 | } | 1054 | } |
1055 | 1055 | ||
1056 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 1056 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
1057 | unsigned long now) | 1057 | unsigned long now) |
1058 | { | 1058 | { |
1059 | if (now == part->stamp) | 1059 | if (now == part->stamp) |
1060 | return; | 1060 | return; |
1061 | 1061 | ||
1062 | if (part_in_flight(part)) { | 1062 | if (part_in_flight(part)) { |
1063 | __part_stat_add(cpu, part, time_in_queue, | 1063 | __part_stat_add(cpu, part, time_in_queue, |
1064 | part_in_flight(part) * (now - part->stamp)); | 1064 | part_in_flight(part) * (now - part->stamp)); |
1065 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | 1065 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
1066 | } | 1066 | } |
1067 | part->stamp = now; | 1067 | part->stamp = now; |
1068 | } | 1068 | } |
1069 | 1069 | ||
1070 | /** | 1070 | /** |
1071 | * part_round_stats() - Round off the performance stats on a struct disk_stats. | 1071 | * part_round_stats() - Round off the performance stats on a struct disk_stats. |
1072 | * @cpu: cpu number for stats access | 1072 | * @cpu: cpu number for stats access |
1073 | * @part: target partition | 1073 | * @part: target partition |
1074 | * | 1074 | * |
1075 | * The average IO queue length and utilisation statistics are maintained | 1075 | * The average IO queue length and utilisation statistics are maintained |
1076 | * by observing the current state of the queue length and the amount of | 1076 | * by observing the current state of the queue length and the amount of |
1077 | * time it has been in this state for. | 1077 | * time it has been in this state for. |
1078 | * | 1078 | * |
1079 | * Normally, that accounting is done on IO completion, but that can result | 1079 | * Normally, that accounting is done on IO completion, but that can result |
1080 | * in more than a second's worth of IO being accounted for within any one | 1080 | * in more than a second's worth of IO being accounted for within any one |
1081 | * second, leading to >100% utilisation. To deal with that, we call this | 1081 | * second, leading to >100% utilisation. To deal with that, we call this |
1082 | * function to do a round-off before returning the results when reading | 1082 | * function to do a round-off before returning the results when reading |
1083 | * /proc/diskstats. This accounts immediately for all queue usage up to | 1083 | * /proc/diskstats. This accounts immediately for all queue usage up to |
1084 | * the current jiffies and restarts the counters again. | 1084 | * the current jiffies and restarts the counters again. |
1085 | */ | 1085 | */ |
1086 | void part_round_stats(int cpu, struct hd_struct *part) | 1086 | void part_round_stats(int cpu, struct hd_struct *part) |
1087 | { | 1087 | { |
1088 | unsigned long now = jiffies; | 1088 | unsigned long now = jiffies; |
1089 | 1089 | ||
1090 | if (part->partno) | 1090 | if (part->partno) |
1091 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); | 1091 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); |
1092 | part_round_stats_single(cpu, part, now); | 1092 | part_round_stats_single(cpu, part, now); |
1093 | } | 1093 | } |
1094 | EXPORT_SYMBOL_GPL(part_round_stats); | 1094 | EXPORT_SYMBOL_GPL(part_round_stats); |
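/*
 * Editor's note: a small worked example of the round-off above, assuming
 * 4 requests in flight and 100 jiffies elapsed since part->stamp: the call
 * adds 4 * 100 to time_in_queue and 100 to io_ticks, then resets the stamp
 * so the next accounting interval starts from the current jiffies.
 */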
1095 | 1095 | ||
1096 | /* | 1096 | /* |
1097 | * queue lock must be held | 1097 | * queue lock must be held |
1098 | */ | 1098 | */ |
1099 | void __blk_put_request(struct request_queue *q, struct request *req) | 1099 | void __blk_put_request(struct request_queue *q, struct request *req) |
1100 | { | 1100 | { |
1101 | if (unlikely(!q)) | 1101 | if (unlikely(!q)) |
1102 | return; | 1102 | return; |
1103 | if (unlikely(--req->ref_count)) | 1103 | if (unlikely(--req->ref_count)) |
1104 | return; | 1104 | return; |
1105 | 1105 | ||
1106 | elv_completed_request(q, req); | 1106 | elv_completed_request(q, req); |
1107 | 1107 | ||
1108 | /* this is a bio leak */ | 1108 | /* this is a bio leak */ |
1109 | WARN_ON(req->bio != NULL); | 1109 | WARN_ON(req->bio != NULL); |
1110 | 1110 | ||
1111 | /* | 1111 | /* |
1112 | * Request may not have originated from ll_rw_blk; if not, | 1112 | * Request may not have originated from ll_rw_blk; if not, |
1113 | * it didn't come out of our reserved rq pools | 1113 | * it didn't come out of our reserved rq pools |
1114 | */ | 1114 | */ |
1115 | if (req->cmd_flags & REQ_ALLOCED) { | 1115 | if (req->cmd_flags & REQ_ALLOCED) { |
1116 | int is_sync = rq_is_sync(req) != 0; | 1116 | int is_sync = rq_is_sync(req) != 0; |
1117 | int priv = req->cmd_flags & REQ_ELVPRIV; | 1117 | int priv = req->cmd_flags & REQ_ELVPRIV; |
1118 | 1118 | ||
1119 | BUG_ON(!list_empty(&req->queuelist)); | 1119 | BUG_ON(!list_empty(&req->queuelist)); |
1120 | BUG_ON(!hlist_unhashed(&req->hash)); | 1120 | BUG_ON(!hlist_unhashed(&req->hash)); |
1121 | 1121 | ||
1122 | blk_free_request(q, req); | 1122 | blk_free_request(q, req); |
1123 | freed_request(q, is_sync, priv); | 1123 | freed_request(q, is_sync, priv); |
1124 | } | 1124 | } |
1125 | } | 1125 | } |
1126 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1126 | EXPORT_SYMBOL_GPL(__blk_put_request); |
1127 | 1127 | ||
1128 | void blk_put_request(struct request *req) | 1128 | void blk_put_request(struct request *req) |
1129 | { | 1129 | { |
1130 | unsigned long flags; | 1130 | unsigned long flags; |
1131 | struct request_queue *q = req->q; | 1131 | struct request_queue *q = req->q; |
1132 | 1132 | ||
1133 | spin_lock_irqsave(q->queue_lock, flags); | 1133 | spin_lock_irqsave(q->queue_lock, flags); |
1134 | __blk_put_request(q, req); | 1134 | __blk_put_request(q, req); |
1135 | spin_unlock_irqrestore(q->queue_lock, flags); | 1135 | spin_unlock_irqrestore(q->queue_lock, flags); |
1136 | } | 1136 | } |
1137 | EXPORT_SYMBOL(blk_put_request); | 1137 | EXPORT_SYMBOL(blk_put_request); |
1138 | 1138 | ||
1139 | /** | 1139 | /** |
1140 | * blk_add_request_payload - add a payload to a request | 1140 | * blk_add_request_payload - add a payload to a request |
1141 | * @rq: request to update | 1141 | * @rq: request to update |
1142 | * @page: page backing the payload | 1142 | * @page: page backing the payload |
1143 | * @len: length of the payload. | 1143 | * @len: length of the payload. |
1144 | * | 1144 | * |
1145 | * This allows a block driver to later add a payload to an already submitted | 1145 | * This allows a block driver to later add a payload to an already submitted |
1146 | * request. The driver needs to take care of freeing the payload | 1146 | * request. The driver needs to take care of freeing the payload |
1147 | * itself. | 1147 | * itself. |
1148 | * | 1148 | * |
1149 | * Note that this is a quite horrible hack and nothing but handling of | 1149 | * Note that this is a quite horrible hack and nothing but handling of |
1150 | * discard requests should ever use it. | 1150 | * discard requests should ever use it. |
1151 | */ | 1151 | */ |
1152 | void blk_add_request_payload(struct request *rq, struct page *page, | 1152 | void blk_add_request_payload(struct request *rq, struct page *page, |
1153 | unsigned int len) | 1153 | unsigned int len) |
1154 | { | 1154 | { |
1155 | struct bio *bio = rq->bio; | 1155 | struct bio *bio = rq->bio; |
1156 | 1156 | ||
1157 | bio->bi_io_vec->bv_page = page; | 1157 | bio->bi_io_vec->bv_page = page; |
1158 | bio->bi_io_vec->bv_offset = 0; | 1158 | bio->bi_io_vec->bv_offset = 0; |
1159 | bio->bi_io_vec->bv_len = len; | 1159 | bio->bi_io_vec->bv_len = len; |
1160 | 1160 | ||
1161 | bio->bi_size = len; | 1161 | bio->bi_size = len; |
1162 | bio->bi_vcnt = 1; | 1162 | bio->bi_vcnt = 1; |
1163 | bio->bi_phys_segments = 1; | 1163 | bio->bi_phys_segments = 1; |
1164 | 1164 | ||
1165 | rq->__data_len = rq->resid_len = len; | 1165 | rq->__data_len = rq->resid_len = len; |
1166 | rq->nr_phys_segments = 1; | 1166 | rq->nr_phys_segments = 1; |
1167 | rq->buffer = bio_data(bio); | 1167 | rq->buffer = bio_data(bio); |
1168 | } | 1168 | } |
1169 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1169 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
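/*
 * Editor's note: a sketch of the discard-only use case called out above; a
 * driver's prep_rq_fn (registered with blk_queue_prep_rq()) attaches a
 * driver-built payload page to a REQ_DISCARD request.  Freeing the page on
 * completion is the driver's responsibility (not shown); the names and the
 * 512-byte payload length are illustrative assumptions.
 */
#include <linux/gfp.h>

static int mydrv_prep_fn(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_flags & REQ_DISCARD) {
		struct page *page = alloc_page(GFP_ATOMIC | __GFP_ZERO);

		if (!page)
			return BLKPREP_DEFER;	/* try again later */
		/* fill the page with the device-specific discard descriptor here */
		blk_add_request_payload(rq, page, 512);
	}
	return BLKPREP_OK;
}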
1170 | 1170 | ||
1171 | void init_request_from_bio(struct request *req, struct bio *bio) | 1171 | void init_request_from_bio(struct request *req, struct bio *bio) |
1172 | { | 1172 | { |
1173 | req->cpu = bio->bi_comp_cpu; | 1173 | req->cpu = bio->bi_comp_cpu; |
1174 | req->cmd_type = REQ_TYPE_FS; | 1174 | req->cmd_type = REQ_TYPE_FS; |
1175 | 1175 | ||
1176 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; | 1176 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; |
1177 | if (bio->bi_rw & REQ_RAHEAD) | 1177 | if (bio->bi_rw & REQ_RAHEAD) |
1178 | req->cmd_flags |= REQ_FAILFAST_MASK; | 1178 | req->cmd_flags |= REQ_FAILFAST_MASK; |
1179 | 1179 | ||
1180 | req->errors = 0; | 1180 | req->errors = 0; |
1181 | req->__sector = bio->bi_sector; | 1181 | req->__sector = bio->bi_sector; |
1182 | req->ioprio = bio_prio(bio); | 1182 | req->ioprio = bio_prio(bio); |
1183 | blk_rq_bio_prep(req->q, req, bio); | 1183 | blk_rq_bio_prep(req->q, req, bio); |
1184 | } | 1184 | } |
1185 | 1185 | ||
1186 | /* | 1186 | /* |
1187 | * Only disable plugging for non-rotational devices if they do tagging | 1187 | * Only disable plugging for non-rotational devices if they do tagging |
1188 | * as well; otherwise we do need the proper merging | 1188 | * as well; otherwise we do need the proper merging |
1189 | */ | 1189 | */ |
1190 | static inline bool queue_should_plug(struct request_queue *q) | 1190 | static inline bool queue_should_plug(struct request_queue *q) |
1191 | { | 1191 | { |
1192 | return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); | 1192 | return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | static int __make_request(struct request_queue *q, struct bio *bio) | 1195 | static int __make_request(struct request_queue *q, struct bio *bio) |
1196 | { | 1196 | { |
1197 | struct request *req; | 1197 | struct request *req; |
1198 | int el_ret; | 1198 | int el_ret; |
1199 | unsigned int bytes = bio->bi_size; | 1199 | unsigned int bytes = bio->bi_size; |
1200 | const unsigned short prio = bio_prio(bio); | 1200 | const unsigned short prio = bio_prio(bio); |
1201 | const bool sync = (bio->bi_rw & REQ_SYNC); | 1201 | const bool sync = (bio->bi_rw & REQ_SYNC); |
1202 | const bool unplug = (bio->bi_rw & REQ_UNPLUG); | 1202 | const bool unplug = (bio->bi_rw & REQ_UNPLUG); |
1203 | const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1203 | const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
1204 | int rw_flags; | 1204 | int rw_flags; |
1205 | 1205 | ||
1206 | if ((bio->bi_rw & REQ_HARDBARRIER) && | 1206 | /* REQ_HARDBARRIER is no more */ |
1207 | (q->next_ordered == QUEUE_ORDERED_NONE)) { | 1207 | if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, |
1208 | "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { | ||
1208 | bio_endio(bio, -EOPNOTSUPP); | 1209 | bio_endio(bio, -EOPNOTSUPP); |
1209 | return 0; | 1210 | return 0; |
1210 | } | 1211 | } |
1212 | |||
1211 | /* | 1213 | /* |
1212 | * low level driver can indicate that it wants pages above a | 1214 | * low level driver can indicate that it wants pages above a |
1213 | * certain limit bounced to low memory (ie for highmem, or even | 1215 | * certain limit bounced to low memory (ie for highmem, or even |
1214 | * ISA dma in theory) | 1216 | * ISA dma in theory) |
1215 | */ | 1217 | */ |
1216 | blk_queue_bounce(q, &bio); | 1218 | blk_queue_bounce(q, &bio); |
1217 | 1219 | ||
1218 | spin_lock_irq(q->queue_lock); | 1220 | spin_lock_irq(q->queue_lock); |
1219 | 1221 | ||
1220 | if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) | 1222 | if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) |
1221 | goto get_rq; | 1223 | goto get_rq; |
1222 | 1224 | ||
1223 | el_ret = elv_merge(q, &req, bio); | 1225 | el_ret = elv_merge(q, &req, bio); |
1224 | switch (el_ret) { | 1226 | switch (el_ret) { |
1225 | case ELEVATOR_BACK_MERGE: | 1227 | case ELEVATOR_BACK_MERGE: |
1226 | BUG_ON(!rq_mergeable(req)); | 1228 | BUG_ON(!rq_mergeable(req)); |
1227 | 1229 | ||
1228 | if (!ll_back_merge_fn(q, req, bio)) | 1230 | if (!ll_back_merge_fn(q, req, bio)) |
1229 | break; | 1231 | break; |
1230 | 1232 | ||
1231 | trace_block_bio_backmerge(q, bio); | 1233 | trace_block_bio_backmerge(q, bio); |
1232 | 1234 | ||
1233 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | 1235 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
1234 | blk_rq_set_mixed_merge(req); | 1236 | blk_rq_set_mixed_merge(req); |
1235 | 1237 | ||
1236 | req->biotail->bi_next = bio; | 1238 | req->biotail->bi_next = bio; |
1237 | req->biotail = bio; | 1239 | req->biotail = bio; |
1238 | req->__data_len += bytes; | 1240 | req->__data_len += bytes; |
1239 | req->ioprio = ioprio_best(req->ioprio, prio); | 1241 | req->ioprio = ioprio_best(req->ioprio, prio); |
1240 | if (!blk_rq_cpu_valid(req)) | 1242 | if (!blk_rq_cpu_valid(req)) |
1241 | req->cpu = bio->bi_comp_cpu; | 1243 | req->cpu = bio->bi_comp_cpu; |
1242 | drive_stat_acct(req, 0); | 1244 | drive_stat_acct(req, 0); |
1243 | elv_bio_merged(q, req, bio); | 1245 | elv_bio_merged(q, req, bio); |
1244 | if (!attempt_back_merge(q, req)) | 1246 | if (!attempt_back_merge(q, req)) |
1245 | elv_merged_request(q, req, el_ret); | 1247 | elv_merged_request(q, req, el_ret); |
1246 | goto out; | 1248 | goto out; |
1247 | 1249 | ||
1248 | case ELEVATOR_FRONT_MERGE: | 1250 | case ELEVATOR_FRONT_MERGE: |
1249 | BUG_ON(!rq_mergeable(req)); | 1251 | BUG_ON(!rq_mergeable(req)); |
1250 | 1252 | ||
1251 | if (!ll_front_merge_fn(q, req, bio)) | 1253 | if (!ll_front_merge_fn(q, req, bio)) |
1252 | break; | 1254 | break; |
1253 | 1255 | ||
1254 | trace_block_bio_frontmerge(q, bio); | 1256 | trace_block_bio_frontmerge(q, bio); |
1255 | 1257 | ||
1256 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { | 1258 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { |
1257 | blk_rq_set_mixed_merge(req); | 1259 | blk_rq_set_mixed_merge(req); |
1258 | req->cmd_flags &= ~REQ_FAILFAST_MASK; | 1260 | req->cmd_flags &= ~REQ_FAILFAST_MASK; |
1259 | req->cmd_flags |= ff; | 1261 | req->cmd_flags |= ff; |
1260 | } | 1262 | } |
1261 | 1263 | ||
1262 | bio->bi_next = req->bio; | 1264 | bio->bi_next = req->bio; |
1263 | req->bio = bio; | 1265 | req->bio = bio; |
1264 | 1266 | ||
1265 | /* | 1267 | /* |
1266 | * may not be valid. If the low level driver said | 1268 | * may not be valid. If the low level driver said |
1267 | * it didn't need a bounce buffer then it had better | 1269 | * it didn't need a bounce buffer then it had better |
1268 | * not touch req->buffer either... | 1270 | * not touch req->buffer either... |
1269 | */ | 1271 | */ |
1270 | req->buffer = bio_data(bio); | 1272 | req->buffer = bio_data(bio); |
1271 | req->__sector = bio->bi_sector; | 1273 | req->__sector = bio->bi_sector; |
1272 | req->__data_len += bytes; | 1274 | req->__data_len += bytes; |
1273 | req->ioprio = ioprio_best(req->ioprio, prio); | 1275 | req->ioprio = ioprio_best(req->ioprio, prio); |
1274 | if (!blk_rq_cpu_valid(req)) | 1276 | if (!blk_rq_cpu_valid(req)) |
1275 | req->cpu = bio->bi_comp_cpu; | 1277 | req->cpu = bio->bi_comp_cpu; |
1276 | drive_stat_acct(req, 0); | 1278 | drive_stat_acct(req, 0); |
1277 | elv_bio_merged(q, req, bio); | 1279 | elv_bio_merged(q, req, bio); |
1278 | if (!attempt_front_merge(q, req)) | 1280 | if (!attempt_front_merge(q, req)) |
1279 | elv_merged_request(q, req, el_ret); | 1281 | elv_merged_request(q, req, el_ret); |
1280 | goto out; | 1282 | goto out; |
1281 | 1283 | ||
1282 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | 1284 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ |
1283 | default: | 1285 | default: |
1284 | ; | 1286 | ; |
1285 | } | 1287 | } |
1286 | 1288 | ||
1287 | get_rq: | 1289 | get_rq: |
1288 | /* | 1290 | /* |
1289 | * This sync check and mask will be re-done in init_request_from_bio(), | 1291 | * This sync check and mask will be re-done in init_request_from_bio(), |
1290 | * but we need to set it earlier to expose the sync flag to the | 1292 | * but we need to set it earlier to expose the sync flag to the |
1291 | * rq allocator and io schedulers. | 1293 | * rq allocator and io schedulers. |
1292 | */ | 1294 | */ |
1293 | rw_flags = bio_data_dir(bio); | 1295 | rw_flags = bio_data_dir(bio); |
1294 | if (sync) | 1296 | if (sync) |
1295 | rw_flags |= REQ_SYNC; | 1297 | rw_flags |= REQ_SYNC; |
1296 | 1298 | ||
1297 | /* | 1299 | /* |
1298 | * Grab a free request. This might sleep but cannot fail. | 1300 | * Grab a free request. This might sleep but cannot fail. |
1299 | * Returns with the queue unlocked. | 1301 | * Returns with the queue unlocked. |
1300 | */ | 1302 | */ |
1301 | req = get_request_wait(q, rw_flags, bio); | 1303 | req = get_request_wait(q, rw_flags, bio); |
1302 | 1304 | ||
1303 | /* | 1305 | /* |
1304 | * After dropping the lock and possibly sleeping here, our request | 1306 | * After dropping the lock and possibly sleeping here, our request |
1305 | * may now be mergeable after it had proven unmergeable (above). | 1307 | * may now be mergeable after it had proven unmergeable (above). |
1306 | * We don't worry about that case for efficiency. It won't happen | 1308 | * We don't worry about that case for efficiency. It won't happen |
1307 | * often, and the elevators are able to handle it. | 1309 | * often, and the elevators are able to handle it. |
1308 | */ | 1310 | */ |
1309 | init_request_from_bio(req, bio); | 1311 | init_request_from_bio(req, bio); |
1310 | 1312 | ||
1311 | spin_lock_irq(q->queue_lock); | 1313 | spin_lock_irq(q->queue_lock); |
1312 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1314 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || |
1313 | bio_flagged(bio, BIO_CPU_AFFINE)) | 1315 | bio_flagged(bio, BIO_CPU_AFFINE)) |
1314 | req->cpu = blk_cpu_to_group(smp_processor_id()); | 1316 | req->cpu = blk_cpu_to_group(smp_processor_id()); |
1315 | if (queue_should_plug(q) && elv_queue_empty(q)) | 1317 | if (queue_should_plug(q) && elv_queue_empty(q)) |
1316 | blk_plug_device(q); | 1318 | blk_plug_device(q); |
1317 | add_request(q, req); | 1319 | add_request(q, req); |
1318 | out: | 1320 | out: |
1319 | if (unplug || !queue_should_plug(q)) | 1321 | if (unplug || !queue_should_plug(q)) |
1320 | __generic_unplug_device(q); | 1322 | __generic_unplug_device(q); |
1321 | spin_unlock_irq(q->queue_lock); | 1323 | spin_unlock_irq(q->queue_lock); |
1322 | return 0; | 1324 | return 0; |
1323 | } | 1325 | } |
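/*
 * Editor's note: with REQ_HARDBARRIER rejected in __make_request() above,
 * drivers advertise their cache-flush capabilities through the new
 * blk_queue_flush() helper instead of blk_queue_ordered().  A minimal
 * sketch for a device with a volatile write cache that also handles FUA
 * writes (the function name is illustrative):
 */
static void mydrv_configure_flush(struct request_queue *q)
{
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
}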
1324 | 1326 | ||
1325 | /* | 1327 | /* |
1326 | * If bio->bi_dev is a partition, remap the location | 1328 | * If bio->bi_dev is a partition, remap the location |
1327 | */ | 1329 | */ |
1328 | static inline void blk_partition_remap(struct bio *bio) | 1330 | static inline void blk_partition_remap(struct bio *bio) |
1329 | { | 1331 | { |
1330 | struct block_device *bdev = bio->bi_bdev; | 1332 | struct block_device *bdev = bio->bi_bdev; |
1331 | 1333 | ||
1332 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { | 1334 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { |
1333 | struct hd_struct *p = bdev->bd_part; | 1335 | struct hd_struct *p = bdev->bd_part; |
1334 | 1336 | ||
1335 | bio->bi_sector += p->start_sect; | 1337 | bio->bi_sector += p->start_sect; |
1336 | bio->bi_bdev = bdev->bd_contains; | 1338 | bio->bi_bdev = bdev->bd_contains; |
1337 | 1339 | ||
1338 | trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, | 1340 | trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, |
1339 | bdev->bd_dev, | 1341 | bdev->bd_dev, |
1340 | bio->bi_sector - p->start_sect); | 1342 | bio->bi_sector - p->start_sect); |
1341 | } | 1343 | } |
1342 | } | 1344 | } |
1343 | 1345 | ||
1344 | static void handle_bad_sector(struct bio *bio) | 1346 | static void handle_bad_sector(struct bio *bio) |
1345 | { | 1347 | { |
1346 | char b[BDEVNAME_SIZE]; | 1348 | char b[BDEVNAME_SIZE]; |
1347 | 1349 | ||
1348 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 1350 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
1349 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 1351 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
1350 | bdevname(bio->bi_bdev, b), | 1352 | bdevname(bio->bi_bdev, b), |
1351 | bio->bi_rw, | 1353 | bio->bi_rw, |
1352 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 1354 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
1353 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); | 1355 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); |
1354 | 1356 | ||
1355 | set_bit(BIO_EOF, &bio->bi_flags); | 1357 | set_bit(BIO_EOF, &bio->bi_flags); |
1356 | } | 1358 | } |
1357 | 1359 | ||
1358 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1360 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1359 | 1361 | ||
1360 | static DECLARE_FAULT_ATTR(fail_make_request); | 1362 | static DECLARE_FAULT_ATTR(fail_make_request); |
1361 | 1363 | ||
1362 | static int __init setup_fail_make_request(char *str) | 1364 | static int __init setup_fail_make_request(char *str) |
1363 | { | 1365 | { |
1364 | return setup_fault_attr(&fail_make_request, str); | 1366 | return setup_fault_attr(&fail_make_request, str); |
1365 | } | 1367 | } |
1366 | __setup("fail_make_request=", setup_fail_make_request); | 1368 | __setup("fail_make_request=", setup_fail_make_request); |
1367 | 1369 | ||
1368 | static int should_fail_request(struct bio *bio) | 1370 | static int should_fail_request(struct bio *bio) |
1369 | { | 1371 | { |
1370 | struct hd_struct *part = bio->bi_bdev->bd_part; | 1372 | struct hd_struct *part = bio->bi_bdev->bd_part; |
1371 | 1373 | ||
1372 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) | 1374 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) |
1373 | return should_fail(&fail_make_request, bio->bi_size); | 1375 | return should_fail(&fail_make_request, bio->bi_size); |
1374 | 1376 | ||
1375 | return 0; | 1377 | return 0; |
1376 | } | 1378 | } |
1377 | 1379 | ||
1378 | static int __init fail_make_request_debugfs(void) | 1380 | static int __init fail_make_request_debugfs(void) |
1379 | { | 1381 | { |
1380 | return init_fault_attr_dentries(&fail_make_request, | 1382 | return init_fault_attr_dentries(&fail_make_request, |
1381 | "fail_make_request"); | 1383 | "fail_make_request"); |
1382 | } | 1384 | } |
1383 | 1385 | ||
1384 | late_initcall(fail_make_request_debugfs); | 1386 | late_initcall(fail_make_request_debugfs); |
1385 | 1387 | ||
1386 | #else /* CONFIG_FAIL_MAKE_REQUEST */ | 1388 | #else /* CONFIG_FAIL_MAKE_REQUEST */ |
1387 | 1389 | ||
1388 | static inline int should_fail_request(struct bio *bio) | 1390 | static inline int should_fail_request(struct bio *bio) |
1389 | { | 1391 | { |
1390 | return 0; | 1392 | return 0; |
1391 | } | 1393 | } |
1392 | 1394 | ||
1393 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ | 1395 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ |
1394 | 1396 | ||
1395 | /* | 1397 | /* |
1396 | * Check whether this bio extends beyond the end of the device. | 1398 | * Check whether this bio extends beyond the end of the device. |
1397 | */ | 1399 | */ |
1398 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | 1400 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) |
1399 | { | 1401 | { |
1400 | sector_t maxsector; | 1402 | sector_t maxsector; |
1401 | 1403 | ||
1402 | if (!nr_sectors) | 1404 | if (!nr_sectors) |
1403 | return 0; | 1405 | return 0; |
1404 | 1406 | ||
1405 | /* Test device or partition size, when known. */ | 1407 | /* Test device or partition size, when known. */ |
1406 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; | 1408 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; |
1407 | if (maxsector) { | 1409 | if (maxsector) { |
1408 | sector_t sector = bio->bi_sector; | 1410 | sector_t sector = bio->bi_sector; |
1409 | 1411 | ||
1410 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 1412 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
1411 | /* | 1413 | /* |
1412 | * This may well happen - the kernel calls bread() | 1414 | * This may well happen - the kernel calls bread() |
1413 | * without checking the size of the device, e.g., when | 1415 | * without checking the size of the device, e.g., when |
1414 | * mounting a device. | 1416 | * mounting a device. |
1415 | */ | 1417 | */ |
1416 | handle_bad_sector(bio); | 1418 | handle_bad_sector(bio); |
1417 | return 1; | 1419 | return 1; |
1418 | } | 1420 | } |
1419 | } | 1421 | } |
1420 | 1422 | ||
1421 | return 0; | 1423 | return 0; |
1422 | } | 1424 | } |
1423 | 1425 | ||
1424 | /** | 1426 | /** |
1425 | * generic_make_request - hand a buffer to its device driver for I/O | 1427 | * generic_make_request - hand a buffer to its device driver for I/O |
1426 | * @bio: The bio describing the location in memory and on the device. | 1428 | * @bio: The bio describing the location in memory and on the device. |
1427 | * | 1429 | * |
1428 | * generic_make_request() is used to make I/O requests of block | 1430 | * generic_make_request() is used to make I/O requests of block |
1429 | * devices. It is passed a &struct bio, which describes the I/O that needs | 1431 | * devices. It is passed a &struct bio, which describes the I/O that needs |
1430 | * to be done. | 1432 | * to be done. |
1431 | * | 1433 | * |
1432 | * generic_make_request() does not return any status. The | 1434 | * generic_make_request() does not return any status. The |
1433 | * success/failure status of the request, along with notification of | 1435 | * success/failure status of the request, along with notification of |
1434 | * completion, is delivered asynchronously through the bio->bi_end_io | 1436 | * completion, is delivered asynchronously through the bio->bi_end_io |
1435 | * function described (one day) elsewhere. | 1437 | * function described (one day) elsewhere. |
1436 | * | 1438 | * |
1437 | * The caller of generic_make_request must make sure that bi_io_vec | 1439 | * The caller of generic_make_request must make sure that bi_io_vec |
1438 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | 1440 | * are set to describe the memory buffer, and that bi_dev and bi_sector are |
1439 | * set to describe the device address, and the | 1441 | * set to describe the device address, and the |
1440 | * bi_end_io and optionally bi_private are set to describe how | 1442 | * bi_end_io and optionally bi_private are set to describe how |
1441 | * completion notification should be signaled. | 1443 | * completion notification should be signaled. |
1442 | * | 1444 | * |
1443 | * generic_make_request and the drivers it calls may use bi_next if this | 1445 | * generic_make_request and the drivers it calls may use bi_next if this |
1444 | * bio happens to be merged with someone else, and may change bi_dev and | 1446 | * bio happens to be merged with someone else, and may change bi_dev and |
1445 | * bi_sector for remaps as it sees fit. So the values of these fields | 1447 | * bi_sector for remaps as it sees fit. So the values of these fields |
1446 | * should NOT be depended on after the call to generic_make_request. | 1448 | * should NOT be depended on after the call to generic_make_request. |
1447 | */ | 1449 | */ |
1448 | static inline void __generic_make_request(struct bio *bio) | 1450 | static inline void __generic_make_request(struct bio *bio) |
1449 | { | 1451 | { |
1450 | struct request_queue *q; | 1452 | struct request_queue *q; |
1451 | sector_t old_sector; | 1453 | sector_t old_sector; |
1452 | int ret, nr_sectors = bio_sectors(bio); | 1454 | int ret, nr_sectors = bio_sectors(bio); |
1453 | dev_t old_dev; | 1455 | dev_t old_dev; |
1454 | int err = -EIO; | 1456 | int err = -EIO; |
1455 | 1457 | ||
1456 | might_sleep(); | 1458 | might_sleep(); |
1457 | 1459 | ||
1458 | if (bio_check_eod(bio, nr_sectors)) | 1460 | if (bio_check_eod(bio, nr_sectors)) |
1459 | goto end_io; | 1461 | goto end_io; |
1460 | 1462 | ||
1461 | /* | 1463 | /* |
1462 | * Resolve the mapping until finished. (drivers are | 1464 | * Resolve the mapping until finished. (drivers are |
1463 | * still free to implement/resolve their own stacking | 1465 | * still free to implement/resolve their own stacking |
1464 | * by explicitly returning 0) | 1466 | * by explicitly returning 0) |
1465 | * | 1467 | * |
1466 | * NOTE: we don't repeat the blk_size check for each new device. | 1468 | * NOTE: we don't repeat the blk_size check for each new device. |
1467 | * Stacking drivers are expected to know what they are doing. | 1469 | * Stacking drivers are expected to know what they are doing. |
1468 | */ | 1470 | */ |
1469 | old_sector = -1; | 1471 | old_sector = -1; |
1470 | old_dev = 0; | 1472 | old_dev = 0; |
1471 | do { | 1473 | do { |
1472 | char b[BDEVNAME_SIZE]; | 1474 | char b[BDEVNAME_SIZE]; |
1473 | 1475 | ||
1474 | q = bdev_get_queue(bio->bi_bdev); | 1476 | q = bdev_get_queue(bio->bi_bdev); |
1475 | if (unlikely(!q)) { | 1477 | if (unlikely(!q)) { |
1476 | printk(KERN_ERR | 1478 | printk(KERN_ERR |
1477 | "generic_make_request: Trying to access " | 1479 | "generic_make_request: Trying to access " |
1478 | "nonexistent block-device %s (%Lu)\n", | 1480 | "nonexistent block-device %s (%Lu)\n", |
1479 | bdevname(bio->bi_bdev, b), | 1481 | bdevname(bio->bi_bdev, b), |
1480 | (long long) bio->bi_sector); | 1482 | (long long) bio->bi_sector); |
1481 | goto end_io; | 1483 | goto end_io; |
1482 | } | 1484 | } |
1483 | 1485 | ||
1484 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && | 1486 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && |
1485 | nr_sectors > queue_max_hw_sectors(q))) { | 1487 | nr_sectors > queue_max_hw_sectors(q))) { |
1486 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1488 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
1487 | bdevname(bio->bi_bdev, b), | 1489 | bdevname(bio->bi_bdev, b), |
1488 | bio_sectors(bio), | 1490 | bio_sectors(bio), |
1489 | queue_max_hw_sectors(q)); | 1491 | queue_max_hw_sectors(q)); |
1490 | goto end_io; | 1492 | goto end_io; |
1491 | } | 1493 | } |
1492 | 1494 | ||
1493 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 1495 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
1494 | goto end_io; | 1496 | goto end_io; |
1495 | 1497 | ||
1496 | if (should_fail_request(bio)) | 1498 | if (should_fail_request(bio)) |
1497 | goto end_io; | 1499 | goto end_io; |
1498 | 1500 | ||
1499 | /* | 1501 | /* |
1500 | * If this device has partitions, remap block n | 1502 | * If this device has partitions, remap block n |
1501 | * of partition p to block n+start(p) of the disk. | 1503 | * of partition p to block n+start(p) of the disk. |
1502 | */ | 1504 | */ |
1503 | blk_partition_remap(bio); | 1505 | blk_partition_remap(bio); |
1504 | 1506 | ||
1505 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) | 1507 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) |
1506 | goto end_io; | 1508 | goto end_io; |
1507 | 1509 | ||
1508 | if (old_sector != -1) | 1510 | if (old_sector != -1) |
1509 | trace_block_remap(q, bio, old_dev, old_sector); | 1511 | trace_block_remap(q, bio, old_dev, old_sector); |
1510 | 1512 | ||
1511 | old_sector = bio->bi_sector; | 1513 | old_sector = bio->bi_sector; |
1512 | old_dev = bio->bi_bdev->bd_dev; | 1514 | old_dev = bio->bi_bdev->bd_dev; |
1513 | 1515 | ||
1514 | if (bio_check_eod(bio, nr_sectors)) | 1516 | if (bio_check_eod(bio, nr_sectors)) |
1515 | goto end_io; | 1517 | goto end_io; |
1516 | 1518 | ||
1517 | if ((bio->bi_rw & REQ_DISCARD) && | 1519 | if ((bio->bi_rw & REQ_DISCARD) && |
1518 | (!blk_queue_discard(q) || | 1520 | (!blk_queue_discard(q) || |
1519 | ((bio->bi_rw & REQ_SECURE) && | 1521 | ((bio->bi_rw & REQ_SECURE) && |
1520 | !blk_queue_secdiscard(q)))) { | 1522 | !blk_queue_secdiscard(q)))) { |
1521 | err = -EOPNOTSUPP; | 1523 | err = -EOPNOTSUPP; |
1522 | goto end_io; | 1524 | goto end_io; |
1523 | } | 1525 | } |
1524 | 1526 | ||
1525 | trace_block_bio_queue(q, bio); | 1527 | trace_block_bio_queue(q, bio); |
1526 | 1528 | ||
1527 | ret = q->make_request_fn(q, bio); | 1529 | ret = q->make_request_fn(q, bio); |
1528 | } while (ret); | 1530 | } while (ret); |
1529 | 1531 | ||
1530 | return; | 1532 | return; |
1531 | 1533 | ||
1532 | end_io: | 1534 | end_io: |
1533 | bio_endio(bio, err); | 1535 | bio_endio(bio, err); |
1534 | } | 1536 | } |
1535 | 1537 | ||
1536 | /* | 1538 | /* |
1537 | * We only want one ->make_request_fn to be active at a time, | 1539 | * We only want one ->make_request_fn to be active at a time, |
1538 | * else stack usage with stacked devices could be a problem. | 1540 | * else stack usage with stacked devices could be a problem. |
1539 | * So use current->bio_list to keep a list of requests | 1541 | * So use current->bio_list to keep a list of requests |
1540 | * submitted by a make_request_fn function. | 1542 | * submitted by a make_request_fn function. |
1541 | * current->bio_list is also used as a flag to say if | 1543 | * current->bio_list is also used as a flag to say if |
1542 | * generic_make_request is currently active in this task or not. | 1544 | * generic_make_request is currently active in this task or not. |
1543 | * If it is NULL, then no make_request is active. If it is non-NULL, | 1545 | * If it is NULL, then no make_request is active. If it is non-NULL, |
1544 | * then a make_request is active, and new requests should be added | 1546 | * then a make_request is active, and new requests should be added |
1545 | * at the tail | 1547 | * at the tail |
1546 | */ | 1548 | */ |
1547 | void generic_make_request(struct bio *bio) | 1549 | void generic_make_request(struct bio *bio) |
1548 | { | 1550 | { |
1549 | struct bio_list bio_list_on_stack; | 1551 | struct bio_list bio_list_on_stack; |
1550 | 1552 | ||
1551 | if (current->bio_list) { | 1553 | if (current->bio_list) { |
1552 | /* make_request is active */ | 1554 | /* make_request is active */ |
1553 | bio_list_add(current->bio_list, bio); | 1555 | bio_list_add(current->bio_list, bio); |
1554 | return; | 1556 | return; |
1555 | } | 1557 | } |
1556 | /* following loop may be a bit non-obvious, and so deserves some | 1558 | /* following loop may be a bit non-obvious, and so deserves some |
1557 | * explanation. | 1559 | * explanation. |
1558 | * Before entering the loop, bio->bi_next is NULL (as all callers | 1560 | * Before entering the loop, bio->bi_next is NULL (as all callers |
1559 | * ensure that) so we have a list with a single bio. | 1561 | * ensure that) so we have a list with a single bio. |
1560 | * We pretend that we have just taken it off a longer list, so | 1562 | * We pretend that we have just taken it off a longer list, so |
1561 | * we assign bio_list to a pointer to the bio_list_on_stack, | 1563 | * we assign bio_list to a pointer to the bio_list_on_stack, |
1562 | * thus initialising the bio_list of new bios to be | 1564 | * thus initialising the bio_list of new bios to be |
1563 | * added. __generic_make_request may indeed add some more bios | 1565 | * added. __generic_make_request may indeed add some more bios |
1564 | * through a recursive call to generic_make_request. If it | 1566 | * through a recursive call to generic_make_request. If it |
1565 | * did, we find a non-NULL value in bio_list and re-enter the loop | 1567 | * did, we find a non-NULL value in bio_list and re-enter the loop |
1566 | * from the top. In this case we really did just take the bio | 1568 | * from the top. In this case we really did just take the bio |
1567 | * off the top of the list (no pretending) and so remove it from | 1569 | * off the top of the list (no pretending) and so remove it from |
1568 | * bio_list, and call into __generic_make_request again. | 1570 | * bio_list, and call into __generic_make_request again. |
1569 | * | 1571 | * |
1570 | * The loop was structured like this to make only one call to | 1572 | * The loop was structured like this to make only one call to |
1571 | * __generic_make_request (which is important as it is large and | 1573 | * __generic_make_request (which is important as it is large and |
1572 | * inlined) and to keep the structure simple. | 1574 | * inlined) and to keep the structure simple. |
1573 | */ | 1575 | */ |
1574 | BUG_ON(bio->bi_next); | 1576 | BUG_ON(bio->bi_next); |
1575 | bio_list_init(&bio_list_on_stack); | 1577 | bio_list_init(&bio_list_on_stack); |
1576 | current->bio_list = &bio_list_on_stack; | 1578 | current->bio_list = &bio_list_on_stack; |
1577 | do { | 1579 | do { |
1578 | __generic_make_request(bio); | 1580 | __generic_make_request(bio); |
1579 | bio = bio_list_pop(current->bio_list); | 1581 | bio = bio_list_pop(current->bio_list); |
1580 | } while (bio); | 1582 | } while (bio); |
1581 | current->bio_list = NULL; /* deactivate */ | 1583 | current->bio_list = NULL; /* deactivate */ |
1582 | } | 1584 | } |
1583 | EXPORT_SYMBOL(generic_make_request); | 1585 | EXPORT_SYMBOL(generic_make_request); |
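The current->bio_list handling above is what lets a stacking driver call generic_make_request() from inside its own make_request_fn without unbounded stack growth. A minimal sketch of such a pass-through driver follows; struct passthru_dev, its fields, and the queuedata wiring are hypothetical and only illustrate the pattern (assumes <linux/blkdev.h> and <linux/bio.h>).

struct passthru_dev {
        struct block_device *backing_bdev;      /* lower device we remap onto */
        sector_t start_sect;                    /* offset into that device */
};

static int passthru_make_request(struct request_queue *q, struct bio *bio)
{
        struct passthru_dev *pt = q->queuedata;

        /* redirect the bio to the backing device */
        bio->bi_bdev = pt->backing_bdev;
        bio->bi_sector += pt->start_sect;

        /*
         * This call does not recurse on the stack: the bio is appended to
         * current->bio_list and picked up by the loop in
         * generic_make_request() above.
         */
        generic_make_request(bio);

        return 0;       /* 0 tells __generic_make_request() not to loop again */
}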
1584 | 1586 | ||
1585 | /** | 1587 | /** |
1586 | * submit_bio - submit a bio to the block device layer for I/O | 1588 | * submit_bio - submit a bio to the block device layer for I/O |
1587 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 1589 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
1588 | * @bio: The &struct bio which describes the I/O | 1590 | * @bio: The &struct bio which describes the I/O |
1589 | * | 1591 | * |
1590 | * submit_bio() is very similar in purpose to generic_make_request(), and | 1592 | * submit_bio() is very similar in purpose to generic_make_request(), and |
1591 | * uses that function to do most of the work. Both are fairly rough | 1593 | * uses that function to do most of the work. Both are fairly rough |
1592 | * interfaces; @bio must be pre-set up and ready for I/O. | 1594 | * interfaces; @bio must be pre-set up and ready for I/O. |
1593 | * | 1595 | * |
1594 | */ | 1596 | */ |
1595 | void submit_bio(int rw, struct bio *bio) | 1597 | void submit_bio(int rw, struct bio *bio) |
1596 | { | 1598 | { |
1597 | int count = bio_sectors(bio); | 1599 | int count = bio_sectors(bio); |
1598 | 1600 | ||
1599 | bio->bi_rw |= rw; | 1601 | bio->bi_rw |= rw; |
1600 | 1602 | ||
1601 | /* | 1603 | /* |
1602 | * If it's a regular read/write or a barrier with data attached, | 1604 | * If it's a regular read/write or a barrier with data attached, |
1603 | * go through the normal accounting stuff before submission. | 1605 | * go through the normal accounting stuff before submission. |
1604 | */ | 1606 | */ |
1605 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { | 1607 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { |
1606 | if (rw & WRITE) { | 1608 | if (rw & WRITE) { |
1607 | count_vm_events(PGPGOUT, count); | 1609 | count_vm_events(PGPGOUT, count); |
1608 | } else { | 1610 | } else { |
1609 | task_io_account_read(bio->bi_size); | 1611 | task_io_account_read(bio->bi_size); |
1610 | count_vm_events(PGPGIN, count); | 1612 | count_vm_events(PGPGIN, count); |
1611 | } | 1613 | } |
1612 | 1614 | ||
1613 | if (unlikely(block_dump)) { | 1615 | if (unlikely(block_dump)) { |
1614 | char b[BDEVNAME_SIZE]; | 1616 | char b[BDEVNAME_SIZE]; |
1615 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | 1617 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", |
1616 | current->comm, task_pid_nr(current), | 1618 | current->comm, task_pid_nr(current), |
1617 | (rw & WRITE) ? "WRITE" : "READ", | 1619 | (rw & WRITE) ? "WRITE" : "READ", |
1618 | (unsigned long long)bio->bi_sector, | 1620 | (unsigned long long)bio->bi_sector, |
1619 | bdevname(bio->bi_bdev, b)); | 1621 | bdevname(bio->bi_bdev, b)); |
1620 | } | 1622 | } |
1621 | } | 1623 | } |
1622 | 1624 | ||
1623 | generic_make_request(bio); | 1625 | generic_make_request(bio); |
1624 | } | 1626 | } |
1625 | EXPORT_SYMBOL(submit_bio); | 1627 | EXPORT_SYMBOL(submit_bio); |
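As a usage note for the interface documented above: the sketch below builds a bio by hand and hands it to submit_bio(). The demo_* names are invented for the example and error handling is omitted; only the bio fields named in the generic_make_request() comment are filled in.

static void demo_end_io(struct bio *bio, int err)
{
        /* completion is signalled asynchronously, per the comment above */
        complete(bio->bi_private);
        bio_put(bio);
}

static void demo_read_first_page(struct block_device *bdev, struct page *page)
{
        DECLARE_COMPLETION_ONSTACK(done);
        struct bio *bio = bio_alloc(GFP_KERNEL, 1);

        bio->bi_bdev = bdev;            /* device address */
        bio->bi_sector = 0;             /* starting sector */
        bio->bi_end_io = demo_end_io;   /* completion notification */
        bio->bi_private = &done;
        bio_add_page(bio, page, PAGE_SIZE, 0);

        submit_bio(READ, bio);          /* ORs in the rw flags, accounts, submits */
        wait_for_completion(&done);
}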
1626 | 1628 | ||
1627 | /** | 1629 | /** |
1628 | * blk_rq_check_limits - Helper function to check a request for the queue limit | 1630 | * blk_rq_check_limits - Helper function to check a request for the queue limit |
1629 | * @q: the queue | 1631 | * @q: the queue |
1630 | * @rq: the request being checked | 1632 | * @rq: the request being checked |
1631 | * | 1633 | * |
1632 | * Description: | 1634 | * Description: |
1633 | * @rq may have been made based on weaker limitations of upper-level queues | 1635 | * @rq may have been made based on weaker limitations of upper-level queues |
1634 | * in request stacking drivers, and it may violate the limitation of @q. | 1636 | * in request stacking drivers, and it may violate the limitation of @q. |
1635 | * Since the block layer and the underlying device driver trust @rq | 1637 | * Since the block layer and the underlying device driver trust @rq |
1636 | * after it is inserted to @q, it should be checked against @q before | 1638 | * after it is inserted to @q, it should be checked against @q before |
1637 | * the insertion using this generic function. | 1639 | * the insertion using this generic function. |
1638 | * | 1640 | * |
1639 | * This function should also be useful for request stacking drivers | 1641 | * This function should also be useful for request stacking drivers |
1640 | * in some cases below, so export this function. | 1642 | * in some cases below, so export this function. |
1641 | * Request stacking drivers like request-based dm may change the queue | 1643 | * Request stacking drivers like request-based dm may change the queue |
1642 | * limits while requests are in the queue (e.g. dm's table swapping). | 1644 | * limits while requests are in the queue (e.g. dm's table swapping). |
1643 | * Such request stacking drivers should check those requests against | 1645 | * Such request stacking drivers should check those requests against |
1644 | * the new queue limits again when they dispatch those requests, | 1646 | * the new queue limits again when they dispatch those requests, |
1645 | * although such checks are also done against the old queue limits | 1647 | * although such checks are also done against the old queue limits |
1646 | * when submitting requests. | 1648 | * when submitting requests. |
1647 | */ | 1649 | */ |
1648 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) | 1650 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) |
1649 | { | 1651 | { |
1650 | if (rq->cmd_flags & REQ_DISCARD) | 1652 | if (rq->cmd_flags & REQ_DISCARD) |
1651 | return 0; | 1653 | return 0; |
1652 | 1654 | ||
1653 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || | 1655 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || |
1654 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { | 1656 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { |
1655 | printk(KERN_ERR "%s: over max size limit.\n", __func__); | 1657 | printk(KERN_ERR "%s: over max size limit.\n", __func__); |
1656 | return -EIO; | 1658 | return -EIO; |
1657 | } | 1659 | } |
1658 | 1660 | ||
1659 | /* | 1661 | /* |
1660 | * queue's settings related to segment counting like q->bounce_pfn | 1662 | * queue's settings related to segment counting like q->bounce_pfn |
1661 | * may differ from that of other stacking queues. | 1663 | * may differ from that of other stacking queues. |
1662 | * Recalculate it to check the request correctly on this queue's | 1664 | * Recalculate it to check the request correctly on this queue's |
1663 | * limitation. | 1665 | * limitation. |
1664 | */ | 1666 | */ |
1665 | blk_recalc_rq_segments(rq); | 1667 | blk_recalc_rq_segments(rq); |
1666 | if (rq->nr_phys_segments > queue_max_segments(q)) { | 1668 | if (rq->nr_phys_segments > queue_max_segments(q)) { |
1667 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); | 1669 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); |
1668 | return -EIO; | 1670 | return -EIO; |
1669 | } | 1671 | } |
1670 | 1672 | ||
1671 | return 0; | 1673 | return 0; |
1672 | } | 1674 | } |
1673 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); | 1675 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); |
1674 | 1676 | ||
1675 | /** | 1677 | /** |
1676 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request | 1678 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request |
1677 | * @q: the queue to submit the request | 1679 | * @q: the queue to submit the request |
1678 | * @rq: the request being queued | 1680 | * @rq: the request being queued |
1679 | */ | 1681 | */ |
1680 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | 1682 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) |
1681 | { | 1683 | { |
1682 | unsigned long flags; | 1684 | unsigned long flags; |
1683 | 1685 | ||
1684 | if (blk_rq_check_limits(q, rq)) | 1686 | if (blk_rq_check_limits(q, rq)) |
1685 | return -EIO; | 1687 | return -EIO; |
1686 | 1688 | ||
1687 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1689 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1688 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && | 1690 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && |
1689 | should_fail(&fail_make_request, blk_rq_bytes(rq))) | 1691 | should_fail(&fail_make_request, blk_rq_bytes(rq))) |
1690 | return -EIO; | 1692 | return -EIO; |
1691 | #endif | 1693 | #endif |
1692 | 1694 | ||
1693 | spin_lock_irqsave(q->queue_lock, flags); | 1695 | spin_lock_irqsave(q->queue_lock, flags); |
1694 | 1696 | ||
1695 | /* | 1697 | /* |
1696 | * The submitting request must be dequeued before calling this function | 1698 | * The submitting request must be dequeued before calling this function |
1697 | * because it will be linked to another request_queue | 1699 | * because it will be linked to another request_queue |
1698 | */ | 1700 | */ |
1699 | BUG_ON(blk_queued_rq(rq)); | 1701 | BUG_ON(blk_queued_rq(rq)); |
1700 | 1702 | ||
1701 | drive_stat_acct(rq, 1); | 1703 | drive_stat_acct(rq, 1); |
1702 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | 1704 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); |
1703 | 1705 | ||
1704 | spin_unlock_irqrestore(q->queue_lock, flags); | 1706 | spin_unlock_irqrestore(q->queue_lock, flags); |
1705 | 1707 | ||
1706 | return 0; | 1708 | return 0; |
1707 | } | 1709 | } |
1708 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); | 1710 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); |
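A hedged sketch of how a request-based stacking driver might use this helper when dispatching a clone to a lower device; stacking_complete_clone() is a hypothetical driver-specific error path, not a block-layer call.

static void stacking_dispatch(struct request_queue *lower_q,
                              struct request *clone)
{
        /*
         * blk_insert_cloned_request() re-checks the clone against lower_q's
         * limits (blk_rq_check_limits() above), so a clone built under
         * stale limits is rejected here rather than sent to the hardware.
         */
        if (blk_insert_cloned_request(lower_q, clone) == -EIO)
                stacking_complete_clone(clone, -EIO);   /* hypothetical */
}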
1709 | 1711 | ||
1710 | /** | 1712 | /** |
1711 | * blk_rq_err_bytes - determine number of bytes till the next failure boundary | 1713 | * blk_rq_err_bytes - determine number of bytes till the next failure boundary |
1712 | * @rq: request to examine | 1714 | * @rq: request to examine |
1713 | * | 1715 | * |
1714 | * Description: | 1716 | * Description: |
1715 | * A request could be a merge of IOs which require different failure | 1717 | * A request could be a merge of IOs which require different failure |
1716 | * handling. This function determines the number of bytes which | 1718 | * handling. This function determines the number of bytes which |
1717 | * can be failed from the beginning of the request without | 1719 | * can be failed from the beginning of the request without |
1718 | * crossing into an area which needs to be retried further. | 1720 | * crossing into an area which needs to be retried further. |
1719 | * | 1721 | * |
1720 | * Return: | 1722 | * Return: |
1721 | * The number of bytes to fail. | 1723 | * The number of bytes to fail. |
1722 | * | 1724 | * |
1723 | * Context: | 1725 | * Context: |
1724 | * queue_lock must be held. | 1726 | * queue_lock must be held. |
1725 | */ | 1727 | */ |
1726 | unsigned int blk_rq_err_bytes(const struct request *rq) | 1728 | unsigned int blk_rq_err_bytes(const struct request *rq) |
1727 | { | 1729 | { |
1728 | unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; | 1730 | unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; |
1729 | unsigned int bytes = 0; | 1731 | unsigned int bytes = 0; |
1730 | struct bio *bio; | 1732 | struct bio *bio; |
1731 | 1733 | ||
1732 | if (!(rq->cmd_flags & REQ_MIXED_MERGE)) | 1734 | if (!(rq->cmd_flags & REQ_MIXED_MERGE)) |
1733 | return blk_rq_bytes(rq); | 1735 | return blk_rq_bytes(rq); |
1734 | 1736 | ||
1735 | /* | 1737 | /* |
1736 | * Currently the only 'mixing' which can happen is between | 1738 | * Currently the only 'mixing' which can happen is between |
1737 | * different fastfail types. We can safely fail portions | 1739 | * different fastfail types. We can safely fail portions |
1738 | * which have all the failfast bits that the first one has - | 1740 | * which have all the failfast bits that the first one has - |
1739 | * the ones which are at least as eager to fail as the first | 1741 | * the ones which are at least as eager to fail as the first |
1740 | * one. | 1742 | * one. |
1741 | */ | 1743 | */ |
1742 | for (bio = rq->bio; bio; bio = bio->bi_next) { | 1744 | for (bio = rq->bio; bio; bio = bio->bi_next) { |
1743 | if ((bio->bi_rw & ff) != ff) | 1745 | if ((bio->bi_rw & ff) != ff) |
1744 | break; | 1746 | break; |
1745 | bytes += bio->bi_size; | 1747 | bytes += bio->bi_size; |
1746 | } | 1748 | } |
1747 | 1749 | ||
1748 | /* this could lead to infinite loop */ | 1750 | /* this could lead to infinite loop */ |
1749 | BUG_ON(blk_rq_bytes(rq) && !bytes); | 1751 | BUG_ON(blk_rq_bytes(rq) && !bytes); |
1750 | return bytes; | 1752 | return bytes; |
1751 | } | 1753 | } |
1752 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); | 1754 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); |
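The typical use of the returned count is to fail just that leading portion and leave the remainder queued for retry. A minimal sketch, assuming the caller already holds q->queue_lock as the context note above requires:

static void demo_fail_failfast_part(struct request *rq)
{
        /* fail only the leading failfast bytes; the rest can be retried */
        __blk_end_request(rq, -EIO, blk_rq_err_bytes(rq));
}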
1753 | 1755 | ||
1754 | static void blk_account_io_completion(struct request *req, unsigned int bytes) | 1756 | static void blk_account_io_completion(struct request *req, unsigned int bytes) |
1755 | { | 1757 | { |
1756 | if (blk_do_io_stat(req)) { | 1758 | if (blk_do_io_stat(req)) { |
1757 | const int rw = rq_data_dir(req); | 1759 | const int rw = rq_data_dir(req); |
1758 | struct hd_struct *part; | 1760 | struct hd_struct *part; |
1759 | int cpu; | 1761 | int cpu; |
1760 | 1762 | ||
1761 | cpu = part_stat_lock(); | 1763 | cpu = part_stat_lock(); |
1762 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | 1764 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); |
1763 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); | 1765 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); |
1764 | part_stat_unlock(); | 1766 | part_stat_unlock(); |
1765 | } | 1767 | } |
1766 | } | 1768 | } |
1767 | 1769 | ||
1768 | static void blk_account_io_done(struct request *req) | 1770 | static void blk_account_io_done(struct request *req) |
1769 | { | 1771 | { |
1770 | /* | 1772 | /* |
1771 | * Account IO completion. bar_rq isn't accounted as a normal | 1773 | * Account IO completion. bar_rq isn't accounted as a normal |
1772 | * IO on queueing nor completion. Accounting the containing | 1774 | * IO on queueing nor completion. Accounting the containing |
1773 | * request is enough. | 1775 | * request is enough. |
1774 | */ | 1776 | */ |
1775 | if (blk_do_io_stat(req) && req != &req->q->bar_rq) { | 1777 | if (blk_do_io_stat(req) && req != &req->q->bar_rq) { |
1776 | unsigned long duration = jiffies - req->start_time; | 1778 | unsigned long duration = jiffies - req->start_time; |
1777 | const int rw = rq_data_dir(req); | 1779 | const int rw = rq_data_dir(req); |
1778 | struct hd_struct *part; | 1780 | struct hd_struct *part; |
1779 | int cpu; | 1781 | int cpu; |
1780 | 1782 | ||
1781 | cpu = part_stat_lock(); | 1783 | cpu = part_stat_lock(); |
1782 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | 1784 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); |
1783 | 1785 | ||
1784 | part_stat_inc(cpu, part, ios[rw]); | 1786 | part_stat_inc(cpu, part, ios[rw]); |
1785 | part_stat_add(cpu, part, ticks[rw], duration); | 1787 | part_stat_add(cpu, part, ticks[rw], duration); |
1786 | part_round_stats(cpu, part); | 1788 | part_round_stats(cpu, part); |
1787 | part_dec_in_flight(part, rw); | 1789 | part_dec_in_flight(part, rw); |
1788 | 1790 | ||
1789 | part_stat_unlock(); | 1791 | part_stat_unlock(); |
1790 | } | 1792 | } |
1791 | } | 1793 | } |
1792 | 1794 | ||
1793 | /** | 1795 | /** |
1794 | * blk_peek_request - peek at the top of a request queue | 1796 | * blk_peek_request - peek at the top of a request queue |
1795 | * @q: request queue to peek at | 1797 | * @q: request queue to peek at |
1796 | * | 1798 | * |
1797 | * Description: | 1799 | * Description: |
1798 | * Return the request at the top of @q. The returned request | 1800 | * Return the request at the top of @q. The returned request |
1799 | * should be started using blk_start_request() before LLD starts | 1801 | * should be started using blk_start_request() before LLD starts |
1800 | * processing it. | 1802 | * processing it. |
1801 | * | 1803 | * |
1802 | * Return: | 1804 | * Return: |
1803 | * Pointer to the request at the top of @q if available. Null | 1805 | * Pointer to the request at the top of @q if available. Null |
1804 | * otherwise. | 1806 | * otherwise. |
1805 | * | 1807 | * |
1806 | * Context: | 1808 | * Context: |
1807 | * queue_lock must be held. | 1809 | * queue_lock must be held. |
1808 | */ | 1810 | */ |
1809 | struct request *blk_peek_request(struct request_queue *q) | 1811 | struct request *blk_peek_request(struct request_queue *q) |
1810 | { | 1812 | { |
1811 | struct request *rq; | 1813 | struct request *rq; |
1812 | int ret; | 1814 | int ret; |
1813 | 1815 | ||
1814 | while ((rq = __elv_next_request(q)) != NULL) { | 1816 | while ((rq = __elv_next_request(q)) != NULL) { |
1815 | if (!(rq->cmd_flags & REQ_STARTED)) { | 1817 | if (!(rq->cmd_flags & REQ_STARTED)) { |
1816 | /* | 1818 | /* |
1817 | * This is the first time the device driver | 1819 | * This is the first time the device driver |
1818 | * sees this request (possibly after | 1820 | * sees this request (possibly after |
1819 | * requeueing). Notify IO scheduler. | 1821 | * requeueing). Notify IO scheduler. |
1820 | */ | 1822 | */ |
1821 | if (rq->cmd_flags & REQ_SORTED) | 1823 | if (rq->cmd_flags & REQ_SORTED) |
1822 | elv_activate_rq(q, rq); | 1824 | elv_activate_rq(q, rq); |
1823 | 1825 | ||
1824 | /* | 1826 | /* |
1825 | * just mark as started even if we don't start | 1827 | * just mark as started even if we don't start |
1826 | * it, a request that has been delayed should | 1828 | * it, a request that has been delayed should |
1827 | * not be passed by new incoming requests | 1829 | * not be passed by new incoming requests |
1828 | */ | 1830 | */ |
1829 | rq->cmd_flags |= REQ_STARTED; | 1831 | rq->cmd_flags |= REQ_STARTED; |
1830 | trace_block_rq_issue(q, rq); | 1832 | trace_block_rq_issue(q, rq); |
1831 | } | 1833 | } |
1832 | 1834 | ||
1833 | if (!q->boundary_rq || q->boundary_rq == rq) { | 1835 | if (!q->boundary_rq || q->boundary_rq == rq) { |
1834 | q->end_sector = rq_end_sector(rq); | 1836 | q->end_sector = rq_end_sector(rq); |
1835 | q->boundary_rq = NULL; | 1837 | q->boundary_rq = NULL; |
1836 | } | 1838 | } |
1837 | 1839 | ||
1838 | if (rq->cmd_flags & REQ_DONTPREP) | 1840 | if (rq->cmd_flags & REQ_DONTPREP) |
1839 | break; | 1841 | break; |
1840 | 1842 | ||
1841 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | 1843 | if (q->dma_drain_size && blk_rq_bytes(rq)) { |
1842 | /* | 1844 | /* |
1843 | * make sure space for the drain appears; we | 1845 | * make sure space for the drain appears; we |
1844 | * know we can do this because max_hw_segments | 1846 | * know we can do this because max_hw_segments |
1845 | * has been adjusted to be one fewer than the | 1847 | * has been adjusted to be one fewer than the |
1846 | * device can handle | 1848 | * device can handle |
1847 | */ | 1849 | */ |
1848 | rq->nr_phys_segments++; | 1850 | rq->nr_phys_segments++; |
1849 | } | 1851 | } |
1850 | 1852 | ||
1851 | if (!q->prep_rq_fn) | 1853 | if (!q->prep_rq_fn) |
1852 | break; | 1854 | break; |
1853 | 1855 | ||
1854 | ret = q->prep_rq_fn(q, rq); | 1856 | ret = q->prep_rq_fn(q, rq); |
1855 | if (ret == BLKPREP_OK) { | 1857 | if (ret == BLKPREP_OK) { |
1856 | break; | 1858 | break; |
1857 | } else if (ret == BLKPREP_DEFER) { | 1859 | } else if (ret == BLKPREP_DEFER) { |
1858 | /* | 1860 | /* |
1859 | * the request may have been (partially) prepped. | 1861 | * the request may have been (partially) prepped. |
1860 | * we need to keep this request in the front to | 1862 | * we need to keep this request in the front to |
1861 | * avoid resource deadlock. REQ_STARTED will | 1863 | * avoid resource deadlock. REQ_STARTED will |
1862 | * prevent other fs requests from passing this one. | 1864 | * prevent other fs requests from passing this one. |
1863 | */ | 1865 | */ |
1864 | if (q->dma_drain_size && blk_rq_bytes(rq) && | 1866 | if (q->dma_drain_size && blk_rq_bytes(rq) && |
1865 | !(rq->cmd_flags & REQ_DONTPREP)) { | 1867 | !(rq->cmd_flags & REQ_DONTPREP)) { |
1866 | /* | 1868 | /* |
1867 | * remove the space for the drain we added | 1869 | * remove the space for the drain we added |
1868 | * so that we don't add it again | 1870 | * so that we don't add it again |
1869 | */ | 1871 | */ |
1870 | --rq->nr_phys_segments; | 1872 | --rq->nr_phys_segments; |
1871 | } | 1873 | } |
1872 | 1874 | ||
1873 | rq = NULL; | 1875 | rq = NULL; |
1874 | break; | 1876 | break; |
1875 | } else if (ret == BLKPREP_KILL) { | 1877 | } else if (ret == BLKPREP_KILL) { |
1876 | rq->cmd_flags |= REQ_QUIET; | 1878 | rq->cmd_flags |= REQ_QUIET; |
1877 | /* | 1879 | /* |
1878 | * Mark this request as started so we don't trigger | 1880 | * Mark this request as started so we don't trigger |
1879 | * any debug logic in the end I/O path. | 1881 | * any debug logic in the end I/O path. |
1880 | */ | 1882 | */ |
1881 | blk_start_request(rq); | 1883 | blk_start_request(rq); |
1882 | __blk_end_request_all(rq, -EIO); | 1884 | __blk_end_request_all(rq, -EIO); |
1883 | } else { | 1885 | } else { |
1884 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); | 1886 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); |
1885 | break; | 1887 | break; |
1886 | } | 1888 | } |
1887 | } | 1889 | } |
1888 | 1890 | ||
1889 | return rq; | 1891 | return rq; |
1890 | } | 1892 | } |
1891 | EXPORT_SYMBOL(blk_peek_request); | 1893 | EXPORT_SYMBOL(blk_peek_request); |
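For drivers that may have to defer, the peek-then-start split documented above looks roughly like the sketch below; demo_hw_slot_available() and demo_issue_to_hw() are invented placeholders for driver-specific code.

static void demo_deferring_request_fn(struct request_queue *q)
{
        struct request *rq;

        /* ->request_fn is entered with q->queue_lock held */
        while ((rq = blk_peek_request(q)) != NULL) {
                if (!demo_hw_slot_available(q->queuedata))
                        break;  /* leave rq queued; REQ_STARTED preserves order */

                blk_start_request(rq);          /* dequeue + start timeout */
                demo_issue_to_hw(q->queuedata, rq);
        }
}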
1892 | 1894 | ||
1893 | void blk_dequeue_request(struct request *rq) | 1895 | void blk_dequeue_request(struct request *rq) |
1894 | { | 1896 | { |
1895 | struct request_queue *q = rq->q; | 1897 | struct request_queue *q = rq->q; |
1896 | 1898 | ||
1897 | BUG_ON(list_empty(&rq->queuelist)); | 1899 | BUG_ON(list_empty(&rq->queuelist)); |
1898 | BUG_ON(ELV_ON_HASH(rq)); | 1900 | BUG_ON(ELV_ON_HASH(rq)); |
1899 | 1901 | ||
1900 | list_del_init(&rq->queuelist); | 1902 | list_del_init(&rq->queuelist); |
1901 | 1903 | ||
1902 | /* | 1904 | /* |
1903 | * the time frame between a request being removed from the lists | 1905 | * the time frame between a request being removed from the lists |
1904 | * and when it is freed is accounted as IO that is in progress at | 1906 | * and when it is freed is accounted as IO that is in progress at |
1905 | * the driver side. | 1907 | * the driver side. |
1906 | */ | 1908 | */ |
1907 | if (blk_account_rq(rq)) { | 1909 | if (blk_account_rq(rq)) { |
1908 | q->in_flight[rq_is_sync(rq)]++; | 1910 | q->in_flight[rq_is_sync(rq)]++; |
1909 | set_io_start_time_ns(rq); | 1911 | set_io_start_time_ns(rq); |
1910 | } | 1912 | } |
1911 | } | 1913 | } |
1912 | 1914 | ||
1913 | /** | 1915 | /** |
1914 | * blk_start_request - start request processing on the driver | 1916 | * blk_start_request - start request processing on the driver |
1915 | * @req: request to dequeue | 1917 | * @req: request to dequeue |
1916 | * | 1918 | * |
1917 | * Description: | 1919 | * Description: |
1918 | * Dequeue @req and start timeout timer on it. This hands off the | 1920 | * Dequeue @req and start timeout timer on it. This hands off the |
1919 | * request to the driver. | 1921 | * request to the driver. |
1920 | * | 1922 | * |
1921 | * Block internal functions which don't want to start timer should | 1923 | * Block internal functions which don't want to start timer should |
1922 | * call blk_dequeue_request(). | 1924 | * call blk_dequeue_request(). |
1923 | * | 1925 | * |
1924 | * Context: | 1926 | * Context: |
1925 | * queue_lock must be held. | 1927 | * queue_lock must be held. |
1926 | */ | 1928 | */ |
1927 | void blk_start_request(struct request *req) | 1929 | void blk_start_request(struct request *req) |
1928 | { | 1930 | { |
1929 | blk_dequeue_request(req); | 1931 | blk_dequeue_request(req); |
1930 | 1932 | ||
1931 | /* | 1933 | /* |
1932 | * We are now handing the request to the hardware, initialize | 1934 | * We are now handing the request to the hardware, initialize |
1933 | * resid_len to full count and add the timeout handler. | 1935 | * resid_len to full count and add the timeout handler. |
1934 | */ | 1936 | */ |
1935 | req->resid_len = blk_rq_bytes(req); | 1937 | req->resid_len = blk_rq_bytes(req); |
1936 | if (unlikely(blk_bidi_rq(req))) | 1938 | if (unlikely(blk_bidi_rq(req))) |
1937 | req->next_rq->resid_len = blk_rq_bytes(req->next_rq); | 1939 | req->next_rq->resid_len = blk_rq_bytes(req->next_rq); |
1938 | 1940 | ||
1939 | blk_add_timer(req); | 1941 | blk_add_timer(req); |
1940 | } | 1942 | } |
1941 | EXPORT_SYMBOL(blk_start_request); | 1943 | EXPORT_SYMBOL(blk_start_request); |
1942 | 1944 | ||
1943 | /** | 1945 | /** |
1944 | * blk_fetch_request - fetch a request from a request queue | 1946 | * blk_fetch_request - fetch a request from a request queue |
1945 | * @q: request queue to fetch a request from | 1947 | * @q: request queue to fetch a request from |
1946 | * | 1948 | * |
1947 | * Description: | 1949 | * Description: |
1948 | * Return the request at the top of @q. The request is started on | 1950 | * Return the request at the top of @q. The request is started on |
1949 | * return and LLD can start processing it immediately. | 1951 | * return and LLD can start processing it immediately. |
1950 | * | 1952 | * |
1951 | * Return: | 1953 | * Return: |
1952 | * Pointer to the request at the top of @q if available. Null | 1954 | * Pointer to the request at the top of @q if available. Null |
1953 | * otherwise. | 1955 | * otherwise. |
1954 | * | 1956 | * |
1955 | * Context: | 1957 | * Context: |
1956 | * queue_lock must be held. | 1958 | * queue_lock must be held. |
1957 | */ | 1959 | */ |
1958 | struct request *blk_fetch_request(struct request_queue *q) | 1960 | struct request *blk_fetch_request(struct request_queue *q) |
1959 | { | 1961 | { |
1960 | struct request *rq; | 1962 | struct request *rq; |
1961 | 1963 | ||
1962 | rq = blk_peek_request(q); | 1964 | rq = blk_peek_request(q); |
1963 | if (rq) | 1965 | if (rq) |
1964 | blk_start_request(rq); | 1966 | blk_start_request(rq); |
1965 | return rq; | 1967 | return rq; |
1966 | } | 1968 | } |
1967 | EXPORT_SYMBOL(blk_fetch_request); | 1969 | EXPORT_SYMBOL(blk_fetch_request); |
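When no resource check is needed before issuing, the peek + start pair collapses into blk_fetch_request(), as in this minimal (and deliberately trivial) request_fn sketch:

static void demo_request_fn(struct request_queue *q)
{
        struct request *rq;

        /* q->queue_lock is held here, as blk_fetch_request() requires */
        while ((rq = blk_fetch_request(q)) != NULL) {
                /* pretend the whole transfer completed successfully */
                __blk_end_request_all(rq, 0);
        }
}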
1968 | 1970 | ||
1969 | /** | 1971 | /** |
1970 | * blk_update_request - Special helper function for request stacking drivers | 1972 | * blk_update_request - Special helper function for request stacking drivers |
1971 | * @req: the request being processed | 1973 | * @req: the request being processed |
1972 | * @error: %0 for success, < %0 for error | 1974 | * @error: %0 for success, < %0 for error |
1973 | * @nr_bytes: number of bytes to complete @req | 1975 | * @nr_bytes: number of bytes to complete @req |
1974 | * | 1976 | * |
1975 | * Description: | 1977 | * Description: |
1976 | * Ends I/O on a number of bytes attached to @req, but doesn't complete | 1978 | * Ends I/O on a number of bytes attached to @req, but doesn't complete |
1977 | * the request structure even if @req doesn't have leftover. | 1979 | * the request structure even if @req doesn't have leftover. |
1978 | * If @req has leftover, sets it up for the next range of segments. | 1980 | * If @req has leftover, sets it up for the next range of segments. |
1979 | * | 1981 | * |
1980 | * This special helper function is only for request stacking drivers | 1982 | * This special helper function is only for request stacking drivers |
1981 | * (e.g. request-based dm) so that they can handle partial completion. | 1983 | * (e.g. request-based dm) so that they can handle partial completion. |
1982 | * Actual device drivers should use blk_end_request instead. | 1984 | * Actual device drivers should use blk_end_request instead. |
1983 | * | 1985 | * |
1984 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees | 1986 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees |
1985 | * %false return from this function. | 1987 | * %false return from this function. |
1986 | * | 1988 | * |
1987 | * Return: | 1989 | * Return: |
1988 | * %false - this request doesn't have any more data | 1990 | * %false - this request doesn't have any more data |
1989 | * %true - this request has more data | 1991 | * %true - this request has more data |
1990 | **/ | 1992 | **/ |
1991 | bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | 1993 | bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) |
1992 | { | 1994 | { |
1993 | int total_bytes, bio_nbytes, next_idx = 0; | 1995 | int total_bytes, bio_nbytes, next_idx = 0; |
1994 | struct bio *bio; | 1996 | struct bio *bio; |
1995 | 1997 | ||
1996 | if (!req->bio) | 1998 | if (!req->bio) |
1997 | return false; | 1999 | return false; |
1998 | 2000 | ||
1999 | trace_block_rq_complete(req->q, req); | 2001 | trace_block_rq_complete(req->q, req); |
2000 | 2002 | ||
2001 | /* | 2003 | /* |
2002 | * For fs requests, rq is just a carrier of independent bios | 2004 | * For fs requests, rq is just a carrier of independent bios |
2003 | * and each partial completion should be handled separately. | 2005 | * and each partial completion should be handled separately. |
2004 | * Reset per-request error on each partial completion. | 2006 | * Reset per-request error on each partial completion. |
2005 | * | 2007 | * |
2006 | * TODO: tj: This is too subtle. It would be better to let | 2008 | * TODO: tj: This is too subtle. It would be better to let |
2007 | * low level drivers do what they see fit. | 2009 | * low level drivers do what they see fit. |
2008 | */ | 2010 | */ |
2009 | if (req->cmd_type == REQ_TYPE_FS) | 2011 | if (req->cmd_type == REQ_TYPE_FS) |
2010 | req->errors = 0; | 2012 | req->errors = 0; |
2011 | 2013 | ||
2012 | if (error && req->cmd_type == REQ_TYPE_FS && | 2014 | if (error && req->cmd_type == REQ_TYPE_FS && |
2013 | !(req->cmd_flags & REQ_QUIET)) { | 2015 | !(req->cmd_flags & REQ_QUIET)) { |
2014 | printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", | 2016 | printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", |
2015 | req->rq_disk ? req->rq_disk->disk_name : "?", | 2017 | req->rq_disk ? req->rq_disk->disk_name : "?", |
2016 | (unsigned long long)blk_rq_pos(req)); | 2018 | (unsigned long long)blk_rq_pos(req)); |
2017 | } | 2019 | } |
2018 | 2020 | ||
2019 | blk_account_io_completion(req, nr_bytes); | 2021 | blk_account_io_completion(req, nr_bytes); |
2020 | 2022 | ||
2021 | total_bytes = bio_nbytes = 0; | 2023 | total_bytes = bio_nbytes = 0; |
2022 | while ((bio = req->bio) != NULL) { | 2024 | while ((bio = req->bio) != NULL) { |
2023 | int nbytes; | 2025 | int nbytes; |
2024 | 2026 | ||
2025 | if (nr_bytes >= bio->bi_size) { | 2027 | if (nr_bytes >= bio->bi_size) { |
2026 | req->bio = bio->bi_next; | 2028 | req->bio = bio->bi_next; |
2027 | nbytes = bio->bi_size; | 2029 | nbytes = bio->bi_size; |
2028 | req_bio_endio(req, bio, nbytes, error); | 2030 | req_bio_endio(req, bio, nbytes, error); |
2029 | next_idx = 0; | 2031 | next_idx = 0; |
2030 | bio_nbytes = 0; | 2032 | bio_nbytes = 0; |
2031 | } else { | 2033 | } else { |
2032 | int idx = bio->bi_idx + next_idx; | 2034 | int idx = bio->bi_idx + next_idx; |
2033 | 2035 | ||
2034 | if (unlikely(idx >= bio->bi_vcnt)) { | 2036 | if (unlikely(idx >= bio->bi_vcnt)) { |
2035 | blk_dump_rq_flags(req, "__end_that"); | 2037 | blk_dump_rq_flags(req, "__end_that"); |
2036 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", | 2038 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", |
2037 | __func__, idx, bio->bi_vcnt); | 2039 | __func__, idx, bio->bi_vcnt); |
2038 | break; | 2040 | break; |
2039 | } | 2041 | } |
2040 | 2042 | ||
2041 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 2043 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
2042 | BIO_BUG_ON(nbytes > bio->bi_size); | 2044 | BIO_BUG_ON(nbytes > bio->bi_size); |
2043 | 2045 | ||
2044 | /* | 2046 | /* |
2045 | * not a complete bvec done | 2047 | * not a complete bvec done |
2046 | */ | 2048 | */ |
2047 | if (unlikely(nbytes > nr_bytes)) { | 2049 | if (unlikely(nbytes > nr_bytes)) { |
2048 | bio_nbytes += nr_bytes; | 2050 | bio_nbytes += nr_bytes; |
2049 | total_bytes += nr_bytes; | 2051 | total_bytes += nr_bytes; |
2050 | break; | 2052 | break; |
2051 | } | 2053 | } |
2052 | 2054 | ||
2053 | /* | 2055 | /* |
2054 | * advance to the next vector | 2056 | * advance to the next vector |
2055 | */ | 2057 | */ |
2056 | next_idx++; | 2058 | next_idx++; |
2057 | bio_nbytes += nbytes; | 2059 | bio_nbytes += nbytes; |
2058 | } | 2060 | } |
2059 | 2061 | ||
2060 | total_bytes += nbytes; | 2062 | total_bytes += nbytes; |
2061 | nr_bytes -= nbytes; | 2063 | nr_bytes -= nbytes; |
2062 | 2064 | ||
2063 | bio = req->bio; | 2065 | bio = req->bio; |
2064 | if (bio) { | 2066 | if (bio) { |
2065 | /* | 2067 | /* |
2066 | * end more in this run, or just return 'not-done' | 2068 | * end more in this run, or just return 'not-done' |
2067 | */ | 2069 | */ |
2068 | if (unlikely(nr_bytes <= 0)) | 2070 | if (unlikely(nr_bytes <= 0)) |
2069 | break; | 2071 | break; |
2070 | } | 2072 | } |
2071 | } | 2073 | } |
2072 | 2074 | ||
2073 | /* | 2075 | /* |
2074 | * completely done | 2076 | * completely done |
2075 | */ | 2077 | */ |
2076 | if (!req->bio) { | 2078 | if (!req->bio) { |
2077 | /* | 2079 | /* |
2078 | * Reset counters so that the request stacking driver | 2080 | * Reset counters so that the request stacking driver |
2079 | * can find how many bytes remain in the request | 2081 | * can find how many bytes remain in the request |
2080 | * later. | 2082 | * later. |
2081 | */ | 2083 | */ |
2082 | req->__data_len = 0; | 2084 | req->__data_len = 0; |
2083 | return false; | 2085 | return false; |
2084 | } | 2086 | } |
2085 | 2087 | ||
2086 | /* | 2088 | /* |
2087 | * if the request wasn't completed, update state | 2089 | * if the request wasn't completed, update state |
2088 | */ | 2090 | */ |
2089 | if (bio_nbytes) { | 2091 | if (bio_nbytes) { |
2090 | req_bio_endio(req, bio, bio_nbytes, error); | 2092 | req_bio_endio(req, bio, bio_nbytes, error); |
2091 | bio->bi_idx += next_idx; | 2093 | bio->bi_idx += next_idx; |
2092 | bio_iovec(bio)->bv_offset += nr_bytes; | 2094 | bio_iovec(bio)->bv_offset += nr_bytes; |
2093 | bio_iovec(bio)->bv_len -= nr_bytes; | 2095 | bio_iovec(bio)->bv_len -= nr_bytes; |
2094 | } | 2096 | } |
2095 | 2097 | ||
2096 | req->__data_len -= total_bytes; | 2098 | req->__data_len -= total_bytes; |
2097 | req->buffer = bio_data(req->bio); | 2099 | req->buffer = bio_data(req->bio); |
2098 | 2100 | ||
2099 | /* update sector only for requests with clear definition of sector */ | 2101 | /* update sector only for requests with clear definition of sector */ |
2100 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) | 2102 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) |
2101 | req->__sector += total_bytes >> 9; | 2103 | req->__sector += total_bytes >> 9; |
2102 | 2104 | ||
2103 | /* mixed attributes always follow the first bio */ | 2105 | /* mixed attributes always follow the first bio */ |
2104 | if (req->cmd_flags & REQ_MIXED_MERGE) { | 2106 | if (req->cmd_flags & REQ_MIXED_MERGE) { |
2105 | req->cmd_flags &= ~REQ_FAILFAST_MASK; | 2107 | req->cmd_flags &= ~REQ_FAILFAST_MASK; |
2106 | req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; | 2108 | req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; |
2107 | } | 2109 | } |
2108 | 2110 | ||
2109 | /* | 2111 | /* |
2110 | * If total number of sectors is less than the first segment | 2112 | * If total number of sectors is less than the first segment |
2111 | * size, something has gone terribly wrong. | 2113 | * size, something has gone terribly wrong. |
2112 | */ | 2114 | */ |
2113 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { | 2115 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { |
2114 | printk(KERN_ERR "blk: request botched\n"); | 2116 | printk(KERN_ERR "blk: request botched\n"); |
2115 | req->__data_len = blk_rq_cur_bytes(req); | 2117 | req->__data_len = blk_rq_cur_bytes(req); |
2116 | } | 2118 | } |
2117 | 2119 | ||
2118 | /* recalculate the number of segments */ | 2120 | /* recalculate the number of segments */ |
2119 | blk_recalc_rq_segments(req); | 2121 | blk_recalc_rq_segments(req); |
2120 | 2122 | ||
2121 | return true; | 2123 | return true; |
2122 | } | 2124 | } |
2123 | EXPORT_SYMBOL_GPL(blk_update_request); | 2125 | EXPORT_SYMBOL_GPL(blk_update_request); |
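A hedged sketch of the partial-completion pattern this helper enables in a request stacking driver; the function name and the decision to finish with blk_end_request_all() are illustrative assumptions, not part of this patch.

static void stacking_complete_bytes(struct request *orig, int error,
                                    unsigned int nr_bytes)
{
        /* true means @orig still has data left; wait for more completions */
        if (blk_update_request(orig, error, nr_bytes))
                return;

        /* everything is done; finish accounting and free the request */
        blk_end_request_all(orig, error);
}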
2124 | 2126 | ||
2125 | static bool blk_update_bidi_request(struct request *rq, int error, | 2127 | static bool blk_update_bidi_request(struct request *rq, int error, |
2126 | unsigned int nr_bytes, | 2128 | unsigned int nr_bytes, |
2127 | unsigned int bidi_bytes) | 2129 | unsigned int bidi_bytes) |
2128 | { | 2130 | { |
2129 | if (blk_update_request(rq, error, nr_bytes)) | 2131 | if (blk_update_request(rq, error, nr_bytes)) |
2130 | return true; | 2132 | return true; |
2131 | 2133 | ||
2132 | /* Bidi request must be completed as a whole */ | 2134 | /* Bidi request must be completed as a whole */ |
2133 | if (unlikely(blk_bidi_rq(rq)) && | 2135 | if (unlikely(blk_bidi_rq(rq)) && |
2134 | blk_update_request(rq->next_rq, error, bidi_bytes)) | 2136 | blk_update_request(rq->next_rq, error, bidi_bytes)) |
2135 | return true; | 2137 | return true; |
2136 | 2138 | ||
2137 | if (blk_queue_add_random(rq->q)) | 2139 | if (blk_queue_add_random(rq->q)) |
2138 | add_disk_randomness(rq->rq_disk); | 2140 | add_disk_randomness(rq->rq_disk); |
2139 | 2141 | ||
2140 | return false; | 2142 | return false; |
2141 | } | 2143 | } |
2142 | 2144 | ||
2143 | /** | 2145 | /** |
2144 | * blk_unprep_request - unprepare a request | 2146 | * blk_unprep_request - unprepare a request |
2145 | * @req: the request | 2147 | * @req: the request |
2146 | * | 2148 | * |
2147 | * This function makes a request ready for complete resubmission (or | 2149 | * This function makes a request ready for complete resubmission (or |
2148 | * completion). It happens only after all error handling is complete, | 2150 | * completion). It happens only after all error handling is complete, |
2149 | * so represents the appropriate moment to deallocate any resources | 2151 | * so represents the appropriate moment to deallocate any resources |
2150 | * that were allocated to the request in the prep_rq_fn. The queue | 2152 | * that were allocated to the request in the prep_rq_fn. The queue |
2151 | * lock is held when calling this. | 2153 | * lock is held when calling this. |
2152 | */ | 2154 | */ |
2153 | void blk_unprep_request(struct request *req) | 2155 | void blk_unprep_request(struct request *req) |
2154 | { | 2156 | { |
2155 | struct request_queue *q = req->q; | 2157 | struct request_queue *q = req->q; |
2156 | 2158 | ||
2157 | req->cmd_flags &= ~REQ_DONTPREP; | 2159 | req->cmd_flags &= ~REQ_DONTPREP; |
2158 | if (q->unprep_rq_fn) | 2160 | if (q->unprep_rq_fn) |
2159 | q->unprep_rq_fn(q, req); | 2161 | q->unprep_rq_fn(q, req); |
2160 | } | 2162 | } |
2161 | EXPORT_SYMBOL_GPL(blk_unprep_request); | 2163 | EXPORT_SYMBOL_GPL(blk_unprep_request); |
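A sketch of a matching prep/unprep pair (hypothetical demo_* names), registered with blk_queue_prep_rq() and blk_queue_unprep_rq(): whatever ->prep_rq_fn allocates is released in ->unprep_rq_fn once the request is finished or fully unprepared.

static int demo_prep_rq(struct request_queue *q, struct request *rq)
{
        /* GFP_ATOMIC: ->prep_rq_fn runs under q->queue_lock */
        void *cmd = kzalloc(32, GFP_ATOMIC);

        if (!cmd)
                return BLKPREP_DEFER;   /* try again later */

        rq->special = cmd;
        rq->cmd_flags |= REQ_DONTPREP;  /* prep only once; unprep on finish */
        return BLKPREP_OK;
}

static void demo_unprep_rq(struct request_queue *q, struct request *rq)
{
        kfree(rq->special);
        rq->special = NULL;
}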
2162 | 2164 | ||
2163 | /* | 2165 | /* |
2164 | * queue lock must be held | 2166 | * queue lock must be held |
2165 | */ | 2167 | */ |
2166 | static void blk_finish_request(struct request *req, int error) | 2168 | static void blk_finish_request(struct request *req, int error) |
2167 | { | 2169 | { |
2168 | if (blk_rq_tagged(req)) | 2170 | if (blk_rq_tagged(req)) |
2169 | blk_queue_end_tag(req->q, req); | 2171 | blk_queue_end_tag(req->q, req); |
2170 | 2172 | ||
2171 | BUG_ON(blk_queued_rq(req)); | 2173 | BUG_ON(blk_queued_rq(req)); |
2172 | 2174 | ||
2173 | if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) | 2175 | if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) |
2174 | laptop_io_completion(&req->q->backing_dev_info); | 2176 | laptop_io_completion(&req->q->backing_dev_info); |
2175 | 2177 | ||
2176 | blk_delete_timer(req); | 2178 | blk_delete_timer(req); |
2177 | 2179 | ||
2178 | if (req->cmd_flags & REQ_DONTPREP) | 2180 | if (req->cmd_flags & REQ_DONTPREP) |
2179 | blk_unprep_request(req); | 2181 | blk_unprep_request(req); |
2180 | 2182 | ||
2181 | 2183 | ||
2182 | blk_account_io_done(req); | 2184 | blk_account_io_done(req); |
2183 | 2185 | ||
2184 | if (req->end_io) | 2186 | if (req->end_io) |
2185 | req->end_io(req, error); | 2187 | req->end_io(req, error); |
2186 | else { | 2188 | else { |
2187 | if (blk_bidi_rq(req)) | 2189 | if (blk_bidi_rq(req)) |
2188 | __blk_put_request(req->next_rq->q, req->next_rq); | 2190 | __blk_put_request(req->next_rq->q, req->next_rq); |
2189 | 2191 | ||
2190 | __blk_put_request(req->q, req); | 2192 | __blk_put_request(req->q, req); |
2191 | } | 2193 | } |
2192 | } | 2194 | } |
2193 | 2195 | ||
2194 | /** | 2196 | /** |
2195 | * blk_end_bidi_request - Complete a bidi request | 2197 | * blk_end_bidi_request - Complete a bidi request |
2196 | * @rq: the request to complete | 2198 | * @rq: the request to complete |
2197 | * @error: %0 for success, < %0 for error | 2199 | * @error: %0 for success, < %0 for error |
2198 | * @nr_bytes: number of bytes to complete @rq | 2200 | * @nr_bytes: number of bytes to complete @rq |
2199 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 2201 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
2200 | * | 2202 | * |
2201 | * Description: | 2203 | * Description: |
2202 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | 2204 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
2203 | * Drivers that support bidi can safely call this member for any | 2205 | * Drivers that support bidi can safely call this member for any |
2204 | * type of request, bidi or uni. In the latter case @bidi_bytes is | 2206 | * type of request, bidi or uni. In the latter case @bidi_bytes is |
2205 | * just ignored. | 2207 | * just ignored. |
2206 | * | 2208 | * |
2207 | * Return: | 2209 | * Return: |
2208 | * %false - we are done with this request | 2210 | * %false - we are done with this request |
2209 | * %true - still buffers pending for this request | 2211 | * %true - still buffers pending for this request |
2210 | **/ | 2212 | **/ |
2211 | static bool blk_end_bidi_request(struct request *rq, int error, | 2213 | static bool blk_end_bidi_request(struct request *rq, int error, |
2212 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2214 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2213 | { | 2215 | { |
2214 | struct request_queue *q = rq->q; | 2216 | struct request_queue *q = rq->q; |
2215 | unsigned long flags; | 2217 | unsigned long flags; |
2216 | 2218 | ||
2217 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2219 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
2218 | return true; | 2220 | return true; |
2219 | 2221 | ||
2220 | spin_lock_irqsave(q->queue_lock, flags); | 2222 | spin_lock_irqsave(q->queue_lock, flags); |
2221 | blk_finish_request(rq, error); | 2223 | blk_finish_request(rq, error); |
2222 | spin_unlock_irqrestore(q->queue_lock, flags); | 2224 | spin_unlock_irqrestore(q->queue_lock, flags); |
2223 | 2225 | ||
2224 | return false; | 2226 | return false; |
2225 | } | 2227 | } |
2226 | 2228 | ||
2227 | /** | 2229 | /** |
2228 | * __blk_end_bidi_request - Complete a bidi request with queue lock held | 2230 | * __blk_end_bidi_request - Complete a bidi request with queue lock held |
2229 | * @rq: the request to complete | 2231 | * @rq: the request to complete |
2230 | * @error: %0 for success, < %0 for error | 2232 | * @error: %0 for success, < %0 for error |
2231 | * @nr_bytes: number of bytes to complete @rq | 2233 | * @nr_bytes: number of bytes to complete @rq |
2232 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 2234 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
2233 | * | 2235 | * |
2234 | * Description: | 2236 | * Description: |
2235 | * Identical to blk_end_bidi_request() except that queue lock is | 2237 | * Identical to blk_end_bidi_request() except that queue lock is |
2236 | * assumed to be locked on entry and remains so on return. | 2238 | * assumed to be locked on entry and remains so on return. |
2237 | * | 2239 | * |
2238 | * Return: | 2240 | * Return: |
2239 | * %false - we are done with this request | 2241 | * %false - we are done with this request |
2240 | * %true - still buffers pending for this request | 2242 | * %true - still buffers pending for this request |
2241 | **/ | 2243 | **/ |
2242 | static bool __blk_end_bidi_request(struct request *rq, int error, | 2244 | static bool __blk_end_bidi_request(struct request *rq, int error, |
2243 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2245 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2244 | { | 2246 | { |
2245 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2247 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
2246 | return true; | 2248 | return true; |
2247 | 2249 | ||
2248 | blk_finish_request(rq, error); | 2250 | blk_finish_request(rq, error); |
2249 | 2251 | ||
2250 | return false; | 2252 | return false; |
2251 | } | 2253 | } |
2252 | 2254 | ||
2253 | /** | 2255 | /** |
2254 | * blk_end_request - Helper function for drivers to complete the request. | 2256 | * blk_end_request - Helper function for drivers to complete the request. |
2255 | * @rq: the request being processed | 2257 | * @rq: the request being processed |
2256 | * @error: %0 for success, < %0 for error | 2258 | * @error: %0 for success, < %0 for error |
2257 | * @nr_bytes: number of bytes to complete | 2259 | * @nr_bytes: number of bytes to complete |
2258 | * | 2260 | * |
2259 | * Description: | 2261 | * Description: |
2260 | * Ends I/O on a number of bytes attached to @rq. | 2262 | * Ends I/O on a number of bytes attached to @rq. |
2261 | * If @rq has leftover, sets it up for the next range of segments. | 2263 | * If @rq has leftover, sets it up for the next range of segments. |
2262 | * | 2264 | * |
2263 | * Return: | 2265 | * Return: |
2264 | * %false - we are done with this request | 2266 | * %false - we are done with this request |
2265 | * %true - still buffers pending for this request | 2267 | * %true - still buffers pending for this request |
2266 | **/ | 2268 | **/ |
2267 | bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 2269 | bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
2268 | { | 2270 | { |
2269 | return blk_end_bidi_request(rq, error, nr_bytes, 0); | 2271 | return blk_end_bidi_request(rq, error, nr_bytes, 0); |
2270 | } | 2272 | } |
2271 | EXPORT_SYMBOL(blk_end_request); | 2273 | EXPORT_SYMBOL(blk_end_request); |
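As a quick illustration of how the helper above is meant to be used: a driver's completion path (queue lock not held) finishes only the bytes the hardware actually transferred and re-issues the request if anything is left. This is a hypothetical sketch, not part of the patch; mydrv_complete() and its bytes_done argument are invented.

	#include <linux/blkdev.h>

	/* Hypothetical completion path; no queue lock is held here. */
	static void mydrv_complete(struct request *rq, unsigned int bytes_done, int error)
	{
		/* blk_end_request() takes the queue lock itself. */
		if (!blk_end_request(rq, error, bytes_done))
			return;	/* request fully finished and released */

		/*
		 * Leftover: the request has been set up for the remaining
		 * range of segments and should be handed back to the hardware.
		 */
	}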
2272 | 2274 | ||
2273 | /** | 2275 | /** |
2274 | * blk_end_request_all - Helper function for drivers to finish the request. | 2276 | * blk_end_request_all - Helper function for drivers to finish the request. |
2275 | * @rq: the request to finish | 2277 | * @rq: the request to finish |
2276 | * @error: %0 for success, < %0 for error | 2278 | * @error: %0 for success, < %0 for error |
2277 | * | 2279 | * |
2278 | * Description: | 2280 | * Description: |
2279 | * Completely finish @rq. | 2281 | * Completely finish @rq. |
2280 | */ | 2282 | */ |
2281 | void blk_end_request_all(struct request *rq, int error) | 2283 | void blk_end_request_all(struct request *rq, int error) |
2282 | { | 2284 | { |
2283 | bool pending; | 2285 | bool pending; |
2284 | unsigned int bidi_bytes = 0; | 2286 | unsigned int bidi_bytes = 0; |
2285 | 2287 | ||
2286 | if (unlikely(blk_bidi_rq(rq))) | 2288 | if (unlikely(blk_bidi_rq(rq))) |
2287 | bidi_bytes = blk_rq_bytes(rq->next_rq); | 2289 | bidi_bytes = blk_rq_bytes(rq->next_rq); |
2288 | 2290 | ||
2289 | pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); | 2291 | pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
2290 | BUG_ON(pending); | 2292 | BUG_ON(pending); |
2291 | } | 2293 | } |
2292 | EXPORT_SYMBOL(blk_end_request_all); | 2294 | EXPORT_SYMBOL(blk_end_request_all); |
2293 | 2295 | ||
2294 | /** | 2296 | /** |
2295 | * blk_end_request_cur - Helper function to finish the current request chunk. | 2297 | * blk_end_request_cur - Helper function to finish the current request chunk. |
2296 | * @rq: the request to finish the current chunk for | 2298 | * @rq: the request to finish the current chunk for |
2297 | * @error: %0 for success, < %0 for error | 2299 | * @error: %0 for success, < %0 for error |
2298 | * | 2300 | * |
2299 | * Description: | 2301 | * Description: |
2300 | * Complete the current consecutively mapped chunk from @rq. | 2302 | * Complete the current consecutively mapped chunk from @rq. |
2301 | * | 2303 | * |
2302 | * Return: | 2304 | * Return: |
2303 | * %false - we are done with this request | 2305 | * %false - we are done with this request |
2304 | * %true - still buffers pending for this request | 2306 | * %true - still buffers pending for this request |
2305 | */ | 2307 | */ |
2306 | bool blk_end_request_cur(struct request *rq, int error) | 2308 | bool blk_end_request_cur(struct request *rq, int error) |
2307 | { | 2309 | { |
2308 | return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); | 2310 | return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
2309 | } | 2311 | } |
2310 | EXPORT_SYMBOL(blk_end_request_cur); | 2312 | EXPORT_SYMBOL(blk_end_request_cur); |
2311 | 2313 | ||
2312 | /** | 2314 | /** |
2313 | * blk_end_request_err - Finish a request till the next failure boundary. | 2315 | * blk_end_request_err - Finish a request till the next failure boundary. |
2314 | * @rq: the request to finish till the next failure boundary for | 2316 | * @rq: the request to finish till the next failure boundary for |
2315 | * @error: must be negative errno | 2317 | * @error: must be negative errno |
2316 | * | 2318 | * |
2317 | * Description: | 2319 | * Description: |
2318 | * Complete @rq till the next failure boundary. | 2320 | * Complete @rq till the next failure boundary. |
2319 | * | 2321 | * |
2320 | * Return: | 2322 | * Return: |
2321 | * %false - we are done with this request | 2323 | * %false - we are done with this request |
2322 | * %true - still buffers pending for this request | 2324 | * %true - still buffers pending for this request |
2323 | */ | 2325 | */ |
2324 | bool blk_end_request_err(struct request *rq, int error) | 2326 | bool blk_end_request_err(struct request *rq, int error) |
2325 | { | 2327 | { |
2326 | WARN_ON(error >= 0); | 2328 | WARN_ON(error >= 0); |
2327 | return blk_end_request(rq, error, blk_rq_err_bytes(rq)); | 2329 | return blk_end_request(rq, error, blk_rq_err_bytes(rq)); |
2328 | } | 2330 | } |
2329 | EXPORT_SYMBOL_GPL(blk_end_request_err); | 2331 | EXPORT_SYMBOL_GPL(blk_end_request_err); |
2330 | 2332 | ||
2331 | /** | 2333 | /** |
2332 | * __blk_end_request - Helper function for drivers to complete the request. | 2334 | * __blk_end_request - Helper function for drivers to complete the request. |
2333 | * @rq: the request being processed | 2335 | * @rq: the request being processed |
2334 | * @error: %0 for success, < %0 for error | 2336 | * @error: %0 for success, < %0 for error |
2335 | * @nr_bytes: number of bytes to complete | 2337 | * @nr_bytes: number of bytes to complete |
2336 | * | 2338 | * |
2337 | * Description: | 2339 | * Description: |
2338 | * Must be called with queue lock held unlike blk_end_request(). | 2340 | * Must be called with queue lock held unlike blk_end_request(). |
2339 | * | 2341 | * |
2340 | * Return: | 2342 | * Return: |
2341 | * %false - we are done with this request | 2343 | * %false - we are done with this request |
2342 | * %true - still buffers pending for this request | 2344 | * %true - still buffers pending for this request |
2343 | **/ | 2345 | **/ |
2344 | bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 2346 | bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
2345 | { | 2347 | { |
2346 | return __blk_end_bidi_request(rq, error, nr_bytes, 0); | 2348 | return __blk_end_bidi_request(rq, error, nr_bytes, 0); |
2347 | } | 2349 | } |
2348 | EXPORT_SYMBOL(__blk_end_request); | 2350 | EXPORT_SYMBOL(__blk_end_request); |
2349 | 2351 | ||
2350 | /** | 2352 | /** |
2351 | * __blk_end_request_all - Helper function for drivers to finish the request. | 2353 | * __blk_end_request_all - Helper function for drivers to finish the request. |
2352 | * @rq: the request to finish | 2354 | * @rq: the request to finish |
2353 | * @error: %0 for success, < %0 for error | 2355 | * @error: %0 for success, < %0 for error |
2354 | * | 2356 | * |
2355 | * Description: | 2357 | * Description: |
2356 | * Completely finish @rq. Must be called with queue lock held. | 2358 | * Completely finish @rq. Must be called with queue lock held. |
2357 | */ | 2359 | */ |
2358 | void __blk_end_request_all(struct request *rq, int error) | 2360 | void __blk_end_request_all(struct request *rq, int error) |
2359 | { | 2361 | { |
2360 | bool pending; | 2362 | bool pending; |
2361 | unsigned int bidi_bytes = 0; | 2363 | unsigned int bidi_bytes = 0; |
2362 | 2364 | ||
2363 | if (unlikely(blk_bidi_rq(rq))) | 2365 | if (unlikely(blk_bidi_rq(rq))) |
2364 | bidi_bytes = blk_rq_bytes(rq->next_rq); | 2366 | bidi_bytes = blk_rq_bytes(rq->next_rq); |
2365 | 2367 | ||
2366 | pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); | 2368 | pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
2367 | BUG_ON(pending); | 2369 | BUG_ON(pending); |
2368 | } | 2370 | } |
2369 | EXPORT_SYMBOL(__blk_end_request_all); | 2371 | EXPORT_SYMBOL(__blk_end_request_all); |
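The lock-held variants are typically used from a request_fn style driver, which is entered with the queue lock already taken by the block layer. A minimal sketch, assuming an imaginary synchronous transfer helper mydrv_xfer():

	#include <linux/blkdev.h>

	static int mydrv_xfer(struct request *rq);	/* imaginary synchronous I/O */

	/* request_fn: called by the block layer with q->queue_lock held. */
	static void mydrv_request(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			int error = mydrv_xfer(rq);

			/* Still under the queue lock, so use __blk_end_request_all(). */
			__blk_end_request_all(rq, error);
		}
	}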
2370 | 2372 | ||
2371 | /** | 2373 | /** |
2372 | * __blk_end_request_cur - Helper function to finish the current request chunk. | 2374 | * __blk_end_request_cur - Helper function to finish the current request chunk. |
2373 | * @rq: the request to finish the current chunk for | 2375 | * @rq: the request to finish the current chunk for |
2374 | * @error: %0 for success, < %0 for error | 2376 | * @error: %0 for success, < %0 for error |
2375 | * | 2377 | * |
2376 | * Description: | 2378 | * Description: |
2377 | * Complete the current consecutively mapped chunk from @rq. Must | 2379 | * Complete the current consecutively mapped chunk from @rq. Must |
2378 | * be called with queue lock held. | 2380 | * be called with queue lock held. |
2379 | * | 2381 | * |
2380 | * Return: | 2382 | * Return: |
2381 | * %false - we are done with this request | 2383 | * %false - we are done with this request |
2382 | * %true - still buffers pending for this request | 2384 | * %true - still buffers pending for this request |
2383 | */ | 2385 | */ |
2384 | bool __blk_end_request_cur(struct request *rq, int error) | 2386 | bool __blk_end_request_cur(struct request *rq, int error) |
2385 | { | 2387 | { |
2386 | return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); | 2388 | return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
2387 | } | 2389 | } |
2388 | EXPORT_SYMBOL(__blk_end_request_cur); | 2390 | EXPORT_SYMBOL(__blk_end_request_cur); |
2389 | 2391 | ||
2390 | /** | 2392 | /** |
2391 | * __blk_end_request_err - Finish a request till the next failure boundary. | 2393 | * __blk_end_request_err - Finish a request till the next failure boundary. |
2392 | * @rq: the request to finish till the next failure boundary for | 2394 | * @rq: the request to finish till the next failure boundary for |
2393 | * @error: must be negative errno | 2395 | * @error: must be negative errno |
2394 | * | 2396 | * |
2395 | * Description: | 2397 | * Description: |
2396 | * Complete @rq till the next failure boundary. Must be called | 2398 | * Complete @rq till the next failure boundary. Must be called |
2397 | * with queue lock held. | 2399 | * with queue lock held. |
2398 | * | 2400 | * |
2399 | * Return: | 2401 | * Return: |
2400 | * %false - we are done with this request | 2402 | * %false - we are done with this request |
2401 | * %true - still buffers pending for this request | 2403 | * %true - still buffers pending for this request |
2402 | */ | 2404 | */ |
2403 | bool __blk_end_request_err(struct request *rq, int error) | 2405 | bool __blk_end_request_err(struct request *rq, int error) |
2404 | { | 2406 | { |
2405 | WARN_ON(error >= 0); | 2407 | WARN_ON(error >= 0); |
2406 | return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); | 2408 | return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); |
2407 | } | 2409 | } |
2408 | EXPORT_SYMBOL_GPL(__blk_end_request_err); | 2410 | EXPORT_SYMBOL_GPL(__blk_end_request_err); |
2409 | 2411 | ||
2410 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 2412 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
2411 | struct bio *bio) | 2413 | struct bio *bio) |
2412 | { | 2414 | { |
2413 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ | 2415 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ |
2414 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; | 2416 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; |
2415 | 2417 | ||
2416 | if (bio_has_data(bio)) { | 2418 | if (bio_has_data(bio)) { |
2417 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 2419 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
2418 | rq->buffer = bio_data(bio); | 2420 | rq->buffer = bio_data(bio); |
2419 | } | 2421 | } |
2420 | rq->__data_len = bio->bi_size; | 2422 | rq->__data_len = bio->bi_size; |
2421 | rq->bio = rq->biotail = bio; | 2423 | rq->bio = rq->biotail = bio; |
2422 | 2424 | ||
2423 | if (bio->bi_bdev) | 2425 | if (bio->bi_bdev) |
2424 | rq->rq_disk = bio->bi_bdev->bd_disk; | 2426 | rq->rq_disk = bio->bi_bdev->bd_disk; |
2425 | } | 2427 | } |
2426 | 2428 | ||
2427 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 2429 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
2428 | /** | 2430 | /** |
2429 | * rq_flush_dcache_pages - Helper function to flush all pages in a request | 2431 | * rq_flush_dcache_pages - Helper function to flush all pages in a request |
2430 | * @rq: the request to be flushed | 2432 | * @rq: the request to be flushed |
2431 | * | 2433 | * |
2432 | * Description: | 2434 | * Description: |
2433 | * Flush all pages in @rq. | 2435 | * Flush all pages in @rq. |
2434 | */ | 2436 | */ |
2435 | void rq_flush_dcache_pages(struct request *rq) | 2437 | void rq_flush_dcache_pages(struct request *rq) |
2436 | { | 2438 | { |
2437 | struct req_iterator iter; | 2439 | struct req_iterator iter; |
2438 | struct bio_vec *bvec; | 2440 | struct bio_vec *bvec; |
2439 | 2441 | ||
2440 | rq_for_each_segment(bvec, rq, iter) | 2442 | rq_for_each_segment(bvec, rq, iter) |
2441 | flush_dcache_page(bvec->bv_page); | 2443 | flush_dcache_page(bvec->bv_page); |
2442 | } | 2444 | } |
2443 | EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); | 2445 | EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); |
2444 | #endif | 2446 | #endif |
2445 | 2447 | ||
2446 | /** | 2448 | /** |
2447 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy | 2449 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy |
2448 | * @q : the queue of the device being checked | 2450 | * @q : the queue of the device being checked |
2449 | * | 2451 | * |
2450 | * Description: | 2452 | * Description: |
2451 | * Check if underlying low-level drivers of a device are busy. | 2453 | * Check if underlying low-level drivers of a device are busy. |
2452 | * If the drivers want to export their busy state, they must set their | 2454 | * If the drivers want to export their busy state, they must set their |
2453 | * own exporting function using blk_queue_lld_busy() first. | 2455 | * own exporting function using blk_queue_lld_busy() first. |
2454 | * | 2456 | * |
2455 | * Basically, this function is used only by request stacking drivers | 2457 | * Basically, this function is used only by request stacking drivers |
2456 | * to stop dispatching requests to underlying devices when underlying | 2458 | * to stop dispatching requests to underlying devices when underlying |
2457 | * devices are busy. This behavior allows more I/O merging on the queue | 2459 | * devices are busy. This behavior allows more I/O merging on the queue |
2458 | * of the request stacking driver and prevents I/O throughput regression | 2460 | * of the request stacking driver and prevents I/O throughput regression |
2459 | * on burst I/O load. | 2461 | * on burst I/O load. |
2460 | * | 2462 | * |
2461 | * Return: | 2463 | * Return: |
2462 | * 0 - Not busy (The request stacking driver should dispatch request) | 2464 | * 0 - Not busy (The request stacking driver should dispatch request) |
2463 | * 1 - Busy (The request stacking driver should stop dispatching request) | 2465 | * 1 - Busy (The request stacking driver should stop dispatching request) |
2464 | */ | 2466 | */ |
2465 | int blk_lld_busy(struct request_queue *q) | 2467 | int blk_lld_busy(struct request_queue *q) |
2466 | { | 2468 | { |
2467 | if (q->lld_busy_fn) | 2469 | if (q->lld_busy_fn) |
2468 | return q->lld_busy_fn(q); | 2470 | return q->lld_busy_fn(q); |
2469 | 2471 | ||
2470 | return 0; | 2472 | return 0; |
2471 | } | 2473 | } |
2472 | EXPORT_SYMBOL_GPL(blk_lld_busy); | 2474 | EXPORT_SYMBOL_GPL(blk_lld_busy); |
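Putting the two halves together: the lower-level driver registers its own busy callback with blk_queue_lld_busy() (see blk-settings.c below), and the request stacking driver consults blk_lld_busy() before dispatching. Everything named mydrv_*/mystack_* below is hypothetical.

	#include <linux/blkdev.h>

	struct mydrv {				/* hypothetical per-device state */
		unsigned int inflight;
		unsigned int max_inflight;
	};

	/* Lower-level driver's notion of "busy" (1 = busy, 0 = not busy). */
	static int mydrv_lld_busy(struct request_queue *q)
	{
		struct mydrv *drv = q->queuedata;

		return drv->inflight >= drv->max_inflight;
	}

	/* At queue setup time: blk_queue_lld_busy(q, mydrv_lld_busy); */

	/* Stacking driver: only dispatch while the underlying queue is not busy. */
	static int mystack_should_dispatch(struct request_queue *lower_q)
	{
		return !blk_lld_busy(lower_q);
	}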
2473 | 2475 | ||
2474 | /** | 2476 | /** |
2475 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request | 2477 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request |
2476 | * @rq: the clone request to be cleaned up | 2478 | * @rq: the clone request to be cleaned up |
2477 | * | 2479 | * |
2478 | * Description: | 2480 | * Description: |
2479 | * Free all bios in @rq for a cloned request. | 2481 | * Free all bios in @rq for a cloned request. |
2480 | */ | 2482 | */ |
2481 | void blk_rq_unprep_clone(struct request *rq) | 2483 | void blk_rq_unprep_clone(struct request *rq) |
2482 | { | 2484 | { |
2483 | struct bio *bio; | 2485 | struct bio *bio; |
2484 | 2486 | ||
2485 | while ((bio = rq->bio) != NULL) { | 2487 | while ((bio = rq->bio) != NULL) { |
2486 | rq->bio = bio->bi_next; | 2488 | rq->bio = bio->bi_next; |
2487 | 2489 | ||
2488 | bio_put(bio); | 2490 | bio_put(bio); |
2489 | } | 2491 | } |
2490 | } | 2492 | } |
2491 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); | 2493 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); |
2492 | 2494 | ||
2493 | /* | 2495 | /* |
2494 | * Copy attributes of the original request to the clone request. | 2496 | * Copy attributes of the original request to the clone request. |
2495 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. | 2497 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. |
2496 | */ | 2498 | */ |
2497 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | 2499 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
2498 | { | 2500 | { |
2499 | dst->cpu = src->cpu; | 2501 | dst->cpu = src->cpu; |
2500 | dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); | 2502 | dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); |
2501 | if (src->cmd_flags & REQ_DISCARD) | 2503 | if (src->cmd_flags & REQ_DISCARD) |
2502 | dst->cmd_flags |= REQ_DISCARD; | 2504 | dst->cmd_flags |= REQ_DISCARD; |
2503 | dst->cmd_type = src->cmd_type; | 2505 | dst->cmd_type = src->cmd_type; |
2504 | dst->__sector = blk_rq_pos(src); | 2506 | dst->__sector = blk_rq_pos(src); |
2505 | dst->__data_len = blk_rq_bytes(src); | 2507 | dst->__data_len = blk_rq_bytes(src); |
2506 | dst->nr_phys_segments = src->nr_phys_segments; | 2508 | dst->nr_phys_segments = src->nr_phys_segments; |
2507 | dst->ioprio = src->ioprio; | 2509 | dst->ioprio = src->ioprio; |
2508 | dst->extra_len = src->extra_len; | 2510 | dst->extra_len = src->extra_len; |
2509 | } | 2511 | } |
2510 | 2512 | ||
2511 | /** | 2513 | /** |
2512 | * blk_rq_prep_clone - Helper function to setup clone request | 2514 | * blk_rq_prep_clone - Helper function to setup clone request |
2513 | * @rq: the request to be setup | 2515 | * @rq: the request to be setup |
2514 | * @rq_src: original request to be cloned | 2516 | * @rq_src: original request to be cloned |
2515 | * @bs: bio_set that bios for clone are allocated from | 2517 | * @bs: bio_set that bios for clone are allocated from |
2516 | * @gfp_mask: memory allocation mask for bio | 2518 | * @gfp_mask: memory allocation mask for bio |
2517 | * @bio_ctr: setup function to be called for each clone bio. | 2519 | * @bio_ctr: setup function to be called for each clone bio. |
2518 | * Returns %0 for success, non %0 for failure. | 2520 | * Returns %0 for success, non %0 for failure. |
2519 | * @data: private data to be passed to @bio_ctr | 2521 | * @data: private data to be passed to @bio_ctr |
2520 | * | 2522 | * |
2521 | * Description: | 2523 | * Description: |
2522 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. | 2524 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. |
2523 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) | 2525 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) |
2524 | * are not copied, and copying such parts is the caller's responsibility. | 2526 | * are not copied, and copying such parts is the caller's responsibility. |
2525 | * Also, pages which the original bios are pointing to are not copied | 2527 | * Also, pages which the original bios are pointing to are not copied |
2526 | * and the cloned bios just point to the same pages. | 2528 | * and the cloned bios just point to the same pages. |
2527 | * So cloned bios must be completed before original bios, which means | 2529 | * So cloned bios must be completed before original bios, which means |
2528 | * the caller must complete @rq before @rq_src. | 2530 | * the caller must complete @rq before @rq_src. |
2529 | */ | 2531 | */ |
2530 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 2532 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
2531 | struct bio_set *bs, gfp_t gfp_mask, | 2533 | struct bio_set *bs, gfp_t gfp_mask, |
2532 | int (*bio_ctr)(struct bio *, struct bio *, void *), | 2534 | int (*bio_ctr)(struct bio *, struct bio *, void *), |
2533 | void *data) | 2535 | void *data) |
2534 | { | 2536 | { |
2535 | struct bio *bio, *bio_src; | 2537 | struct bio *bio, *bio_src; |
2536 | 2538 | ||
2537 | if (!bs) | 2539 | if (!bs) |
2538 | bs = fs_bio_set; | 2540 | bs = fs_bio_set; |
2539 | 2541 | ||
2540 | blk_rq_init(NULL, rq); | 2542 | blk_rq_init(NULL, rq); |
2541 | 2543 | ||
2542 | __rq_for_each_bio(bio_src, rq_src) { | 2544 | __rq_for_each_bio(bio_src, rq_src) { |
2543 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); | 2545 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); |
2544 | if (!bio) | 2546 | if (!bio) |
2545 | goto free_and_out; | 2547 | goto free_and_out; |
2546 | 2548 | ||
2547 | __bio_clone(bio, bio_src); | 2549 | __bio_clone(bio, bio_src); |
2548 | 2550 | ||
2549 | if (bio_integrity(bio_src) && | 2551 | if (bio_integrity(bio_src) && |
2550 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) | 2552 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) |
2551 | goto free_and_out; | 2553 | goto free_and_out; |
2552 | 2554 | ||
2553 | if (bio_ctr && bio_ctr(bio, bio_src, data)) | 2555 | if (bio_ctr && bio_ctr(bio, bio_src, data)) |
2554 | goto free_and_out; | 2556 | goto free_and_out; |
2555 | 2557 | ||
2556 | if (rq->bio) { | 2558 | if (rq->bio) { |
2557 | rq->biotail->bi_next = bio; | 2559 | rq->biotail->bi_next = bio; |
2558 | rq->biotail = bio; | 2560 | rq->biotail = bio; |
2559 | } else | 2561 | } else |
2560 | rq->bio = rq->biotail = bio; | 2562 | rq->bio = rq->biotail = bio; |
2561 | } | 2563 | } |
2562 | 2564 | ||
2563 | __blk_rq_prep_clone(rq, rq_src); | 2565 | __blk_rq_prep_clone(rq, rq_src); |
2564 | 2566 | ||
2565 | return 0; | 2567 | return 0; |
2566 | 2568 | ||
2567 | free_and_out: | 2569 | free_and_out: |
2568 | if (bio) | 2570 | if (bio) |
2569 | bio_free(bio, bs); | 2571 | bio_free(bio, bs); |
2570 | blk_rq_unprep_clone(rq); | 2572 | blk_rq_unprep_clone(rq); |
2571 | 2573 | ||
2572 | return -ENOMEM; | 2574 | return -ENOMEM; |
2573 | } | 2575 | } |
2574 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); | 2576 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); |
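The clone helpers above are aimed at request stacking drivers (request-based dm is the in-tree user). A reduced sketch of the calling convention follows; the per-bio constructor does nothing here and every mydrv_* name is made up, so treat it as an outline rather than a working driver.

	#include <linux/blkdev.h>

	/* Optional per-clone-bio constructor; a no-op in this sketch. */
	static int mydrv_bio_ctr(struct bio *clone, struct bio *orig, void *data)
	{
		return 0;
	}

	/* Set up @clone as a copy of @rq before dispatching it to a lower device. */
	static int mydrv_setup_clone(struct request *clone, struct request *rq)
	{
		int r;

		r = blk_rq_prep_clone(clone, rq, NULL /* default fs_bio_set */,
				      GFP_ATOMIC, mydrv_bio_ctr, NULL);
		if (r)
			return r;	/* clone bios already freed by the helper */

		/* A real driver would set clone->end_io / clone->end_io_data here. */
		return 0;
	}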
2575 | 2577 | ||
2576 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | 2578 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) |
2577 | { | 2579 | { |
2578 | return queue_work(kblockd_workqueue, work); | 2580 | return queue_work(kblockd_workqueue, work); |
2579 | } | 2581 | } |
2580 | EXPORT_SYMBOL(kblockd_schedule_work); | 2582 | EXPORT_SYMBOL(kblockd_schedule_work); |
2581 | 2583 | ||
2582 | int __init blk_dev_init(void) | 2584 | int __init blk_dev_init(void) |
2583 | { | 2585 | { |
2584 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * | 2586 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
2585 | sizeof(((struct request *)0)->cmd_flags)); | 2587 | sizeof(((struct request *)0)->cmd_flags)); |
2586 | 2588 | ||
2587 | kblockd_workqueue = create_workqueue("kblockd"); | 2589 | kblockd_workqueue = create_workqueue("kblockd"); |
2588 | if (!kblockd_workqueue) | 2590 | if (!kblockd_workqueue) |
2589 | panic("Failed to create kblockd\n"); | 2591 | panic("Failed to create kblockd\n"); |
2590 | 2592 | ||
2591 | request_cachep = kmem_cache_create("blkdev_requests", | 2593 | request_cachep = kmem_cache_create("blkdev_requests", |
2592 | sizeof(struct request), 0, SLAB_PANIC, NULL); | 2594 | sizeof(struct request), 0, SLAB_PANIC, NULL); |
2593 | 2595 | ||
2594 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", | 2596 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", |
2595 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 2597 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
2596 | 2598 | ||
2597 | return 0; | 2599 | return 0; |
2598 | } | 2600 | } |
2599 | 2601 |
block/blk-settings.c
1 | /* | 1 | /* |
2 | * Functions related to setting various queue properties from drivers | 2 | * Functions related to setting various queue properties from drivers |
3 | */ | 3 | */ |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/bio.h> | 7 | #include <linux/bio.h> |
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | 9 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ |
10 | #include <linux/gcd.h> | 10 | #include <linux/gcd.h> |
11 | #include <linux/lcm.h> | 11 | #include <linux/lcm.h> |
12 | #include <linux/jiffies.h> | 12 | #include <linux/jiffies.h> |
13 | #include <linux/gfp.h> | 13 | #include <linux/gfp.h> |
14 | 14 | ||
15 | #include "blk.h" | 15 | #include "blk.h" |
16 | 16 | ||
17 | unsigned long blk_max_low_pfn; | 17 | unsigned long blk_max_low_pfn; |
18 | EXPORT_SYMBOL(blk_max_low_pfn); | 18 | EXPORT_SYMBOL(blk_max_low_pfn); |
19 | 19 | ||
20 | unsigned long blk_max_pfn; | 20 | unsigned long blk_max_pfn; |
21 | 21 | ||
22 | /** | 22 | /** |
23 | * blk_queue_prep_rq - set a prepare_request function for queue | 23 | * blk_queue_prep_rq - set a prepare_request function for queue |
24 | * @q: queue | 24 | * @q: queue |
25 | * @pfn: prepare_request function | 25 | * @pfn: prepare_request function |
26 | * | 26 | * |
27 | * It's possible for a queue to register a prepare_request callback which | 27 | * It's possible for a queue to register a prepare_request callback which |
28 | * is invoked before the request is handed to the request_fn. The goal of | 28 | * is invoked before the request is handed to the request_fn. The goal of |
29 | * the function is to prepare a request for I/O, it can be used to build a | 29 | * the function is to prepare a request for I/O, it can be used to build a |
30 | * cdb from the request data for instance. | 30 | * cdb from the request data for instance. |
31 | * | 31 | * |
32 | */ | 32 | */ |
33 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | 33 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) |
34 | { | 34 | { |
35 | q->prep_rq_fn = pfn; | 35 | q->prep_rq_fn = pfn; |
36 | } | 36 | } |
37 | EXPORT_SYMBOL(blk_queue_prep_rq); | 37 | EXPORT_SYMBOL(blk_queue_prep_rq); |
38 | 38 | ||
39 | /** | 39 | /** |
40 | * blk_queue_unprep_rq - set an unprepare_request function for queue | 40 | * blk_queue_unprep_rq - set an unprepare_request function for queue |
41 | * @q: queue | 41 | * @q: queue |
42 | * @ufn: unprepare_request function | 42 | * @ufn: unprepare_request function |
43 | * | 43 | * |
44 | * It's possible for a queue to register an unprepare_request callback | 44 | * It's possible for a queue to register an unprepare_request callback |
45 | * which is invoked before the request is finally completed. The goal | 45 | * which is invoked before the request is finally completed. The goal |
46 | * of the function is to deallocate any data that was allocated in the | 46 | * of the function is to deallocate any data that was allocated in the |
47 | * prepare_request callback. | 47 | * prepare_request callback. |
48 | * | 48 | * |
49 | */ | 49 | */ |
50 | void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn) | 50 | void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn) |
51 | { | 51 | { |
52 | q->unprep_rq_fn = ufn; | 52 | q->unprep_rq_fn = ufn; |
53 | } | 53 | } |
54 | EXPORT_SYMBOL(blk_queue_unprep_rq); | 54 | EXPORT_SYMBOL(blk_queue_unprep_rq); |
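The prepare/unprepare callbacks are registered as a pair at queue setup time; whatever the prepare step allocates, the unprepare step frees once the request is done. A minimal sketch with an invented per-request cookie (the REQ_DONTPREP flag is what makes the core invoke the unprep hook, as in blk_unprep_request() above):

	#include <linux/blkdev.h>
	#include <linux/slab.h>

	static int mydrv_prep_rq(struct request_queue *q, struct request *rq)
	{
		void *cookie = kzalloc(32, GFP_ATOMIC);	/* hypothetical per-rq state */

		if (!cookie)
			return BLKPREP_DEFER;		/* try again later */

		rq->special = cookie;
		rq->cmd_flags |= REQ_DONTPREP;		/* don't prep twice; unprep later */
		return BLKPREP_OK;
	}

	static void mydrv_unprep_rq(struct request_queue *q, struct request *rq)
	{
		kfree(rq->special);
		rq->special = NULL;
	}

	/*
	 * At queue setup time:
	 *	blk_queue_prep_rq(q, mydrv_prep_rq);
	 *	blk_queue_unprep_rq(q, mydrv_unprep_rq);
	 */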
55 | 55 | ||
56 | /** | 56 | /** |
57 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 57 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
58 | * @q: queue | 58 | * @q: queue |
59 | * @mbfn: merge_bvec_fn | 59 | * @mbfn: merge_bvec_fn |
60 | * | 60 | * |
61 | * Usually queues have static limitations on the max sectors or segments that | 61 | * Usually queues have static limitations on the max sectors or segments that |
62 | * we can put in a request. Stacking drivers may have some settings that | 62 | * we can put in a request. Stacking drivers may have some settings that |
63 | * are dynamic, and thus we have to query the queue whether it is ok to | 63 | * are dynamic, and thus we have to query the queue whether it is ok to |
64 | * add a new bio_vec to a bio at a given offset or not. If the block device | 64 | * add a new bio_vec to a bio at a given offset or not. If the block device |
65 | * has such limitations, it needs to register a merge_bvec_fn to control | 65 | * has such limitations, it needs to register a merge_bvec_fn to control |
66 | * the size of bio's sent to it. Note that a block device *must* allow a | 66 | * the size of bio's sent to it. Note that a block device *must* allow a |
67 | * single page to be added to an empty bio. The block device driver may want | 67 | * single page to be added to an empty bio. The block device driver may want |
68 | * to use the bio_split() function to deal with these bio's. By default | 68 | * to use the bio_split() function to deal with these bio's. By default |
69 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are | 69 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are |
70 | * honored. | 70 | * honored. |
71 | */ | 71 | */ |
72 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) | 72 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) |
73 | { | 73 | { |
74 | q->merge_bvec_fn = mbfn; | 74 | q->merge_bvec_fn = mbfn; |
75 | } | 75 | } |
76 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 76 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
77 | 77 | ||
78 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) | 78 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) |
79 | { | 79 | { |
80 | q->softirq_done_fn = fn; | 80 | q->softirq_done_fn = fn; |
81 | } | 81 | } |
82 | EXPORT_SYMBOL(blk_queue_softirq_done); | 82 | EXPORT_SYMBOL(blk_queue_softirq_done); |
83 | 83 | ||
84 | void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) | 84 | void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) |
85 | { | 85 | { |
86 | q->rq_timeout = timeout; | 86 | q->rq_timeout = timeout; |
87 | } | 87 | } |
88 | EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); | 88 | EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); |
89 | 89 | ||
90 | void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) | 90 | void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) |
91 | { | 91 | { |
92 | q->rq_timed_out_fn = fn; | 92 | q->rq_timed_out_fn = fn; |
93 | } | 93 | } |
94 | EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); | 94 | EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); |
95 | 95 | ||
96 | void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) | 96 | void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) |
97 | { | 97 | { |
98 | q->lld_busy_fn = fn; | 98 | q->lld_busy_fn = fn; |
99 | } | 99 | } |
100 | EXPORT_SYMBOL_GPL(blk_queue_lld_busy); | 100 | EXPORT_SYMBOL_GPL(blk_queue_lld_busy); |
101 | 101 | ||
102 | /** | 102 | /** |
103 | * blk_set_default_limits - reset limits to default values | 103 | * blk_set_default_limits - reset limits to default values |
104 | * @lim: the queue_limits structure to reset | 104 | * @lim: the queue_limits structure to reset |
105 | * | 105 | * |
106 | * Description: | 106 | * Description: |
107 | * Returns a queue_limits struct to its default state. Can be used by | 107 | * Returns a queue_limits struct to its default state. Can be used by |
108 | * stacking drivers like DM that stage table swaps and reuse an | 108 | * stacking drivers like DM that stage table swaps and reuse an |
109 | * existing device queue. | 109 | * existing device queue. |
110 | */ | 110 | */ |
111 | void blk_set_default_limits(struct queue_limits *lim) | 111 | void blk_set_default_limits(struct queue_limits *lim) |
112 | { | 112 | { |
113 | lim->max_segments = BLK_MAX_SEGMENTS; | 113 | lim->max_segments = BLK_MAX_SEGMENTS; |
114 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 114 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
115 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; | 115 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
116 | lim->max_sectors = BLK_DEF_MAX_SECTORS; | 116 | lim->max_sectors = BLK_DEF_MAX_SECTORS; |
117 | lim->max_hw_sectors = INT_MAX; | 117 | lim->max_hw_sectors = INT_MAX; |
118 | lim->max_discard_sectors = 0; | 118 | lim->max_discard_sectors = 0; |
119 | lim->discard_granularity = 0; | 119 | lim->discard_granularity = 0; |
120 | lim->discard_alignment = 0; | 120 | lim->discard_alignment = 0; |
121 | lim->discard_misaligned = 0; | 121 | lim->discard_misaligned = 0; |
122 | lim->discard_zeroes_data = -1; | 122 | lim->discard_zeroes_data = -1; |
123 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; | 123 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; |
124 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); | 124 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); |
125 | lim->alignment_offset = 0; | 125 | lim->alignment_offset = 0; |
126 | lim->io_opt = 0; | 126 | lim->io_opt = 0; |
127 | lim->misaligned = 0; | 127 | lim->misaligned = 0; |
128 | lim->no_cluster = 0; | 128 | lim->no_cluster = 0; |
129 | } | 129 | } |
130 | EXPORT_SYMBOL(blk_set_default_limits); | 130 | EXPORT_SYMBOL(blk_set_default_limits); |
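A stacking driver that swaps in a new table usually rebuilds its limits from the defaults and then folds in each underlying device, typically with blk_stack_limits(). The fragment below is a rough sketch only: it handles a single lower device, passes a zero start offset, and uses an invented mystack_* name.

	#include <linux/blkdev.h>

	static void mystack_recalc_limits(struct request_queue *q,
					  struct block_device *lower)
	{
		struct queue_limits lim;

		blk_set_default_limits(&lim);

		/* Fold in one lower device; real drivers loop over all of them. */
		blk_stack_limits(&lim, &bdev_get_queue(lower)->limits, 0);

		q->limits = lim;	/* publish the recomputed limits */
	}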
131 | 131 | ||
132 | /** | 132 | /** |
133 | * blk_queue_make_request - define an alternate make_request function for a device | 133 | * blk_queue_make_request - define an alternate make_request function for a device |
134 | * @q: the request queue for the device to be affected | 134 | * @q: the request queue for the device to be affected |
135 | * @mfn: the alternate make_request function | 135 | * @mfn: the alternate make_request function |
136 | * | 136 | * |
137 | * Description: | 137 | * Description: |
138 | * The normal way for &struct bios to be passed to a device | 138 | * The normal way for &struct bios to be passed to a device |
139 | * driver is for them to be collected into requests on a request | 139 | * driver is for them to be collected into requests on a request |
140 | * queue, and then to allow the device driver to select requests | 140 | * queue, and then to allow the device driver to select requests |
141 | * off that queue when it is ready. This works well for many block | 141 | * off that queue when it is ready. This works well for many block |
142 | * devices. However some block devices (typically virtual devices | 142 | * devices. However some block devices (typically virtual devices |
143 | * such as md or lvm) do not benefit from the processing on the | 143 | * such as md or lvm) do not benefit from the processing on the |
144 | * request queue, and are served best by having the requests passed | 144 | * request queue, and are served best by having the requests passed |
145 | * directly to them. This can be achieved by providing a function | 145 | * directly to them. This can be achieved by providing a function |
146 | * to blk_queue_make_request(). | 146 | * to blk_queue_make_request(). |
147 | * | 147 | * |
148 | * Caveat: | 148 | * Caveat: |
149 | * The driver that does this *must* be able to deal appropriately | 149 | * The driver that does this *must* be able to deal appropriately |
150 | * with buffers in "highmemory". This can be accomplished by either calling | 150 | * with buffers in "highmemory". This can be accomplished by either calling |
151 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling | 151 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling |
152 | * blk_queue_bounce() to create a buffer in normal memory. | 152 | * blk_queue_bounce() to create a buffer in normal memory. |
153 | **/ | 153 | **/ |
154 | void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) | 154 | void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) |
155 | { | 155 | { |
156 | /* | 156 | /* |
157 | * set defaults | 157 | * set defaults |
158 | */ | 158 | */ |
159 | q->nr_requests = BLKDEV_MAX_RQ; | 159 | q->nr_requests = BLKDEV_MAX_RQ; |
160 | 160 | ||
161 | q->make_request_fn = mfn; | 161 | q->make_request_fn = mfn; |
162 | blk_queue_dma_alignment(q, 511); | 162 | blk_queue_dma_alignment(q, 511); |
163 | blk_queue_congestion_threshold(q); | 163 | blk_queue_congestion_threshold(q); |
164 | q->nr_batching = BLK_BATCH_REQ; | 164 | q->nr_batching = BLK_BATCH_REQ; |
165 | 165 | ||
166 | q->unplug_thresh = 4; /* hmm */ | 166 | q->unplug_thresh = 4; /* hmm */ |
167 | q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */ | 167 | q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */ |
168 | if (q->unplug_delay == 0) | 168 | if (q->unplug_delay == 0) |
169 | q->unplug_delay = 1; | 169 | q->unplug_delay = 1; |
170 | 170 | ||
171 | q->unplug_timer.function = blk_unplug_timeout; | 171 | q->unplug_timer.function = blk_unplug_timeout; |
172 | q->unplug_timer.data = (unsigned long)q; | 172 | q->unplug_timer.data = (unsigned long)q; |
173 | 173 | ||
174 | blk_set_default_limits(&q->limits); | 174 | blk_set_default_limits(&q->limits); |
175 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); | 175 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * If the caller didn't supply a lock, fall back to our embedded | 178 | * If the caller didn't supply a lock, fall back to our embedded |
179 | * per-queue locks | 179 | * per-queue locks |
180 | */ | 180 | */ |
181 | if (!q->queue_lock) | 181 | if (!q->queue_lock) |
182 | q->queue_lock = &q->__queue_lock; | 182 | q->queue_lock = &q->__queue_lock; |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * by default assume old behaviour and bounce for any highmem page | 185 | * by default assume old behaviour and bounce for any highmem page |
186 | */ | 186 | */ |
187 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 187 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
188 | } | 188 | } |
189 | EXPORT_SYMBOL(blk_queue_make_request); | 189 | EXPORT_SYMBOL(blk_queue_make_request); |
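A bio-based virtual driver registers its make_request function right after allocating the queue; under this kernel the callback returns int. A throwaway sketch that simply zero-fills reads and discards writes:

	#include <linux/blkdev.h>
	#include <linux/bio.h>

	/* Trivial bio handler: zero-fill reads, ignore writes (sketch only). */
	static int mydrv_make_request(struct request_queue *q, struct bio *bio)
	{
		if (bio_data_dir(bio) == READ)
			zero_fill_bio(bio);

		bio_endio(bio, 0);	/* complete the bio with success */
		return 0;
	}

	static struct request_queue *mydrv_alloc_queue(void)
	{
		struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

		if (q)
			blk_queue_make_request(q, mydrv_make_request);
		return q;
	}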
190 | 190 | ||
191 | /** | 191 | /** |
192 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 192 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
193 | * @q: the request queue for the device | 193 | * @q: the request queue for the device |
194 | * @dma_mask: the maximum address the device can handle | 194 | * @dma_mask: the maximum address the device can handle |
195 | * | 195 | * |
196 | * Description: | 196 | * Description: |
197 | * Different hardware can have different requirements as to what pages | 197 | * Different hardware can have different requirements as to what pages |
198 | * it can do I/O directly to. A low level driver can call | 198 | * it can do I/O directly to. A low level driver can call |
199 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | 199 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce |
200 | * buffers for doing I/O to pages residing above @dma_mask. | 200 | * buffers for doing I/O to pages residing above @dma_mask. |
201 | **/ | 201 | **/ |
202 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) | 202 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) |
203 | { | 203 | { |
204 | unsigned long b_pfn = dma_mask >> PAGE_SHIFT; | 204 | unsigned long b_pfn = dma_mask >> PAGE_SHIFT; |
205 | int dma = 0; | 205 | int dma = 0; |
206 | 206 | ||
207 | q->bounce_gfp = GFP_NOIO; | 207 | q->bounce_gfp = GFP_NOIO; |
208 | #if BITS_PER_LONG == 64 | 208 | #if BITS_PER_LONG == 64 |
209 | /* | 209 | /* |
210 | * Assume anything <= 4GB can be handled by IOMMU. Actually | 210 | * Assume anything <= 4GB can be handled by IOMMU. Actually |
211 | * some IOMMUs can handle everything, but I don't know of a | 211 | * some IOMMUs can handle everything, but I don't know of a |
212 | * way to test this here. | 212 | * way to test this here. |
213 | */ | 213 | */ |
214 | if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) | 214 | if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) |
215 | dma = 1; | 215 | dma = 1; |
216 | q->limits.bounce_pfn = max_low_pfn; | 216 | q->limits.bounce_pfn = max_low_pfn; |
217 | #else | 217 | #else |
218 | if (b_pfn < blk_max_low_pfn) | 218 | if (b_pfn < blk_max_low_pfn) |
219 | dma = 1; | 219 | dma = 1; |
220 | q->limits.bounce_pfn = b_pfn; | 220 | q->limits.bounce_pfn = b_pfn; |
221 | #endif | 221 | #endif |
222 | if (dma) { | 222 | if (dma) { |
223 | init_emergency_isa_pool(); | 223 | init_emergency_isa_pool(); |
224 | q->bounce_gfp = GFP_NOIO | GFP_DMA; | 224 | q->bounce_gfp = GFP_NOIO | GFP_DMA; |
225 | q->limits.bounce_pfn = b_pfn; | 225 | q->limits.bounce_pfn = b_pfn; |
226 | } | 226 | } |
227 | } | 227 | } |
228 | EXPORT_SYMBOL(blk_queue_bounce_limit); | 228 | EXPORT_SYMBOL(blk_queue_bounce_limit); |
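Drivers usually feed their DMA mask straight into this helper. A one-line sketch with an example 32-bit-only controller:

	#include <linux/blkdev.h>
	#include <linux/dma-mapping.h>

	static void mydrv_set_bounce(struct request_queue *q)
	{
		/* Controller can only address the low 4GB: bounce anything above. */
		blk_queue_bounce_limit(q, DMA_BIT_MASK(32));
	}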
229 | 229 | ||
230 | /** | 230 | /** |
231 | * blk_queue_max_hw_sectors - set max sectors for a request for this queue | 231 | * blk_queue_max_hw_sectors - set max sectors for a request for this queue |
232 | * @q: the request queue for the device | 232 | * @q: the request queue for the device |
233 | * @max_hw_sectors: max hardware sectors in the usual 512b unit | 233 | * @max_hw_sectors: max hardware sectors in the usual 512b unit |
234 | * | 234 | * |
235 | * Description: | 235 | * Description: |
236 | * Enables a low level driver to set a hard upper limit, | 236 | * Enables a low level driver to set a hard upper limit, |
237 | * max_hw_sectors, on the size of requests. max_hw_sectors is set by | 237 | * max_hw_sectors, on the size of requests. max_hw_sectors is set by |
238 | * the device driver based upon the combined capabilities of I/O | 238 | * the device driver based upon the combined capabilities of I/O |
239 | * controller and storage device. | 239 | * controller and storage device. |
240 | * | 240 | * |
241 | * max_sectors is a soft limit imposed by the block layer for | 241 | * max_sectors is a soft limit imposed by the block layer for |
242 | * filesystem type requests. This value can be overridden on a | 242 | * filesystem type requests. This value can be overridden on a |
243 | * per-device basis in /sys/block/<device>/queue/max_sectors_kb. | 243 | * per-device basis in /sys/block/<device>/queue/max_sectors_kb. |
244 | * The soft limit can not exceed max_hw_sectors. | 244 | * The soft limit can not exceed max_hw_sectors. |
245 | **/ | 245 | **/ |
246 | void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) | 246 | void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) |
247 | { | 247 | { |
248 | if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { | 248 | if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { |
249 | max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | 249 | max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); |
250 | printk(KERN_INFO "%s: set to minimum %d\n", | 250 | printk(KERN_INFO "%s: set to minimum %d\n", |
251 | __func__, max_hw_sectors); | 251 | __func__, max_hw_sectors); |
252 | } | 252 | } |
253 | 253 | ||
254 | q->limits.max_hw_sectors = max_hw_sectors; | 254 | q->limits.max_hw_sectors = max_hw_sectors; |
255 | q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, | 255 | q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, |
256 | BLK_DEF_MAX_SECTORS); | 256 | BLK_DEF_MAX_SECTORS); |
257 | } | 257 | } |
258 | EXPORT_SYMBOL(blk_queue_max_hw_sectors); | 258 | EXPORT_SYMBOL(blk_queue_max_hw_sectors); |
259 | 259 | ||
260 | /** | 260 | /** |
261 | * blk_queue_max_discard_sectors - set max sectors for a single discard | 261 | * blk_queue_max_discard_sectors - set max sectors for a single discard |
262 | * @q: the request queue for the device | 262 | * @q: the request queue for the device |
263 | * @max_discard_sectors: maximum number of sectors to discard | 263 | * @max_discard_sectors: maximum number of sectors to discard |
264 | **/ | 264 | **/ |
265 | void blk_queue_max_discard_sectors(struct request_queue *q, | 265 | void blk_queue_max_discard_sectors(struct request_queue *q, |
266 | unsigned int max_discard_sectors) | 266 | unsigned int max_discard_sectors) |
267 | { | 267 | { |
268 | q->limits.max_discard_sectors = max_discard_sectors; | 268 | q->limits.max_discard_sectors = max_discard_sectors; |
269 | } | 269 | } |
270 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); | 270 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); |
271 | 271 | ||
272 | /** | 272 | /** |
273 | * blk_queue_max_segments - set max hw segments for a request for this queue | 273 | * blk_queue_max_segments - set max hw segments for a request for this queue |
274 | * @q: the request queue for the device | 274 | * @q: the request queue for the device |
275 | * @max_segments: max number of segments | 275 | * @max_segments: max number of segments |
276 | * | 276 | * |
277 | * Description: | 277 | * Description: |
278 | * Enables a low level driver to set an upper limit on the number of | 278 | * Enables a low level driver to set an upper limit on the number of |
279 | * hw data segments in a request. | 279 | * hw data segments in a request. |
280 | **/ | 280 | **/ |
281 | void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) | 281 | void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) |
282 | { | 282 | { |
283 | if (!max_segments) { | 283 | if (!max_segments) { |
284 | max_segments = 1; | 284 | max_segments = 1; |
285 | printk(KERN_INFO "%s: set to minimum %d\n", | 285 | printk(KERN_INFO "%s: set to minimum %d\n", |
286 | __func__, max_segments); | 286 | __func__, max_segments); |
287 | } | 287 | } |
288 | 288 | ||
289 | q->limits.max_segments = max_segments; | 289 | q->limits.max_segments = max_segments; |
290 | } | 290 | } |
291 | EXPORT_SYMBOL(blk_queue_max_segments); | 291 | EXPORT_SYMBOL(blk_queue_max_segments); |
292 | 292 | ||
293 | /** | 293 | /** |
294 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | 294 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg |
295 | * @q: the request queue for the device | 295 | * @q: the request queue for the device |
296 | * @max_size: max size of segment in bytes | 296 | * @max_size: max size of segment in bytes |
297 | * | 297 | * |
298 | * Description: | 298 | * Description: |
299 | * Enables a low level driver to set an upper limit on the size of a | 299 | * Enables a low level driver to set an upper limit on the size of a |
300 | * coalesced segment | 300 | * coalesced segment |
301 | **/ | 301 | **/ |
302 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) | 302 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) |
303 | { | 303 | { |
304 | if (max_size < PAGE_CACHE_SIZE) { | 304 | if (max_size < PAGE_CACHE_SIZE) { |
305 | max_size = PAGE_CACHE_SIZE; | 305 | max_size = PAGE_CACHE_SIZE; |
306 | printk(KERN_INFO "%s: set to minimum %d\n", | 306 | printk(KERN_INFO "%s: set to minimum %d\n", |
307 | __func__, max_size); | 307 | __func__, max_size); |
308 | } | 308 | } |
309 | 309 | ||
310 | q->limits.max_segment_size = max_size; | 310 | q->limits.max_segment_size = max_size; |
311 | } | 311 | } |
312 | EXPORT_SYMBOL(blk_queue_max_segment_size); | 312 | EXPORT_SYMBOL(blk_queue_max_segment_size); |
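The request-size and segment limits above are normally configured together at probe time from the controller's scatter-gather capabilities. The values below are examples, not defaults:

	#include <linux/blkdev.h>

	static void mydrv_set_transfer_limits(struct request_queue *q)
	{
		blk_queue_max_hw_sectors(q, 1024);	/* at most 512KB per request */
		blk_queue_max_segments(q, 128);		/* 128 scatter-gather slots */
		blk_queue_max_segment_size(q, 65536);	/* each slot maps up to 64KB */
	}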
313 | 313 | ||
314 | /** | 314 | /** |
315 | * blk_queue_logical_block_size - set logical block size for the queue | 315 | * blk_queue_logical_block_size - set logical block size for the queue |
316 | * @q: the request queue for the device | 316 | * @q: the request queue for the device |
317 | * @size: the logical block size, in bytes | 317 | * @size: the logical block size, in bytes |
318 | * | 318 | * |
319 | * Description: | 319 | * Description: |
320 | * This should be set to the lowest possible block size that the | 320 | * This should be set to the lowest possible block size that the |
321 | * storage device can address. The default of 512 covers most | 321 | * storage device can address. The default of 512 covers most |
322 | * hardware. | 322 | * hardware. |
323 | **/ | 323 | **/ |
324 | void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) | 324 | void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) |
325 | { | 325 | { |
326 | q->limits.logical_block_size = size; | 326 | q->limits.logical_block_size = size; |
327 | 327 | ||
328 | if (q->limits.physical_block_size < size) | 328 | if (q->limits.physical_block_size < size) |
329 | q->limits.physical_block_size = size; | 329 | q->limits.physical_block_size = size; |
330 | 330 | ||
331 | if (q->limits.io_min < q->limits.physical_block_size) | 331 | if (q->limits.io_min < q->limits.physical_block_size) |
332 | q->limits.io_min = q->limits.physical_block_size; | 332 | q->limits.io_min = q->limits.physical_block_size; |
333 | } | 333 | } |
334 | EXPORT_SYMBOL(blk_queue_logical_block_size); | 334 | EXPORT_SYMBOL(blk_queue_logical_block_size); |
335 | 335 | ||
336 | /** | 336 | /** |
337 | * blk_queue_physical_block_size - set physical block size for the queue | 337 | * blk_queue_physical_block_size - set physical block size for the queue |
338 | * @q: the request queue for the device | 338 | * @q: the request queue for the device |
339 | * @size: the physical block size, in bytes | 339 | * @size: the physical block size, in bytes |
340 | * | 340 | * |
341 | * Description: | 341 | * Description: |
342 | * This should be set to the lowest possible sector size that the | 342 | * This should be set to the lowest possible sector size that the |
343 | * hardware can operate on without reverting to read-modify-write | 343 | * hardware can operate on without reverting to read-modify-write |
344 | * operations. | 344 | * operations. |
345 | */ | 345 | */ |
346 | void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) | 346 | void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) |
347 | { | 347 | { |
348 | q->limits.physical_block_size = size; | 348 | q->limits.physical_block_size = size; |
349 | 349 | ||
350 | if (q->limits.physical_block_size < q->limits.logical_block_size) | 350 | if (q->limits.physical_block_size < q->limits.logical_block_size) |
351 | q->limits.physical_block_size = q->limits.logical_block_size; | 351 | q->limits.physical_block_size = q->limits.logical_block_size; |
352 | 352 | ||
353 | if (q->limits.io_min < q->limits.physical_block_size) | 353 | if (q->limits.io_min < q->limits.physical_block_size) |
354 | q->limits.io_min = q->limits.physical_block_size; | 354 | q->limits.io_min = q->limits.physical_block_size; |
355 | } | 355 | } |
356 | EXPORT_SYMBOL(blk_queue_physical_block_size); | 356 | EXPORT_SYMBOL(blk_queue_physical_block_size); |
357 | 357 | ||
358 | /** | 358 | /** |
359 | * blk_queue_alignment_offset - set physical block alignment offset | 359 | * blk_queue_alignment_offset - set physical block alignment offset |
360 | * @q: the request queue for the device | 360 | * @q: the request queue for the device |
361 | * @offset: alignment offset in bytes | 361 | * @offset: alignment offset in bytes |
362 | * | 362 | * |
363 | * Description: | 363 | * Description: |
364 | * Some devices are naturally misaligned to compensate for things like | 364 | * Some devices are naturally misaligned to compensate for things like |
365 | * the legacy DOS partition table 63-sector offset. Low-level drivers | 365 | * the legacy DOS partition table 63-sector offset. Low-level drivers |
366 | * should call this function for devices whose first sector is not | 366 | * should call this function for devices whose first sector is not |
367 | * naturally aligned. | 367 | * naturally aligned. |
368 | */ | 368 | */ |
369 | void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) | 369 | void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) |
370 | { | 370 | { |
371 | q->limits.alignment_offset = | 371 | q->limits.alignment_offset = |
372 | offset & (q->limits.physical_block_size - 1); | 372 | offset & (q->limits.physical_block_size - 1); |
373 | q->limits.misaligned = 0; | 373 | q->limits.misaligned = 0; |
374 | } | 374 | } |
375 | EXPORT_SYMBOL(blk_queue_alignment_offset); | 375 | EXPORT_SYMBOL(blk_queue_alignment_offset); |
376 | 376 | ||
377 | /** | 377 | /** |
378 | * blk_limits_io_min - set minimum request size for a device | 378 | * blk_limits_io_min - set minimum request size for a device |
379 | * @limits: the queue limits | 379 | * @limits: the queue limits |
380 | * @min: smallest I/O size in bytes | 380 | * @min: smallest I/O size in bytes |
381 | * | 381 | * |
382 | * Description: | 382 | * Description: |
383 | * Some devices have an internal block size bigger than the reported | 383 | * Some devices have an internal block size bigger than the reported |
384 | * hardware sector size. This function can be used to signal the | 384 | * hardware sector size. This function can be used to signal the |
385 | * smallest I/O the device can perform without incurring a performance | 385 | * smallest I/O the device can perform without incurring a performance |
386 | * penalty. | 386 | * penalty. |
387 | */ | 387 | */ |
388 | void blk_limits_io_min(struct queue_limits *limits, unsigned int min) | 388 | void blk_limits_io_min(struct queue_limits *limits, unsigned int min) |
389 | { | 389 | { |
390 | limits->io_min = min; | 390 | limits->io_min = min; |
391 | 391 | ||
392 | if (limits->io_min < limits->logical_block_size) | 392 | if (limits->io_min < limits->logical_block_size) |
393 | limits->io_min = limits->logical_block_size; | 393 | limits->io_min = limits->logical_block_size; |
394 | 394 | ||
395 | if (limits->io_min < limits->physical_block_size) | 395 | if (limits->io_min < limits->physical_block_size) |
396 | limits->io_min = limits->physical_block_size; | 396 | limits->io_min = limits->physical_block_size; |
397 | } | 397 | } |
398 | EXPORT_SYMBOL(blk_limits_io_min); | 398 | EXPORT_SYMBOL(blk_limits_io_min); |
399 | 399 | ||
400 | /** | 400 | /** |
401 | * blk_queue_io_min - set minimum request size for the queue | 401 | * blk_queue_io_min - set minimum request size for the queue |
402 | * @q: the request queue for the device | 402 | * @q: the request queue for the device |
403 | * @min: smallest I/O size in bytes | 403 | * @min: smallest I/O size in bytes |
404 | * | 404 | * |
405 | * Description: | 405 | * Description: |
406 | * Storage devices may report a granularity or preferred minimum I/O | 406 | * Storage devices may report a granularity or preferred minimum I/O |
407 | * size which is the smallest request the device can perform without | 407 | * size which is the smallest request the device can perform without |
408 | * incurring a performance penalty. For disk drives this is often the | 408 | * incurring a performance penalty. For disk drives this is often the |
409 | * physical block size. For RAID arrays it is often the stripe chunk | 409 | * physical block size. For RAID arrays it is often the stripe chunk |
410 | * size. A properly aligned multiple of minimum_io_size is the | 410 | * size. A properly aligned multiple of minimum_io_size is the |
411 | * preferred request size for workloads where a high number of I/O | 411 | * preferred request size for workloads where a high number of I/O |
412 | * operations is desired. | 412 | * operations is desired. |
413 | */ | 413 | */ |
414 | void blk_queue_io_min(struct request_queue *q, unsigned int min) | 414 | void blk_queue_io_min(struct request_queue *q, unsigned int min) |
415 | { | 415 | { |
416 | blk_limits_io_min(&q->limits, min); | 416 | blk_limits_io_min(&q->limits, min); |
417 | } | 417 | } |
418 | EXPORT_SYMBOL(blk_queue_io_min); | 418 | EXPORT_SYMBOL(blk_queue_io_min); |
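A hypothetical RAID driver with a 64KiB chunk size could advertise that as the minimum efficient I/O unit, either directly on its queue or, while assembling a queue_limits struct for later stacking, via blk_limits_io_min(); the 64KiB figure is only an example:

	blk_queue_io_min(q, 64 * 1024);		/* per-disk chunk size */

	/* or, when building limits that will be stacked later: */
	blk_limits_io_min(&limits, 64 * 1024);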
419 | 419 | ||
420 | /** | 420 | /** |
421 | * blk_limits_io_opt - set optimal request size for a device | 421 | * blk_limits_io_opt - set optimal request size for a device |
422 | * @limits: the queue limits | 422 | * @limits: the queue limits |
423 | * @opt: optimal request size in bytes | 423 | * @opt: optimal request size in bytes |
424 | * | 424 | * |
425 | * Description: | 425 | * Description: |
426 | * Storage devices may report an optimal I/O size, which is the | 426 | * Storage devices may report an optimal I/O size, which is the |
427 | * device's preferred unit for sustained I/O. This is rarely reported | 427 | * device's preferred unit for sustained I/O. This is rarely reported |
428 | * for disk drives. For RAID arrays it is usually the stripe width or | 428 | * for disk drives. For RAID arrays it is usually the stripe width or |
429 | * the internal track size. A properly aligned multiple of | 429 | * the internal track size. A properly aligned multiple of |
430 | * optimal_io_size is the preferred request size for workloads where | 430 | * optimal_io_size is the preferred request size for workloads where |
431 | * sustained throughput is desired. | 431 | * sustained throughput is desired. |
432 | */ | 432 | */ |
433 | void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt) | 433 | void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt) |
434 | { | 434 | { |
435 | limits->io_opt = opt; | 435 | limits->io_opt = opt; |
436 | } | 436 | } |
437 | EXPORT_SYMBOL(blk_limits_io_opt); | 437 | EXPORT_SYMBOL(blk_limits_io_opt); |
438 | 438 | ||
439 | /** | 439 | /** |
440 | * blk_queue_io_opt - set optimal request size for the queue | 440 | * blk_queue_io_opt - set optimal request size for the queue |
441 | * @q: the request queue for the device | 441 | * @q: the request queue for the device |
442 | * @opt: optimal request size in bytes | 442 | * @opt: optimal request size in bytes |
443 | * | 443 | * |
444 | * Description: | 444 | * Description: |
445 | * Storage devices may report an optimal I/O size, which is the | 445 | * Storage devices may report an optimal I/O size, which is the |
446 | * device's preferred unit for sustained I/O. This is rarely reported | 446 | * device's preferred unit for sustained I/O. This is rarely reported |
447 | * for disk drives. For RAID arrays it is usually the stripe width or | 447 | * for disk drives. For RAID arrays it is usually the stripe width or |
448 | * the internal track size. A properly aligned multiple of | 448 | * the internal track size. A properly aligned multiple of |
449 | * optimal_io_size is the preferred request size for workloads where | 449 | * optimal_io_size is the preferred request size for workloads where |
450 | * sustained throughput is desired. | 450 | * sustained throughput is desired. |
451 | */ | 451 | */ |
452 | void blk_queue_io_opt(struct request_queue *q, unsigned int opt) | 452 | void blk_queue_io_opt(struct request_queue *q, unsigned int opt) |
453 | { | 453 | { |
454 | blk_limits_io_opt(&q->limits, opt); | 454 | blk_limits_io_opt(&q->limits, opt); |
455 | } | 455 | } |
456 | EXPORT_SYMBOL(blk_queue_io_opt); | 456 | EXPORT_SYMBOL(blk_queue_io_opt); |
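Continuing the hypothetical RAID example, an array striped over four data disks with 64KiB chunks has a 256KiB full stripe, which is the natural value to report as the optimal I/O size:

	blk_queue_io_opt(q, 4 * 64 * 1024);	/* full stripe width */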
457 | 457 | ||
458 | /* | 458 | /* |
459 | * Returns the minimum that is _not_ zero, unless both are zero. | 459 | * Returns the minimum that is _not_ zero, unless both are zero. |
460 | */ | 460 | */ |
461 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | 461 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) |
462 | 462 | ||
463 | /** | 463 | /** |
464 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers | 464 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers |
465 | * @t: the stacking driver (top) | 465 | * @t: the stacking driver (top) |
466 | * @b: the underlying device (bottom) | 466 | * @b: the underlying device (bottom) |
467 | **/ | 467 | **/ |
468 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) | 468 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) |
469 | { | 469 | { |
470 | blk_stack_limits(&t->limits, &b->limits, 0); | 470 | blk_stack_limits(&t->limits, &b->limits, 0); |
471 | 471 | ||
472 | if (!t->queue_lock) | 472 | if (!t->queue_lock) |
473 | WARN_ON_ONCE(1); | 473 | WARN_ON_ONCE(1); |
474 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { | 474 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { |
475 | unsigned long flags; | 475 | unsigned long flags; |
476 | spin_lock_irqsave(t->queue_lock, flags); | 476 | spin_lock_irqsave(t->queue_lock, flags); |
477 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); | 477 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); |
478 | spin_unlock_irqrestore(t->queue_lock, flags); | 478 | spin_unlock_irqrestore(t->queue_lock, flags); |
479 | } | 479 | } |
480 | } | 480 | } |
481 | EXPORT_SYMBOL(blk_queue_stack_limits); | 481 | EXPORT_SYMBOL(blk_queue_stack_limits); |
482 | 482 | ||
483 | /** | 483 | /** |
484 | * blk_stack_limits - adjust queue_limits for stacked devices | 484 | * blk_stack_limits - adjust queue_limits for stacked devices |
485 | * @t: the stacking driver limits (top device) | 485 | * @t: the stacking driver limits (top device) |
486 | * @b: the underlying queue limits (bottom, component device) | 486 | * @b: the underlying queue limits (bottom, component device) |
487 | * @start: first data sector within component device | 487 | * @start: first data sector within component device |
488 | * | 488 | * |
489 | * Description: | 489 | * Description: |
490 | * This function is used by stacking drivers like MD and DM to ensure | 490 | * This function is used by stacking drivers like MD and DM to ensure |
491 | * that all component devices have compatible block sizes and | 491 | * that all component devices have compatible block sizes and |
492 | * alignments. The stacking driver must provide a queue_limits | 492 | * alignments. The stacking driver must provide a queue_limits |
493 | * struct (top) and then iteratively call the stacking function for | 493 | * struct (top) and then iteratively call the stacking function for |
494 | * all component (bottom) devices. The stacking function will | 494 | * all component (bottom) devices. The stacking function will |
495 | * attempt to combine the values and ensure proper alignment. | 495 | * attempt to combine the values and ensure proper alignment. |
496 | * | 496 | * |
497 | * Returns 0 if the top and bottom queue_limits are compatible. The | 497 | * Returns 0 if the top and bottom queue_limits are compatible. The |
498 | * top device's block sizes and alignment offsets may be adjusted to | 498 | * top device's block sizes and alignment offsets may be adjusted to |
499 | * ensure alignment with the bottom device. If no compatible sizes | 499 | * ensure alignment with the bottom device. If no compatible sizes |
500 | * and alignments exist, -1 is returned and the resulting top | 500 | * and alignments exist, -1 is returned and the resulting top |
501 | * queue_limits will have the misaligned flag set to indicate that | 501 | * queue_limits will have the misaligned flag set to indicate that |
502 | * the alignment_offset is undefined. | 502 | * the alignment_offset is undefined. |
503 | */ | 503 | */ |
504 | int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | 504 | int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
505 | sector_t start) | 505 | sector_t start) |
506 | { | 506 | { |
507 | unsigned int top, bottom, alignment, ret = 0; | 507 | unsigned int top, bottom, alignment, ret = 0; |
508 | 508 | ||
509 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); | 509 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); |
510 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); | 510 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); |
511 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); | 511 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); |
512 | 512 | ||
513 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, | 513 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, |
514 | b->seg_boundary_mask); | 514 | b->seg_boundary_mask); |
515 | 515 | ||
516 | t->max_segments = min_not_zero(t->max_segments, b->max_segments); | 516 | t->max_segments = min_not_zero(t->max_segments, b->max_segments); |
517 | 517 | ||
518 | t->max_segment_size = min_not_zero(t->max_segment_size, | 518 | t->max_segment_size = min_not_zero(t->max_segment_size, |
519 | b->max_segment_size); | 519 | b->max_segment_size); |
520 | 520 | ||
521 | t->misaligned |= b->misaligned; | 521 | t->misaligned |= b->misaligned; |
522 | 522 | ||
523 | alignment = queue_limit_alignment_offset(b, start); | 523 | alignment = queue_limit_alignment_offset(b, start); |
524 | 524 | ||
525 | /* Bottom device has different alignment. Check that it is | 525 | /* Bottom device has different alignment. Check that it is |
526 | * compatible with the current top alignment. | 526 | * compatible with the current top alignment. |
527 | */ | 527 | */ |
528 | if (t->alignment_offset != alignment) { | 528 | if (t->alignment_offset != alignment) { |
529 | 529 | ||
530 | top = max(t->physical_block_size, t->io_min) | 530 | top = max(t->physical_block_size, t->io_min) |
531 | + t->alignment_offset; | 531 | + t->alignment_offset; |
532 | bottom = max(b->physical_block_size, b->io_min) + alignment; | 532 | bottom = max(b->physical_block_size, b->io_min) + alignment; |
533 | 533 | ||
534 | /* Verify that top and bottom intervals line up */ | 534 | /* Verify that top and bottom intervals line up */ |
535 | if (max(top, bottom) & (min(top, bottom) - 1)) { | 535 | if (max(top, bottom) & (min(top, bottom) - 1)) { |
536 | t->misaligned = 1; | 536 | t->misaligned = 1; |
537 | ret = -1; | 537 | ret = -1; |
538 | } | 538 | } |
539 | } | 539 | } |
540 | 540 | ||
541 | t->logical_block_size = max(t->logical_block_size, | 541 | t->logical_block_size = max(t->logical_block_size, |
542 | b->logical_block_size); | 542 | b->logical_block_size); |
543 | 543 | ||
544 | t->physical_block_size = max(t->physical_block_size, | 544 | t->physical_block_size = max(t->physical_block_size, |
545 | b->physical_block_size); | 545 | b->physical_block_size); |
546 | 546 | ||
547 | t->io_min = max(t->io_min, b->io_min); | 547 | t->io_min = max(t->io_min, b->io_min); |
548 | t->io_opt = lcm(t->io_opt, b->io_opt); | 548 | t->io_opt = lcm(t->io_opt, b->io_opt); |
549 | 549 | ||
550 | t->no_cluster |= b->no_cluster; | 550 | t->no_cluster |= b->no_cluster; |
551 | t->discard_zeroes_data &= b->discard_zeroes_data; | 551 | t->discard_zeroes_data &= b->discard_zeroes_data; |
552 | 552 | ||
553 | /* Physical block size a multiple of the logical block size? */ | 553 | /* Physical block size a multiple of the logical block size? */ |
554 | if (t->physical_block_size & (t->logical_block_size - 1)) { | 554 | if (t->physical_block_size & (t->logical_block_size - 1)) { |
555 | t->physical_block_size = t->logical_block_size; | 555 | t->physical_block_size = t->logical_block_size; |
556 | t->misaligned = 1; | 556 | t->misaligned = 1; |
557 | ret = -1; | 557 | ret = -1; |
558 | } | 558 | } |
559 | 559 | ||
560 | /* Minimum I/O a multiple of the physical block size? */ | 560 | /* Minimum I/O a multiple of the physical block size? */ |
561 | if (t->io_min & (t->physical_block_size - 1)) { | 561 | if (t->io_min & (t->physical_block_size - 1)) { |
562 | t->io_min = t->physical_block_size; | 562 | t->io_min = t->physical_block_size; |
563 | t->misaligned = 1; | 563 | t->misaligned = 1; |
564 | ret = -1; | 564 | ret = -1; |
565 | } | 565 | } |
566 | 566 | ||
567 | /* Optimal I/O a multiple of the physical block size? */ | 567 | /* Optimal I/O a multiple of the physical block size? */ |
568 | if (t->io_opt & (t->physical_block_size - 1)) { | 568 | if (t->io_opt & (t->physical_block_size - 1)) { |
569 | t->io_opt = 0; | 569 | t->io_opt = 0; |
570 | t->misaligned = 1; | 570 | t->misaligned = 1; |
571 | ret = -1; | 571 | ret = -1; |
572 | } | 572 | } |
573 | 573 | ||
574 | /* Find lowest common alignment_offset */ | 574 | /* Find lowest common alignment_offset */ |
575 | t->alignment_offset = lcm(t->alignment_offset, alignment) | 575 | t->alignment_offset = lcm(t->alignment_offset, alignment) |
576 | & (max(t->physical_block_size, t->io_min) - 1); | 576 | & (max(t->physical_block_size, t->io_min) - 1); |
577 | 577 | ||
578 | /* Verify that new alignment_offset is on a logical block boundary */ | 578 | /* Verify that new alignment_offset is on a logical block boundary */ |
579 | if (t->alignment_offset & (t->logical_block_size - 1)) { | 579 | if (t->alignment_offset & (t->logical_block_size - 1)) { |
580 | t->misaligned = 1; | 580 | t->misaligned = 1; |
581 | ret = -1; | 581 | ret = -1; |
582 | } | 582 | } |
583 | 583 | ||
584 | /* Discard alignment and granularity */ | 584 | /* Discard alignment and granularity */ |
585 | if (b->discard_granularity) { | 585 | if (b->discard_granularity) { |
586 | alignment = queue_limit_discard_alignment(b, start); | 586 | alignment = queue_limit_discard_alignment(b, start); |
587 | 587 | ||
588 | if (t->discard_granularity != 0 && | 588 | if (t->discard_granularity != 0 && |
589 | t->discard_alignment != alignment) { | 589 | t->discard_alignment != alignment) { |
590 | top = t->discard_granularity + t->discard_alignment; | 590 | top = t->discard_granularity + t->discard_alignment; |
591 | bottom = b->discard_granularity + alignment; | 591 | bottom = b->discard_granularity + alignment; |
592 | 592 | ||
593 | /* Verify that top and bottom intervals line up */ | 593 | /* Verify that top and bottom intervals line up */ |
594 | if (max(top, bottom) & (min(top, bottom) - 1)) | 594 | if (max(top, bottom) & (min(top, bottom) - 1)) |
595 | t->discard_misaligned = 1; | 595 | t->discard_misaligned = 1; |
596 | } | 596 | } |
597 | 597 | ||
598 | t->max_discard_sectors = min_not_zero(t->max_discard_sectors, | 598 | t->max_discard_sectors = min_not_zero(t->max_discard_sectors, |
599 | b->max_discard_sectors); | 599 | b->max_discard_sectors); |
600 | t->discard_granularity = max(t->discard_granularity, | 600 | t->discard_granularity = max(t->discard_granularity, |
601 | b->discard_granularity); | 601 | b->discard_granularity); |
602 | t->discard_alignment = lcm(t->discard_alignment, alignment) & | 602 | t->discard_alignment = lcm(t->discard_alignment, alignment) & |
603 | (t->discard_granularity - 1); | 603 | (t->discard_granularity - 1); |
604 | } | 604 | } |
605 | 605 | ||
606 | return ret; | 606 | return ret; |
607 | } | 607 | } |
608 | EXPORT_SYMBOL(blk_stack_limits); | 608 | EXPORT_SYMBOL(blk_stack_limits); |
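The iterative pattern described above might look like the following sketch in a stacking driver. The component list and struct fields are assumptions, not code from this commit; blk_set_default_limits() is used on the assumption that the driver starts from default limits:

	struct queue_limits top;
	struct foo_component *c;
	int misaligned = 0;

	blk_set_default_limits(&top);
	list_for_each_entry(c, &array->components, list) {
		/* @start is the component's data offset in 512-byte sectors */
		if (blk_stack_limits(&top, &bdev_get_queue(c->bdev)->limits,
				     c->data_start) < 0)
			misaligned = 1;	/* top.misaligned is set as well */
	}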
609 | 609 | ||
610 | /** | 610 | /** |
611 | * bdev_stack_limits - adjust queue limits for stacked drivers | 611 | * bdev_stack_limits - adjust queue limits for stacked drivers |
612 | * @t: the stacking driver limits (top device) | 612 | * @t: the stacking driver limits (top device) |
613 | * @bdev: the component block_device (bottom) | 613 | * @bdev: the component block_device (bottom) |
614 | * @start: first data sector within component device | 614 | * @start: first data sector within component device |
615 | * | 615 | * |
616 | * Description: | 616 | * Description: |
617 | * Merges queue limits for a top device and a block_device. Returns | 617 | * Merges queue limits for a top device and a block_device. Returns |
618 | * 0 if alignment didn't change. Returns -1 if adding the bottom | 618 | * 0 if alignment didn't change. Returns -1 if adding the bottom |
619 | * device caused misalignment. | 619 | * device caused misalignment. |
620 | */ | 620 | */ |
621 | int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, | 621 | int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, |
622 | sector_t start) | 622 | sector_t start) |
623 | { | 623 | { |
624 | struct request_queue *bq = bdev_get_queue(bdev); | 624 | struct request_queue *bq = bdev_get_queue(bdev); |
625 | 625 | ||
626 | start += get_start_sect(bdev); | 626 | start += get_start_sect(bdev); |
627 | 627 | ||
628 | return blk_stack_limits(t, &bq->limits, start); | 628 | return blk_stack_limits(t, &bq->limits, start); |
629 | } | 629 | } |
630 | EXPORT_SYMBOL(bdev_stack_limits); | 630 | EXPORT_SYMBOL(bdev_stack_limits); |
631 | 631 | ||
632 | /** | 632 | /** |
633 | * disk_stack_limits - adjust queue limits for stacked drivers | 633 | * disk_stack_limits - adjust queue limits for stacked drivers |
634 | * @disk: MD/DM gendisk (top) | 634 | * @disk: MD/DM gendisk (top) |
635 | * @bdev: the underlying block device (bottom) | 635 | * @bdev: the underlying block device (bottom) |
636 | * @offset: offset to beginning of data within component device | 636 | * @offset: offset to beginning of data within component device |
637 | * | 637 | * |
638 | * Description: | 638 | * Description: |
639 | * Merges the limits for a top level gendisk and a bottom level | 639 | * Merges the limits for a top level gendisk and a bottom level |
640 | * block_device. | 640 | * block_device. |
641 | */ | 641 | */ |
642 | void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | 642 | void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, |
643 | sector_t offset) | 643 | sector_t offset) |
644 | { | 644 | { |
645 | struct request_queue *t = disk->queue; | 645 | struct request_queue *t = disk->queue; |
646 | struct request_queue *b = bdev_get_queue(bdev); | 646 | struct request_queue *b = bdev_get_queue(bdev); |
647 | 647 | ||
648 | if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { | 648 | if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { |
649 | char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; | 649 | char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; |
650 | 650 | ||
651 | disk_name(disk, 0, top); | 651 | disk_name(disk, 0, top); |
652 | bdevname(bdev, bottom); | 652 | bdevname(bdev, bottom); |
653 | 653 | ||
654 | printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", | 654 | printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", |
655 | top, bottom); | 655 | top, bottom); |
656 | } | 656 | } |
657 | 657 | ||
658 | if (!t->queue_lock) | 658 | if (!t->queue_lock) |
659 | WARN_ON_ONCE(1); | 659 | WARN_ON_ONCE(1); |
660 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { | 660 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { |
661 | unsigned long flags; | 661 | unsigned long flags; |
662 | 662 | ||
663 | spin_lock_irqsave(t->queue_lock, flags); | 663 | spin_lock_irqsave(t->queue_lock, flags); |
664 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) | 664 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) |
665 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); | 665 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); |
666 | spin_unlock_irqrestore(t->queue_lock, flags); | 666 | spin_unlock_irqrestore(t->queue_lock, flags); |
667 | } | 667 | } |
668 | } | 668 | } |
669 | EXPORT_SYMBOL(disk_stack_limits); | 669 | EXPORT_SYMBOL(disk_stack_limits); |
670 | 670 | ||
671 | /** | 671 | /** |
672 | * blk_queue_dma_pad - set pad mask | 672 | * blk_queue_dma_pad - set pad mask |
673 | * @q: the request queue for the device | 673 | * @q: the request queue for the device |
674 | * @mask: pad mask | 674 | * @mask: pad mask |
675 | * | 675 | * |
676 | * Set dma pad mask. | 676 | * Set dma pad mask. |
677 | * | 677 | * |
678 | * Appending pad buffer to a request modifies the last entry of a | 678 | * Appending pad buffer to a request modifies the last entry of a |
679 | * scatter list such that it includes the pad buffer. | 679 | * scatter list such that it includes the pad buffer. |
680 | **/ | 680 | **/ |
681 | void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) | 681 | void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) |
682 | { | 682 | { |
683 | q->dma_pad_mask = mask; | 683 | q->dma_pad_mask = mask; |
684 | } | 684 | } |
685 | EXPORT_SYMBOL(blk_queue_dma_pad); | 685 | EXPORT_SYMBOL(blk_queue_dma_pad); |
686 | 686 | ||
687 | /** | 687 | /** |
688 | * blk_queue_update_dma_pad - update pad mask | 688 | * blk_queue_update_dma_pad - update pad mask |
689 | * @q: the request queue for the device | 689 | * @q: the request queue for the device |
690 | * @mask: pad mask | 690 | * @mask: pad mask |
691 | * | 691 | * |
692 | * Update dma pad mask. | 692 | * Update dma pad mask. |
693 | * | 693 | * |
694 | * Appending pad buffer to a request modifies the last entry of a | 694 | * Appending pad buffer to a request modifies the last entry of a |
695 | * scatter list such that it includes the pad buffer. | 695 | * scatter list such that it includes the pad buffer. |
696 | **/ | 696 | **/ |
697 | void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) | 697 | void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) |
698 | { | 698 | { |
699 | if (mask > q->dma_pad_mask) | 699 | if (mask > q->dma_pad_mask) |
700 | q->dma_pad_mask = mask; | 700 | q->dma_pad_mask = mask; |
701 | } | 701 | } |
702 | EXPORT_SYMBOL(blk_queue_update_dma_pad); | 702 | EXPORT_SYMBOL(blk_queue_update_dma_pad); |
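For instance, a controller that always moves data in 4-byte units could request padding with a mask of 3; because blk_queue_update_dma_pad() only ever grows the mask, several layers (driver, transport) can each state their requirement without undoing a stricter one. The value is illustrative:

	blk_queue_update_dma_pad(q, 3);	/* pad transfer lengths to 4 bytes */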
703 | 703 | ||
704 | /** | 704 | /** |
705 | * blk_queue_dma_drain - Set up a drain buffer for excess dma. | 705 | * blk_queue_dma_drain - Set up a drain buffer for excess dma. |
706 | * @q: the request queue for the device | 706 | * @q: the request queue for the device |
707 | * @dma_drain_needed: fn which returns non-zero if drain is necessary | 707 | * @dma_drain_needed: fn which returns non-zero if drain is necessary |
708 | * @buf: physically contiguous buffer | 708 | * @buf: physically contiguous buffer |
709 | * @size: size of the buffer in bytes | 709 | * @size: size of the buffer in bytes |
710 | * | 710 | * |
711 | * Some devices have excess DMA problems and can't simply discard (or | 711 | * Some devices have excess DMA problems and can't simply discard (or |
712 | * zero fill) the unwanted piece of the transfer. They have to have a | 712 | * zero fill) the unwanted piece of the transfer. They have to have a |
713 | * real area of memory to transfer it into. The use case for this is | 713 | * real area of memory to transfer it into. The use case for this is |
714 | * ATAPI devices in DMA mode. If the packet command causes a transfer | 714 | * ATAPI devices in DMA mode. If the packet command causes a transfer |
715 | * bigger than the transfer size, some HBAs will lock up if there | 715 | * bigger than the transfer size, some HBAs will lock up if there |
716 | * aren't DMA elements to contain the excess transfer. What this API | 716 | * aren't DMA elements to contain the excess transfer. What this API |
717 | * does is adjust the queue so that the buf is always appended | 717 | * does is adjust the queue so that the buf is always appended |
718 | * silently to the scatterlist. | 718 | * silently to the scatterlist. |
719 | * | 719 | * |
720 | * Note: This routine adjusts max_hw_segments to make room for appending | 720 | * Note: This routine adjusts max_hw_segments to make room for appending |
721 | * the drain buffer. If you call blk_queue_max_segments() after calling | 721 | * the drain buffer. If you call blk_queue_max_segments() after calling |
722 | * this routine, you must set the limit to one fewer than your device | 722 | * this routine, you must set the limit to one fewer than your device |
723 | * can support otherwise there won't be room for the drain buffer. | 723 | * can support otherwise there won't be room for the drain buffer. |
724 | */ | 724 | */ |
725 | int blk_queue_dma_drain(struct request_queue *q, | 725 | int blk_queue_dma_drain(struct request_queue *q, |
726 | dma_drain_needed_fn *dma_drain_needed, | 726 | dma_drain_needed_fn *dma_drain_needed, |
727 | void *buf, unsigned int size) | 727 | void *buf, unsigned int size) |
728 | { | 728 | { |
729 | if (queue_max_segments(q) < 2) | 729 | if (queue_max_segments(q) < 2) |
730 | return -EINVAL; | 730 | return -EINVAL; |
731 | /* make room for appending the drain */ | 731 | /* make room for appending the drain */ |
732 | blk_queue_max_segments(q, queue_max_segments(q) - 1); | 732 | blk_queue_max_segments(q, queue_max_segments(q) - 1); |
733 | q->dma_drain_needed = dma_drain_needed; | 733 | q->dma_drain_needed = dma_drain_needed; |
734 | q->dma_drain_buffer = buf; | 734 | q->dma_drain_buffer = buf; |
735 | q->dma_drain_size = size; | 735 | q->dma_drain_size = size; |
736 | 736 | ||
737 | return 0; | 737 | return 0; |
738 | } | 738 | } |
739 | EXPORT_SYMBOL_GPL(blk_queue_dma_drain); | 739 | EXPORT_SYMBOL_GPL(blk_queue_dma_drain); |
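A rough sketch of how an ATAPI-style driver might use this, honoring the note above about segment accounting; the function names, the 256-byte drain size, and the REQ_TYPE_BLOCK_PC test are illustrative assumptions:

	static int foo_drain_needed(struct request *rq)
	{
		/* only packet commands risk excess-DMA lockups here */
		return rq->cmd_type == REQ_TYPE_BLOCK_PC;
	}

	static int foo_init_queue(struct request_queue *q)
	{
		void *buf = kmalloc(256, GFP_KERNEL);

		if (!buf)
			return -ENOMEM;
		/* reserves one segment for the drain buffer internally */
		return blk_queue_dma_drain(q, foo_drain_needed, buf, 256);
	}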
740 | 740 | ||
741 | /** | 741 | /** |
742 | * blk_queue_segment_boundary - set boundary rules for segment merging | 742 | * blk_queue_segment_boundary - set boundary rules for segment merging |
743 | * @q: the request queue for the device | 743 | * @q: the request queue for the device |
744 | * @mask: the memory boundary mask | 744 | * @mask: the memory boundary mask |
745 | **/ | 745 | **/ |
746 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) | 746 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) |
747 | { | 747 | { |
748 | if (mask < PAGE_CACHE_SIZE - 1) { | 748 | if (mask < PAGE_CACHE_SIZE - 1) { |
749 | mask = PAGE_CACHE_SIZE - 1; | 749 | mask = PAGE_CACHE_SIZE - 1; |
750 | printk(KERN_INFO "%s: set to minimum %lx\n", | 750 | printk(KERN_INFO "%s: set to minimum %lx\n", |
751 | __func__, mask); | 751 | __func__, mask); |
752 | } | 752 | } |
753 | 753 | ||
754 | q->limits.seg_boundary_mask = mask; | 754 | q->limits.seg_boundary_mask = mask; |
755 | } | 755 | } |
756 | EXPORT_SYMBOL(blk_queue_segment_boundary); | 756 | EXPORT_SYMBOL(blk_queue_segment_boundary); |
757 | 757 | ||
758 | /** | 758 | /** |
759 | * blk_queue_dma_alignment - set dma length and memory alignment | 759 | * blk_queue_dma_alignment - set dma length and memory alignment |
760 | * @q: the request queue for the device | 760 | * @q: the request queue for the device |
761 | * @mask: alignment mask | 761 | * @mask: alignment mask |
762 | * | 762 | * |
763 | * description: | 763 | * description: |
764 | * set required memory and length alignment for direct dma transactions. | 764 | * set required memory and length alignment for direct dma transactions. |
765 | * this is used when building direct io requests for the queue. | 765 | * this is used when building direct io requests for the queue. |
766 | * | 766 | * |
767 | **/ | 767 | **/ |
768 | void blk_queue_dma_alignment(struct request_queue *q, int mask) | 768 | void blk_queue_dma_alignment(struct request_queue *q, int mask) |
769 | { | 769 | { |
770 | q->dma_alignment = mask; | 770 | q->dma_alignment = mask; |
771 | } | 771 | } |
772 | EXPORT_SYMBOL(blk_queue_dma_alignment); | 772 | EXPORT_SYMBOL(blk_queue_dma_alignment); |
773 | 773 | ||
774 | /** | 774 | /** |
775 | * blk_queue_update_dma_alignment - update dma length and memory alignment | 775 | * blk_queue_update_dma_alignment - update dma length and memory alignment |
776 | * @q: the request queue for the device | 776 | * @q: the request queue for the device |
777 | * @mask: alignment mask | 777 | * @mask: alignment mask |
778 | * | 778 | * |
779 | * description: | 779 | * description: |
780 | * update required memory and length alignment for direct dma transactions. | 780 | * update required memory and length alignment for direct dma transactions. |
781 | * If the requested alignment is larger than the current alignment, then | 781 | * If the requested alignment is larger than the current alignment, then |
782 | * the current queue alignment is updated to the new value, otherwise it | 782 | * the current queue alignment is updated to the new value, otherwise it |
783 | * is left alone. The design of this is to allow multiple objects | 783 | * is left alone. The design of this is to allow multiple objects |
784 | * (driver, device, transport etc) to set their respective | 784 | * (driver, device, transport etc) to set their respective |
785 | * alignments without having them interfere. | 785 | * alignments without having them interfere. |
786 | * | 786 | * |
787 | **/ | 787 | **/ |
788 | void blk_queue_update_dma_alignment(struct request_queue *q, int mask) | 788 | void blk_queue_update_dma_alignment(struct request_queue *q, int mask) |
789 | { | 789 | { |
790 | BUG_ON(mask > PAGE_SIZE); | 790 | BUG_ON(mask > PAGE_SIZE); |
791 | 791 | ||
792 | if (mask > q->dma_alignment) | 792 | if (mask > q->dma_alignment) |
793 | q->dma_alignment = mask; | 793 | q->dma_alignment = mask; |
794 | } | 794 | } |
795 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); | 795 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); |
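For example, a driver whose hardware requires 512-byte aligned buffers and lengths would set a mask of 511, and a transport that merely needs 4-byte alignment can later call the update variant without weakening the stricter setting; both values are illustrative:

	blk_queue_dma_alignment(q, 511);	/* driver: 512-byte alignment */
	blk_queue_update_dma_alignment(q, 3);	/* transport: no-op, 3 < 511 */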
796 | 796 | ||
797 | /** | ||
798 | * blk_queue_flush - configure queue's cache flush capability | ||
799 | * @q: the request queue for the device | ||
800 | * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA | ||
801 | * | ||
802 | * Tell block layer cache flush capability of @q. If it supports | ||
803 | * flushing, REQ_FLUSH should be set. If it supports bypassing | ||
804 | * write cache for individual writes, REQ_FUA should be set. | ||
805 | */ | ||
806 | void blk_queue_flush(struct request_queue *q, unsigned int flush) | ||
807 | { | ||
808 | WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); | ||
809 | |||
810 | if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) | ||
811 | flush &= ~REQ_FUA; | ||
812 | |||
813 | q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); | ||
814 | } | ||
815 | EXPORT_SYMBOL_GPL(blk_queue_flush); | ||
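A converted driver would typically pick the flags from its device's capabilities. In this hypothetical sketch, wce and has_fua stand for "volatile write cache present" and "FUA writes supported":

	if (foo->wce && foo->has_fua)
		blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
	else if (foo->wce)
		blk_queue_flush(q, REQ_FLUSH);
	else
		blk_queue_flush(q, 0);	/* write-through: nothing to flush */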
816 | |||
797 | static int __init blk_settings_init(void) | 817 | static int __init blk_settings_init(void) |
798 | { | 818 | { |
799 | blk_max_low_pfn = max_low_pfn - 1; | 819 | blk_max_low_pfn = max_low_pfn - 1; |
800 | blk_max_pfn = max_pfn - 1; | 820 | blk_max_pfn = max_pfn - 1; |
801 | return 0; | 821 | return 0; |
802 | } | 822 | } |
803 | subsys_initcall(blk_settings_init); | 823 | subsys_initcall(blk_settings_init); |
804 | 824 |
drivers/block/brd.c
1 | /* | 1 | /* |
2 | * Ram backed block device driver. | 2 | * Ram backed block device driver. |
3 | * | 3 | * |
4 | * Copyright (C) 2007 Nick Piggin | 4 | * Copyright (C) 2007 Nick Piggin |
5 | * Copyright (C) 2007 Novell Inc. | 5 | * Copyright (C) 2007 Novell Inc. |
6 | * | 6 | * |
7 | * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright | 7 | * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright |
8 | * of their respective owners. | 8 | * of their respective owners. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/moduleparam.h> | 13 | #include <linux/moduleparam.h> |
14 | #include <linux/major.h> | 14 | #include <linux/major.h> |
15 | #include <linux/blkdev.h> | 15 | #include <linux/blkdev.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/highmem.h> | 17 | #include <linux/highmem.h> |
18 | #include <linux/smp_lock.h> | 18 | #include <linux/smp_lock.h> |
19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
20 | #include <linux/buffer_head.h> /* invalidate_bh_lrus() */ | 20 | #include <linux/buffer_head.h> /* invalidate_bh_lrus() */ |
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | 22 | ||
23 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
24 | 24 | ||
25 | #define SECTOR_SHIFT 9 | 25 | #define SECTOR_SHIFT 9 |
26 | #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) | 26 | #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) |
27 | #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) | 27 | #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Each block ramdisk device has a radix_tree brd_pages of pages that stores | 30 | * Each block ramdisk device has a radix_tree brd_pages of pages that stores |
31 | * the pages containing the block device's contents. A brd page's ->index is | 31 | * the pages containing the block device's contents. A brd page's ->index is |
32 | * its offset in PAGE_SIZE units. This is similar to, but in no way connected | 32 | * its offset in PAGE_SIZE units. This is similar to, but in no way connected |
33 | * with, the kernel's pagecache or buffer cache (which sit above our block | 33 | * with, the kernel's pagecache or buffer cache (which sit above our block |
34 | * device). | 34 | * device). |
35 | */ | 35 | */ |
36 | struct brd_device { | 36 | struct brd_device { |
37 | int brd_number; | 37 | int brd_number; |
38 | int brd_refcnt; | 38 | int brd_refcnt; |
39 | loff_t brd_offset; | 39 | loff_t brd_offset; |
40 | loff_t brd_sizelimit; | 40 | loff_t brd_sizelimit; |
41 | unsigned brd_blocksize; | 41 | unsigned brd_blocksize; |
42 | 42 | ||
43 | struct request_queue *brd_queue; | 43 | struct request_queue *brd_queue; |
44 | struct gendisk *brd_disk; | 44 | struct gendisk *brd_disk; |
45 | struct list_head brd_list; | 45 | struct list_head brd_list; |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * Backing store of pages and lock to protect it. This is the contents | 48 | * Backing store of pages and lock to protect it. This is the contents |
49 | * of the block device. | 49 | * of the block device. |
50 | */ | 50 | */ |
51 | spinlock_t brd_lock; | 51 | spinlock_t brd_lock; |
52 | struct radix_tree_root brd_pages; | 52 | struct radix_tree_root brd_pages; |
53 | }; | 53 | }; |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * Look up and return a brd's page for a given sector. | 56 | * Look up and return a brd's page for a given sector. |
57 | */ | 57 | */ |
58 | static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) | 58 | static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) |
59 | { | 59 | { |
60 | pgoff_t idx; | 60 | pgoff_t idx; |
61 | struct page *page; | 61 | struct page *page; |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * The page lifetime is protected by the fact that we have opened the | 64 | * The page lifetime is protected by the fact that we have opened the |
65 | * device node -- brd pages will never be deleted under us, so we | 65 | * device node -- brd pages will never be deleted under us, so we |
66 | * don't need any further locking or refcounting. | 66 | * don't need any further locking or refcounting. |
67 | * | 67 | * |
68 | * This is strictly true for the radix-tree nodes as well (ie. we | 68 | * This is strictly true for the radix-tree nodes as well (ie. we |
69 | * don't actually need the rcu_read_lock()), however that is not a | 69 | * don't actually need the rcu_read_lock()), however that is not a |
70 | * documented feature of the radix-tree API so it is better to be | 70 | * documented feature of the radix-tree API so it is better to be |
71 | * safe here (we don't have total exclusion from radix tree updates | 71 | * safe here (we don't have total exclusion from radix tree updates |
72 | * here, only deletes). | 72 | * here, only deletes). |
73 | */ | 73 | */ |
74 | rcu_read_lock(); | 74 | rcu_read_lock(); |
75 | idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ | 75 | idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ |
76 | page = radix_tree_lookup(&brd->brd_pages, idx); | 76 | page = radix_tree_lookup(&brd->brd_pages, idx); |
77 | rcu_read_unlock(); | 77 | rcu_read_unlock(); |
78 | 78 | ||
79 | BUG_ON(page && page->index != idx); | 79 | BUG_ON(page && page->index != idx); |
80 | 80 | ||
81 | return page; | 81 | return page; |
82 | } | 82 | } |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Look up and return a brd's page for a given sector. | 85 | * Look up and return a brd's page for a given sector. |
86 | * If one does not exist, allocate an empty page, and insert that. Then | 86 | * If one does not exist, allocate an empty page, and insert that. Then |
87 | * return it. | 87 | * return it. |
88 | */ | 88 | */ |
89 | static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) | 89 | static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) |
90 | { | 90 | { |
91 | pgoff_t idx; | 91 | pgoff_t idx; |
92 | struct page *page; | 92 | struct page *page; |
93 | gfp_t gfp_flags; | 93 | gfp_t gfp_flags; |
94 | 94 | ||
95 | page = brd_lookup_page(brd, sector); | 95 | page = brd_lookup_page(brd, sector); |
96 | if (page) | 96 | if (page) |
97 | return page; | 97 | return page; |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * Must use NOIO because we don't want to recurse back into the | 100 | * Must use NOIO because we don't want to recurse back into the |
101 | * block or filesystem layers from page reclaim. | 101 | * block or filesystem layers from page reclaim. |
102 | * | 102 | * |
103 | * Cannot support XIP and highmem, because our ->direct_access | 103 | * Cannot support XIP and highmem, because our ->direct_access |
104 | * routine for XIP must return memory that is always addressable. | 104 | * routine for XIP must return memory that is always addressable. |
105 | * If XIP was reworked to use pfns and kmap throughout, this | 105 | * If XIP was reworked to use pfns and kmap throughout, this |
106 | * restriction might be able to be lifted. | 106 | * restriction might be able to be lifted. |
107 | */ | 107 | */ |
108 | gfp_flags = GFP_NOIO | __GFP_ZERO; | 108 | gfp_flags = GFP_NOIO | __GFP_ZERO; |
109 | #ifndef CONFIG_BLK_DEV_XIP | 109 | #ifndef CONFIG_BLK_DEV_XIP |
110 | gfp_flags |= __GFP_HIGHMEM; | 110 | gfp_flags |= __GFP_HIGHMEM; |
111 | #endif | 111 | #endif |
112 | page = alloc_page(gfp_flags); | 112 | page = alloc_page(gfp_flags); |
113 | if (!page) | 113 | if (!page) |
114 | return NULL; | 114 | return NULL; |
115 | 115 | ||
116 | if (radix_tree_preload(GFP_NOIO)) { | 116 | if (radix_tree_preload(GFP_NOIO)) { |
117 | __free_page(page); | 117 | __free_page(page); |
118 | return NULL; | 118 | return NULL; |
119 | } | 119 | } |
120 | 120 | ||
121 | spin_lock(&brd->brd_lock); | 121 | spin_lock(&brd->brd_lock); |
122 | idx = sector >> PAGE_SECTORS_SHIFT; | 122 | idx = sector >> PAGE_SECTORS_SHIFT; |
123 | if (radix_tree_insert(&brd->brd_pages, idx, page)) { | 123 | if (radix_tree_insert(&brd->brd_pages, idx, page)) { |
124 | __free_page(page); | 124 | __free_page(page); |
125 | page = radix_tree_lookup(&brd->brd_pages, idx); | 125 | page = radix_tree_lookup(&brd->brd_pages, idx); |
126 | BUG_ON(!page); | 126 | BUG_ON(!page); |
127 | BUG_ON(page->index != idx); | 127 | BUG_ON(page->index != idx); |
128 | } else | 128 | } else |
129 | page->index = idx; | 129 | page->index = idx; |
130 | spin_unlock(&brd->brd_lock); | 130 | spin_unlock(&brd->brd_lock); |
131 | 131 | ||
132 | radix_tree_preload_end(); | 132 | radix_tree_preload_end(); |
133 | 133 | ||
134 | return page; | 134 | return page; |
135 | } | 135 | } |
136 | 136 | ||
137 | static void brd_free_page(struct brd_device *brd, sector_t sector) | 137 | static void brd_free_page(struct brd_device *brd, sector_t sector) |
138 | { | 138 | { |
139 | struct page *page; | 139 | struct page *page; |
140 | pgoff_t idx; | 140 | pgoff_t idx; |
141 | 141 | ||
142 | spin_lock(&brd->brd_lock); | 142 | spin_lock(&brd->brd_lock); |
143 | idx = sector >> PAGE_SECTORS_SHIFT; | 143 | idx = sector >> PAGE_SECTORS_SHIFT; |
144 | page = radix_tree_delete(&brd->brd_pages, idx); | 144 | page = radix_tree_delete(&brd->brd_pages, idx); |
145 | spin_unlock(&brd->brd_lock); | 145 | spin_unlock(&brd->brd_lock); |
146 | if (page) | 146 | if (page) |
147 | __free_page(page); | 147 | __free_page(page); |
148 | } | 148 | } |
149 | 149 | ||
150 | static void brd_zero_page(struct brd_device *brd, sector_t sector) | 150 | static void brd_zero_page(struct brd_device *brd, sector_t sector) |
151 | { | 151 | { |
152 | struct page *page; | 152 | struct page *page; |
153 | 153 | ||
154 | page = brd_lookup_page(brd, sector); | 154 | page = brd_lookup_page(brd, sector); |
155 | if (page) | 155 | if (page) |
156 | clear_highpage(page); | 156 | clear_highpage(page); |
157 | } | 157 | } |
158 | 158 | ||
159 | /* | 159 | /* |
160 | * Free all backing store pages and radix tree. This must only be called when | 160 | * Free all backing store pages and radix tree. This must only be called when |
161 | * there are no other users of the device. | 161 | * there are no other users of the device. |
162 | */ | 162 | */ |
163 | #define FREE_BATCH 16 | 163 | #define FREE_BATCH 16 |
164 | static void brd_free_pages(struct brd_device *brd) | 164 | static void brd_free_pages(struct brd_device *brd) |
165 | { | 165 | { |
166 | unsigned long pos = 0; | 166 | unsigned long pos = 0; |
167 | struct page *pages[FREE_BATCH]; | 167 | struct page *pages[FREE_BATCH]; |
168 | int nr_pages; | 168 | int nr_pages; |
169 | 169 | ||
170 | do { | 170 | do { |
171 | int i; | 171 | int i; |
172 | 172 | ||
173 | nr_pages = radix_tree_gang_lookup(&brd->brd_pages, | 173 | nr_pages = radix_tree_gang_lookup(&brd->brd_pages, |
174 | (void **)pages, pos, FREE_BATCH); | 174 | (void **)pages, pos, FREE_BATCH); |
175 | 175 | ||
176 | for (i = 0; i < nr_pages; i++) { | 176 | for (i = 0; i < nr_pages; i++) { |
177 | void *ret; | 177 | void *ret; |
178 | 178 | ||
179 | BUG_ON(pages[i]->index < pos); | 179 | BUG_ON(pages[i]->index < pos); |
180 | pos = pages[i]->index; | 180 | pos = pages[i]->index; |
181 | ret = radix_tree_delete(&brd->brd_pages, pos); | 181 | ret = radix_tree_delete(&brd->brd_pages, pos); |
182 | BUG_ON(!ret || ret != pages[i]); | 182 | BUG_ON(!ret || ret != pages[i]); |
183 | __free_page(pages[i]); | 183 | __free_page(pages[i]); |
184 | } | 184 | } |
185 | 185 | ||
186 | pos++; | 186 | pos++; |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * This assumes radix_tree_gang_lookup always returns as | 189 | * This assumes radix_tree_gang_lookup always returns as |
190 | * many pages as possible. If the radix-tree code changes, | 190 | * many pages as possible. If the radix-tree code changes, |
191 | * so will this have to. | 191 | * so will this have to. |
192 | */ | 192 | */ |
193 | } while (nr_pages == FREE_BATCH); | 193 | } while (nr_pages == FREE_BATCH); |
194 | } | 194 | } |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * copy_to_brd_setup must be called before copy_to_brd. It may sleep. | 197 | * copy_to_brd_setup must be called before copy_to_brd. It may sleep. |
198 | */ | 198 | */ |
199 | static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) | 199 | static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) |
200 | { | 200 | { |
201 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | 201 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; |
202 | size_t copy; | 202 | size_t copy; |
203 | 203 | ||
204 | copy = min_t(size_t, n, PAGE_SIZE - offset); | 204 | copy = min_t(size_t, n, PAGE_SIZE - offset); |
205 | if (!brd_insert_page(brd, sector)) | 205 | if (!brd_insert_page(brd, sector)) |
206 | return -ENOMEM; | 206 | return -ENOMEM; |
207 | if (copy < n) { | 207 | if (copy < n) { |
208 | sector += copy >> SECTOR_SHIFT; | 208 | sector += copy >> SECTOR_SHIFT; |
209 | if (!brd_insert_page(brd, sector)) | 209 | if (!brd_insert_page(brd, sector)) |
210 | return -ENOMEM; | 210 | return -ENOMEM; |
211 | } | 211 | } |
212 | return 0; | 212 | return 0; |
213 | } | 213 | } |
214 | 214 | ||
215 | static void discard_from_brd(struct brd_device *brd, | 215 | static void discard_from_brd(struct brd_device *brd, |
216 | sector_t sector, size_t n) | 216 | sector_t sector, size_t n) |
217 | { | 217 | { |
218 | while (n >= PAGE_SIZE) { | 218 | while (n >= PAGE_SIZE) { |
219 | /* | 219 | /* |
220 | * Don't want to actually discard pages here because | 220 | * Don't want to actually discard pages here because |
221 | * re-allocating the pages can result in writeback | 221 | * re-allocating the pages can result in writeback |
222 | * deadlocks under heavy load. | 222 | * deadlocks under heavy load. |
223 | */ | 223 | */ |
224 | if (0) | 224 | if (0) |
225 | brd_free_page(brd, sector); | 225 | brd_free_page(brd, sector); |
226 | else | 226 | else |
227 | brd_zero_page(brd, sector); | 227 | brd_zero_page(brd, sector); |
228 | sector += PAGE_SIZE >> SECTOR_SHIFT; | 228 | sector += PAGE_SIZE >> SECTOR_SHIFT; |
229 | n -= PAGE_SIZE; | 229 | n -= PAGE_SIZE; |
230 | } | 230 | } |
231 | } | 231 | } |
232 | 232 | ||
233 | /* | 233 | /* |
234 | * Copy n bytes from src to the brd starting at sector. Does not sleep. | 234 | * Copy n bytes from src to the brd starting at sector. Does not sleep. |
235 | */ | 235 | */ |
236 | static void copy_to_brd(struct brd_device *brd, const void *src, | 236 | static void copy_to_brd(struct brd_device *brd, const void *src, |
237 | sector_t sector, size_t n) | 237 | sector_t sector, size_t n) |
238 | { | 238 | { |
239 | struct page *page; | 239 | struct page *page; |
240 | void *dst; | 240 | void *dst; |
241 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | 241 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; |
242 | size_t copy; | 242 | size_t copy; |
243 | 243 | ||
244 | copy = min_t(size_t, n, PAGE_SIZE - offset); | 244 | copy = min_t(size_t, n, PAGE_SIZE - offset); |
245 | page = brd_lookup_page(brd, sector); | 245 | page = brd_lookup_page(brd, sector); |
246 | BUG_ON(!page); | 246 | BUG_ON(!page); |
247 | 247 | ||
248 | dst = kmap_atomic(page, KM_USER1); | 248 | dst = kmap_atomic(page, KM_USER1); |
249 | memcpy(dst + offset, src, copy); | 249 | memcpy(dst + offset, src, copy); |
250 | kunmap_atomic(dst, KM_USER1); | 250 | kunmap_atomic(dst, KM_USER1); |
251 | 251 | ||
252 | if (copy < n) { | 252 | if (copy < n) { |
253 | src += copy; | 253 | src += copy; |
254 | sector += copy >> SECTOR_SHIFT; | 254 | sector += copy >> SECTOR_SHIFT; |
255 | copy = n - copy; | 255 | copy = n - copy; |
256 | page = brd_lookup_page(brd, sector); | 256 | page = brd_lookup_page(brd, sector); |
257 | BUG_ON(!page); | 257 | BUG_ON(!page); |
258 | 258 | ||
259 | dst = kmap_atomic(page, KM_USER1); | 259 | dst = kmap_atomic(page, KM_USER1); |
260 | memcpy(dst, src, copy); | 260 | memcpy(dst, src, copy); |
261 | kunmap_atomic(dst, KM_USER1); | 261 | kunmap_atomic(dst, KM_USER1); |
262 | } | 262 | } |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * Copy n bytes to dst from the brd starting at sector. Does not sleep. | 266 | * Copy n bytes to dst from the brd starting at sector. Does not sleep. |
267 | */ | 267 | */ |
268 | static void copy_from_brd(void *dst, struct brd_device *brd, | 268 | static void copy_from_brd(void *dst, struct brd_device *brd, |
269 | sector_t sector, size_t n) | 269 | sector_t sector, size_t n) |
270 | { | 270 | { |
271 | struct page *page; | 271 | struct page *page; |
272 | void *src; | 272 | void *src; |
273 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | 273 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; |
274 | size_t copy; | 274 | size_t copy; |
275 | 275 | ||
276 | copy = min_t(size_t, n, PAGE_SIZE - offset); | 276 | copy = min_t(size_t, n, PAGE_SIZE - offset); |
277 | page = brd_lookup_page(brd, sector); | 277 | page = brd_lookup_page(brd, sector); |
278 | if (page) { | 278 | if (page) { |
279 | src = kmap_atomic(page, KM_USER1); | 279 | src = kmap_atomic(page, KM_USER1); |
280 | memcpy(dst, src + offset, copy); | 280 | memcpy(dst, src + offset, copy); |
281 | kunmap_atomic(src, KM_USER1); | 281 | kunmap_atomic(src, KM_USER1); |
282 | } else | 282 | } else |
283 | memset(dst, 0, copy); | 283 | memset(dst, 0, copy); |
284 | 284 | ||
285 | if (copy < n) { | 285 | if (copy < n) { |
286 | dst += copy; | 286 | dst += copy; |
287 | sector += copy >> SECTOR_SHIFT; | 287 | sector += copy >> SECTOR_SHIFT; |
288 | copy = n - copy; | 288 | copy = n - copy; |
289 | page = brd_lookup_page(brd, sector); | 289 | page = brd_lookup_page(brd, sector); |
290 | if (page) { | 290 | if (page) { |
291 | src = kmap_atomic(page, KM_USER1); | 291 | src = kmap_atomic(page, KM_USER1); |
292 | memcpy(dst, src, copy); | 292 | memcpy(dst, src, copy); |
293 | kunmap_atomic(src, KM_USER1); | 293 | kunmap_atomic(src, KM_USER1); |
294 | } else | 294 | } else |
295 | memset(dst, 0, copy); | 295 | memset(dst, 0, copy); |
296 | } | 296 | } |
297 | } | 297 | } |
298 | 298 | ||
299 | /* | 299 | /* |
300 | * Process a single bvec of a bio. | 300 | * Process a single bvec of a bio. |
301 | */ | 301 | */ |
302 | static int brd_do_bvec(struct brd_device *brd, struct page *page, | 302 | static int brd_do_bvec(struct brd_device *brd, struct page *page, |
303 | unsigned int len, unsigned int off, int rw, | 303 | unsigned int len, unsigned int off, int rw, |
304 | sector_t sector) | 304 | sector_t sector) |
305 | { | 305 | { |
306 | void *mem; | 306 | void *mem; |
307 | int err = 0; | 307 | int err = 0; |
308 | 308 | ||
309 | if (rw != READ) { | 309 | if (rw != READ) { |
310 | err = copy_to_brd_setup(brd, sector, len); | 310 | err = copy_to_brd_setup(brd, sector, len); |
311 | if (err) | 311 | if (err) |
312 | goto out; | 312 | goto out; |
313 | } | 313 | } |
314 | 314 | ||
315 | mem = kmap_atomic(page, KM_USER0); | 315 | mem = kmap_atomic(page, KM_USER0); |
316 | if (rw == READ) { | 316 | if (rw == READ) { |
317 | copy_from_brd(mem + off, brd, sector, len); | 317 | copy_from_brd(mem + off, brd, sector, len); |
318 | flush_dcache_page(page); | 318 | flush_dcache_page(page); |
319 | } else { | 319 | } else { |
320 | flush_dcache_page(page); | 320 | flush_dcache_page(page); |
321 | copy_to_brd(brd, mem + off, sector, len); | 321 | copy_to_brd(brd, mem + off, sector, len); |
322 | } | 322 | } |
323 | kunmap_atomic(mem, KM_USER0); | 323 | kunmap_atomic(mem, KM_USER0); |
324 | 324 | ||
325 | out: | 325 | out: |
326 | return err; | 326 | return err; |
327 | } | 327 | } |
328 | 328 | ||
329 | static int brd_make_request(struct request_queue *q, struct bio *bio) | 329 | static int brd_make_request(struct request_queue *q, struct bio *bio) |
330 | { | 330 | { |
331 | struct block_device *bdev = bio->bi_bdev; | 331 | struct block_device *bdev = bio->bi_bdev; |
332 | struct brd_device *brd = bdev->bd_disk->private_data; | 332 | struct brd_device *brd = bdev->bd_disk->private_data; |
333 | int rw; | 333 | int rw; |
334 | struct bio_vec *bvec; | 334 | struct bio_vec *bvec; |
335 | sector_t sector; | 335 | sector_t sector; |
336 | int i; | 336 | int i; |
337 | int err = -EIO; | 337 | int err = -EIO; |
338 | 338 | ||
339 | sector = bio->bi_sector; | 339 | sector = bio->bi_sector; |
340 | if (sector + (bio->bi_size >> SECTOR_SHIFT) > | 340 | if (sector + (bio->bi_size >> SECTOR_SHIFT) > |
341 | get_capacity(bdev->bd_disk)) | 341 | get_capacity(bdev->bd_disk)) |
342 | goto out; | 342 | goto out; |
343 | 343 | ||
344 | if (unlikely(bio->bi_rw & REQ_DISCARD)) { | 344 | if (unlikely(bio->bi_rw & REQ_DISCARD)) { |
345 | err = 0; | 345 | err = 0; |
346 | discard_from_brd(brd, sector, bio->bi_size); | 346 | discard_from_brd(brd, sector, bio->bi_size); |
347 | goto out; | 347 | goto out; |
348 | } | 348 | } |
349 | 349 | ||
350 | rw = bio_rw(bio); | 350 | rw = bio_rw(bio); |
351 | if (rw == READA) | 351 | if (rw == READA) |
352 | rw = READ; | 352 | rw = READ; |
353 | 353 | ||
354 | bio_for_each_segment(bvec, bio, i) { | 354 | bio_for_each_segment(bvec, bio, i) { |
355 | unsigned int len = bvec->bv_len; | 355 | unsigned int len = bvec->bv_len; |
356 | err = brd_do_bvec(brd, bvec->bv_page, len, | 356 | err = brd_do_bvec(brd, bvec->bv_page, len, |
357 | bvec->bv_offset, rw, sector); | 357 | bvec->bv_offset, rw, sector); |
358 | if (err) | 358 | if (err) |
359 | break; | 359 | break; |
360 | sector += len >> SECTOR_SHIFT; | 360 | sector += len >> SECTOR_SHIFT; |
361 | } | 361 | } |
362 | 362 | ||
363 | out: | 363 | out: |
364 | bio_endio(bio, err); | 364 | bio_endio(bio, err); |
365 | 365 | ||
366 | return 0; | 366 | return 0; |
367 | } | 367 | } |
368 | 368 | ||
369 | #ifdef CONFIG_BLK_DEV_XIP | 369 | #ifdef CONFIG_BLK_DEV_XIP |
370 | static int brd_direct_access(struct block_device *bdev, sector_t sector, | 370 | static int brd_direct_access(struct block_device *bdev, sector_t sector, |
371 | void **kaddr, unsigned long *pfn) | 371 | void **kaddr, unsigned long *pfn) |
372 | { | 372 | { |
373 | struct brd_device *brd = bdev->bd_disk->private_data; | 373 | struct brd_device *brd = bdev->bd_disk->private_data; |
374 | struct page *page; | 374 | struct page *page; |
375 | 375 | ||
376 | if (!brd) | 376 | if (!brd) |
377 | return -ENODEV; | 377 | return -ENODEV; |
378 | if (sector & (PAGE_SECTORS-1)) | 378 | if (sector & (PAGE_SECTORS-1)) |
379 | return -EINVAL; | 379 | return -EINVAL; |
380 | if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk)) | 380 | if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk)) |
381 | return -ERANGE; | 381 | return -ERANGE; |
382 | page = brd_insert_page(brd, sector); | 382 | page = brd_insert_page(brd, sector); |
383 | if (!page) | 383 | if (!page) |
384 | return -ENOMEM; | 384 | return -ENOMEM; |
385 | *kaddr = page_address(page); | 385 | *kaddr = page_address(page); |
386 | *pfn = page_to_pfn(page); | 386 | *pfn = page_to_pfn(page); |
387 | 387 | ||
388 | return 0; | 388 | return 0; |
389 | } | 389 | } |
390 | #endif | 390 | #endif |
391 | 391 | ||
392 | static int brd_ioctl(struct block_device *bdev, fmode_t mode, | 392 | static int brd_ioctl(struct block_device *bdev, fmode_t mode, |
393 | unsigned int cmd, unsigned long arg) | 393 | unsigned int cmd, unsigned long arg) |
394 | { | 394 | { |
395 | int error; | 395 | int error; |
396 | struct brd_device *brd = bdev->bd_disk->private_data; | 396 | struct brd_device *brd = bdev->bd_disk->private_data; |
397 | 397 | ||
398 | if (cmd != BLKFLSBUF) | 398 | if (cmd != BLKFLSBUF) |
399 | return -ENOTTY; | 399 | return -ENOTTY; |
400 | 400 | ||
401 | /* | 401 | /* |
402 | * ram device BLKFLSBUF has special semantics, we want to actually | 402 | * ram device BLKFLSBUF has special semantics, we want to actually |
403 | * release and destroy the ramdisk data. | 403 | * release and destroy the ramdisk data. |
404 | */ | 404 | */ |
405 | lock_kernel(); | 405 | lock_kernel(); |
406 | mutex_lock(&bdev->bd_mutex); | 406 | mutex_lock(&bdev->bd_mutex); |
407 | error = -EBUSY; | 407 | error = -EBUSY; |
408 | if (bdev->bd_openers <= 1) { | 408 | if (bdev->bd_openers <= 1) { |
409 | /* | 409 | /* |
410 | * Invalidate the cache first, so it isn't written | 410 | * Invalidate the cache first, so it isn't written |
411 | * back to the device. | 411 | * back to the device. |
412 | * | 412 | * |
413 | * Another thread might instantiate more buffercache here, | 413 | * Another thread might instantiate more buffercache here, |
414 | * but there is not much we can do to close that race. | 414 | * but there is not much we can do to close that race. |
415 | */ | 415 | */ |
416 | invalidate_bh_lrus(); | 416 | invalidate_bh_lrus(); |
417 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | 417 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); |
418 | brd_free_pages(brd); | 418 | brd_free_pages(brd); |
419 | error = 0; | 419 | error = 0; |
420 | } | 420 | } |
421 | mutex_unlock(&bdev->bd_mutex); | 421 | mutex_unlock(&bdev->bd_mutex); |
422 | unlock_kernel(); | 422 | unlock_kernel(); |
423 | 423 | ||
424 | return error; | 424 | return error; |
425 | } | 425 | } |
426 | 426 | ||
427 | static const struct block_device_operations brd_fops = { | 427 | static const struct block_device_operations brd_fops = { |
428 | .owner = THIS_MODULE, | 428 | .owner = THIS_MODULE, |
429 | .ioctl = brd_ioctl, | 429 | .ioctl = brd_ioctl, |
430 | #ifdef CONFIG_BLK_DEV_XIP | 430 | #ifdef CONFIG_BLK_DEV_XIP |
431 | .direct_access = brd_direct_access, | 431 | .direct_access = brd_direct_access, |
432 | #endif | 432 | #endif |
433 | }; | 433 | }; |
434 | 434 | ||
435 | /* | 435 | /* |
436 | * And now the modules code and kernel interface. | 436 | * And now the modules code and kernel interface. |
437 | */ | 437 | */ |
438 | static int rd_nr; | 438 | static int rd_nr; |
439 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; | 439 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; |
440 | static int max_part; | 440 | static int max_part; |
441 | static int part_shift; | 441 | static int part_shift; |
442 | module_param(rd_nr, int, 0); | 442 | module_param(rd_nr, int, 0); |
443 | MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); | 443 | MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); |
444 | module_param(rd_size, int, 0); | 444 | module_param(rd_size, int, 0); |
445 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | 445 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); |
446 | module_param(max_part, int, 0); | 446 | module_param(max_part, int, 0); |
447 | MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk"); | 447 | MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk"); |
448 | MODULE_LICENSE("GPL"); | 448 | MODULE_LICENSE("GPL"); |
449 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | 449 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); |
450 | MODULE_ALIAS("rd"); | 450 | MODULE_ALIAS("rd"); |
451 | 451 | ||
452 | #ifndef MODULE | 452 | #ifndef MODULE |
453 | /* Legacy boot options - nonmodular */ | 453 | /* Legacy boot options - nonmodular */ |
454 | static int __init ramdisk_size(char *str) | 454 | static int __init ramdisk_size(char *str) |
455 | { | 455 | { |
456 | rd_size = simple_strtol(str, NULL, 0); | 456 | rd_size = simple_strtol(str, NULL, 0); |
457 | return 1; | 457 | return 1; |
458 | } | 458 | } |
459 | __setup("ramdisk_size=", ramdisk_size); | 459 | __setup("ramdisk_size=", ramdisk_size); |
460 | #endif | 460 | #endif |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * The device scheme is derived from loop.c. Keep them in synch where possible | 463 | * The device scheme is derived from loop.c. Keep them in synch where possible |
464 | * (should share code eventually). | 464 | * (should share code eventually). |
465 | */ | 465 | */ |
466 | static LIST_HEAD(brd_devices); | 466 | static LIST_HEAD(brd_devices); |
467 | static DEFINE_MUTEX(brd_devices_mutex); | 467 | static DEFINE_MUTEX(brd_devices_mutex); |
468 | 468 | ||
469 | static struct brd_device *brd_alloc(int i) | 469 | static struct brd_device *brd_alloc(int i) |
470 | { | 470 | { |
471 | struct brd_device *brd; | 471 | struct brd_device *brd; |
472 | struct gendisk *disk; | 472 | struct gendisk *disk; |
473 | 473 | ||
474 | brd = kzalloc(sizeof(*brd), GFP_KERNEL); | 474 | brd = kzalloc(sizeof(*brd), GFP_KERNEL); |
475 | if (!brd) | 475 | if (!brd) |
476 | goto out; | 476 | goto out; |
477 | brd->brd_number = i; | 477 | brd->brd_number = i; |
478 | spin_lock_init(&brd->brd_lock); | 478 | spin_lock_init(&brd->brd_lock); |
479 | INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); | 479 | INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); |
480 | 480 | ||
481 | brd->brd_queue = blk_alloc_queue(GFP_KERNEL); | 481 | brd->brd_queue = blk_alloc_queue(GFP_KERNEL); |
482 | if (!brd->brd_queue) | 482 | if (!brd->brd_queue) |
483 | goto out_free_dev; | 483 | goto out_free_dev; |
484 | blk_queue_make_request(brd->brd_queue, brd_make_request); | 484 | blk_queue_make_request(brd->brd_queue, brd_make_request); |
485 | blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); | ||
486 | blk_queue_max_hw_sectors(brd->brd_queue, 1024); | 485 | blk_queue_max_hw_sectors(brd->brd_queue, 1024); |
487 | blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); | 486 | blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); |
488 | 487 | ||
489 | brd->brd_queue->limits.discard_granularity = PAGE_SIZE; | 488 | brd->brd_queue->limits.discard_granularity = PAGE_SIZE; |
490 | brd->brd_queue->limits.max_discard_sectors = UINT_MAX; | 489 | brd->brd_queue->limits.max_discard_sectors = UINT_MAX; |
491 | brd->brd_queue->limits.discard_zeroes_data = 1; | 490 | brd->brd_queue->limits.discard_zeroes_data = 1; |
492 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); | 491 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); |
493 | 492 | ||
494 | disk = brd->brd_disk = alloc_disk(1 << part_shift); | 493 | disk = brd->brd_disk = alloc_disk(1 << part_shift); |
495 | if (!disk) | 494 | if (!disk) |
496 | goto out_free_queue; | 495 | goto out_free_queue; |
497 | disk->major = RAMDISK_MAJOR; | 496 | disk->major = RAMDISK_MAJOR; |
498 | disk->first_minor = i << part_shift; | 497 | disk->first_minor = i << part_shift; |
499 | disk->fops = &brd_fops; | 498 | disk->fops = &brd_fops; |
500 | disk->private_data = brd; | 499 | disk->private_data = brd; |
501 | disk->queue = brd->brd_queue; | 500 | disk->queue = brd->brd_queue; |
502 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | 501 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; |
503 | sprintf(disk->disk_name, "ram%d", i); | 502 | sprintf(disk->disk_name, "ram%d", i); |
504 | set_capacity(disk, rd_size * 2); | 503 | set_capacity(disk, rd_size * 2); |
505 | 504 | ||
506 | return brd; | 505 | return brd; |
507 | 506 | ||
508 | out_free_queue: | 507 | out_free_queue: |
509 | blk_cleanup_queue(brd->brd_queue); | 508 | blk_cleanup_queue(brd->brd_queue); |
510 | out_free_dev: | 509 | out_free_dev: |
511 | kfree(brd); | 510 | kfree(brd); |
512 | out: | 511 | out: |
513 | return NULL; | 512 | return NULL; |
514 | } | 513 | } |
515 | 514 | ||
516 | static void brd_free(struct brd_device *brd) | 515 | static void brd_free(struct brd_device *brd) |
517 | { | 516 | { |
518 | put_disk(brd->brd_disk); | 517 | put_disk(brd->brd_disk); |
519 | blk_cleanup_queue(brd->brd_queue); | 518 | blk_cleanup_queue(brd->brd_queue); |
520 | brd_free_pages(brd); | 519 | brd_free_pages(brd); |
521 | kfree(brd); | 520 | kfree(brd); |
522 | } | 521 | } |
523 | 522 | ||
524 | static struct brd_device *brd_init_one(int i) | 523 | static struct brd_device *brd_init_one(int i) |
525 | { | 524 | { |
526 | struct brd_device *brd; | 525 | struct brd_device *brd; |
527 | 526 | ||
528 | list_for_each_entry(brd, &brd_devices, brd_list) { | 527 | list_for_each_entry(brd, &brd_devices, brd_list) { |
529 | if (brd->brd_number == i) | 528 | if (brd->brd_number == i) |
530 | goto out; | 529 | goto out; |
531 | } | 530 | } |
532 | 531 | ||
533 | brd = brd_alloc(i); | 532 | brd = brd_alloc(i); |
534 | if (brd) { | 533 | if (brd) { |
535 | add_disk(brd->brd_disk); | 534 | add_disk(brd->brd_disk); |
536 | list_add_tail(&brd->brd_list, &brd_devices); | 535 | list_add_tail(&brd->brd_list, &brd_devices); |
537 | } | 536 | } |
538 | out: | 537 | out: |
539 | return brd; | 538 | return brd; |
540 | } | 539 | } |
541 | 540 | ||
542 | static void brd_del_one(struct brd_device *brd) | 541 | static void brd_del_one(struct brd_device *brd) |
543 | { | 542 | { |
544 | list_del(&brd->brd_list); | 543 | list_del(&brd->brd_list); |
545 | del_gendisk(brd->brd_disk); | 544 | del_gendisk(brd->brd_disk); |
546 | brd_free(brd); | 545 | brd_free(brd); |
547 | } | 546 | } |
548 | 547 | ||
549 | static struct kobject *brd_probe(dev_t dev, int *part, void *data) | 548 | static struct kobject *brd_probe(dev_t dev, int *part, void *data) |
550 | { | 549 | { |
551 | struct brd_device *brd; | 550 | struct brd_device *brd; |
552 | struct kobject *kobj; | 551 | struct kobject *kobj; |
553 | 552 | ||
554 | mutex_lock(&brd_devices_mutex); | 553 | mutex_lock(&brd_devices_mutex); |
555 | brd = brd_init_one(dev & MINORMASK); | 554 | brd = brd_init_one(dev & MINORMASK); |
556 | kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); | 555 | kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); |
557 | mutex_unlock(&brd_devices_mutex); | 556 | mutex_unlock(&brd_devices_mutex); |
558 | 557 | ||
559 | *part = 0; | 558 | *part = 0; |
560 | return kobj; | 559 | return kobj; |
561 | } | 560 | } |
562 | 561 | ||
563 | static int __init brd_init(void) | 562 | static int __init brd_init(void) |
564 | { | 563 | { |
565 | int i, nr; | 564 | int i, nr; |
566 | unsigned long range; | 565 | unsigned long range; |
567 | struct brd_device *brd, *next; | 566 | struct brd_device *brd, *next; |
568 | 567 | ||
569 | /* | 568 | /* |
570 | * brd module now has a feature to instantiate underlying device | 569 | * brd module now has a feature to instantiate underlying device |
571 | * structure on-demand, provided that there is an access dev node. | 570 | * structure on-demand, provided that there is an access dev node. |
572 | * However, this will not work well with user space tool that doesn't | 571 | * However, this will not work well with user space tool that doesn't |
573 | * know about such "feature". In order to not break any existing | 572 | * know about such "feature". In order to not break any existing |
574 | * tool, we do the following: | 573 | * tool, we do the following: |
575 | * | 574 | * |
576 | * (1) if rd_nr is specified, create that many upfront, and this | 575 | * (1) if rd_nr is specified, create that many upfront, and this |
577 | * also becomes a hard limit. | 576 | * also becomes a hard limit. |
578 | * (2) if rd_nr is not specified, create 1 rd device on module | 577 | * (2) if rd_nr is not specified, create 1 rd device on module |
579 | * load, user can further extend brd device by create dev node | 578 | * load, user can further extend brd device by create dev node |
580 | * themselves and have kernel automatically instantiate actual | 579 | * themselves and have kernel automatically instantiate actual |
581 | * device on-demand. | 580 | * device on-demand. |
582 | */ | 581 | */ |
583 | 582 | ||
584 | part_shift = 0; | 583 | part_shift = 0; |
585 | if (max_part > 0) | 584 | if (max_part > 0) |
586 | part_shift = fls(max_part); | 585 | part_shift = fls(max_part); |
587 | 586 | ||
588 | if (rd_nr > 1UL << (MINORBITS - part_shift)) | 587 | if (rd_nr > 1UL << (MINORBITS - part_shift)) |
589 | return -EINVAL; | 588 | return -EINVAL; |
590 | 589 | ||
591 | if (rd_nr) { | 590 | if (rd_nr) { |
592 | nr = rd_nr; | 591 | nr = rd_nr; |
593 | range = rd_nr; | 592 | range = rd_nr; |
594 | } else { | 593 | } else { |
595 | nr = CONFIG_BLK_DEV_RAM_COUNT; | 594 | nr = CONFIG_BLK_DEV_RAM_COUNT; |
596 | range = 1UL << (MINORBITS - part_shift); | 595 | range = 1UL << (MINORBITS - part_shift); |
597 | } | 596 | } |
598 | 597 | ||
599 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) | 598 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) |
600 | return -EIO; | 599 | return -EIO; |
601 | 600 | ||
602 | for (i = 0; i < nr; i++) { | 601 | for (i = 0; i < nr; i++) { |
603 | brd = brd_alloc(i); | 602 | brd = brd_alloc(i); |
604 | if (!brd) | 603 | if (!brd) |
605 | goto out_free; | 604 | goto out_free; |
606 | list_add_tail(&brd->brd_list, &brd_devices); | 605 | list_add_tail(&brd->brd_list, &brd_devices); |
607 | } | 606 | } |
608 | 607 | ||
609 | /* point of no return */ | 608 | /* point of no return */ |
610 | 609 | ||
611 | list_for_each_entry(brd, &brd_devices, brd_list) | 610 | list_for_each_entry(brd, &brd_devices, brd_list) |
612 | add_disk(brd->brd_disk); | 611 | add_disk(brd->brd_disk); |
613 | 612 | ||
614 | blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range, | 613 | blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range, |
615 | THIS_MODULE, brd_probe, NULL, NULL); | 614 | THIS_MODULE, brd_probe, NULL, NULL); |
616 | 615 | ||
617 | printk(KERN_INFO "brd: module loaded\n"); | 616 | printk(KERN_INFO "brd: module loaded\n"); |
618 | return 0; | 617 | return 0; |
619 | 618 | ||
620 | out_free: | 619 | out_free: |
621 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { | 620 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { |
622 | list_del(&brd->brd_list); | 621 | list_del(&brd->brd_list); |
623 | brd_free(brd); | 622 | brd_free(brd); |
624 | } | 623 | } |
625 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | 624 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
626 | 625 | ||
627 | return -ENOMEM; | 626 | return -ENOMEM; |
628 | } | 627 | } |
629 | 628 | ||
630 | static void __exit brd_exit(void) | 629 | static void __exit brd_exit(void) |
631 | { | 630 | { |
632 | unsigned long range; | 631 | unsigned long range; |
633 | struct brd_device *brd, *next; | 632 | struct brd_device *brd, *next; |
634 | 633 | ||
635 | range = rd_nr ? rd_nr : 1UL << (MINORBITS - part_shift); | 634 | range = rd_nr ? rd_nr : 1UL << (MINORBITS - part_shift); |
636 | 635 | ||
637 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) | 636 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) |
638 | brd_del_one(brd); | 637 | brd_del_one(brd); |
639 | 638 | ||
640 | blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range); | 639 | blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range); |
641 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | 640 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
642 | } | 641 | } |
643 | 642 | ||
644 | module_init(brd_init); | 643 | module_init(brd_init); |
645 | module_exit(brd_exit); | 644 | module_exit(brd_exit); |
646 | 645 | ||
647 | 646 |
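The brd hunk above drops its ordered-mode call with no replacement, since a RAM-backed disk has no volatile write cache to flush. For comparison, here is a minimal sketch of how a driver that does have such a cache would advertise it with blk_queue_flush(); the foo_* names are hypothetical, only blk_queue_flush(), REQ_FLUSH and REQ_FUA are real kernel symbols.

	#include <linux/blkdev.h>

	/*
	 * Sketch only: queue setup for a hypothetical driver "foo" whose
	 * device has a volatile write cache and honours FUA writes.
	 */
	static int foo_setup_queue(struct foo_device *foo)	/* foo_device is illustrative */
	{
		foo->queue = blk_alloc_queue(GFP_KERNEL);
		if (!foo->queue)
			return -ENOMEM;

		blk_queue_make_request(foo->queue, foo_make_request);	/* illustrative fn */

		/*
		 * REQ_FLUSH: the device has a write cache that can be flushed.
		 * REQ_FUA:   the device also handles Forced Unit Access writes.
		 * A write-through device would simply not call blk_queue_flush().
		 */
		blk_queue_flush(foo->queue, REQ_FLUSH | REQ_FUA);

		return 0;
	}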
drivers/block/loop.c
1 | /* | 1 | /* |
2 | * linux/drivers/block/loop.c | 2 | * linux/drivers/block/loop.c |
3 | * | 3 | * |
4 | * Written by Theodore Ts'o, 3/29/93 | 4 | * Written by Theodore Ts'o, 3/29/93 |
5 | * | 5 | * |
6 | * Copyright 1993 by Theodore Ts'o. Redistribution of this file is | 6 | * Copyright 1993 by Theodore Ts'o. Redistribution of this file is |
7 | * permitted under the GNU General Public License. | 7 | * permitted under the GNU General Public License. |
8 | * | 8 | * |
9 | * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 | 9 | * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 |
10 | * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 | 10 | * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 |
11 | * | 11 | * |
12 | * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 | 12 | * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 |
13 | * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 | 13 | * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 |
14 | * | 14 | * |
15 | * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 | 15 | * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 |
16 | * | 16 | * |
17 | * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 | 17 | * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 |
18 | * | 18 | * |
19 | * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 | 19 | * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 |
20 | * | 20 | * |
21 | * Loadable modules and other fixes by AK, 1998 | 21 | * Loadable modules and other fixes by AK, 1998 |
22 | * | 22 | * |
23 | * Make real block number available to downstream transfer functions, enables | 23 | * Make real block number available to downstream transfer functions, enables |
24 | * CBC (and relatives) mode encryption requiring unique IVs per data block. | 24 | * CBC (and relatives) mode encryption requiring unique IVs per data block. |
25 | * Reed H. Petty, rhp@draper.net | 25 | * Reed H. Petty, rhp@draper.net |
26 | * | 26 | * |
27 | * Maximum number of loop devices now dynamic via max_loop module parameter. | 27 | * Maximum number of loop devices now dynamic via max_loop module parameter. |
28 | * Russell Kroll <rkroll@exploits.org> 19990701 | 28 | * Russell Kroll <rkroll@exploits.org> 19990701 |
29 | * | 29 | * |
30 | * Maximum number of loop devices when compiled-in now selectable by passing | 30 | * Maximum number of loop devices when compiled-in now selectable by passing |
31 | * max_loop=<1-255> to the kernel on boot. | 31 | * max_loop=<1-255> to the kernel on boot. |
32 | * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 | 32 | * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 |
33 | * | 33 | * |
34 | * Completely rewrite request handling to be make_request_fn style and | 34 | * Completely rewrite request handling to be make_request_fn style and |
35 | * non blocking, pushing work to a helper thread. Lots of fixes from | 35 | * non blocking, pushing work to a helper thread. Lots of fixes from |
36 | * Al Viro too. | 36 | * Al Viro too. |
37 | * Jens Axboe <axboe@suse.de>, Nov 2000 | 37 | * Jens Axboe <axboe@suse.de>, Nov 2000 |
38 | * | 38 | * |
39 | * Support up to 256 loop devices | 39 | * Support up to 256 loop devices |
40 | * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 | 40 | * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 |
41 | * | 41 | * |
42 | * Support for falling back on the write file operation when the address space | 42 | * Support for falling back on the write file operation when the address space |
43 | * operations write_begin is not available on the backing filesystem. | 43 | * operations write_begin is not available on the backing filesystem. |
44 | * Anton Altaparmakov, 16 Feb 2005 | 44 | * Anton Altaparmakov, 16 Feb 2005 |
45 | * | 45 | * |
46 | * Still To Fix: | 46 | * Still To Fix: |
47 | * - Advisory locking is ignored here. | 47 | * - Advisory locking is ignored here. |
48 | * - Should use an own CAP_* category instead of CAP_SYS_ADMIN | 48 | * - Should use an own CAP_* category instead of CAP_SYS_ADMIN |
49 | * | 49 | * |
50 | */ | 50 | */ |
51 | 51 | ||
52 | #include <linux/module.h> | 52 | #include <linux/module.h> |
53 | #include <linux/moduleparam.h> | 53 | #include <linux/moduleparam.h> |
54 | #include <linux/sched.h> | 54 | #include <linux/sched.h> |
55 | #include <linux/fs.h> | 55 | #include <linux/fs.h> |
56 | #include <linux/file.h> | 56 | #include <linux/file.h> |
57 | #include <linux/stat.h> | 57 | #include <linux/stat.h> |
58 | #include <linux/errno.h> | 58 | #include <linux/errno.h> |
59 | #include <linux/major.h> | 59 | #include <linux/major.h> |
60 | #include <linux/wait.h> | 60 | #include <linux/wait.h> |
61 | #include <linux/blkdev.h> | 61 | #include <linux/blkdev.h> |
62 | #include <linux/blkpg.h> | 62 | #include <linux/blkpg.h> |
63 | #include <linux/init.h> | 63 | #include <linux/init.h> |
64 | #include <linux/swap.h> | 64 | #include <linux/swap.h> |
65 | #include <linux/slab.h> | 65 | #include <linux/slab.h> |
66 | #include <linux/loop.h> | 66 | #include <linux/loop.h> |
67 | #include <linux/compat.h> | 67 | #include <linux/compat.h> |
68 | #include <linux/suspend.h> | 68 | #include <linux/suspend.h> |
69 | #include <linux/freezer.h> | 69 | #include <linux/freezer.h> |
70 | #include <linux/smp_lock.h> | 70 | #include <linux/smp_lock.h> |
71 | #include <linux/writeback.h> | 71 | #include <linux/writeback.h> |
72 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | 72 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ |
73 | #include <linux/completion.h> | 73 | #include <linux/completion.h> |
74 | #include <linux/highmem.h> | 74 | #include <linux/highmem.h> |
75 | #include <linux/kthread.h> | 75 | #include <linux/kthread.h> |
76 | #include <linux/splice.h> | 76 | #include <linux/splice.h> |
77 | 77 | ||
78 | #include <asm/uaccess.h> | 78 | #include <asm/uaccess.h> |
79 | 79 | ||
80 | static LIST_HEAD(loop_devices); | 80 | static LIST_HEAD(loop_devices); |
81 | static DEFINE_MUTEX(loop_devices_mutex); | 81 | static DEFINE_MUTEX(loop_devices_mutex); |
82 | 82 | ||
83 | static int max_part; | 83 | static int max_part; |
84 | static int part_shift; | 84 | static int part_shift; |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Transfer functions | 87 | * Transfer functions |
88 | */ | 88 | */ |
89 | static int transfer_none(struct loop_device *lo, int cmd, | 89 | static int transfer_none(struct loop_device *lo, int cmd, |
90 | struct page *raw_page, unsigned raw_off, | 90 | struct page *raw_page, unsigned raw_off, |
91 | struct page *loop_page, unsigned loop_off, | 91 | struct page *loop_page, unsigned loop_off, |
92 | int size, sector_t real_block) | 92 | int size, sector_t real_block) |
93 | { | 93 | { |
94 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; | 94 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; |
95 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; | 95 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; |
96 | 96 | ||
97 | if (cmd == READ) | 97 | if (cmd == READ) |
98 | memcpy(loop_buf, raw_buf, size); | 98 | memcpy(loop_buf, raw_buf, size); |
99 | else | 99 | else |
100 | memcpy(raw_buf, loop_buf, size); | 100 | memcpy(raw_buf, loop_buf, size); |
101 | 101 | ||
102 | kunmap_atomic(raw_buf, KM_USER0); | 102 | kunmap_atomic(raw_buf, KM_USER0); |
103 | kunmap_atomic(loop_buf, KM_USER1); | 103 | kunmap_atomic(loop_buf, KM_USER1); |
104 | cond_resched(); | 104 | cond_resched(); |
105 | return 0; | 105 | return 0; |
106 | } | 106 | } |
107 | 107 | ||
108 | static int transfer_xor(struct loop_device *lo, int cmd, | 108 | static int transfer_xor(struct loop_device *lo, int cmd, |
109 | struct page *raw_page, unsigned raw_off, | 109 | struct page *raw_page, unsigned raw_off, |
110 | struct page *loop_page, unsigned loop_off, | 110 | struct page *loop_page, unsigned loop_off, |
111 | int size, sector_t real_block) | 111 | int size, sector_t real_block) |
112 | { | 112 | { |
113 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; | 113 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; |
114 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; | 114 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; |
115 | char *in, *out, *key; | 115 | char *in, *out, *key; |
116 | int i, keysize; | 116 | int i, keysize; |
117 | 117 | ||
118 | if (cmd == READ) { | 118 | if (cmd == READ) { |
119 | in = raw_buf; | 119 | in = raw_buf; |
120 | out = loop_buf; | 120 | out = loop_buf; |
121 | } else { | 121 | } else { |
122 | in = loop_buf; | 122 | in = loop_buf; |
123 | out = raw_buf; | 123 | out = raw_buf; |
124 | } | 124 | } |
125 | 125 | ||
126 | key = lo->lo_encrypt_key; | 126 | key = lo->lo_encrypt_key; |
127 | keysize = lo->lo_encrypt_key_size; | 127 | keysize = lo->lo_encrypt_key_size; |
128 | for (i = 0; i < size; i++) | 128 | for (i = 0; i < size; i++) |
129 | *out++ = *in++ ^ key[(i & 511) % keysize]; | 129 | *out++ = *in++ ^ key[(i & 511) % keysize]; |
130 | 130 | ||
131 | kunmap_atomic(raw_buf, KM_USER0); | 131 | kunmap_atomic(raw_buf, KM_USER0); |
132 | kunmap_atomic(loop_buf, KM_USER1); | 132 | kunmap_atomic(loop_buf, KM_USER1); |
133 | cond_resched(); | 133 | cond_resched(); |
134 | return 0; | 134 | return 0; |
135 | } | 135 | } |
136 | 136 | ||
137 | static int xor_init(struct loop_device *lo, const struct loop_info64 *info) | 137 | static int xor_init(struct loop_device *lo, const struct loop_info64 *info) |
138 | { | 138 | { |
139 | if (unlikely(info->lo_encrypt_key_size <= 0)) | 139 | if (unlikely(info->lo_encrypt_key_size <= 0)) |
140 | return -EINVAL; | 140 | return -EINVAL; |
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
143 | 143 | ||
144 | static struct loop_func_table none_funcs = { | 144 | static struct loop_func_table none_funcs = { |
145 | .number = LO_CRYPT_NONE, | 145 | .number = LO_CRYPT_NONE, |
146 | .transfer = transfer_none, | 146 | .transfer = transfer_none, |
147 | }; | 147 | }; |
148 | 148 | ||
149 | static struct loop_func_table xor_funcs = { | 149 | static struct loop_func_table xor_funcs = { |
150 | .number = LO_CRYPT_XOR, | 150 | .number = LO_CRYPT_XOR, |
151 | .transfer = transfer_xor, | 151 | .transfer = transfer_xor, |
152 | .init = xor_init | 152 | .init = xor_init |
153 | }; | 153 | }; |
154 | 154 | ||
155 | /* xfer_funcs[0] is special - its release function is never called */ | 155 | /* xfer_funcs[0] is special - its release function is never called */ |
156 | static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { | 156 | static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { |
157 | &none_funcs, | 157 | &none_funcs, |
158 | &xor_funcs | 158 | &xor_funcs |
159 | }; | 159 | }; |
160 | 160 | ||
161 | static loff_t get_loop_size(struct loop_device *lo, struct file *file) | 161 | static loff_t get_loop_size(struct loop_device *lo, struct file *file) |
162 | { | 162 | { |
163 | loff_t size, offset, loopsize; | 163 | loff_t size, offset, loopsize; |
164 | 164 | ||
165 | /* Compute loopsize in bytes */ | 165 | /* Compute loopsize in bytes */ |
166 | size = i_size_read(file->f_mapping->host); | 166 | size = i_size_read(file->f_mapping->host); |
167 | offset = lo->lo_offset; | 167 | offset = lo->lo_offset; |
168 | loopsize = size - offset; | 168 | loopsize = size - offset; |
169 | if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) | 169 | if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) |
170 | loopsize = lo->lo_sizelimit; | 170 | loopsize = lo->lo_sizelimit; |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * Unfortunately, if we want to do I/O on the device, | 173 | * Unfortunately, if we want to do I/O on the device, |
174 | * the number of 512-byte sectors has to fit into a sector_t. | 174 | * the number of 512-byte sectors has to fit into a sector_t. |
175 | */ | 175 | */ |
176 | return loopsize >> 9; | 176 | return loopsize >> 9; |
177 | } | 177 | } |
178 | 178 | ||
179 | static int | 179 | static int |
180 | figure_loop_size(struct loop_device *lo) | 180 | figure_loop_size(struct loop_device *lo) |
181 | { | 181 | { |
182 | loff_t size = get_loop_size(lo, lo->lo_backing_file); | 182 | loff_t size = get_loop_size(lo, lo->lo_backing_file); |
183 | sector_t x = (sector_t)size; | 183 | sector_t x = (sector_t)size; |
184 | 184 | ||
185 | if (unlikely((loff_t)x != size)) | 185 | if (unlikely((loff_t)x != size)) |
186 | return -EFBIG; | 186 | return -EFBIG; |
187 | 187 | ||
188 | set_capacity(lo->lo_disk, x); | 188 | set_capacity(lo->lo_disk, x); |
189 | return 0; | 189 | return 0; |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline int | 192 | static inline int |
193 | lo_do_transfer(struct loop_device *lo, int cmd, | 193 | lo_do_transfer(struct loop_device *lo, int cmd, |
194 | struct page *rpage, unsigned roffs, | 194 | struct page *rpage, unsigned roffs, |
195 | struct page *lpage, unsigned loffs, | 195 | struct page *lpage, unsigned loffs, |
196 | int size, sector_t rblock) | 196 | int size, sector_t rblock) |
197 | { | 197 | { |
198 | if (unlikely(!lo->transfer)) | 198 | if (unlikely(!lo->transfer)) |
199 | return 0; | 199 | return 0; |
200 | 200 | ||
201 | return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); | 201 | return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); |
202 | } | 202 | } |
203 | 203 | ||
204 | /** | 204 | /** |
205 | * do_lo_send_aops - helper for writing data to a loop device | 205 | * do_lo_send_aops - helper for writing data to a loop device |
206 | * | 206 | * |
207 | * This is the fast version for backing filesystems which implement the address | 207 | * This is the fast version for backing filesystems which implement the address |
208 | * space operations write_begin and write_end. | 208 | * space operations write_begin and write_end. |
209 | */ | 209 | */ |
210 | static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | 210 | static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, |
211 | loff_t pos, struct page *unused) | 211 | loff_t pos, struct page *unused) |
212 | { | 212 | { |
213 | struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ | 213 | struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ |
214 | struct address_space *mapping = file->f_mapping; | 214 | struct address_space *mapping = file->f_mapping; |
215 | pgoff_t index; | 215 | pgoff_t index; |
216 | unsigned offset, bv_offs; | 216 | unsigned offset, bv_offs; |
217 | int len, ret; | 217 | int len, ret; |
218 | 218 | ||
219 | mutex_lock(&mapping->host->i_mutex); | 219 | mutex_lock(&mapping->host->i_mutex); |
220 | index = pos >> PAGE_CACHE_SHIFT; | 220 | index = pos >> PAGE_CACHE_SHIFT; |
221 | offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); | 221 | offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); |
222 | bv_offs = bvec->bv_offset; | 222 | bv_offs = bvec->bv_offset; |
223 | len = bvec->bv_len; | 223 | len = bvec->bv_len; |
224 | while (len > 0) { | 224 | while (len > 0) { |
225 | sector_t IV; | 225 | sector_t IV; |
226 | unsigned size, copied; | 226 | unsigned size, copied; |
227 | int transfer_result; | 227 | int transfer_result; |
228 | struct page *page; | 228 | struct page *page; |
229 | void *fsdata; | 229 | void *fsdata; |
230 | 230 | ||
231 | IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); | 231 | IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); |
232 | size = PAGE_CACHE_SIZE - offset; | 232 | size = PAGE_CACHE_SIZE - offset; |
233 | if (size > len) | 233 | if (size > len) |
234 | size = len; | 234 | size = len; |
235 | 235 | ||
236 | ret = pagecache_write_begin(file, mapping, pos, size, 0, | 236 | ret = pagecache_write_begin(file, mapping, pos, size, 0, |
237 | &page, &fsdata); | 237 | &page, &fsdata); |
238 | if (ret) | 238 | if (ret) |
239 | goto fail; | 239 | goto fail; |
240 | 240 | ||
241 | file_update_time(file); | 241 | file_update_time(file); |
242 | 242 | ||
243 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, | 243 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, |
244 | bvec->bv_page, bv_offs, size, IV); | 244 | bvec->bv_page, bv_offs, size, IV); |
245 | copied = size; | 245 | copied = size; |
246 | if (unlikely(transfer_result)) | 246 | if (unlikely(transfer_result)) |
247 | copied = 0; | 247 | copied = 0; |
248 | 248 | ||
249 | ret = pagecache_write_end(file, mapping, pos, size, copied, | 249 | ret = pagecache_write_end(file, mapping, pos, size, copied, |
250 | page, fsdata); | 250 | page, fsdata); |
251 | if (ret < 0 || ret != copied) | 251 | if (ret < 0 || ret != copied) |
252 | goto fail; | 252 | goto fail; |
253 | 253 | ||
254 | if (unlikely(transfer_result)) | 254 | if (unlikely(transfer_result)) |
255 | goto fail; | 255 | goto fail; |
256 | 256 | ||
257 | bv_offs += copied; | 257 | bv_offs += copied; |
258 | len -= copied; | 258 | len -= copied; |
259 | offset = 0; | 259 | offset = 0; |
260 | index++; | 260 | index++; |
261 | pos += copied; | 261 | pos += copied; |
262 | } | 262 | } |
263 | ret = 0; | 263 | ret = 0; |
264 | out: | 264 | out: |
265 | mutex_unlock(&mapping->host->i_mutex); | 265 | mutex_unlock(&mapping->host->i_mutex); |
266 | return ret; | 266 | return ret; |
267 | fail: | 267 | fail: |
268 | ret = -1; | 268 | ret = -1; |
269 | goto out; | 269 | goto out; |
270 | } | 270 | } |
271 | 271 | ||
272 | /** | 272 | /** |
273 | * __do_lo_send_write - helper for writing data to a loop device | 273 | * __do_lo_send_write - helper for writing data to a loop device |
274 | * | 274 | * |
275 | * This helper just factors out common code between do_lo_send_direct_write() | 275 | * This helper just factors out common code between do_lo_send_direct_write() |
276 | * and do_lo_send_write(). | 276 | * and do_lo_send_write(). |
277 | */ | 277 | */ |
278 | static int __do_lo_send_write(struct file *file, | 278 | static int __do_lo_send_write(struct file *file, |
279 | u8 *buf, const int len, loff_t pos) | 279 | u8 *buf, const int len, loff_t pos) |
280 | { | 280 | { |
281 | ssize_t bw; | 281 | ssize_t bw; |
282 | mm_segment_t old_fs = get_fs(); | 282 | mm_segment_t old_fs = get_fs(); |
283 | 283 | ||
284 | set_fs(get_ds()); | 284 | set_fs(get_ds()); |
285 | bw = file->f_op->write(file, buf, len, &pos); | 285 | bw = file->f_op->write(file, buf, len, &pos); |
286 | set_fs(old_fs); | 286 | set_fs(old_fs); |
287 | if (likely(bw == len)) | 287 | if (likely(bw == len)) |
288 | return 0; | 288 | return 0; |
289 | printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", | 289 | printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", |
290 | (unsigned long long)pos, len); | 290 | (unsigned long long)pos, len); |
291 | if (bw >= 0) | 291 | if (bw >= 0) |
292 | bw = -EIO; | 292 | bw = -EIO; |
293 | return bw; | 293 | return bw; |
294 | } | 294 | } |
295 | 295 | ||
296 | /** | 296 | /** |
297 | * do_lo_send_direct_write - helper for writing data to a loop device | 297 | * do_lo_send_direct_write - helper for writing data to a loop device |
298 | * | 298 | * |
299 | * This is the fast, non-transforming version for backing filesystems which do | 299 | * This is the fast, non-transforming version for backing filesystems which do |
300 | * not implement the address space operations write_begin and write_end. | 300 | * not implement the address space operations write_begin and write_end. |
301 | * It uses the write file operation which should be present on all writeable | 301 | * It uses the write file operation which should be present on all writeable |
302 | * filesystems. | 302 | * filesystems. |
303 | */ | 303 | */ |
304 | static int do_lo_send_direct_write(struct loop_device *lo, | 304 | static int do_lo_send_direct_write(struct loop_device *lo, |
305 | struct bio_vec *bvec, loff_t pos, struct page *page) | 305 | struct bio_vec *bvec, loff_t pos, struct page *page) |
306 | { | 306 | { |
307 | ssize_t bw = __do_lo_send_write(lo->lo_backing_file, | 307 | ssize_t bw = __do_lo_send_write(lo->lo_backing_file, |
308 | kmap(bvec->bv_page) + bvec->bv_offset, | 308 | kmap(bvec->bv_page) + bvec->bv_offset, |
309 | bvec->bv_len, pos); | 309 | bvec->bv_len, pos); |
310 | kunmap(bvec->bv_page); | 310 | kunmap(bvec->bv_page); |
311 | cond_resched(); | 311 | cond_resched(); |
312 | return bw; | 312 | return bw; |
313 | } | 313 | } |
314 | 314 | ||
315 | /** | 315 | /** |
316 | * do_lo_send_write - helper for writing data to a loop device | 316 | * do_lo_send_write - helper for writing data to a loop device |
317 | * | 317 | * |
318 | * This is the slow, transforming version for filesystems which do not | 318 | * This is the slow, transforming version for filesystems which do not |
319 | * implement the address space operations write_begin and write_end. It | 319 | * implement the address space operations write_begin and write_end. It |
320 | * uses the write file operation which should be present on all writeable | 320 | * uses the write file operation which should be present on all writeable |
321 | * filesystems. | 321 | * filesystems. |
322 | * | 322 | * |
323 | * Using fops->write is slower than using aops->{prepare,commit}_write in the | 323 | * Using fops->write is slower than using aops->{prepare,commit}_write in the |
324 | * transforming case because we need to double buffer the data as we cannot do | 324 | * transforming case because we need to double buffer the data as we cannot do |
325 | * the transformations in place as we do not have direct access to the | 325 | * the transformations in place as we do not have direct access to the |
326 | * destination pages of the backing file. | 326 | * destination pages of the backing file. |
327 | */ | 327 | */ |
328 | static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, | 328 | static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, |
329 | loff_t pos, struct page *page) | 329 | loff_t pos, struct page *page) |
330 | { | 330 | { |
331 | int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, | 331 | int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, |
332 | bvec->bv_offset, bvec->bv_len, pos >> 9); | 332 | bvec->bv_offset, bvec->bv_len, pos >> 9); |
333 | if (likely(!ret)) | 333 | if (likely(!ret)) |
334 | return __do_lo_send_write(lo->lo_backing_file, | 334 | return __do_lo_send_write(lo->lo_backing_file, |
335 | page_address(page), bvec->bv_len, | 335 | page_address(page), bvec->bv_len, |
336 | pos); | 336 | pos); |
337 | printk(KERN_ERR "loop: Transfer error at byte offset %llu, " | 337 | printk(KERN_ERR "loop: Transfer error at byte offset %llu, " |
338 | "length %i.\n", (unsigned long long)pos, bvec->bv_len); | 338 | "length %i.\n", (unsigned long long)pos, bvec->bv_len); |
339 | if (ret > 0) | 339 | if (ret > 0) |
340 | ret = -EIO; | 340 | ret = -EIO; |
341 | return ret; | 341 | return ret; |
342 | } | 342 | } |
343 | 343 | ||
344 | static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) | 344 | static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) |
345 | { | 345 | { |
346 | int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, | 346 | int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, |
347 | struct page *page); | 347 | struct page *page); |
348 | struct bio_vec *bvec; | 348 | struct bio_vec *bvec; |
349 | struct page *page = NULL; | 349 | struct page *page = NULL; |
350 | int i, ret = 0; | 350 | int i, ret = 0; |
351 | 351 | ||
352 | do_lo_send = do_lo_send_aops; | 352 | do_lo_send = do_lo_send_aops; |
353 | if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { | 353 | if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { |
354 | do_lo_send = do_lo_send_direct_write; | 354 | do_lo_send = do_lo_send_direct_write; |
355 | if (lo->transfer != transfer_none) { | 355 | if (lo->transfer != transfer_none) { |
356 | page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); | 356 | page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); |
357 | if (unlikely(!page)) | 357 | if (unlikely(!page)) |
358 | goto fail; | 358 | goto fail; |
359 | kmap(page); | 359 | kmap(page); |
360 | do_lo_send = do_lo_send_write; | 360 | do_lo_send = do_lo_send_write; |
361 | } | 361 | } |
362 | } | 362 | } |
363 | bio_for_each_segment(bvec, bio, i) { | 363 | bio_for_each_segment(bvec, bio, i) { |
364 | ret = do_lo_send(lo, bvec, pos, page); | 364 | ret = do_lo_send(lo, bvec, pos, page); |
365 | if (ret < 0) | 365 | if (ret < 0) |
366 | break; | 366 | break; |
367 | pos += bvec->bv_len; | 367 | pos += bvec->bv_len; |
368 | } | 368 | } |
369 | if (page) { | 369 | if (page) { |
370 | kunmap(page); | 370 | kunmap(page); |
371 | __free_page(page); | 371 | __free_page(page); |
372 | } | 372 | } |
373 | out: | 373 | out: |
374 | return ret; | 374 | return ret; |
375 | fail: | 375 | fail: |
376 | printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); | 376 | printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); |
377 | ret = -ENOMEM; | 377 | ret = -ENOMEM; |
378 | goto out; | 378 | goto out; |
379 | } | 379 | } |
380 | 380 | ||
381 | struct lo_read_data { | 381 | struct lo_read_data { |
382 | struct loop_device *lo; | 382 | struct loop_device *lo; |
383 | struct page *page; | 383 | struct page *page; |
384 | unsigned offset; | 384 | unsigned offset; |
385 | int bsize; | 385 | int bsize; |
386 | }; | 386 | }; |
387 | 387 | ||
388 | static int | 388 | static int |
389 | lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | 389 | lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
390 | struct splice_desc *sd) | 390 | struct splice_desc *sd) |
391 | { | 391 | { |
392 | struct lo_read_data *p = sd->u.data; | 392 | struct lo_read_data *p = sd->u.data; |
393 | struct loop_device *lo = p->lo; | 393 | struct loop_device *lo = p->lo; |
394 | struct page *page = buf->page; | 394 | struct page *page = buf->page; |
395 | sector_t IV; | 395 | sector_t IV; |
396 | int size, ret; | 396 | int size, ret; |
397 | 397 | ||
398 | ret = buf->ops->confirm(pipe, buf); | 398 | ret = buf->ops->confirm(pipe, buf); |
399 | if (unlikely(ret)) | 399 | if (unlikely(ret)) |
400 | return ret; | 400 | return ret; |
401 | 401 | ||
402 | IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + | 402 | IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + |
403 | (buf->offset >> 9); | 403 | (buf->offset >> 9); |
404 | size = sd->len; | 404 | size = sd->len; |
405 | if (size > p->bsize) | 405 | if (size > p->bsize) |
406 | size = p->bsize; | 406 | size = p->bsize; |
407 | 407 | ||
408 | if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { | 408 | if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { |
409 | printk(KERN_ERR "loop: transfer error block %ld\n", | 409 | printk(KERN_ERR "loop: transfer error block %ld\n", |
410 | page->index); | 410 | page->index); |
411 | size = -EINVAL; | 411 | size = -EINVAL; |
412 | } | 412 | } |
413 | 413 | ||
414 | flush_dcache_page(p->page); | 414 | flush_dcache_page(p->page); |
415 | 415 | ||
416 | if (size > 0) | 416 | if (size > 0) |
417 | p->offset += size; | 417 | p->offset += size; |
418 | 418 | ||
419 | return size; | 419 | return size; |
420 | } | 420 | } |
421 | 421 | ||
422 | static int | 422 | static int |
423 | lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) | 423 | lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) |
424 | { | 424 | { |
425 | return __splice_from_pipe(pipe, sd, lo_splice_actor); | 425 | return __splice_from_pipe(pipe, sd, lo_splice_actor); |
426 | } | 426 | } |
427 | 427 | ||
428 | static int | 428 | static int |
429 | do_lo_receive(struct loop_device *lo, | 429 | do_lo_receive(struct loop_device *lo, |
430 | struct bio_vec *bvec, int bsize, loff_t pos) | 430 | struct bio_vec *bvec, int bsize, loff_t pos) |
431 | { | 431 | { |
432 | struct lo_read_data cookie; | 432 | struct lo_read_data cookie; |
433 | struct splice_desc sd; | 433 | struct splice_desc sd; |
434 | struct file *file; | 434 | struct file *file; |
435 | long retval; | 435 | long retval; |
436 | 436 | ||
437 | cookie.lo = lo; | 437 | cookie.lo = lo; |
438 | cookie.page = bvec->bv_page; | 438 | cookie.page = bvec->bv_page; |
439 | cookie.offset = bvec->bv_offset; | 439 | cookie.offset = bvec->bv_offset; |
440 | cookie.bsize = bsize; | 440 | cookie.bsize = bsize; |
441 | 441 | ||
442 | sd.len = 0; | 442 | sd.len = 0; |
443 | sd.total_len = bvec->bv_len; | 443 | sd.total_len = bvec->bv_len; |
444 | sd.flags = 0; | 444 | sd.flags = 0; |
445 | sd.pos = pos; | 445 | sd.pos = pos; |
446 | sd.u.data = &cookie; | 446 | sd.u.data = &cookie; |
447 | 447 | ||
448 | file = lo->lo_backing_file; | 448 | file = lo->lo_backing_file; |
449 | retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); | 449 | retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); |
450 | 450 | ||
451 | if (retval < 0) | 451 | if (retval < 0) |
452 | return retval; | 452 | return retval; |
453 | 453 | ||
454 | return 0; | 454 | return 0; |
455 | } | 455 | } |
456 | 456 | ||
457 | static int | 457 | static int |
458 | lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) | 458 | lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) |
459 | { | 459 | { |
460 | struct bio_vec *bvec; | 460 | struct bio_vec *bvec; |
461 | int i, ret = 0; | 461 | int i, ret = 0; |
462 | 462 | ||
463 | bio_for_each_segment(bvec, bio, i) { | 463 | bio_for_each_segment(bvec, bio, i) { |
464 | ret = do_lo_receive(lo, bvec, bsize, pos); | 464 | ret = do_lo_receive(lo, bvec, bsize, pos); |
465 | if (ret < 0) | 465 | if (ret < 0) |
466 | break; | 466 | break; |
467 | pos += bvec->bv_len; | 467 | pos += bvec->bv_len; |
468 | } | 468 | } |
469 | return ret; | 469 | return ret; |
470 | } | 470 | } |
471 | 471 | ||
472 | static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | 472 | static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) |
473 | { | 473 | { |
474 | loff_t pos; | 474 | loff_t pos; |
475 | int ret; | 475 | int ret; |
476 | 476 | ||
477 | pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; | 477 | pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; |
478 | 478 | ||
479 | if (bio_rw(bio) == WRITE) { | 479 | if (bio_rw(bio) == WRITE) { |
480 | bool barrier = (bio->bi_rw & REQ_HARDBARRIER); | 480 | bool barrier = (bio->bi_rw & REQ_HARDBARRIER); |
481 | struct file *file = lo->lo_backing_file; | 481 | struct file *file = lo->lo_backing_file; |
482 | 482 | ||
483 | if (barrier) { | 483 | if (barrier) { |
484 | if (unlikely(!file->f_op->fsync)) { | 484 | if (unlikely(!file->f_op->fsync)) { |
485 | ret = -EOPNOTSUPP; | 485 | ret = -EOPNOTSUPP; |
486 | goto out; | 486 | goto out; |
487 | } | 487 | } |
488 | 488 | ||
489 | ret = vfs_fsync(file, 0); | 489 | ret = vfs_fsync(file, 0); |
490 | if (unlikely(ret)) { | 490 | if (unlikely(ret)) { |
491 | ret = -EIO; | 491 | ret = -EIO; |
492 | goto out; | 492 | goto out; |
493 | } | 493 | } |
494 | } | 494 | } |
495 | 495 | ||
496 | ret = lo_send(lo, bio, pos); | 496 | ret = lo_send(lo, bio, pos); |
497 | 497 | ||
498 | if (barrier && !ret) { | 498 | if (barrier && !ret) { |
499 | ret = vfs_fsync(file, 0); | 499 | ret = vfs_fsync(file, 0); |
500 | if (unlikely(ret)) | 500 | if (unlikely(ret)) |
501 | ret = -EIO; | 501 | ret = -EIO; |
502 | } | 502 | } |
503 | } else | 503 | } else |
504 | ret = lo_receive(lo, bio, lo->lo_blocksize, pos); | 504 | ret = lo_receive(lo, bio, lo->lo_blocksize, pos); |
505 | 505 | ||
506 | out: | 506 | out: |
507 | return ret; | 507 | return ret; |
508 | } | 508 | } |
509 | 509 | ||
510 | /* | 510 | /* |
511 | * Add bio to back of pending list | 511 | * Add bio to back of pending list |
512 | */ | 512 | */ |
513 | static void loop_add_bio(struct loop_device *lo, struct bio *bio) | 513 | static void loop_add_bio(struct loop_device *lo, struct bio *bio) |
514 | { | 514 | { |
515 | bio_list_add(&lo->lo_bio_list, bio); | 515 | bio_list_add(&lo->lo_bio_list, bio); |
516 | } | 516 | } |
517 | 517 | ||
518 | /* | 518 | /* |
519 | * Grab first pending buffer | 519 | * Grab first pending buffer |
520 | */ | 520 | */ |
521 | static struct bio *loop_get_bio(struct loop_device *lo) | 521 | static struct bio *loop_get_bio(struct loop_device *lo) |
522 | { | 522 | { |
523 | return bio_list_pop(&lo->lo_bio_list); | 523 | return bio_list_pop(&lo->lo_bio_list); |
524 | } | 524 | } |
525 | 525 | ||
526 | static int loop_make_request(struct request_queue *q, struct bio *old_bio) | 526 | static int loop_make_request(struct request_queue *q, struct bio *old_bio) |
527 | { | 527 | { |
528 | struct loop_device *lo = q->queuedata; | 528 | struct loop_device *lo = q->queuedata; |
529 | int rw = bio_rw(old_bio); | 529 | int rw = bio_rw(old_bio); |
530 | 530 | ||
531 | if (rw == READA) | 531 | if (rw == READA) |
532 | rw = READ; | 532 | rw = READ; |
533 | 533 | ||
534 | BUG_ON(!lo || (rw != READ && rw != WRITE)); | 534 | BUG_ON(!lo || (rw != READ && rw != WRITE)); |
535 | 535 | ||
536 | spin_lock_irq(&lo->lo_lock); | 536 | spin_lock_irq(&lo->lo_lock); |
537 | if (lo->lo_state != Lo_bound) | 537 | if (lo->lo_state != Lo_bound) |
538 | goto out; | 538 | goto out; |
539 | if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) | 539 | if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) |
540 | goto out; | 540 | goto out; |
541 | loop_add_bio(lo, old_bio); | 541 | loop_add_bio(lo, old_bio); |
542 | wake_up(&lo->lo_event); | 542 | wake_up(&lo->lo_event); |
543 | spin_unlock_irq(&lo->lo_lock); | 543 | spin_unlock_irq(&lo->lo_lock); |
544 | return 0; | 544 | return 0; |
545 | 545 | ||
546 | out: | 546 | out: |
547 | spin_unlock_irq(&lo->lo_lock); | 547 | spin_unlock_irq(&lo->lo_lock); |
548 | bio_io_error(old_bio); | 548 | bio_io_error(old_bio); |
549 | return 0; | 549 | return 0; |
550 | } | 550 | } |
551 | 551 | ||
552 | /* | 552 | /* |
553 | * kick off io on the underlying address space | 553 | * kick off io on the underlying address space |
554 | */ | 554 | */ |
555 | static void loop_unplug(struct request_queue *q) | 555 | static void loop_unplug(struct request_queue *q) |
556 | { | 556 | { |
557 | struct loop_device *lo = q->queuedata; | 557 | struct loop_device *lo = q->queuedata; |
558 | 558 | ||
559 | queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); | 559 | queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); |
560 | blk_run_address_space(lo->lo_backing_file->f_mapping); | 560 | blk_run_address_space(lo->lo_backing_file->f_mapping); |
561 | } | 561 | } |
562 | 562 | ||
563 | struct switch_request { | 563 | struct switch_request { |
564 | struct file *file; | 564 | struct file *file; |
565 | struct completion wait; | 565 | struct completion wait; |
566 | }; | 566 | }; |
567 | 567 | ||
568 | static void do_loop_switch(struct loop_device *, struct switch_request *); | 568 | static void do_loop_switch(struct loop_device *, struct switch_request *); |
569 | 569 | ||
570 | static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) | 570 | static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) |
571 | { | 571 | { |
572 | if (unlikely(!bio->bi_bdev)) { | 572 | if (unlikely(!bio->bi_bdev)) { |
573 | do_loop_switch(lo, bio->bi_private); | 573 | do_loop_switch(lo, bio->bi_private); |
574 | bio_put(bio); | 574 | bio_put(bio); |
575 | } else { | 575 | } else { |
576 | int ret = do_bio_filebacked(lo, bio); | 576 | int ret = do_bio_filebacked(lo, bio); |
577 | bio_endio(bio, ret); | 577 | bio_endio(bio, ret); |
578 | } | 578 | } |
579 | } | 579 | } |
580 | 580 | ||
581 | /* | 581 | /* |
582 | * worker thread that handles reads/writes to file backed loop devices, | 582 | * worker thread that handles reads/writes to file backed loop devices, |
583 | * to avoid blocking in our make_request_fn. it also does loop decrypting | 583 | * to avoid blocking in our make_request_fn. it also does loop decrypting |
584 | * on reads for block backed loop, as that is too heavy to do from | 584 | * on reads for block backed loop, as that is too heavy to do from |
585 | * b_end_io context where irqs may be disabled. | 585 | * b_end_io context where irqs may be disabled. |
586 | * | 586 | * |
587 | * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before | 587 | * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before |
588 | * calling kthread_stop(). Therefore once kthread_should_stop() is | 588 | * calling kthread_stop(). Therefore once kthread_should_stop() is |
589 | * true, make_request will not place any more requests. Therefore | 589 | * true, make_request will not place any more requests. Therefore |
590 | * once kthread_should_stop() is true and lo_bio is NULL, we are | 590 | * once kthread_should_stop() is true and lo_bio is NULL, we are |
591 | * done with the loop. | 591 | * done with the loop. |
592 | */ | 592 | */ |
593 | static int loop_thread(void *data) | 593 | static int loop_thread(void *data) |
594 | { | 594 | { |
595 | struct loop_device *lo = data; | 595 | struct loop_device *lo = data; |
596 | struct bio *bio; | 596 | struct bio *bio; |
597 | 597 | ||
598 | set_user_nice(current, -20); | 598 | set_user_nice(current, -20); |
599 | 599 | ||
600 | while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { | 600 | while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { |
601 | 601 | ||
602 | wait_event_interruptible(lo->lo_event, | 602 | wait_event_interruptible(lo->lo_event, |
603 | !bio_list_empty(&lo->lo_bio_list) || | 603 | !bio_list_empty(&lo->lo_bio_list) || |
604 | kthread_should_stop()); | 604 | kthread_should_stop()); |
605 | 605 | ||
606 | if (bio_list_empty(&lo->lo_bio_list)) | 606 | if (bio_list_empty(&lo->lo_bio_list)) |
607 | continue; | 607 | continue; |
608 | spin_lock_irq(&lo->lo_lock); | 608 | spin_lock_irq(&lo->lo_lock); |
609 | bio = loop_get_bio(lo); | 609 | bio = loop_get_bio(lo); |
610 | spin_unlock_irq(&lo->lo_lock); | 610 | spin_unlock_irq(&lo->lo_lock); |
611 | 611 | ||
612 | BUG_ON(!bio); | 612 | BUG_ON(!bio); |
613 | loop_handle_bio(lo, bio); | 613 | loop_handle_bio(lo, bio); |
614 | } | 614 | } |
615 | 615 | ||
616 | return 0; | 616 | return 0; |
617 | } | 617 | } |
618 | 618 | ||
619 | /* | 619 | /* |
620 | * loop_switch performs the hard work of switching a backing store. | 620 | * loop_switch performs the hard work of switching a backing store. |
621 | * First it needs to flush existing IO, it does this by sending a magic | 621 | * First it needs to flush existing IO, it does this by sending a magic |
622 | * BIO down the pipe. The completion of this BIO does the actual switch. | 622 | * BIO down the pipe. The completion of this BIO does the actual switch. |
623 | */ | 623 | */ |
624 | static int loop_switch(struct loop_device *lo, struct file *file) | 624 | static int loop_switch(struct loop_device *lo, struct file *file) |
625 | { | 625 | { |
626 | struct switch_request w; | 626 | struct switch_request w; |
627 | struct bio *bio = bio_alloc(GFP_KERNEL, 0); | 627 | struct bio *bio = bio_alloc(GFP_KERNEL, 0); |
628 | if (!bio) | 628 | if (!bio) |
629 | return -ENOMEM; | 629 | return -ENOMEM; |
630 | init_completion(&w.wait); | 630 | init_completion(&w.wait); |
631 | w.file = file; | 631 | w.file = file; |
632 | bio->bi_private = &w; | 632 | bio->bi_private = &w; |
633 | bio->bi_bdev = NULL; | 633 | bio->bi_bdev = NULL; |
634 | loop_make_request(lo->lo_queue, bio); | 634 | loop_make_request(lo->lo_queue, bio); |
635 | wait_for_completion(&w.wait); | 635 | wait_for_completion(&w.wait); |
636 | return 0; | 636 | return 0; |
637 | } | 637 | } |
638 | 638 | ||
639 | /* | 639 | /* |
640 | * Helper to flush the IOs in loop, but keeping loop thread running | 640 | * Helper to flush the IOs in loop, but keeping loop thread running |
641 | */ | 641 | */ |
642 | static int loop_flush(struct loop_device *lo) | 642 | static int loop_flush(struct loop_device *lo) |
643 | { | 643 | { |
644 | /* loop not yet configured, no running thread, nothing to flush */ | 644 | /* loop not yet configured, no running thread, nothing to flush */ |
645 | if (!lo->lo_thread) | 645 | if (!lo->lo_thread) |
646 | return 0; | 646 | return 0; |
647 | 647 | ||
648 | return loop_switch(lo, NULL); | 648 | return loop_switch(lo, NULL); |
649 | } | 649 | } |
650 | 650 | ||
651 | /* | 651 | /* |
652 | * Do the actual switch; called from the BIO completion routine | 652 | * Do the actual switch; called from the BIO completion routine |
653 | */ | 653 | */ |
654 | static void do_loop_switch(struct loop_device *lo, struct switch_request *p) | 654 | static void do_loop_switch(struct loop_device *lo, struct switch_request *p) |
655 | { | 655 | { |
656 | struct file *file = p->file; | 656 | struct file *file = p->file; |
657 | struct file *old_file = lo->lo_backing_file; | 657 | struct file *old_file = lo->lo_backing_file; |
658 | struct address_space *mapping; | 658 | struct address_space *mapping; |
659 | 659 | ||
660 | /* if no new file, only flush of queued bios requested */ | 660 | /* if no new file, only flush of queued bios requested */ |
661 | if (!file) | 661 | if (!file) |
662 | goto out; | 662 | goto out; |
663 | 663 | ||
664 | mapping = file->f_mapping; | 664 | mapping = file->f_mapping; |
665 | mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); | 665 | mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); |
666 | lo->lo_backing_file = file; | 666 | lo->lo_backing_file = file; |
667 | lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? | 667 | lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? |
668 | mapping->host->i_bdev->bd_block_size : PAGE_SIZE; | 668 | mapping->host->i_bdev->bd_block_size : PAGE_SIZE; |
669 | lo->old_gfp_mask = mapping_gfp_mask(mapping); | 669 | lo->old_gfp_mask = mapping_gfp_mask(mapping); |
670 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); | 670 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); |
671 | out: | 671 | out: |
672 | complete(&p->wait); | 672 | complete(&p->wait); |
673 | } | 673 | } |
674 | 674 | ||
675 | 675 | ||
676 | /* | 676 | /* |
677 | * loop_change_fd switches the backing store of a loopback device to | 677 | * loop_change_fd switches the backing store of a loopback device to |
678 | * a new file. This is useful for operating system installers to free up | 678 | * a new file. This is useful for operating system installers to free up |
679 | * the original file and in High Availability environments to switch to | 679 | * the original file and in High Availability environments to switch to |
680 | * an alternative location for the content in case of server meltdown. | 680 | * an alternative location for the content in case of server meltdown. |
681 | * This can only work if the loop device is used read-only, and if the | 681 | * This can only work if the loop device is used read-only, and if the |
682 | * new backing store is the same size and type as the old backing store. | 682 | * new backing store is the same size and type as the old backing store. |
683 | */ | 683 | */ |
684 | static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, | 684 | static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, |
685 | unsigned int arg) | 685 | unsigned int arg) |
686 | { | 686 | { |
687 | struct file *file, *old_file; | 687 | struct file *file, *old_file; |
688 | struct inode *inode; | 688 | struct inode *inode; |
689 | int error; | 689 | int error; |
690 | 690 | ||
691 | error = -ENXIO; | 691 | error = -ENXIO; |
692 | if (lo->lo_state != Lo_bound) | 692 | if (lo->lo_state != Lo_bound) |
693 | goto out; | 693 | goto out; |
694 | 694 | ||
695 | /* the loop device has to be read-only */ | 695 | /* the loop device has to be read-only */ |
696 | error = -EINVAL; | 696 | error = -EINVAL; |
697 | if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) | 697 | if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) |
698 | goto out; | 698 | goto out; |
699 | 699 | ||
700 | error = -EBADF; | 700 | error = -EBADF; |
701 | file = fget(arg); | 701 | file = fget(arg); |
702 | if (!file) | 702 | if (!file) |
703 | goto out; | 703 | goto out; |
704 | 704 | ||
705 | inode = file->f_mapping->host; | 705 | inode = file->f_mapping->host; |
706 | old_file = lo->lo_backing_file; | 706 | old_file = lo->lo_backing_file; |
707 | 707 | ||
708 | error = -EINVAL; | 708 | error = -EINVAL; |
709 | 709 | ||
710 | if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) | 710 | if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) |
711 | goto out_putf; | 711 | goto out_putf; |
712 | 712 | ||
713 | /* size of the new backing store needs to be the same */ | 713 | /* size of the new backing store needs to be the same */ |
714 | if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) | 714 | if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) |
715 | goto out_putf; | 715 | goto out_putf; |
716 | 716 | ||
717 | /* and ... switch */ | 717 | /* and ... switch */ |
718 | error = loop_switch(lo, file); | 718 | error = loop_switch(lo, file); |
719 | if (error) | 719 | if (error) |
720 | goto out_putf; | 720 | goto out_putf; |
721 | 721 | ||
722 | fput(old_file); | 722 | fput(old_file); |
723 | if (max_part > 0) | 723 | if (max_part > 0) |
724 | ioctl_by_bdev(bdev, BLKRRPART, 0); | 724 | ioctl_by_bdev(bdev, BLKRRPART, 0); |
725 | return 0; | 725 | return 0; |
726 | 726 | ||
727 | out_putf: | 727 | out_putf: |
728 | fput(file); | 728 | fput(file); |
729 | out: | 729 | out: |
730 | return error; | 730 | return error; |
731 | } | 731 | } |
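
As a rough illustration of the interface described in the comment above loop_change_fd(): a hypothetical userspace sketch (not part of this patch; the helper name and error handling are invented) that drives the switch by handing the new file's descriptor to the LOOP_CHANGE_FD ioctl.

    /* Hypothetical sketch only: swap the backing file of a loop device
     * that is already bound read-only.  The kernel side (loop_change_fd
     * above) takes its own reference on the descriptor via fget(), so
     * closing it afterwards is fine. */
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/loop.h>

    int change_backing_file(const char *loopdev, const char *new_image)
    {
            int lfd, ffd, ret;

            lfd = open(loopdev, O_RDONLY);
            if (lfd < 0)
                    return -1;
            ffd = open(new_image, O_RDONLY);
            if (ffd < 0) {
                    close(lfd);
                    return -1;
            }
            ret = ioctl(lfd, LOOP_CHANGE_FD, ffd);
            close(ffd);
            close(lfd);
            return ret;
    }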
732 | 732 | ||
733 | static inline int is_loop_device(struct file *file) | 733 | static inline int is_loop_device(struct file *file) |
734 | { | 734 | { |
735 | struct inode *i = file->f_mapping->host; | 735 | struct inode *i = file->f_mapping->host; |
736 | 736 | ||
737 | return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; | 737 | return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; |
738 | } | 738 | } |
739 | 739 | ||
740 | static int loop_set_fd(struct loop_device *lo, fmode_t mode, | 740 | static int loop_set_fd(struct loop_device *lo, fmode_t mode, |
741 | struct block_device *bdev, unsigned int arg) | 741 | struct block_device *bdev, unsigned int arg) |
742 | { | 742 | { |
743 | struct file *file, *f; | 743 | struct file *file, *f; |
744 | struct inode *inode; | 744 | struct inode *inode; |
745 | struct address_space *mapping; | 745 | struct address_space *mapping; |
746 | unsigned lo_blocksize; | 746 | unsigned lo_blocksize; |
747 | int lo_flags = 0; | 747 | int lo_flags = 0; |
748 | int error; | 748 | int error; |
749 | loff_t size; | 749 | loff_t size; |
750 | 750 | ||
751 | /* This is safe, since we have a reference from open(). */ | 751 | /* This is safe, since we have a reference from open(). */ |
752 | __module_get(THIS_MODULE); | 752 | __module_get(THIS_MODULE); |
753 | 753 | ||
754 | error = -EBADF; | 754 | error = -EBADF; |
755 | file = fget(arg); | 755 | file = fget(arg); |
756 | if (!file) | 756 | if (!file) |
757 | goto out; | 757 | goto out; |
758 | 758 | ||
759 | error = -EBUSY; | 759 | error = -EBUSY; |
760 | if (lo->lo_state != Lo_unbound) | 760 | if (lo->lo_state != Lo_unbound) |
761 | goto out_putf; | 761 | goto out_putf; |
762 | 762 | ||
763 | /* Avoid recursion */ | 763 | /* Avoid recursion */ |
764 | f = file; | 764 | f = file; |
765 | while (is_loop_device(f)) { | 765 | while (is_loop_device(f)) { |
766 | struct loop_device *l; | 766 | struct loop_device *l; |
767 | 767 | ||
768 | if (f->f_mapping->host->i_bdev == bdev) | 768 | if (f->f_mapping->host->i_bdev == bdev) |
769 | goto out_putf; | 769 | goto out_putf; |
770 | 770 | ||
771 | l = f->f_mapping->host->i_bdev->bd_disk->private_data; | 771 | l = f->f_mapping->host->i_bdev->bd_disk->private_data; |
772 | if (l->lo_state == Lo_unbound) { | 772 | if (l->lo_state == Lo_unbound) { |
773 | error = -EINVAL; | 773 | error = -EINVAL; |
774 | goto out_putf; | 774 | goto out_putf; |
775 | } | 775 | } |
776 | f = l->lo_backing_file; | 776 | f = l->lo_backing_file; |
777 | } | 777 | } |
778 | 778 | ||
779 | mapping = file->f_mapping; | 779 | mapping = file->f_mapping; |
780 | inode = mapping->host; | 780 | inode = mapping->host; |
781 | 781 | ||
782 | if (!(file->f_mode & FMODE_WRITE)) | 782 | if (!(file->f_mode & FMODE_WRITE)) |
783 | lo_flags |= LO_FLAGS_READ_ONLY; | 783 | lo_flags |= LO_FLAGS_READ_ONLY; |
784 | 784 | ||
785 | error = -EINVAL; | 785 | error = -EINVAL; |
786 | if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 786 | if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
787 | const struct address_space_operations *aops = mapping->a_ops; | 787 | const struct address_space_operations *aops = mapping->a_ops; |
788 | 788 | ||
789 | if (aops->write_begin) | 789 | if (aops->write_begin) |
790 | lo_flags |= LO_FLAGS_USE_AOPS; | 790 | lo_flags |= LO_FLAGS_USE_AOPS; |
791 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) | 791 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) |
792 | lo_flags |= LO_FLAGS_READ_ONLY; | 792 | lo_flags |= LO_FLAGS_READ_ONLY; |
793 | 793 | ||
794 | lo_blocksize = S_ISBLK(inode->i_mode) ? | 794 | lo_blocksize = S_ISBLK(inode->i_mode) ? |
795 | inode->i_bdev->bd_block_size : PAGE_SIZE; | 795 | inode->i_bdev->bd_block_size : PAGE_SIZE; |
796 | 796 | ||
797 | error = 0; | 797 | error = 0; |
798 | } else { | 798 | } else { |
799 | goto out_putf; | 799 | goto out_putf; |
800 | } | 800 | } |
801 | 801 | ||
802 | size = get_loop_size(lo, file); | 802 | size = get_loop_size(lo, file); |
803 | 803 | ||
804 | if ((loff_t)(sector_t)size != size) { | 804 | if ((loff_t)(sector_t)size != size) { |
805 | error = -EFBIG; | 805 | error = -EFBIG; |
806 | goto out_putf; | 806 | goto out_putf; |
807 | } | 807 | } |
808 | 808 | ||
809 | if (!(mode & FMODE_WRITE)) | 809 | if (!(mode & FMODE_WRITE)) |
810 | lo_flags |= LO_FLAGS_READ_ONLY; | 810 | lo_flags |= LO_FLAGS_READ_ONLY; |
811 | 811 | ||
812 | set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); | 812 | set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); |
813 | 813 | ||
814 | lo->lo_blocksize = lo_blocksize; | 814 | lo->lo_blocksize = lo_blocksize; |
815 | lo->lo_device = bdev; | 815 | lo->lo_device = bdev; |
816 | lo->lo_flags = lo_flags; | 816 | lo->lo_flags = lo_flags; |
817 | lo->lo_backing_file = file; | 817 | lo->lo_backing_file = file; |
818 | lo->transfer = transfer_none; | 818 | lo->transfer = transfer_none; |
819 | lo->ioctl = NULL; | 819 | lo->ioctl = NULL; |
820 | lo->lo_sizelimit = 0; | 820 | lo->lo_sizelimit = 0; |
821 | lo->old_gfp_mask = mapping_gfp_mask(mapping); | 821 | lo->old_gfp_mask = mapping_gfp_mask(mapping); |
822 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); | 822 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); |
823 | 823 | ||
824 | bio_list_init(&lo->lo_bio_list); | 824 | bio_list_init(&lo->lo_bio_list); |
825 | 825 | ||
826 | /* | 826 | /* |
827 | * set queue make_request_fn, and add limits based on lower level | 827 | * set queue make_request_fn, and add limits based on lower level |
828 | * device | 828 | * device |
829 | */ | 829 | */ |
830 | blk_queue_make_request(lo->lo_queue, loop_make_request); | 830 | blk_queue_make_request(lo->lo_queue, loop_make_request); |
831 | lo->lo_queue->queuedata = lo; | 831 | lo->lo_queue->queuedata = lo; |
832 | lo->lo_queue->unplug_fn = loop_unplug; | 832 | lo->lo_queue->unplug_fn = loop_unplug; |
833 | 833 | ||
834 | if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) | 834 | if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) |
835 | blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH); | 835 | blk_queue_flush(lo->lo_queue, REQ_FLUSH); |
836 | 836 | ||
837 | set_capacity(lo->lo_disk, size); | 837 | set_capacity(lo->lo_disk, size); |
838 | bd_set_size(bdev, size << 9); | 838 | bd_set_size(bdev, size << 9); |
839 | /* let user-space know about the new size */ | 839 | /* let user-space know about the new size */ |
840 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); | 840 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); |
841 | 841 | ||
842 | set_blocksize(bdev, lo_blocksize); | 842 | set_blocksize(bdev, lo_blocksize); |
843 | 843 | ||
844 | lo->lo_thread = kthread_create(loop_thread, lo, "loop%d", | 844 | lo->lo_thread = kthread_create(loop_thread, lo, "loop%d", |
845 | lo->lo_number); | 845 | lo->lo_number); |
846 | if (IS_ERR(lo->lo_thread)) { | 846 | if (IS_ERR(lo->lo_thread)) { |
847 | error = PTR_ERR(lo->lo_thread); | 847 | error = PTR_ERR(lo->lo_thread); |
848 | goto out_clr; | 848 | goto out_clr; |
849 | } | 849 | } |
850 | lo->lo_state = Lo_bound; | 850 | lo->lo_state = Lo_bound; |
851 | wake_up_process(lo->lo_thread); | 851 | wake_up_process(lo->lo_thread); |
852 | if (max_part > 0) | 852 | if (max_part > 0) |
853 | ioctl_by_bdev(bdev, BLKRRPART, 0); | 853 | ioctl_by_bdev(bdev, BLKRRPART, 0); |
854 | return 0; | 854 | return 0; |
855 | 855 | ||
856 | out_clr: | 856 | out_clr: |
857 | lo->lo_thread = NULL; | 857 | lo->lo_thread = NULL; |
858 | lo->lo_device = NULL; | 858 | lo->lo_device = NULL; |
859 | lo->lo_backing_file = NULL; | 859 | lo->lo_backing_file = NULL; |
860 | lo->lo_flags = 0; | 860 | lo->lo_flags = 0; |
861 | set_capacity(lo->lo_disk, 0); | 861 | set_capacity(lo->lo_disk, 0); |
862 | invalidate_bdev(bdev); | 862 | invalidate_bdev(bdev); |
863 | bd_set_size(bdev, 0); | 863 | bd_set_size(bdev, 0); |
864 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); | 864 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); |
865 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask); | 865 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask); |
866 | lo->lo_state = Lo_unbound; | 866 | lo->lo_state = Lo_unbound; |
867 | out_putf: | 867 | out_putf: |
868 | fput(file); | 868 | fput(file); |
869 | out: | 869 | out: |
870 | /* This is safe: open() is still holding a reference. */ | 870 | /* This is safe: open() is still holding a reference. */ |
871 | module_put(THIS_MODULE); | 871 | module_put(THIS_MODULE); |
872 | return error; | 872 | return error; |
873 | } | 873 | } |
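
In the queue setup above, a writable loop device whose backing file provides ->fsync now marks its queue flush-capable with blk_queue_flush(lo->lo_queue, REQ_FLUSH) instead of requesting QUEUE_ORDERED_DRAIN_FLUSH. A minimal sketch of the same call pattern in a hypothetical driver (function and variable names invented, not from this patch):

    /* Hypothetical driver-side sketch; needs <linux/blkdev.h>.  Only
     * advertise a flushable write cache when the device can honour it. */
    static void mydrv_init_queue(struct request_queue *q, bool can_flush)
    {
            if (can_flush)
                    blk_queue_flush(q, REQ_FLUSH);
    }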
874 | 874 | ||
875 | static int | 875 | static int |
876 | loop_release_xfer(struct loop_device *lo) | 876 | loop_release_xfer(struct loop_device *lo) |
877 | { | 877 | { |
878 | int err = 0; | 878 | int err = 0; |
879 | struct loop_func_table *xfer = lo->lo_encryption; | 879 | struct loop_func_table *xfer = lo->lo_encryption; |
880 | 880 | ||
881 | if (xfer) { | 881 | if (xfer) { |
882 | if (xfer->release) | 882 | if (xfer->release) |
883 | err = xfer->release(lo); | 883 | err = xfer->release(lo); |
884 | lo->transfer = NULL; | 884 | lo->transfer = NULL; |
885 | lo->lo_encryption = NULL; | 885 | lo->lo_encryption = NULL; |
886 | module_put(xfer->owner); | 886 | module_put(xfer->owner); |
887 | } | 887 | } |
888 | return err; | 888 | return err; |
889 | } | 889 | } |
890 | 890 | ||
891 | static int | 891 | static int |
892 | loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, | 892 | loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, |
893 | const struct loop_info64 *i) | 893 | const struct loop_info64 *i) |
894 | { | 894 | { |
895 | int err = 0; | 895 | int err = 0; |
896 | 896 | ||
897 | if (xfer) { | 897 | if (xfer) { |
898 | struct module *owner = xfer->owner; | 898 | struct module *owner = xfer->owner; |
899 | 899 | ||
900 | if (!try_module_get(owner)) | 900 | if (!try_module_get(owner)) |
901 | return -EINVAL; | 901 | return -EINVAL; |
902 | if (xfer->init) | 902 | if (xfer->init) |
903 | err = xfer->init(lo, i); | 903 | err = xfer->init(lo, i); |
904 | if (err) | 904 | if (err) |
905 | module_put(owner); | 905 | module_put(owner); |
906 | else | 906 | else |
907 | lo->lo_encryption = xfer; | 907 | lo->lo_encryption = xfer; |
908 | } | 908 | } |
909 | return err; | 909 | return err; |
910 | } | 910 | } |
911 | 911 | ||
912 | static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) | 912 | static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) |
913 | { | 913 | { |
914 | struct file *filp = lo->lo_backing_file; | 914 | struct file *filp = lo->lo_backing_file; |
915 | gfp_t gfp = lo->old_gfp_mask; | 915 | gfp_t gfp = lo->old_gfp_mask; |
916 | 916 | ||
917 | if (lo->lo_state != Lo_bound) | 917 | if (lo->lo_state != Lo_bound) |
918 | return -ENXIO; | 918 | return -ENXIO; |
919 | 919 | ||
920 | if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ | 920 | if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ |
921 | return -EBUSY; | 921 | return -EBUSY; |
922 | 922 | ||
923 | if (filp == NULL) | 923 | if (filp == NULL) |
924 | return -EINVAL; | 924 | return -EINVAL; |
925 | 925 | ||
926 | spin_lock_irq(&lo->lo_lock); | 926 | spin_lock_irq(&lo->lo_lock); |
927 | lo->lo_state = Lo_rundown; | 927 | lo->lo_state = Lo_rundown; |
928 | spin_unlock_irq(&lo->lo_lock); | 928 | spin_unlock_irq(&lo->lo_lock); |
929 | 929 | ||
930 | kthread_stop(lo->lo_thread); | 930 | kthread_stop(lo->lo_thread); |
931 | 931 | ||
932 | lo->lo_queue->unplug_fn = NULL; | 932 | lo->lo_queue->unplug_fn = NULL; |
933 | lo->lo_backing_file = NULL; | 933 | lo->lo_backing_file = NULL; |
934 | 934 | ||
935 | loop_release_xfer(lo); | 935 | loop_release_xfer(lo); |
936 | lo->transfer = NULL; | 936 | lo->transfer = NULL; |
937 | lo->ioctl = NULL; | 937 | lo->ioctl = NULL; |
938 | lo->lo_device = NULL; | 938 | lo->lo_device = NULL; |
939 | lo->lo_encryption = NULL; | 939 | lo->lo_encryption = NULL; |
940 | lo->lo_offset = 0; | 940 | lo->lo_offset = 0; |
941 | lo->lo_sizelimit = 0; | 941 | lo->lo_sizelimit = 0; |
942 | lo->lo_encrypt_key_size = 0; | 942 | lo->lo_encrypt_key_size = 0; |
943 | lo->lo_flags = 0; | 943 | lo->lo_flags = 0; |
944 | lo->lo_thread = NULL; | 944 | lo->lo_thread = NULL; |
945 | memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); | 945 | memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); |
946 | memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); | 946 | memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); |
947 | memset(lo->lo_file_name, 0, LO_NAME_SIZE); | 947 | memset(lo->lo_file_name, 0, LO_NAME_SIZE); |
948 | if (bdev) | 948 | if (bdev) |
949 | invalidate_bdev(bdev); | 949 | invalidate_bdev(bdev); |
950 | set_capacity(lo->lo_disk, 0); | 950 | set_capacity(lo->lo_disk, 0); |
951 | if (bdev) { | 951 | if (bdev) { |
952 | bd_set_size(bdev, 0); | 952 | bd_set_size(bdev, 0); |
953 | /* let user-space know about this change */ | 953 | /* let user-space know about this change */ |
954 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); | 954 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); |
955 | } | 955 | } |
956 | mapping_set_gfp_mask(filp->f_mapping, gfp); | 956 | mapping_set_gfp_mask(filp->f_mapping, gfp); |
957 | lo->lo_state = Lo_unbound; | 957 | lo->lo_state = Lo_unbound; |
958 | /* This is safe: open() is still holding a reference. */ | 958 | /* This is safe: open() is still holding a reference. */ |
959 | module_put(THIS_MODULE); | 959 | module_put(THIS_MODULE); |
960 | if (max_part > 0 && bdev) | 960 | if (max_part > 0 && bdev) |
961 | ioctl_by_bdev(bdev, BLKRRPART, 0); | 961 | ioctl_by_bdev(bdev, BLKRRPART, 0); |
962 | mutex_unlock(&lo->lo_ctl_mutex); | 962 | mutex_unlock(&lo->lo_ctl_mutex); |
963 | /* | 963 | /* |
964 | * Need not hold lo_ctl_mutex to fput backing file. | 964 | * Need not hold lo_ctl_mutex to fput backing file. |
965 | * Calling fput while holding lo_ctl_mutex triggers a possible | 965 | * Calling fput while holding lo_ctl_mutex triggers a possible |
966 | * circular lock dependency warning, as fput can take | 966 | * circular lock dependency warning, as fput can take |
967 | * bd_mutex which is usually taken before lo_ctl_mutex. | 967 | * bd_mutex which is usually taken before lo_ctl_mutex. |
968 | */ | 968 | */ |
969 | fput(filp); | 969 | fput(filp); |
970 | return 0; | 970 | return 0; |
971 | } | 971 | } |
972 | 972 | ||
973 | static int | 973 | static int |
974 | loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | 974 | loop_set_status(struct loop_device *lo, const struct loop_info64 *info) |
975 | { | 975 | { |
976 | int err; | 976 | int err; |
977 | struct loop_func_table *xfer; | 977 | struct loop_func_table *xfer; |
978 | uid_t uid = current_uid(); | 978 | uid_t uid = current_uid(); |
979 | 979 | ||
980 | if (lo->lo_encrypt_key_size && | 980 | if (lo->lo_encrypt_key_size && |
981 | lo->lo_key_owner != uid && | 981 | lo->lo_key_owner != uid && |
982 | !capable(CAP_SYS_ADMIN)) | 982 | !capable(CAP_SYS_ADMIN)) |
983 | return -EPERM; | 983 | return -EPERM; |
984 | if (lo->lo_state != Lo_bound) | 984 | if (lo->lo_state != Lo_bound) |
985 | return -ENXIO; | 985 | return -ENXIO; |
986 | if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) | 986 | if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) |
987 | return -EINVAL; | 987 | return -EINVAL; |
988 | 988 | ||
989 | err = loop_release_xfer(lo); | 989 | err = loop_release_xfer(lo); |
990 | if (err) | 990 | if (err) |
991 | return err; | 991 | return err; |
992 | 992 | ||
993 | if (info->lo_encrypt_type) { | 993 | if (info->lo_encrypt_type) { |
994 | unsigned int type = info->lo_encrypt_type; | 994 | unsigned int type = info->lo_encrypt_type; |
995 | 995 | ||
996 | if (type >= MAX_LO_CRYPT) | 996 | if (type >= MAX_LO_CRYPT) |
997 | return -EINVAL; | 997 | return -EINVAL; |
998 | xfer = xfer_funcs[type]; | 998 | xfer = xfer_funcs[type]; |
999 | if (xfer == NULL) | 999 | if (xfer == NULL) |
1000 | return -EINVAL; | 1000 | return -EINVAL; |
1001 | } else | 1001 | } else |
1002 | xfer = NULL; | 1002 | xfer = NULL; |
1003 | 1003 | ||
1004 | err = loop_init_xfer(lo, xfer, info); | 1004 | err = loop_init_xfer(lo, xfer, info); |
1005 | if (err) | 1005 | if (err) |
1006 | return err; | 1006 | return err; |
1007 | 1007 | ||
1008 | if (lo->lo_offset != info->lo_offset || | 1008 | if (lo->lo_offset != info->lo_offset || |
1009 | lo->lo_sizelimit != info->lo_sizelimit) { | 1009 | lo->lo_sizelimit != info->lo_sizelimit) { |
1010 | lo->lo_offset = info->lo_offset; | 1010 | lo->lo_offset = info->lo_offset; |
1011 | lo->lo_sizelimit = info->lo_sizelimit; | 1011 | lo->lo_sizelimit = info->lo_sizelimit; |
1012 | if (figure_loop_size(lo)) | 1012 | if (figure_loop_size(lo)) |
1013 | return -EFBIG; | 1013 | return -EFBIG; |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); | 1016 | memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); |
1017 | memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); | 1017 | memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); |
1018 | lo->lo_file_name[LO_NAME_SIZE-1] = 0; | 1018 | lo->lo_file_name[LO_NAME_SIZE-1] = 0; |
1019 | lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; | 1019 | lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; |
1020 | 1020 | ||
1021 | if (!xfer) | 1021 | if (!xfer) |
1022 | xfer = &none_funcs; | 1022 | xfer = &none_funcs; |
1023 | lo->transfer = xfer->transfer; | 1023 | lo->transfer = xfer->transfer; |
1024 | lo->ioctl = xfer->ioctl; | 1024 | lo->ioctl = xfer->ioctl; |
1025 | 1025 | ||
1026 | if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != | 1026 | if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != |
1027 | (info->lo_flags & LO_FLAGS_AUTOCLEAR)) | 1027 | (info->lo_flags & LO_FLAGS_AUTOCLEAR)) |
1028 | lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; | 1028 | lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; |
1029 | 1029 | ||
1030 | lo->lo_encrypt_key_size = info->lo_encrypt_key_size; | 1030 | lo->lo_encrypt_key_size = info->lo_encrypt_key_size; |
1031 | lo->lo_init[0] = info->lo_init[0]; | 1031 | lo->lo_init[0] = info->lo_init[0]; |
1032 | lo->lo_init[1] = info->lo_init[1]; | 1032 | lo->lo_init[1] = info->lo_init[1]; |
1033 | if (info->lo_encrypt_key_size) { | 1033 | if (info->lo_encrypt_key_size) { |
1034 | memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, | 1034 | memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, |
1035 | info->lo_encrypt_key_size); | 1035 | info->lo_encrypt_key_size); |
1036 | lo->lo_key_owner = uid; | 1036 | lo->lo_key_owner = uid; |
1037 | } | 1037 | } |
1038 | 1038 | ||
1039 | return 0; | 1039 | return 0; |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | static int | 1042 | static int |
1043 | loop_get_status(struct loop_device *lo, struct loop_info64 *info) | 1043 | loop_get_status(struct loop_device *lo, struct loop_info64 *info) |
1044 | { | 1044 | { |
1045 | struct file *file = lo->lo_backing_file; | 1045 | struct file *file = lo->lo_backing_file; |
1046 | struct kstat stat; | 1046 | struct kstat stat; |
1047 | int error; | 1047 | int error; |
1048 | 1048 | ||
1049 | if (lo->lo_state != Lo_bound) | 1049 | if (lo->lo_state != Lo_bound) |
1050 | return -ENXIO; | 1050 | return -ENXIO; |
1051 | error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat); | 1051 | error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat); |
1052 | if (error) | 1052 | if (error) |
1053 | return error; | 1053 | return error; |
1054 | memset(info, 0, sizeof(*info)); | 1054 | memset(info, 0, sizeof(*info)); |
1055 | info->lo_number = lo->lo_number; | 1055 | info->lo_number = lo->lo_number; |
1056 | info->lo_device = huge_encode_dev(stat.dev); | 1056 | info->lo_device = huge_encode_dev(stat.dev); |
1057 | info->lo_inode = stat.ino; | 1057 | info->lo_inode = stat.ino; |
1058 | info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); | 1058 | info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); |
1059 | info->lo_offset = lo->lo_offset; | 1059 | info->lo_offset = lo->lo_offset; |
1060 | info->lo_sizelimit = lo->lo_sizelimit; | 1060 | info->lo_sizelimit = lo->lo_sizelimit; |
1061 | info->lo_flags = lo->lo_flags; | 1061 | info->lo_flags = lo->lo_flags; |
1062 | memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); | 1062 | memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); |
1063 | memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); | 1063 | memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); |
1064 | info->lo_encrypt_type = | 1064 | info->lo_encrypt_type = |
1065 | lo->lo_encryption ? lo->lo_encryption->number : 0; | 1065 | lo->lo_encryption ? lo->lo_encryption->number : 0; |
1066 | if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { | 1066 | if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { |
1067 | info->lo_encrypt_key_size = lo->lo_encrypt_key_size; | 1067 | info->lo_encrypt_key_size = lo->lo_encrypt_key_size; |
1068 | memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, | 1068 | memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, |
1069 | lo->lo_encrypt_key_size); | 1069 | lo->lo_encrypt_key_size); |
1070 | } | 1070 | } |
1071 | return 0; | 1071 | return 0; |
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | static void | 1074 | static void |
1075 | loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) | 1075 | loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) |
1076 | { | 1076 | { |
1077 | memset(info64, 0, sizeof(*info64)); | 1077 | memset(info64, 0, sizeof(*info64)); |
1078 | info64->lo_number = info->lo_number; | 1078 | info64->lo_number = info->lo_number; |
1079 | info64->lo_device = info->lo_device; | 1079 | info64->lo_device = info->lo_device; |
1080 | info64->lo_inode = info->lo_inode; | 1080 | info64->lo_inode = info->lo_inode; |
1081 | info64->lo_rdevice = info->lo_rdevice; | 1081 | info64->lo_rdevice = info->lo_rdevice; |
1082 | info64->lo_offset = info->lo_offset; | 1082 | info64->lo_offset = info->lo_offset; |
1083 | info64->lo_sizelimit = 0; | 1083 | info64->lo_sizelimit = 0; |
1084 | info64->lo_encrypt_type = info->lo_encrypt_type; | 1084 | info64->lo_encrypt_type = info->lo_encrypt_type; |
1085 | info64->lo_encrypt_key_size = info->lo_encrypt_key_size; | 1085 | info64->lo_encrypt_key_size = info->lo_encrypt_key_size; |
1086 | info64->lo_flags = info->lo_flags; | 1086 | info64->lo_flags = info->lo_flags; |
1087 | info64->lo_init[0] = info->lo_init[0]; | 1087 | info64->lo_init[0] = info->lo_init[0]; |
1088 | info64->lo_init[1] = info->lo_init[1]; | 1088 | info64->lo_init[1] = info->lo_init[1]; |
1089 | if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) | 1089 | if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) |
1090 | memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); | 1090 | memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); |
1091 | else | 1091 | else |
1092 | memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); | 1092 | memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); |
1093 | memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); | 1093 | memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | static int | 1096 | static int |
1097 | loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) | 1097 | loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) |
1098 | { | 1098 | { |
1099 | memset(info, 0, sizeof(*info)); | 1099 | memset(info, 0, sizeof(*info)); |
1100 | info->lo_number = info64->lo_number; | 1100 | info->lo_number = info64->lo_number; |
1101 | info->lo_device = info64->lo_device; | 1101 | info->lo_device = info64->lo_device; |
1102 | info->lo_inode = info64->lo_inode; | 1102 | info->lo_inode = info64->lo_inode; |
1103 | info->lo_rdevice = info64->lo_rdevice; | 1103 | info->lo_rdevice = info64->lo_rdevice; |
1104 | info->lo_offset = info64->lo_offset; | 1104 | info->lo_offset = info64->lo_offset; |
1105 | info->lo_encrypt_type = info64->lo_encrypt_type; | 1105 | info->lo_encrypt_type = info64->lo_encrypt_type; |
1106 | info->lo_encrypt_key_size = info64->lo_encrypt_key_size; | 1106 | info->lo_encrypt_key_size = info64->lo_encrypt_key_size; |
1107 | info->lo_flags = info64->lo_flags; | 1107 | info->lo_flags = info64->lo_flags; |
1108 | info->lo_init[0] = info64->lo_init[0]; | 1108 | info->lo_init[0] = info64->lo_init[0]; |
1109 | info->lo_init[1] = info64->lo_init[1]; | 1109 | info->lo_init[1] = info64->lo_init[1]; |
1110 | if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) | 1110 | if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) |
1111 | memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); | 1111 | memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); |
1112 | else | 1112 | else |
1113 | memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); | 1113 | memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); |
1114 | memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); | 1114 | memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); |
1115 | 1115 | ||
1116 | /* error in case values were truncated */ | 1116 | /* error in case values were truncated */ |
1117 | if (info->lo_device != info64->lo_device || | 1117 | if (info->lo_device != info64->lo_device || |
1118 | info->lo_rdevice != info64->lo_rdevice || | 1118 | info->lo_rdevice != info64->lo_rdevice || |
1119 | info->lo_inode != info64->lo_inode || | 1119 | info->lo_inode != info64->lo_inode || |
1120 | info->lo_offset != info64->lo_offset) | 1120 | info->lo_offset != info64->lo_offset) |
1121 | return -EOVERFLOW; | 1121 | return -EOVERFLOW; |
1122 | 1122 | ||
1123 | return 0; | 1123 | return 0; |
1124 | } | 1124 | } |
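
The copy-back comparison above is what turns silent truncation into an error for old-style callers. A small standalone illustration (values and program invented for the example; the old struct loop_info fields are assumed to be 32-bit) of why a 4 GiB offset trips the check:

    /* Illustrative only: a value that fits loop_info64 but not the old
     * 32-bit field compares unequal after the copy, which is exactly the
     * condition that makes loop_info64_to_old() return -EOVERFLOW. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t off64 = 1ULL << 32;       /* 4 GiB: fits loop_info64       */
            uint32_t off32 = (uint32_t)off64;  /* narrowing copy wraps to 0     */

            printf("off64=%llu off32=%u differ=%d\n",
                   (unsigned long long)off64, off32, (uint64_t)off32 != off64);
            return 0;
    }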
1125 | 1125 | ||
1126 | static int | 1126 | static int |
1127 | loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) | 1127 | loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) |
1128 | { | 1128 | { |
1129 | struct loop_info info; | 1129 | struct loop_info info; |
1130 | struct loop_info64 info64; | 1130 | struct loop_info64 info64; |
1131 | 1131 | ||
1132 | if (copy_from_user(&info, arg, sizeof (struct loop_info))) | 1132 | if (copy_from_user(&info, arg, sizeof (struct loop_info))) |
1133 | return -EFAULT; | 1133 | return -EFAULT; |
1134 | loop_info64_from_old(&info, &info64); | 1134 | loop_info64_from_old(&info, &info64); |
1135 | return loop_set_status(lo, &info64); | 1135 | return loop_set_status(lo, &info64); |
1136 | } | 1136 | } |
1137 | 1137 | ||
1138 | static int | 1138 | static int |
1139 | loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) | 1139 | loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) |
1140 | { | 1140 | { |
1141 | struct loop_info64 info64; | 1141 | struct loop_info64 info64; |
1142 | 1142 | ||
1143 | if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) | 1143 | if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) |
1144 | return -EFAULT; | 1144 | return -EFAULT; |
1145 | return loop_set_status(lo, &info64); | 1145 | return loop_set_status(lo, &info64); |
1146 | } | 1146 | } |
1147 | 1147 | ||
1148 | static int | 1148 | static int |
1149 | loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { | 1149 | loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { |
1150 | struct loop_info info; | 1150 | struct loop_info info; |
1151 | struct loop_info64 info64; | 1151 | struct loop_info64 info64; |
1152 | int err = 0; | 1152 | int err = 0; |
1153 | 1153 | ||
1154 | if (!arg) | 1154 | if (!arg) |
1155 | err = -EINVAL; | 1155 | err = -EINVAL; |
1156 | if (!err) | 1156 | if (!err) |
1157 | err = loop_get_status(lo, &info64); | 1157 | err = loop_get_status(lo, &info64); |
1158 | if (!err) | 1158 | if (!err) |
1159 | err = loop_info64_to_old(&info64, &info); | 1159 | err = loop_info64_to_old(&info64, &info); |
1160 | if (!err && copy_to_user(arg, &info, sizeof(info))) | 1160 | if (!err && copy_to_user(arg, &info, sizeof(info))) |
1161 | err = -EFAULT; | 1161 | err = -EFAULT; |
1162 | 1162 | ||
1163 | return err; | 1163 | return err; |
1164 | } | 1164 | } |
1165 | 1165 | ||
1166 | static int | 1166 | static int |
1167 | loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { | 1167 | loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { |
1168 | struct loop_info64 info64; | 1168 | struct loop_info64 info64; |
1169 | int err = 0; | 1169 | int err = 0; |
1170 | 1170 | ||
1171 | if (!arg) | 1171 | if (!arg) |
1172 | err = -EINVAL; | 1172 | err = -EINVAL; |
1173 | if (!err) | 1173 | if (!err) |
1174 | err = loop_get_status(lo, &info64); | 1174 | err = loop_get_status(lo, &info64); |
1175 | if (!err && copy_to_user(arg, &info64, sizeof(info64))) | 1175 | if (!err && copy_to_user(arg, &info64, sizeof(info64))) |
1176 | err = -EFAULT; | 1176 | err = -EFAULT; |
1177 | 1177 | ||
1178 | return err; | 1178 | return err; |
1179 | } | 1179 | } |
1180 | 1180 | ||
1181 | static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) | 1181 | static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) |
1182 | { | 1182 | { |
1183 | int err; | 1183 | int err; |
1184 | sector_t sec; | 1184 | sector_t sec; |
1185 | loff_t sz; | 1185 | loff_t sz; |
1186 | 1186 | ||
1187 | err = -ENXIO; | 1187 | err = -ENXIO; |
1188 | if (unlikely(lo->lo_state != Lo_bound)) | 1188 | if (unlikely(lo->lo_state != Lo_bound)) |
1189 | goto out; | 1189 | goto out; |
1190 | err = figure_loop_size(lo); | 1190 | err = figure_loop_size(lo); |
1191 | if (unlikely(err)) | 1191 | if (unlikely(err)) |
1192 | goto out; | 1192 | goto out; |
1193 | sec = get_capacity(lo->lo_disk); | 1193 | sec = get_capacity(lo->lo_disk); |
1194 | /* the width of sector_t may be too narrow for the bit-shift */ | 1194 | /* the width of sector_t may be too narrow for the bit-shift */ |
1195 | sz = sec; | 1195 | sz = sec; |
1196 | sz <<= 9; | 1196 | sz <<= 9; |
1197 | mutex_lock(&bdev->bd_mutex); | 1197 | mutex_lock(&bdev->bd_mutex); |
1198 | bd_set_size(bdev, sz); | 1198 | bd_set_size(bdev, sz); |
1199 | /* let user-space know about the new size */ | 1199 | /* let user-space know about the new size */ |
1200 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); | 1200 | kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); |
1201 | mutex_unlock(&bdev->bd_mutex); | 1201 | mutex_unlock(&bdev->bd_mutex); |
1202 | 1202 | ||
1203 | out: | 1203 | out: |
1204 | return err; | 1204 | return err; |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | static int lo_ioctl(struct block_device *bdev, fmode_t mode, | 1207 | static int lo_ioctl(struct block_device *bdev, fmode_t mode, |
1208 | unsigned int cmd, unsigned long arg) | 1208 | unsigned int cmd, unsigned long arg) |
1209 | { | 1209 | { |
1210 | struct loop_device *lo = bdev->bd_disk->private_data; | 1210 | struct loop_device *lo = bdev->bd_disk->private_data; |
1211 | int err; | 1211 | int err; |
1212 | 1212 | ||
1213 | mutex_lock_nested(&lo->lo_ctl_mutex, 1); | 1213 | mutex_lock_nested(&lo->lo_ctl_mutex, 1); |
1214 | switch (cmd) { | 1214 | switch (cmd) { |
1215 | case LOOP_SET_FD: | 1215 | case LOOP_SET_FD: |
1216 | err = loop_set_fd(lo, mode, bdev, arg); | 1216 | err = loop_set_fd(lo, mode, bdev, arg); |
1217 | break; | 1217 | break; |
1218 | case LOOP_CHANGE_FD: | 1218 | case LOOP_CHANGE_FD: |
1219 | err = loop_change_fd(lo, bdev, arg); | 1219 | err = loop_change_fd(lo, bdev, arg); |
1220 | break; | 1220 | break; |
1221 | case LOOP_CLR_FD: | 1221 | case LOOP_CLR_FD: |
1222 | /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ | 1222 | /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ |
1223 | err = loop_clr_fd(lo, bdev); | 1223 | err = loop_clr_fd(lo, bdev); |
1224 | if (!err) | 1224 | if (!err) |
1225 | goto out_unlocked; | 1225 | goto out_unlocked; |
1226 | break; | 1226 | break; |
1227 | case LOOP_SET_STATUS: | 1227 | case LOOP_SET_STATUS: |
1228 | err = loop_set_status_old(lo, (struct loop_info __user *) arg); | 1228 | err = loop_set_status_old(lo, (struct loop_info __user *) arg); |
1229 | break; | 1229 | break; |
1230 | case LOOP_GET_STATUS: | 1230 | case LOOP_GET_STATUS: |
1231 | err = loop_get_status_old(lo, (struct loop_info __user *) arg); | 1231 | err = loop_get_status_old(lo, (struct loop_info __user *) arg); |
1232 | break; | 1232 | break; |
1233 | case LOOP_SET_STATUS64: | 1233 | case LOOP_SET_STATUS64: |
1234 | err = loop_set_status64(lo, (struct loop_info64 __user *) arg); | 1234 | err = loop_set_status64(lo, (struct loop_info64 __user *) arg); |
1235 | break; | 1235 | break; |
1236 | case LOOP_GET_STATUS64: | 1236 | case LOOP_GET_STATUS64: |
1237 | err = loop_get_status64(lo, (struct loop_info64 __user *) arg); | 1237 | err = loop_get_status64(lo, (struct loop_info64 __user *) arg); |
1238 | break; | 1238 | break; |
1239 | case LOOP_SET_CAPACITY: | 1239 | case LOOP_SET_CAPACITY: |
1240 | err = -EPERM; | 1240 | err = -EPERM; |
1241 | if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) | 1241 | if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) |
1242 | err = loop_set_capacity(lo, bdev); | 1242 | err = loop_set_capacity(lo, bdev); |
1243 | break; | 1243 | break; |
1244 | default: | 1244 | default: |
1245 | err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; | 1245 | err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; |
1246 | } | 1246 | } |
1247 | mutex_unlock(&lo->lo_ctl_mutex); | 1247 | mutex_unlock(&lo->lo_ctl_mutex); |
1248 | 1248 | ||
1249 | out_unlocked: | 1249 | out_unlocked: |
1250 | return err; | 1250 | return err; |
1251 | } | 1251 | } |
1252 | 1252 | ||
1253 | #ifdef CONFIG_COMPAT | 1253 | #ifdef CONFIG_COMPAT |
1254 | struct compat_loop_info { | 1254 | struct compat_loop_info { |
1255 | compat_int_t lo_number; /* ioctl r/o */ | 1255 | compat_int_t lo_number; /* ioctl r/o */ |
1256 | compat_dev_t lo_device; /* ioctl r/o */ | 1256 | compat_dev_t lo_device; /* ioctl r/o */ |
1257 | compat_ulong_t lo_inode; /* ioctl r/o */ | 1257 | compat_ulong_t lo_inode; /* ioctl r/o */ |
1258 | compat_dev_t lo_rdevice; /* ioctl r/o */ | 1258 | compat_dev_t lo_rdevice; /* ioctl r/o */ |
1259 | compat_int_t lo_offset; | 1259 | compat_int_t lo_offset; |
1260 | compat_int_t lo_encrypt_type; | 1260 | compat_int_t lo_encrypt_type; |
1261 | compat_int_t lo_encrypt_key_size; /* ioctl w/o */ | 1261 | compat_int_t lo_encrypt_key_size; /* ioctl w/o */ |
1262 | compat_int_t lo_flags; /* ioctl r/o */ | 1262 | compat_int_t lo_flags; /* ioctl r/o */ |
1263 | char lo_name[LO_NAME_SIZE]; | 1263 | char lo_name[LO_NAME_SIZE]; |
1264 | unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ | 1264 | unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ |
1265 | compat_ulong_t lo_init[2]; | 1265 | compat_ulong_t lo_init[2]; |
1266 | char reserved[4]; | 1266 | char reserved[4]; |
1267 | }; | 1267 | }; |
1268 | 1268 | ||
1269 | /* | 1269 | /* |
1270 | * Transfer 32-bit compatibility structure in userspace to 64-bit loop info | 1270 | * Transfer 32-bit compatibility structure in userspace to 64-bit loop info |
1271 | * - noinlined to reduce stack space usage in main part of driver | 1271 | * - noinlined to reduce stack space usage in main part of driver |
1272 | */ | 1272 | */ |
1273 | static noinline int | 1273 | static noinline int |
1274 | loop_info64_from_compat(const struct compat_loop_info __user *arg, | 1274 | loop_info64_from_compat(const struct compat_loop_info __user *arg, |
1275 | struct loop_info64 *info64) | 1275 | struct loop_info64 *info64) |
1276 | { | 1276 | { |
1277 | struct compat_loop_info info; | 1277 | struct compat_loop_info info; |
1278 | 1278 | ||
1279 | if (copy_from_user(&info, arg, sizeof(info))) | 1279 | if (copy_from_user(&info, arg, sizeof(info))) |
1280 | return -EFAULT; | 1280 | return -EFAULT; |
1281 | 1281 | ||
1282 | memset(info64, 0, sizeof(*info64)); | 1282 | memset(info64, 0, sizeof(*info64)); |
1283 | info64->lo_number = info.lo_number; | 1283 | info64->lo_number = info.lo_number; |
1284 | info64->lo_device = info.lo_device; | 1284 | info64->lo_device = info.lo_device; |
1285 | info64->lo_inode = info.lo_inode; | 1285 | info64->lo_inode = info.lo_inode; |
1286 | info64->lo_rdevice = info.lo_rdevice; | 1286 | info64->lo_rdevice = info.lo_rdevice; |
1287 | info64->lo_offset = info.lo_offset; | 1287 | info64->lo_offset = info.lo_offset; |
1288 | info64->lo_sizelimit = 0; | 1288 | info64->lo_sizelimit = 0; |
1289 | info64->lo_encrypt_type = info.lo_encrypt_type; | 1289 | info64->lo_encrypt_type = info.lo_encrypt_type; |
1290 | info64->lo_encrypt_key_size = info.lo_encrypt_key_size; | 1290 | info64->lo_encrypt_key_size = info.lo_encrypt_key_size; |
1291 | info64->lo_flags = info.lo_flags; | 1291 | info64->lo_flags = info.lo_flags; |
1292 | info64->lo_init[0] = info.lo_init[0]; | 1292 | info64->lo_init[0] = info.lo_init[0]; |
1293 | info64->lo_init[1] = info.lo_init[1]; | 1293 | info64->lo_init[1] = info.lo_init[1]; |
1294 | if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) | 1294 | if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) |
1295 | memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); | 1295 | memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); |
1296 | else | 1296 | else |
1297 | memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); | 1297 | memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); |
1298 | memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); | 1298 | memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); |
1299 | return 0; | 1299 | return 0; |
1300 | } | 1300 | } |
1301 | 1301 | ||
1302 | /* | 1302 | /* |
1303 | * Transfer 64-bit loop info to 32-bit compatibility structure in userspace | 1303 | * Transfer 64-bit loop info to 32-bit compatibility structure in userspace |
1304 | * - noinlined to reduce stack space usage in main part of driver | 1304 | * - noinlined to reduce stack space usage in main part of driver |
1305 | */ | 1305 | */ |
1306 | static noinline int | 1306 | static noinline int |
1307 | loop_info64_to_compat(const struct loop_info64 *info64, | 1307 | loop_info64_to_compat(const struct loop_info64 *info64, |
1308 | struct compat_loop_info __user *arg) | 1308 | struct compat_loop_info __user *arg) |
1309 | { | 1309 | { |
1310 | struct compat_loop_info info; | 1310 | struct compat_loop_info info; |
1311 | 1311 | ||
1312 | memset(&info, 0, sizeof(info)); | 1312 | memset(&info, 0, sizeof(info)); |
1313 | info.lo_number = info64->lo_number; | 1313 | info.lo_number = info64->lo_number; |
1314 | info.lo_device = info64->lo_device; | 1314 | info.lo_device = info64->lo_device; |
1315 | info.lo_inode = info64->lo_inode; | 1315 | info.lo_inode = info64->lo_inode; |
1316 | info.lo_rdevice = info64->lo_rdevice; | 1316 | info.lo_rdevice = info64->lo_rdevice; |
1317 | info.lo_offset = info64->lo_offset; | 1317 | info.lo_offset = info64->lo_offset; |
1318 | info.lo_encrypt_type = info64->lo_encrypt_type; | 1318 | info.lo_encrypt_type = info64->lo_encrypt_type; |
1319 | info.lo_encrypt_key_size = info64->lo_encrypt_key_size; | 1319 | info.lo_encrypt_key_size = info64->lo_encrypt_key_size; |
1320 | info.lo_flags = info64->lo_flags; | 1320 | info.lo_flags = info64->lo_flags; |
1321 | info.lo_init[0] = info64->lo_init[0]; | 1321 | info.lo_init[0] = info64->lo_init[0]; |
1322 | info.lo_init[1] = info64->lo_init[1]; | 1322 | info.lo_init[1] = info64->lo_init[1]; |
1323 | if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) | 1323 | if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) |
1324 | memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); | 1324 | memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); |
1325 | else | 1325 | else |
1326 | memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); | 1326 | memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); |
1327 | memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); | 1327 | memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); |
1328 | 1328 | ||
1329 | /* error in case values were truncated */ | 1329 | /* error in case values were truncated */ |
1330 | if (info.lo_device != info64->lo_device || | 1330 | if (info.lo_device != info64->lo_device || |
1331 | info.lo_rdevice != info64->lo_rdevice || | 1331 | info.lo_rdevice != info64->lo_rdevice || |
1332 | info.lo_inode != info64->lo_inode || | 1332 | info.lo_inode != info64->lo_inode || |
1333 | info.lo_offset != info64->lo_offset || | 1333 | info.lo_offset != info64->lo_offset || |
1334 | info.lo_init[0] != info64->lo_init[0] || | 1334 | info.lo_init[0] != info64->lo_init[0] || |
1335 | info.lo_init[1] != info64->lo_init[1]) | 1335 | info.lo_init[1] != info64->lo_init[1]) |
1336 | return -EOVERFLOW; | 1336 | return -EOVERFLOW; |
1337 | 1337 | ||
1338 | if (copy_to_user(arg, &info, sizeof(info))) | 1338 | if (copy_to_user(arg, &info, sizeof(info))) |
1339 | return -EFAULT; | 1339 | return -EFAULT; |
1340 | return 0; | 1340 | return 0; |
1341 | } | 1341 | } |
1342 | 1342 | ||
1343 | static int | 1343 | static int |
1344 | loop_set_status_compat(struct loop_device *lo, | 1344 | loop_set_status_compat(struct loop_device *lo, |
1345 | const struct compat_loop_info __user *arg) | 1345 | const struct compat_loop_info __user *arg) |
1346 | { | 1346 | { |
1347 | struct loop_info64 info64; | 1347 | struct loop_info64 info64; |
1348 | int ret; | 1348 | int ret; |
1349 | 1349 | ||
1350 | ret = loop_info64_from_compat(arg, &info64); | 1350 | ret = loop_info64_from_compat(arg, &info64); |
1351 | if (ret < 0) | 1351 | if (ret < 0) |
1352 | return ret; | 1352 | return ret; |
1353 | return loop_set_status(lo, &info64); | 1353 | return loop_set_status(lo, &info64); |
1354 | } | 1354 | } |
1355 | 1355 | ||
1356 | static int | 1356 | static int |
1357 | loop_get_status_compat(struct loop_device *lo, | 1357 | loop_get_status_compat(struct loop_device *lo, |
1358 | struct compat_loop_info __user *arg) | 1358 | struct compat_loop_info __user *arg) |
1359 | { | 1359 | { |
1360 | struct loop_info64 info64; | 1360 | struct loop_info64 info64; |
1361 | int err = 0; | 1361 | int err = 0; |
1362 | 1362 | ||
1363 | if (!arg) | 1363 | if (!arg) |
1364 | err = -EINVAL; | 1364 | err = -EINVAL; |
1365 | if (!err) | 1365 | if (!err) |
1366 | err = loop_get_status(lo, &info64); | 1366 | err = loop_get_status(lo, &info64); |
1367 | if (!err) | 1367 | if (!err) |
1368 | err = loop_info64_to_compat(&info64, arg); | 1368 | err = loop_info64_to_compat(&info64, arg); |
1369 | return err; | 1369 | return err; |
1370 | } | 1370 | } |
1371 | 1371 | ||
1372 | static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, | 1372 | static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, |
1373 | unsigned int cmd, unsigned long arg) | 1373 | unsigned int cmd, unsigned long arg) |
1374 | { | 1374 | { |
1375 | struct loop_device *lo = bdev->bd_disk->private_data; | 1375 | struct loop_device *lo = bdev->bd_disk->private_data; |
1376 | int err; | 1376 | int err; |
1377 | 1377 | ||
1378 | switch(cmd) { | 1378 | switch(cmd) { |
1379 | case LOOP_SET_STATUS: | 1379 | case LOOP_SET_STATUS: |
1380 | mutex_lock(&lo->lo_ctl_mutex); | 1380 | mutex_lock(&lo->lo_ctl_mutex); |
1381 | err = loop_set_status_compat( | 1381 | err = loop_set_status_compat( |
1382 | lo, (const struct compat_loop_info __user *) arg); | 1382 | lo, (const struct compat_loop_info __user *) arg); |
1383 | mutex_unlock(&lo->lo_ctl_mutex); | 1383 | mutex_unlock(&lo->lo_ctl_mutex); |
1384 | break; | 1384 | break; |
1385 | case LOOP_GET_STATUS: | 1385 | case LOOP_GET_STATUS: |
1386 | mutex_lock(&lo->lo_ctl_mutex); | 1386 | mutex_lock(&lo->lo_ctl_mutex); |
1387 | err = loop_get_status_compat( | 1387 | err = loop_get_status_compat( |
1388 | lo, (struct compat_loop_info __user *) arg); | 1388 | lo, (struct compat_loop_info __user *) arg); |
1389 | mutex_unlock(&lo->lo_ctl_mutex); | 1389 | mutex_unlock(&lo->lo_ctl_mutex); |
1390 | break; | 1390 | break; |
1391 | case LOOP_SET_CAPACITY: | 1391 | case LOOP_SET_CAPACITY: |
1392 | case LOOP_CLR_FD: | 1392 | case LOOP_CLR_FD: |
1393 | case LOOP_GET_STATUS64: | 1393 | case LOOP_GET_STATUS64: |
1394 | case LOOP_SET_STATUS64: | 1394 | case LOOP_SET_STATUS64: |
1395 | arg = (unsigned long) compat_ptr(arg); | 1395 | arg = (unsigned long) compat_ptr(arg); |
1396 | case LOOP_SET_FD: | 1396 | case LOOP_SET_FD: |
1397 | case LOOP_CHANGE_FD: | 1397 | case LOOP_CHANGE_FD: |
1398 | err = lo_ioctl(bdev, mode, cmd, arg); | 1398 | err = lo_ioctl(bdev, mode, cmd, arg); |
1399 | break; | 1399 | break; |
1400 | default: | 1400 | default: |
1401 | err = -ENOIOCTLCMD; | 1401 | err = -ENOIOCTLCMD; |
1402 | break; | 1402 | break; |
1403 | } | 1403 | } |
1404 | return err; | 1404 | return err; |
1405 | } | 1405 | } |
1406 | #endif | 1406 | #endif |
1407 | 1407 | ||
1408 | static int lo_open(struct block_device *bdev, fmode_t mode) | 1408 | static int lo_open(struct block_device *bdev, fmode_t mode) |
1409 | { | 1409 | { |
1410 | struct loop_device *lo = bdev->bd_disk->private_data; | 1410 | struct loop_device *lo = bdev->bd_disk->private_data; |
1411 | 1411 | ||
1412 | lock_kernel(); | 1412 | lock_kernel(); |
1413 | mutex_lock(&lo->lo_ctl_mutex); | 1413 | mutex_lock(&lo->lo_ctl_mutex); |
1414 | lo->lo_refcnt++; | 1414 | lo->lo_refcnt++; |
1415 | mutex_unlock(&lo->lo_ctl_mutex); | 1415 | mutex_unlock(&lo->lo_ctl_mutex); |
1416 | unlock_kernel(); | 1416 | unlock_kernel(); |
1417 | 1417 | ||
1418 | return 0; | 1418 | return 0; |
1419 | } | 1419 | } |
1420 | 1420 | ||
1421 | static int lo_release(struct gendisk *disk, fmode_t mode) | 1421 | static int lo_release(struct gendisk *disk, fmode_t mode) |
1422 | { | 1422 | { |
1423 | struct loop_device *lo = disk->private_data; | 1423 | struct loop_device *lo = disk->private_data; |
1424 | int err; | 1424 | int err; |
1425 | 1425 | ||
1426 | lock_kernel(); | 1426 | lock_kernel(); |
1427 | mutex_lock(&lo->lo_ctl_mutex); | 1427 | mutex_lock(&lo->lo_ctl_mutex); |
1428 | 1428 | ||
1429 | if (--lo->lo_refcnt) | 1429 | if (--lo->lo_refcnt) |
1430 | goto out; | 1430 | goto out; |
1431 | 1431 | ||
1432 | if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { | 1432 | if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { |
1433 | /* | 1433 | /* |
1434 | * In autoclear mode, stop the loop thread | 1434 | * In autoclear mode, stop the loop thread |
1435 | * and remove configuration after last close. | 1435 | * and remove configuration after last close. |
1436 | */ | 1436 | */ |
1437 | err = loop_clr_fd(lo, NULL); | 1437 | err = loop_clr_fd(lo, NULL); |
1438 | if (!err) | 1438 | if (!err) |
1439 | goto out_unlocked; | 1439 | goto out_unlocked; |
1440 | } else { | 1440 | } else { |
1441 | /* | 1441 | /* |
1442 | * Otherwise keep thread (if running) and config, | 1442 | * Otherwise keep thread (if running) and config, |
1443 | * but flush possible ongoing bios in thread. | 1443 | * but flush possible ongoing bios in thread. |
1444 | */ | 1444 | */ |
1445 | loop_flush(lo); | 1445 | loop_flush(lo); |
1446 | } | 1446 | } |
1447 | 1447 | ||
1448 | out: | 1448 | out: |
1449 | mutex_unlock(&lo->lo_ctl_mutex); | 1449 | mutex_unlock(&lo->lo_ctl_mutex); |
1450 | out_unlocked: | 1450 | out_unlocked: |
1451 | lock_kernel(); | 1451 | lock_kernel(); |
1452 | return 0; | 1452 | return 0; |
1453 | } | 1453 | } |
1454 | 1454 | ||
1455 | static const struct block_device_operations lo_fops = { | 1455 | static const struct block_device_operations lo_fops = { |
1456 | .owner = THIS_MODULE, | 1456 | .owner = THIS_MODULE, |
1457 | .open = lo_open, | 1457 | .open = lo_open, |
1458 | .release = lo_release, | 1458 | .release = lo_release, |
1459 | .ioctl = lo_ioctl, | 1459 | .ioctl = lo_ioctl, |
1460 | #ifdef CONFIG_COMPAT | 1460 | #ifdef CONFIG_COMPAT |
1461 | .compat_ioctl = lo_compat_ioctl, | 1461 | .compat_ioctl = lo_compat_ioctl, |
1462 | #endif | 1462 | #endif |
1463 | }; | 1463 | }; |
1464 | 1464 | ||
1465 | /* | 1465 | /* |
1466 | * And now the modules code and kernel interface. | 1466 | * And now the modules code and kernel interface. |
1467 | */ | 1467 | */ |
1468 | static int max_loop; | 1468 | static int max_loop; |
1469 | module_param(max_loop, int, 0); | 1469 | module_param(max_loop, int, 0); |
1470 | MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); | 1470 | MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); |
1471 | module_param(max_part, int, 0); | 1471 | module_param(max_part, int, 0); |
1472 | MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); | 1472 | MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); |
1473 | MODULE_LICENSE("GPL"); | 1473 | MODULE_LICENSE("GPL"); |
1474 | MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); | 1474 | MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); |
1475 | 1475 | ||
1476 | int loop_register_transfer(struct loop_func_table *funcs) | 1476 | int loop_register_transfer(struct loop_func_table *funcs) |
1477 | { | 1477 | { |
1478 | unsigned int n = funcs->number; | 1478 | unsigned int n = funcs->number; |
1479 | 1479 | ||
1480 | if (n >= MAX_LO_CRYPT || xfer_funcs[n]) | 1480 | if (n >= MAX_LO_CRYPT || xfer_funcs[n]) |
1481 | return -EINVAL; | 1481 | return -EINVAL; |
1482 | xfer_funcs[n] = funcs; | 1482 | xfer_funcs[n] = funcs; |
1483 | return 0; | 1483 | return 0; |
1484 | } | 1484 | } |
1485 | 1485 | ||
1486 | int loop_unregister_transfer(int number) | 1486 | int loop_unregister_transfer(int number) |
1487 | { | 1487 | { |
1488 | unsigned int n = number; | 1488 | unsigned int n = number; |
1489 | struct loop_device *lo; | 1489 | struct loop_device *lo; |
1490 | struct loop_func_table *xfer; | 1490 | struct loop_func_table *xfer; |
1491 | 1491 | ||
1492 | if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) | 1492 | if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) |
1493 | return -EINVAL; | 1493 | return -EINVAL; |
1494 | 1494 | ||
1495 | xfer_funcs[n] = NULL; | 1495 | xfer_funcs[n] = NULL; |
1496 | 1496 | ||
1497 | list_for_each_entry(lo, &loop_devices, lo_list) { | 1497 | list_for_each_entry(lo, &loop_devices, lo_list) { |
1498 | mutex_lock(&lo->lo_ctl_mutex); | 1498 | mutex_lock(&lo->lo_ctl_mutex); |
1499 | 1499 | ||
1500 | if (lo->lo_encryption == xfer) | 1500 | if (lo->lo_encryption == xfer) |
1501 | loop_release_xfer(lo); | 1501 | loop_release_xfer(lo); |
1502 | 1502 | ||
1503 | mutex_unlock(&lo->lo_ctl_mutex); | 1503 | mutex_unlock(&lo->lo_ctl_mutex); |
1504 | } | 1504 | } |
1505 | 1505 | ||
1506 | return 0; | 1506 | return 0; |
1507 | } | 1507 | } |
1508 | 1508 | ||
1509 | EXPORT_SYMBOL(loop_register_transfer); | 1509 | EXPORT_SYMBOL(loop_register_transfer); |
1510 | EXPORT_SYMBOL(loop_unregister_transfer); | 1510 | EXPORT_SYMBOL(loop_unregister_transfer); |
1511 | 1511 | ||
1512 | static struct loop_device *loop_alloc(int i) | 1512 | static struct loop_device *loop_alloc(int i) |
1513 | { | 1513 | { |
1514 | struct loop_device *lo; | 1514 | struct loop_device *lo; |
1515 | struct gendisk *disk; | 1515 | struct gendisk *disk; |
1516 | 1516 | ||
1517 | lo = kzalloc(sizeof(*lo), GFP_KERNEL); | 1517 | lo = kzalloc(sizeof(*lo), GFP_KERNEL); |
1518 | if (!lo) | 1518 | if (!lo) |
1519 | goto out; | 1519 | goto out; |
1520 | 1520 | ||
1521 | lo->lo_queue = blk_alloc_queue(GFP_KERNEL); | 1521 | lo->lo_queue = blk_alloc_queue(GFP_KERNEL); |
1522 | if (!lo->lo_queue) | 1522 | if (!lo->lo_queue) |
1523 | goto out_free_dev; | 1523 | goto out_free_dev; |
1524 | 1524 | ||
1525 | disk = lo->lo_disk = alloc_disk(1 << part_shift); | 1525 | disk = lo->lo_disk = alloc_disk(1 << part_shift); |
1526 | if (!disk) | 1526 | if (!disk) |
1527 | goto out_free_queue; | 1527 | goto out_free_queue; |
1528 | 1528 | ||
1529 | mutex_init(&lo->lo_ctl_mutex); | 1529 | mutex_init(&lo->lo_ctl_mutex); |
1530 | lo->lo_number = i; | 1530 | lo->lo_number = i; |
1531 | lo->lo_thread = NULL; | 1531 | lo->lo_thread = NULL; |
1532 | init_waitqueue_head(&lo->lo_event); | 1532 | init_waitqueue_head(&lo->lo_event); |
1533 | spin_lock_init(&lo->lo_lock); | 1533 | spin_lock_init(&lo->lo_lock); |
1534 | disk->major = LOOP_MAJOR; | 1534 | disk->major = LOOP_MAJOR; |
1535 | disk->first_minor = i << part_shift; | 1535 | disk->first_minor = i << part_shift; |
1536 | disk->fops = &lo_fops; | 1536 | disk->fops = &lo_fops; |
1537 | disk->private_data = lo; | 1537 | disk->private_data = lo; |
1538 | disk->queue = lo->lo_queue; | 1538 | disk->queue = lo->lo_queue; |
1539 | sprintf(disk->disk_name, "loop%d", i); | 1539 | sprintf(disk->disk_name, "loop%d", i); |
1540 | return lo; | 1540 | return lo; |
1541 | 1541 | ||
1542 | out_free_queue: | 1542 | out_free_queue: |
1543 | blk_cleanup_queue(lo->lo_queue); | 1543 | blk_cleanup_queue(lo->lo_queue); |
1544 | out_free_dev: | 1544 | out_free_dev: |
1545 | kfree(lo); | 1545 | kfree(lo); |
1546 | out: | 1546 | out: |
1547 | return NULL; | 1547 | return NULL; |
1548 | } | 1548 | } |
1549 | 1549 | ||
1550 | static void loop_free(struct loop_device *lo) | 1550 | static void loop_free(struct loop_device *lo) |
1551 | { | 1551 | { |
1552 | blk_cleanup_queue(lo->lo_queue); | 1552 | blk_cleanup_queue(lo->lo_queue); |
1553 | put_disk(lo->lo_disk); | 1553 | put_disk(lo->lo_disk); |
1554 | list_del(&lo->lo_list); | 1554 | list_del(&lo->lo_list); |
1555 | kfree(lo); | 1555 | kfree(lo); |
1556 | } | 1556 | } |
1557 | 1557 | ||
1558 | static struct loop_device *loop_init_one(int i) | 1558 | static struct loop_device *loop_init_one(int i) |
1559 | { | 1559 | { |
1560 | struct loop_device *lo; | 1560 | struct loop_device *lo; |
1561 | 1561 | ||
1562 | list_for_each_entry(lo, &loop_devices, lo_list) { | 1562 | list_for_each_entry(lo, &loop_devices, lo_list) { |
1563 | if (lo->lo_number == i) | 1563 | if (lo->lo_number == i) |
1564 | return lo; | 1564 | return lo; |
1565 | } | 1565 | } |
1566 | 1566 | ||
1567 | lo = loop_alloc(i); | 1567 | lo = loop_alloc(i); |
1568 | if (lo) { | 1568 | if (lo) { |
1569 | add_disk(lo->lo_disk); | 1569 | add_disk(lo->lo_disk); |
1570 | list_add_tail(&lo->lo_list, &loop_devices); | 1570 | list_add_tail(&lo->lo_list, &loop_devices); |
1571 | } | 1571 | } |
1572 | return lo; | 1572 | return lo; |
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | static void loop_del_one(struct loop_device *lo) | 1575 | static void loop_del_one(struct loop_device *lo) |
1576 | { | 1576 | { |
1577 | del_gendisk(lo->lo_disk); | 1577 | del_gendisk(lo->lo_disk); |
1578 | loop_free(lo); | 1578 | loop_free(lo); |
1579 | } | 1579 | } |
1580 | 1580 | ||
1581 | static struct kobject *loop_probe(dev_t dev, int *part, void *data) | 1581 | static struct kobject *loop_probe(dev_t dev, int *part, void *data) |
1582 | { | 1582 | { |
1583 | struct loop_device *lo; | 1583 | struct loop_device *lo; |
1584 | struct kobject *kobj; | 1584 | struct kobject *kobj; |
1585 | 1585 | ||
1586 | mutex_lock(&loop_devices_mutex); | 1586 | mutex_lock(&loop_devices_mutex); |
1587 | lo = loop_init_one(dev & MINORMASK); | 1587 | lo = loop_init_one(dev & MINORMASK); |
1588 | kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); | 1588 | kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); |
1589 | mutex_unlock(&loop_devices_mutex); | 1589 | mutex_unlock(&loop_devices_mutex); |
1590 | 1590 | ||
1591 | *part = 0; | 1591 | *part = 0; |
1592 | return kobj; | 1592 | return kobj; |
1593 | } | 1593 | } |
1594 | 1594 | ||
1595 | static int __init loop_init(void) | 1595 | static int __init loop_init(void) |
1596 | { | 1596 | { |
1597 | int i, nr; | 1597 | int i, nr; |
1598 | unsigned long range; | 1598 | unsigned long range; |
1599 | struct loop_device *lo, *next; | 1599 | struct loop_device *lo, *next; |
1600 | 1600 | ||
1601 | /* | 1601 | /* |
1602 | * The loop module can now instantiate the underlying device | 1602 | * The loop module can now instantiate the underlying device |
1603 | * structure on demand, provided its device node is accessed. | 1603 | * structure on demand, provided its device node is accessed. |
1604 | * However, this does not work well with user space tools that don't | 1604 | * However, this does not work well with user space tools that don't |
1605 | * know about this "feature". In order not to break any existing | 1605 | * know about this "feature". In order not to break any existing |
1606 | * tools, we do the following: | 1606 | * tools, we do the following: |
1607 | * | 1607 | * |
1608 | * (1) if max_loop is specified, create that many devices upfront, and | 1608 | * (1) if max_loop is specified, create that many devices upfront, and |
1609 | * this also becomes a hard limit. | 1609 | * this also becomes a hard limit. |
1610 | * (2) if max_loop is not specified, create 8 loop devices on module | 1610 | * (2) if max_loop is not specified, create 8 loop devices on module |
1611 | * load; users can create further loop devices by creating device | 1611 | * load; users can create further loop devices by creating device |
1612 | * nodes themselves and having the kernel instantiate the actual | 1612 | * nodes themselves and having the kernel instantiate the actual |
1613 | * device on demand. | 1613 | * device on demand. |
1614 | */ | 1614 | */ |
1615 | 1615 | ||
1616 | part_shift = 0; | 1616 | part_shift = 0; |
1617 | if (max_part > 0) | 1617 | if (max_part > 0) |
1618 | part_shift = fls(max_part); | 1618 | part_shift = fls(max_part); |
1619 | 1619 | ||
1620 | if (max_loop > 1UL << (MINORBITS - part_shift)) | 1620 | if (max_loop > 1UL << (MINORBITS - part_shift)) |
1621 | return -EINVAL; | 1621 | return -EINVAL; |
1622 | 1622 | ||
1623 | if (max_loop) { | 1623 | if (max_loop) { |
1624 | nr = max_loop; | 1624 | nr = max_loop; |
1625 | range = max_loop; | 1625 | range = max_loop; |
1626 | } else { | 1626 | } else { |
1627 | nr = 8; | 1627 | nr = 8; |
1628 | range = 1UL << (MINORBITS - part_shift); | 1628 | range = 1UL << (MINORBITS - part_shift); |
1629 | } | 1629 | } |
1630 | 1630 | ||
1631 | if (register_blkdev(LOOP_MAJOR, "loop")) | 1631 | if (register_blkdev(LOOP_MAJOR, "loop")) |
1632 | return -EIO; | 1632 | return -EIO; |
1633 | 1633 | ||
1634 | for (i = 0; i < nr; i++) { | 1634 | for (i = 0; i < nr; i++) { |
1635 | lo = loop_alloc(i); | 1635 | lo = loop_alloc(i); |
1636 | if (!lo) | 1636 | if (!lo) |
1637 | goto Enomem; | 1637 | goto Enomem; |
1638 | list_add_tail(&lo->lo_list, &loop_devices); | 1638 | list_add_tail(&lo->lo_list, &loop_devices); |
1639 | } | 1639 | } |
1640 | 1640 | ||
1641 | /* point of no return */ | 1641 | /* point of no return */ |
1642 | 1642 | ||
1643 | list_for_each_entry(lo, &loop_devices, lo_list) | 1643 | list_for_each_entry(lo, &loop_devices, lo_list) |
1644 | add_disk(lo->lo_disk); | 1644 | add_disk(lo->lo_disk); |
1645 | 1645 | ||
1646 | blk_register_region(MKDEV(LOOP_MAJOR, 0), range, | 1646 | blk_register_region(MKDEV(LOOP_MAJOR, 0), range, |
1647 | THIS_MODULE, loop_probe, NULL, NULL); | 1647 | THIS_MODULE, loop_probe, NULL, NULL); |
1648 | 1648 | ||
1649 | printk(KERN_INFO "loop: module loaded\n"); | 1649 | printk(KERN_INFO "loop: module loaded\n"); |
1650 | return 0; | 1650 | return 0; |
1651 | 1651 | ||
1652 | Enomem: | 1652 | Enomem: |
1653 | printk(KERN_INFO "loop: out of memory\n"); | 1653 | printk(KERN_INFO "loop: out of memory\n"); |
1654 | 1654 | ||
1655 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) | 1655 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) |
1656 | loop_free(lo); | 1656 | loop_free(lo); |
1657 | 1657 | ||
1658 | unregister_blkdev(LOOP_MAJOR, "loop"); | 1658 | unregister_blkdev(LOOP_MAJOR, "loop"); |
1659 | return -ENOMEM; | 1659 | return -ENOMEM; |
1660 | } | 1660 | } |
1661 | 1661 | ||
1662 | static void __exit loop_exit(void) | 1662 | static void __exit loop_exit(void) |
1663 | { | 1663 | { |
1664 | unsigned long range; | 1664 | unsigned long range; |
1665 | struct loop_device *lo, *next; | 1665 | struct loop_device *lo, *next; |
1666 | 1666 | ||
1667 | range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift); | 1667 | range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift); |
1668 | 1668 | ||
1669 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) | 1669 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) |
1670 | loop_del_one(lo); | 1670 | loop_del_one(lo); |
1671 | 1671 | ||
1672 | blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); | 1672 | blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); |
1673 | unregister_blkdev(LOOP_MAJOR, "loop"); | 1673 | unregister_blkdev(LOOP_MAJOR, "loop"); |
1674 | } | 1674 | } |
1675 | 1675 | ||
1676 | module_init(loop_init); | 1676 | module_init(loop_init); |
1677 | module_exit(loop_exit); | 1677 | module_exit(loop_exit); |
1678 | 1678 | ||
1679 | #ifndef MODULE | 1679 | #ifndef MODULE |
1680 | static int __init max_loop_setup(char *str) | 1680 | static int __init max_loop_setup(char *str) |
1681 | { | 1681 | { |
1682 | max_loop = simple_strtol(str, NULL, 0); | 1682 | max_loop = simple_strtol(str, NULL, 0); |
1683 | return 1; | 1683 | return 1; |
1684 | } | 1684 | } |
1685 | 1685 | ||
1686 | __setup("max_loop=", max_loop_setup); | 1686 | __setup("max_loop=", max_loop_setup); |
1687 | #endif | 1687 | #endif |
1688 | 1688 |
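The loop_register_transfer()/loop_unregister_transfer() pair exported at the top of this hunk is the hook other modules (cryptoloop, for example) use to plug a transfer filter into the loop driver: registration claims a slot in xfer_funcs[], and unregistration drops the filter from any device still using it. A minimal sketch of a caller follows; it assumes the struct loop_func_table layout from <linux/loop.h> of this kernel, and the demo_* names, the chosen slot and the stubbed callback are illustrative only, not part of this patch.

#include <linux/module.h>
#include <linux/loop.h>

/* stub: a real filter copies the data between raw_page and loop_page
 * (direction depends on cmd) and transforms it on the way */
static int demo_transfer(struct loop_device *lo, int cmd,
                         struct page *raw_page, unsigned raw_off,
                         struct page *loop_page, unsigned loop_off,
                         int size, sector_t real_block)
{
        return 0;
}

static struct loop_func_table demo_funcs = {
        .number   = LO_CRYPT_DUMMY,     /* any free slot below MAX_LO_CRYPT */
        .transfer = demo_transfer,
        .owner    = THIS_MODULE,
};

static int __init demo_init(void)
{
        /* fails with -EINVAL if the slot is out of range or already taken */
        return loop_register_transfer(&demo_funcs);
}

static void __exit demo_exit(void)
{
        loop_unregister_transfer(demo_funcs.number);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");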
drivers/block/osdblk.c
1 | 1 | ||
2 | /* | 2 | /* |
3 | osdblk.c -- Export a single SCSI OSD object as a Linux block device | 3 | osdblk.c -- Export a single SCSI OSD object as a Linux block device |
4 | 4 | ||
5 | 5 | ||
6 | Copyright 2009 Red Hat, Inc. | 6 | Copyright 2009 Red Hat, Inc. |
7 | 7 | ||
8 | This program is free software; you can redistribute it and/or modify | 8 | This program is free software; you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by | 9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation. | 10 | the Free Software Foundation. |
11 | 11 | ||
12 | This program is distributed in the hope that it will be useful, | 12 | This program is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | GNU General Public License for more details. | 15 | GNU General Public License for more details. |
16 | 16 | ||
17 | You should have received a copy of the GNU General Public License | 17 | You should have received a copy of the GNU General Public License |
18 | along with this program; see the file COPYING. If not, write to | 18 | along with this program; see the file COPYING. If not, write to |
19 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 19 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | 20 | ||
21 | 21 | ||
22 | Instructions for use | 22 | Instructions for use |
23 | -------------------- | 23 | -------------------- |
24 | 24 | ||
25 | 1) Map a Linux block device to an existing OSD object. | 25 | 1) Map a Linux block device to an existing OSD object. |
26 | 26 | ||
27 | In this example, we will use partition id 1234, object id 5678, | 27 | In this example, we will use partition id 1234, object id 5678, |
28 | OSD device /dev/osd1. | 28 | OSD device /dev/osd1. |
29 | 29 | ||
30 | $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add | 30 | $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add |
31 | 31 | ||
32 | 32 | ||
33 | 2) List all active blkdev<->object mappings. | 33 | 2) List all active blkdev<->object mappings. |
34 | 34 | ||
35 | In this example, we have performed step #1 twice, creating two blkdevs, | 35 | In this example, we have performed step #1 twice, creating two blkdevs, |
36 | mapped to two separate OSD objects. | 36 | mapped to two separate OSD objects. |
37 | 37 | ||
38 | $ cat /sys/class/osdblk/list | 38 | $ cat /sys/class/osdblk/list |
39 | 0 174 1234 5678 /dev/osd1 | 39 | 0 174 1234 5678 /dev/osd1 |
40 | 1 179 1994 897123 /dev/osd0 | 40 | 1 179 1994 897123 /dev/osd0 |
41 | 41 | ||
42 | The columns, in order, are: | 42 | The columns, in order, are: |
43 | - blkdev unique id | 43 | - blkdev unique id |
44 | - blkdev assigned major | 44 | - blkdev assigned major |
45 | - OSD object partition id | 45 | - OSD object partition id |
46 | - OSD object id | 46 | - OSD object id |
47 | - OSD device | 47 | - OSD device |
48 | 48 | ||
49 | 49 | ||
50 | 3) Remove an active blkdev<->object mapping. | 50 | 3) Remove an active blkdev<->object mapping. |
51 | 51 | ||
52 | In this example, we remove the mapping with blkdev unique id 1. | 52 | In this example, we remove the mapping with blkdev unique id 1. |
53 | 53 | ||
54 | $ echo 1 > /sys/class/osdblk/remove | 54 | $ echo 1 > /sys/class/osdblk/remove |
55 | 55 | ||
56 | 56 | ||
57 | NOTE: The actual creation and deletion of OSD objects is outside the scope | 57 | NOTE: The actual creation and deletion of OSD objects is outside the scope |
58 | of this driver. | 58 | of this driver. |
59 | 59 | ||
60 | */ | 60 | */ |
61 | 61 | ||
62 | #include <linux/kernel.h> | 62 | #include <linux/kernel.h> |
63 | #include <linux/device.h> | 63 | #include <linux/device.h> |
64 | #include <linux/module.h> | 64 | #include <linux/module.h> |
65 | #include <linux/fs.h> | 65 | #include <linux/fs.h> |
66 | #include <linux/slab.h> | 66 | #include <linux/slab.h> |
67 | #include <scsi/osd_initiator.h> | 67 | #include <scsi/osd_initiator.h> |
68 | #include <scsi/osd_attributes.h> | 68 | #include <scsi/osd_attributes.h> |
69 | #include <scsi/osd_sec.h> | 69 | #include <scsi/osd_sec.h> |
70 | #include <scsi/scsi_device.h> | 70 | #include <scsi/scsi_device.h> |
71 | 71 | ||
72 | #define DRV_NAME "osdblk" | 72 | #define DRV_NAME "osdblk" |
73 | #define PFX DRV_NAME ": " | 73 | #define PFX DRV_NAME ": " |
74 | 74 | ||
75 | /* #define _OSDBLK_DEBUG */ | 75 | /* #define _OSDBLK_DEBUG */ |
76 | #ifdef _OSDBLK_DEBUG | 76 | #ifdef _OSDBLK_DEBUG |
77 | #define OSDBLK_DEBUG(fmt, a...) \ | 77 | #define OSDBLK_DEBUG(fmt, a...) \ |
78 | printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) | 78 | printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) |
79 | #else | 79 | #else |
80 | #define OSDBLK_DEBUG(fmt, a...) \ | 80 | #define OSDBLK_DEBUG(fmt, a...) \ |
81 | do { if (0) printk(fmt, ##a); } while (0) | 81 | do { if (0) printk(fmt, ##a); } while (0) |
82 | #endif | 82 | #endif |
83 | 83 | ||
84 | MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); | 84 | MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); |
85 | MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); | 85 | MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); |
86 | MODULE_LICENSE("GPL"); | 86 | MODULE_LICENSE("GPL"); |
87 | 87 | ||
88 | struct osdblk_device; | 88 | struct osdblk_device; |
89 | 89 | ||
90 | enum { | 90 | enum { |
91 | OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ | 91 | OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ |
92 | OSDBLK_MAX_REQ = 32, /* max parallel requests */ | 92 | OSDBLK_MAX_REQ = 32, /* max parallel requests */ |
93 | OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ | 93 | OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ |
94 | }; | 94 | }; |
95 | 95 | ||
96 | struct osdblk_request { | 96 | struct osdblk_request { |
97 | struct request *rq; /* blk layer request */ | 97 | struct request *rq; /* blk layer request */ |
98 | struct bio *bio; /* cloned bio */ | 98 | struct bio *bio; /* cloned bio */ |
99 | struct osdblk_device *osdev; /* associated blkdev */ | 99 | struct osdblk_device *osdev; /* associated blkdev */ |
100 | }; | 100 | }; |
101 | 101 | ||
102 | struct osdblk_device { | 102 | struct osdblk_device { |
103 | int id; /* blkdev unique id */ | 103 | int id; /* blkdev unique id */ |
104 | 104 | ||
105 | int major; /* blkdev assigned major */ | 105 | int major; /* blkdev assigned major */ |
106 | struct gendisk *disk; /* blkdev's gendisk and rq */ | 106 | struct gendisk *disk; /* blkdev's gendisk and rq */ |
107 | struct request_queue *q; | 107 | struct request_queue *q; |
108 | 108 | ||
109 | struct osd_dev *osd; /* associated OSD */ | 109 | struct osd_dev *osd; /* associated OSD */ |
110 | 110 | ||
111 | char name[32]; /* blkdev name, e.g. osdblk34 */ | 111 | char name[32]; /* blkdev name, e.g. osdblk34 */ |
112 | 112 | ||
113 | spinlock_t lock; /* queue lock */ | 113 | spinlock_t lock; /* queue lock */ |
114 | 114 | ||
115 | struct osd_obj_id obj; /* OSD partition, obj id */ | 115 | struct osd_obj_id obj; /* OSD partition, obj id */ |
116 | uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ | 116 | uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ |
117 | 117 | ||
118 | struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ | 118 | struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ |
119 | 119 | ||
120 | struct list_head node; | 120 | struct list_head node; |
121 | 121 | ||
122 | char osd_path[0]; /* OSD device path */ | 122 | char osd_path[0]; /* OSD device path */ |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static struct class *class_osdblk; /* /sys/class/osdblk */ | 125 | static struct class *class_osdblk; /* /sys/class/osdblk */ |
126 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ | 126 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ |
127 | static LIST_HEAD(osdblkdev_list); | 127 | static LIST_HEAD(osdblkdev_list); |
128 | 128 | ||
129 | static const struct block_device_operations osdblk_bd_ops = { | 129 | static const struct block_device_operations osdblk_bd_ops = { |
130 | .owner = THIS_MODULE, | 130 | .owner = THIS_MODULE, |
131 | }; | 131 | }; |
132 | 132 | ||
133 | static const struct osd_attr g_attr_logical_length = ATTR_DEF( | 133 | static const struct osd_attr g_attr_logical_length = ATTR_DEF( |
134 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | 134 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
135 | 135 | ||
136 | static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], | 136 | static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], |
137 | const struct osd_obj_id *obj) | 137 | const struct osd_obj_id *obj) |
138 | { | 138 | { |
139 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | 139 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); |
140 | } | 140 | } |
141 | 141 | ||
142 | /* copied from exofs; move to libosd? */ | 142 | /* copied from exofs; move to libosd? */ |
143 | /* | 143 | /* |
144 | * Perform a synchronous OSD operation. | 144 | * Perform a synchronous OSD operation. |
145 | */ | 145 | */ |
146 | static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | 146 | static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) |
147 | { | 147 | { |
148 | int ret; | 148 | int ret; |
149 | 149 | ||
150 | or->timeout = timeout; | 150 | or->timeout = timeout; |
151 | ret = osd_finalize_request(or, 0, credential, NULL); | 151 | ret = osd_finalize_request(or, 0, credential, NULL); |
152 | if (ret) | 152 | if (ret) |
153 | return ret; | 153 | return ret; |
154 | 154 | ||
155 | ret = osd_execute_request(or); | 155 | ret = osd_execute_request(or); |
156 | 156 | ||
157 | /* osd_req_decode_sense(or, ret); */ | 157 | /* osd_req_decode_sense(or, ret); */ |
158 | return ret; | 158 | return ret; |
159 | } | 159 | } |
160 | 160 | ||
161 | /* | 161 | /* |
162 | * Perform an asynchronous OSD operation. copied from exofs; move to libosd? | 162 | * Perform an asynchronous OSD operation. copied from exofs; move to libosd? |
163 | */ | 163 | */ |
164 | static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, | 164 | static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, |
165 | void *caller_context, u8 *cred) | 165 | void *caller_context, u8 *cred) |
166 | { | 166 | { |
167 | int ret; | 167 | int ret; |
168 | 168 | ||
169 | ret = osd_finalize_request(or, 0, cred, NULL); | 169 | ret = osd_finalize_request(or, 0, cred, NULL); |
170 | if (ret) | 170 | if (ret) |
171 | return ret; | 171 | return ret; |
172 | 172 | ||
173 | ret = osd_execute_request_async(or, async_done, caller_context); | 173 | ret = osd_execute_request_async(or, async_done, caller_context); |
174 | 174 | ||
175 | return ret; | 175 | return ret; |
176 | } | 176 | } |
177 | 177 | ||
178 | /* copied from exofs; move to libosd? */ | 178 | /* copied from exofs; move to libosd? */ |
179 | static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | 179 | static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) |
180 | { | 180 | { |
181 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | 181 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ |
182 | void *iter = NULL; | 182 | void *iter = NULL; |
183 | int nelem; | 183 | int nelem; |
184 | 184 | ||
185 | do { | 185 | do { |
186 | nelem = 1; | 186 | nelem = 1; |
187 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | 187 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); |
188 | if ((cur_attr.attr_page == attr->attr_page) && | 188 | if ((cur_attr.attr_page == attr->attr_page) && |
189 | (cur_attr.attr_id == attr->attr_id)) { | 189 | (cur_attr.attr_id == attr->attr_id)) { |
190 | attr->len = cur_attr.len; | 190 | attr->len = cur_attr.len; |
191 | attr->val_ptr = cur_attr.val_ptr; | 191 | attr->val_ptr = cur_attr.val_ptr; |
192 | return 0; | 192 | return 0; |
193 | } | 193 | } |
194 | } while (iter); | 194 | } while (iter); |
195 | 195 | ||
196 | return -EIO; | 196 | return -EIO; |
197 | } | 197 | } |
198 | 198 | ||
199 | static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) | 199 | static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) |
200 | { | 200 | { |
201 | struct osd_request *or; | 201 | struct osd_request *or; |
202 | struct osd_attr attr; | 202 | struct osd_attr attr; |
203 | int ret; | 203 | int ret; |
204 | 204 | ||
205 | /* start request */ | 205 | /* start request */ |
206 | or = osd_start_request(osdev->osd, GFP_KERNEL); | 206 | or = osd_start_request(osdev->osd, GFP_KERNEL); |
207 | if (!or) | 207 | if (!or) |
208 | return -ENOMEM; | 208 | return -ENOMEM; |
209 | 209 | ||
210 | /* create a get-attributes(length) request */ | 210 | /* create a get-attributes(length) request */ |
211 | osd_req_get_attributes(or, &osdev->obj); | 211 | osd_req_get_attributes(or, &osdev->obj); |
212 | 212 | ||
213 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | 213 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); |
214 | 214 | ||
215 | /* execute op synchronously */ | 215 | /* execute op synchronously */ |
216 | ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); | 216 | ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); |
217 | if (ret) | 217 | if (ret) |
218 | goto out; | 218 | goto out; |
219 | 219 | ||
220 | /* extract length from returned attribute info */ | 220 | /* extract length from returned attribute info */ |
221 | attr = g_attr_logical_length; | 221 | attr = g_attr_logical_length; |
222 | ret = extract_attr_from_req(or, &attr); | 222 | ret = extract_attr_from_req(or, &attr); |
223 | if (ret) | 223 | if (ret) |
224 | goto out; | 224 | goto out; |
225 | 225 | ||
226 | *size_out = get_unaligned_be64(attr.val_ptr); | 226 | *size_out = get_unaligned_be64(attr.val_ptr); |
227 | 227 | ||
228 | out: | 228 | out: |
229 | osd_end_request(or); | 229 | osd_end_request(or); |
230 | return ret; | 230 | return ret; |
231 | 231 | ||
232 | } | 232 | } |
233 | 233 | ||
234 | static void osdblk_osd_complete(struct osd_request *or, void *private) | 234 | static void osdblk_osd_complete(struct osd_request *or, void *private) |
235 | { | 235 | { |
236 | struct osdblk_request *orq = private; | 236 | struct osdblk_request *orq = private; |
237 | struct osd_sense_info osi; | 237 | struct osd_sense_info osi; |
238 | int ret = osd_req_decode_sense(or, &osi); | 238 | int ret = osd_req_decode_sense(or, &osi); |
239 | 239 | ||
240 | if (ret) { | 240 | if (ret) { |
241 | ret = -EIO; | 241 | ret = -EIO; |
242 | OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); | 242 | OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); |
243 | } | 243 | } |
244 | 244 | ||
245 | /* complete OSD request */ | 245 | /* complete OSD request */ |
246 | osd_end_request(or); | 246 | osd_end_request(or); |
247 | 247 | ||
248 | /* complete request passed to osdblk by block layer */ | 248 | /* complete request passed to osdblk by block layer */ |
249 | __blk_end_request_all(orq->rq, ret); | 249 | __blk_end_request_all(orq->rq, ret); |
250 | } | 250 | } |
251 | 251 | ||
252 | static void bio_chain_put(struct bio *chain) | 252 | static void bio_chain_put(struct bio *chain) |
253 | { | 253 | { |
254 | struct bio *tmp; | 254 | struct bio *tmp; |
255 | 255 | ||
256 | while (chain) { | 256 | while (chain) { |
257 | tmp = chain; | 257 | tmp = chain; |
258 | chain = chain->bi_next; | 258 | chain = chain->bi_next; |
259 | 259 | ||
260 | bio_put(tmp); | 260 | bio_put(tmp); |
261 | } | 261 | } |
262 | } | 262 | } |
263 | 263 | ||
264 | static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) | 264 | static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) |
265 | { | 265 | { |
266 | struct bio *tmp, *new_chain = NULL, *tail = NULL; | 266 | struct bio *tmp, *new_chain = NULL, *tail = NULL; |
267 | 267 | ||
268 | while (old_chain) { | 268 | while (old_chain) { |
269 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | 269 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); |
270 | if (!tmp) | 270 | if (!tmp) |
271 | goto err_out; | 271 | goto err_out; |
272 | 272 | ||
273 | __bio_clone(tmp, old_chain); | 273 | __bio_clone(tmp, old_chain); |
274 | tmp->bi_bdev = NULL; | 274 | tmp->bi_bdev = NULL; |
275 | gfpmask &= ~__GFP_WAIT; | 275 | gfpmask &= ~__GFP_WAIT; |
276 | tmp->bi_next = NULL; | 276 | tmp->bi_next = NULL; |
277 | 277 | ||
278 | if (!new_chain) | 278 | if (!new_chain) |
279 | new_chain = tail = tmp; | 279 | new_chain = tail = tmp; |
280 | else { | 280 | else { |
281 | tail->bi_next = tmp; | 281 | tail->bi_next = tmp; |
282 | tail = tmp; | 282 | tail = tmp; |
283 | } | 283 | } |
284 | 284 | ||
285 | old_chain = old_chain->bi_next; | 285 | old_chain = old_chain->bi_next; |
286 | } | 286 | } |
287 | 287 | ||
288 | return new_chain; | 288 | return new_chain; |
289 | 289 | ||
290 | err_out: | 290 | err_out: |
291 | OSDBLK_DEBUG("bio_chain_clone with err\n"); | 291 | OSDBLK_DEBUG("bio_chain_clone with err\n"); |
292 | bio_chain_put(new_chain); | 292 | bio_chain_put(new_chain); |
293 | return NULL; | 293 | return NULL; |
294 | } | 294 | } |
295 | 295 | ||
296 | static void osdblk_rq_fn(struct request_queue *q) | 296 | static void osdblk_rq_fn(struct request_queue *q) |
297 | { | 297 | { |
298 | struct osdblk_device *osdev = q->queuedata; | 298 | struct osdblk_device *osdev = q->queuedata; |
299 | 299 | ||
300 | while (1) { | 300 | while (1) { |
301 | struct request *rq; | 301 | struct request *rq; |
302 | struct osdblk_request *orq; | 302 | struct osdblk_request *orq; |
303 | struct osd_request *or; | 303 | struct osd_request *or; |
304 | struct bio *bio; | 304 | struct bio *bio; |
305 | bool do_write, do_flush; | 305 | bool do_write, do_flush; |
306 | 306 | ||
307 | /* peek at request from block layer */ | 307 | /* peek at request from block layer */ |
308 | rq = blk_fetch_request(q); | 308 | rq = blk_fetch_request(q); |
309 | if (!rq) | 309 | if (!rq) |
310 | break; | 310 | break; |
311 | 311 | ||
312 | /* filter out block requests we don't understand */ | 312 | /* filter out block requests we don't understand */ |
313 | if (rq->cmd_type != REQ_TYPE_FS && | 313 | if (rq->cmd_type != REQ_TYPE_FS && |
314 | !(rq->cmd_flags & REQ_HARDBARRIER)) { | 314 | !(rq->cmd_flags & REQ_HARDBARRIER)) { |
315 | blk_end_request_all(rq, 0); | 315 | blk_end_request_all(rq, 0); |
316 | continue; | 316 | continue; |
317 | } | 317 | } |
318 | 318 | ||
319 | /* deduce our operation (read, write, flush) */ | 319 | /* deduce our operation (read, write, flush) */ |
320 | /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] | 320 | /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] |
321 | * into a clearly defined set of RPC commands: | 321 | * into a clearly defined set of RPC commands: |
322 | * read, write, flush, scsi command, power mgmt req, | 322 | * read, write, flush, scsi command, power mgmt req, |
323 | * driver-specific, etc. | 323 | * driver-specific, etc. |
324 | */ | 324 | */ |
325 | 325 | ||
326 | do_flush = rq->cmd_flags & REQ_FLUSH; | 326 | do_flush = rq->cmd_flags & REQ_FLUSH; |
327 | do_write = (rq_data_dir(rq) == WRITE); | 327 | do_write = (rq_data_dir(rq) == WRITE); |
328 | 328 | ||
329 | if (!do_flush) { /* osd_flush does not use a bio */ | 329 | if (!do_flush) { /* osd_flush does not use a bio */ |
330 | /* a bio clone to be passed down to OSD request */ | 330 | /* a bio clone to be passed down to OSD request */ |
331 | bio = bio_chain_clone(rq->bio, GFP_ATOMIC); | 331 | bio = bio_chain_clone(rq->bio, GFP_ATOMIC); |
332 | if (!bio) | 332 | if (!bio) |
333 | break; | 333 | break; |
334 | } else | 334 | } else |
335 | bio = NULL; | 335 | bio = NULL; |
336 | 336 | ||
337 | /* alloc internal OSD request, for OSD command execution */ | 337 | /* alloc internal OSD request, for OSD command execution */ |
338 | or = osd_start_request(osdev->osd, GFP_ATOMIC); | 338 | or = osd_start_request(osdev->osd, GFP_ATOMIC); |
339 | if (!or) { | 339 | if (!or) { |
340 | bio_chain_put(bio); | 340 | bio_chain_put(bio); |
341 | OSDBLK_DEBUG("osd_start_request with err\n"); | 341 | OSDBLK_DEBUG("osd_start_request with err\n"); |
342 | break; | 342 | break; |
343 | } | 343 | } |
344 | 344 | ||
345 | orq = &osdev->req[rq->tag]; | 345 | orq = &osdev->req[rq->tag]; |
346 | orq->rq = rq; | 346 | orq->rq = rq; |
347 | orq->bio = bio; | 347 | orq->bio = bio; |
348 | orq->osdev = osdev; | 348 | orq->osdev = osdev; |
349 | 349 | ||
350 | /* init OSD command: flush, write or read */ | 350 | /* init OSD command: flush, write or read */ |
351 | if (do_flush) | 351 | if (do_flush) |
352 | osd_req_flush_object(or, &osdev->obj, | 352 | osd_req_flush_object(or, &osdev->obj, |
353 | OSD_CDB_FLUSH_ALL, 0, 0); | 353 | OSD_CDB_FLUSH_ALL, 0, 0); |
354 | else if (do_write) | 354 | else if (do_write) |
355 | osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, | 355 | osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, |
356 | bio, blk_rq_bytes(rq)); | 356 | bio, blk_rq_bytes(rq)); |
357 | else | 357 | else |
358 | osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, | 358 | osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, |
359 | bio, blk_rq_bytes(rq)); | 359 | bio, blk_rq_bytes(rq)); |
360 | 360 | ||
361 | OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", | 361 | OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", |
362 | do_flush ? "flush" : do_write ? | 362 | do_flush ? "flush" : do_write ? |
363 | "write" : "read", blk_rq_bytes(rq), | 363 | "write" : "read", blk_rq_bytes(rq), |
364 | blk_rq_pos(rq) * 512ULL); | 364 | blk_rq_pos(rq) * 512ULL); |
365 | 365 | ||
366 | /* begin OSD command execution */ | 366 | /* begin OSD command execution */ |
367 | if (osd_async_op(or, osdblk_osd_complete, orq, | 367 | if (osd_async_op(or, osdblk_osd_complete, orq, |
368 | osdev->obj_cred)) { | 368 | osdev->obj_cred)) { |
369 | osd_end_request(or); | 369 | osd_end_request(or); |
370 | blk_requeue_request(q, rq); | 370 | blk_requeue_request(q, rq); |
371 | bio_chain_put(bio); | 371 | bio_chain_put(bio); |
372 | OSDBLK_DEBUG("osd_execute_request_async with err\n"); | 372 | OSDBLK_DEBUG("osd_execute_request_async with err\n"); |
373 | break; | 373 | break; |
374 | } | 374 | } |
375 | 375 | ||
376 | /* remove the special 'flush' marker, now that the command | 376 | /* remove the special 'flush' marker, now that the command |
377 | * is executing | 377 | * is executing |
378 | */ | 378 | */ |
379 | rq->special = NULL; | 379 | rq->special = NULL; |
380 | } | 380 | } |
381 | } | 381 | } |
382 | 382 | ||
383 | static void osdblk_free_disk(struct osdblk_device *osdev) | 383 | static void osdblk_free_disk(struct osdblk_device *osdev) |
384 | { | 384 | { |
385 | struct gendisk *disk = osdev->disk; | 385 | struct gendisk *disk = osdev->disk; |
386 | 386 | ||
387 | if (!disk) | 387 | if (!disk) |
388 | return; | 388 | return; |
389 | 389 | ||
390 | if (disk->flags & GENHD_FL_UP) | 390 | if (disk->flags & GENHD_FL_UP) |
391 | del_gendisk(disk); | 391 | del_gendisk(disk); |
392 | if (disk->queue) | 392 | if (disk->queue) |
393 | blk_cleanup_queue(disk->queue); | 393 | blk_cleanup_queue(disk->queue); |
394 | put_disk(disk); | 394 | put_disk(disk); |
395 | } | 395 | } |
396 | 396 | ||
397 | static int osdblk_init_disk(struct osdblk_device *osdev) | 397 | static int osdblk_init_disk(struct osdblk_device *osdev) |
398 | { | 398 | { |
399 | struct gendisk *disk; | 399 | struct gendisk *disk; |
400 | struct request_queue *q; | 400 | struct request_queue *q; |
401 | int rc; | 401 | int rc; |
402 | u64 obj_size = 0; | 402 | u64 obj_size = 0; |
403 | 403 | ||
404 | /* contact OSD, request size info about the object being mapped */ | 404 | /* contact OSD, request size info about the object being mapped */ |
405 | rc = osdblk_get_obj_size(osdev, &obj_size); | 405 | rc = osdblk_get_obj_size(osdev, &obj_size); |
406 | if (rc) | 406 | if (rc) |
407 | return rc; | 407 | return rc; |
408 | 408 | ||
409 | /* create gendisk info */ | 409 | /* create gendisk info */ |
410 | disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); | 410 | disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); |
411 | if (!disk) | 411 | if (!disk) |
412 | return -ENOMEM; | 412 | return -ENOMEM; |
413 | 413 | ||
414 | sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); | 414 | sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); |
415 | disk->major = osdev->major; | 415 | disk->major = osdev->major; |
416 | disk->first_minor = 0; | 416 | disk->first_minor = 0; |
417 | disk->fops = &osdblk_bd_ops; | 417 | disk->fops = &osdblk_bd_ops; |
418 | disk->private_data = osdev; | 418 | disk->private_data = osdev; |
419 | 419 | ||
420 | /* init rq */ | 420 | /* init rq */ |
421 | q = blk_init_queue(osdblk_rq_fn, &osdev->lock); | 421 | q = blk_init_queue(osdblk_rq_fn, &osdev->lock); |
422 | if (!q) { | 422 | if (!q) { |
423 | put_disk(disk); | 423 | put_disk(disk); |
424 | return -ENOMEM; | 424 | return -ENOMEM; |
425 | } | 425 | } |
426 | 426 | ||
427 | /* switch queue to TCQ mode; allocate tag map */ | 427 | /* switch queue to TCQ mode; allocate tag map */ |
428 | rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); | 428 | rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); |
429 | if (rc) { | 429 | if (rc) { |
430 | blk_cleanup_queue(q); | 430 | blk_cleanup_queue(q); |
431 | put_disk(disk); | 431 | put_disk(disk); |
432 | return rc; | 432 | return rc; |
433 | } | 433 | } |
434 | 434 | ||
435 | /* Set our limits to the lower device limits, because osdblk cannot | 435 | /* Set our limits to the lower device limits, because osdblk cannot |
436 | * sleep when allocating a lower-request and therefore cannot be | 436 | * sleep when allocating a lower-request and therefore cannot be |
437 | * bouncing. | 437 | * bouncing. |
438 | */ | 438 | */ |
439 | blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); | 439 | blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); |
440 | 440 | ||
441 | blk_queue_prep_rq(q, blk_queue_start_tag); | 441 | blk_queue_prep_rq(q, blk_queue_start_tag); |
442 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); | 442 | blk_queue_flush(q, REQ_FLUSH); |
443 | 443 | ||
444 | disk->queue = q; | 444 | disk->queue = q; |
445 | 445 | ||
446 | q->queuedata = osdev; | 446 | q->queuedata = osdev; |
447 | 447 | ||
448 | osdev->disk = disk; | 448 | osdev->disk = disk; |
449 | osdev->q = q; | 449 | osdev->q = q; |
450 | 450 | ||
451 | /* finally, announce the disk to the world */ | 451 | /* finally, announce the disk to the world */ |
452 | set_capacity(disk, obj_size / 512ULL); | 452 | set_capacity(disk, obj_size / 512ULL); |
453 | add_disk(disk); | 453 | add_disk(disk); |
454 | 454 | ||
455 | printk(KERN_INFO "%s: Added of size 0x%llx\n", | 455 | printk(KERN_INFO "%s: Added of size 0x%llx\n", |
456 | disk->disk_name, (unsigned long long)obj_size); | 456 | disk->disk_name, (unsigned long long)obj_size); |
457 | 457 | ||
458 | return 0; | 458 | return 0; |
459 | } | 459 | } |
460 | 460 | ||
461 | /******************************************************************** | 461 | /******************************************************************** |
462 | * /sys/class/osdblk/ | 462 | * /sys/class/osdblk/ |
463 | * add map OSD object to blkdev | 463 | * add map OSD object to blkdev |
464 | * remove unmap OSD object | 464 | * remove unmap OSD object |
465 | * list show mappings | 465 | * list show mappings |
466 | *******************************************************************/ | 466 | *******************************************************************/ |
467 | 467 | ||
468 | static void class_osdblk_release(struct class *cls) | 468 | static void class_osdblk_release(struct class *cls) |
469 | { | 469 | { |
470 | kfree(cls); | 470 | kfree(cls); |
471 | } | 471 | } |
472 | 472 | ||
473 | static ssize_t class_osdblk_list(struct class *c, | 473 | static ssize_t class_osdblk_list(struct class *c, |
474 | struct class_attribute *attr, | 474 | struct class_attribute *attr, |
475 | char *data) | 475 | char *data) |
476 | { | 476 | { |
477 | int n = 0; | 477 | int n = 0; |
478 | struct list_head *tmp; | 478 | struct list_head *tmp; |
479 | 479 | ||
480 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 480 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
481 | 481 | ||
482 | list_for_each(tmp, &osdblkdev_list) { | 482 | list_for_each(tmp, &osdblkdev_list) { |
483 | struct osdblk_device *osdev; | 483 | struct osdblk_device *osdev; |
484 | 484 | ||
485 | osdev = list_entry(tmp, struct osdblk_device, node); | 485 | osdev = list_entry(tmp, struct osdblk_device, node); |
486 | 486 | ||
487 | n += sprintf(data+n, "%d %d %llu %llu %s\n", | 487 | n += sprintf(data+n, "%d %d %llu %llu %s\n", |
488 | osdev->id, | 488 | osdev->id, |
489 | osdev->major, | 489 | osdev->major, |
490 | osdev->obj.partition, | 490 | osdev->obj.partition, |
491 | osdev->obj.id, | 491 | osdev->obj.id, |
492 | osdev->osd_path); | 492 | osdev->osd_path); |
493 | } | 493 | } |
494 | 494 | ||
495 | mutex_unlock(&ctl_mutex); | 495 | mutex_unlock(&ctl_mutex); |
496 | return n; | 496 | return n; |
497 | } | 497 | } |
498 | 498 | ||
499 | static ssize_t class_osdblk_add(struct class *c, | 499 | static ssize_t class_osdblk_add(struct class *c, |
500 | struct class_attribute *attr, | 500 | struct class_attribute *attr, |
501 | const char *buf, size_t count) | 501 | const char *buf, size_t count) |
502 | { | 502 | { |
503 | struct osdblk_device *osdev; | 503 | struct osdblk_device *osdev; |
504 | ssize_t rc; | 504 | ssize_t rc; |
505 | int irc, new_id = 0; | 505 | int irc, new_id = 0; |
506 | struct list_head *tmp; | 506 | struct list_head *tmp; |
507 | 507 | ||
508 | if (!try_module_get(THIS_MODULE)) | 508 | if (!try_module_get(THIS_MODULE)) |
509 | return -ENODEV; | 509 | return -ENODEV; |
510 | 510 | ||
511 | /* new osdblk_device object */ | 511 | /* new osdblk_device object */ |
512 | osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); | 512 | osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); |
513 | if (!osdev) { | 513 | if (!osdev) { |
514 | rc = -ENOMEM; | 514 | rc = -ENOMEM; |
515 | goto err_out_mod; | 515 | goto err_out_mod; |
516 | } | 516 | } |
517 | 517 | ||
518 | /* static osdblk_device initialization */ | 518 | /* static osdblk_device initialization */ |
519 | spin_lock_init(&osdev->lock); | 519 | spin_lock_init(&osdev->lock); |
520 | INIT_LIST_HEAD(&osdev->node); | 520 | INIT_LIST_HEAD(&osdev->node); |
521 | 521 | ||
522 | /* generate unique id: find highest unique id, add one */ | 522 | /* generate unique id: find highest unique id, add one */ |
523 | 523 | ||
524 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 524 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
525 | 525 | ||
526 | list_for_each(tmp, &osdblkdev_list) { | 526 | list_for_each(tmp, &osdblkdev_list) { |
527 | struct osdblk_device *osdev; | 527 | struct osdblk_device *osdev; |
528 | 528 | ||
529 | osdev = list_entry(tmp, struct osdblk_device, node); | 529 | osdev = list_entry(tmp, struct osdblk_device, node); |
530 | if (osdev->id > new_id) | 530 | if (osdev->id > new_id) |
531 | new_id = osdev->id + 1; | 531 | new_id = osdev->id + 1; |
532 | } | 532 | } |
533 | 533 | ||
534 | osdev->id = new_id; | 534 | osdev->id = new_id; |
535 | 535 | ||
536 | /* add to global list */ | 536 | /* add to global list */ |
537 | list_add_tail(&osdev->node, &osdblkdev_list); | 537 | list_add_tail(&osdev->node, &osdblkdev_list); |
538 | 538 | ||
539 | mutex_unlock(&ctl_mutex); | 539 | mutex_unlock(&ctl_mutex); |
540 | 540 | ||
541 | /* parse add command */ | 541 | /* parse add command */ |
542 | if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, | 542 | if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, |
543 | osdev->osd_path) != 3) { | 543 | osdev->osd_path) != 3) { |
544 | rc = -EINVAL; | 544 | rc = -EINVAL; |
545 | goto err_out_slot; | 545 | goto err_out_slot; |
546 | } | 546 | } |
547 | 547 | ||
548 | /* initialize rest of new object */ | 548 | /* initialize rest of new object */ |
549 | sprintf(osdev->name, DRV_NAME "%d", osdev->id); | 549 | sprintf(osdev->name, DRV_NAME "%d", osdev->id); |
550 | 550 | ||
551 | /* contact requested OSD */ | 551 | /* contact requested OSD */ |
552 | osdev->osd = osduld_path_lookup(osdev->osd_path); | 552 | osdev->osd = osduld_path_lookup(osdev->osd_path); |
553 | if (IS_ERR(osdev->osd)) { | 553 | if (IS_ERR(osdev->osd)) { |
554 | rc = PTR_ERR(osdev->osd); | 554 | rc = PTR_ERR(osdev->osd); |
555 | goto err_out_slot; | 555 | goto err_out_slot; |
556 | } | 556 | } |
557 | 557 | ||
558 | /* build OSD credential */ | 558 | /* build OSD credential */ |
559 | osdblk_make_credential(osdev->obj_cred, &osdev->obj); | 559 | osdblk_make_credential(osdev->obj_cred, &osdev->obj); |
560 | 560 | ||
561 | /* register our block device */ | 561 | /* register our block device */ |
562 | irc = register_blkdev(0, osdev->name); | 562 | irc = register_blkdev(0, osdev->name); |
563 | if (irc < 0) { | 563 | if (irc < 0) { |
564 | rc = irc; | 564 | rc = irc; |
565 | goto err_out_osd; | 565 | goto err_out_osd; |
566 | } | 566 | } |
567 | 567 | ||
568 | osdev->major = irc; | 568 | osdev->major = irc; |
569 | 569 | ||
570 | /* set up and announce blkdev mapping */ | 570 | /* set up and announce blkdev mapping */ |
571 | rc = osdblk_init_disk(osdev); | 571 | rc = osdblk_init_disk(osdev); |
572 | if (rc) | 572 | if (rc) |
573 | goto err_out_blkdev; | 573 | goto err_out_blkdev; |
574 | 574 | ||
575 | return count; | 575 | return count; |
576 | 576 | ||
577 | err_out_blkdev: | 577 | err_out_blkdev: |
578 | unregister_blkdev(osdev->major, osdev->name); | 578 | unregister_blkdev(osdev->major, osdev->name); |
579 | err_out_osd: | 579 | err_out_osd: |
580 | osduld_put_device(osdev->osd); | 580 | osduld_put_device(osdev->osd); |
581 | err_out_slot: | 581 | err_out_slot: |
582 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 582 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
583 | list_del_init(&osdev->node); | 583 | list_del_init(&osdev->node); |
584 | mutex_unlock(&ctl_mutex); | 584 | mutex_unlock(&ctl_mutex); |
585 | 585 | ||
586 | kfree(osdev); | 586 | kfree(osdev); |
587 | err_out_mod: | 587 | err_out_mod: |
588 | OSDBLK_DEBUG("Error adding device %s\n", buf); | 588 | OSDBLK_DEBUG("Error adding device %s\n", buf); |
589 | module_put(THIS_MODULE); | 589 | module_put(THIS_MODULE); |
590 | return rc; | 590 | return rc; |
591 | } | 591 | } |
592 | 592 | ||
593 | static ssize_t class_osdblk_remove(struct class *c, | 593 | static ssize_t class_osdblk_remove(struct class *c, |
594 | struct class_attribute *attr, | 594 | struct class_attribute *attr, |
595 | const char *buf, | 595 | const char *buf, |
596 | size_t count) | 596 | size_t count) |
597 | { | 597 | { |
598 | struct osdblk_device *osdev = NULL; | 598 | struct osdblk_device *osdev = NULL; |
599 | int target_id, rc; | 599 | int target_id, rc; |
600 | unsigned long ul; | 600 | unsigned long ul; |
601 | struct list_head *tmp; | 601 | struct list_head *tmp; |
602 | 602 | ||
603 | rc = strict_strtoul(buf, 10, &ul); | 603 | rc = strict_strtoul(buf, 10, &ul); |
604 | if (rc) | 604 | if (rc) |
605 | return rc; | 605 | return rc; |
606 | 606 | ||
607 | /* convert to int; abort if we lost anything in the conversion */ | 607 | /* convert to int; abort if we lost anything in the conversion */ |
608 | target_id = (int) ul; | 608 | target_id = (int) ul; |
609 | if (target_id != ul) | 609 | if (target_id != ul) |
610 | return -EINVAL; | 610 | return -EINVAL; |
611 | 611 | ||
612 | /* remove object from list immediately */ | 612 | /* remove object from list immediately */ |
613 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 613 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
614 | 614 | ||
615 | list_for_each(tmp, &osdblkdev_list) { | 615 | list_for_each(tmp, &osdblkdev_list) { |
616 | osdev = list_entry(tmp, struct osdblk_device, node); | 616 | osdev = list_entry(tmp, struct osdblk_device, node); |
617 | if (osdev->id == target_id) { | 617 | if (osdev->id == target_id) { |
618 | list_del_init(&osdev->node); | 618 | list_del_init(&osdev->node); |
619 | break; | 619 | break; |
620 | } | 620 | } |
621 | osdev = NULL; | 621 | osdev = NULL; |
622 | } | 622 | } |
623 | 623 | ||
624 | mutex_unlock(&ctl_mutex); | 624 | mutex_unlock(&ctl_mutex); |
625 | 625 | ||
626 | if (!osdev) | 626 | if (!osdev) |
627 | return -ENOENT; | 627 | return -ENOENT; |
628 | 628 | ||
629 | /* clean up and free blkdev and associated OSD connection */ | 629 | /* clean up and free blkdev and associated OSD connection */ |
630 | osdblk_free_disk(osdev); | 630 | osdblk_free_disk(osdev); |
631 | unregister_blkdev(osdev->major, osdev->name); | 631 | unregister_blkdev(osdev->major, osdev->name); |
632 | osduld_put_device(osdev->osd); | 632 | osduld_put_device(osdev->osd); |
633 | kfree(osdev); | 633 | kfree(osdev); |
634 | 634 | ||
635 | /* release module ref */ | 635 | /* release module ref */ |
636 | module_put(THIS_MODULE); | 636 | module_put(THIS_MODULE); |
637 | 637 | ||
638 | return count; | 638 | return count; |
639 | } | 639 | } |
640 | 640 | ||
641 | static struct class_attribute class_osdblk_attrs[] = { | 641 | static struct class_attribute class_osdblk_attrs[] = { |
642 | __ATTR(add, 0200, NULL, class_osdblk_add), | 642 | __ATTR(add, 0200, NULL, class_osdblk_add), |
643 | __ATTR(remove, 0200, NULL, class_osdblk_remove), | 643 | __ATTR(remove, 0200, NULL, class_osdblk_remove), |
644 | __ATTR(list, 0444, class_osdblk_list, NULL), | 644 | __ATTR(list, 0444, class_osdblk_list, NULL), |
645 | __ATTR_NULL | 645 | __ATTR_NULL |
646 | }; | 646 | }; |
647 | 647 | ||
648 | static int osdblk_sysfs_init(void) | 648 | static int osdblk_sysfs_init(void) |
649 | { | 649 | { |
650 | int ret = 0; | 650 | int ret = 0; |
651 | 651 | ||
652 | /* | 652 | /* |
653 | * create control files in sysfs | 653 | * create control files in sysfs |
654 | * /sys/class/osdblk/... | 654 | * /sys/class/osdblk/... |
655 | */ | 655 | */ |
656 | class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); | 656 | class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); |
657 | if (!class_osdblk) | 657 | if (!class_osdblk) |
658 | return -ENOMEM; | 658 | return -ENOMEM; |
659 | 659 | ||
660 | class_osdblk->name = DRV_NAME; | 660 | class_osdblk->name = DRV_NAME; |
661 | class_osdblk->owner = THIS_MODULE; | 661 | class_osdblk->owner = THIS_MODULE; |
662 | class_osdblk->class_release = class_osdblk_release; | 662 | class_osdblk->class_release = class_osdblk_release; |
663 | class_osdblk->class_attrs = class_osdblk_attrs; | 663 | class_osdblk->class_attrs = class_osdblk_attrs; |
664 | 664 | ||
665 | ret = class_register(class_osdblk); | 665 | ret = class_register(class_osdblk); |
666 | if (ret) { | 666 | if (ret) { |
667 | kfree(class_osdblk); | 667 | kfree(class_osdblk); |
668 | class_osdblk = NULL; | 668 | class_osdblk = NULL; |
669 | printk(PFX "failed to create class osdblk\n"); | 669 | printk(PFX "failed to create class osdblk\n"); |
670 | return ret; | 670 | return ret; |
671 | } | 671 | } |
672 | 672 | ||
673 | return 0; | 673 | return 0; |
674 | } | 674 | } |
675 | 675 | ||
676 | static void osdblk_sysfs_cleanup(void) | 676 | static void osdblk_sysfs_cleanup(void) |
677 | { | 677 | { |
678 | if (class_osdblk) | 678 | if (class_osdblk) |
679 | class_destroy(class_osdblk); | 679 | class_destroy(class_osdblk); |
680 | class_osdblk = NULL; | 680 | class_osdblk = NULL; |
681 | } | 681 | } |
682 | 682 | ||
683 | static int __init osdblk_init(void) | 683 | static int __init osdblk_init(void) |
684 | { | 684 | { |
685 | int rc; | 685 | int rc; |
686 | 686 | ||
687 | rc = osdblk_sysfs_init(); | 687 | rc = osdblk_sysfs_init(); |
688 | if (rc) | 688 | if (rc) |
689 | return rc; | 689 | return rc; |
690 | 690 | ||
691 | return 0; | 691 | return 0; |
692 | } | 692 | } |
693 | 693 | ||
694 | static void __exit osdblk_exit(void) | 694 | static void __exit osdblk_exit(void) |
695 | { | 695 | { |
696 | osdblk_sysfs_cleanup(); | 696 | osdblk_sysfs_cleanup(); |
697 | } | 697 | } |
698 | 698 | ||
699 | module_init(osdblk_init); | 699 | module_init(osdblk_init); |
700 | module_exit(osdblk_exit); | 700 | module_exit(osdblk_exit); |
701 | 701 | ||
702 | 702 |
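The only functional change to osdblk.c above is at line 442: osdblk_init_disk() now calls blk_queue_flush(q, REQ_FLUSH) where it previously called blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); the REQ_FLUSH handling in osdblk_rq_fn() is untouched. Below is a minimal sketch of the same queue setup in a hypothetical request-based driver: only the two queue calls come from this patch, and all foo_* names and types are invented for illustration.

#include <linux/blkdev.h>
#include <linux/spinlock.h>

/* hypothetical driver state; only what the sketch needs */
struct foo_device {
        spinlock_t              lock;
        struct request_queue    *queue;
};

static void foo_request_fn(struct request_queue *q)
{
        struct request *rq;

        /* a real driver would issue the I/O and honour REQ_FLUSH here */
        while ((rq = blk_fetch_request(q)) != NULL)
                __blk_end_request_all(rq, 0);
}

static int foo_init_queue(struct foo_device *foo)
{
        struct request_queue *q;

        spin_lock_init(&foo->lock);
        q = blk_init_queue(foo_request_fn, &foo->lock);
        if (!q)
                return -ENOMEM;

        /* was: blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); */
        blk_queue_flush(q, REQ_FLUSH); /* device has a flushable write cache */

        q->queuedata = foo;
        foo->queue = q;
        return 0;
}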
drivers/block/ps3disk.c
1 | /* | 1 | /* |
2 | * PS3 Disk Storage Driver | 2 | * PS3 Disk Storage Driver |
3 | * | 3 | * |
4 | * Copyright (C) 2007 Sony Computer Entertainment Inc. | 4 | * Copyright (C) 2007 Sony Computer Entertainment Inc. |
5 | * Copyright 2007 Sony Corp. | 5 | * Copyright 2007 Sony Corp. |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify it | 7 | * This program is free software; you can redistribute it and/or modify it |
8 | * under the terms of the GNU General Public License as published | 8 | * under the terms of the GNU General Public License as published |
9 | * by the Free Software Foundation; version 2 of the License. | 9 | * by the Free Software Foundation; version 2 of the License. |
10 | * | 10 | * |
11 | * This program is distributed in the hope that it will be useful, but | 11 | * This program is distributed in the hope that it will be useful, but |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * General Public License for more details. | 14 | * General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public License along | 16 | * You should have received a copy of the GNU General Public License along |
17 | * with this program; if not, write to the Free Software Foundation, Inc., | 17 | * with this program; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | 18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/ata.h> | 21 | #include <linux/ata.h> |
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | 24 | ||
25 | #include <asm/lv1call.h> | 25 | #include <asm/lv1call.h> |
26 | #include <asm/ps3stor.h> | 26 | #include <asm/ps3stor.h> |
27 | #include <asm/firmware.h> | 27 | #include <asm/firmware.h> |
28 | 28 | ||
29 | 29 | ||
30 | #define DEVICE_NAME "ps3disk" | 30 | #define DEVICE_NAME "ps3disk" |
31 | 31 | ||
32 | #define BOUNCE_SIZE (64*1024) | 32 | #define BOUNCE_SIZE (64*1024) |
33 | 33 | ||
34 | #define PS3DISK_MAX_DISKS 16 | 34 | #define PS3DISK_MAX_DISKS 16 |
35 | #define PS3DISK_MINORS 16 | 35 | #define PS3DISK_MINORS 16 |
36 | 36 | ||
37 | 37 | ||
38 | #define PS3DISK_NAME "ps3d%c" | 38 | #define PS3DISK_NAME "ps3d%c" |
39 | 39 | ||
40 | 40 | ||
41 | struct ps3disk_private { | 41 | struct ps3disk_private { |
42 | spinlock_t lock; /* Request queue spinlock */ | 42 | spinlock_t lock; /* Request queue spinlock */ |
43 | struct request_queue *queue; | 43 | struct request_queue *queue; |
44 | struct gendisk *gendisk; | 44 | struct gendisk *gendisk; |
45 | unsigned int blocking_factor; | 45 | unsigned int blocking_factor; |
46 | struct request *req; | 46 | struct request *req; |
47 | u64 raw_capacity; | 47 | u64 raw_capacity; |
48 | unsigned char model[ATA_ID_PROD_LEN+1]; | 48 | unsigned char model[ATA_ID_PROD_LEN+1]; |
49 | }; | 49 | }; |
50 | 50 | ||
51 | 51 | ||
52 | #define LV1_STORAGE_SEND_ATA_COMMAND (2) | 52 | #define LV1_STORAGE_SEND_ATA_COMMAND (2) |
53 | #define LV1_STORAGE_ATA_HDDOUT (0x23) | 53 | #define LV1_STORAGE_ATA_HDDOUT (0x23) |
54 | 54 | ||
55 | struct lv1_ata_cmnd_block { | 55 | struct lv1_ata_cmnd_block { |
56 | u16 features; | 56 | u16 features; |
57 | u16 sector_count; | 57 | u16 sector_count; |
58 | u16 LBA_low; | 58 | u16 LBA_low; |
59 | u16 LBA_mid; | 59 | u16 LBA_mid; |
60 | u16 LBA_high; | 60 | u16 LBA_high; |
61 | u8 device; | 61 | u8 device; |
62 | u8 command; | 62 | u8 command; |
63 | u32 is_ext; | 63 | u32 is_ext; |
64 | u32 proto; | 64 | u32 proto; |
65 | u32 in_out; | 65 | u32 in_out; |
66 | u32 size; | 66 | u32 size; |
67 | u64 buffer; | 67 | u64 buffer; |
68 | u32 arglen; | 68 | u32 arglen; |
69 | }; | 69 | }; |
70 | 70 | ||
71 | enum lv1_ata_proto { | 71 | enum lv1_ata_proto { |
72 | NON_DATA_PROTO = 0, | 72 | NON_DATA_PROTO = 0, |
73 | PIO_DATA_IN_PROTO = 1, | 73 | PIO_DATA_IN_PROTO = 1, |
74 | PIO_DATA_OUT_PROTO = 2, | 74 | PIO_DATA_OUT_PROTO = 2, |
75 | DMA_PROTO = 3 | 75 | DMA_PROTO = 3 |
76 | }; | 76 | }; |
77 | 77 | ||
78 | enum lv1_ata_in_out { | 78 | enum lv1_ata_in_out { |
79 | DIR_WRITE = 0, /* memory -> device */ | 79 | DIR_WRITE = 0, /* memory -> device */ |
80 | DIR_READ = 1 /* device -> memory */ | 80 | DIR_READ = 1 /* device -> memory */ |
81 | }; | 81 | }; |
82 | 82 | ||
83 | static int ps3disk_major; | 83 | static int ps3disk_major; |
84 | 84 | ||
85 | 85 | ||
86 | static const struct block_device_operations ps3disk_fops = { | 86 | static const struct block_device_operations ps3disk_fops = { |
87 | .owner = THIS_MODULE, | 87 | .owner = THIS_MODULE, |
88 | }; | 88 | }; |
89 | 89 | ||
90 | 90 | ||
91 | static void ps3disk_scatter_gather(struct ps3_storage_device *dev, | 91 | static void ps3disk_scatter_gather(struct ps3_storage_device *dev, |
92 | struct request *req, int gather) | 92 | struct request *req, int gather) |
93 | { | 93 | { |
94 | unsigned int offset = 0; | 94 | unsigned int offset = 0; |
95 | struct req_iterator iter; | 95 | struct req_iterator iter; |
96 | struct bio_vec *bvec; | 96 | struct bio_vec *bvec; |
97 | unsigned int i = 0; | 97 | unsigned int i = 0; |
98 | size_t size; | 98 | size_t size; |
99 | void *buf; | 99 | void *buf; |
100 | 100 | ||
101 | rq_for_each_segment(bvec, req, iter) { | 101 | rq_for_each_segment(bvec, req, iter) { |
102 | unsigned long flags; | 102 | unsigned long flags; |
103 | dev_dbg(&dev->sbd.core, | 103 | dev_dbg(&dev->sbd.core, |
104 | "%s:%u: bio %u: %u segs %u sectors from %lu\n", | 104 | "%s:%u: bio %u: %u segs %u sectors from %lu\n", |
105 | __func__, __LINE__, i, bio_segments(iter.bio), | 105 | __func__, __LINE__, i, bio_segments(iter.bio), |
106 | bio_sectors(iter.bio), iter.bio->bi_sector); | 106 | bio_sectors(iter.bio), iter.bio->bi_sector); |
107 | 107 | ||
108 | size = bvec->bv_len; | 108 | size = bvec->bv_len; |
109 | buf = bvec_kmap_irq(bvec, &flags); | 109 | buf = bvec_kmap_irq(bvec, &flags); |
110 | if (gather) | 110 | if (gather) |
111 | memcpy(dev->bounce_buf+offset, buf, size); | 111 | memcpy(dev->bounce_buf+offset, buf, size); |
112 | else | 112 | else |
113 | memcpy(buf, dev->bounce_buf+offset, size); | 113 | memcpy(buf, dev->bounce_buf+offset, size); |
114 | offset += size; | 114 | offset += size; |
115 | flush_kernel_dcache_page(bvec->bv_page); | 115 | flush_kernel_dcache_page(bvec->bv_page); |
116 | bvec_kunmap_irq(bvec, &flags); | 116 | bvec_kunmap_irq(bvec, &flags); |
117 | i++; | 117 | i++; |
118 | } | 118 | } |
119 | } | 119 | } |
120 | 120 | ||
121 | static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, | 121 | static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, |
122 | struct request *req) | 122 | struct request *req) |
123 | { | 123 | { |
124 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); | 124 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); |
125 | int write = rq_data_dir(req), res; | 125 | int write = rq_data_dir(req), res; |
126 | const char *op = write ? "write" : "read"; | 126 | const char *op = write ? "write" : "read"; |
127 | u64 start_sector, sectors; | 127 | u64 start_sector, sectors; |
128 | unsigned int region_id = dev->regions[dev->region_idx].id; | 128 | unsigned int region_id = dev->regions[dev->region_idx].id; |
129 | 129 | ||
130 | #ifdef DEBUG | 130 | #ifdef DEBUG |
131 | unsigned int n = 0; | 131 | unsigned int n = 0; |
132 | struct bio_vec *bv; | 132 | struct bio_vec *bv; |
133 | struct req_iterator iter; | 133 | struct req_iterator iter; |
134 | 134 | ||
135 | rq_for_each_segment(bv, req, iter) | 135 | rq_for_each_segment(bv, req, iter) |
136 | n++; | 136 | n++; |
137 | dev_dbg(&dev->sbd.core, | 137 | dev_dbg(&dev->sbd.core, |
138 | "%s:%u: %s req has %u bvecs for %u sectors\n", | 138 | "%s:%u: %s req has %u bvecs for %u sectors\n", |
139 | __func__, __LINE__, op, n, blk_rq_sectors(req)); | 139 | __func__, __LINE__, op, n, blk_rq_sectors(req)); |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | start_sector = blk_rq_pos(req) * priv->blocking_factor; | 142 | start_sector = blk_rq_pos(req) * priv->blocking_factor; |
143 | sectors = blk_rq_sectors(req) * priv->blocking_factor; | 143 | sectors = blk_rq_sectors(req) * priv->blocking_factor; |
144 | dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n", | 144 | dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n", |
145 | __func__, __LINE__, op, sectors, start_sector); | 145 | __func__, __LINE__, op, sectors, start_sector); |
146 | 146 | ||
147 | if (write) { | 147 | if (write) { |
148 | ps3disk_scatter_gather(dev, req, 1); | 148 | ps3disk_scatter_gather(dev, req, 1); |
149 | 149 | ||
150 | res = lv1_storage_write(dev->sbd.dev_id, region_id, | 150 | res = lv1_storage_write(dev->sbd.dev_id, region_id, |
151 | start_sector, sectors, 0, | 151 | start_sector, sectors, 0, |
152 | dev->bounce_lpar, &dev->tag); | 152 | dev->bounce_lpar, &dev->tag); |
153 | } else { | 153 | } else { |
154 | res = lv1_storage_read(dev->sbd.dev_id, region_id, | 154 | res = lv1_storage_read(dev->sbd.dev_id, region_id, |
155 | start_sector, sectors, 0, | 155 | start_sector, sectors, 0, |
156 | dev->bounce_lpar, &dev->tag); | 156 | dev->bounce_lpar, &dev->tag); |
157 | } | 157 | } |
158 | if (res) { | 158 | if (res) { |
159 | dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, | 159 | dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, |
160 | __LINE__, op, res); | 160 | __LINE__, op, res); |
161 | __blk_end_request_all(req, -EIO); | 161 | __blk_end_request_all(req, -EIO); |
162 | return 0; | 162 | return 0; |
163 | } | 163 | } |
164 | 164 | ||
165 | priv->req = req; | 165 | priv->req = req; |
166 | return 1; | 166 | return 1; |
167 | } | 167 | } |
168 | 168 | ||
169 | static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, | 169 | static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, |
170 | struct request *req) | 170 | struct request *req) |
171 | { | 171 | { |
172 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); | 172 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); |
173 | u64 res; | 173 | u64 res; |
174 | 174 | ||
175 | dev_dbg(&dev->sbd.core, "%s:%u: flush request\n", __func__, __LINE__); | 175 | dev_dbg(&dev->sbd.core, "%s:%u: flush request\n", __func__, __LINE__); |
176 | 176 | ||
177 | res = lv1_storage_send_device_command(dev->sbd.dev_id, | 177 | res = lv1_storage_send_device_command(dev->sbd.dev_id, |
178 | LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, | 178 | LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, |
179 | 0, &dev->tag); | 179 | 0, &dev->tag); |
180 | if (res) { | 180 | if (res) { |
181 | dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", | 181 | dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", |
182 | __func__, __LINE__, res); | 182 | __func__, __LINE__, res); |
183 | __blk_end_request_all(req, -EIO); | 183 | __blk_end_request_all(req, -EIO); |
184 | return 0; | 184 | return 0; |
185 | } | 185 | } |
186 | 186 | ||
187 | priv->req = req; | 187 | priv->req = req; |
188 | return 1; | 188 | return 1; |
189 | } | 189 | } |
190 | 190 | ||
191 | static void ps3disk_do_request(struct ps3_storage_device *dev, | 191 | static void ps3disk_do_request(struct ps3_storage_device *dev, |
192 | struct request_queue *q) | 192 | struct request_queue *q) |
193 | { | 193 | { |
194 | struct request *req; | 194 | struct request *req; |
195 | 195 | ||
196 | dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); | 196 | dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); |
197 | 197 | ||
198 | while ((req = blk_fetch_request(q))) { | 198 | while ((req = blk_fetch_request(q))) { |
199 | if (req->cmd_flags & REQ_FLUSH) { | 199 | if (req->cmd_flags & REQ_FLUSH) { |
200 | if (ps3disk_submit_flush_request(dev, req)) | 200 | if (ps3disk_submit_flush_request(dev, req)) |
201 | break; | 201 | break; |
202 | } else if (req->cmd_type == REQ_TYPE_FS) { | 202 | } else if (req->cmd_type == REQ_TYPE_FS) { |
203 | if (ps3disk_submit_request_sg(dev, req)) | 203 | if (ps3disk_submit_request_sg(dev, req)) |
204 | break; | 204 | break; |
205 | } else { | 205 | } else { |
206 | blk_dump_rq_flags(req, DEVICE_NAME " bad request"); | 206 | blk_dump_rq_flags(req, DEVICE_NAME " bad request"); |
207 | __blk_end_request_all(req, -EIO); | 207 | __blk_end_request_all(req, -EIO); |
208 | continue; | 208 | continue; |
209 | } | 209 | } |
210 | } | 210 | } |
211 | } | 211 | } |
212 | 212 | ||
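A flush reaches this driver as a request with REQ_FLUSH set in cmd_flags and no data to transfer, which is why ps3disk_do_request() tests that flag before the REQ_TYPE_FS check that guards ordinary reads and writes. ps3disk only declares REQ_FLUSH (see the probe hunk further down); a driver whose hardware also honoured per-write FUA would declare REQ_FUA as well and test it on data requests. A minimal sketch of that shape (example_request_fn(), hw_submit_flush() and hw_submit_rw() are hypothetical names, not part of this driver):

    static void example_request_fn(struct request_queue *q)
    {
    	struct request *req;

    	while ((req = blk_fetch_request(q))) {
    		if (req->cmd_flags & REQ_FLUSH) {
    			hw_submit_flush(req);		/* empty cache-flush request */
    		} else if (req->cmd_type == REQ_TYPE_FS) {
    			/* REQ_FUA is only seen if it was passed to blk_queue_flush() */
    			hw_submit_rw(req, req->cmd_flags & REQ_FUA);
    		} else {
    			blk_dump_rq_flags(req, "bad request");
    			__blk_end_request_all(req, -EIO);
    		}
    	}
    }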
213 | static void ps3disk_request(struct request_queue *q) | 213 | static void ps3disk_request(struct request_queue *q) |
214 | { | 214 | { |
215 | struct ps3_storage_device *dev = q->queuedata; | 215 | struct ps3_storage_device *dev = q->queuedata; |
216 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); | 216 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); |
217 | 217 | ||
218 | if (priv->req) { | 218 | if (priv->req) { |
219 | dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__); | 219 | dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__); |
220 | return; | 220 | return; |
221 | } | 221 | } |
222 | 222 | ||
223 | ps3disk_do_request(dev, q); | 223 | ps3disk_do_request(dev, q); |
224 | } | 224 | } |
225 | 225 | ||
226 | static irqreturn_t ps3disk_interrupt(int irq, void *data) | 226 | static irqreturn_t ps3disk_interrupt(int irq, void *data) |
227 | { | 227 | { |
228 | struct ps3_storage_device *dev = data; | 228 | struct ps3_storage_device *dev = data; |
229 | struct ps3disk_private *priv; | 229 | struct ps3disk_private *priv; |
230 | struct request *req; | 230 | struct request *req; |
231 | int res, read, error; | 231 | int res, read, error; |
232 | u64 tag, status; | 232 | u64 tag, status; |
233 | const char *op; | 233 | const char *op; |
234 | 234 | ||
235 | res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status); | 235 | res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status); |
236 | 236 | ||
237 | if (tag != dev->tag) | 237 | if (tag != dev->tag) |
238 | dev_err(&dev->sbd.core, | 238 | dev_err(&dev->sbd.core, |
239 | "%s:%u: tag mismatch, got %llx, expected %llx\n", | 239 | "%s:%u: tag mismatch, got %llx, expected %llx\n", |
240 | __func__, __LINE__, tag, dev->tag); | 240 | __func__, __LINE__, tag, dev->tag); |
241 | 241 | ||
242 | if (res) { | 242 | if (res) { |
243 | dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n", | 243 | dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n", |
244 | __func__, __LINE__, res, status); | 244 | __func__, __LINE__, res, status); |
245 | return IRQ_HANDLED; | 245 | return IRQ_HANDLED; |
246 | } | 246 | } |
247 | 247 | ||
248 | priv = ps3_system_bus_get_drvdata(&dev->sbd); | 248 | priv = ps3_system_bus_get_drvdata(&dev->sbd); |
249 | req = priv->req; | 249 | req = priv->req; |
250 | if (!req) { | 250 | if (!req) { |
251 | dev_dbg(&dev->sbd.core, | 251 | dev_dbg(&dev->sbd.core, |
252 | "%s:%u non-block layer request completed\n", __func__, | 252 | "%s:%u non-block layer request completed\n", __func__, |
253 | __LINE__); | 253 | __LINE__); |
254 | dev->lv1_status = status; | 254 | dev->lv1_status = status; |
255 | complete(&dev->done); | 255 | complete(&dev->done); |
256 | return IRQ_HANDLED; | 256 | return IRQ_HANDLED; |
257 | } | 257 | } |
258 | 258 | ||
259 | if (req->cmd_flags & REQ_FLUSH) { | 259 | if (req->cmd_flags & REQ_FLUSH) { |
260 | read = 0; | 260 | read = 0; |
261 | op = "flush"; | 261 | op = "flush"; |
262 | } else { | 262 | } else { |
263 | read = !rq_data_dir(req); | 263 | read = !rq_data_dir(req); |
264 | op = read ? "read" : "write"; | 264 | op = read ? "read" : "write"; |
265 | } | 265 | } |
266 | if (status) { | 266 | if (status) { |
267 | dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, | 267 | dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, |
268 | __LINE__, op, status); | 268 | __LINE__, op, status); |
269 | error = -EIO; | 269 | error = -EIO; |
270 | } else { | 270 | } else { |
271 | dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__, | 271 | dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__, |
272 | __LINE__, op); | 272 | __LINE__, op); |
273 | error = 0; | 273 | error = 0; |
274 | if (read) | 274 | if (read) |
275 | ps3disk_scatter_gather(dev, req, 0); | 275 | ps3disk_scatter_gather(dev, req, 0); |
276 | } | 276 | } |
277 | 277 | ||
278 | spin_lock(&priv->lock); | 278 | spin_lock(&priv->lock); |
279 | __blk_end_request_all(req, error); | 279 | __blk_end_request_all(req, error); |
280 | priv->req = NULL; | 280 | priv->req = NULL; |
281 | ps3disk_do_request(dev, priv->queue); | 281 | ps3disk_do_request(dev, priv->queue); |
282 | spin_unlock(&priv->lock); | 282 | spin_unlock(&priv->lock); |
283 | 283 | ||
284 | return IRQ_HANDLED; | 284 | return IRQ_HANDLED; |
285 | } | 285 | } |
286 | 286 | ||
287 | static int ps3disk_sync_cache(struct ps3_storage_device *dev) | 287 | static int ps3disk_sync_cache(struct ps3_storage_device *dev) |
288 | { | 288 | { |
289 | u64 res; | 289 | u64 res; |
290 | 290 | ||
291 | dev_dbg(&dev->sbd.core, "%s:%u: sync cache\n", __func__, __LINE__); | 291 | dev_dbg(&dev->sbd.core, "%s:%u: sync cache\n", __func__, __LINE__); |
292 | 292 | ||
293 | res = ps3stor_send_command(dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0); | 293 | res = ps3stor_send_command(dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0); |
294 | if (res) { | 294 | if (res) { |
295 | dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", | 295 | dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", |
296 | __func__, __LINE__, res); | 296 | __func__, __LINE__, res); |
297 | return -EIO; | 297 | return -EIO; |
298 | } | 298 | } |
299 | return 0; | 299 | return 0; |
300 | } | 300 | } |
301 | 301 | ||
302 | 302 | ||
303 | /* ATA helpers copied from drivers/ata/libata-core.c */ | 303 | /* ATA helpers copied from drivers/ata/libata-core.c */ |
304 | 304 | ||
305 | static void swap_buf_le16(u16 *buf, unsigned int buf_words) | 305 | static void swap_buf_le16(u16 *buf, unsigned int buf_words) |
306 | { | 306 | { |
307 | #ifdef __BIG_ENDIAN | 307 | #ifdef __BIG_ENDIAN |
308 | unsigned int i; | 308 | unsigned int i; |
309 | 309 | ||
310 | for (i = 0; i < buf_words; i++) | 310 | for (i = 0; i < buf_words; i++) |
311 | buf[i] = le16_to_cpu(buf[i]); | 311 | buf[i] = le16_to_cpu(buf[i]); |
312 | #endif /* __BIG_ENDIAN */ | 312 | #endif /* __BIG_ENDIAN */ |
313 | } | 313 | } |
314 | 314 | ||
315 | static u64 ata_id_n_sectors(const u16 *id) | 315 | static u64 ata_id_n_sectors(const u16 *id) |
316 | { | 316 | { |
317 | if (ata_id_has_lba(id)) { | 317 | if (ata_id_has_lba(id)) { |
318 | if (ata_id_has_lba48(id)) | 318 | if (ata_id_has_lba48(id)) |
319 | return ata_id_u64(id, 100); | 319 | return ata_id_u64(id, 100); |
320 | else | 320 | else |
321 | return ata_id_u32(id, 60); | 321 | return ata_id_u32(id, 60); |
322 | } else { | 322 | } else { |
323 | if (ata_id_current_chs_valid(id)) | 323 | if (ata_id_current_chs_valid(id)) |
324 | return ata_id_u32(id, 57); | 324 | return ata_id_u32(id, 57); |
325 | else | 325 | else |
326 | return id[1] * id[3] * id[6]; | 326 | return id[1] * id[3] * id[6]; |
327 | } | 327 | } |
328 | } | 328 | } |
329 | 329 | ||
330 | static void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, | 330 | static void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, |
331 | unsigned int len) | 331 | unsigned int len) |
332 | { | 332 | { |
333 | unsigned int c; | 333 | unsigned int c; |
334 | 334 | ||
335 | while (len > 0) { | 335 | while (len > 0) { |
336 | c = id[ofs] >> 8; | 336 | c = id[ofs] >> 8; |
337 | *s = c; | 337 | *s = c; |
338 | s++; | 338 | s++; |
339 | 339 | ||
340 | c = id[ofs] & 0xff; | 340 | c = id[ofs] & 0xff; |
341 | *s = c; | 341 | *s = c; |
342 | s++; | 342 | s++; |
343 | 343 | ||
344 | ofs++; | 344 | ofs++; |
345 | len -= 2; | 345 | len -= 2; |
346 | } | 346 | } |
347 | } | 347 | } |
348 | 348 | ||
349 | static void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, | 349 | static void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, |
350 | unsigned int len) | 350 | unsigned int len) |
351 | { | 351 | { |
352 | unsigned char *p; | 352 | unsigned char *p; |
353 | 353 | ||
354 | WARN_ON(!(len & 1)); | 354 | WARN_ON(!(len & 1)); |
355 | 355 | ||
356 | ata_id_string(id, s, ofs, len - 1); | 356 | ata_id_string(id, s, ofs, len - 1); |
357 | 357 | ||
358 | p = s + strnlen(s, len - 1); | 358 | p = s + strnlen(s, len - 1); |
359 | while (p > s && p[-1] == ' ') | 359 | while (p > s && p[-1] == ' ') |
360 | p--; | 360 | p--; |
361 | *p = '\0'; | 361 | *p = '\0'; |
362 | } | 362 | } |
363 | 363 | ||
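These three helpers mirror their libata counterparts: the IDENTIFY data is an array of little-endian 16-bit words, so swap_buf_le16() byte-swaps it on the big-endian PS3; ata_id_n_sectors() takes the LBA48 sector count from words 100-103 (falling back to the LBA28 count in words 60-61); and ata_id_string() unpacks two ASCII characters per word, high byte first, with ata_id_c_string() trimming trailing blanks. A standalone illustration of the character packing, using made-up word values rather than real IDENTIFY data:

    #include <stdio.h>

    int main(void)
    {
    	unsigned short id[2] = { 0x5053, 0x3320 };	/* packs "PS3 " */
    	char s[5];
    	int i;

    	for (i = 0; i < 2; i++) {
    		s[2 * i]     = id[i] >> 8;	/* high byte is the first character */
    		s[2 * i + 1] = id[i] & 0xff;
    	}
    	s[4] = '\0';
    	printf("\"%s\"\n", s);	/* prints "PS3 "; the driver would trim the trailing blank */
    	return 0;
    }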
364 | static int ps3disk_identify(struct ps3_storage_device *dev) | 364 | static int ps3disk_identify(struct ps3_storage_device *dev) |
365 | { | 365 | { |
366 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); | 366 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); |
367 | struct lv1_ata_cmnd_block ata_cmnd; | 367 | struct lv1_ata_cmnd_block ata_cmnd; |
368 | u16 *id = dev->bounce_buf; | 368 | u16 *id = dev->bounce_buf; |
369 | u64 res; | 369 | u64 res; |
370 | 370 | ||
371 | dev_dbg(&dev->sbd.core, "%s:%u: identify disk\n", __func__, __LINE__); | 371 | dev_dbg(&dev->sbd.core, "%s:%u: identify disk\n", __func__, __LINE__); |
372 | 372 | ||
373 | memset(&ata_cmnd, 0, sizeof(struct lv1_ata_cmnd_block)); | 373 | memset(&ata_cmnd, 0, sizeof(struct lv1_ata_cmnd_block)); |
374 | ata_cmnd.command = ATA_CMD_ID_ATA; | 374 | ata_cmnd.command = ATA_CMD_ID_ATA; |
375 | ata_cmnd.sector_count = 1; | 375 | ata_cmnd.sector_count = 1; |
376 | ata_cmnd.size = ata_cmnd.arglen = ATA_ID_WORDS * 2; | 376 | ata_cmnd.size = ata_cmnd.arglen = ATA_ID_WORDS * 2; |
377 | ata_cmnd.buffer = dev->bounce_lpar; | 377 | ata_cmnd.buffer = dev->bounce_lpar; |
378 | ata_cmnd.proto = PIO_DATA_IN_PROTO; | 378 | ata_cmnd.proto = PIO_DATA_IN_PROTO; |
379 | ata_cmnd.in_out = DIR_READ; | 379 | ata_cmnd.in_out = DIR_READ; |
380 | 380 | ||
381 | res = ps3stor_send_command(dev, LV1_STORAGE_SEND_ATA_COMMAND, | 381 | res = ps3stor_send_command(dev, LV1_STORAGE_SEND_ATA_COMMAND, |
382 | ps3_mm_phys_to_lpar(__pa(&ata_cmnd)), | 382 | ps3_mm_phys_to_lpar(__pa(&ata_cmnd)), |
383 | sizeof(ata_cmnd), ata_cmnd.buffer, | 383 | sizeof(ata_cmnd), ata_cmnd.buffer, |
384 | ata_cmnd.arglen); | 384 | ata_cmnd.arglen); |
385 | if (res) { | 385 | if (res) { |
386 | dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%llx\n", | 386 | dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%llx\n", |
387 | __func__, __LINE__, res); | 387 | __func__, __LINE__, res); |
388 | return -EIO; | 388 | return -EIO; |
389 | } | 389 | } |
390 | 390 | ||
391 | swap_buf_le16(id, ATA_ID_WORDS); | 391 | swap_buf_le16(id, ATA_ID_WORDS); |
392 | 392 | ||
393 | /* All we're interested in are raw capacity and model name */ | 393 | /* All we're interested in are raw capacity and model name */ |
394 | priv->raw_capacity = ata_id_n_sectors(id); | 394 | priv->raw_capacity = ata_id_n_sectors(id); |
395 | ata_id_c_string(id, priv->model, ATA_ID_PROD, sizeof(priv->model)); | 395 | ata_id_c_string(id, priv->model, ATA_ID_PROD, sizeof(priv->model)); |
396 | return 0; | 396 | return 0; |
397 | } | 397 | } |
398 | 398 | ||
399 | static unsigned long ps3disk_mask; | 399 | static unsigned long ps3disk_mask; |
400 | 400 | ||
401 | static DEFINE_MUTEX(ps3disk_mask_mutex); | 401 | static DEFINE_MUTEX(ps3disk_mask_mutex); |
402 | 402 | ||
403 | static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) | 403 | static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) |
404 | { | 404 | { |
405 | struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); | 405 | struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); |
406 | struct ps3disk_private *priv; | 406 | struct ps3disk_private *priv; |
407 | int error; | 407 | int error; |
408 | unsigned int devidx; | 408 | unsigned int devidx; |
409 | struct request_queue *queue; | 409 | struct request_queue *queue; |
410 | struct gendisk *gendisk; | 410 | struct gendisk *gendisk; |
411 | 411 | ||
412 | if (dev->blk_size < 512) { | 412 | if (dev->blk_size < 512) { |
413 | dev_err(&dev->sbd.core, | 413 | dev_err(&dev->sbd.core, |
414 | "%s:%u: cannot handle block size %llu\n", __func__, | 414 | "%s:%u: cannot handle block size %llu\n", __func__, |
415 | __LINE__, dev->blk_size); | 415 | __LINE__, dev->blk_size); |
416 | return -EINVAL; | 416 | return -EINVAL; |
417 | } | 417 | } |
418 | 418 | ||
419 | BUILD_BUG_ON(PS3DISK_MAX_DISKS > BITS_PER_LONG); | 419 | BUILD_BUG_ON(PS3DISK_MAX_DISKS > BITS_PER_LONG); |
420 | mutex_lock(&ps3disk_mask_mutex); | 420 | mutex_lock(&ps3disk_mask_mutex); |
421 | devidx = find_first_zero_bit(&ps3disk_mask, PS3DISK_MAX_DISKS); | 421 | devidx = find_first_zero_bit(&ps3disk_mask, PS3DISK_MAX_DISKS); |
422 | if (devidx >= PS3DISK_MAX_DISKS) { | 422 | if (devidx >= PS3DISK_MAX_DISKS) { |
423 | dev_err(&dev->sbd.core, "%s:%u: Too many disks\n", __func__, | 423 | dev_err(&dev->sbd.core, "%s:%u: Too many disks\n", __func__, |
424 | __LINE__); | 424 | __LINE__); |
425 | mutex_unlock(&ps3disk_mask_mutex); | 425 | mutex_unlock(&ps3disk_mask_mutex); |
426 | return -ENOSPC; | 426 | return -ENOSPC; |
427 | } | 427 | } |
428 | __set_bit(devidx, &ps3disk_mask); | 428 | __set_bit(devidx, &ps3disk_mask); |
429 | mutex_unlock(&ps3disk_mask_mutex); | 429 | mutex_unlock(&ps3disk_mask_mutex); |
430 | 430 | ||
431 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 431 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
432 | if (!priv) { | 432 | if (!priv) { |
433 | error = -ENOMEM; | 433 | error = -ENOMEM; |
434 | goto fail; | 434 | goto fail; |
435 | } | 435 | } |
436 | 436 | ||
437 | ps3_system_bus_set_drvdata(_dev, priv); | 437 | ps3_system_bus_set_drvdata(_dev, priv); |
438 | spin_lock_init(&priv->lock); | 438 | spin_lock_init(&priv->lock); |
439 | 439 | ||
440 | dev->bounce_size = BOUNCE_SIZE; | 440 | dev->bounce_size = BOUNCE_SIZE; |
441 | dev->bounce_buf = kmalloc(BOUNCE_SIZE, GFP_DMA); | 441 | dev->bounce_buf = kmalloc(BOUNCE_SIZE, GFP_DMA); |
442 | if (!dev->bounce_buf) { | 442 | if (!dev->bounce_buf) { |
443 | error = -ENOMEM; | 443 | error = -ENOMEM; |
444 | goto fail_free_priv; | 444 | goto fail_free_priv; |
445 | } | 445 | } |
446 | 446 | ||
447 | error = ps3stor_setup(dev, ps3disk_interrupt); | 447 | error = ps3stor_setup(dev, ps3disk_interrupt); |
448 | if (error) | 448 | if (error) |
449 | goto fail_free_bounce; | 449 | goto fail_free_bounce; |
450 | 450 | ||
451 | ps3disk_identify(dev); | 451 | ps3disk_identify(dev); |
452 | 452 | ||
453 | queue = blk_init_queue(ps3disk_request, &priv->lock); | 453 | queue = blk_init_queue(ps3disk_request, &priv->lock); |
454 | if (!queue) { | 454 | if (!queue) { |
455 | dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n", | 455 | dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n", |
456 | __func__, __LINE__); | 456 | __func__, __LINE__); |
457 | error = -ENOMEM; | 457 | error = -ENOMEM; |
458 | goto fail_teardown; | 458 | goto fail_teardown; |
459 | } | 459 | } |
460 | 460 | ||
461 | priv->queue = queue; | 461 | priv->queue = queue; |
462 | queue->queuedata = dev; | 462 | queue->queuedata = dev; |
463 | 463 | ||
464 | blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH); | 464 | blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH); |
465 | 465 | ||
466 | blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); | 466 | blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); |
467 | blk_queue_segment_boundary(queue, -1UL); | 467 | blk_queue_segment_boundary(queue, -1UL); |
468 | blk_queue_dma_alignment(queue, dev->blk_size-1); | 468 | blk_queue_dma_alignment(queue, dev->blk_size-1); |
469 | blk_queue_logical_block_size(queue, dev->blk_size); | 469 | blk_queue_logical_block_size(queue, dev->blk_size); |
470 | 470 | ||
471 | blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); | 471 | blk_queue_flush(queue, REQ_FLUSH); |
472 | 472 | ||
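The hunk above replaces the old ordered-queue declaration with blk_queue_flush(queue, REQ_FLUSH): the queue now states that the disk has a write cache the driver can flush (the LV1 ATA HDDOUT command used by ps3disk_submit_flush_request() and ps3disk_sync_cache()), and no per-write FUA capability is declared. For comparison, a hypothetical device that could also honour forced-unit-access writes would pass both flags; this is a sketch only, ps3disk itself does not do this:

    blk_queue_flush(queue, REQ_FLUSH | REQ_FUA);	/* hypothetical: flushable cache + FUA writes */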
473 | blk_queue_max_segments(queue, -1); | 473 | blk_queue_max_segments(queue, -1); |
474 | blk_queue_max_segment_size(queue, dev->bounce_size); | 474 | blk_queue_max_segment_size(queue, dev->bounce_size); |
475 | 475 | ||
476 | gendisk = alloc_disk(PS3DISK_MINORS); | 476 | gendisk = alloc_disk(PS3DISK_MINORS); |
477 | if (!gendisk) { | 477 | if (!gendisk) { |
478 | dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__, | 478 | dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__, |
479 | __LINE__); | 479 | __LINE__); |
480 | error = -ENOMEM; | 480 | error = -ENOMEM; |
481 | goto fail_cleanup_queue; | 481 | goto fail_cleanup_queue; |
482 | } | 482 | } |
483 | 483 | ||
484 | priv->gendisk = gendisk; | 484 | priv->gendisk = gendisk; |
485 | gendisk->major = ps3disk_major; | 485 | gendisk->major = ps3disk_major; |
486 | gendisk->first_minor = devidx * PS3DISK_MINORS; | 486 | gendisk->first_minor = devidx * PS3DISK_MINORS; |
487 | gendisk->fops = &ps3disk_fops; | 487 | gendisk->fops = &ps3disk_fops; |
488 | gendisk->queue = queue; | 488 | gendisk->queue = queue; |
489 | gendisk->private_data = dev; | 489 | gendisk->private_data = dev; |
490 | gendisk->driverfs_dev = &dev->sbd.core; | 490 | gendisk->driverfs_dev = &dev->sbd.core; |
491 | snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, | 491 | snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, |
492 | devidx+'a'); | 492 | devidx+'a'); |
493 | priv->blocking_factor = dev->blk_size >> 9; | 493 | priv->blocking_factor = dev->blk_size >> 9; |
494 | set_capacity(gendisk, | 494 | set_capacity(gendisk, |
495 | dev->regions[dev->region_idx].size*priv->blocking_factor); | 495 | dev->regions[dev->region_idx].size*priv->blocking_factor); |
496 | 496 | ||
497 | dev_info(&dev->sbd.core, | 497 | dev_info(&dev->sbd.core, |
498 | "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n", | 498 | "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n", |
499 | gendisk->disk_name, priv->model, priv->raw_capacity >> 11, | 499 | gendisk->disk_name, priv->model, priv->raw_capacity >> 11, |
500 | get_capacity(gendisk) >> 11); | 500 | get_capacity(gendisk) >> 11); |
501 | 501 | ||
502 | add_disk(gendisk); | 502 | add_disk(gendisk); |
503 | return 0; | 503 | return 0; |
504 | 504 | ||
505 | fail_cleanup_queue: | 505 | fail_cleanup_queue: |
506 | blk_cleanup_queue(queue); | 506 | blk_cleanup_queue(queue); |
507 | fail_teardown: | 507 | fail_teardown: |
508 | ps3stor_teardown(dev); | 508 | ps3stor_teardown(dev); |
509 | fail_free_bounce: | 509 | fail_free_bounce: |
510 | kfree(dev->bounce_buf); | 510 | kfree(dev->bounce_buf); |
511 | fail_free_priv: | 511 | fail_free_priv: |
512 | kfree(priv); | 512 | kfree(priv); |
513 | ps3_system_bus_set_drvdata(_dev, NULL); | 513 | ps3_system_bus_set_drvdata(_dev, NULL); |
514 | fail: | 514 | fail: |
515 | mutex_lock(&ps3disk_mask_mutex); | 515 | mutex_lock(&ps3disk_mask_mutex); |
516 | __clear_bit(devidx, &ps3disk_mask); | 516 | __clear_bit(devidx, &ps3disk_mask); |
517 | mutex_unlock(&ps3disk_mask_mutex); | 517 | mutex_unlock(&ps3disk_mask_mutex); |
518 | return error; | 518 | return error; |
519 | } | 519 | } |
520 | 520 | ||
521 | static int ps3disk_remove(struct ps3_system_bus_device *_dev) | 521 | static int ps3disk_remove(struct ps3_system_bus_device *_dev) |
522 | { | 522 | { |
523 | struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); | 523 | struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); |
524 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); | 524 | struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); |
525 | 525 | ||
526 | mutex_lock(&ps3disk_mask_mutex); | 526 | mutex_lock(&ps3disk_mask_mutex); |
527 | __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, | 527 | __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, |
528 | &ps3disk_mask); | 528 | &ps3disk_mask); |
529 | mutex_unlock(&ps3disk_mask_mutex); | 529 | mutex_unlock(&ps3disk_mask_mutex); |
530 | del_gendisk(priv->gendisk); | 530 | del_gendisk(priv->gendisk); |
531 | blk_cleanup_queue(priv->queue); | 531 | blk_cleanup_queue(priv->queue); |
532 | put_disk(priv->gendisk); | 532 | put_disk(priv->gendisk); |
533 | dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); | 533 | dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); |
534 | ps3disk_sync_cache(dev); | 534 | ps3disk_sync_cache(dev); |
535 | ps3stor_teardown(dev); | 535 | ps3stor_teardown(dev); |
536 | kfree(dev->bounce_buf); | 536 | kfree(dev->bounce_buf); |
537 | kfree(priv); | 537 | kfree(priv); |
538 | ps3_system_bus_set_drvdata(_dev, NULL); | 538 | ps3_system_bus_set_drvdata(_dev, NULL); |
539 | return 0; | 539 | return 0; |
540 | } | 540 | } |
541 | 541 | ||
542 | static struct ps3_system_bus_driver ps3disk = { | 542 | static struct ps3_system_bus_driver ps3disk = { |
543 | .match_id = PS3_MATCH_ID_STOR_DISK, | 543 | .match_id = PS3_MATCH_ID_STOR_DISK, |
544 | .core.name = DEVICE_NAME, | 544 | .core.name = DEVICE_NAME, |
545 | .core.owner = THIS_MODULE, | 545 | .core.owner = THIS_MODULE, |
546 | .probe = ps3disk_probe, | 546 | .probe = ps3disk_probe, |
547 | .remove = ps3disk_remove, | 547 | .remove = ps3disk_remove, |
548 | .shutdown = ps3disk_remove, | 548 | .shutdown = ps3disk_remove, |
549 | }; | 549 | }; |
550 | 550 | ||
551 | 551 | ||
552 | static int __init ps3disk_init(void) | 552 | static int __init ps3disk_init(void) |
553 | { | 553 | { |
554 | int error; | 554 | int error; |
555 | 555 | ||
556 | if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) | 556 | if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) |
557 | return -ENODEV; | 557 | return -ENODEV; |
558 | 558 | ||
559 | error = register_blkdev(0, DEVICE_NAME); | 559 | error = register_blkdev(0, DEVICE_NAME); |
560 | if (error <= 0) { | 560 | if (error <= 0) { |
561 | printk(KERN_ERR "%s:%u: register_blkdev failed %d\n", __func__, | 561 | printk(KERN_ERR "%s:%u: register_blkdev failed %d\n", __func__, |
562 | __LINE__, error); | 562 | __LINE__, error); |
563 | return error; | 563 | return error; |
564 | } | 564 | } |
565 | ps3disk_major = error; | 565 | ps3disk_major = error; |
566 | 566 | ||
567 | pr_info("%s:%u: registered block device major %d\n", __func__, | 567 | pr_info("%s:%u: registered block device major %d\n", __func__, |
568 | __LINE__, ps3disk_major); | 568 | __LINE__, ps3disk_major); |
569 | 569 | ||
570 | error = ps3_system_bus_driver_register(&ps3disk); | 570 | error = ps3_system_bus_driver_register(&ps3disk); |
571 | if (error) | 571 | if (error) |
572 | unregister_blkdev(ps3disk_major, DEVICE_NAME); | 572 | unregister_blkdev(ps3disk_major, DEVICE_NAME); |
573 | 573 | ||
574 | return error; | 574 | return error; |
575 | } | 575 | } |
576 | 576 | ||
577 | static void __exit ps3disk_exit(void) | 577 | static void __exit ps3disk_exit(void) |
578 | { | 578 | { |
579 | ps3_system_bus_driver_unregister(&ps3disk); | 579 | ps3_system_bus_driver_unregister(&ps3disk); |
580 | unregister_blkdev(ps3disk_major, DEVICE_NAME); | 580 | unregister_blkdev(ps3disk_major, DEVICE_NAME); |
581 | } | 581 | } |
582 | 582 | ||
583 | module_init(ps3disk_init); | 583 | module_init(ps3disk_init); |
584 | module_exit(ps3disk_exit); | 584 | module_exit(ps3disk_exit); |
585 | 585 | ||
586 | MODULE_LICENSE("GPL"); | 586 | MODULE_LICENSE("GPL"); |
587 | MODULE_DESCRIPTION("PS3 Disk Storage Driver"); | 587 | MODULE_DESCRIPTION("PS3 Disk Storage Driver"); |
588 | MODULE_AUTHOR("Sony Corporation"); | 588 | MODULE_AUTHOR("Sony Corporation"); |
589 | MODULE_ALIAS(PS3_MODULE_ALIAS_STOR_DISK); | 589 | MODULE_ALIAS(PS3_MODULE_ALIAS_STOR_DISK); |
590 | 590 |
drivers/block/virtio_blk.c
1 | //#define DEBUG | 1 | //#define DEBUG |
2 | #include <linux/spinlock.h> | 2 | #include <linux/spinlock.h> |
3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
4 | #include <linux/blkdev.h> | 4 | #include <linux/blkdev.h> |
5 | #include <linux/smp_lock.h> | 5 | #include <linux/smp_lock.h> |
6 | #include <linux/hdreg.h> | 6 | #include <linux/hdreg.h> |
7 | #include <linux/virtio.h> | 7 | #include <linux/virtio.h> |
8 | #include <linux/virtio_blk.h> | 8 | #include <linux/virtio_blk.h> |
9 | #include <linux/scatterlist.h> | 9 | #include <linux/scatterlist.h> |
10 | 10 | ||
11 | #define PART_BITS 4 | 11 | #define PART_BITS 4 |
12 | 12 | ||
13 | static int major, index; | 13 | static int major, index; |
14 | 14 | ||
15 | struct virtio_blk | 15 | struct virtio_blk |
16 | { | 16 | { |
17 | spinlock_t lock; | 17 | spinlock_t lock; |
18 | 18 | ||
19 | struct virtio_device *vdev; | 19 | struct virtio_device *vdev; |
20 | struct virtqueue *vq; | 20 | struct virtqueue *vq; |
21 | 21 | ||
22 | /* The disk structure for the kernel. */ | 22 | /* The disk structure for the kernel. */ |
23 | struct gendisk *disk; | 23 | struct gendisk *disk; |
24 | 24 | ||
25 | /* Request tracking. */ | 25 | /* Request tracking. */ |
26 | struct list_head reqs; | 26 | struct list_head reqs; |
27 | 27 | ||
28 | mempool_t *pool; | 28 | mempool_t *pool; |
29 | 29 | ||
30 | 	/* What host tells us, plus 2 for header & trailer. */ | 30 | 	/* What host tells us, plus 2 for header & trailer. */ |
31 | unsigned int sg_elems; | 31 | unsigned int sg_elems; |
32 | 32 | ||
33 | /* Scatterlist: can be too big for stack. */ | 33 | /* Scatterlist: can be too big for stack. */ |
34 | struct scatterlist sg[/*sg_elems*/]; | 34 | struct scatterlist sg[/*sg_elems*/]; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct virtblk_req | 37 | struct virtblk_req |
38 | { | 38 | { |
39 | struct list_head list; | 39 | struct list_head list; |
40 | struct request *req; | 40 | struct request *req; |
41 | struct virtio_blk_outhdr out_hdr; | 41 | struct virtio_blk_outhdr out_hdr; |
42 | struct virtio_scsi_inhdr in_hdr; | 42 | struct virtio_scsi_inhdr in_hdr; |
43 | u8 status; | 43 | u8 status; |
44 | }; | 44 | }; |
45 | 45 | ||
46 | static void blk_done(struct virtqueue *vq) | 46 | static void blk_done(struct virtqueue *vq) |
47 | { | 47 | { |
48 | struct virtio_blk *vblk = vq->vdev->priv; | 48 | struct virtio_blk *vblk = vq->vdev->priv; |
49 | struct virtblk_req *vbr; | 49 | struct virtblk_req *vbr; |
50 | unsigned int len; | 50 | unsigned int len; |
51 | unsigned long flags; | 51 | unsigned long flags; |
52 | 52 | ||
53 | spin_lock_irqsave(&vblk->lock, flags); | 53 | spin_lock_irqsave(&vblk->lock, flags); |
54 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | 54 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { |
55 | int error; | 55 | int error; |
56 | 56 | ||
57 | switch (vbr->status) { | 57 | switch (vbr->status) { |
58 | case VIRTIO_BLK_S_OK: | 58 | case VIRTIO_BLK_S_OK: |
59 | error = 0; | 59 | error = 0; |
60 | break; | 60 | break; |
61 | case VIRTIO_BLK_S_UNSUPP: | 61 | case VIRTIO_BLK_S_UNSUPP: |
62 | error = -ENOTTY; | 62 | error = -ENOTTY; |
63 | break; | 63 | break; |
64 | default: | 64 | default: |
65 | error = -EIO; | 65 | error = -EIO; |
66 | break; | 66 | break; |
67 | } | 67 | } |
68 | 68 | ||
69 | switch (vbr->req->cmd_type) { | 69 | switch (vbr->req->cmd_type) { |
70 | case REQ_TYPE_BLOCK_PC: | 70 | case REQ_TYPE_BLOCK_PC: |
71 | vbr->req->resid_len = vbr->in_hdr.residual; | 71 | vbr->req->resid_len = vbr->in_hdr.residual; |
72 | vbr->req->sense_len = vbr->in_hdr.sense_len; | 72 | vbr->req->sense_len = vbr->in_hdr.sense_len; |
73 | vbr->req->errors = vbr->in_hdr.errors; | 73 | vbr->req->errors = vbr->in_hdr.errors; |
74 | break; | 74 | break; |
75 | case REQ_TYPE_SPECIAL: | 75 | case REQ_TYPE_SPECIAL: |
76 | vbr->req->errors = (error != 0); | 76 | vbr->req->errors = (error != 0); |
77 | break; | 77 | break; |
78 | default: | 78 | default: |
79 | break; | 79 | break; |
80 | } | 80 | } |
81 | 81 | ||
82 | __blk_end_request_all(vbr->req, error); | 82 | __blk_end_request_all(vbr->req, error); |
83 | list_del(&vbr->list); | 83 | list_del(&vbr->list); |
84 | mempool_free(vbr, vblk->pool); | 84 | mempool_free(vbr, vblk->pool); |
85 | } | 85 | } |
86 | /* In case queue is stopped waiting for more buffers. */ | 86 | /* In case queue is stopped waiting for more buffers. */ |
87 | blk_start_queue(vblk->disk->queue); | 87 | blk_start_queue(vblk->disk->queue); |
88 | spin_unlock_irqrestore(&vblk->lock, flags); | 88 | spin_unlock_irqrestore(&vblk->lock, flags); |
89 | } | 89 | } |
90 | 90 | ||
91 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | 91 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, |
92 | struct request *req) | 92 | struct request *req) |
93 | { | 93 | { |
94 | unsigned long num, out = 0, in = 0; | 94 | unsigned long num, out = 0, in = 0; |
95 | struct virtblk_req *vbr; | 95 | struct virtblk_req *vbr; |
96 | 96 | ||
97 | vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); | 97 | vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); |
98 | if (!vbr) | 98 | if (!vbr) |
99 | /* When another request finishes we'll try again. */ | 99 | /* When another request finishes we'll try again. */ |
100 | return false; | 100 | return false; |
101 | 101 | ||
102 | vbr->req = req; | 102 | vbr->req = req; |
103 | 103 | ||
104 | if (req->cmd_flags & REQ_FLUSH) { | 104 | if (req->cmd_flags & REQ_FLUSH) { |
105 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; | 105 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; |
106 | vbr->out_hdr.sector = 0; | 106 | vbr->out_hdr.sector = 0; |
107 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | 107 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); |
108 | } else { | 108 | } else { |
109 | switch (req->cmd_type) { | 109 | switch (req->cmd_type) { |
110 | case REQ_TYPE_FS: | 110 | case REQ_TYPE_FS: |
111 | vbr->out_hdr.type = 0; | 111 | vbr->out_hdr.type = 0; |
112 | vbr->out_hdr.sector = blk_rq_pos(vbr->req); | 112 | vbr->out_hdr.sector = blk_rq_pos(vbr->req); |
113 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | 113 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); |
114 | break; | 114 | break; |
115 | case REQ_TYPE_BLOCK_PC: | 115 | case REQ_TYPE_BLOCK_PC: |
116 | vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; | 116 | vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; |
117 | vbr->out_hdr.sector = 0; | 117 | vbr->out_hdr.sector = 0; |
118 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | 118 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); |
119 | break; | 119 | break; |
120 | case REQ_TYPE_SPECIAL: | 120 | case REQ_TYPE_SPECIAL: |
121 | vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID; | 121 | vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID; |
122 | vbr->out_hdr.sector = 0; | 122 | vbr->out_hdr.sector = 0; |
123 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | 123 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); |
124 | break; | 124 | break; |
125 | default: | 125 | default: |
126 | /* We don't put anything else in the queue. */ | 126 | /* We don't put anything else in the queue. */ |
127 | BUG(); | 127 | BUG(); |
128 | } | 128 | } |
129 | } | 129 | } |
130 | 130 | ||
131 | if (vbr->req->cmd_flags & REQ_HARDBARRIER) | 131 | if (vbr->req->cmd_flags & REQ_HARDBARRIER) |
132 | vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; | 132 | vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; |
133 | 133 | ||
134 | sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); | 134 | sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * If this is a packet command we need a couple of additional headers. | 137 | * If this is a packet command we need a couple of additional headers. |
138 | * Behind the normal outhdr we put a segment with the scsi command | 138 | * Behind the normal outhdr we put a segment with the scsi command |
139 | * block, and before the normal inhdr we put the sense data and the | 139 | * block, and before the normal inhdr we put the sense data and the |
140 | * inhdr with additional status information before the normal inhdr. | 140 | * inhdr with additional status information before the normal inhdr. |
141 | */ | 141 | */ |
142 | if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) | 142 | if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) |
143 | sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len); | 143 | sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len); |
144 | 144 | ||
145 | num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); | 145 | num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); |
146 | 146 | ||
147 | if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { | 147 | if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { |
148 | sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); | 148 | sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); |
149 | sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, | 149 | sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, |
150 | sizeof(vbr->in_hdr)); | 150 | sizeof(vbr->in_hdr)); |
151 | } | 151 | } |
152 | 152 | ||
153 | sg_set_buf(&vblk->sg[num + out + in++], &vbr->status, | 153 | sg_set_buf(&vblk->sg[num + out + in++], &vbr->status, |
154 | sizeof(vbr->status)); | 154 | sizeof(vbr->status)); |
155 | 155 | ||
156 | if (num) { | 156 | if (num) { |
157 | if (rq_data_dir(vbr->req) == WRITE) { | 157 | if (rq_data_dir(vbr->req) == WRITE) { |
158 | vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; | 158 | vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; |
159 | out += num; | 159 | out += num; |
160 | } else { | 160 | } else { |
161 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; | 161 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; |
162 | in += num; | 162 | in += num; |
163 | } | 163 | } |
164 | } | 164 | } |
165 | 165 | ||
166 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { | 166 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { |
167 | mempool_free(vbr, vblk->pool); | 167 | mempool_free(vbr, vblk->pool); |
168 | return false; | 168 | return false; |
169 | } | 169 | } |
170 | 170 | ||
171 | list_add_tail(&vbr->list, &vblk->reqs); | 171 | list_add_tail(&vbr->list, &vblk->reqs); |
172 | return true; | 172 | return true; |
173 | } | 173 | } |
174 | 174 | ||
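When a request carries REQ_FLUSH, do_req() above builds a header-only command: type VIRTIO_BLK_T_FLUSH, sector 0 and no data segments, so the ring holds just the out-header the host reads and the one-byte status it writes back. A hedged sketch of that case in isolation (queue_flush_cmd() is a made-up name; the error handling and request bookkeeping of the real driver are omitted):

    static void queue_flush_cmd(struct virtio_blk *vblk, struct virtblk_req *vbr)
    {
    	struct scatterlist sg[2];

    	vbr->out_hdr.type   = VIRTIO_BLK_T_FLUSH;
    	vbr->out_hdr.sector = 0;
    	vbr->out_hdr.ioprio = 0;	/* req_get_ioprio(req) in the real path */

    	sg_init_table(sg, 2);
    	sg_set_buf(&sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));	/* host reads */
    	sg_set_buf(&sg[1], &vbr->status, sizeof(vbr->status));		/* host writes */

    	virtqueue_add_buf(vblk->vq, sg, 1, 1, vbr);	/* 1 out, 1 in */
    	virtqueue_kick(vblk->vq);
    }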
175 | static void do_virtblk_request(struct request_queue *q) | 175 | static void do_virtblk_request(struct request_queue *q) |
176 | { | 176 | { |
177 | struct virtio_blk *vblk = q->queuedata; | 177 | struct virtio_blk *vblk = q->queuedata; |
178 | struct request *req; | 178 | struct request *req; |
179 | unsigned int issued = 0; | 179 | unsigned int issued = 0; |
180 | 180 | ||
181 | while ((req = blk_peek_request(q)) != NULL) { | 181 | while ((req = blk_peek_request(q)) != NULL) { |
182 | BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); | 182 | BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); |
183 | 183 | ||
184 | /* If this request fails, stop queue and wait for something to | 184 | /* If this request fails, stop queue and wait for something to |
185 | finish to restart it. */ | 185 | finish to restart it. */ |
186 | if (!do_req(q, vblk, req)) { | 186 | if (!do_req(q, vblk, req)) { |
187 | blk_stop_queue(q); | 187 | blk_stop_queue(q); |
188 | break; | 188 | break; |
189 | } | 189 | } |
190 | blk_start_request(req); | 190 | blk_start_request(req); |
191 | issued++; | 191 | issued++; |
192 | } | 192 | } |
193 | 193 | ||
194 | if (issued) | 194 | if (issued) |
195 | virtqueue_kick(vblk->vq); | 195 | virtqueue_kick(vblk->vq); |
196 | } | 196 | } |
197 | 197 | ||
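do_virtblk_request() uses blk_peek_request() plus blk_start_request() rather than blk_fetch_request() (which ps3disk uses), so that a request it cannot queue yet (virtring full or mempool exhausted) stays on the block queue and is retried once blk_done() restarts the queue. The two styles relate as in this small sketch; fetch_one() is a made-up name, but this is effectively what blk_fetch_request() does:

    static struct request *fetch_one(struct request_queue *q)
    {
    	struct request *req = blk_peek_request(q);

    	if (req)
    		blk_start_request(req);	/* dequeue only once we commit to it */
    	return req;
    }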
198 | /* return id (s/n) string for *disk to *id_str | 198 | /* return id (s/n) string for *disk to *id_str |
199 | */ | 199 | */ |
200 | static int virtblk_get_id(struct gendisk *disk, char *id_str) | 200 | static int virtblk_get_id(struct gendisk *disk, char *id_str) |
201 | { | 201 | { |
202 | struct virtio_blk *vblk = disk->private_data; | 202 | struct virtio_blk *vblk = disk->private_data; |
203 | struct request *req; | 203 | struct request *req; |
204 | struct bio *bio; | 204 | struct bio *bio; |
205 | 205 | ||
206 | bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, | 206 | bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, |
207 | GFP_KERNEL); | 207 | GFP_KERNEL); |
208 | if (IS_ERR(bio)) | 208 | if (IS_ERR(bio)) |
209 | return PTR_ERR(bio); | 209 | return PTR_ERR(bio); |
210 | 210 | ||
211 | req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL); | 211 | req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL); |
212 | if (IS_ERR(req)) { | 212 | if (IS_ERR(req)) { |
213 | bio_put(bio); | 213 | bio_put(bio); |
214 | return PTR_ERR(req); | 214 | return PTR_ERR(req); |
215 | } | 215 | } |
216 | 216 | ||
217 | req->cmd_type = REQ_TYPE_SPECIAL; | 217 | req->cmd_type = REQ_TYPE_SPECIAL; |
218 | return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); | 218 | return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); |
219 | } | 219 | } |
220 | 220 | ||
221 | static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | 221 | static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, |
222 | unsigned cmd, unsigned long data) | 222 | unsigned cmd, unsigned long data) |
223 | { | 223 | { |
224 | struct gendisk *disk = bdev->bd_disk; | 224 | struct gendisk *disk = bdev->bd_disk; |
225 | struct virtio_blk *vblk = disk->private_data; | 225 | struct virtio_blk *vblk = disk->private_data; |
226 | 226 | ||
227 | /* | 227 | /* |
228 | * Only allow the generic SCSI ioctls if the host can support it. | 228 | * Only allow the generic SCSI ioctls if the host can support it. |
229 | */ | 229 | */ |
230 | if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) | 230 | if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) |
231 | return -ENOTTY; | 231 | return -ENOTTY; |
232 | 232 | ||
233 | return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, | 233 | return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, |
234 | (void __user *)data); | 234 | (void __user *)data); |
235 | } | 235 | } |
236 | 236 | ||
237 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, | 237 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, |
238 | unsigned int cmd, unsigned long param) | 238 | unsigned int cmd, unsigned long param) |
239 | { | 239 | { |
240 | int ret; | 240 | int ret; |
241 | 241 | ||
242 | lock_kernel(); | 242 | lock_kernel(); |
243 | ret = virtblk_locked_ioctl(bdev, mode, cmd, param); | 243 | ret = virtblk_locked_ioctl(bdev, mode, cmd, param); |
244 | unlock_kernel(); | 244 | unlock_kernel(); |
245 | 245 | ||
246 | return ret; | 246 | return ret; |
247 | } | 247 | } |
248 | 248 | ||
249 | /* We provide getgeo only to please some old bootloader/partitioning tools */ | 249 | /* We provide getgeo only to please some old bootloader/partitioning tools */ |
250 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) | 250 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) |
251 | { | 251 | { |
252 | struct virtio_blk *vblk = bd->bd_disk->private_data; | 252 | struct virtio_blk *vblk = bd->bd_disk->private_data; |
253 | struct virtio_blk_geometry vgeo; | 253 | struct virtio_blk_geometry vgeo; |
254 | int err; | 254 | int err; |
255 | 255 | ||
256 | /* see if the host passed in geometry config */ | 256 | /* see if the host passed in geometry config */ |
257 | err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, | 257 | err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, |
258 | offsetof(struct virtio_blk_config, geometry), | 258 | offsetof(struct virtio_blk_config, geometry), |
259 | &vgeo); | 259 | &vgeo); |
260 | 260 | ||
261 | if (!err) { | 261 | if (!err) { |
262 | geo->heads = vgeo.heads; | 262 | geo->heads = vgeo.heads; |
263 | geo->sectors = vgeo.sectors; | 263 | geo->sectors = vgeo.sectors; |
264 | geo->cylinders = vgeo.cylinders; | 264 | geo->cylinders = vgeo.cylinders; |
265 | } else { | 265 | } else { |
266 | /* some standard values, similar to sd */ | 266 | /* some standard values, similar to sd */ |
267 | geo->heads = 1 << 6; | 267 | geo->heads = 1 << 6; |
268 | geo->sectors = 1 << 5; | 268 | geo->sectors = 1 << 5; |
269 | geo->cylinders = get_capacity(bd->bd_disk) >> 11; | 269 | geo->cylinders = get_capacity(bd->bd_disk) >> 11; |
270 | } | 270 | } |
271 | return 0; | 271 | return 0; |
272 | } | 272 | } |
273 | 273 | ||
274 | static const struct block_device_operations virtblk_fops = { | 274 | static const struct block_device_operations virtblk_fops = { |
275 | .ioctl = virtblk_ioctl, | 275 | .ioctl = virtblk_ioctl, |
276 | .owner = THIS_MODULE, | 276 | .owner = THIS_MODULE, |
277 | .getgeo = virtblk_getgeo, | 277 | .getgeo = virtblk_getgeo, |
278 | }; | 278 | }; |
279 | 279 | ||
280 | static int index_to_minor(int index) | 280 | static int index_to_minor(int index) |
281 | { | 281 | { |
282 | return index << PART_BITS; | 282 | return index << PART_BITS; |
283 | } | 283 | } |
284 | 284 | ||
285 | static ssize_t virtblk_serial_show(struct device *dev, | 285 | static ssize_t virtblk_serial_show(struct device *dev, |
286 | struct device_attribute *attr, char *buf) | 286 | struct device_attribute *attr, char *buf) |
287 | { | 287 | { |
288 | struct gendisk *disk = dev_to_disk(dev); | 288 | struct gendisk *disk = dev_to_disk(dev); |
289 | int err; | 289 | int err; |
290 | 290 | ||
291 | /* sysfs gives us a PAGE_SIZE buffer */ | 291 | /* sysfs gives us a PAGE_SIZE buffer */ |
292 | BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); | 292 | BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); |
293 | 293 | ||
294 | buf[VIRTIO_BLK_ID_BYTES] = '\0'; | 294 | buf[VIRTIO_BLK_ID_BYTES] = '\0'; |
295 | err = virtblk_get_id(disk, buf); | 295 | err = virtblk_get_id(disk, buf); |
296 | if (!err) | 296 | if (!err) |
297 | return strlen(buf); | 297 | return strlen(buf); |
298 | 298 | ||
299 | if (err == -EIO) /* Unsupported? Make it empty. */ | 299 | if (err == -EIO) /* Unsupported? Make it empty. */ |
300 | return 0; | 300 | return 0; |
301 | 301 | ||
302 | return err; | 302 | return err; |
303 | } | 303 | } |
304 | DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); | 304 | DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); |
305 | 305 | ||
306 | static int __devinit virtblk_probe(struct virtio_device *vdev) | 306 | static int __devinit virtblk_probe(struct virtio_device *vdev) |
307 | { | 307 | { |
308 | struct virtio_blk *vblk; | 308 | struct virtio_blk *vblk; |
309 | struct request_queue *q; | 309 | struct request_queue *q; |
310 | int err; | 310 | int err; |
311 | u64 cap; | 311 | u64 cap; |
312 | u32 v, blk_size, sg_elems, opt_io_size; | 312 | u32 v, blk_size, sg_elems, opt_io_size; |
313 | u16 min_io_size; | 313 | u16 min_io_size; |
314 | u8 physical_block_exp, alignment_offset; | 314 | u8 physical_block_exp, alignment_offset; |
315 | 315 | ||
316 | if (index_to_minor(index) >= 1 << MINORBITS) | 316 | if (index_to_minor(index) >= 1 << MINORBITS) |
317 | return -ENOSPC; | 317 | return -ENOSPC; |
318 | 318 | ||
319 | /* We need to know how many segments before we allocate. */ | 319 | /* We need to know how many segments before we allocate. */ |
320 | err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, | 320 | err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, |
321 | offsetof(struct virtio_blk_config, seg_max), | 321 | offsetof(struct virtio_blk_config, seg_max), |
322 | &sg_elems); | 322 | &sg_elems); |
323 | 323 | ||
324 | /* We need at least one SG element, whatever they say. */ | 324 | /* We need at least one SG element, whatever they say. */ |
325 | if (err || !sg_elems) | 325 | if (err || !sg_elems) |
326 | sg_elems = 1; | 326 | sg_elems = 1; |
327 | 327 | ||
328 | 	/* We need extra sg elements at head and tail. */ | 328 | 	/* We need extra sg elements at head and tail. */ |
329 | sg_elems += 2; | 329 | sg_elems += 2; |
330 | vdev->priv = vblk = kmalloc(sizeof(*vblk) + | 330 | vdev->priv = vblk = kmalloc(sizeof(*vblk) + |
331 | sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); | 331 | sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); |
332 | if (!vblk) { | 332 | if (!vblk) { |
333 | err = -ENOMEM; | 333 | err = -ENOMEM; |
334 | goto out; | 334 | goto out; |
335 | } | 335 | } |
336 | 336 | ||
337 | INIT_LIST_HEAD(&vblk->reqs); | 337 | INIT_LIST_HEAD(&vblk->reqs); |
338 | spin_lock_init(&vblk->lock); | 338 | spin_lock_init(&vblk->lock); |
339 | vblk->vdev = vdev; | 339 | vblk->vdev = vdev; |
340 | vblk->sg_elems = sg_elems; | 340 | vblk->sg_elems = sg_elems; |
341 | sg_init_table(vblk->sg, vblk->sg_elems); | 341 | sg_init_table(vblk->sg, vblk->sg_elems); |
342 | 342 | ||
343 | /* We expect one virtqueue, for output. */ | 343 | /* We expect one virtqueue, for output. */ |
344 | vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); | 344 | vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); |
345 | if (IS_ERR(vblk->vq)) { | 345 | if (IS_ERR(vblk->vq)) { |
346 | err = PTR_ERR(vblk->vq); | 346 | err = PTR_ERR(vblk->vq); |
347 | goto out_free_vblk; | 347 | goto out_free_vblk; |
348 | } | 348 | } |
349 | 349 | ||
350 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); | 350 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); |
351 | if (!vblk->pool) { | 351 | if (!vblk->pool) { |
352 | err = -ENOMEM; | 352 | err = -ENOMEM; |
353 | goto out_free_vq; | 353 | goto out_free_vq; |
354 | } | 354 | } |
355 | 355 | ||
356 | /* FIXME: How many partitions? How long is a piece of string? */ | 356 | /* FIXME: How many partitions? How long is a piece of string? */ |
357 | vblk->disk = alloc_disk(1 << PART_BITS); | 357 | vblk->disk = alloc_disk(1 << PART_BITS); |
358 | if (!vblk->disk) { | 358 | if (!vblk->disk) { |
359 | err = -ENOMEM; | 359 | err = -ENOMEM; |
360 | goto out_mempool; | 360 | goto out_mempool; |
361 | } | 361 | } |
362 | 362 | ||
363 | q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); | 363 | q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); |
364 | if (!q) { | 364 | if (!q) { |
365 | err = -ENOMEM; | 365 | err = -ENOMEM; |
366 | goto out_put_disk; | 366 | goto out_put_disk; |
367 | } | 367 | } |
368 | 368 | ||
369 | q->queuedata = vblk; | 369 | q->queuedata = vblk; |
370 | 370 | ||
371 | if (index < 26) { | 371 | if (index < 26) { |
372 | sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); | 372 | sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); |
373 | } else if (index < (26 + 1) * 26) { | 373 | } else if (index < (26 + 1) * 26) { |
374 | sprintf(vblk->disk->disk_name, "vd%c%c", | 374 | sprintf(vblk->disk->disk_name, "vd%c%c", |
375 | 'a' + index / 26 - 1, 'a' + index % 26); | 375 | 'a' + index / 26 - 1, 'a' + index % 26); |
376 | } else { | 376 | } else { |
377 | const unsigned int m1 = (index / 26 - 1) / 26 - 1; | 377 | const unsigned int m1 = (index / 26 - 1) / 26 - 1; |
378 | const unsigned int m2 = (index / 26 - 1) % 26; | 378 | const unsigned int m2 = (index / 26 - 1) % 26; |
379 | const unsigned int m3 = index % 26; | 379 | const unsigned int m3 = index % 26; |
380 | sprintf(vblk->disk->disk_name, "vd%c%c%c", | 380 | sprintf(vblk->disk->disk_name, "vd%c%c%c", |
381 | 'a' + m1, 'a' + m2, 'a' + m3); | 381 | 'a' + m1, 'a' + m2, 'a' + m3); |
382 | } | 382 | } |
383 | 383 | ||
384 | vblk->disk->major = major; | 384 | vblk->disk->major = major; |
385 | vblk->disk->first_minor = index_to_minor(index); | 385 | vblk->disk->first_minor = index_to_minor(index); |
386 | vblk->disk->private_data = vblk; | 386 | vblk->disk->private_data = vblk; |
387 | vblk->disk->fops = &virtblk_fops; | 387 | vblk->disk->fops = &virtblk_fops; |
388 | vblk->disk->driverfs_dev = &vdev->dev; | 388 | vblk->disk->driverfs_dev = &vdev->dev; |
389 | index++; | 389 | index++; |
390 | 390 | ||
391 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { | 391 | /* |
392 | /* | 392 | * If the FLUSH feature is supported we do have support for |
393 | * If the FLUSH feature is supported we do have support for | 393 | * flushing a volatile write cache on the host. Use that to |
394 | * flushing a volatile write cache on the host. Use that | 394 | * implement write barrier support; otherwise, we must assume |
395 | * to implement write barrier support. | 395 | * that the host does not perform any kind of volatile write |
396 | */ | 396 | * caching. |
397 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); | 397 | */ |
398 | } else { | 398 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) |
399 | /* | 399 | blk_queue_flush(q, REQ_FLUSH); |
400 | * If the FLUSH feature is not supported we must assume that | ||
401 | * the host does not perform any kind of volatile write | ||
402 | * caching. We still need to drain the queue to provider | ||
403 | * proper barrier semantics. | ||
404 | */ | ||
405 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); | ||
406 | } | ||
407 | 400 | ||
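The hunk above collapses the two ordered-queue branches into one conditional: if the host advertises VIRTIO_BLK_F_FLUSH the queue declares a flushable write cache, and if it does not, no call is made at all, since a queue that never calls blk_queue_flush() has no flush capability to expose in the first place. The resulting shape, in isolation:

    if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
    	blk_queue_flush(q, REQ_FLUSH);	/* host can flush its volatile cache */
    /* no else branch needed: the default is no flush capability */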
408 | /* If disk is read-only in the host, the guest should obey */ | 401 | /* If disk is read-only in the host, the guest should obey */ |
409 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) | 402 | if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) |
410 | set_disk_ro(vblk->disk, 1); | 403 | set_disk_ro(vblk->disk, 1); |
411 | 404 | ||
412 | /* Host must always specify the capacity. */ | 405 | /* Host must always specify the capacity. */ |
413 | vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), | 406 | vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), |
414 | &cap, sizeof(cap)); | 407 | &cap, sizeof(cap)); |
415 | 408 | ||
416 | /* If capacity is too big, truncate with warning. */ | 409 | /* If capacity is too big, truncate with warning. */ |
417 | if ((sector_t)cap != cap) { | 410 | if ((sector_t)cap != cap) { |
418 | dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", | 411 | dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", |
419 | (unsigned long long)cap); | 412 | (unsigned long long)cap); |
420 | cap = (sector_t)-1; | 413 | cap = (sector_t)-1; |
421 | } | 414 | } |
422 | set_capacity(vblk->disk, cap); | 415 | set_capacity(vblk->disk, cap); |
423 | 416 | ||
424 | /* We can handle whatever the host told us to handle. */ | 417 | /* We can handle whatever the host told us to handle. */ |
425 | blk_queue_max_segments(q, vblk->sg_elems-2); | 418 | blk_queue_max_segments(q, vblk->sg_elems-2); |
426 | 419 | ||
427 | /* No need to bounce any requests */ | 420 | /* No need to bounce any requests */ |
428 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); | 421 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); |
429 | 422 | ||
430 | /* No real sector limit. */ | 423 | /* No real sector limit. */ |
431 | blk_queue_max_hw_sectors(q, -1U); | 424 | blk_queue_max_hw_sectors(q, -1U); |
432 | 425 | ||
433 | /* Host can optionally specify maximum segment size and number of | 426 | /* Host can optionally specify maximum segment size and number of |
434 | * segments. */ | 427 | * segments. */ |
435 | err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, | 428 | err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, |
436 | offsetof(struct virtio_blk_config, size_max), | 429 | offsetof(struct virtio_blk_config, size_max), |
437 | &v); | 430 | &v); |
438 | if (!err) | 431 | if (!err) |
439 | blk_queue_max_segment_size(q, v); | 432 | blk_queue_max_segment_size(q, v); |
440 | else | 433 | else |
441 | blk_queue_max_segment_size(q, -1U); | 434 | blk_queue_max_segment_size(q, -1U); |
442 | 435 | ||
443 | /* Host can optionally specify the block size of the device */ | 436 | /* Host can optionally specify the block size of the device */ |
444 | err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, | 437 | err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, |
445 | offsetof(struct virtio_blk_config, blk_size), | 438 | offsetof(struct virtio_blk_config, blk_size), |
446 | &blk_size); | 439 | &blk_size); |
447 | if (!err) | 440 | if (!err) |
448 | blk_queue_logical_block_size(q, blk_size); | 441 | blk_queue_logical_block_size(q, blk_size); |
449 | else | 442 | else |
450 | blk_size = queue_logical_block_size(q); | 443 | blk_size = queue_logical_block_size(q); |
451 | 444 | ||
452 | /* Use topology information if available */ | 445 | /* Use topology information if available */ |
453 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, | 446 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, |
454 | offsetof(struct virtio_blk_config, physical_block_exp), | 447 | offsetof(struct virtio_blk_config, physical_block_exp), |
455 | &physical_block_exp); | 448 | &physical_block_exp); |
456 | if (!err && physical_block_exp) | 449 | if (!err && physical_block_exp) |
457 | blk_queue_physical_block_size(q, | 450 | blk_queue_physical_block_size(q, |
458 | blk_size * (1 << physical_block_exp)); | 451 | blk_size * (1 << physical_block_exp)); |
459 | 452 | ||
460 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, | 453 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, |
461 | offsetof(struct virtio_blk_config, alignment_offset), | 454 | offsetof(struct virtio_blk_config, alignment_offset), |
462 | &alignment_offset); | 455 | &alignment_offset); |
463 | if (!err && alignment_offset) | 456 | if (!err && alignment_offset) |
464 | blk_queue_alignment_offset(q, blk_size * alignment_offset); | 457 | blk_queue_alignment_offset(q, blk_size * alignment_offset); |
465 | 458 | ||
466 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, | 459 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, |
467 | offsetof(struct virtio_blk_config, min_io_size), | 460 | offsetof(struct virtio_blk_config, min_io_size), |
468 | &min_io_size); | 461 | &min_io_size); |
469 | if (!err && min_io_size) | 462 | if (!err && min_io_size) |
470 | blk_queue_io_min(q, blk_size * min_io_size); | 463 | blk_queue_io_min(q, blk_size * min_io_size); |
471 | 464 | ||
472 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, | 465 | err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, |
473 | offsetof(struct virtio_blk_config, opt_io_size), | 466 | offsetof(struct virtio_blk_config, opt_io_size), |
474 | &opt_io_size); | 467 | &opt_io_size); |
475 | if (!err && opt_io_size) | 468 | if (!err && opt_io_size) |
476 | blk_queue_io_opt(q, blk_size * opt_io_size); | 469 | blk_queue_io_opt(q, blk_size * opt_io_size); |
477 | 470 | ||
478 | 471 | ||
479 | add_disk(vblk->disk); | 472 | add_disk(vblk->disk); |
480 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); | 473 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); |
481 | if (err) | 474 | if (err) |
482 | goto out_del_disk; | 475 | goto out_del_disk; |
483 | 476 | ||
484 | return 0; | 477 | return 0; |
485 | 478 | ||
486 | out_del_disk: | 479 | out_del_disk: |
487 | del_gendisk(vblk->disk); | 480 | del_gendisk(vblk->disk); |
488 | blk_cleanup_queue(vblk->disk->queue); | 481 | blk_cleanup_queue(vblk->disk->queue); |
489 | out_put_disk: | 482 | out_put_disk: |
490 | put_disk(vblk->disk); | 483 | put_disk(vblk->disk); |
491 | out_mempool: | 484 | out_mempool: |
492 | mempool_destroy(vblk->pool); | 485 | mempool_destroy(vblk->pool); |
493 | out_free_vq: | 486 | out_free_vq: |
494 | vdev->config->del_vqs(vdev); | 487 | vdev->config->del_vqs(vdev); |
495 | out_free_vblk: | 488 | out_free_vblk: |
496 | kfree(vblk); | 489 | kfree(vblk); |
497 | out: | 490 | out: |
498 | return err; | 491 | return err; |
499 | } | 492 | } |
500 | 493 | ||
501 | static void __devexit virtblk_remove(struct virtio_device *vdev) | 494 | static void __devexit virtblk_remove(struct virtio_device *vdev) |
502 | { | 495 | { |
503 | struct virtio_blk *vblk = vdev->priv; | 496 | struct virtio_blk *vblk = vdev->priv; |
504 | 497 | ||
505 | /* Nothing should be pending. */ | 498 | /* Nothing should be pending. */ |
506 | BUG_ON(!list_empty(&vblk->reqs)); | 499 | BUG_ON(!list_empty(&vblk->reqs)); |
507 | 500 | ||
508 | /* Stop all the virtqueues. */ | 501 | /* Stop all the virtqueues. */ |
509 | vdev->config->reset(vdev); | 502 | vdev->config->reset(vdev); |
510 | 503 | ||
511 | del_gendisk(vblk->disk); | 504 | del_gendisk(vblk->disk); |
512 | blk_cleanup_queue(vblk->disk->queue); | 505 | blk_cleanup_queue(vblk->disk->queue); |
513 | put_disk(vblk->disk); | 506 | put_disk(vblk->disk); |
514 | mempool_destroy(vblk->pool); | 507 | mempool_destroy(vblk->pool); |
515 | vdev->config->del_vqs(vdev); | 508 | vdev->config->del_vqs(vdev); |
516 | kfree(vblk); | 509 | kfree(vblk); |
517 | } | 510 | } |
518 | 511 | ||
519 | static const struct virtio_device_id id_table[] = { | 512 | static const struct virtio_device_id id_table[] = { |
520 | { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, | 513 | { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, |
521 | { 0 }, | 514 | { 0 }, |
522 | }; | 515 | }; |
523 | 516 | ||
524 | static unsigned int features[] = { | 517 | static unsigned int features[] = { |
525 | VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, | 518 | VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, |
526 | VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, | 519 | VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, |
527 | VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY | 520 | VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY |
528 | }; | 521 | }; |
529 | 522 | ||
530 | /* | 523 | /* |
531 | * virtio_blk causes spurious section mismatch warning by | 524 | * virtio_blk causes spurious section mismatch warning by |
532 | * simultaneously referring to a __devinit and a __devexit function. | 525 | * simultaneously referring to a __devinit and a __devexit function. |
533 | * Use __refdata to avoid this warning. | 526 | * Use __refdata to avoid this warning. |
534 | */ | 527 | */ |
535 | static struct virtio_driver __refdata virtio_blk = { | 528 | static struct virtio_driver __refdata virtio_blk = { |
536 | .feature_table = features, | 529 | .feature_table = features, |
537 | .feature_table_size = ARRAY_SIZE(features), | 530 | .feature_table_size = ARRAY_SIZE(features), |
538 | .driver.name = KBUILD_MODNAME, | 531 | .driver.name = KBUILD_MODNAME, |
539 | .driver.owner = THIS_MODULE, | 532 | .driver.owner = THIS_MODULE, |
540 | .id_table = id_table, | 533 | .id_table = id_table, |
541 | .probe = virtblk_probe, | 534 | .probe = virtblk_probe, |
542 | .remove = __devexit_p(virtblk_remove), | 535 | .remove = __devexit_p(virtblk_remove), |
543 | }; | 536 | }; |
544 | 537 | ||
545 | static int __init init(void) | 538 | static int __init init(void) |
546 | { | 539 | { |
547 | major = register_blkdev(0, "virtblk"); | 540 | major = register_blkdev(0, "virtblk"); |
548 | if (major < 0) | 541 | if (major < 0) |
549 | return major; | 542 | return major; |
550 | return register_virtio_driver(&virtio_blk); | 543 | return register_virtio_driver(&virtio_blk); |
551 | } | 544 | } |
552 | 545 | ||
553 | static void __exit fini(void) | 546 | static void __exit fini(void) |
554 | { | 547 | { |
555 | unregister_blkdev(major, "virtblk"); | 548 | unregister_blkdev(major, "virtblk"); |
556 | unregister_virtio_driver(&virtio_blk); | 549 | unregister_virtio_driver(&virtio_blk); |
557 | } | 550 | } |
558 | module_init(init); | 551 | module_init(init); |
559 | module_exit(fini); | 552 | module_exit(fini); |
560 | 553 | ||
561 | MODULE_DEVICE_TABLE(virtio, id_table); | 554 | MODULE_DEVICE_TABLE(virtio, id_table); |
562 | MODULE_DESCRIPTION("Virtio block driver"); | 555 | MODULE_DESCRIPTION("Virtio block driver"); |
563 | MODULE_LICENSE("GPL"); | 556 | MODULE_LICENSE("GPL"); |
564 | 557 |
drivers/block/xen-blkfront.c
1 | /* | 1 | /* |
2 | * blkfront.c | 2 | * blkfront.c |
3 | * | 3 | * |
4 | * XenLinux virtual block device driver. | 4 | * XenLinux virtual block device driver. |
5 | * | 5 | * |
6 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | 6 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand |
7 | * Modifications by Mark A. Williamson are (c) Intel Research Cambridge | 7 | * Modifications by Mark A. Williamson are (c) Intel Research Cambridge |
8 | * Copyright (c) 2004, Christian Limpach | 8 | * Copyright (c) 2004, Christian Limpach |
9 | * Copyright (c) 2004, Andrew Warfield | 9 | * Copyright (c) 2004, Andrew Warfield |
10 | * Copyright (c) 2005, Christopher Clark | 10 | * Copyright (c) 2005, Christopher Clark |
11 | * Copyright (c) 2005, XenSource Ltd | 11 | * Copyright (c) 2005, XenSource Ltd |
12 | * | 12 | * |
13 | * This program is free software; you can redistribute it and/or | 13 | * This program is free software; you can redistribute it and/or |
14 | * modify it under the terms of the GNU General Public License version 2 | 14 | * modify it under the terms of the GNU General Public License version 2 |
15 | * as published by the Free Software Foundation; or, when distributed | 15 | * as published by the Free Software Foundation; or, when distributed |
16 | * separately from the Linux kernel or incorporated into other | 16 | * separately from the Linux kernel or incorporated into other |
17 | * software packages, subject to the following license: | 17 | * software packages, subject to the following license: |
18 | * | 18 | * |
19 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
20 | * of this source file (the "Software"), to deal in the Software without | 20 | * of this source file (the "Software"), to deal in the Software without |
21 | * restriction, including without limitation the rights to use, copy, modify, | 21 | * restriction, including without limitation the rights to use, copy, modify, |
22 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 22 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
23 | * and to permit persons to whom the Software is furnished to do so, subject to | 23 | * and to permit persons to whom the Software is furnished to do so, subject to |
24 | * the following conditions: | 24 | * the following conditions: |
25 | * | 25 | * |
26 | * The above copyright notice and this permission notice shall be included in | 26 | * The above copyright notice and this permission notice shall be included in |
27 | * all copies or substantial portions of the Software. | 27 | * all copies or substantial portions of the Software. |
28 | * | 28 | * |
29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
34 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 34 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
35 | * IN THE SOFTWARE. | 35 | * IN THE SOFTWARE. |
36 | */ | 36 | */ |
37 | 37 | ||
38 | #include <linux/interrupt.h> | 38 | #include <linux/interrupt.h> |
39 | #include <linux/blkdev.h> | 39 | #include <linux/blkdev.h> |
40 | #include <linux/hdreg.h> | 40 | #include <linux/hdreg.h> |
41 | #include <linux/cdrom.h> | 41 | #include <linux/cdrom.h> |
42 | #include <linux/module.h> | 42 | #include <linux/module.h> |
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/smp_lock.h> | 44 | #include <linux/smp_lock.h> |
45 | #include <linux/scatterlist.h> | 45 | #include <linux/scatterlist.h> |
46 | 46 | ||
47 | #include <xen/xen.h> | 47 | #include <xen/xen.h> |
48 | #include <xen/xenbus.h> | 48 | #include <xen/xenbus.h> |
49 | #include <xen/grant_table.h> | 49 | #include <xen/grant_table.h> |
50 | #include <xen/events.h> | 50 | #include <xen/events.h> |
51 | #include <xen/page.h> | 51 | #include <xen/page.h> |
52 | #include <xen/platform_pci.h> | 52 | #include <xen/platform_pci.h> |
53 | 53 | ||
54 | #include <xen/interface/grant_table.h> | 54 | #include <xen/interface/grant_table.h> |
55 | #include <xen/interface/io/blkif.h> | 55 | #include <xen/interface/io/blkif.h> |
56 | #include <xen/interface/io/protocols.h> | 56 | #include <xen/interface/io/protocols.h> |
57 | 57 | ||
58 | #include <asm/xen/hypervisor.h> | 58 | #include <asm/xen/hypervisor.h> |
59 | 59 | ||
60 | enum blkif_state { | 60 | enum blkif_state { |
61 | BLKIF_STATE_DISCONNECTED, | 61 | BLKIF_STATE_DISCONNECTED, |
62 | BLKIF_STATE_CONNECTED, | 62 | BLKIF_STATE_CONNECTED, |
63 | BLKIF_STATE_SUSPENDED, | 63 | BLKIF_STATE_SUSPENDED, |
64 | }; | 64 | }; |
65 | 65 | ||
66 | struct blk_shadow { | 66 | struct blk_shadow { |
67 | struct blkif_request req; | 67 | struct blkif_request req; |
68 | unsigned long request; | 68 | unsigned long request; |
69 | unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 69 | unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
70 | }; | 70 | }; |
71 | 71 | ||
72 | static const struct block_device_operations xlvbd_block_fops; | 72 | static const struct block_device_operations xlvbd_block_fops; |
73 | 73 | ||
74 | #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) | 74 | #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * We have one of these per vbd, whether ide, scsi or 'other'. They | 77 | * We have one of these per vbd, whether ide, scsi or 'other'. They |
78 | * hang in private_data off the gendisk structure. We may end up | 78 | * hang in private_data off the gendisk structure. We may end up |
79 | * putting all kinds of interesting stuff here :-) | 79 | * putting all kinds of interesting stuff here :-) |
80 | */ | 80 | */ |
81 | struct blkfront_info | 81 | struct blkfront_info |
82 | { | 82 | { |
83 | struct mutex mutex; | 83 | struct mutex mutex; |
84 | struct xenbus_device *xbdev; | 84 | struct xenbus_device *xbdev; |
85 | struct gendisk *gd; | 85 | struct gendisk *gd; |
86 | int vdevice; | 86 | int vdevice; |
87 | blkif_vdev_t handle; | 87 | blkif_vdev_t handle; |
88 | enum blkif_state connected; | 88 | enum blkif_state connected; |
89 | int ring_ref; | 89 | int ring_ref; |
90 | struct blkif_front_ring ring; | 90 | struct blkif_front_ring ring; |
91 | struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 91 | struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
92 | unsigned int evtchn, irq; | 92 | unsigned int evtchn, irq; |
93 | struct request_queue *rq; | 93 | struct request_queue *rq; |
94 | struct work_struct work; | 94 | struct work_struct work; |
95 | struct gnttab_free_callback callback; | 95 | struct gnttab_free_callback callback; |
96 | struct blk_shadow shadow[BLK_RING_SIZE]; | 96 | struct blk_shadow shadow[BLK_RING_SIZE]; |
97 | unsigned long shadow_free; | 97 | unsigned long shadow_free; |
98 | int feature_barrier; | 98 | unsigned int feature_flush; |
99 | int is_ready; | 99 | int is_ready; |
100 | }; | 100 | }; |
101 | 101 | ||
102 | static DEFINE_SPINLOCK(blkif_io_lock); | 102 | static DEFINE_SPINLOCK(blkif_io_lock); |
103 | 103 | ||
104 | static unsigned int nr_minors; | 104 | static unsigned int nr_minors; |
105 | static unsigned long *minors; | 105 | static unsigned long *minors; |
106 | static DEFINE_SPINLOCK(minor_lock); | 106 | static DEFINE_SPINLOCK(minor_lock); |
107 | 107 | ||
108 | #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ | 108 | #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ |
109 | (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) | 109 | (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) |
110 | #define GRANT_INVALID_REF 0 | 110 | #define GRANT_INVALID_REF 0 |
111 | 111 | ||
112 | #define PARTS_PER_DISK 16 | 112 | #define PARTS_PER_DISK 16 |
113 | #define PARTS_PER_EXT_DISK 256 | 113 | #define PARTS_PER_EXT_DISK 256 |
114 | 114 | ||
115 | #define BLKIF_MAJOR(dev) ((dev)>>8) | 115 | #define BLKIF_MAJOR(dev) ((dev)>>8) |
116 | #define BLKIF_MINOR(dev) ((dev) & 0xff) | 116 | #define BLKIF_MINOR(dev) ((dev) & 0xff) |
117 | 117 | ||
118 | #define EXT_SHIFT 28 | 118 | #define EXT_SHIFT 28 |
119 | #define EXTENDED (1<<EXT_SHIFT) | 119 | #define EXTENDED (1<<EXT_SHIFT) |
120 | #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) | 120 | #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) |
121 | #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) | 121 | #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) |
122 | 122 | ||
123 | #define DEV_NAME "xvd" /* name in /dev */ | 123 | #define DEV_NAME "xvd" /* name in /dev */ |
124 | 124 | ||
125 | static int get_id_from_freelist(struct blkfront_info *info) | 125 | static int get_id_from_freelist(struct blkfront_info *info) |
126 | { | 126 | { |
127 | unsigned long free = info->shadow_free; | 127 | unsigned long free = info->shadow_free; |
128 | BUG_ON(free >= BLK_RING_SIZE); | 128 | BUG_ON(free >= BLK_RING_SIZE); |
129 | info->shadow_free = info->shadow[free].req.id; | 129 | info->shadow_free = info->shadow[free].req.id; |
130 | info->shadow[free].req.id = 0x0fffffee; /* debug */ | 130 | info->shadow[free].req.id = 0x0fffffee; /* debug */ |
131 | return free; | 131 | return free; |
132 | } | 132 | } |
133 | 133 | ||
134 | static void add_id_to_freelist(struct blkfront_info *info, | 134 | static void add_id_to_freelist(struct blkfront_info *info, |
135 | unsigned long id) | 135 | unsigned long id) |
136 | { | 136 | { |
137 | info->shadow[id].req.id = info->shadow_free; | 137 | info->shadow[id].req.id = info->shadow_free; |
138 | info->shadow[id].request = 0; | 138 | info->shadow[id].request = 0; |
139 | info->shadow_free = id; | 139 | info->shadow_free = id; |
140 | } | 140 | } |
141 | 141 | ||
142 | static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) | 142 | static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) |
143 | { | 143 | { |
144 | unsigned int end = minor + nr; | 144 | unsigned int end = minor + nr; |
145 | int rc; | 145 | int rc; |
146 | 146 | ||
147 | if (end > nr_minors) { | 147 | if (end > nr_minors) { |
148 | unsigned long *bitmap, *old; | 148 | unsigned long *bitmap, *old; |
149 | 149 | ||
150 | bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), | 150 | bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), |
151 | GFP_KERNEL); | 151 | GFP_KERNEL); |
152 | if (bitmap == NULL) | 152 | if (bitmap == NULL) |
153 | return -ENOMEM; | 153 | return -ENOMEM; |
154 | 154 | ||
155 | spin_lock(&minor_lock); | 155 | spin_lock(&minor_lock); |
156 | if (end > nr_minors) { | 156 | if (end > nr_minors) { |
157 | old = minors; | 157 | old = minors; |
158 | memcpy(bitmap, minors, | 158 | memcpy(bitmap, minors, |
159 | BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); | 159 | BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); |
160 | minors = bitmap; | 160 | minors = bitmap; |
161 | nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; | 161 | nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; |
162 | } else | 162 | } else |
163 | old = bitmap; | 163 | old = bitmap; |
164 | spin_unlock(&minor_lock); | 164 | spin_unlock(&minor_lock); |
165 | kfree(old); | 165 | kfree(old); |
166 | } | 166 | } |
167 | 167 | ||
168 | spin_lock(&minor_lock); | 168 | spin_lock(&minor_lock); |
169 | if (find_next_bit(minors, end, minor) >= end) { | 169 | if (find_next_bit(minors, end, minor) >= end) { |
170 | for (; minor < end; ++minor) | 170 | for (; minor < end; ++minor) |
171 | __set_bit(minor, minors); | 171 | __set_bit(minor, minors); |
172 | rc = 0; | 172 | rc = 0; |
173 | } else | 173 | } else |
174 | rc = -EBUSY; | 174 | rc = -EBUSY; |
175 | spin_unlock(&minor_lock); | 175 | spin_unlock(&minor_lock); |
176 | 176 | ||
177 | return rc; | 177 | return rc; |
178 | } | 178 | } |
179 | 179 | ||
180 | static void xlbd_release_minors(unsigned int minor, unsigned int nr) | 180 | static void xlbd_release_minors(unsigned int minor, unsigned int nr) |
181 | { | 181 | { |
182 | unsigned int end = minor + nr; | 182 | unsigned int end = minor + nr; |
183 | 183 | ||
184 | BUG_ON(end > nr_minors); | 184 | BUG_ON(end > nr_minors); |
185 | spin_lock(&minor_lock); | 185 | spin_lock(&minor_lock); |
186 | for (; minor < end; ++minor) | 186 | for (; minor < end; ++minor) |
187 | __clear_bit(minor, minors); | 187 | __clear_bit(minor, minors); |
188 | spin_unlock(&minor_lock); | 188 | spin_unlock(&minor_lock); |
189 | } | 189 | } |
190 | 190 | ||
191 | static void blkif_restart_queue_callback(void *arg) | 191 | static void blkif_restart_queue_callback(void *arg) |
192 | { | 192 | { |
193 | struct blkfront_info *info = (struct blkfront_info *)arg; | 193 | struct blkfront_info *info = (struct blkfront_info *)arg; |
194 | schedule_work(&info->work); | 194 | schedule_work(&info->work); |
195 | } | 195 | } |
196 | 196 | ||
197 | static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) | 197 | static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) |
198 | { | 198 | { |
199 | /* We don't have real geometry info, but let's at least return | 199 | /* We don't have real geometry info, but let's at least return |
200 | values consistent with the size of the device */ | 200 | values consistent with the size of the device */ |
201 | sector_t nsect = get_capacity(bd->bd_disk); | 201 | sector_t nsect = get_capacity(bd->bd_disk); |
202 | sector_t cylinders = nsect; | 202 | sector_t cylinders = nsect; |
203 | 203 | ||
204 | hg->heads = 0xff; | 204 | hg->heads = 0xff; |
205 | hg->sectors = 0x3f; | 205 | hg->sectors = 0x3f; |
206 | sector_div(cylinders, hg->heads * hg->sectors); | 206 | sector_div(cylinders, hg->heads * hg->sectors); |
207 | hg->cylinders = cylinders; | 207 | hg->cylinders = cylinders; |
208 | if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) | 208 | if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) |
209 | hg->cylinders = 0xffff; | 209 | hg->cylinders = 0xffff; |
210 | return 0; | 210 | return 0; |
211 | } | 211 | } |
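A quick worked example for the synthetic geometry above (the numbers are illustrative, not from the patch): a 16 GiB vbd has nsect = 33,554,432 sectors; with heads = 0xff (255) and sectors = 0x3f (63) per track, sector_div() gives cylinders = 33,554,432 / (255 * 63) = 2088. Since (2088 + 1) * 255 * 63 = 33,559,785 is not smaller than nsect, the value is kept; the 0xffff clamp only triggers once the cylinder count overflows the 16-bit hd_geometry field, i.e. for disks of roughly half a terabyte and larger.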
212 | 212 | ||
213 | static int blkif_ioctl(struct block_device *bdev, fmode_t mode, | 213 | static int blkif_ioctl(struct block_device *bdev, fmode_t mode, |
214 | unsigned command, unsigned long argument) | 214 | unsigned command, unsigned long argument) |
215 | { | 215 | { |
216 | struct blkfront_info *info = bdev->bd_disk->private_data; | 216 | struct blkfront_info *info = bdev->bd_disk->private_data; |
217 | int i; | 217 | int i; |
218 | 218 | ||
219 | dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", | 219 | dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", |
220 | command, (long)argument); | 220 | command, (long)argument); |
221 | 221 | ||
222 | switch (command) { | 222 | switch (command) { |
223 | case CDROMMULTISESSION: | 223 | case CDROMMULTISESSION: |
224 | dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); | 224 | dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); |
225 | for (i = 0; i < sizeof(struct cdrom_multisession); i++) | 225 | for (i = 0; i < sizeof(struct cdrom_multisession); i++) |
226 | if (put_user(0, (char __user *)(argument + i))) | 226 | if (put_user(0, (char __user *)(argument + i))) |
227 | return -EFAULT; | 227 | return -EFAULT; |
228 | return 0; | 228 | return 0; |
229 | 229 | ||
230 | case CDROM_GET_CAPABILITY: { | 230 | case CDROM_GET_CAPABILITY: { |
231 | struct gendisk *gd = info->gd; | 231 | struct gendisk *gd = info->gd; |
232 | if (gd->flags & GENHD_FL_CD) | 232 | if (gd->flags & GENHD_FL_CD) |
233 | return 0; | 233 | return 0; |
234 | return -EINVAL; | 234 | return -EINVAL; |
235 | } | 235 | } |
236 | 236 | ||
237 | default: | 237 | default: |
238 | /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", | 238 | /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", |
239 | command);*/ | 239 | command);*/ |
240 | return -EINVAL; /* same return as native Linux */ | 240 | return -EINVAL; /* same return as native Linux */ |
241 | } | 241 | } |
242 | 242 | ||
243 | return 0; | 243 | return 0; |
244 | } | 244 | } |
245 | 245 | ||
246 | /* | 246 | /* |
247 | * blkif_queue_request | 247 | * blkif_queue_request |
248 | * | 248 | * |
249 | * request block io | 249 | * request block io |
250 | * | 250 | * |
251 | * id: for guest use only. | 251 | * id: for guest use only. |
252 | * operation: BLKIF_OP_{READ,WRITE,PROBE} | 252 | * operation: BLKIF_OP_{READ,WRITE,PROBE} |
253 | * buffer: buffer to read/write into. this should be a | 253 | * buffer: buffer to read/write into. this should be a |
254 | * virtual address in the guest os. | 254 | * virtual address in the guest os. |
255 | */ | 255 | */ |
256 | static int blkif_queue_request(struct request *req) | 256 | static int blkif_queue_request(struct request *req) |
257 | { | 257 | { |
258 | struct blkfront_info *info = req->rq_disk->private_data; | 258 | struct blkfront_info *info = req->rq_disk->private_data; |
259 | unsigned long buffer_mfn; | 259 | unsigned long buffer_mfn; |
260 | struct blkif_request *ring_req; | 260 | struct blkif_request *ring_req; |
261 | unsigned long id; | 261 | unsigned long id; |
262 | unsigned int fsect, lsect; | 262 | unsigned int fsect, lsect; |
263 | int i, ref; | 263 | int i, ref; |
264 | grant_ref_t gref_head; | 264 | grant_ref_t gref_head; |
265 | struct scatterlist *sg; | 265 | struct scatterlist *sg; |
266 | 266 | ||
267 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) | 267 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) |
268 | return 1; | 268 | return 1; |
269 | 269 | ||
270 | if (gnttab_alloc_grant_references( | 270 | if (gnttab_alloc_grant_references( |
271 | BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { | 271 | BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { |
272 | gnttab_request_free_callback( | 272 | gnttab_request_free_callback( |
273 | &info->callback, | 273 | &info->callback, |
274 | blkif_restart_queue_callback, | 274 | blkif_restart_queue_callback, |
275 | info, | 275 | info, |
276 | BLKIF_MAX_SEGMENTS_PER_REQUEST); | 276 | BLKIF_MAX_SEGMENTS_PER_REQUEST); |
277 | return 1; | 277 | return 1; |
278 | } | 278 | } |
279 | 279 | ||
280 | /* Fill out a communications ring structure. */ | 280 | /* Fill out a communications ring structure. */ |
281 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | 281 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); |
282 | id = get_id_from_freelist(info); | 282 | id = get_id_from_freelist(info); |
283 | info->shadow[id].request = (unsigned long)req; | 283 | info->shadow[id].request = (unsigned long)req; |
284 | 284 | ||
285 | ring_req->id = id; | 285 | ring_req->id = id; |
286 | ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); | 286 | ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); |
287 | ring_req->handle = info->handle; | 287 | ring_req->handle = info->handle; |
288 | 288 | ||
289 | ring_req->operation = rq_data_dir(req) ? | 289 | ring_req->operation = rq_data_dir(req) ? |
290 | BLKIF_OP_WRITE : BLKIF_OP_READ; | 290 | BLKIF_OP_WRITE : BLKIF_OP_READ; |
291 | if (req->cmd_flags & REQ_HARDBARRIER) | 291 | if (req->cmd_flags & REQ_HARDBARRIER) |
292 | ring_req->operation = BLKIF_OP_WRITE_BARRIER; | 292 | ring_req->operation = BLKIF_OP_WRITE_BARRIER; |
293 | 293 | ||
294 | ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); | 294 | ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); |
295 | BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); | 295 | BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); |
296 | 296 | ||
297 | for_each_sg(info->sg, sg, ring_req->nr_segments, i) { | 297 | for_each_sg(info->sg, sg, ring_req->nr_segments, i) { |
298 | buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); | 298 | buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); |
299 | fsect = sg->offset >> 9; | 299 | fsect = sg->offset >> 9; |
300 | lsect = fsect + (sg->length >> 9) - 1; | 300 | lsect = fsect + (sg->length >> 9) - 1; |
301 | /* install a grant reference. */ | 301 | /* install a grant reference. */ |
302 | ref = gnttab_claim_grant_reference(&gref_head); | 302 | ref = gnttab_claim_grant_reference(&gref_head); |
303 | BUG_ON(ref == -ENOSPC); | 303 | BUG_ON(ref == -ENOSPC); |
304 | 304 | ||
305 | gnttab_grant_foreign_access_ref( | 305 | gnttab_grant_foreign_access_ref( |
306 | ref, | 306 | ref, |
307 | info->xbdev->otherend_id, | 307 | info->xbdev->otherend_id, |
308 | buffer_mfn, | 308 | buffer_mfn, |
309 | rq_data_dir(req) ); | 309 | rq_data_dir(req) ); |
310 | 310 | ||
311 | info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); | 311 | info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); |
312 | ring_req->seg[i] = | 312 | ring_req->seg[i] = |
313 | (struct blkif_request_segment) { | 313 | (struct blkif_request_segment) { |
314 | .gref = ref, | 314 | .gref = ref, |
315 | .first_sect = fsect, | 315 | .first_sect = fsect, |
316 | .last_sect = lsect }; | 316 | .last_sect = lsect }; |
317 | } | 317 | } |
318 | 318 | ||
319 | info->ring.req_prod_pvt++; | 319 | info->ring.req_prod_pvt++; |
320 | 320 | ||
321 | /* Keep a private copy so we can reissue requests when recovering. */ | 321 | /* Keep a private copy so we can reissue requests when recovering. */ |
322 | info->shadow[id].req = *ring_req; | 322 | info->shadow[id].req = *ring_req; |
323 | 323 | ||
324 | gnttab_free_grant_references(gref_head); | 324 | gnttab_free_grant_references(gref_head); |
325 | 325 | ||
326 | return 0; | 326 | return 0; |
327 | } | 327 | } |
328 | 328 | ||
329 | 329 | ||
330 | static inline void flush_requests(struct blkfront_info *info) | 330 | static inline void flush_requests(struct blkfront_info *info) |
331 | { | 331 | { |
332 | int notify; | 332 | int notify; |
333 | 333 | ||
334 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); | 334 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); |
335 | 335 | ||
336 | if (notify) | 336 | if (notify) |
337 | notify_remote_via_irq(info->irq); | 337 | notify_remote_via_irq(info->irq); |
338 | } | 338 | } |
339 | 339 | ||
340 | /* | 340 | /* |
341 | * do_blkif_request | 341 | * do_blkif_request |
342 | * read a block; request is in a request queue | 342 | * read a block; request is in a request queue |
343 | */ | 343 | */ |
344 | static void do_blkif_request(struct request_queue *rq) | 344 | static void do_blkif_request(struct request_queue *rq) |
345 | { | 345 | { |
346 | struct blkfront_info *info = NULL; | 346 | struct blkfront_info *info = NULL; |
347 | struct request *req; | 347 | struct request *req; |
348 | int queued; | 348 | int queued; |
349 | 349 | ||
350 | pr_debug("Entered do_blkif_request\n"); | 350 | pr_debug("Entered do_blkif_request\n"); |
351 | 351 | ||
352 | queued = 0; | 352 | queued = 0; |
353 | 353 | ||
354 | while ((req = blk_peek_request(rq)) != NULL) { | 354 | while ((req = blk_peek_request(rq)) != NULL) { |
355 | info = req->rq_disk->private_data; | 355 | info = req->rq_disk->private_data; |
356 | 356 | ||
357 | if (RING_FULL(&info->ring)) | 357 | if (RING_FULL(&info->ring)) |
358 | goto wait; | 358 | goto wait; |
359 | 359 | ||
360 | blk_start_request(req); | 360 | blk_start_request(req); |
361 | 361 | ||
362 | if (req->cmd_type != REQ_TYPE_FS) { | 362 | if (req->cmd_type != REQ_TYPE_FS) { |
363 | __blk_end_request_all(req, -EIO); | 363 | __blk_end_request_all(req, -EIO); |
364 | continue; | 364 | continue; |
365 | } | 365 | } |
366 | 366 | ||
367 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " | 367 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " |
368 | "(%u/%u) buffer:%p [%s]\n", | 368 | "(%u/%u) buffer:%p [%s]\n", |
369 | req, req->cmd, (unsigned long)blk_rq_pos(req), | 369 | req, req->cmd, (unsigned long)blk_rq_pos(req), |
370 | blk_rq_cur_sectors(req), blk_rq_sectors(req), | 370 | blk_rq_cur_sectors(req), blk_rq_sectors(req), |
371 | req->buffer, rq_data_dir(req) ? "write" : "read"); | 371 | req->buffer, rq_data_dir(req) ? "write" : "read"); |
372 | 372 | ||
373 | if (blkif_queue_request(req)) { | 373 | if (blkif_queue_request(req)) { |
374 | blk_requeue_request(rq, req); | 374 | blk_requeue_request(rq, req); |
375 | wait: | 375 | wait: |
376 | /* Avoid pointless unplugs. */ | 376 | /* Avoid pointless unplugs. */ |
377 | blk_stop_queue(rq); | 377 | blk_stop_queue(rq); |
378 | break; | 378 | break; |
379 | } | 379 | } |
380 | 380 | ||
381 | queued++; | 381 | queued++; |
382 | } | 382 | } |
383 | 383 | ||
384 | if (queued != 0) | 384 | if (queued != 0) |
385 | flush_requests(info); | 385 | flush_requests(info); |
386 | } | 386 | } |
387 | 387 | ||
388 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | 388 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) |
389 | { | 389 | { |
390 | struct request_queue *rq; | 390 | struct request_queue *rq; |
391 | 391 | ||
392 | rq = blk_init_queue(do_blkif_request, &blkif_io_lock); | 392 | rq = blk_init_queue(do_blkif_request, &blkif_io_lock); |
393 | if (rq == NULL) | 393 | if (rq == NULL) |
394 | return -1; | 394 | return -1; |
395 | 395 | ||
396 | queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); | 396 | queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); |
397 | 397 | ||
398 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ | 398 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ |
399 | blk_queue_logical_block_size(rq, sector_size); | 399 | blk_queue_logical_block_size(rq, sector_size); |
400 | blk_queue_max_hw_sectors(rq, 512); | 400 | blk_queue_max_hw_sectors(rq, 512); |
401 | 401 | ||
402 | /* Each segment in a request is up to an aligned page in size. */ | 402 | /* Each segment in a request is up to an aligned page in size. */ |
403 | blk_queue_segment_boundary(rq, PAGE_SIZE - 1); | 403 | blk_queue_segment_boundary(rq, PAGE_SIZE - 1); |
404 | blk_queue_max_segment_size(rq, PAGE_SIZE); | 404 | blk_queue_max_segment_size(rq, PAGE_SIZE); |
405 | 405 | ||
406 | /* Ensure a merged request will fit in a single I/O ring slot. */ | 406 | /* Ensure a merged request will fit in a single I/O ring slot. */ |
407 | blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); | 407 | blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
408 | 408 | ||
409 | /* Make sure buffer addresses are sector-aligned. */ | 409 | /* Make sure buffer addresses are sector-aligned. */ |
410 | blk_queue_dma_alignment(rq, 511); | 410 | blk_queue_dma_alignment(rq, 511); |
411 | 411 | ||
412 | /* Make sure we don't use bounce buffers. */ | 412 | /* Make sure we don't use bounce buffers. */ |
413 | blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); | 413 | blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); |
414 | 414 | ||
415 | gd->queue = rq; | 415 | gd->queue = rq; |
416 | 416 | ||
417 | return 0; | 417 | return 0; |
418 | } | 418 | } |
419 | 419 | ||
420 | 420 | ||
421 | static int xlvbd_barrier(struct blkfront_info *info) | 421 | static void xlvbd_flush(struct blkfront_info *info) |
422 | { | 422 | { |
423 | int err; | 423 | blk_queue_flush(info->rq, info->feature_flush); |
424 | const char *barrier; | ||
425 | |||
426 | switch (info->feature_barrier) { | ||
427 | case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; | ||
428 | case QUEUE_ORDERED_NONE: barrier = "disabled"; break; | ||
429 | default: return -EINVAL; | ||
430 | } | ||
431 | |||
432 | err = blk_queue_ordered(info->rq, info->feature_barrier); | ||
433 | |||
434 | if (err) | ||
435 | return err; | ||
436 | |||
437 | printk(KERN_INFO "blkfront: %s: barriers %s\n", | 424 | printk(KERN_INFO "blkfront: %s: barriers %s\n", |
438 | info->gd->disk_name, barrier); | 425 | info->gd->disk_name, |
439 | return 0; | 426 | info->feature_flush ? "enabled" : "disabled"); |
440 | } | 427 | } |
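The replacement helper cannot fail: blk_queue_flush() only records which of REQ_FLUSH and REQ_FUA the device supports, so the old QUEUE_ORDERED_* validation and the int return are gone. As a hedged sketch of how the mask composes, here is one way a frontend could fill feature_flush before calling the helper; both parameters are illustrative stand-ins (the real driver sets feature_flush during its connect path, outside this hunk), and FUA negotiation in particular is hypothetical here.

/*
 * Illustration only: compose the flush mask stored in feature_flush.
 * backend_can_barrier / backend_can_fua are made-up stand-ins for
 * whatever capability information the frontend has gathered.
 */
static void set_feature_flush(struct blkfront_info *info,
			      bool backend_can_barrier,
			      bool backend_can_fua)
{
	info->feature_flush = 0;

	if (backend_can_barrier)
		info->feature_flush |= REQ_FLUSH;
	if (backend_can_barrier && backend_can_fua)
		info->feature_flush |= REQ_FUA;

	xlvbd_flush(info);	/* apply the mask to info->rq and log it */
}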
441 | 428 | ||
442 | 429 | ||
443 | static int xlvbd_alloc_gendisk(blkif_sector_t capacity, | 430 | static int xlvbd_alloc_gendisk(blkif_sector_t capacity, |
444 | struct blkfront_info *info, | 431 | struct blkfront_info *info, |
445 | u16 vdisk_info, u16 sector_size) | 432 | u16 vdisk_info, u16 sector_size) |
446 | { | 433 | { |
447 | struct gendisk *gd; | 434 | struct gendisk *gd; |
448 | int nr_minors = 1; | 435 | int nr_minors = 1; |
449 | int err = -ENODEV; | 436 | int err = -ENODEV; |
450 | unsigned int offset; | 437 | unsigned int offset; |
451 | int minor; | 438 | int minor; |
452 | int nr_parts; | 439 | int nr_parts; |
453 | 440 | ||
454 | BUG_ON(info->gd != NULL); | 441 | BUG_ON(info->gd != NULL); |
455 | BUG_ON(info->rq != NULL); | 442 | BUG_ON(info->rq != NULL); |
456 | 443 | ||
457 | if ((info->vdevice>>EXT_SHIFT) > 1) { | 444 | if ((info->vdevice>>EXT_SHIFT) > 1) { |
458 | /* this is above the extended range; something is wrong */ | 445 | /* this is above the extended range; something is wrong */ |
459 | printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); | 446 | printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); |
460 | return -ENODEV; | 447 | return -ENODEV; |
461 | } | 448 | } |
462 | 449 | ||
463 | if (!VDEV_IS_EXTENDED(info->vdevice)) { | 450 | if (!VDEV_IS_EXTENDED(info->vdevice)) { |
464 | minor = BLKIF_MINOR(info->vdevice); | 451 | minor = BLKIF_MINOR(info->vdevice); |
465 | nr_parts = PARTS_PER_DISK; | 452 | nr_parts = PARTS_PER_DISK; |
466 | } else { | 453 | } else { |
467 | minor = BLKIF_MINOR_EXT(info->vdevice); | 454 | minor = BLKIF_MINOR_EXT(info->vdevice); |
468 | nr_parts = PARTS_PER_EXT_DISK; | 455 | nr_parts = PARTS_PER_EXT_DISK; |
469 | } | 456 | } |
470 | 457 | ||
471 | if ((minor % nr_parts) == 0) | 458 | if ((minor % nr_parts) == 0) |
472 | nr_minors = nr_parts; | 459 | nr_minors = nr_parts; |
473 | 460 | ||
474 | err = xlbd_reserve_minors(minor, nr_minors); | 461 | err = xlbd_reserve_minors(minor, nr_minors); |
475 | if (err) | 462 | if (err) |
476 | goto out; | 463 | goto out; |
477 | err = -ENODEV; | 464 | err = -ENODEV; |
478 | 465 | ||
479 | gd = alloc_disk(nr_minors); | 466 | gd = alloc_disk(nr_minors); |
480 | if (gd == NULL) | 467 | if (gd == NULL) |
481 | goto release; | 468 | goto release; |
482 | 469 | ||
483 | offset = minor / nr_parts; | 470 | offset = minor / nr_parts; |
484 | 471 | ||
485 | if (nr_minors > 1) { | 472 | if (nr_minors > 1) { |
486 | if (offset < 26) | 473 | if (offset < 26) |
487 | sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); | 474 | sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); |
488 | else | 475 | else |
489 | sprintf(gd->disk_name, "%s%c%c", DEV_NAME, | 476 | sprintf(gd->disk_name, "%s%c%c", DEV_NAME, |
490 | 'a' + ((offset / 26)-1), 'a' + (offset % 26)); | 477 | 'a' + ((offset / 26)-1), 'a' + (offset % 26)); |
491 | } else { | 478 | } else { |
492 | if (offset < 26) | 479 | if (offset < 26) |
493 | sprintf(gd->disk_name, "%s%c%d", DEV_NAME, | 480 | sprintf(gd->disk_name, "%s%c%d", DEV_NAME, |
494 | 'a' + offset, | 481 | 'a' + offset, |
495 | minor & (nr_parts - 1)); | 482 | minor & (nr_parts - 1)); |
496 | else | 483 | else |
497 | sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, | 484 | sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, |
498 | 'a' + ((offset / 26) - 1), | 485 | 'a' + ((offset / 26) - 1), |
499 | 'a' + (offset % 26), | 486 | 'a' + (offset % 26), |
500 | minor & (nr_parts - 1)); | 487 | minor & (nr_parts - 1)); |
501 | } | 488 | } |
502 | 489 | ||
503 | gd->major = XENVBD_MAJOR; | 490 | gd->major = XENVBD_MAJOR; |
504 | gd->first_minor = minor; | 491 | gd->first_minor = minor; |
505 | gd->fops = &xlvbd_block_fops; | 492 | gd->fops = &xlvbd_block_fops; |
506 | gd->private_data = info; | 493 | gd->private_data = info; |
507 | gd->driverfs_dev = &(info->xbdev->dev); | 494 | gd->driverfs_dev = &(info->xbdev->dev); |
508 | set_capacity(gd, capacity); | 495 | set_capacity(gd, capacity); |
509 | 496 | ||
510 | if (xlvbd_init_blk_queue(gd, sector_size)) { | 497 | if (xlvbd_init_blk_queue(gd, sector_size)) { |
511 | del_gendisk(gd); | 498 | del_gendisk(gd); |
512 | goto release; | 499 | goto release; |
513 | } | 500 | } |
514 | 501 | ||
515 | info->rq = gd->queue; | 502 | info->rq = gd->queue; |
516 | info->gd = gd; | 503 | info->gd = gd; |
517 | 504 | ||
518 | xlvbd_barrier(info); | 505 | xlvbd_flush(info); |
519 | 506 | ||
520 | if (vdisk_info & VDISK_READONLY) | 507 | if (vdisk_info & VDISK_READONLY) |
521 | set_disk_ro(gd, 1); | 508 | set_disk_ro(gd, 1); |
522 | 509 | ||
523 | if (vdisk_info & VDISK_REMOVABLE) | 510 | if (vdisk_info & VDISK_REMOVABLE) |
524 | gd->flags |= GENHD_FL_REMOVABLE; | 511 | gd->flags |= GENHD_FL_REMOVABLE; |
525 | 512 | ||
526 | if (vdisk_info & VDISK_CDROM) | 513 | if (vdisk_info & VDISK_CDROM) |
527 | gd->flags |= GENHD_FL_CD; | 514 | gd->flags |= GENHD_FL_CD; |
528 | 515 | ||
529 | return 0; | 516 | return 0; |
530 | 517 | ||
531 | release: | 518 | release: |
532 | xlbd_release_minors(minor, nr_minors); | 519 | xlbd_release_minors(minor, nr_minors); |
533 | out: | 520 | out: |
534 | return err; | 521 | return err; |
535 | } | 522 | } |
536 | 523 | ||
537 | static void xlvbd_release_gendisk(struct blkfront_info *info) | 524 | static void xlvbd_release_gendisk(struct blkfront_info *info) |
538 | { | 525 | { |
539 | unsigned int minor, nr_minors; | 526 | unsigned int minor, nr_minors; |
540 | unsigned long flags; | 527 | unsigned long flags; |
541 | 528 | ||
542 | if (info->rq == NULL) | 529 | if (info->rq == NULL) |
543 | return; | 530 | return; |
544 | 531 | ||
545 | spin_lock_irqsave(&blkif_io_lock, flags); | 532 | spin_lock_irqsave(&blkif_io_lock, flags); |
546 | 533 | ||
547 | /* No more blkif_request(). */ | 534 | /* No more blkif_request(). */ |
548 | blk_stop_queue(info->rq); | 535 | blk_stop_queue(info->rq); |
549 | 536 | ||
550 | /* No more gnttab callback work. */ | 537 | /* No more gnttab callback work. */ |
551 | gnttab_cancel_free_callback(&info->callback); | 538 | gnttab_cancel_free_callback(&info->callback); |
552 | spin_unlock_irqrestore(&blkif_io_lock, flags); | 539 | spin_unlock_irqrestore(&blkif_io_lock, flags); |
553 | 540 | ||
554 | /* Flush gnttab callback work. Must be done with no locks held. */ | 541 | /* Flush gnttab callback work. Must be done with no locks held. */ |
555 | flush_scheduled_work(); | 542 | flush_scheduled_work(); |
556 | 543 | ||
557 | del_gendisk(info->gd); | 544 | del_gendisk(info->gd); |
558 | 545 | ||
559 | minor = info->gd->first_minor; | 546 | minor = info->gd->first_minor; |
560 | nr_minors = info->gd->minors; | 547 | nr_minors = info->gd->minors; |
561 | xlbd_release_minors(minor, nr_minors); | 548 | xlbd_release_minors(minor, nr_minors); |
562 | 549 | ||
563 | blk_cleanup_queue(info->rq); | 550 | blk_cleanup_queue(info->rq); |
564 | info->rq = NULL; | 551 | info->rq = NULL; |
565 | 552 | ||
566 | put_disk(info->gd); | 553 | put_disk(info->gd); |
567 | info->gd = NULL; | 554 | info->gd = NULL; |
568 | } | 555 | } |
569 | 556 | ||
570 | static void kick_pending_request_queues(struct blkfront_info *info) | 557 | static void kick_pending_request_queues(struct blkfront_info *info) |
571 | { | 558 | { |
572 | if (!RING_FULL(&info->ring)) { | 559 | if (!RING_FULL(&info->ring)) { |
573 | /* Re-enable calldowns. */ | 560 | /* Re-enable calldowns. */ |
574 | blk_start_queue(info->rq); | 561 | blk_start_queue(info->rq); |
575 | /* Kick things off immediately. */ | 562 | /* Kick things off immediately. */ |
576 | do_blkif_request(info->rq); | 563 | do_blkif_request(info->rq); |
577 | } | 564 | } |
578 | } | 565 | } |
579 | 566 | ||
580 | static void blkif_restart_queue(struct work_struct *work) | 567 | static void blkif_restart_queue(struct work_struct *work) |
581 | { | 568 | { |
582 | struct blkfront_info *info = container_of(work, struct blkfront_info, work); | 569 | struct blkfront_info *info = container_of(work, struct blkfront_info, work); |
583 | 570 | ||
584 | spin_lock_irq(&blkif_io_lock); | 571 | spin_lock_irq(&blkif_io_lock); |
585 | if (info->connected == BLKIF_STATE_CONNECTED) | 572 | if (info->connected == BLKIF_STATE_CONNECTED) |
586 | kick_pending_request_queues(info); | 573 | kick_pending_request_queues(info); |
587 | spin_unlock_irq(&blkif_io_lock); | 574 | spin_unlock_irq(&blkif_io_lock); |
588 | } | 575 | } |
589 | 576 | ||
590 | static void blkif_free(struct blkfront_info *info, int suspend) | 577 | static void blkif_free(struct blkfront_info *info, int suspend) |
591 | { | 578 | { |
592 | /* Prevent new requests being issued until we fix things up. */ | 579 | /* Prevent new requests being issued until we fix things up. */ |
593 | spin_lock_irq(&blkif_io_lock); | 580 | spin_lock_irq(&blkif_io_lock); |
594 | info->connected = suspend ? | 581 | info->connected = suspend ? |
595 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; | 582 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; |
596 | /* No more blkif_request(). */ | 583 | /* No more blkif_request(). */ |
597 | if (info->rq) | 584 | if (info->rq) |
598 | blk_stop_queue(info->rq); | 585 | blk_stop_queue(info->rq); |
599 | /* No more gnttab callback work. */ | 586 | /* No more gnttab callback work. */ |
600 | gnttab_cancel_free_callback(&info->callback); | 587 | gnttab_cancel_free_callback(&info->callback); |
601 | spin_unlock_irq(&blkif_io_lock); | 588 | spin_unlock_irq(&blkif_io_lock); |
602 | 589 | ||
603 | /* Flush gnttab callback work. Must be done with no locks held. */ | 590 | /* Flush gnttab callback work. Must be done with no locks held. */ |
604 | flush_scheduled_work(); | 591 | flush_scheduled_work(); |
605 | 592 | ||
606 | /* Free resources associated with old device channel. */ | 593 | /* Free resources associated with old device channel. */ |
607 | if (info->ring_ref != GRANT_INVALID_REF) { | 594 | if (info->ring_ref != GRANT_INVALID_REF) { |
608 | gnttab_end_foreign_access(info->ring_ref, 0, | 595 | gnttab_end_foreign_access(info->ring_ref, 0, |
609 | (unsigned long)info->ring.sring); | 596 | (unsigned long)info->ring.sring); |
610 | info->ring_ref = GRANT_INVALID_REF; | 597 | info->ring_ref = GRANT_INVALID_REF; |
611 | info->ring.sring = NULL; | 598 | info->ring.sring = NULL; |
612 | } | 599 | } |
613 | if (info->irq) | 600 | if (info->irq) |
614 | unbind_from_irqhandler(info->irq, info); | 601 | unbind_from_irqhandler(info->irq, info); |
615 | info->evtchn = info->irq = 0; | 602 | info->evtchn = info->irq = 0; |
616 | 603 | ||
617 | } | 604 | } |
618 | 605 | ||
619 | static void blkif_completion(struct blk_shadow *s) | 606 | static void blkif_completion(struct blk_shadow *s) |
620 | { | 607 | { |
621 | int i; | 608 | int i; |
622 | for (i = 0; i < s->req.nr_segments; i++) | 609 | for (i = 0; i < s->req.nr_segments; i++) |
623 | gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); | 610 | gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); |
624 | } | 611 | } |
625 | 612 | ||
626 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) | 613 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) |
627 | { | 614 | { |
628 | struct request *req; | 615 | struct request *req; |
629 | struct blkif_response *bret; | 616 | struct blkif_response *bret; |
630 | RING_IDX i, rp; | 617 | RING_IDX i, rp; |
631 | unsigned long flags; | 618 | unsigned long flags; |
632 | struct blkfront_info *info = (struct blkfront_info *)dev_id; | 619 | struct blkfront_info *info = (struct blkfront_info *)dev_id; |
633 | int error; | 620 | int error; |
634 | 621 | ||
635 | spin_lock_irqsave(&blkif_io_lock, flags); | 622 | spin_lock_irqsave(&blkif_io_lock, flags); |
636 | 623 | ||
637 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { | 624 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { |
638 | spin_unlock_irqrestore(&blkif_io_lock, flags); | 625 | spin_unlock_irqrestore(&blkif_io_lock, flags); |
639 | return IRQ_HANDLED; | 626 | return IRQ_HANDLED; |
640 | } | 627 | } |
641 | 628 | ||
642 | again: | 629 | again: |
643 | rp = info->ring.sring->rsp_prod; | 630 | rp = info->ring.sring->rsp_prod; |
644 | rmb(); /* Ensure we see queued responses up to 'rp'. */ | 631 | rmb(); /* Ensure we see queued responses up to 'rp'. */ |
645 | 632 | ||
646 | for (i = info->ring.rsp_cons; i != rp; i++) { | 633 | for (i = info->ring.rsp_cons; i != rp; i++) { |
647 | unsigned long id; | 634 | unsigned long id; |
648 | 635 | ||
649 | bret = RING_GET_RESPONSE(&info->ring, i); | 636 | bret = RING_GET_RESPONSE(&info->ring, i); |
650 | id = bret->id; | 637 | id = bret->id; |
651 | req = (struct request *)info->shadow[id].request; | 638 | req = (struct request *)info->shadow[id].request; |
652 | 639 | ||
653 | blkif_completion(&info->shadow[id]); | 640 | blkif_completion(&info->shadow[id]); |
654 | 641 | ||
655 | add_id_to_freelist(info, id); | 642 | add_id_to_freelist(info, id); |
656 | 643 | ||
657 | error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; | 644 | error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; |
658 | switch (bret->operation) { | 645 | switch (bret->operation) { |
659 | case BLKIF_OP_WRITE_BARRIER: | 646 | case BLKIF_OP_WRITE_BARRIER: |
660 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | 647 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { |
661 | printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", | 648 | printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", |
662 | info->gd->disk_name); | 649 | info->gd->disk_name); |
663 | error = -EOPNOTSUPP; | 650 | error = -EOPNOTSUPP; |
664 | info->feature_barrier = QUEUE_ORDERED_NONE; | 651 | info->feature_flush = 0; |
665 | xlvbd_barrier(info); | 652 | xlvbd_flush(info); |
666 | } | 653 | } |
667 | /* fall through */ | 654 | /* fall through */ |
668 | case BLKIF_OP_READ: | 655 | case BLKIF_OP_READ: |
669 | case BLKIF_OP_WRITE: | 656 | case BLKIF_OP_WRITE: |
670 | if (unlikely(bret->status != BLKIF_RSP_OKAY)) | 657 | if (unlikely(bret->status != BLKIF_RSP_OKAY)) |
671 | dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " | 658 | dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " |
672 | "request: %x\n", bret->status); | 659 | "request: %x\n", bret->status); |
673 | 660 | ||
674 | __blk_end_request_all(req, error); | 661 | __blk_end_request_all(req, error); |
675 | break; | 662 | break; |
676 | default: | 663 | default: |
677 | BUG(); | 664 | BUG(); |
678 | } | 665 | } |
679 | } | 666 | } |
680 | 667 | ||
681 | info->ring.rsp_cons = i; | 668 | info->ring.rsp_cons = i; |
682 | 669 | ||
683 | if (i != info->ring.req_prod_pvt) { | 670 | if (i != info->ring.req_prod_pvt) { |
684 | int more_to_do; | 671 | int more_to_do; |
685 | RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); | 672 | RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); |
686 | if (more_to_do) | 673 | if (more_to_do) |
687 | goto again; | 674 | goto again; |
688 | } else | 675 | } else |
689 | info->ring.sring->rsp_event = i + 1; | 676 | info->ring.sring->rsp_event = i + 1; |
690 | 677 | ||
691 | kick_pending_request_queues(info); | 678 | kick_pending_request_queues(info); |
692 | 679 | ||
693 | spin_unlock_irqrestore(&blkif_io_lock, flags); | 680 | spin_unlock_irqrestore(&blkif_io_lock, flags); |
694 | 681 | ||
695 | return IRQ_HANDLED; | 682 | return IRQ_HANDLED; |
696 | } | 683 | } |
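The BLKIF_OP_WRITE_BARRIER case above is a degrade-on-first-failure pattern: a single BLKIF_RSP_EOPNOTSUPP from the backend clears feature_flush for good, pushes the empty mask back to the queue through xlvbd_flush(), and completes the triggering request with -EOPNOTSUPP before falling through to the normal completion path. A condensed sketch of just that status-to-errno decision (the helper name is invented; the real driver open-codes this in blkif_interrupt()):

/* Sketch: map a barrier-write completion status to an errno, degrading
 * the queue the first time the backend reports it cannot do barriers. */
static int barrier_status_to_errno(struct blkfront_info *info, int status)
{
	if (unlikely(status == BLKIF_RSP_EOPNOTSUPP)) {
		info->feature_flush = 0;	/* stop issuing barrier writes */
		xlvbd_flush(info);		/* inform the block layer too */
		return -EOPNOTSUPP;
	}
	return (status == BLKIF_RSP_OKAY) ? 0 : -EIO;
}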
697 | 684 | ||
698 | 685 | ||
699 | static int setup_blkring(struct xenbus_device *dev, | 686 | static int setup_blkring(struct xenbus_device *dev, |
700 | struct blkfront_info *info) | 687 | struct blkfront_info *info) |
701 | { | 688 | { |
702 | struct blkif_sring *sring; | 689 | struct blkif_sring *sring; |
703 | int err; | 690 | int err; |
704 | 691 | ||
705 | info->ring_ref = GRANT_INVALID_REF; | 692 | info->ring_ref = GRANT_INVALID_REF; |
706 | 693 | ||
707 | sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); | 694 | sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); |
708 | if (!sring) { | 695 | if (!sring) { |
709 | xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); | 696 | xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); |
710 | return -ENOMEM; | 697 | return -ENOMEM; |
711 | } | 698 | } |
712 | SHARED_RING_INIT(sring); | 699 | SHARED_RING_INIT(sring); |
713 | FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); | 700 | FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); |
714 | 701 | ||
715 | sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); | 702 | sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
716 | 703 | ||
717 | err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); | 704 | err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); |
718 | if (err < 0) { | 705 | if (err < 0) { |
719 | free_page((unsigned long)sring); | 706 | free_page((unsigned long)sring); |
720 | info->ring.sring = NULL; | 707 | info->ring.sring = NULL; |
721 | goto fail; | 708 | goto fail; |
722 | } | 709 | } |
723 | info->ring_ref = err; | 710 | info->ring_ref = err; |
724 | 711 | ||
725 | err = xenbus_alloc_evtchn(dev, &info->evtchn); | 712 | err = xenbus_alloc_evtchn(dev, &info->evtchn); |
726 | if (err) | 713 | if (err) |
727 | goto fail; | 714 | goto fail; |
728 | 715 | ||
729 | err = bind_evtchn_to_irqhandler(info->evtchn, | 716 | err = bind_evtchn_to_irqhandler(info->evtchn, |
730 | blkif_interrupt, | 717 | blkif_interrupt, |
731 | IRQF_SAMPLE_RANDOM, "blkif", info); | 718 | IRQF_SAMPLE_RANDOM, "blkif", info); |
732 | if (err <= 0) { | 719 | if (err <= 0) { |
733 | xenbus_dev_fatal(dev, err, | 720 | xenbus_dev_fatal(dev, err, |
734 | "bind_evtchn_to_irqhandler failed"); | 721 | "bind_evtchn_to_irqhandler failed"); |
735 | goto fail; | 722 | goto fail; |
736 | } | 723 | } |
737 | info->irq = err; | 724 | info->irq = err; |
738 | 725 | ||
739 | return 0; | 726 | return 0; |
740 | fail: | 727 | fail: |
741 | blkif_free(info, 0); | 728 | blkif_free(info, 0); |
742 | return err; | 729 | return err; |
743 | } | 730 | } |
744 | 731 | ||
745 | 732 | ||
746 | /* Common code used when first setting up, and when resuming. */ | 733 | /* Common code used when first setting up, and when resuming. */ |
747 | static int talk_to_blkback(struct xenbus_device *dev, | 734 | static int talk_to_blkback(struct xenbus_device *dev, |
748 | struct blkfront_info *info) | 735 | struct blkfront_info *info) |
749 | { | 736 | { |
750 | const char *message = NULL; | 737 | const char *message = NULL; |
751 | struct xenbus_transaction xbt; | 738 | struct xenbus_transaction xbt; |
752 | int err; | 739 | int err; |
753 | 740 | ||
754 | /* Create shared ring, alloc event channel. */ | 741 | /* Create shared ring, alloc event channel. */ |
755 | err = setup_blkring(dev, info); | 742 | err = setup_blkring(dev, info); |
756 | if (err) | 743 | if (err) |
757 | goto out; | 744 | goto out; |
758 | 745 | ||
759 | again: | 746 | again: |
760 | err = xenbus_transaction_start(&xbt); | 747 | err = xenbus_transaction_start(&xbt); |
761 | if (err) { | 748 | if (err) { |
762 | xenbus_dev_fatal(dev, err, "starting transaction"); | 749 | xenbus_dev_fatal(dev, err, "starting transaction"); |
763 | goto destroy_blkring; | 750 | goto destroy_blkring; |
764 | } | 751 | } |
765 | 752 | ||
766 | err = xenbus_printf(xbt, dev->nodename, | 753 | err = xenbus_printf(xbt, dev->nodename, |
767 | "ring-ref", "%u", info->ring_ref); | 754 | "ring-ref", "%u", info->ring_ref); |
768 | if (err) { | 755 | if (err) { |
769 | message = "writing ring-ref"; | 756 | message = "writing ring-ref"; |
770 | goto abort_transaction; | 757 | goto abort_transaction; |
771 | } | 758 | } |
772 | err = xenbus_printf(xbt, dev->nodename, | 759 | err = xenbus_printf(xbt, dev->nodename, |
773 | "event-channel", "%u", info->evtchn); | 760 | "event-channel", "%u", info->evtchn); |
774 | if (err) { | 761 | if (err) { |
775 | message = "writing event-channel"; | 762 | message = "writing event-channel"; |
776 | goto abort_transaction; | 763 | goto abort_transaction; |
777 | } | 764 | } |
778 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | 765 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", |
779 | XEN_IO_PROTO_ABI_NATIVE); | 766 | XEN_IO_PROTO_ABI_NATIVE); |
780 | if (err) { | 767 | if (err) { |
781 | message = "writing protocol"; | 768 | message = "writing protocol"; |
782 | goto abort_transaction; | 769 | goto abort_transaction; |
783 | } | 770 | } |
784 | 771 | ||
785 | err = xenbus_transaction_end(xbt, 0); | 772 | err = xenbus_transaction_end(xbt, 0); |
786 | if (err) { | 773 | if (err) { |
787 | if (err == -EAGAIN) | 774 | if (err == -EAGAIN) |
788 | goto again; | 775 | goto again; |
789 | xenbus_dev_fatal(dev, err, "completing transaction"); | 776 | xenbus_dev_fatal(dev, err, "completing transaction"); |
790 | goto destroy_blkring; | 777 | goto destroy_blkring; |
791 | } | 778 | } |
792 | 779 | ||
793 | xenbus_switch_state(dev, XenbusStateInitialised); | 780 | xenbus_switch_state(dev, XenbusStateInitialised); |
794 | 781 | ||
795 | return 0; | 782 | return 0; |
796 | 783 | ||
797 | abort_transaction: | 784 | abort_transaction: |
798 | xenbus_transaction_end(xbt, 1); | 785 | xenbus_transaction_end(xbt, 1); |
799 | if (message) | 786 | if (message) |
800 | xenbus_dev_fatal(dev, err, "%s", message); | 787 | xenbus_dev_fatal(dev, err, "%s", message); |
801 | destroy_blkring: | 788 | destroy_blkring: |
802 | blkif_free(info, 0); | 789 | blkif_free(info, 0); |
803 | out: | 790 | out: |
804 | return err; | 791 | return err; |
805 | } | 792 | } |
806 | 793 | ||
807 | /** | 794 | /** |
808 | * Entry point to this code when a new device is created. Allocate the basic | 795 | * Entry point to this code when a new device is created. Allocate the basic |
809 | * structures and the ring buffer for communication with the backend, and | 796 | * structures and the ring buffer for communication with the backend, and |
810 | * inform the backend of the appropriate details for those. Switch to | 797 | * inform the backend of the appropriate details for those. Switch to |
811 | * Initialised state. | 798 | * Initialised state. |
812 | */ | 799 | */ |
813 | static int blkfront_probe(struct xenbus_device *dev, | 800 | static int blkfront_probe(struct xenbus_device *dev, |
814 | const struct xenbus_device_id *id) | 801 | const struct xenbus_device_id *id) |
815 | { | 802 | { |
816 | int err, vdevice, i; | 803 | int err, vdevice, i; |
817 | struct blkfront_info *info; | 804 | struct blkfront_info *info; |
818 | 805 | ||
819 | /* FIXME: Use dynamic device id if this is not set. */ | 806 | /* FIXME: Use dynamic device id if this is not set. */ |
820 | err = xenbus_scanf(XBT_NIL, dev->nodename, | 807 | err = xenbus_scanf(XBT_NIL, dev->nodename, |
821 | "virtual-device", "%i", &vdevice); | 808 | "virtual-device", "%i", &vdevice); |
822 | if (err != 1) { | 809 | if (err != 1) { |
823 | /* go looking in the extended area instead */ | 810 | /* go looking in the extended area instead */ |
824 | err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", | 811 | err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", |
825 | "%i", &vdevice); | 812 | "%i", &vdevice); |
826 | if (err != 1) { | 813 | if (err != 1) { |
827 | xenbus_dev_fatal(dev, err, "reading virtual-device"); | 814 | xenbus_dev_fatal(dev, err, "reading virtual-device"); |
828 | return err; | 815 | return err; |
829 | } | 816 | } |
830 | } | 817 | } |
831 | 818 | ||
832 | if (xen_hvm_domain()) { | 819 | if (xen_hvm_domain()) { |
833 | char *type; | 820 | char *type; |
834 | int len; | 821 | int len; |
835 | /* no unplug has been done: do not hook devices != xen vbds */ | 822 | /* no unplug has been done: do not hook devices != xen vbds */ |
836 | if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { | 823 | if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { |
837 | int major; | 824 | int major; |
838 | 825 | ||
839 | if (!VDEV_IS_EXTENDED(vdevice)) | 826 | if (!VDEV_IS_EXTENDED(vdevice)) |
840 | major = BLKIF_MAJOR(vdevice); | 827 | major = BLKIF_MAJOR(vdevice); |
841 | else | 828 | else |
842 | major = XENVBD_MAJOR; | 829 | major = XENVBD_MAJOR; |
843 | 830 | ||
844 | if (major != XENVBD_MAJOR) { | 831 | if (major != XENVBD_MAJOR) { |
845 | printk(KERN_INFO | 832 | printk(KERN_INFO |
846 | "%s: HVM does not support vbd %d as xen block device\n", | 833 | "%s: HVM does not support vbd %d as xen block device\n", |
847 | __FUNCTION__, vdevice); | 834 | __FUNCTION__, vdevice); |
848 | return -ENODEV; | 835 | return -ENODEV; |
849 | } | 836 | } |
850 | } | 837 | } |
851 | /* do not create a PV cdrom device if we are an HVM guest */ | 838 | /* do not create a PV cdrom device if we are an HVM guest */ |
852 | type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); | 839 | type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); |
853 | if (IS_ERR(type)) | 840 | if (IS_ERR(type)) |
854 | return -ENODEV; | 841 | return -ENODEV; |
855 | if (strncmp(type, "cdrom", 5) == 0) { | 842 | if (strncmp(type, "cdrom", 5) == 0) { |
856 | kfree(type); | 843 | kfree(type); |
857 | return -ENODEV; | 844 | return -ENODEV; |
858 | } | 845 | } |
859 | kfree(type); | 846 | kfree(type); |
860 | } | 847 | } |
861 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 848 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
862 | if (!info) { | 849 | if (!info) { |
863 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | 850 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); |
864 | return -ENOMEM; | 851 | return -ENOMEM; |
865 | } | 852 | } |
866 | 853 | ||
867 | mutex_init(&info->mutex); | 854 | mutex_init(&info->mutex); |
868 | info->xbdev = dev; | 855 | info->xbdev = dev; |
869 | info->vdevice = vdevice; | 856 | info->vdevice = vdevice; |
870 | info->connected = BLKIF_STATE_DISCONNECTED; | 857 | info->connected = BLKIF_STATE_DISCONNECTED; |
871 | INIT_WORK(&info->work, blkif_restart_queue); | 858 | INIT_WORK(&info->work, blkif_restart_queue); |
872 | 859 | ||
873 | for (i = 0; i < BLK_RING_SIZE; i++) | 860 | for (i = 0; i < BLK_RING_SIZE; i++) |
874 | info->shadow[i].req.id = i+1; | 861 | info->shadow[i].req.id = i+1; |
875 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | 862 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; |
876 | 863 | ||
877 | /* Front end dir is a number, which is used as the id. */ | 864 | /* Front end dir is a number, which is used as the id. */ |
878 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); | 865 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); |
879 | dev_set_drvdata(&dev->dev, info); | 866 | dev_set_drvdata(&dev->dev, info); |
880 | 867 | ||
881 | err = talk_to_blkback(dev, info); | 868 | err = talk_to_blkback(dev, info); |
882 | if (err) { | 869 | if (err) { |
883 | kfree(info); | 870 | kfree(info); |
884 | dev_set_drvdata(&dev->dev, NULL); | 871 | dev_set_drvdata(&dev->dev, NULL); |
885 | return err; | 872 | return err; |
886 | } | 873 | } |
887 | 874 | ||
888 | return 0; | 875 | return 0; |
889 | } | 876 | } |
890 | 877 | ||
891 | 878 | ||
892 | static int blkif_recover(struct blkfront_info *info) | 879 | static int blkif_recover(struct blkfront_info *info) |
893 | { | 880 | { |
894 | int i; | 881 | int i; |
895 | struct blkif_request *req; | 882 | struct blkif_request *req; |
896 | struct blk_shadow *copy; | 883 | struct blk_shadow *copy; |
897 | int j; | 884 | int j; |
898 | 885 | ||
899 | /* Stage 1: Make a safe copy of the shadow state. */ | 886 | /* Stage 1: Make a safe copy of the shadow state. */ |
900 | copy = kmalloc(sizeof(info->shadow), | 887 | copy = kmalloc(sizeof(info->shadow), |
901 | GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); | 888 | GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); |
902 | if (!copy) | 889 | if (!copy) |
903 | return -ENOMEM; | 890 | return -ENOMEM; |
904 | memcpy(copy, info->shadow, sizeof(info->shadow)); | 891 | memcpy(copy, info->shadow, sizeof(info->shadow)); |
905 | 892 | ||
906 | /* Stage 2: Set up free list. */ | 893 | /* Stage 2: Set up free list. */ |
907 | memset(&info->shadow, 0, sizeof(info->shadow)); | 894 | memset(&info->shadow, 0, sizeof(info->shadow)); |
908 | for (i = 0; i < BLK_RING_SIZE; i++) | 895 | for (i = 0; i < BLK_RING_SIZE; i++) |
909 | info->shadow[i].req.id = i+1; | 896 | info->shadow[i].req.id = i+1; |
910 | info->shadow_free = info->ring.req_prod_pvt; | 897 | info->shadow_free = info->ring.req_prod_pvt; |
911 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | 898 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; |
912 | 899 | ||
913 | /* Stage 3: Find pending requests and requeue them. */ | 900 | /* Stage 3: Find pending requests and requeue them. */ |
914 | for (i = 0; i < BLK_RING_SIZE; i++) { | 901 | for (i = 0; i < BLK_RING_SIZE; i++) { |
915 | /* Not in use? */ | 902 | /* Not in use? */ |
916 | if (copy[i].request == 0) | 903 | if (copy[i].request == 0) |
917 | continue; | 904 | continue; |
918 | 905 | ||
919 | /* Grab a request slot and copy shadow state into it. */ | 906 | /* Grab a request slot and copy shadow state into it. */ |
920 | req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | 907 | req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); |
921 | *req = copy[i].req; | 908 | *req = copy[i].req; |
922 | 909 | ||
923 | /* We get a new request id, and must reset the shadow state. */ | 910 | /* We get a new request id, and must reset the shadow state. */ |
924 | req->id = get_id_from_freelist(info); | 911 | req->id = get_id_from_freelist(info); |
925 | memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); | 912 | memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); |
926 | 913 | ||
927 | /* Rewrite any grant references invalidated by susp/resume. */ | 914 | /* Rewrite any grant references invalidated by susp/resume. */ |
928 | for (j = 0; j < req->nr_segments; j++) | 915 | for (j = 0; j < req->nr_segments; j++) |
929 | gnttab_grant_foreign_access_ref( | 916 | gnttab_grant_foreign_access_ref( |
930 | req->seg[j].gref, | 917 | req->seg[j].gref, |
931 | info->xbdev->otherend_id, | 918 | info->xbdev->otherend_id, |
932 | pfn_to_mfn(info->shadow[req->id].frame[j]), | 919 | pfn_to_mfn(info->shadow[req->id].frame[j]), |
933 | rq_data_dir( | 920 | rq_data_dir( |
934 | (struct request *) | 921 | (struct request *) |
935 | info->shadow[req->id].request)); | 922 | info->shadow[req->id].request)); |
936 | info->shadow[req->id].req = *req; | 923 | info->shadow[req->id].req = *req; |
937 | 924 | ||
938 | info->ring.req_prod_pvt++; | 925 | info->ring.req_prod_pvt++; |
939 | } | 926 | } |
940 | 927 | ||
941 | kfree(copy); | 928 | kfree(copy); |
942 | 929 | ||
943 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | 930 | xenbus_switch_state(info->xbdev, XenbusStateConnected); |
944 | 931 | ||
945 | spin_lock_irq(&blkif_io_lock); | 932 | spin_lock_irq(&blkif_io_lock); |
946 | 933 | ||
947 | /* Now safe for us to use the shared ring */ | 934 | /* Now safe for us to use the shared ring */ |
948 | info->connected = BLKIF_STATE_CONNECTED; | 935 | info->connected = BLKIF_STATE_CONNECTED; |
949 | 936 | ||
950 | /* Send off requeued requests */ | 937 | /* Send off requeued requests */ |
951 | flush_requests(info); | 938 | flush_requests(info); |
952 | 939 | ||
953 | /* Kick any other new requests queued since we resumed */ | 940 | /* Kick any other new requests queued since we resumed */ |
954 | kick_pending_request_queues(info); | 941 | kick_pending_request_queues(info); |
955 | 942 | ||
956 | spin_unlock_irq(&blkif_io_lock); | 943 | spin_unlock_irq(&blkif_io_lock); |
957 | 944 | ||
958 | return 0; | 945 | return 0; |
959 | } | 946 | } |
960 | 947 | ||
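blkif_recover() above leans on the shadow free list that blkfront_probe() seeds (each shadow[i].req.id chains to the next free slot, with the last entry capped by a sentinel). The helpers that manipulate that list are defined earlier in xen-blkfront.c and are not part of this hunk; the following is only an illustrative sketch of the pattern, so treat the exact bodies as assumptions rather than the in-tree code.

/* Illustrative only: how the shadow free list is typically maintained. */
static unsigned long get_id_from_freelist(struct blkfront_info *info)
{
	unsigned long free = info->shadow_free;

	BUG_ON(free >= BLK_RING_SIZE);
	/* A free slot's req.id points at the next free slot. */
	info->shadow_free = info->shadow[free].req.id;
	info->shadow[free].req.id = 0x0fffffee;	/* mark slot as in use */
	return free;
}

static void add_id_to_freelist(struct blkfront_info *info, unsigned long id)
{
	/* Push the slot back onto the head of the list. */
	info->shadow[id].req.id = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

This is why stage 3 of the recovery can simply call get_id_from_freelist() for every in-flight shadow entry it copies back: the free list was rebuilt from scratch in stage 2.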
961 | /** | 948 | /** |
962 | * We are reconnecting to the backend, due to a suspend/resume, or a backend | 949 | * We are reconnecting to the backend, due to a suspend/resume, or a backend |
963 | * driver restart. We tear down our blkif structure and recreate it, but | 950 | * driver restart. We tear down our blkif structure and recreate it, but |
964 | * leave the device-layer structures intact so that this is transparent to the | 951 | * leave the device-layer structures intact so that this is transparent to the |
965 | * rest of the kernel. | 952 | * rest of the kernel. |
966 | */ | 953 | */ |
967 | static int blkfront_resume(struct xenbus_device *dev) | 954 | static int blkfront_resume(struct xenbus_device *dev) |
968 | { | 955 | { |
969 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); | 956 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); |
970 | int err; | 957 | int err; |
971 | 958 | ||
972 | dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); | 959 | dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); |
973 | 960 | ||
974 | blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); | 961 | blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); |
975 | 962 | ||
976 | err = talk_to_blkback(dev, info); | 963 | err = talk_to_blkback(dev, info); |
977 | if (info->connected == BLKIF_STATE_SUSPENDED && !err) | 964 | if (info->connected == BLKIF_STATE_SUSPENDED && !err) |
978 | err = blkif_recover(info); | 965 | err = blkif_recover(info); |
979 | 966 | ||
980 | return err; | 967 | return err; |
981 | } | 968 | } |
982 | 969 | ||
983 | static void | 970 | static void |
984 | blkfront_closing(struct blkfront_info *info) | 971 | blkfront_closing(struct blkfront_info *info) |
985 | { | 972 | { |
986 | struct xenbus_device *xbdev = info->xbdev; | 973 | struct xenbus_device *xbdev = info->xbdev; |
987 | struct block_device *bdev = NULL; | 974 | struct block_device *bdev = NULL; |
988 | 975 | ||
989 | mutex_lock(&info->mutex); | 976 | mutex_lock(&info->mutex); |
990 | 977 | ||
991 | if (xbdev->state == XenbusStateClosing) { | 978 | if (xbdev->state == XenbusStateClosing) { |
992 | mutex_unlock(&info->mutex); | 979 | mutex_unlock(&info->mutex); |
993 | return; | 980 | return; |
994 | } | 981 | } |
995 | 982 | ||
996 | if (info->gd) | 983 | if (info->gd) |
997 | bdev = bdget_disk(info->gd, 0); | 984 | bdev = bdget_disk(info->gd, 0); |
998 | 985 | ||
999 | mutex_unlock(&info->mutex); | 986 | mutex_unlock(&info->mutex); |
1000 | 987 | ||
1001 | if (!bdev) { | 988 | if (!bdev) { |
1002 | xenbus_frontend_closed(xbdev); | 989 | xenbus_frontend_closed(xbdev); |
1003 | return; | 990 | return; |
1004 | } | 991 | } |
1005 | 992 | ||
1006 | mutex_lock(&bdev->bd_mutex); | 993 | mutex_lock(&bdev->bd_mutex); |
1007 | 994 | ||
1008 | if (bdev->bd_openers) { | 995 | if (bdev->bd_openers) { |
1009 | xenbus_dev_error(xbdev, -EBUSY, | 996 | xenbus_dev_error(xbdev, -EBUSY, |
1010 | "Device in use; refusing to close"); | 997 | "Device in use; refusing to close"); |
1011 | xenbus_switch_state(xbdev, XenbusStateClosing); | 998 | xenbus_switch_state(xbdev, XenbusStateClosing); |
1012 | } else { | 999 | } else { |
1013 | xlvbd_release_gendisk(info); | 1000 | xlvbd_release_gendisk(info); |
1014 | xenbus_frontend_closed(xbdev); | 1001 | xenbus_frontend_closed(xbdev); |
1015 | } | 1002 | } |
1016 | 1003 | ||
1017 | mutex_unlock(&bdev->bd_mutex); | 1004 | mutex_unlock(&bdev->bd_mutex); |
1018 | bdput(bdev); | 1005 | bdput(bdev); |
1019 | } | 1006 | } |
1020 | 1007 | ||
1021 | /* | 1008 | /* |
1022 | * Invoked when the backend is finally 'ready' (and has produced | 1009 | * Invoked when the backend is finally 'ready' (and has produced |
1023 | * the details about the physical device - #sectors, size, etc). | 1010 | * the details about the physical device - #sectors, size, etc). |
1024 | */ | 1011 | */ |
1025 | static void blkfront_connect(struct blkfront_info *info) | 1012 | static void blkfront_connect(struct blkfront_info *info) |
1026 | { | 1013 | { |
1027 | unsigned long long sectors; | 1014 | unsigned long long sectors; |
1028 | unsigned long sector_size; | 1015 | unsigned long sector_size; |
1029 | unsigned int binfo; | 1016 | unsigned int binfo; |
1030 | int err; | 1017 | int err; |
1031 | int barrier; | 1018 | int barrier; |
1032 | 1019 | ||
1033 | switch (info->connected) { | 1020 | switch (info->connected) { |
1034 | case BLKIF_STATE_CONNECTED: | 1021 | case BLKIF_STATE_CONNECTED: |
1035 | /* | 1022 | /* |
1036 | * Potentially, the back-end may be signalling | 1023 | * Potentially, the back-end may be signalling |
1037 | * a capacity change; update the capacity. | 1024 | * a capacity change; update the capacity. |
1038 | */ | 1025 | */ |
1039 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, | 1026 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, |
1040 | "sectors", "%Lu", §ors); | 1027 | "sectors", "%Lu", §ors); |
1041 | if (XENBUS_EXIST_ERR(err)) | 1028 | if (XENBUS_EXIST_ERR(err)) |
1042 | return; | 1029 | return; |
1043 | printk(KERN_INFO "Setting capacity to %Lu\n", | 1030 | printk(KERN_INFO "Setting capacity to %Lu\n", |
1044 | sectors); | 1031 | sectors); |
1045 | set_capacity(info->gd, sectors); | 1032 | set_capacity(info->gd, sectors); |
1046 | revalidate_disk(info->gd); | 1033 | revalidate_disk(info->gd); |
1047 | 1034 | ||
1048 | /* fall through */ | 1035 | /* fall through */ |
1049 | case BLKIF_STATE_SUSPENDED: | 1036 | case BLKIF_STATE_SUSPENDED: |
1050 | return; | 1037 | return; |
1051 | 1038 | ||
1052 | default: | 1039 | default: |
1053 | break; | 1040 | break; |
1054 | } | 1041 | } |
1055 | 1042 | ||
1056 | dev_dbg(&info->xbdev->dev, "%s:%s.\n", | 1043 | dev_dbg(&info->xbdev->dev, "%s:%s.\n", |
1057 | __func__, info->xbdev->otherend); | 1044 | __func__, info->xbdev->otherend); |
1058 | 1045 | ||
1059 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1046 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1060 | "sectors", "%llu", §ors, | 1047 | "sectors", "%llu", §ors, |
1061 | "info", "%u", &binfo, | 1048 | "info", "%u", &binfo, |
1062 | "sector-size", "%lu", §or_size, | 1049 | "sector-size", "%lu", §or_size, |
1063 | NULL); | 1050 | NULL); |
1064 | if (err) { | 1051 | if (err) { |
1065 | xenbus_dev_fatal(info->xbdev, err, | 1052 | xenbus_dev_fatal(info->xbdev, err, |
1066 | "reading backend fields at %s", | 1053 | "reading backend fields at %s", |
1067 | info->xbdev->otherend); | 1054 | info->xbdev->otherend); |
1068 | return; | 1055 | return; |
1069 | } | 1056 | } |
1070 | 1057 | ||
1071 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1058 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1072 | "feature-barrier", "%lu", &barrier, | 1059 | "feature-barrier", "%lu", &barrier, |
1073 | NULL); | 1060 | NULL); |
1074 | 1061 | ||
1075 | /* | 1062 | /* |
1076 | * If there's no "feature-barrier" defined, then it means | 1063 | * If there's no "feature-barrier" defined, then it means |
1077 | * we're dealing with a very old backend which writes | 1064 | * we're dealing with a very old backend which writes |
1078 | * synchronously; draining will do what needs to get done. | 1065 | * synchronously; nothing to do. |
1079 | * | 1066 | * |
1080 | * If there are barriers, then we use flush. | 1067 | * If there are barriers, then we use flush. |
1081 | * | ||
1082 | * If barriers are not supported, then there's not much we can | ||
1083 | * do, so just set ordering to NONE. | ||
1084 | */ | 1068 | */ |
1085 | if (err) | 1069 | info->feature_flush = 0; |
1086 | info->feature_barrier = QUEUE_ORDERED_DRAIN; | 1070 | if (!err && barrier) |
1087 | else if (barrier) | 1071 | info->feature_flush = REQ_FLUSH; |
1088 | info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; | ||
1089 | else | ||
1090 | info->feature_barrier = QUEUE_ORDERED_NONE; | ||
1091 | 1072 | ||
1092 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); | 1073 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); |
1093 | if (err) { | 1074 | if (err) { |
1094 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", | 1075 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", |
1095 | info->xbdev->otherend); | 1076 | info->xbdev->otherend); |
1096 | return; | 1077 | return; |
1097 | } | 1078 | } |
1098 | 1079 | ||
1099 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | 1080 | xenbus_switch_state(info->xbdev, XenbusStateConnected); |
1100 | 1081 | ||
1101 | /* Kick pending requests. */ | 1082 | /* Kick pending requests. */ |
1102 | spin_lock_irq(&blkif_io_lock); | 1083 | spin_lock_irq(&blkif_io_lock); |
1103 | info->connected = BLKIF_STATE_CONNECTED; | 1084 | info->connected = BLKIF_STATE_CONNECTED; |
1104 | kick_pending_request_queues(info); | 1085 | kick_pending_request_queues(info); |
1105 | spin_unlock_irq(&blkif_io_lock); | 1086 | spin_unlock_irq(&blkif_io_lock); |
1106 | 1087 | ||
1107 | add_disk(info->gd); | 1088 | add_disk(info->gd); |
1108 | 1089 | ||
1109 | info->is_ready = 1; | 1090 | info->is_ready = 1; |
1110 | } | 1091 | } |
1111 | 1092 | ||
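The blkfront_connect() hunk above is this driver's half of the ordered-write rework: instead of choosing a QUEUE_ORDERED_* level, it now records REQ_FLUSH in info->feature_flush when the backend advertises "feature-barrier", and 0 otherwise. That value is consumed when the request queue for the disk is set up in xlvbd_alloc_gendisk(), earlier in this file. A minimal sketch of that queue-side registration, assuming info->rq holds the disk's request queue (the helper name here is illustrative):

/* Sketch only: the real registration lives in xlvbd_alloc_gendisk(). */
static void xlvbd_flush(struct blkfront_info *info)
{
	/*
	 * feature_flush == 0         : write-through backend, nothing to do
	 * feature_flush == REQ_FLUSH : backend can flush its write cache
	 * (REQ_FLUSH | REQ_FUA would also be accepted if FUA were supported)
	 */
	blk_queue_flush(info->rq, info->feature_flush);
	printk(KERN_INFO "blkfront: %s: barriers %s\n",
	       info->gd->disk_name,
	       info->feature_flush ? "enabled" : "disabled");
}

Note that the old QUEUE_ORDERED_NONE fallback disappears: a backend with no flush support simply leaves feature_flush at 0, so no flush capability is advertised for the queue.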
1112 | /** | 1093 | /** |
1113 | * Callback received when the backend's state changes. | 1094 | * Callback received when the backend's state changes. |
1114 | */ | 1095 | */ |
1115 | static void blkback_changed(struct xenbus_device *dev, | 1096 | static void blkback_changed(struct xenbus_device *dev, |
1116 | enum xenbus_state backend_state) | 1097 | enum xenbus_state backend_state) |
1117 | { | 1098 | { |
1118 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); | 1099 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); |
1119 | 1100 | ||
1120 | dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); | 1101 | dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); |
1121 | 1102 | ||
1122 | switch (backend_state) { | 1103 | switch (backend_state) { |
1123 | case XenbusStateInitialising: | 1104 | case XenbusStateInitialising: |
1124 | case XenbusStateInitWait: | 1105 | case XenbusStateInitWait: |
1125 | case XenbusStateInitialised: | 1106 | case XenbusStateInitialised: |
1126 | case XenbusStateUnknown: | 1107 | case XenbusStateUnknown: |
1127 | case XenbusStateClosed: | 1108 | case XenbusStateClosed: |
1128 | break; | 1109 | break; |
1129 | 1110 | ||
1130 | case XenbusStateConnected: | 1111 | case XenbusStateConnected: |
1131 | blkfront_connect(info); | 1112 | blkfront_connect(info); |
1132 | break; | 1113 | break; |
1133 | 1114 | ||
1134 | case XenbusStateClosing: | 1115 | case XenbusStateClosing: |
1135 | blkfront_closing(info); | 1116 | blkfront_closing(info); |
1136 | break; | 1117 | break; |
1137 | } | 1118 | } |
1138 | } | 1119 | } |
1139 | 1120 | ||
1140 | static int blkfront_remove(struct xenbus_device *xbdev) | 1121 | static int blkfront_remove(struct xenbus_device *xbdev) |
1141 | { | 1122 | { |
1142 | struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); | 1123 | struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); |
1143 | struct block_device *bdev = NULL; | 1124 | struct block_device *bdev = NULL; |
1144 | struct gendisk *disk; | 1125 | struct gendisk *disk; |
1145 | 1126 | ||
1146 | dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); | 1127 | dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); |
1147 | 1128 | ||
1148 | blkif_free(info, 0); | 1129 | blkif_free(info, 0); |
1149 | 1130 | ||
1150 | mutex_lock(&info->mutex); | 1131 | mutex_lock(&info->mutex); |
1151 | 1132 | ||
1152 | disk = info->gd; | 1133 | disk = info->gd; |
1153 | if (disk) | 1134 | if (disk) |
1154 | bdev = bdget_disk(disk, 0); | 1135 | bdev = bdget_disk(disk, 0); |
1155 | 1136 | ||
1156 | info->xbdev = NULL; | 1137 | info->xbdev = NULL; |
1157 | mutex_unlock(&info->mutex); | 1138 | mutex_unlock(&info->mutex); |
1158 | 1139 | ||
1159 | if (!bdev) { | 1140 | if (!bdev) { |
1160 | kfree(info); | 1141 | kfree(info); |
1161 | return 0; | 1142 | return 0; |
1162 | } | 1143 | } |
1163 | 1144 | ||
1164 | /* | 1145 | /* |
1165 | * The xbdev was removed before we reached the Closed | 1146 | * The xbdev was removed before we reached the Closed |
1166 | * state. See if it's safe to remove the disk. If the bdev | 1147 | * state. See if it's safe to remove the disk. If the bdev |
1167 | * isn't closed yet, we let release take care of it. | 1148 | * isn't closed yet, we let release take care of it. |
1168 | */ | 1149 | */ |
1169 | 1150 | ||
1170 | mutex_lock(&bdev->bd_mutex); | 1151 | mutex_lock(&bdev->bd_mutex); |
1171 | info = disk->private_data; | 1152 | info = disk->private_data; |
1172 | 1153 | ||
1173 | dev_warn(disk_to_dev(disk), | 1154 | dev_warn(disk_to_dev(disk), |
1174 | "%s was hot-unplugged, %d stale handles\n", | 1155 | "%s was hot-unplugged, %d stale handles\n", |
1175 | xbdev->nodename, bdev->bd_openers); | 1156 | xbdev->nodename, bdev->bd_openers); |
1176 | 1157 | ||
1177 | if (info && !bdev->bd_openers) { | 1158 | if (info && !bdev->bd_openers) { |
1178 | xlvbd_release_gendisk(info); | 1159 | xlvbd_release_gendisk(info); |
1179 | disk->private_data = NULL; | 1160 | disk->private_data = NULL; |
1180 | kfree(info); | 1161 | kfree(info); |
1181 | } | 1162 | } |
1182 | 1163 | ||
1183 | mutex_unlock(&bdev->bd_mutex); | 1164 | mutex_unlock(&bdev->bd_mutex); |
1184 | bdput(bdev); | 1165 | bdput(bdev); |
1185 | 1166 | ||
1186 | return 0; | 1167 | return 0; |
1187 | } | 1168 | } |
1188 | 1169 | ||
1189 | static int blkfront_is_ready(struct xenbus_device *dev) | 1170 | static int blkfront_is_ready(struct xenbus_device *dev) |
1190 | { | 1171 | { |
1191 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); | 1172 | struct blkfront_info *info = dev_get_drvdata(&dev->dev); |
1192 | 1173 | ||
1193 | return info->is_ready && info->xbdev; | 1174 | return info->is_ready && info->xbdev; |
1194 | } | 1175 | } |
1195 | 1176 | ||
1196 | static int blkif_open(struct block_device *bdev, fmode_t mode) | 1177 | static int blkif_open(struct block_device *bdev, fmode_t mode) |
1197 | { | 1178 | { |
1198 | struct gendisk *disk = bdev->bd_disk; | 1179 | struct gendisk *disk = bdev->bd_disk; |
1199 | struct blkfront_info *info; | 1180 | struct blkfront_info *info; |
1200 | int err = 0; | 1181 | int err = 0; |
1201 | 1182 | ||
1202 | lock_kernel(); | 1183 | lock_kernel(); |
1203 | 1184 | ||
1204 | info = disk->private_data; | 1185 | info = disk->private_data; |
1205 | if (!info) { | 1186 | if (!info) { |
1206 | /* xbdev gone */ | 1187 | /* xbdev gone */ |
1207 | err = -ERESTARTSYS; | 1188 | err = -ERESTARTSYS; |
1208 | goto out; | 1189 | goto out; |
1209 | } | 1190 | } |
1210 | 1191 | ||
1211 | mutex_lock(&info->mutex); | 1192 | mutex_lock(&info->mutex); |
1212 | 1193 | ||
1213 | if (!info->gd) | 1194 | if (!info->gd) |
1214 | /* xbdev is closed */ | 1195 | /* xbdev is closed */ |
1215 | err = -ERESTARTSYS; | 1196 | err = -ERESTARTSYS; |
1216 | 1197 | ||
1217 | mutex_unlock(&info->mutex); | 1198 | mutex_unlock(&info->mutex); |
1218 | 1199 | ||
1219 | out: | 1200 | out: |
1220 | unlock_kernel(); | 1201 | unlock_kernel(); |
1221 | return err; | 1202 | return err; |
1222 | } | 1203 | } |
1223 | 1204 | ||
1224 | static int blkif_release(struct gendisk *disk, fmode_t mode) | 1205 | static int blkif_release(struct gendisk *disk, fmode_t mode) |
1225 | { | 1206 | { |
1226 | struct blkfront_info *info = disk->private_data; | 1207 | struct blkfront_info *info = disk->private_data; |
1227 | struct block_device *bdev; | 1208 | struct block_device *bdev; |
1228 | struct xenbus_device *xbdev; | 1209 | struct xenbus_device *xbdev; |
1229 | 1210 | ||
1230 | lock_kernel(); | 1211 | lock_kernel(); |
1231 | 1212 | ||
1232 | bdev = bdget_disk(disk, 0); | 1213 | bdev = bdget_disk(disk, 0); |
1233 | bdput(bdev); | 1214 | bdput(bdev); |
1234 | 1215 | ||
1235 | if (bdev->bd_openers) | 1216 | if (bdev->bd_openers) |
1236 | goto out; | 1217 | goto out; |
1237 | 1218 | ||
1238 | /* | 1219 | /* |
1239 | * Check if we have been instructed to close. We will have | 1220 | * Check if we have been instructed to close. We will have |
1240 | * deferred this request, because the bdev was still open. | 1221 | * deferred this request, because the bdev was still open. |
1241 | */ | 1222 | */ |
1242 | 1223 | ||
1243 | mutex_lock(&info->mutex); | 1224 | mutex_lock(&info->mutex); |
1244 | xbdev = info->xbdev; | 1225 | xbdev = info->xbdev; |
1245 | 1226 | ||
1246 | if (xbdev && xbdev->state == XenbusStateClosing) { | 1227 | if (xbdev && xbdev->state == XenbusStateClosing) { |
1247 | /* pending switch to state closed */ | 1228 | /* pending switch to state closed */ |
1248 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); | 1229 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); |
1249 | xlvbd_release_gendisk(info); | 1230 | xlvbd_release_gendisk(info); |
1250 | xenbus_frontend_closed(info->xbdev); | 1231 | xenbus_frontend_closed(info->xbdev); |
1251 | } | 1232 | } |
1252 | 1233 | ||
1253 | mutex_unlock(&info->mutex); | 1234 | mutex_unlock(&info->mutex); |
1254 | 1235 | ||
1255 | if (!xbdev) { | 1236 | if (!xbdev) { |
1256 | /* sudden device removal */ | 1237 | /* sudden device removal */ |
1257 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); | 1238 | dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); |
1258 | xlvbd_release_gendisk(info); | 1239 | xlvbd_release_gendisk(info); |
1259 | disk->private_data = NULL; | 1240 | disk->private_data = NULL; |
1260 | kfree(info); | 1241 | kfree(info); |
1261 | } | 1242 | } |
1262 | 1243 | ||
1263 | out: | 1244 | out: |
1264 | unlock_kernel(); | 1245 | unlock_kernel(); |
1265 | return 0; | 1246 | return 0; |
1266 | } | 1247 | } |
1267 | 1248 | ||
1268 | static const struct block_device_operations xlvbd_block_fops = | 1249 | static const struct block_device_operations xlvbd_block_fops = |
1269 | { | 1250 | { |
1270 | .owner = THIS_MODULE, | 1251 | .owner = THIS_MODULE, |
1271 | .open = blkif_open, | 1252 | .open = blkif_open, |
1272 | .release = blkif_release, | 1253 | .release = blkif_release, |
1273 | .getgeo = blkif_getgeo, | 1254 | .getgeo = blkif_getgeo, |
1274 | .ioctl = blkif_ioctl, | 1255 | .ioctl = blkif_ioctl, |
1275 | }; | 1256 | }; |
1276 | 1257 | ||
1277 | 1258 | ||
1278 | static const struct xenbus_device_id blkfront_ids[] = { | 1259 | static const struct xenbus_device_id blkfront_ids[] = { |
1279 | { "vbd" }, | 1260 | { "vbd" }, |
1280 | { "" } | 1261 | { "" } |
1281 | }; | 1262 | }; |
1282 | 1263 | ||
1283 | static struct xenbus_driver blkfront = { | 1264 | static struct xenbus_driver blkfront = { |
1284 | .name = "vbd", | 1265 | .name = "vbd", |
1285 | .owner = THIS_MODULE, | 1266 | .owner = THIS_MODULE, |
1286 | .ids = blkfront_ids, | 1267 | .ids = blkfront_ids, |
1287 | .probe = blkfront_probe, | 1268 | .probe = blkfront_probe, |
1288 | .remove = blkfront_remove, | 1269 | .remove = blkfront_remove, |
1289 | .resume = blkfront_resume, | 1270 | .resume = blkfront_resume, |
1290 | .otherend_changed = blkback_changed, | 1271 | .otherend_changed = blkback_changed, |
1291 | .is_ready = blkfront_is_ready, | 1272 | .is_ready = blkfront_is_ready, |
1292 | }; | 1273 | }; |
1293 | 1274 | ||
1294 | static int __init xlblk_init(void) | 1275 | static int __init xlblk_init(void) |
1295 | { | 1276 | { |
1296 | if (!xen_domain()) | 1277 | if (!xen_domain()) |
1297 | return -ENODEV; | 1278 | return -ENODEV; |
1298 | 1279 | ||
1299 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { | 1280 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { |
1300 | printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", | 1281 | printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", |
1301 | XENVBD_MAJOR, DEV_NAME); | 1282 | XENVBD_MAJOR, DEV_NAME); |
1302 | return -ENODEV; | 1283 | return -ENODEV; |
1303 | } | 1284 | } |
1304 | 1285 | ||
1305 | return xenbus_register_frontend(&blkfront); | 1286 | return xenbus_register_frontend(&blkfront); |
1306 | } | 1287 | } |
1307 | module_init(xlblk_init); | 1288 | module_init(xlblk_init); |
1308 | 1289 | ||
1309 | 1290 | ||
1310 | static void __exit xlblk_exit(void) | 1291 | static void __exit xlblk_exit(void) |
1311 | { | 1292 | { |
1312 | return xenbus_unregister_driver(&blkfront); | 1293 | return xenbus_unregister_driver(&blkfront); |
1313 | } | 1294 | } |
1314 | module_exit(xlblk_exit); | 1295 | module_exit(xlblk_exit); |
1315 | 1296 | ||
1316 | MODULE_DESCRIPTION("Xen virtual block device frontend"); | 1297 | MODULE_DESCRIPTION("Xen virtual block device frontend"); |
1317 | MODULE_LICENSE("GPL"); | 1298 | MODULE_LICENSE("GPL"); |
1318 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); | 1299 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); |
1319 | MODULE_ALIAS("xen:vbd"); | 1300 | MODULE_ALIAS("xen:vbd"); |
1320 | MODULE_ALIAS("xenblk"); | 1301 | MODULE_ALIAS("xenblk"); |
1321 | 1302 |
drivers/ide/ide-disk.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) | 2 | * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) |
3 | * Copyright (C) 1998-2002 Linux ATA Development | 3 | * Copyright (C) 1998-2002 Linux ATA Development |
4 | * Andre Hedrick <andre@linux-ide.org> | 4 | * Andre Hedrick <andre@linux-ide.org> |
5 | * Copyright (C) 2003 Red Hat | 5 | * Copyright (C) 2003 Red Hat |
6 | * Copyright (C) 2003-2005, 2007 Bartlomiej Zolnierkiewicz | 6 | * Copyright (C) 2003-2005, 2007 Bartlomiej Zolnierkiewicz |
7 | */ | 7 | */ |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * Mostly written by Mark Lord <mlord@pobox.com> | 10 | * Mostly written by Mark Lord <mlord@pobox.com> |
11 | * and Gadi Oxman <gadio@netvision.net.il> | 11 | * and Gadi Oxman <gadio@netvision.net.il> |
12 | * and Andre Hedrick <andre@linux-ide.org> | 12 | * and Andre Hedrick <andre@linux-ide.org> |
13 | * | 13 | * |
14 | * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. | 14 | * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/timer.h> | 20 | #include <linux/timer.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/major.h> | 23 | #include <linux/major.h> |
24 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
25 | #include <linux/genhd.h> | 25 | #include <linux/genhd.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | #include <linux/mutex.h> | 28 | #include <linux/mutex.h> |
29 | #include <linux/leds.h> | 29 | #include <linux/leds.h> |
30 | #include <linux/ide.h> | 30 | #include <linux/ide.h> |
31 | 31 | ||
32 | #include <asm/byteorder.h> | 32 | #include <asm/byteorder.h> |
33 | #include <asm/irq.h> | 33 | #include <asm/irq.h> |
34 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
35 | #include <asm/io.h> | 35 | #include <asm/io.h> |
36 | #include <asm/div64.h> | 36 | #include <asm/div64.h> |
37 | 37 | ||
38 | #include "ide-disk.h" | 38 | #include "ide-disk.h" |
39 | 39 | ||
40 | static const u8 ide_rw_cmds[] = { | 40 | static const u8 ide_rw_cmds[] = { |
41 | ATA_CMD_READ_MULTI, | 41 | ATA_CMD_READ_MULTI, |
42 | ATA_CMD_WRITE_MULTI, | 42 | ATA_CMD_WRITE_MULTI, |
43 | ATA_CMD_READ_MULTI_EXT, | 43 | ATA_CMD_READ_MULTI_EXT, |
44 | ATA_CMD_WRITE_MULTI_EXT, | 44 | ATA_CMD_WRITE_MULTI_EXT, |
45 | ATA_CMD_PIO_READ, | 45 | ATA_CMD_PIO_READ, |
46 | ATA_CMD_PIO_WRITE, | 46 | ATA_CMD_PIO_WRITE, |
47 | ATA_CMD_PIO_READ_EXT, | 47 | ATA_CMD_PIO_READ_EXT, |
48 | ATA_CMD_PIO_WRITE_EXT, | 48 | ATA_CMD_PIO_WRITE_EXT, |
49 | ATA_CMD_READ, | 49 | ATA_CMD_READ, |
50 | ATA_CMD_WRITE, | 50 | ATA_CMD_WRITE, |
51 | ATA_CMD_READ_EXT, | 51 | ATA_CMD_READ_EXT, |
52 | ATA_CMD_WRITE_EXT, | 52 | ATA_CMD_WRITE_EXT, |
53 | }; | 53 | }; |
54 | 54 | ||
55 | static void ide_tf_set_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 dma) | 55 | static void ide_tf_set_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 dma) |
56 | { | 56 | { |
57 | u8 index, lba48, write; | 57 | u8 index, lba48, write; |
58 | 58 | ||
59 | lba48 = (cmd->tf_flags & IDE_TFLAG_LBA48) ? 2 : 0; | 59 | lba48 = (cmd->tf_flags & IDE_TFLAG_LBA48) ? 2 : 0; |
60 | write = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 0; | 60 | write = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 0; |
61 | 61 | ||
62 | if (dma) { | 62 | if (dma) { |
63 | cmd->protocol = ATA_PROT_DMA; | 63 | cmd->protocol = ATA_PROT_DMA; |
64 | index = 8; | 64 | index = 8; |
65 | } else { | 65 | } else { |
66 | cmd->protocol = ATA_PROT_PIO; | 66 | cmd->protocol = ATA_PROT_PIO; |
67 | if (drive->mult_count) { | 67 | if (drive->mult_count) { |
68 | cmd->tf_flags |= IDE_TFLAG_MULTI_PIO; | 68 | cmd->tf_flags |= IDE_TFLAG_MULTI_PIO; |
69 | index = 0; | 69 | index = 0; |
70 | } else | 70 | } else |
71 | index = 4; | 71 | index = 4; |
72 | } | 72 | } |
73 | 73 | ||
74 | cmd->tf.command = ide_rw_cmds[index + lba48 + write]; | 74 | cmd->tf.command = ide_rw_cmds[index + lba48 + write]; |
75 | } | 75 | } |
76 | 76 | ||
77 | /* | 77 | /* |
78 | * __ide_do_rw_disk() issues READ and WRITE commands to a disk, | 78 | * __ide_do_rw_disk() issues READ and WRITE commands to a disk, |
79 | * using LBA if supported, or CHS otherwise, to address sectors. | 79 | * using LBA if supported, or CHS otherwise, to address sectors. |
80 | */ | 80 | */ |
81 | static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq, | 81 | static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq, |
82 | sector_t block) | 82 | sector_t block) |
83 | { | 83 | { |
84 | ide_hwif_t *hwif = drive->hwif; | 84 | ide_hwif_t *hwif = drive->hwif; |
85 | u16 nsectors = (u16)blk_rq_sectors(rq); | 85 | u16 nsectors = (u16)blk_rq_sectors(rq); |
86 | u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48); | 86 | u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48); |
87 | u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); | 87 | u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); |
88 | struct ide_cmd cmd; | 88 | struct ide_cmd cmd; |
89 | struct ide_taskfile *tf = &cmd.tf; | 89 | struct ide_taskfile *tf = &cmd.tf; |
90 | ide_startstop_t rc; | 90 | ide_startstop_t rc; |
91 | 91 | ||
92 | if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) { | 92 | if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) { |
93 | if (block + blk_rq_sectors(rq) > 1ULL << 28) | 93 | if (block + blk_rq_sectors(rq) > 1ULL << 28) |
94 | dma = 0; | 94 | dma = 0; |
95 | else | 95 | else |
96 | lba48 = 0; | 96 | lba48 = 0; |
97 | } | 97 | } |
98 | 98 | ||
99 | memset(&cmd, 0, sizeof(cmd)); | 99 | memset(&cmd, 0, sizeof(cmd)); |
100 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 100 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
101 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; | 101 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; |
102 | 102 | ||
103 | if (drive->dev_flags & IDE_DFLAG_LBA) { | 103 | if (drive->dev_flags & IDE_DFLAG_LBA) { |
104 | if (lba48) { | 104 | if (lba48) { |
105 | pr_debug("%s: LBA=0x%012llx\n", drive->name, | 105 | pr_debug("%s: LBA=0x%012llx\n", drive->name, |
106 | (unsigned long long)block); | 106 | (unsigned long long)block); |
107 | 107 | ||
108 | tf->nsect = nsectors & 0xff; | 108 | tf->nsect = nsectors & 0xff; |
109 | tf->lbal = (u8) block; | 109 | tf->lbal = (u8) block; |
110 | tf->lbam = (u8)(block >> 8); | 110 | tf->lbam = (u8)(block >> 8); |
111 | tf->lbah = (u8)(block >> 16); | 111 | tf->lbah = (u8)(block >> 16); |
112 | tf->device = ATA_LBA; | 112 | tf->device = ATA_LBA; |
113 | 113 | ||
114 | tf = &cmd.hob; | 114 | tf = &cmd.hob; |
115 | tf->nsect = (nsectors >> 8) & 0xff; | 115 | tf->nsect = (nsectors >> 8) & 0xff; |
116 | tf->lbal = (u8)(block >> 24); | 116 | tf->lbal = (u8)(block >> 24); |
117 | if (sizeof(block) != 4) { | 117 | if (sizeof(block) != 4) { |
118 | tf->lbam = (u8)((u64)block >> 32); | 118 | tf->lbam = (u8)((u64)block >> 32); |
119 | tf->lbah = (u8)((u64)block >> 40); | 119 | tf->lbah = (u8)((u64)block >> 40); |
120 | } | 120 | } |
121 | 121 | ||
122 | cmd.valid.out.hob = IDE_VALID_OUT_HOB; | 122 | cmd.valid.out.hob = IDE_VALID_OUT_HOB; |
123 | cmd.valid.in.hob = IDE_VALID_IN_HOB; | 123 | cmd.valid.in.hob = IDE_VALID_IN_HOB; |
124 | cmd.tf_flags |= IDE_TFLAG_LBA48; | 124 | cmd.tf_flags |= IDE_TFLAG_LBA48; |
125 | } else { | 125 | } else { |
126 | tf->nsect = nsectors & 0xff; | 126 | tf->nsect = nsectors & 0xff; |
127 | tf->lbal = block; | 127 | tf->lbal = block; |
128 | tf->lbam = block >>= 8; | 128 | tf->lbam = block >>= 8; |
129 | tf->lbah = block >>= 8; | 129 | tf->lbah = block >>= 8; |
130 | tf->device = ((block >> 8) & 0xf) | ATA_LBA; | 130 | tf->device = ((block >> 8) & 0xf) | ATA_LBA; |
131 | } | 131 | } |
132 | } else { | 132 | } else { |
133 | unsigned int sect, head, cyl, track; | 133 | unsigned int sect, head, cyl, track; |
134 | 134 | ||
135 | track = (int)block / drive->sect; | 135 | track = (int)block / drive->sect; |
136 | sect = (int)block % drive->sect + 1; | 136 | sect = (int)block % drive->sect + 1; |
137 | head = track % drive->head; | 137 | head = track % drive->head; |
138 | cyl = track / drive->head; | 138 | cyl = track / drive->head; |
139 | 139 | ||
140 | pr_debug("%s: CHS=%u/%u/%u\n", drive->name, cyl, head, sect); | 140 | pr_debug("%s: CHS=%u/%u/%u\n", drive->name, cyl, head, sect); |
141 | 141 | ||
142 | tf->nsect = nsectors & 0xff; | 142 | tf->nsect = nsectors & 0xff; |
143 | tf->lbal = sect; | 143 | tf->lbal = sect; |
144 | tf->lbam = cyl; | 144 | tf->lbam = cyl; |
145 | tf->lbah = cyl >> 8; | 145 | tf->lbah = cyl >> 8; |
146 | tf->device = head; | 146 | tf->device = head; |
147 | } | 147 | } |
148 | 148 | ||
149 | cmd.tf_flags |= IDE_TFLAG_FS; | 149 | cmd.tf_flags |= IDE_TFLAG_FS; |
150 | 150 | ||
151 | if (rq_data_dir(rq)) | 151 | if (rq_data_dir(rq)) |
152 | cmd.tf_flags |= IDE_TFLAG_WRITE; | 152 | cmd.tf_flags |= IDE_TFLAG_WRITE; |
153 | 153 | ||
154 | ide_tf_set_cmd(drive, &cmd, dma); | 154 | ide_tf_set_cmd(drive, &cmd, dma); |
155 | cmd.rq = rq; | 155 | cmd.rq = rq; |
156 | 156 | ||
157 | if (dma == 0) { | 157 | if (dma == 0) { |
158 | ide_init_sg_cmd(&cmd, nsectors << 9); | 158 | ide_init_sg_cmd(&cmd, nsectors << 9); |
159 | ide_map_sg(drive, &cmd); | 159 | ide_map_sg(drive, &cmd); |
160 | } | 160 | } |
161 | 161 | ||
162 | rc = do_rw_taskfile(drive, &cmd); | 162 | rc = do_rw_taskfile(drive, &cmd); |
163 | 163 | ||
164 | if (rc == ide_stopped && dma) { | 164 | if (rc == ide_stopped && dma) { |
165 | /* fallback to PIO */ | 165 | /* fallback to PIO */ |
166 | cmd.tf_flags |= IDE_TFLAG_DMA_PIO_FALLBACK; | 166 | cmd.tf_flags |= IDE_TFLAG_DMA_PIO_FALLBACK; |
167 | ide_tf_set_cmd(drive, &cmd, 0); | 167 | ide_tf_set_cmd(drive, &cmd, 0); |
168 | ide_init_sg_cmd(&cmd, nsectors << 9); | 168 | ide_init_sg_cmd(&cmd, nsectors << 9); |
169 | rc = do_rw_taskfile(drive, &cmd); | 169 | rc = do_rw_taskfile(drive, &cmd); |
170 | } | 170 | } |
171 | 171 | ||
172 | return rc; | 172 | return rc; |
173 | } | 173 | } |
174 | 174 | ||
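For drives without LBA, __ide_do_rw_disk() above falls back to CHS addressing, which is just a base conversion of the linear block number using the drive geometry (drive->sect sectors per track, drive->head heads). A small standalone illustration of that arithmetic, using a hypothetical geometry rather than driver code:

/* Standalone illustration of the CHS math in __ide_do_rw_disk(). */
#include <stdio.h>

int main(void)
{
	unsigned int sect_per_track = 63, heads = 16;	/* hypothetical geometry */
	unsigned long long block = 123456;		/* linear sector number */

	unsigned int track = block / sect_per_track;
	unsigned int sect  = block % sect_per_track + 1;	/* sectors are 1-based */
	unsigned int head  = track % heads;
	unsigned int cyl   = track / heads;

	printf("block %llu -> CHS %u/%u/%u\n", block, cyl, head, sect);
	return 0;
}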
175 | /* | 175 | /* |
176 | * 268435455 == 137439 MB or 28bit limit | 176 | * 268435455 == 137439 MB or 28bit limit |
177 | * 320173056 == 163929 MB or 48bit addressing | 177 | * 320173056 == 163929 MB or 48bit addressing |
178 | * 1073741822 == 549756 MB or 48bit addressing fake drive | 178 | * 1073741822 == 549756 MB or 48bit addressing fake drive |
179 | */ | 179 | */ |
180 | 180 | ||
181 | static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, | 181 | static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, |
182 | sector_t block) | 182 | sector_t block) |
183 | { | 183 | { |
184 | ide_hwif_t *hwif = drive->hwif; | 184 | ide_hwif_t *hwif = drive->hwif; |
185 | 185 | ||
186 | BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); | 186 | BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); |
187 | BUG_ON(rq->cmd_type != REQ_TYPE_FS); | 187 | BUG_ON(rq->cmd_type != REQ_TYPE_FS); |
188 | 188 | ||
189 | ledtrig_ide_activity(); | 189 | ledtrig_ide_activity(); |
190 | 190 | ||
191 | pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n", | 191 | pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n", |
192 | drive->name, rq_data_dir(rq) == READ ? "read" : "writ", | 192 | drive->name, rq_data_dir(rq) == READ ? "read" : "writ", |
193 | (unsigned long long)block, blk_rq_sectors(rq), | 193 | (unsigned long long)block, blk_rq_sectors(rq), |
194 | (unsigned long)rq->buffer); | 194 | (unsigned long)rq->buffer); |
195 | 195 | ||
196 | if (hwif->rw_disk) | 196 | if (hwif->rw_disk) |
197 | hwif->rw_disk(drive, rq); | 197 | hwif->rw_disk(drive, rq); |
198 | 198 | ||
199 | return __ide_do_rw_disk(drive, rq, block); | 199 | return __ide_do_rw_disk(drive, rq, block); |
200 | } | 200 | } |
201 | 201 | ||
202 | /* | 202 | /* |
203 | * Queries for true maximum capacity of the drive. | 203 | * Queries for true maximum capacity of the drive. |
204 | * Returns maximum LBA address (> 0) of the drive, 0 if failed. | 204 | * Returns maximum LBA address (> 0) of the drive, 0 if failed. |
205 | */ | 205 | */ |
206 | static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48) | 206 | static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48) |
207 | { | 207 | { |
208 | struct ide_cmd cmd; | 208 | struct ide_cmd cmd; |
209 | struct ide_taskfile *tf = &cmd.tf; | 209 | struct ide_taskfile *tf = &cmd.tf; |
210 | u64 addr = 0; | 210 | u64 addr = 0; |
211 | 211 | ||
212 | memset(&cmd, 0, sizeof(cmd)); | 212 | memset(&cmd, 0, sizeof(cmd)); |
213 | if (lba48) | 213 | if (lba48) |
214 | tf->command = ATA_CMD_READ_NATIVE_MAX_EXT; | 214 | tf->command = ATA_CMD_READ_NATIVE_MAX_EXT; |
215 | else | 215 | else |
216 | tf->command = ATA_CMD_READ_NATIVE_MAX; | 216 | tf->command = ATA_CMD_READ_NATIVE_MAX; |
217 | tf->device = ATA_LBA; | 217 | tf->device = ATA_LBA; |
218 | 218 | ||
219 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 219 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
220 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; | 220 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; |
221 | if (lba48) { | 221 | if (lba48) { |
222 | cmd.valid.out.hob = IDE_VALID_OUT_HOB; | 222 | cmd.valid.out.hob = IDE_VALID_OUT_HOB; |
223 | cmd.valid.in.hob = IDE_VALID_IN_HOB; | 223 | cmd.valid.in.hob = IDE_VALID_IN_HOB; |
224 | cmd.tf_flags = IDE_TFLAG_LBA48; | 224 | cmd.tf_flags = IDE_TFLAG_LBA48; |
225 | } | 225 | } |
226 | 226 | ||
227 | ide_no_data_taskfile(drive, &cmd); | 227 | ide_no_data_taskfile(drive, &cmd); |
228 | 228 | ||
229 | /* if OK, compute maximum address value */ | 229 | /* if OK, compute maximum address value */ |
230 | if (!(tf->status & ATA_ERR)) | 230 | if (!(tf->status & ATA_ERR)) |
231 | addr = ide_get_lba_addr(&cmd, lba48) + 1; | 231 | addr = ide_get_lba_addr(&cmd, lba48) + 1; |
232 | 232 | ||
233 | return addr; | 233 | return addr; |
234 | } | 234 | } |
235 | 235 | ||
236 | /* | 236 | /* |
237 | * Sets maximum virtual LBA address of the drive. | 237 | * Sets maximum virtual LBA address of the drive. |
238 | * Returns new maximum virtual LBA address (> 0) or 0 on failure. | 238 | * Returns new maximum virtual LBA address (> 0) or 0 on failure. |
239 | */ | 239 | */ |
240 | static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48) | 240 | static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48) |
241 | { | 241 | { |
242 | struct ide_cmd cmd; | 242 | struct ide_cmd cmd; |
243 | struct ide_taskfile *tf = &cmd.tf; | 243 | struct ide_taskfile *tf = &cmd.tf; |
244 | u64 addr_set = 0; | 244 | u64 addr_set = 0; |
245 | 245 | ||
246 | addr_req--; | 246 | addr_req--; |
247 | 247 | ||
248 | memset(&cmd, 0, sizeof(cmd)); | 248 | memset(&cmd, 0, sizeof(cmd)); |
249 | tf->lbal = (addr_req >> 0) & 0xff; | 249 | tf->lbal = (addr_req >> 0) & 0xff; |
250 | tf->lbam = (addr_req >>= 8) & 0xff; | 250 | tf->lbam = (addr_req >>= 8) & 0xff; |
251 | tf->lbah = (addr_req >>= 8) & 0xff; | 251 | tf->lbah = (addr_req >>= 8) & 0xff; |
252 | if (lba48) { | 252 | if (lba48) { |
253 | cmd.hob.lbal = (addr_req >>= 8) & 0xff; | 253 | cmd.hob.lbal = (addr_req >>= 8) & 0xff; |
254 | cmd.hob.lbam = (addr_req >>= 8) & 0xff; | 254 | cmd.hob.lbam = (addr_req >>= 8) & 0xff; |
255 | cmd.hob.lbah = (addr_req >>= 8) & 0xff; | 255 | cmd.hob.lbah = (addr_req >>= 8) & 0xff; |
256 | tf->command = ATA_CMD_SET_MAX_EXT; | 256 | tf->command = ATA_CMD_SET_MAX_EXT; |
257 | } else { | 257 | } else { |
258 | tf->device = (addr_req >>= 8) & 0x0f; | 258 | tf->device = (addr_req >>= 8) & 0x0f; |
259 | tf->command = ATA_CMD_SET_MAX; | 259 | tf->command = ATA_CMD_SET_MAX; |
260 | } | 260 | } |
261 | tf->device |= ATA_LBA; | 261 | tf->device |= ATA_LBA; |
262 | 262 | ||
263 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 263 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
264 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; | 264 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; |
265 | if (lba48) { | 265 | if (lba48) { |
266 | cmd.valid.out.hob = IDE_VALID_OUT_HOB; | 266 | cmd.valid.out.hob = IDE_VALID_OUT_HOB; |
267 | cmd.valid.in.hob = IDE_VALID_IN_HOB; | 267 | cmd.valid.in.hob = IDE_VALID_IN_HOB; |
268 | cmd.tf_flags = IDE_TFLAG_LBA48; | 268 | cmd.tf_flags = IDE_TFLAG_LBA48; |
269 | } | 269 | } |
270 | 270 | ||
271 | ide_no_data_taskfile(drive, &cmd); | 271 | ide_no_data_taskfile(drive, &cmd); |
272 | 272 | ||
273 | /* if OK, compute maximum address value */ | 273 | /* if OK, compute maximum address value */ |
274 | if (!(tf->status & ATA_ERR)) | 274 | if (!(tf->status & ATA_ERR)) |
275 | addr_set = ide_get_lba_addr(&cmd, lba48) + 1; | 275 | addr_set = ide_get_lba_addr(&cmd, lba48) + 1; |
276 | 276 | ||
277 | return addr_set; | 277 | return addr_set; |
278 | } | 278 | } |
279 | 279 | ||
280 | static unsigned long long sectors_to_MB(unsigned long long n) | 280 | static unsigned long long sectors_to_MB(unsigned long long n) |
281 | { | 281 | { |
282 | n <<= 9; /* make it bytes */ | 282 | n <<= 9; /* make it bytes */ |
283 | do_div(n, 1000000); /* make it MB */ | 283 | do_div(n, 1000000); /* make it MB */ |
284 | return n; | 284 | return n; |
285 | } | 285 | } |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * Some disks report total number of sectors instead of | 288 | * Some disks report total number of sectors instead of |
289 | * maximum sector address. We list them here. | 289 | * maximum sector address. We list them here. |
290 | */ | 290 | */ |
291 | static const struct drive_list_entry hpa_list[] = { | 291 | static const struct drive_list_entry hpa_list[] = { |
292 | { "ST340823A", NULL }, | 292 | { "ST340823A", NULL }, |
293 | { "ST320413A", NULL }, | 293 | { "ST320413A", NULL }, |
294 | { "ST310211A", NULL }, | 294 | { "ST310211A", NULL }, |
295 | { NULL, NULL } | 295 | { NULL, NULL } |
296 | }; | 296 | }; |
297 | 297 | ||
298 | static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48) | 298 | static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48) |
299 | { | 299 | { |
300 | u64 capacity, set_max; | 300 | u64 capacity, set_max; |
301 | 301 | ||
302 | capacity = drive->capacity64; | 302 | capacity = drive->capacity64; |
303 | set_max = idedisk_read_native_max_address(drive, lba48); | 303 | set_max = idedisk_read_native_max_address(drive, lba48); |
304 | 304 | ||
305 | if (ide_in_drive_list(drive->id, hpa_list)) { | 305 | if (ide_in_drive_list(drive->id, hpa_list)) { |
306 | /* | 306 | /* |
307 | * Since we are inclusive wrt firmware revisions do this | 307 | * Since we are inclusive wrt firmware revisions do this |
308 | * extra check and apply the workaround only when needed. | 308 | * extra check and apply the workaround only when needed. |
309 | */ | 309 | */ |
310 | if (set_max == capacity + 1) | 310 | if (set_max == capacity + 1) |
311 | set_max--; | 311 | set_max--; |
312 | } | 312 | } |
313 | 313 | ||
314 | return set_max; | 314 | return set_max; |
315 | } | 315 | } |
316 | 316 | ||
317 | static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48) | 317 | static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48) |
318 | { | 318 | { |
319 | set_max = idedisk_set_max_address(drive, set_max, lba48); | 319 | set_max = idedisk_set_max_address(drive, set_max, lba48); |
320 | if (set_max) | 320 | if (set_max) |
321 | drive->capacity64 = set_max; | 321 | drive->capacity64 = set_max; |
322 | 322 | ||
323 | return set_max; | 323 | return set_max; |
324 | } | 324 | } |
325 | 325 | ||
326 | static void idedisk_check_hpa(ide_drive_t *drive) | 326 | static void idedisk_check_hpa(ide_drive_t *drive) |
327 | { | 327 | { |
328 | u64 capacity, set_max; | 328 | u64 capacity, set_max; |
329 | int lba48 = ata_id_lba48_enabled(drive->id); | 329 | int lba48 = ata_id_lba48_enabled(drive->id); |
330 | 330 | ||
331 | capacity = drive->capacity64; | 331 | capacity = drive->capacity64; |
332 | set_max = ide_disk_hpa_get_native_capacity(drive, lba48); | 332 | set_max = ide_disk_hpa_get_native_capacity(drive, lba48); |
333 | 333 | ||
334 | if (set_max <= capacity) | 334 | if (set_max <= capacity) |
335 | return; | 335 | return; |
336 | 336 | ||
337 | drive->probed_capacity = set_max; | 337 | drive->probed_capacity = set_max; |
338 | 338 | ||
339 | printk(KERN_INFO "%s: Host Protected Area detected.\n" | 339 | printk(KERN_INFO "%s: Host Protected Area detected.\n" |
340 | "\tcurrent capacity is %llu sectors (%llu MB)\n" | 340 | "\tcurrent capacity is %llu sectors (%llu MB)\n" |
341 | "\tnative capacity is %llu sectors (%llu MB)\n", | 341 | "\tnative capacity is %llu sectors (%llu MB)\n", |
342 | drive->name, | 342 | drive->name, |
343 | capacity, sectors_to_MB(capacity), | 343 | capacity, sectors_to_MB(capacity), |
344 | set_max, sectors_to_MB(set_max)); | 344 | set_max, sectors_to_MB(set_max)); |
345 | 345 | ||
346 | if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0) | 346 | if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0) |
347 | return; | 347 | return; |
348 | 348 | ||
349 | set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48); | 349 | set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48); |
350 | if (set_max) | 350 | if (set_max) |
351 | printk(KERN_INFO "%s: Host Protected Area disabled.\n", | 351 | printk(KERN_INFO "%s: Host Protected Area disabled.\n", |
352 | drive->name); | 352 | drive->name); |
353 | } | 353 | } |
354 | 354 | ||
355 | static int ide_disk_get_capacity(ide_drive_t *drive) | 355 | static int ide_disk_get_capacity(ide_drive_t *drive) |
356 | { | 356 | { |
357 | u16 *id = drive->id; | 357 | u16 *id = drive->id; |
358 | int lba; | 358 | int lba; |
359 | 359 | ||
360 | if (ata_id_lba48_enabled(id)) { | 360 | if (ata_id_lba48_enabled(id)) { |
361 | /* drive speaks 48-bit LBA */ | 361 | /* drive speaks 48-bit LBA */ |
362 | lba = 1; | 362 | lba = 1; |
363 | drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); | 363 | drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); |
364 | } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) { | 364 | } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) { |
365 | /* drive speaks 28-bit LBA */ | 365 | /* drive speaks 28-bit LBA */ |
366 | lba = 1; | 366 | lba = 1; |
367 | drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY); | 367 | drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY); |
368 | } else { | 368 | } else { |
369 | /* drive speaks boring old 28-bit CHS */ | 369 | /* drive speaks boring old 28-bit CHS */ |
370 | lba = 0; | 370 | lba = 0; |
371 | drive->capacity64 = drive->cyl * drive->head * drive->sect; | 371 | drive->capacity64 = drive->cyl * drive->head * drive->sect; |
372 | } | 372 | } |
373 | 373 | ||
374 | drive->probed_capacity = drive->capacity64; | 374 | drive->probed_capacity = drive->capacity64; |
375 | 375 | ||
376 | if (lba) { | 376 | if (lba) { |
377 | drive->dev_flags |= IDE_DFLAG_LBA; | 377 | drive->dev_flags |= IDE_DFLAG_LBA; |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * If this device supports the Host Protected Area feature set, | 380 | * If this device supports the Host Protected Area feature set, |
381 | * then we may need to change our opinion about its capacity. | 381 | * then we may need to change our opinion about its capacity. |
382 | */ | 382 | */ |
383 | if (ata_id_hpa_enabled(id)) | 383 | if (ata_id_hpa_enabled(id)) |
384 | idedisk_check_hpa(drive); | 384 | idedisk_check_hpa(drive); |
385 | } | 385 | } |
386 | 386 | ||
387 | /* limit drive capacity to 137GB if LBA48 cannot be used */ | 387 | /* limit drive capacity to 137GB if LBA48 cannot be used */ |
388 | if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 && | 388 | if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 && |
389 | drive->capacity64 > 1ULL << 28) { | 389 | drive->capacity64 > 1ULL << 28) { |
390 | printk(KERN_WARNING "%s: cannot use LBA48 - full capacity " | 390 | printk(KERN_WARNING "%s: cannot use LBA48 - full capacity " |
391 | "%llu sectors (%llu MB)\n", | 391 | "%llu sectors (%llu MB)\n", |
392 | drive->name, (unsigned long long)drive->capacity64, | 392 | drive->name, (unsigned long long)drive->capacity64, |
393 | sectors_to_MB(drive->capacity64)); | 393 | sectors_to_MB(drive->capacity64)); |
394 | drive->probed_capacity = drive->capacity64 = 1ULL << 28; | 394 | drive->probed_capacity = drive->capacity64 = 1ULL << 28; |
395 | } | 395 | } |
396 | 396 | ||
397 | if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && | 397 | if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && |
398 | (drive->dev_flags & IDE_DFLAG_LBA48)) { | 398 | (drive->dev_flags & IDE_DFLAG_LBA48)) { |
399 | if (drive->capacity64 > 1ULL << 28) { | 399 | if (drive->capacity64 > 1ULL << 28) { |
400 | printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode" | 400 | printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode" |
401 | " will be used for accessing sectors " | 401 | " will be used for accessing sectors " |
402 | "> %u\n", drive->name, 1 << 28); | 402 | "> %u\n", drive->name, 1 << 28); |
403 | } else | 403 | } else |
404 | drive->dev_flags &= ~IDE_DFLAG_LBA48; | 404 | drive->dev_flags &= ~IDE_DFLAG_LBA48; |
405 | } | 405 | } |
406 | 406 | ||
407 | return 0; | 407 | return 0; |
408 | } | 408 | } |
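/*
 * Editor's note, not part of this diff: the 1ULL << 28 cap above is the
 * 28-bit LBA addressing limit -- 2^28 sectors * 512 bytes/sector =
 * 137,438,953,472 bytes, the "137GB" quoted in the warning.  Anything
 * larger needs LBA48 to be fully addressable.
 */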
409 | 409 | ||
410 | static void ide_disk_unlock_native_capacity(ide_drive_t *drive) | 410 | static void ide_disk_unlock_native_capacity(ide_drive_t *drive) |
411 | { | 411 | { |
412 | u16 *id = drive->id; | 412 | u16 *id = drive->id; |
413 | int lba48 = ata_id_lba48_enabled(id); | 413 | int lba48 = ata_id_lba48_enabled(id); |
414 | 414 | ||
415 | if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 || | 415 | if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 || |
416 | ata_id_hpa_enabled(id) == 0) | 416 | ata_id_hpa_enabled(id) == 0) |
417 | return; | 417 | return; |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * according to the spec the SET MAX ADDRESS command shall be | 420 | * according to the spec the SET MAX ADDRESS command shall be |
421 | * immediately preceded by a READ NATIVE MAX ADDRESS command | 421 | * immediately preceded by a READ NATIVE MAX ADDRESS command |
422 | */ | 422 | */ |
423 | if (!ide_disk_hpa_get_native_capacity(drive, lba48)) | 423 | if (!ide_disk_hpa_get_native_capacity(drive, lba48)) |
424 | return; | 424 | return; |
425 | 425 | ||
426 | if (ide_disk_hpa_set_capacity(drive, drive->probed_capacity, lba48)) | 426 | if (ide_disk_hpa_set_capacity(drive, drive->probed_capacity, lba48)) |
427 | drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ | 427 | drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ |
428 | } | 428 | } |
429 | 429 | ||
430 | static int idedisk_prep_fn(struct request_queue *q, struct request *rq) | 430 | static int idedisk_prep_fn(struct request_queue *q, struct request *rq) |
431 | { | 431 | { |
432 | ide_drive_t *drive = q->queuedata; | 432 | ide_drive_t *drive = q->queuedata; |
433 | struct ide_cmd *cmd; | 433 | struct ide_cmd *cmd; |
434 | 434 | ||
435 | if (!(rq->cmd_flags & REQ_FLUSH)) | 435 | if (!(rq->cmd_flags & REQ_FLUSH)) |
436 | return BLKPREP_OK; | 436 | return BLKPREP_OK; |
437 | 437 | ||
438 | cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); | 438 | cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); |
439 | 439 | ||
440 | /* FIXME: map struct ide_taskfile on rq->cmd[] */ | 440 | /* FIXME: map struct ide_taskfile on rq->cmd[] */ |
441 | BUG_ON(cmd == NULL); | 441 | BUG_ON(cmd == NULL); |
442 | 442 | ||
443 | memset(cmd, 0, sizeof(*cmd)); | 443 | memset(cmd, 0, sizeof(*cmd)); |
444 | if (ata_id_flush_ext_enabled(drive->id) && | 444 | if (ata_id_flush_ext_enabled(drive->id) && |
445 | (drive->capacity64 >= (1UL << 28))) | 445 | (drive->capacity64 >= (1UL << 28))) |
446 | cmd->tf.command = ATA_CMD_FLUSH_EXT; | 446 | cmd->tf.command = ATA_CMD_FLUSH_EXT; |
447 | else | 447 | else |
448 | cmd->tf.command = ATA_CMD_FLUSH; | 448 | cmd->tf.command = ATA_CMD_FLUSH; |
449 | cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 449 | cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
450 | cmd->tf_flags = IDE_TFLAG_DYN; | 450 | cmd->tf_flags = IDE_TFLAG_DYN; |
451 | cmd->protocol = ATA_PROT_NODATA; | 451 | cmd->protocol = ATA_PROT_NODATA; |
452 | 452 | ||
453 | rq->cmd_type = REQ_TYPE_ATA_TASKFILE; | 453 | rq->cmd_type = REQ_TYPE_ATA_TASKFILE; |
454 | rq->special = cmd; | 454 | rq->special = cmd; |
455 | cmd->rq = rq; | 455 | cmd->rq = rq; |
456 | 456 | ||
457 | return BLKPREP_OK; | 457 | return BLKPREP_OK; |
458 | } | 458 | } |
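/*
 * Editor's note, not part of this diff: idedisk_prep_fn() above pairs a
 * GFP_ATOMIC allocation with BUG_ON() on failure (hence the FIXME).  A
 * softer alternative, assuming the block layer re-runs prep for a
 * deferred request, would be:
 *
 *	cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
 *	if (!cmd)
 *		return BLKPREP_DEFER;	// retry once memory is available
 *
 * Only an illustration; the diff keeps the BUG_ON().
 */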
459 | 459 | ||
460 | ide_devset_get(multcount, mult_count); | 460 | ide_devset_get(multcount, mult_count); |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * This is tightly woven into the driver; ->do_special cannot touch it. | 463 | * This is tightly woven into the driver; ->do_special cannot touch it. |
464 | * DON'T do it again until a total personality rewrite is committed. | 464 | * DON'T do it again until a total personality rewrite is committed. |
465 | */ | 465 | */ |
466 | static int set_multcount(ide_drive_t *drive, int arg) | 466 | static int set_multcount(ide_drive_t *drive, int arg) |
467 | { | 467 | { |
468 | struct request *rq; | 468 | struct request *rq; |
469 | int error; | 469 | int error; |
470 | 470 | ||
471 | if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) | 471 | if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) |
472 | return -EINVAL; | 472 | return -EINVAL; |
473 | 473 | ||
474 | if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) | 474 | if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) |
475 | return -EBUSY; | 475 | return -EBUSY; |
476 | 476 | ||
477 | rq = blk_get_request(drive->queue, READ, __GFP_WAIT); | 477 | rq = blk_get_request(drive->queue, READ, __GFP_WAIT); |
478 | rq->cmd_type = REQ_TYPE_ATA_TASKFILE; | 478 | rq->cmd_type = REQ_TYPE_ATA_TASKFILE; |
479 | 479 | ||
480 | drive->mult_req = arg; | 480 | drive->mult_req = arg; |
481 | drive->special_flags |= IDE_SFLAG_SET_MULTMODE; | 481 | drive->special_flags |= IDE_SFLAG_SET_MULTMODE; |
482 | error = blk_execute_rq(drive->queue, NULL, rq, 0); | 482 | error = blk_execute_rq(drive->queue, NULL, rq, 0); |
483 | blk_put_request(rq); | 483 | blk_put_request(rq); |
484 | 484 | ||
485 | return (drive->mult_count == arg) ? 0 : -EIO; | 485 | return (drive->mult_count == arg) ? 0 : -EIO; |
486 | } | 486 | } |
487 | 487 | ||
488 | ide_devset_get_flag(nowerr, IDE_DFLAG_NOWERR); | 488 | ide_devset_get_flag(nowerr, IDE_DFLAG_NOWERR); |
489 | 489 | ||
490 | static int set_nowerr(ide_drive_t *drive, int arg) | 490 | static int set_nowerr(ide_drive_t *drive, int arg) |
491 | { | 491 | { |
492 | if (arg < 0 || arg > 1) | 492 | if (arg < 0 || arg > 1) |
493 | return -EINVAL; | 493 | return -EINVAL; |
494 | 494 | ||
495 | if (arg) | 495 | if (arg) |
496 | drive->dev_flags |= IDE_DFLAG_NOWERR; | 496 | drive->dev_flags |= IDE_DFLAG_NOWERR; |
497 | else | 497 | else |
498 | drive->dev_flags &= ~IDE_DFLAG_NOWERR; | 498 | drive->dev_flags &= ~IDE_DFLAG_NOWERR; |
499 | 499 | ||
500 | drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; | 500 | drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; |
501 | 501 | ||
502 | return 0; | 502 | return 0; |
503 | } | 503 | } |
504 | 504 | ||
505 | static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) | 505 | static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) |
506 | { | 506 | { |
507 | struct ide_cmd cmd; | 507 | struct ide_cmd cmd; |
508 | 508 | ||
509 | memset(&cmd, 0, sizeof(cmd)); | 509 | memset(&cmd, 0, sizeof(cmd)); |
510 | cmd.tf.feature = feature; | 510 | cmd.tf.feature = feature; |
511 | cmd.tf.nsect = nsect; | 511 | cmd.tf.nsect = nsect; |
512 | cmd.tf.command = ATA_CMD_SET_FEATURES; | 512 | cmd.tf.command = ATA_CMD_SET_FEATURES; |
513 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 513 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
514 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; | 514 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; |
515 | 515 | ||
516 | return ide_no_data_taskfile(drive, &cmd); | 516 | return ide_no_data_taskfile(drive, &cmd); |
517 | } | 517 | } |
518 | 518 | ||
519 | static void update_ordered(ide_drive_t *drive) | 519 | static void update_flush(ide_drive_t *drive) |
520 | { | 520 | { |
521 | u16 *id = drive->id; | 521 | u16 *id = drive->id; |
522 | unsigned ordered = QUEUE_ORDERED_NONE; | 522 | unsigned flush = 0; |
523 | 523 | ||
524 | if (drive->dev_flags & IDE_DFLAG_WCACHE) { | 524 | if (drive->dev_flags & IDE_DFLAG_WCACHE) { |
525 | unsigned long long capacity; | 525 | unsigned long long capacity; |
526 | int barrier; | 526 | int barrier; |
527 | /* | 527 | /* |
528 | * We must avoid issuing commands a drive does not | 528 | * We must avoid issuing commands a drive does not |
529 | * understand or we may crash it. We check flush cache | 529 | * understand or we may crash it. We check flush cache |
530 | * is supported. We also check we have the LBA48 flush | 530 | * is supported. We also check we have the LBA48 flush |
531 | * cache if the drive capacity is too large. By this | 531 | * cache if the drive capacity is too large. By this |
532 | * time we have trimmed the drive capacity if LBA48 is | 532 | * time we have trimmed the drive capacity if LBA48 is |
533 | * not available so we don't need to recheck that. | 533 | * not available so we don't need to recheck that. |
534 | */ | 534 | */ |
535 | capacity = ide_gd_capacity(drive); | 535 | capacity = ide_gd_capacity(drive); |
536 | barrier = ata_id_flush_enabled(id) && | 536 | barrier = ata_id_flush_enabled(id) && |
537 | (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 && | 537 | (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 && |
538 | ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 || | 538 | ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 || |
539 | capacity <= (1ULL << 28) || | 539 | capacity <= (1ULL << 28) || |
540 | ata_id_flush_ext_enabled(id)); | 540 | ata_id_flush_ext_enabled(id)); |
541 | 541 | ||
542 | printk(KERN_INFO "%s: cache flushes %ssupported\n", | 542 | printk(KERN_INFO "%s: cache flushes %ssupported\n", |
543 | drive->name, barrier ? "" : "not "); | 543 | drive->name, barrier ? "" : "not "); |
544 | 544 | ||
545 | if (barrier) { | 545 | if (barrier) { |
546 | ordered = QUEUE_ORDERED_DRAIN_FLUSH; | 546 | flush = REQ_FLUSH; |
547 | blk_queue_prep_rq(drive->queue, idedisk_prep_fn); | 547 | blk_queue_prep_rq(drive->queue, idedisk_prep_fn); |
548 | } | 548 | } |
549 | } else | 549 | } |
550 | ordered = QUEUE_ORDERED_DRAIN; | ||
551 | 550 | ||
552 | blk_queue_ordered(drive->queue, ordered); | 551 | blk_queue_flush(drive->queue, flush); |
553 | } | 552 | } |
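/*
 * Editor's sketch, not part of this diff: update_flush() above passes
 * REQ_FLUSH to blk_queue_flush() when the drive's write cache is usable
 * and 0 otherwise.  The sketch below shows the same pattern for a
 * hypothetical driver whose hardware can also do FUA writes, in which
 * case REQ_FUA may be ORed in on top of REQ_FLUSH.  "my_queue",
 * "has_wcache" and "has_fua" are made-up names for illustration only.
 */
#include <linux/blkdev.h>

static void my_driver_set_flush(struct request_queue *my_queue,
				bool has_wcache, bool has_fua)
{
	unsigned int flush = 0;

	if (has_wcache) {
		flush |= REQ_FLUSH;		/* cache exists and is flushable */
		if (has_fua)
			flush |= REQ_FUA;	/* FUA only makes sense with FLUSH */
	}

	blk_queue_flush(my_queue, flush);	/* 0 means nothing to flush */
}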
554 | 553 | ||
555 | ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); | 554 | ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); |
556 | 555 | ||
557 | static int set_wcache(ide_drive_t *drive, int arg) | 556 | static int set_wcache(ide_drive_t *drive, int arg) |
558 | { | 557 | { |
559 | int err = 1; | 558 | int err = 1; |
560 | 559 | ||
561 | if (arg < 0 || arg > 1) | 560 | if (arg < 0 || arg > 1) |
562 | return -EINVAL; | 561 | return -EINVAL; |
563 | 562 | ||
564 | if (ata_id_flush_enabled(drive->id)) { | 563 | if (ata_id_flush_enabled(drive->id)) { |
565 | err = ide_do_setfeature(drive, | 564 | err = ide_do_setfeature(drive, |
566 | arg ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF, 0); | 565 | arg ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF, 0); |
567 | if (err == 0) { | 566 | if (err == 0) { |
568 | if (arg) | 567 | if (arg) |
569 | drive->dev_flags |= IDE_DFLAG_WCACHE; | 568 | drive->dev_flags |= IDE_DFLAG_WCACHE; |
570 | else | 569 | else |
571 | drive->dev_flags &= ~IDE_DFLAG_WCACHE; | 570 | drive->dev_flags &= ~IDE_DFLAG_WCACHE; |
572 | } | 571 | } |
573 | } | 572 | } |
574 | 573 | ||
575 | update_ordered(drive); | 574 | update_flush(drive); |
576 | 575 | ||
577 | return err; | 576 | return err; |
578 | } | 577 | } |
579 | 578 | ||
580 | static int do_idedisk_flushcache(ide_drive_t *drive) | 579 | static int do_idedisk_flushcache(ide_drive_t *drive) |
581 | { | 580 | { |
582 | struct ide_cmd cmd; | 581 | struct ide_cmd cmd; |
583 | 582 | ||
584 | memset(&cmd, 0, sizeof(cmd)); | 583 | memset(&cmd, 0, sizeof(cmd)); |
585 | if (ata_id_flush_ext_enabled(drive->id)) | 584 | if (ata_id_flush_ext_enabled(drive->id)) |
586 | cmd.tf.command = ATA_CMD_FLUSH_EXT; | 585 | cmd.tf.command = ATA_CMD_FLUSH_EXT; |
587 | else | 586 | else |
588 | cmd.tf.command = ATA_CMD_FLUSH; | 587 | cmd.tf.command = ATA_CMD_FLUSH; |
589 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 588 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
590 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; | 589 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; |
591 | 590 | ||
592 | return ide_no_data_taskfile(drive, &cmd); | 591 | return ide_no_data_taskfile(drive, &cmd); |
593 | } | 592 | } |
594 | 593 | ||
595 | ide_devset_get(acoustic, acoustic); | 594 | ide_devset_get(acoustic, acoustic); |
596 | 595 | ||
597 | static int set_acoustic(ide_drive_t *drive, int arg) | 596 | static int set_acoustic(ide_drive_t *drive, int arg) |
598 | { | 597 | { |
599 | if (arg < 0 || arg > 254) | 598 | if (arg < 0 || arg > 254) |
600 | return -EINVAL; | 599 | return -EINVAL; |
601 | 600 | ||
602 | ide_do_setfeature(drive, | 601 | ide_do_setfeature(drive, |
603 | arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF, arg); | 602 | arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF, arg); |
604 | 603 | ||
605 | drive->acoustic = arg; | 604 | drive->acoustic = arg; |
606 | 605 | ||
607 | return 0; | 606 | return 0; |
608 | } | 607 | } |
609 | 608 | ||
610 | ide_devset_get_flag(addressing, IDE_DFLAG_LBA48); | 609 | ide_devset_get_flag(addressing, IDE_DFLAG_LBA48); |
611 | 610 | ||
612 | /* | 611 | /* |
613 | * drive->addressing: | 612 | * drive->addressing: |
614 | * 0: 28-bit | 613 | * 0: 28-bit |
615 | * 1: 48-bit | 614 | * 1: 48-bit |
616 | * 2: 48-bit capable doing 28-bit | 615 | * 2: 48-bit capable doing 28-bit |
617 | */ | 616 | */ |
618 | static int set_addressing(ide_drive_t *drive, int arg) | 617 | static int set_addressing(ide_drive_t *drive, int arg) |
619 | { | 618 | { |
620 | if (arg < 0 || arg > 2) | 619 | if (arg < 0 || arg > 2) |
621 | return -EINVAL; | 620 | return -EINVAL; |
622 | 621 | ||
623 | if (arg && ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) || | 622 | if (arg && ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) || |
624 | ata_id_lba48_enabled(drive->id) == 0)) | 623 | ata_id_lba48_enabled(drive->id) == 0)) |
625 | return -EIO; | 624 | return -EIO; |
626 | 625 | ||
627 | if (arg == 2) | 626 | if (arg == 2) |
628 | arg = 0; | 627 | arg = 0; |
629 | 628 | ||
630 | if (arg) | 629 | if (arg) |
631 | drive->dev_flags |= IDE_DFLAG_LBA48; | 630 | drive->dev_flags |= IDE_DFLAG_LBA48; |
632 | else | 631 | else |
633 | drive->dev_flags &= ~IDE_DFLAG_LBA48; | 632 | drive->dev_flags &= ~IDE_DFLAG_LBA48; |
634 | 633 | ||
635 | return 0; | 634 | return 0; |
636 | } | 635 | } |
637 | 636 | ||
638 | ide_ext_devset_rw(acoustic, acoustic); | 637 | ide_ext_devset_rw(acoustic, acoustic); |
639 | ide_ext_devset_rw(address, addressing); | 638 | ide_ext_devset_rw(address, addressing); |
640 | ide_ext_devset_rw(multcount, multcount); | 639 | ide_ext_devset_rw(multcount, multcount); |
641 | ide_ext_devset_rw(wcache, wcache); | 640 | ide_ext_devset_rw(wcache, wcache); |
642 | 641 | ||
643 | ide_ext_devset_rw_sync(nowerr, nowerr); | 642 | ide_ext_devset_rw_sync(nowerr, nowerr); |
644 | 643 | ||
645 | static int ide_disk_check(ide_drive_t *drive, const char *s) | 644 | static int ide_disk_check(ide_drive_t *drive, const char *s) |
646 | { | 645 | { |
647 | return 1; | 646 | return 1; |
648 | } | 647 | } |
649 | 648 | ||
650 | static void ide_disk_setup(ide_drive_t *drive) | 649 | static void ide_disk_setup(ide_drive_t *drive) |
651 | { | 650 | { |
652 | struct ide_disk_obj *idkp = drive->driver_data; | 651 | struct ide_disk_obj *idkp = drive->driver_data; |
653 | struct request_queue *q = drive->queue; | 652 | struct request_queue *q = drive->queue; |
654 | ide_hwif_t *hwif = drive->hwif; | 653 | ide_hwif_t *hwif = drive->hwif; |
655 | u16 *id = drive->id; | 654 | u16 *id = drive->id; |
656 | char *m = (char *)&id[ATA_ID_PROD]; | 655 | char *m = (char *)&id[ATA_ID_PROD]; |
657 | unsigned long long capacity; | 656 | unsigned long long capacity; |
658 | 657 | ||
659 | ide_proc_register_driver(drive, idkp->driver); | 658 | ide_proc_register_driver(drive, idkp->driver); |
660 | 659 | ||
661 | if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) | 660 | if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) |
662 | return; | 661 | return; |
663 | 662 | ||
664 | if (drive->dev_flags & IDE_DFLAG_REMOVABLE) { | 663 | if (drive->dev_flags & IDE_DFLAG_REMOVABLE) { |
665 | /* | 664 | /* |
666 | * Removable disks (eg. SYQUEST); ignore 'WD' drives | 665 | * Removable disks (eg. SYQUEST); ignore 'WD' drives |
667 | */ | 666 | */ |
668 | if (m[0] != 'W' || m[1] != 'D') | 667 | if (m[0] != 'W' || m[1] != 'D') |
669 | drive->dev_flags |= IDE_DFLAG_DOORLOCKING; | 668 | drive->dev_flags |= IDE_DFLAG_DOORLOCKING; |
670 | } | 669 | } |
671 | 670 | ||
672 | (void)set_addressing(drive, 1); | 671 | (void)set_addressing(drive, 1); |
673 | 672 | ||
674 | if (drive->dev_flags & IDE_DFLAG_LBA48) { | 673 | if (drive->dev_flags & IDE_DFLAG_LBA48) { |
675 | int max_s = 2048; | 674 | int max_s = 2048; |
676 | 675 | ||
677 | if (max_s > hwif->rqsize) | 676 | if (max_s > hwif->rqsize) |
678 | max_s = hwif->rqsize; | 677 | max_s = hwif->rqsize; |
679 | 678 | ||
680 | blk_queue_max_hw_sectors(q, max_s); | 679 | blk_queue_max_hw_sectors(q, max_s); |
681 | } | 680 | } |
682 | 681 | ||
683 | printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name, | 682 | printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name, |
684 | queue_max_sectors(q) / 2); | 683 | queue_max_sectors(q) / 2); |
685 | 684 | ||
686 | if (ata_id_is_ssd(id)) | 685 | if (ata_id_is_ssd(id)) |
687 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); | 686 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); |
688 | 687 | ||
689 | /* calculate drive capacity, and select LBA if possible */ | 688 | /* calculate drive capacity, and select LBA if possible */ |
690 | ide_disk_get_capacity(drive); | 689 | ide_disk_get_capacity(drive); |
691 | 690 | ||
692 | /* | 691 | /* |
693 | * if possible, give fdisk access to more of the drive, | 692 | * if possible, give fdisk access to more of the drive, |
694 | * by correcting bios_cyls: | 693 | * by correcting bios_cyls: |
695 | */ | 694 | */ |
696 | capacity = ide_gd_capacity(drive); | 695 | capacity = ide_gd_capacity(drive); |
697 | 696 | ||
698 | if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) { | 697 | if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) { |
699 | if (ata_id_lba48_enabled(drive->id)) { | 698 | if (ata_id_lba48_enabled(drive->id)) { |
700 | /* compatibility */ | 699 | /* compatibility */ |
701 | drive->bios_sect = 63; | 700 | drive->bios_sect = 63; |
702 | drive->bios_head = 255; | 701 | drive->bios_head = 255; |
703 | } | 702 | } |
704 | 703 | ||
705 | if (drive->bios_sect && drive->bios_head) { | 704 | if (drive->bios_sect && drive->bios_head) { |
706 | unsigned int cap0 = capacity; /* truncate to 32 bits */ | 705 | unsigned int cap0 = capacity; /* truncate to 32 bits */ |
707 | unsigned int cylsz, cyl; | 706 | unsigned int cylsz, cyl; |
708 | 707 | ||
709 | if (cap0 != capacity) | 708 | if (cap0 != capacity) |
710 | drive->bios_cyl = 65535; | 709 | drive->bios_cyl = 65535; |
711 | else { | 710 | else { |
712 | cylsz = drive->bios_sect * drive->bios_head; | 711 | cylsz = drive->bios_sect * drive->bios_head; |
713 | cyl = cap0 / cylsz; | 712 | cyl = cap0 / cylsz; |
714 | if (cyl > 65535) | 713 | if (cyl > 65535) |
715 | cyl = 65535; | 714 | cyl = 65535; |
716 | if (cyl > drive->bios_cyl) | 715 | if (cyl > drive->bios_cyl) |
717 | drive->bios_cyl = cyl; | 716 | drive->bios_cyl = cyl; |
718 | } | 717 | } |
719 | } | 718 | } |
720 | } | 719 | } |
721 | printk(KERN_INFO "%s: %llu sectors (%llu MB)", | 720 | printk(KERN_INFO "%s: %llu sectors (%llu MB)", |
722 | drive->name, capacity, sectors_to_MB(capacity)); | 721 | drive->name, capacity, sectors_to_MB(capacity)); |
723 | 722 | ||
724 | /* Only print cache size when it was specified */ | 723 | /* Only print cache size when it was specified */ |
725 | if (id[ATA_ID_BUF_SIZE]) | 724 | if (id[ATA_ID_BUF_SIZE]) |
726 | printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2); | 725 | printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2); |
727 | 726 | ||
728 | printk(KERN_CONT ", CHS=%d/%d/%d\n", | 727 | printk(KERN_CONT ", CHS=%d/%d/%d\n", |
729 | drive->bios_cyl, drive->bios_head, drive->bios_sect); | 728 | drive->bios_cyl, drive->bios_head, drive->bios_sect); |
730 | 729 | ||
731 | /* write cache enabled? */ | 730 | /* write cache enabled? */ |
732 | if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id)) | 731 | if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id)) |
733 | drive->dev_flags |= IDE_DFLAG_WCACHE; | 732 | drive->dev_flags |= IDE_DFLAG_WCACHE; |
734 | 733 | ||
735 | set_wcache(drive, 1); | 734 | set_wcache(drive, 1); |
736 | 735 | ||
737 | if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 && | 736 | if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 && |
738 | (drive->head == 0 || drive->head > 16)) { | 737 | (drive->head == 0 || drive->head > 16)) { |
739 | printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n", | 738 | printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n", |
740 | drive->name, drive->head); | 739 | drive->name, drive->head); |
741 | drive->dev_flags &= ~IDE_DFLAG_ATTACH; | 740 | drive->dev_flags &= ~IDE_DFLAG_ATTACH; |
742 | } else | 741 | } else |
743 | drive->dev_flags |= IDE_DFLAG_ATTACH; | 742 | drive->dev_flags |= IDE_DFLAG_ATTACH; |
744 | } | 743 | } |
745 | 744 | ||
746 | static void ide_disk_flush(ide_drive_t *drive) | 745 | static void ide_disk_flush(ide_drive_t *drive) |
747 | { | 746 | { |
748 | if (ata_id_flush_enabled(drive->id) == 0 || | 747 | if (ata_id_flush_enabled(drive->id) == 0 || |
749 | (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) | 748 | (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) |
750 | return; | 749 | return; |
751 | 750 | ||
752 | if (do_idedisk_flushcache(drive)) | 751 | if (do_idedisk_flushcache(drive)) |
753 | printk(KERN_INFO "%s: wcache flush failed!\n", drive->name); | 752 | printk(KERN_INFO "%s: wcache flush failed!\n", drive->name); |
754 | } | 753 | } |
755 | 754 | ||
756 | static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk) | 755 | static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk) |
757 | { | 756 | { |
758 | return 0; | 757 | return 0; |
759 | } | 758 | } |
760 | 759 | ||
761 | static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk, | 760 | static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk, |
762 | int on) | 761 | int on) |
763 | { | 762 | { |
764 | struct ide_cmd cmd; | 763 | struct ide_cmd cmd; |
765 | int ret; | 764 | int ret; |
766 | 765 | ||
767 | if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) | 766 | if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) |
768 | return 0; | 767 | return 0; |
769 | 768 | ||
770 | memset(&cmd, 0, sizeof(cmd)); | 769 | memset(&cmd, 0, sizeof(cmd)); |
771 | cmd.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; | 770 | cmd.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; |
772 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; | 771 | cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; |
773 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; | 772 | cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; |
774 | 773 | ||
775 | ret = ide_no_data_taskfile(drive, &cmd); | 774 | ret = ide_no_data_taskfile(drive, &cmd); |
776 | 775 | ||
777 | if (ret) | 776 | if (ret) |
778 | drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING; | 777 | drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING; |
779 | 778 | ||
780 | return ret; | 779 | return ret; |
781 | } | 780 | } |
782 | 781 | ||
783 | const struct ide_disk_ops ide_ata_disk_ops = { | 782 | const struct ide_disk_ops ide_ata_disk_ops = { |
784 | .check = ide_disk_check, | 783 | .check = ide_disk_check, |
785 | .unlock_native_capacity = ide_disk_unlock_native_capacity, | 784 | .unlock_native_capacity = ide_disk_unlock_native_capacity, |
786 | .get_capacity = ide_disk_get_capacity, | 785 | .get_capacity = ide_disk_get_capacity, |
787 | .setup = ide_disk_setup, | 786 | .setup = ide_disk_setup, |
788 | .flush = ide_disk_flush, | 787 | .flush = ide_disk_flush, |
789 | .init_media = ide_disk_init_media, | 788 | .init_media = ide_disk_init_media, |
790 | .set_doorlock = ide_disk_set_doorlock, | 789 | .set_doorlock = ide_disk_set_doorlock, |
791 | .do_request = ide_do_rw_disk, | 790 | .do_request = ide_do_rw_disk, |
792 | .ioctl = ide_disk_ioctl, | 791 | .ioctl = ide_disk_ioctl, |
793 | }; | 792 | }; |
794 | 793 |
drivers/md/dm.c
1 | /* | 1 | /* |
2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "dm.h" | 8 | #include "dm.h" |
9 | #include "dm-uevent.h" | 9 | #include "dm-uevent.h" |
10 | 10 | ||
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/mutex.h> | 13 | #include <linux/mutex.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/blkpg.h> | 15 | #include <linux/blkpg.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
18 | #include <linux/smp_lock.h> | 18 | #include <linux/smp_lock.h> |
19 | #include <linux/mempool.h> | 19 | #include <linux/mempool.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/idr.h> | 21 | #include <linux/idr.h> |
22 | #include <linux/hdreg.h> | 22 | #include <linux/hdreg.h> |
23 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
24 | 24 | ||
25 | #include <trace/events/block.h> | 25 | #include <trace/events/block.h> |
26 | 26 | ||
27 | #define DM_MSG_PREFIX "core" | 27 | #define DM_MSG_PREFIX "core" |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Cookies are numeric values sent with CHANGE and REMOVE | 30 | * Cookies are numeric values sent with CHANGE and REMOVE |
31 | * uevents while resuming, removing or renaming the device. | 31 | * uevents while resuming, removing or renaming the device. |
32 | */ | 32 | */ |
33 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" | 33 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" |
34 | #define DM_COOKIE_LENGTH 24 | 34 | #define DM_COOKIE_LENGTH 24 |
35 | 35 | ||
36 | static const char *_name = DM_NAME; | 36 | static const char *_name = DM_NAME; |
37 | 37 | ||
38 | static unsigned int major = 0; | 38 | static unsigned int major = 0; |
39 | static unsigned int _major = 0; | 39 | static unsigned int _major = 0; |
40 | 40 | ||
41 | static DEFINE_SPINLOCK(_minor_lock); | 41 | static DEFINE_SPINLOCK(_minor_lock); |
42 | /* | 42 | /* |
43 | * For bio-based dm. | 43 | * For bio-based dm. |
44 | * One of these is allocated per bio. | 44 | * One of these is allocated per bio. |
45 | */ | 45 | */ |
46 | struct dm_io { | 46 | struct dm_io { |
47 | struct mapped_device *md; | 47 | struct mapped_device *md; |
48 | int error; | 48 | int error; |
49 | atomic_t io_count; | 49 | atomic_t io_count; |
50 | struct bio *bio; | 50 | struct bio *bio; |
51 | unsigned long start_time; | 51 | unsigned long start_time; |
52 | spinlock_t endio_lock; | 52 | spinlock_t endio_lock; |
53 | }; | 53 | }; |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * For bio-based dm. | 56 | * For bio-based dm. |
57 | * One of these is allocated per target within a bio. Hopefully | 57 | * One of these is allocated per target within a bio. Hopefully |
58 | * this will be simplified out one day. | 58 | * this will be simplified out one day. |
59 | */ | 59 | */ |
60 | struct dm_target_io { | 60 | struct dm_target_io { |
61 | struct dm_io *io; | 61 | struct dm_io *io; |
62 | struct dm_target *ti; | 62 | struct dm_target *ti; |
63 | union map_info info; | 63 | union map_info info; |
64 | }; | 64 | }; |
65 | 65 | ||
66 | /* | 66 | /* |
67 | * For request-based dm. | 67 | * For request-based dm. |
68 | * One of these is allocated per request. | 68 | * One of these is allocated per request. |
69 | */ | 69 | */ |
70 | struct dm_rq_target_io { | 70 | struct dm_rq_target_io { |
71 | struct mapped_device *md; | 71 | struct mapped_device *md; |
72 | struct dm_target *ti; | 72 | struct dm_target *ti; |
73 | struct request *orig, clone; | 73 | struct request *orig, clone; |
74 | int error; | 74 | int error; |
75 | union map_info info; | 75 | union map_info info; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * For request-based dm. | 79 | * For request-based dm. |
80 | * One of these is allocated per bio. | 80 | * One of these is allocated per bio. |
81 | */ | 81 | */ |
82 | struct dm_rq_clone_bio_info { | 82 | struct dm_rq_clone_bio_info { |
83 | struct bio *orig; | 83 | struct bio *orig; |
84 | struct dm_rq_target_io *tio; | 84 | struct dm_rq_target_io *tio; |
85 | }; | 85 | }; |
86 | 86 | ||
87 | union map_info *dm_get_mapinfo(struct bio *bio) | 87 | union map_info *dm_get_mapinfo(struct bio *bio) |
88 | { | 88 | { |
89 | if (bio && bio->bi_private) | 89 | if (bio && bio->bi_private) |
90 | return &((struct dm_target_io *)bio->bi_private)->info; | 90 | return &((struct dm_target_io *)bio->bi_private)->info; |
91 | return NULL; | 91 | return NULL; |
92 | } | 92 | } |
93 | 93 | ||
94 | union map_info *dm_get_rq_mapinfo(struct request *rq) | 94 | union map_info *dm_get_rq_mapinfo(struct request *rq) |
95 | { | 95 | { |
96 | if (rq && rq->end_io_data) | 96 | if (rq && rq->end_io_data) |
97 | return &((struct dm_rq_target_io *)rq->end_io_data)->info; | 97 | return &((struct dm_rq_target_io *)rq->end_io_data)->info; |
98 | return NULL; | 98 | return NULL; |
99 | } | 99 | } |
100 | EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | 100 | EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); |
101 | 101 | ||
102 | #define MINOR_ALLOCED ((void *)-1) | 102 | #define MINOR_ALLOCED ((void *)-1) |
103 | 103 | ||
104 | /* | 104 | /* |
105 | * Bits for the md->flags field. | 105 | * Bits for the md->flags field. |
106 | */ | 106 | */ |
107 | #define DMF_BLOCK_IO_FOR_SUSPEND 0 | 107 | #define DMF_BLOCK_IO_FOR_SUSPEND 0 |
108 | #define DMF_SUSPENDED 1 | 108 | #define DMF_SUSPENDED 1 |
109 | #define DMF_FROZEN 2 | 109 | #define DMF_FROZEN 2 |
110 | #define DMF_FREEING 3 | 110 | #define DMF_FREEING 3 |
111 | #define DMF_DELETING 4 | 111 | #define DMF_DELETING 4 |
112 | #define DMF_NOFLUSH_SUSPENDING 5 | 112 | #define DMF_NOFLUSH_SUSPENDING 5 |
113 | #define DMF_QUEUE_IO_TO_THREAD 6 | 113 | #define DMF_QUEUE_IO_TO_THREAD 6 |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * Work processed by per-device workqueue. | 116 | * Work processed by per-device workqueue. |
117 | */ | 117 | */ |
118 | struct mapped_device { | 118 | struct mapped_device { |
119 | struct rw_semaphore io_lock; | 119 | struct rw_semaphore io_lock; |
120 | struct mutex suspend_lock; | 120 | struct mutex suspend_lock; |
121 | rwlock_t map_lock; | 121 | rwlock_t map_lock; |
122 | atomic_t holders; | 122 | atomic_t holders; |
123 | atomic_t open_count; | 123 | atomic_t open_count; |
124 | 124 | ||
125 | unsigned long flags; | 125 | unsigned long flags; |
126 | 126 | ||
127 | struct request_queue *queue; | 127 | struct request_queue *queue; |
128 | unsigned type; | 128 | unsigned type; |
129 | /* Protect queue and type against concurrent access. */ | 129 | /* Protect queue and type against concurrent access. */ |
130 | struct mutex type_lock; | 130 | struct mutex type_lock; |
131 | 131 | ||
132 | struct gendisk *disk; | 132 | struct gendisk *disk; |
133 | char name[16]; | 133 | char name[16]; |
134 | 134 | ||
135 | void *interface_ptr; | 135 | void *interface_ptr; |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * A list of ios that arrived while we were suspended. | 138 | * A list of ios that arrived while we were suspended. |
139 | */ | 139 | */ |
140 | atomic_t pending[2]; | 140 | atomic_t pending[2]; |
141 | wait_queue_head_t wait; | 141 | wait_queue_head_t wait; |
142 | struct work_struct work; | 142 | struct work_struct work; |
143 | struct bio_list deferred; | 143 | struct bio_list deferred; |
144 | spinlock_t deferred_lock; | 144 | spinlock_t deferred_lock; |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * An error from the barrier request currently being processed. | 147 | * An error from the barrier request currently being processed. |
148 | */ | 148 | */ |
149 | int barrier_error; | 149 | int barrier_error; |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Protect barrier_error from concurrent endio processing | 152 | * Protect barrier_error from concurrent endio processing |
153 | * in request-based dm. | 153 | * in request-based dm. |
154 | */ | 154 | */ |
155 | spinlock_t barrier_error_lock; | 155 | spinlock_t barrier_error_lock; |
156 | 156 | ||
157 | /* | 157 | /* |
158 | * Processing queue (flush/barriers) | 158 | * Processing queue (flush/barriers) |
159 | */ | 159 | */ |
160 | struct workqueue_struct *wq; | 160 | struct workqueue_struct *wq; |
161 | struct work_struct barrier_work; | 161 | struct work_struct barrier_work; |
162 | 162 | ||
163 | /* A pointer to the currently processing pre/post flush request */ | 163 | /* A pointer to the currently processing pre/post flush request */ |
164 | struct request *flush_request; | 164 | struct request *flush_request; |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * The current mapping. | 167 | * The current mapping. |
168 | */ | 168 | */ |
169 | struct dm_table *map; | 169 | struct dm_table *map; |
170 | 170 | ||
171 | /* | 171 | /* |
172 | * io objects are allocated from here. | 172 | * io objects are allocated from here. |
173 | */ | 173 | */ |
174 | mempool_t *io_pool; | 174 | mempool_t *io_pool; |
175 | mempool_t *tio_pool; | 175 | mempool_t *tio_pool; |
176 | 176 | ||
177 | struct bio_set *bs; | 177 | struct bio_set *bs; |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Event handling. | 180 | * Event handling. |
181 | */ | 181 | */ |
182 | atomic_t event_nr; | 182 | atomic_t event_nr; |
183 | wait_queue_head_t eventq; | 183 | wait_queue_head_t eventq; |
184 | atomic_t uevent_seq; | 184 | atomic_t uevent_seq; |
185 | struct list_head uevent_list; | 185 | struct list_head uevent_list; |
186 | spinlock_t uevent_lock; /* Protect access to uevent_list */ | 186 | spinlock_t uevent_lock; /* Protect access to uevent_list */ |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * freeze/thaw support require holding onto a super block | 189 | * freeze/thaw support require holding onto a super block |
190 | */ | 190 | */ |
191 | struct super_block *frozen_sb; | 191 | struct super_block *frozen_sb; |
192 | struct block_device *bdev; | 192 | struct block_device *bdev; |
193 | 193 | ||
194 | /* forced geometry settings */ | 194 | /* forced geometry settings */ |
195 | struct hd_geometry geometry; | 195 | struct hd_geometry geometry; |
196 | 196 | ||
197 | /* For saving the address of __make_request for request based dm */ | 197 | /* For saving the address of __make_request for request based dm */ |
198 | make_request_fn *saved_make_request_fn; | 198 | make_request_fn *saved_make_request_fn; |
199 | 199 | ||
200 | /* sysfs handle */ | 200 | /* sysfs handle */ |
201 | struct kobject kobj; | 201 | struct kobject kobj; |
202 | 202 | ||
203 | /* zero-length barrier that will be cloned and submitted to targets */ | 203 | /* zero-length barrier that will be cloned and submitted to targets */ |
204 | struct bio barrier_bio; | 204 | struct bio barrier_bio; |
205 | }; | 205 | }; |
206 | 206 | ||
207 | /* | 207 | /* |
208 | * For mempools pre-allocation at the table loading time. | 208 | * For mempools pre-allocation at the table loading time. |
209 | */ | 209 | */ |
210 | struct dm_md_mempools { | 210 | struct dm_md_mempools { |
211 | mempool_t *io_pool; | 211 | mempool_t *io_pool; |
212 | mempool_t *tio_pool; | 212 | mempool_t *tio_pool; |
213 | struct bio_set *bs; | 213 | struct bio_set *bs; |
214 | }; | 214 | }; |
215 | 215 | ||
216 | #define MIN_IOS 256 | 216 | #define MIN_IOS 256 |
217 | static struct kmem_cache *_io_cache; | 217 | static struct kmem_cache *_io_cache; |
218 | static struct kmem_cache *_tio_cache; | 218 | static struct kmem_cache *_tio_cache; |
219 | static struct kmem_cache *_rq_tio_cache; | 219 | static struct kmem_cache *_rq_tio_cache; |
220 | static struct kmem_cache *_rq_bio_info_cache; | 220 | static struct kmem_cache *_rq_bio_info_cache; |
221 | 221 | ||
222 | static int __init local_init(void) | 222 | static int __init local_init(void) |
223 | { | 223 | { |
224 | int r = -ENOMEM; | 224 | int r = -ENOMEM; |
225 | 225 | ||
226 | /* allocate a slab for the dm_ios */ | 226 | /* allocate a slab for the dm_ios */ |
227 | _io_cache = KMEM_CACHE(dm_io, 0); | 227 | _io_cache = KMEM_CACHE(dm_io, 0); |
228 | if (!_io_cache) | 228 | if (!_io_cache) |
229 | return r; | 229 | return r; |
230 | 230 | ||
231 | /* allocate a slab for the target ios */ | 231 | /* allocate a slab for the target ios */ |
232 | _tio_cache = KMEM_CACHE(dm_target_io, 0); | 232 | _tio_cache = KMEM_CACHE(dm_target_io, 0); |
233 | if (!_tio_cache) | 233 | if (!_tio_cache) |
234 | goto out_free_io_cache; | 234 | goto out_free_io_cache; |
235 | 235 | ||
236 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); | 236 | _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); |
237 | if (!_rq_tio_cache) | 237 | if (!_rq_tio_cache) |
238 | goto out_free_tio_cache; | 238 | goto out_free_tio_cache; |
239 | 239 | ||
240 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); | 240 | _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); |
241 | if (!_rq_bio_info_cache) | 241 | if (!_rq_bio_info_cache) |
242 | goto out_free_rq_tio_cache; | 242 | goto out_free_rq_tio_cache; |
243 | 243 | ||
244 | r = dm_uevent_init(); | 244 | r = dm_uevent_init(); |
245 | if (r) | 245 | if (r) |
246 | goto out_free_rq_bio_info_cache; | 246 | goto out_free_rq_bio_info_cache; |
247 | 247 | ||
248 | _major = major; | 248 | _major = major; |
249 | r = register_blkdev(_major, _name); | 249 | r = register_blkdev(_major, _name); |
250 | if (r < 0) | 250 | if (r < 0) |
251 | goto out_uevent_exit; | 251 | goto out_uevent_exit; |
252 | 252 | ||
253 | if (!_major) | 253 | if (!_major) |
254 | _major = r; | 254 | _major = r; |
255 | 255 | ||
256 | return 0; | 256 | return 0; |
257 | 257 | ||
258 | out_uevent_exit: | 258 | out_uevent_exit: |
259 | dm_uevent_exit(); | 259 | dm_uevent_exit(); |
260 | out_free_rq_bio_info_cache: | 260 | out_free_rq_bio_info_cache: |
261 | kmem_cache_destroy(_rq_bio_info_cache); | 261 | kmem_cache_destroy(_rq_bio_info_cache); |
262 | out_free_rq_tio_cache: | 262 | out_free_rq_tio_cache: |
263 | kmem_cache_destroy(_rq_tio_cache); | 263 | kmem_cache_destroy(_rq_tio_cache); |
264 | out_free_tio_cache: | 264 | out_free_tio_cache: |
265 | kmem_cache_destroy(_tio_cache); | 265 | kmem_cache_destroy(_tio_cache); |
266 | out_free_io_cache: | 266 | out_free_io_cache: |
267 | kmem_cache_destroy(_io_cache); | 267 | kmem_cache_destroy(_io_cache); |
268 | 268 | ||
269 | return r; | 269 | return r; |
270 | } | 270 | } |
271 | 271 | ||
272 | static void local_exit(void) | 272 | static void local_exit(void) |
273 | { | 273 | { |
274 | kmem_cache_destroy(_rq_bio_info_cache); | 274 | kmem_cache_destroy(_rq_bio_info_cache); |
275 | kmem_cache_destroy(_rq_tio_cache); | 275 | kmem_cache_destroy(_rq_tio_cache); |
276 | kmem_cache_destroy(_tio_cache); | 276 | kmem_cache_destroy(_tio_cache); |
277 | kmem_cache_destroy(_io_cache); | 277 | kmem_cache_destroy(_io_cache); |
278 | unregister_blkdev(_major, _name); | 278 | unregister_blkdev(_major, _name); |
279 | dm_uevent_exit(); | 279 | dm_uevent_exit(); |
280 | 280 | ||
281 | _major = 0; | 281 | _major = 0; |
282 | 282 | ||
283 | DMINFO("cleaned up"); | 283 | DMINFO("cleaned up"); |
284 | } | 284 | } |
285 | 285 | ||
286 | static int (*_inits[])(void) __initdata = { | 286 | static int (*_inits[])(void) __initdata = { |
287 | local_init, | 287 | local_init, |
288 | dm_target_init, | 288 | dm_target_init, |
289 | dm_linear_init, | 289 | dm_linear_init, |
290 | dm_stripe_init, | 290 | dm_stripe_init, |
291 | dm_io_init, | 291 | dm_io_init, |
292 | dm_kcopyd_init, | 292 | dm_kcopyd_init, |
293 | dm_interface_init, | 293 | dm_interface_init, |
294 | }; | 294 | }; |
295 | 295 | ||
296 | static void (*_exits[])(void) = { | 296 | static void (*_exits[])(void) = { |
297 | local_exit, | 297 | local_exit, |
298 | dm_target_exit, | 298 | dm_target_exit, |
299 | dm_linear_exit, | 299 | dm_linear_exit, |
300 | dm_stripe_exit, | 300 | dm_stripe_exit, |
301 | dm_io_exit, | 301 | dm_io_exit, |
302 | dm_kcopyd_exit, | 302 | dm_kcopyd_exit, |
303 | dm_interface_exit, | 303 | dm_interface_exit, |
304 | }; | 304 | }; |
305 | 305 | ||
306 | static int __init dm_init(void) | 306 | static int __init dm_init(void) |
307 | { | 307 | { |
308 | const int count = ARRAY_SIZE(_inits); | 308 | const int count = ARRAY_SIZE(_inits); |
309 | 309 | ||
310 | int r, i; | 310 | int r, i; |
311 | 311 | ||
312 | for (i = 0; i < count; i++) { | 312 | for (i = 0; i < count; i++) { |
313 | r = _inits[i](); | 313 | r = _inits[i](); |
314 | if (r) | 314 | if (r) |
315 | goto bad; | 315 | goto bad; |
316 | } | 316 | } |
317 | 317 | ||
318 | return 0; | 318 | return 0; |
319 | 319 | ||
320 | bad: | 320 | bad: |
321 | while (i--) | 321 | while (i--) |
322 | _exits[i](); | 322 | _exits[i](); |
323 | 323 | ||
324 | return r; | 324 | return r; |
325 | } | 325 | } |
326 | 326 | ||
327 | static void __exit dm_exit(void) | 327 | static void __exit dm_exit(void) |
328 | { | 328 | { |
329 | int i = ARRAY_SIZE(_exits); | 329 | int i = ARRAY_SIZE(_exits); |
330 | 330 | ||
331 | while (i--) | 331 | while (i--) |
332 | _exits[i](); | 332 | _exits[i](); |
333 | } | 333 | } |
334 | 334 | ||
335 | /* | 335 | /* |
336 | * Block device functions | 336 | * Block device functions |
337 | */ | 337 | */ |
338 | int dm_deleting_md(struct mapped_device *md) | 338 | int dm_deleting_md(struct mapped_device *md) |
339 | { | 339 | { |
340 | return test_bit(DMF_DELETING, &md->flags); | 340 | return test_bit(DMF_DELETING, &md->flags); |
341 | } | 341 | } |
342 | 342 | ||
343 | static int dm_blk_open(struct block_device *bdev, fmode_t mode) | 343 | static int dm_blk_open(struct block_device *bdev, fmode_t mode) |
344 | { | 344 | { |
345 | struct mapped_device *md; | 345 | struct mapped_device *md; |
346 | 346 | ||
347 | lock_kernel(); | 347 | lock_kernel(); |
348 | spin_lock(&_minor_lock); | 348 | spin_lock(&_minor_lock); |
349 | 349 | ||
350 | md = bdev->bd_disk->private_data; | 350 | md = bdev->bd_disk->private_data; |
351 | if (!md) | 351 | if (!md) |
352 | goto out; | 352 | goto out; |
353 | 353 | ||
354 | if (test_bit(DMF_FREEING, &md->flags) || | 354 | if (test_bit(DMF_FREEING, &md->flags) || |
355 | dm_deleting_md(md)) { | 355 | dm_deleting_md(md)) { |
356 | md = NULL; | 356 | md = NULL; |
357 | goto out; | 357 | goto out; |
358 | } | 358 | } |
359 | 359 | ||
360 | dm_get(md); | 360 | dm_get(md); |
361 | atomic_inc(&md->open_count); | 361 | atomic_inc(&md->open_count); |
362 | 362 | ||
363 | out: | 363 | out: |
364 | spin_unlock(&_minor_lock); | 364 | spin_unlock(&_minor_lock); |
365 | unlock_kernel(); | 365 | unlock_kernel(); |
366 | 366 | ||
367 | return md ? 0 : -ENXIO; | 367 | return md ? 0 : -ENXIO; |
368 | } | 368 | } |
369 | 369 | ||
370 | static int dm_blk_close(struct gendisk *disk, fmode_t mode) | 370 | static int dm_blk_close(struct gendisk *disk, fmode_t mode) |
371 | { | 371 | { |
372 | struct mapped_device *md = disk->private_data; | 372 | struct mapped_device *md = disk->private_data; |
373 | 373 | ||
374 | lock_kernel(); | 374 | lock_kernel(); |
375 | atomic_dec(&md->open_count); | 375 | atomic_dec(&md->open_count); |
376 | dm_put(md); | 376 | dm_put(md); |
377 | unlock_kernel(); | 377 | unlock_kernel(); |
378 | 378 | ||
379 | return 0; | 379 | return 0; |
380 | } | 380 | } |
381 | 381 | ||
382 | int dm_open_count(struct mapped_device *md) | 382 | int dm_open_count(struct mapped_device *md) |
383 | { | 383 | { |
384 | return atomic_read(&md->open_count); | 384 | return atomic_read(&md->open_count); |
385 | } | 385 | } |
386 | 386 | ||
387 | /* | 387 | /* |
388 | * Guarantees nothing is using the device before it's deleted. | 388 | * Guarantees nothing is using the device before it's deleted. |
389 | */ | 389 | */ |
390 | int dm_lock_for_deletion(struct mapped_device *md) | 390 | int dm_lock_for_deletion(struct mapped_device *md) |
391 | { | 391 | { |
392 | int r = 0; | 392 | int r = 0; |
393 | 393 | ||
394 | spin_lock(&_minor_lock); | 394 | spin_lock(&_minor_lock); |
395 | 395 | ||
396 | if (dm_open_count(md)) | 396 | if (dm_open_count(md)) |
397 | r = -EBUSY; | 397 | r = -EBUSY; |
398 | else | 398 | else |
399 | set_bit(DMF_DELETING, &md->flags); | 399 | set_bit(DMF_DELETING, &md->flags); |
400 | 400 | ||
401 | spin_unlock(&_minor_lock); | 401 | spin_unlock(&_minor_lock); |
402 | 402 | ||
403 | return r; | 403 | return r; |
404 | } | 404 | } |
405 | 405 | ||
406 | static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) | 406 | static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
407 | { | 407 | { |
408 | struct mapped_device *md = bdev->bd_disk->private_data; | 408 | struct mapped_device *md = bdev->bd_disk->private_data; |
409 | 409 | ||
410 | return dm_get_geometry(md, geo); | 410 | return dm_get_geometry(md, geo); |
411 | } | 411 | } |
412 | 412 | ||
413 | static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, | 413 | static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, |
414 | unsigned int cmd, unsigned long arg) | 414 | unsigned int cmd, unsigned long arg) |
415 | { | 415 | { |
416 | struct mapped_device *md = bdev->bd_disk->private_data; | 416 | struct mapped_device *md = bdev->bd_disk->private_data; |
417 | struct dm_table *map = dm_get_live_table(md); | 417 | struct dm_table *map = dm_get_live_table(md); |
418 | struct dm_target *tgt; | 418 | struct dm_target *tgt; |
419 | int r = -ENOTTY; | 419 | int r = -ENOTTY; |
420 | 420 | ||
421 | if (!map || !dm_table_get_size(map)) | 421 | if (!map || !dm_table_get_size(map)) |
422 | goto out; | 422 | goto out; |
423 | 423 | ||
424 | /* We only support devices that have a single target */ | 424 | /* We only support devices that have a single target */ |
425 | if (dm_table_get_num_targets(map) != 1) | 425 | if (dm_table_get_num_targets(map) != 1) |
426 | goto out; | 426 | goto out; |
427 | 427 | ||
428 | tgt = dm_table_get_target(map, 0); | 428 | tgt = dm_table_get_target(map, 0); |
429 | 429 | ||
430 | if (dm_suspended_md(md)) { | 430 | if (dm_suspended_md(md)) { |
431 | r = -EAGAIN; | 431 | r = -EAGAIN; |
432 | goto out; | 432 | goto out; |
433 | } | 433 | } |
434 | 434 | ||
435 | if (tgt->type->ioctl) | 435 | if (tgt->type->ioctl) |
436 | r = tgt->type->ioctl(tgt, cmd, arg); | 436 | r = tgt->type->ioctl(tgt, cmd, arg); |
437 | 437 | ||
438 | out: | 438 | out: |
439 | dm_table_put(map); | 439 | dm_table_put(map); |
440 | 440 | ||
441 | return r; | 441 | return r; |
442 | } | 442 | } |
443 | 443 | ||
444 | static struct dm_io *alloc_io(struct mapped_device *md) | 444 | static struct dm_io *alloc_io(struct mapped_device *md) |
445 | { | 445 | { |
446 | return mempool_alloc(md->io_pool, GFP_NOIO); | 446 | return mempool_alloc(md->io_pool, GFP_NOIO); |
447 | } | 447 | } |
448 | 448 | ||
449 | static void free_io(struct mapped_device *md, struct dm_io *io) | 449 | static void free_io(struct mapped_device *md, struct dm_io *io) |
450 | { | 450 | { |
451 | mempool_free(io, md->io_pool); | 451 | mempool_free(io, md->io_pool); |
452 | } | 452 | } |
453 | 453 | ||
454 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) | 454 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) |
455 | { | 455 | { |
456 | mempool_free(tio, md->tio_pool); | 456 | mempool_free(tio, md->tio_pool); |
457 | } | 457 | } |
458 | 458 | ||
459 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, | 459 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, |
460 | gfp_t gfp_mask) | 460 | gfp_t gfp_mask) |
461 | { | 461 | { |
462 | return mempool_alloc(md->tio_pool, gfp_mask); | 462 | return mempool_alloc(md->tio_pool, gfp_mask); |
463 | } | 463 | } |
464 | 464 | ||
465 | static void free_rq_tio(struct dm_rq_target_io *tio) | 465 | static void free_rq_tio(struct dm_rq_target_io *tio) |
466 | { | 466 | { |
467 | mempool_free(tio, tio->md->tio_pool); | 467 | mempool_free(tio, tio->md->tio_pool); |
468 | } | 468 | } |
469 | 469 | ||
470 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) | 470 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) |
471 | { | 471 | { |
472 | return mempool_alloc(md->io_pool, GFP_ATOMIC); | 472 | return mempool_alloc(md->io_pool, GFP_ATOMIC); |
473 | } | 473 | } |
474 | 474 | ||
475 | static void free_bio_info(struct dm_rq_clone_bio_info *info) | 475 | static void free_bio_info(struct dm_rq_clone_bio_info *info) |
476 | { | 476 | { |
477 | mempool_free(info, info->tio->md->io_pool); | 477 | mempool_free(info, info->tio->md->io_pool); |
478 | } | 478 | } |
479 | 479 | ||
480 | static int md_in_flight(struct mapped_device *md) | 480 | static int md_in_flight(struct mapped_device *md) |
481 | { | 481 | { |
482 | return atomic_read(&md->pending[READ]) + | 482 | return atomic_read(&md->pending[READ]) + |
483 | atomic_read(&md->pending[WRITE]); | 483 | atomic_read(&md->pending[WRITE]); |
484 | } | 484 | } |
485 | 485 | ||
486 | static void start_io_acct(struct dm_io *io) | 486 | static void start_io_acct(struct dm_io *io) |
487 | { | 487 | { |
488 | struct mapped_device *md = io->md; | 488 | struct mapped_device *md = io->md; |
489 | int cpu; | 489 | int cpu; |
490 | int rw = bio_data_dir(io->bio); | 490 | int rw = bio_data_dir(io->bio); |
491 | 491 | ||
492 | io->start_time = jiffies; | 492 | io->start_time = jiffies; |
493 | 493 | ||
494 | cpu = part_stat_lock(); | 494 | cpu = part_stat_lock(); |
495 | part_round_stats(cpu, &dm_disk(md)->part0); | 495 | part_round_stats(cpu, &dm_disk(md)->part0); |
496 | part_stat_unlock(); | 496 | part_stat_unlock(); |
497 | dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); | 497 | dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); |
498 | } | 498 | } |
499 | 499 | ||
500 | static void end_io_acct(struct dm_io *io) | 500 | static void end_io_acct(struct dm_io *io) |
501 | { | 501 | { |
502 | struct mapped_device *md = io->md; | 502 | struct mapped_device *md = io->md; |
503 | struct bio *bio = io->bio; | 503 | struct bio *bio = io->bio; |
504 | unsigned long duration = jiffies - io->start_time; | 504 | unsigned long duration = jiffies - io->start_time; |
505 | int pending, cpu; | 505 | int pending, cpu; |
506 | int rw = bio_data_dir(bio); | 506 | int rw = bio_data_dir(bio); |
507 | 507 | ||
508 | cpu = part_stat_lock(); | 508 | cpu = part_stat_lock(); |
509 | part_round_stats(cpu, &dm_disk(md)->part0); | 509 | part_round_stats(cpu, &dm_disk(md)->part0); |
510 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); | 510 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); |
511 | part_stat_unlock(); | 511 | part_stat_unlock(); |
512 | 512 | ||
513 | /* | 513 | /* |
514 | * After this is decremented the bio must not be touched if it is | 514 | * After this is decremented the bio must not be touched if it is |
515 | * a barrier. | 515 | * a barrier. |
516 | */ | 516 | */ |
517 | dm_disk(md)->part0.in_flight[rw] = pending = | 517 | dm_disk(md)->part0.in_flight[rw] = pending = |
518 | atomic_dec_return(&md->pending[rw]); | 518 | atomic_dec_return(&md->pending[rw]); |
519 | pending += atomic_read(&md->pending[rw^0x1]); | 519 | pending += atomic_read(&md->pending[rw^0x1]); |
520 | 520 | ||
521 | /* nudge anyone waiting on suspend queue */ | 521 | /* nudge anyone waiting on suspend queue */ |
522 | if (!pending) | 522 | if (!pending) |
523 | wake_up(&md->wait); | 523 | wake_up(&md->wait); |
524 | } | 524 | } |
525 | 525 | ||
526 | /* | 526 | /* |
527 | * Add the bio to the list of deferred io. | 527 | * Add the bio to the list of deferred io. |
528 | */ | 528 | */ |
529 | static void queue_io(struct mapped_device *md, struct bio *bio) | 529 | static void queue_io(struct mapped_device *md, struct bio *bio) |
530 | { | 530 | { |
531 | down_write(&md->io_lock); | 531 | down_write(&md->io_lock); |
532 | 532 | ||
533 | spin_lock_irq(&md->deferred_lock); | 533 | spin_lock_irq(&md->deferred_lock); |
534 | bio_list_add(&md->deferred, bio); | 534 | bio_list_add(&md->deferred, bio); |
535 | spin_unlock_irq(&md->deferred_lock); | 535 | spin_unlock_irq(&md->deferred_lock); |
536 | 536 | ||
537 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) | 537 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) |
538 | queue_work(md->wq, &md->work); | 538 | queue_work(md->wq, &md->work); |
539 | 539 | ||
540 | up_write(&md->io_lock); | 540 | up_write(&md->io_lock); |
541 | } | 541 | } |
542 | 542 | ||
543 | /* | 543 | /* |
544 | * Everyone (including functions in this file), should use this | 544 | * Everyone (including functions in this file), should use this |
545 | * function to access the md->map field, and make sure they call | 545 | * function to access the md->map field, and make sure they call |
546 | * dm_table_put() when finished. | 546 | * dm_table_put() when finished. |
547 | */ | 547 | */ |
548 | struct dm_table *dm_get_live_table(struct mapped_device *md) | 548 | struct dm_table *dm_get_live_table(struct mapped_device *md) |
549 | { | 549 | { |
550 | struct dm_table *t; | 550 | struct dm_table *t; |
551 | unsigned long flags; | 551 | unsigned long flags; |
552 | 552 | ||
553 | read_lock_irqsave(&md->map_lock, flags); | 553 | read_lock_irqsave(&md->map_lock, flags); |
554 | t = md->map; | 554 | t = md->map; |
555 | if (t) | 555 | if (t) |
556 | dm_table_get(t); | 556 | dm_table_get(t); |
557 | read_unlock_irqrestore(&md->map_lock, flags); | 557 | read_unlock_irqrestore(&md->map_lock, flags); |
558 | 558 | ||
559 | return t; | 559 | return t; |
560 | } | 560 | } |
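/*
 * Editor's sketch, not part of this diff: the get/put discipline the
 * comment above asks for, shown with a made-up caller.
 */
static unsigned count_live_targets(struct mapped_device *md)
{
	struct dm_table *map = dm_get_live_table(md);
	unsigned num = 0;

	if (map) {
		num = dm_table_get_num_targets(map);
		dm_table_put(map);	/* drop the reference when done */
	}

	return num;
}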
561 | 561 | ||
562 | /* | 562 | /* |
563 | * Get the geometry associated with a dm device | 563 | * Get the geometry associated with a dm device |
564 | */ | 564 | */ |
565 | int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) | 565 | int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) |
566 | { | 566 | { |
567 | *geo = md->geometry; | 567 | *geo = md->geometry; |
568 | 568 | ||
569 | return 0; | 569 | return 0; |
570 | } | 570 | } |
571 | 571 | ||
572 | /* | 572 | /* |
573 | * Set the geometry of a device. | 573 | * Set the geometry of a device. |
574 | */ | 574 | */ |
575 | int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) | 575 | int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) |
576 | { | 576 | { |
577 | sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; | 577 | sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; |
578 | 578 | ||
579 | if (geo->start > sz) { | 579 | if (geo->start > sz) { |
580 | DMWARN("Start sector is beyond the geometry limits."); | 580 | DMWARN("Start sector is beyond the geometry limits."); |
581 | return -EINVAL; | 581 | return -EINVAL; |
582 | } | 582 | } |
583 | 583 | ||
584 | md->geometry = *geo; | 584 | md->geometry = *geo; |
585 | 585 | ||
586 | return 0; | 586 | return 0; |
587 | } | 587 | } |
588 | 588 | ||
589 | /*----------------------------------------------------------------- | 589 | /*----------------------------------------------------------------- |
590 | * CRUD START: | 590 | * CRUD START: |
591 | * A more elegant soln is in the works that uses the queue | 591 | * A more elegant soln is in the works that uses the queue |
592 | * merge fn, unfortunately there are a couple of changes to | 592 | * merge fn, unfortunately there are a couple of changes to |
593 | * the block layer that I want to make for this. So in the | 593 | * the block layer that I want to make for this. So in the |
594 | * interests of getting something for people to use I give | 594 | * interests of getting something for people to use I give |
595 | * you this clearly demarcated crap. | 595 | * you this clearly demarcated crap. |
596 | *---------------------------------------------------------------*/ | 596 | *---------------------------------------------------------------*/ |
597 | 597 | ||
598 | static int __noflush_suspending(struct mapped_device *md) | 598 | static int __noflush_suspending(struct mapped_device *md) |
599 | { | 599 | { |
600 | return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 600 | return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
601 | } | 601 | } |
602 | 602 | ||
603 | /* | 603 | /* |
604 | * Decrements the number of outstanding ios that a bio has been | 604 | * Decrements the number of outstanding ios that a bio has been |
605 | * cloned into, completing the original io if necc. | 605 | * cloned into, completing the original io if necc. |
606 | */ | 606 | */ |
607 | static void dec_pending(struct dm_io *io, int error) | 607 | static void dec_pending(struct dm_io *io, int error) |
608 | { | 608 | { |
609 | unsigned long flags; | 609 | unsigned long flags; |
610 | int io_error; | 610 | int io_error; |
611 | struct bio *bio; | 611 | struct bio *bio; |
612 | struct mapped_device *md = io->md; | 612 | struct mapped_device *md = io->md; |
613 | 613 | ||
614 | /* Push-back supersedes any I/O errors */ | 614 | /* Push-back supersedes any I/O errors */ |
615 | if (unlikely(error)) { | 615 | if (unlikely(error)) { |
616 | spin_lock_irqsave(&io->endio_lock, flags); | 616 | spin_lock_irqsave(&io->endio_lock, flags); |
617 | if (!(io->error > 0 && __noflush_suspending(md))) | 617 | if (!(io->error > 0 && __noflush_suspending(md))) |
618 | io->error = error; | 618 | io->error = error; |
619 | spin_unlock_irqrestore(&io->endio_lock, flags); | 619 | spin_unlock_irqrestore(&io->endio_lock, flags); |
620 | } | 620 | } |
621 | 621 | ||
622 | if (atomic_dec_and_test(&io->io_count)) { | 622 | if (atomic_dec_and_test(&io->io_count)) { |
623 | if (io->error == DM_ENDIO_REQUEUE) { | 623 | if (io->error == DM_ENDIO_REQUEUE) { |
624 | /* | 624 | /* |
625 | * Target requested pushing back the I/O. | 625 | * Target requested pushing back the I/O. |
626 | */ | 626 | */ |
627 | spin_lock_irqsave(&md->deferred_lock, flags); | 627 | spin_lock_irqsave(&md->deferred_lock, flags); |
628 | if (__noflush_suspending(md)) { | 628 | if (__noflush_suspending(md)) { |
629 | if (!(io->bio->bi_rw & REQ_HARDBARRIER)) | 629 | if (!(io->bio->bi_rw & REQ_HARDBARRIER)) |
630 | bio_list_add_head(&md->deferred, | 630 | bio_list_add_head(&md->deferred, |
631 | io->bio); | 631 | io->bio); |
632 | } else | 632 | } else |
633 | /* noflush suspend was interrupted. */ | 633 | /* noflush suspend was interrupted. */ |
634 | io->error = -EIO; | 634 | io->error = -EIO; |
635 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 635 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
636 | } | 636 | } |
637 | 637 | ||
638 | io_error = io->error; | 638 | io_error = io->error; |
639 | bio = io->bio; | 639 | bio = io->bio; |
640 | 640 | ||
641 | if (bio->bi_rw & REQ_HARDBARRIER) { | 641 | if (bio->bi_rw & REQ_HARDBARRIER) { |
642 | /* | 642 | /* |
643 | * There can be just one barrier request so we use | 643 | * There can be just one barrier request so we use |
644 | * a per-device variable for error reporting. | 644 | * a per-device variable for error reporting. |
645 | * Note that you can't touch the bio after end_io_acct | 645 | * Note that you can't touch the bio after end_io_acct |
646 | * | 646 | * |
647 | * We ignore -EOPNOTSUPP for empty flush reported by | 647 | * We ignore -EOPNOTSUPP for empty flush reported by |
648 | * underlying devices. We assume that if the device | 648 | * underlying devices. We assume that if the device |
649 | * doesn't support empty barriers, it doesn't need | 649 | * doesn't support empty barriers, it doesn't need |
650 | * cache flushing commands. | 650 | * cache flushing commands. |
651 | */ | 651 | */ |
652 | if (!md->barrier_error && | 652 | if (!md->barrier_error && |
653 | !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) | 653 | !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) |
654 | md->barrier_error = io_error; | 654 | md->barrier_error = io_error; |
655 | end_io_acct(io); | 655 | end_io_acct(io); |
656 | free_io(md, io); | 656 | free_io(md, io); |
657 | } else { | 657 | } else { |
658 | end_io_acct(io); | 658 | end_io_acct(io); |
659 | free_io(md, io); | 659 | free_io(md, io); |
660 | 660 | ||
661 | if (io_error != DM_ENDIO_REQUEUE) { | 661 | if (io_error != DM_ENDIO_REQUEUE) { |
662 | trace_block_bio_complete(md->queue, bio); | 662 | trace_block_bio_complete(md->queue, bio); |
663 | 663 | ||
664 | bio_endio(bio, io_error); | 664 | bio_endio(bio, io_error); |
665 | } | 665 | } |
666 | } | 666 | } |
667 | } | 667 | } |
668 | } | 668 | } |
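The reference counting above drives bio completion in dm: each clone holds one reference on the dm_io, plus one extra reference taken at submission time, and the final dec_pending() completes the original bio. A minimal userspace sketch of that pattern, assuming nothing about the kernel API (C11 atomics stand in for atomic_t; all names and values are invented):

#include <stdatomic.h>
#include <stdio.h>

struct demo_io {
    atomic_int io_count;    /* one reference per clone, plus one extra at submit */
    int error;              /* demo keeps the first error; dec_pending() has richer rules */
};

static void demo_dec_pending(struct demo_io *io, int error)
{
    if (error && !io->error)
        io->error = error;  /* single-threaded demo, so no endio_lock equivalent */

    /* atomic_fetch_sub() returns the old value, so seeing 1 means this was
     * the last reference and the original I/O can be completed. */
    if (atomic_fetch_sub(&io->io_count, 1) == 1)
        printf("original bio completed, error=%d\n", io->error);
}

int main(void)
{
    struct demo_io io;

    io.error = 0;
    atomic_init(&io.io_count, 3);   /* two clones + the extra submit-time reference */

    demo_dec_pending(&io, 0);       /* first clone completes cleanly */
    demo_dec_pending(&io, -5);      /* second clone fails with an -EIO-style error */
    demo_dec_pending(&io, 0);       /* drop the extra reference; original completes */
    return 0;
}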
669 | 669 | ||
670 | static void clone_endio(struct bio *bio, int error) | 670 | static void clone_endio(struct bio *bio, int error) |
671 | { | 671 | { |
672 | int r = 0; | 672 | int r = 0; |
673 | struct dm_target_io *tio = bio->bi_private; | 673 | struct dm_target_io *tio = bio->bi_private; |
674 | struct dm_io *io = tio->io; | 674 | struct dm_io *io = tio->io; |
675 | struct mapped_device *md = tio->io->md; | 675 | struct mapped_device *md = tio->io->md; |
676 | dm_endio_fn endio = tio->ti->type->end_io; | 676 | dm_endio_fn endio = tio->ti->type->end_io; |
677 | 677 | ||
678 | if (!bio_flagged(bio, BIO_UPTODATE) && !error) | 678 | if (!bio_flagged(bio, BIO_UPTODATE) && !error) |
679 | error = -EIO; | 679 | error = -EIO; |
680 | 680 | ||
681 | if (endio) { | 681 | if (endio) { |
682 | r = endio(tio->ti, bio, error, &tio->info); | 682 | r = endio(tio->ti, bio, error, &tio->info); |
683 | if (r < 0 || r == DM_ENDIO_REQUEUE) | 683 | if (r < 0 || r == DM_ENDIO_REQUEUE) |
684 | /* | 684 | /* |
685 | * error and requeue request are handled | 685 | * error and requeue request are handled |
686 | * in dec_pending(). | 686 | * in dec_pending(). |
687 | */ | 687 | */ |
688 | error = r; | 688 | error = r; |
689 | else if (r == DM_ENDIO_INCOMPLETE) | 689 | else if (r == DM_ENDIO_INCOMPLETE) |
690 | /* The target will handle the io */ | 690 | /* The target will handle the io */ |
691 | return; | 691 | return; |
692 | else if (r) { | 692 | else if (r) { |
693 | DMWARN("unimplemented target endio return value: %d", r); | 693 | DMWARN("unimplemented target endio return value: %d", r); |
694 | BUG(); | 694 | BUG(); |
695 | } | 695 | } |
696 | } | 696 | } |
697 | 697 | ||
698 | /* | 698 | /* |
699 | * Store md for cleanup instead of tio which is about to get freed. | 699 | * Store md for cleanup instead of tio which is about to get freed. |
700 | */ | 700 | */ |
701 | bio->bi_private = md->bs; | 701 | bio->bi_private = md->bs; |
702 | 702 | ||
703 | free_tio(md, tio); | 703 | free_tio(md, tio); |
704 | bio_put(bio); | 704 | bio_put(bio); |
705 | dec_pending(io, error); | 705 | dec_pending(io, error); |
706 | } | 706 | } |
707 | 707 | ||
708 | /* | 708 | /* |
709 | * Partial completion handling for request-based dm | 709 | * Partial completion handling for request-based dm |
710 | */ | 710 | */ |
711 | static void end_clone_bio(struct bio *clone, int error) | 711 | static void end_clone_bio(struct bio *clone, int error) |
712 | { | 712 | { |
713 | struct dm_rq_clone_bio_info *info = clone->bi_private; | 713 | struct dm_rq_clone_bio_info *info = clone->bi_private; |
714 | struct dm_rq_target_io *tio = info->tio; | 714 | struct dm_rq_target_io *tio = info->tio; |
715 | struct bio *bio = info->orig; | 715 | struct bio *bio = info->orig; |
716 | unsigned int nr_bytes = info->orig->bi_size; | 716 | unsigned int nr_bytes = info->orig->bi_size; |
717 | 717 | ||
718 | bio_put(clone); | 718 | bio_put(clone); |
719 | 719 | ||
720 | if (tio->error) | 720 | if (tio->error) |
721 | /* | 721 | /* |
722 | * An error has already been detected on the request. | 722 | * An error has already been detected on the request. |
723 | * Once error occurred, just let clone->end_io() handle | 723 | * Once error occurred, just let clone->end_io() handle |
724 | * the remainder. | 724 | * the remainder. |
725 | */ | 725 | */ |
726 | return; | 726 | return; |
727 | else if (error) { | 727 | else if (error) { |
728 | /* | 728 | /* |
729 | * Don't report the error to the upper layer yet. | 729 | * Don't report the error to the upper layer yet. |
730 | * The error handling decision is made by the target driver, | 730 | * The error handling decision is made by the target driver, |
731 | * when the request is completed. | 731 | * when the request is completed. |
732 | */ | 732 | */ |
733 | tio->error = error; | 733 | tio->error = error; |
734 | return; | 734 | return; |
735 | } | 735 | } |
736 | 736 | ||
737 | /* | 737 | /* |
738 | * I/O for the bio successfully completed. | 738 | * I/O for the bio successfully completed. |
739 | * Report the data completion to the upper layer. | 739 | * Report the data completion to the upper layer. |
740 | */ | 740 | */ |
741 | 741 | ||
742 | /* | 742 | /* |
743 | * bios are processed from the head of the list. | 743 | * bios are processed from the head of the list. |
744 | * So the completing bio should always be rq->bio. | 744 | * So the completing bio should always be rq->bio. |
745 | * If it's not, something wrong is happening. | 745 | * If it's not, something wrong is happening. |
746 | */ | 746 | */ |
747 | if (tio->orig->bio != bio) | 747 | if (tio->orig->bio != bio) |
748 | DMERR("bio completion is going in the middle of the request"); | 748 | DMERR("bio completion is going in the middle of the request"); |
749 | 749 | ||
750 | /* | 750 | /* |
751 | * Update the original request. | 751 | * Update the original request. |
752 | * Do not use blk_end_request() here, because it may complete | 752 | * Do not use blk_end_request() here, because it may complete |
753 | * the original request before the clone, and break the ordering. | 753 | * the original request before the clone, and break the ordering. |
754 | */ | 754 | */ |
755 | blk_update_request(tio->orig, 0, nr_bytes); | 755 | blk_update_request(tio->orig, 0, nr_bytes); |
756 | } | 756 | } |
757 | 757 | ||
758 | static void store_barrier_error(struct mapped_device *md, int error) | 758 | static void store_barrier_error(struct mapped_device *md, int error) |
759 | { | 759 | { |
760 | unsigned long flags; | 760 | unsigned long flags; |
761 | 761 | ||
762 | spin_lock_irqsave(&md->barrier_error_lock, flags); | 762 | spin_lock_irqsave(&md->barrier_error_lock, flags); |
763 | /* | 763 | /* |
764 | * Basically, the first error is taken, but: | 764 | * Basically, the first error is taken, but: |
765 | * -EOPNOTSUPP supersedes any I/O error. | 765 | * -EOPNOTSUPP supersedes any I/O error. |
766 | * Requeue request supersedes any I/O error but -EOPNOTSUPP. | 766 | * Requeue request supersedes any I/O error but -EOPNOTSUPP. |
767 | */ | 767 | */ |
768 | if (!md->barrier_error || error == -EOPNOTSUPP || | 768 | if (!md->barrier_error || error == -EOPNOTSUPP || |
769 | (md->barrier_error != -EOPNOTSUPP && | 769 | (md->barrier_error != -EOPNOTSUPP && |
770 | error == DM_ENDIO_REQUEUE)) | 770 | error == DM_ENDIO_REQUEUE)) |
771 | md->barrier_error = error; | 771 | md->barrier_error = error; |
772 | spin_unlock_irqrestore(&md->barrier_error_lock, flags); | 772 | spin_unlock_irqrestore(&md->barrier_error_lock, flags); |
773 | } | 773 | } |
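The precedence rule coded above reads more easily as a pure function. An illustrative standalone version (not kernel code; the two constants are stand-in values chosen only for this demo):

#include <stdio.h>

#define EOPNOTSUPP        95    /* stand-in for the errno value */
#define DM_ENDIO_REQUEUE   1    /* stand-in for the dm status code */

static int pick_barrier_error(int current_error, int new_error)
{
    /* the first error is kept, except that -EOPNOTSUPP beats everything and
     * a requeue beats everything but -EOPNOTSUPP */
    if (!current_error || new_error == -EOPNOTSUPP ||
        (current_error != -EOPNOTSUPP && new_error == DM_ENDIO_REQUEUE))
        return new_error;
    return current_error;
}

int main(void)
{
    int e = 0;

    e = pick_barrier_error(e, -5);                  /* first error is stored */
    e = pick_barrier_error(e, DM_ENDIO_REQUEUE);    /* requeue supersedes an I/O error */
    e = pick_barrier_error(e, -EOPNOTSUPP);         /* -EOPNOTSUPP supersedes everything */
    printf("stored barrier error: %d\n", e);        /* prints -95 */
    return 0;
}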
774 | 774 | ||
775 | /* | 775 | /* |
776 | * Don't touch any member of the md after calling this function because | 776 | * Don't touch any member of the md after calling this function because |
777 | * the md may be freed in dm_put() at the end of this function. | 777 | * the md may be freed in dm_put() at the end of this function. |
778 | * Or do dm_get() before calling this function and dm_put() later. | 778 | * Or do dm_get() before calling this function and dm_put() later. |
779 | */ | 779 | */ |
780 | static void rq_completed(struct mapped_device *md, int rw, int run_queue) | 780 | static void rq_completed(struct mapped_device *md, int rw, int run_queue) |
781 | { | 781 | { |
782 | atomic_dec(&md->pending[rw]); | 782 | atomic_dec(&md->pending[rw]); |
783 | 783 | ||
784 | /* nudge anyone waiting on suspend queue */ | 784 | /* nudge anyone waiting on suspend queue */ |
785 | if (!md_in_flight(md)) | 785 | if (!md_in_flight(md)) |
786 | wake_up(&md->wait); | 786 | wake_up(&md->wait); |
787 | 787 | ||
788 | if (run_queue) | 788 | if (run_queue) |
789 | blk_run_queue(md->queue); | 789 | blk_run_queue(md->queue); |
790 | 790 | ||
791 | /* | 791 | /* |
792 | * dm_put() must be at the end of this function. See the comment above | 792 | * dm_put() must be at the end of this function. See the comment above |
793 | */ | 793 | */ |
794 | dm_put(md); | 794 | dm_put(md); |
795 | } | 795 | } |
796 | 796 | ||
797 | static void free_rq_clone(struct request *clone) | 797 | static void free_rq_clone(struct request *clone) |
798 | { | 798 | { |
799 | struct dm_rq_target_io *tio = clone->end_io_data; | 799 | struct dm_rq_target_io *tio = clone->end_io_data; |
800 | 800 | ||
801 | blk_rq_unprep_clone(clone); | 801 | blk_rq_unprep_clone(clone); |
802 | free_rq_tio(tio); | 802 | free_rq_tio(tio); |
803 | } | 803 | } |
804 | 804 | ||
805 | /* | 805 | /* |
806 | * Complete the clone and the original request. | 806 | * Complete the clone and the original request. |
807 | * Must be called without queue lock. | 807 | * Must be called without queue lock. |
808 | */ | 808 | */ |
809 | static void dm_end_request(struct request *clone, int error) | 809 | static void dm_end_request(struct request *clone, int error) |
810 | { | 810 | { |
811 | int rw = rq_data_dir(clone); | 811 | int rw = rq_data_dir(clone); |
812 | int run_queue = 1; | 812 | int run_queue = 1; |
813 | bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; | 813 | bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; |
814 | struct dm_rq_target_io *tio = clone->end_io_data; | 814 | struct dm_rq_target_io *tio = clone->end_io_data; |
815 | struct mapped_device *md = tio->md; | 815 | struct mapped_device *md = tio->md; |
816 | struct request *rq = tio->orig; | 816 | struct request *rq = tio->orig; |
817 | 817 | ||
818 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { | 818 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { |
819 | rq->errors = clone->errors; | 819 | rq->errors = clone->errors; |
820 | rq->resid_len = clone->resid_len; | 820 | rq->resid_len = clone->resid_len; |
821 | 821 | ||
822 | if (rq->sense) | 822 | if (rq->sense) |
823 | /* | 823 | /* |
824 | * We are using the sense buffer of the original | 824 | * We are using the sense buffer of the original |
825 | * request. | 825 | * request. |
826 | * So setting the length of the sense data is enough. | 826 | * So setting the length of the sense data is enough. |
827 | */ | 827 | */ |
828 | rq->sense_len = clone->sense_len; | 828 | rq->sense_len = clone->sense_len; |
829 | } | 829 | } |
830 | 830 | ||
831 | free_rq_clone(clone); | 831 | free_rq_clone(clone); |
832 | 832 | ||
833 | if (unlikely(is_barrier)) { | 833 | if (unlikely(is_barrier)) { |
834 | if (unlikely(error)) | 834 | if (unlikely(error)) |
835 | store_barrier_error(md, error); | 835 | store_barrier_error(md, error); |
836 | run_queue = 0; | 836 | run_queue = 0; |
837 | } else | 837 | } else |
838 | blk_end_request_all(rq, error); | 838 | blk_end_request_all(rq, error); |
839 | 839 | ||
840 | rq_completed(md, rw, run_queue); | 840 | rq_completed(md, rw, run_queue); |
841 | } | 841 | } |
842 | 842 | ||
843 | static void dm_unprep_request(struct request *rq) | 843 | static void dm_unprep_request(struct request *rq) |
844 | { | 844 | { |
845 | struct request *clone = rq->special; | 845 | struct request *clone = rq->special; |
846 | 846 | ||
847 | rq->special = NULL; | 847 | rq->special = NULL; |
848 | rq->cmd_flags &= ~REQ_DONTPREP; | 848 | rq->cmd_flags &= ~REQ_DONTPREP; |
849 | 849 | ||
850 | free_rq_clone(clone); | 850 | free_rq_clone(clone); |
851 | } | 851 | } |
852 | 852 | ||
853 | /* | 853 | /* |
854 | * Requeue the original request of a clone. | 854 | * Requeue the original request of a clone. |
855 | */ | 855 | */ |
856 | void dm_requeue_unmapped_request(struct request *clone) | 856 | void dm_requeue_unmapped_request(struct request *clone) |
857 | { | 857 | { |
858 | int rw = rq_data_dir(clone); | 858 | int rw = rq_data_dir(clone); |
859 | struct dm_rq_target_io *tio = clone->end_io_data; | 859 | struct dm_rq_target_io *tio = clone->end_io_data; |
860 | struct mapped_device *md = tio->md; | 860 | struct mapped_device *md = tio->md; |
861 | struct request *rq = tio->orig; | 861 | struct request *rq = tio->orig; |
862 | struct request_queue *q = rq->q; | 862 | struct request_queue *q = rq->q; |
863 | unsigned long flags; | 863 | unsigned long flags; |
864 | 864 | ||
865 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | 865 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { |
866 | /* | 866 | /* |
867 | * Barrier clones share an original request. | 867 | * Barrier clones share an original request. |
868 | * Leave it to dm_end_request(), which handles this special | 868 | * Leave it to dm_end_request(), which handles this special |
869 | * case. | 869 | * case. |
870 | */ | 870 | */ |
871 | dm_end_request(clone, DM_ENDIO_REQUEUE); | 871 | dm_end_request(clone, DM_ENDIO_REQUEUE); |
872 | return; | 872 | return; |
873 | } | 873 | } |
874 | 874 | ||
875 | dm_unprep_request(rq); | 875 | dm_unprep_request(rq); |
876 | 876 | ||
877 | spin_lock_irqsave(q->queue_lock, flags); | 877 | spin_lock_irqsave(q->queue_lock, flags); |
878 | if (elv_queue_empty(q)) | 878 | if (elv_queue_empty(q)) |
879 | blk_plug_device(q); | 879 | blk_plug_device(q); |
880 | blk_requeue_request(q, rq); | 880 | blk_requeue_request(q, rq); |
881 | spin_unlock_irqrestore(q->queue_lock, flags); | 881 | spin_unlock_irqrestore(q->queue_lock, flags); |
882 | 882 | ||
883 | rq_completed(md, rw, 0); | 883 | rq_completed(md, rw, 0); |
884 | } | 884 | } |
885 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); | 885 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); |
886 | 886 | ||
887 | static void __stop_queue(struct request_queue *q) | 887 | static void __stop_queue(struct request_queue *q) |
888 | { | 888 | { |
889 | blk_stop_queue(q); | 889 | blk_stop_queue(q); |
890 | } | 890 | } |
891 | 891 | ||
892 | static void stop_queue(struct request_queue *q) | 892 | static void stop_queue(struct request_queue *q) |
893 | { | 893 | { |
894 | unsigned long flags; | 894 | unsigned long flags; |
895 | 895 | ||
896 | spin_lock_irqsave(q->queue_lock, flags); | 896 | spin_lock_irqsave(q->queue_lock, flags); |
897 | __stop_queue(q); | 897 | __stop_queue(q); |
898 | spin_unlock_irqrestore(q->queue_lock, flags); | 898 | spin_unlock_irqrestore(q->queue_lock, flags); |
899 | } | 899 | } |
900 | 900 | ||
901 | static void __start_queue(struct request_queue *q) | 901 | static void __start_queue(struct request_queue *q) |
902 | { | 902 | { |
903 | if (blk_queue_stopped(q)) | 903 | if (blk_queue_stopped(q)) |
904 | blk_start_queue(q); | 904 | blk_start_queue(q); |
905 | } | 905 | } |
906 | 906 | ||
907 | static void start_queue(struct request_queue *q) | 907 | static void start_queue(struct request_queue *q) |
908 | { | 908 | { |
909 | unsigned long flags; | 909 | unsigned long flags; |
910 | 910 | ||
911 | spin_lock_irqsave(q->queue_lock, flags); | 911 | spin_lock_irqsave(q->queue_lock, flags); |
912 | __start_queue(q); | 912 | __start_queue(q); |
913 | spin_unlock_irqrestore(q->queue_lock, flags); | 913 | spin_unlock_irqrestore(q->queue_lock, flags); |
914 | } | 914 | } |
915 | 915 | ||
916 | static void dm_done(struct request *clone, int error, bool mapped) | 916 | static void dm_done(struct request *clone, int error, bool mapped) |
917 | { | 917 | { |
918 | int r = error; | 918 | int r = error; |
919 | struct dm_rq_target_io *tio = clone->end_io_data; | 919 | struct dm_rq_target_io *tio = clone->end_io_data; |
920 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; | 920 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; |
921 | 921 | ||
922 | if (mapped && rq_end_io) | 922 | if (mapped && rq_end_io) |
923 | r = rq_end_io(tio->ti, clone, error, &tio->info); | 923 | r = rq_end_io(tio->ti, clone, error, &tio->info); |
924 | 924 | ||
925 | if (r <= 0) | 925 | if (r <= 0) |
926 | /* The target wants to complete the I/O */ | 926 | /* The target wants to complete the I/O */ |
927 | dm_end_request(clone, r); | 927 | dm_end_request(clone, r); |
928 | else if (r == DM_ENDIO_INCOMPLETE) | 928 | else if (r == DM_ENDIO_INCOMPLETE) |
929 | /* The target will handle the I/O */ | 929 | /* The target will handle the I/O */ |
930 | return; | 930 | return; |
931 | else if (r == DM_ENDIO_REQUEUE) | 931 | else if (r == DM_ENDIO_REQUEUE) |
932 | /* The target wants to requeue the I/O */ | 932 | /* The target wants to requeue the I/O */ |
933 | dm_requeue_unmapped_request(clone); | 933 | dm_requeue_unmapped_request(clone); |
934 | else { | 934 | else { |
935 | DMWARN("unimplemented target endio return value: %d", r); | 935 | DMWARN("unimplemented target endio return value: %d", r); |
936 | BUG(); | 936 | BUG(); |
937 | } | 937 | } |
938 | } | 938 | } |
939 | 939 | ||
940 | /* | 940 | /* |
941 | * Request completion handler for request-based dm | 941 | * Request completion handler for request-based dm |
942 | */ | 942 | */ |
943 | static void dm_softirq_done(struct request *rq) | 943 | static void dm_softirq_done(struct request *rq) |
944 | { | 944 | { |
945 | bool mapped = true; | 945 | bool mapped = true; |
946 | struct request *clone = rq->completion_data; | 946 | struct request *clone = rq->completion_data; |
947 | struct dm_rq_target_io *tio = clone->end_io_data; | 947 | struct dm_rq_target_io *tio = clone->end_io_data; |
948 | 948 | ||
949 | if (rq->cmd_flags & REQ_FAILED) | 949 | if (rq->cmd_flags & REQ_FAILED) |
950 | mapped = false; | 950 | mapped = false; |
951 | 951 | ||
952 | dm_done(clone, tio->error, mapped); | 952 | dm_done(clone, tio->error, mapped); |
953 | } | 953 | } |
954 | 954 | ||
955 | /* | 955 | /* |
956 | * Complete the clone and the original request with the error status | 956 | * Complete the clone and the original request with the error status |
957 | * through softirq context. | 957 | * through softirq context. |
958 | */ | 958 | */ |
959 | static void dm_complete_request(struct request *clone, int error) | 959 | static void dm_complete_request(struct request *clone, int error) |
960 | { | 960 | { |
961 | struct dm_rq_target_io *tio = clone->end_io_data; | 961 | struct dm_rq_target_io *tio = clone->end_io_data; |
962 | struct request *rq = tio->orig; | 962 | struct request *rq = tio->orig; |
963 | 963 | ||
964 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | 964 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { |
965 | /* | 965 | /* |
966 | * Barrier clones share an original request. So can't use | 966 | * Barrier clones share an original request. So can't use |
967 | * softirq_done with the original. | 967 | * softirq_done with the original. |
968 | * Pass the clone to dm_done() directly in this special case. | 968 | * Pass the clone to dm_done() directly in this special case. |
969 | * It is safe (even if clone->q->queue_lock is held here) | 969 | * It is safe (even if clone->q->queue_lock is held here) |
970 | * because there is no I/O dispatching during the completion | 970 | * because there is no I/O dispatching during the completion |
971 | * of barrier clone. | 971 | * of barrier clone. |
972 | */ | 972 | */ |
973 | dm_done(clone, error, true); | 973 | dm_done(clone, error, true); |
974 | return; | 974 | return; |
975 | } | 975 | } |
976 | 976 | ||
977 | tio->error = error; | 977 | tio->error = error; |
978 | rq->completion_data = clone; | 978 | rq->completion_data = clone; |
979 | blk_complete_request(rq); | 979 | blk_complete_request(rq); |
980 | } | 980 | } |
981 | 981 | ||
982 | /* | 982 | /* |
983 | * Complete the not-mapped clone and the original request with the error status | 983 | * Complete the not-mapped clone and the original request with the error status |
984 | * through softirq context. | 984 | * through softirq context. |
985 | * Target's rq_end_io() function isn't called. | 985 | * Target's rq_end_io() function isn't called. |
986 | * This may be used when the target's map_rq() function fails. | 986 | * This may be used when the target's map_rq() function fails. |
987 | */ | 987 | */ |
988 | void dm_kill_unmapped_request(struct request *clone, int error) | 988 | void dm_kill_unmapped_request(struct request *clone, int error) |
989 | { | 989 | { |
990 | struct dm_rq_target_io *tio = clone->end_io_data; | 990 | struct dm_rq_target_io *tio = clone->end_io_data; |
991 | struct request *rq = tio->orig; | 991 | struct request *rq = tio->orig; |
992 | 992 | ||
993 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | 993 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { |
994 | /* | 994 | /* |
995 | * Barrier clones share an original request. | 995 | * Barrier clones share an original request. |
996 | * Leave it to dm_end_request(), which handles this special | 996 | * Leave it to dm_end_request(), which handles this special |
997 | * case. | 997 | * case. |
998 | */ | 998 | */ |
999 | BUG_ON(error > 0); | 999 | BUG_ON(error > 0); |
1000 | dm_end_request(clone, error); | 1000 | dm_end_request(clone, error); |
1001 | return; | 1001 | return; |
1002 | } | 1002 | } |
1003 | 1003 | ||
1004 | rq->cmd_flags |= REQ_FAILED; | 1004 | rq->cmd_flags |= REQ_FAILED; |
1005 | dm_complete_request(clone, error); | 1005 | dm_complete_request(clone, error); |
1006 | } | 1006 | } |
1007 | EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); | 1007 | EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); |
1008 | 1008 | ||
1009 | /* | 1009 | /* |
1010 | * Called with the queue lock held | 1010 | * Called with the queue lock held |
1011 | */ | 1011 | */ |
1012 | static void end_clone_request(struct request *clone, int error) | 1012 | static void end_clone_request(struct request *clone, int error) |
1013 | { | 1013 | { |
1014 | /* | 1014 | /* |
1015 | * For just cleaning up the information of the queue in which | 1015 | * For just cleaning up the information of the queue in which |
1016 | * the clone was dispatched. | 1016 | * the clone was dispatched. |
1017 | * The clone is *NOT* actually freed here because it is allocated from | 1017 | * The clone is *NOT* actually freed here because it is allocated from |
1018 | * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. | 1018 | * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. |
1019 | */ | 1019 | */ |
1020 | __blk_put_request(clone->q, clone); | 1020 | __blk_put_request(clone->q, clone); |
1021 | 1021 | ||
1022 | /* | 1022 | /* |
1023 | * Actual request completion is done in a softirq context which doesn't | 1023 | * Actual request completion is done in a softirq context which doesn't |
1024 | * hold the queue lock. Otherwise, deadlock could occur because: | 1024 | * hold the queue lock. Otherwise, deadlock could occur because: |
1025 | * - another request may be submitted by the upper level driver | 1025 | * - another request may be submitted by the upper level driver |
1026 | * of the stacking during the completion | 1026 | * of the stacking during the completion |
1027 | * - the submission which requires queue lock may be done | 1027 | * - the submission which requires queue lock may be done |
1028 | * against this queue | 1028 | * against this queue |
1029 | */ | 1029 | */ |
1030 | dm_complete_request(clone, error); | 1030 | dm_complete_request(clone, error); |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | /* | 1033 | /* |
1034 | * Return maximum size of I/O possible at the supplied sector up to the current | 1034 | * Return maximum size of I/O possible at the supplied sector up to the current |
1035 | * target boundary. | 1035 | * target boundary. |
1036 | */ | 1036 | */ |
1037 | static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) | 1037 | static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) |
1038 | { | 1038 | { |
1039 | sector_t target_offset = dm_target_offset(ti, sector); | 1039 | sector_t target_offset = dm_target_offset(ti, sector); |
1040 | 1040 | ||
1041 | return ti->len - target_offset; | 1041 | return ti->len - target_offset; |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | static sector_t max_io_len(sector_t sector, struct dm_target *ti) | 1044 | static sector_t max_io_len(sector_t sector, struct dm_target *ti) |
1045 | { | 1045 | { |
1046 | sector_t len = max_io_len_target_boundary(sector, ti); | 1046 | sector_t len = max_io_len_target_boundary(sector, ti); |
1047 | 1047 | ||
1048 | /* | 1048 | /* |
1049 | * Does the target need to split even further? | 1049 | * Does the target need to split even further? |
1050 | */ | 1050 | */ |
1051 | if (ti->split_io) { | 1051 | if (ti->split_io) { |
1052 | sector_t boundary; | 1052 | sector_t boundary; |
1053 | sector_t offset = dm_target_offset(ti, sector); | 1053 | sector_t offset = dm_target_offset(ti, sector); |
1054 | boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) | 1054 | boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) |
1055 | - offset; | 1055 | - offset; |
1056 | if (len > boundary) | 1056 | if (len > boundary) |
1057 | len = boundary; | 1057 | len = boundary; |
1058 | } | 1058 | } |
1059 | 1059 | ||
1060 | return len; | 1060 | return len; |
1061 | } | 1061 | } |
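The split_io clamp relies on ti->split_io being a power of two: masking rounds (offset + split_io) down to a multiple of split_io, which is the next boundary above offset. A tiny standalone example with invented numbers (not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t split_io = 8;      /* the target wants I/O split every 8 sectors */
    uint64_t offset   = 5;      /* offset of the I/O within the target */
    uint64_t len      = 20;     /* sectors remaining up to the target boundary */

    /* distance from offset to the next split_io boundary: 3 sectors here */
    uint64_t boundary = ((offset + split_io) & ~(split_io - 1)) - offset;

    if (len > boundary)
        len = boundary;

    printf("clamped len = %llu sectors\n", (unsigned long long)len);
    return 0;
}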
1062 | 1062 | ||
1063 | static void __map_bio(struct dm_target *ti, struct bio *clone, | 1063 | static void __map_bio(struct dm_target *ti, struct bio *clone, |
1064 | struct dm_target_io *tio) | 1064 | struct dm_target_io *tio) |
1065 | { | 1065 | { |
1066 | int r; | 1066 | int r; |
1067 | sector_t sector; | 1067 | sector_t sector; |
1068 | struct mapped_device *md; | 1068 | struct mapped_device *md; |
1069 | 1069 | ||
1070 | clone->bi_end_io = clone_endio; | 1070 | clone->bi_end_io = clone_endio; |
1071 | clone->bi_private = tio; | 1071 | clone->bi_private = tio; |
1072 | 1072 | ||
1073 | /* | 1073 | /* |
1074 | * Map the clone. If r == 0 we don't need to do | 1074 | * Map the clone. If r == 0 we don't need to do |
1075 | * anything, the target has assumed ownership of | 1075 | * anything, the target has assumed ownership of |
1076 | * this io. | 1076 | * this io. |
1077 | */ | 1077 | */ |
1078 | atomic_inc(&tio->io->io_count); | 1078 | atomic_inc(&tio->io->io_count); |
1079 | sector = clone->bi_sector; | 1079 | sector = clone->bi_sector; |
1080 | r = ti->type->map(ti, clone, &tio->info); | 1080 | r = ti->type->map(ti, clone, &tio->info); |
1081 | if (r == DM_MAPIO_REMAPPED) { | 1081 | if (r == DM_MAPIO_REMAPPED) { |
1082 | /* the bio has been remapped so dispatch it */ | 1082 | /* the bio has been remapped so dispatch it */ |
1083 | 1083 | ||
1084 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, | 1084 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, |
1085 | tio->io->bio->bi_bdev->bd_dev, sector); | 1085 | tio->io->bio->bi_bdev->bd_dev, sector); |
1086 | 1086 | ||
1087 | generic_make_request(clone); | 1087 | generic_make_request(clone); |
1088 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { | 1088 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { |
1089 | /* error the io and bail out, or requeue it if needed */ | 1089 | /* error the io and bail out, or requeue it if needed */ |
1090 | md = tio->io->md; | 1090 | md = tio->io->md; |
1091 | dec_pending(tio->io, r); | 1091 | dec_pending(tio->io, r); |
1092 | /* | 1092 | /* |
1093 | * Store bio_set for cleanup. | 1093 | * Store bio_set for cleanup. |
1094 | */ | 1094 | */ |
1095 | clone->bi_private = md->bs; | 1095 | clone->bi_private = md->bs; |
1096 | bio_put(clone); | 1096 | bio_put(clone); |
1097 | free_tio(md, tio); | 1097 | free_tio(md, tio); |
1098 | } else if (r) { | 1098 | } else if (r) { |
1099 | DMWARN("unimplemented target map return value: %d", r); | 1099 | DMWARN("unimplemented target map return value: %d", r); |
1100 | BUG(); | 1100 | BUG(); |
1101 | } | 1101 | } |
1102 | } | 1102 | } |
1103 | 1103 | ||
1104 | struct clone_info { | 1104 | struct clone_info { |
1105 | struct mapped_device *md; | 1105 | struct mapped_device *md; |
1106 | struct dm_table *map; | 1106 | struct dm_table *map; |
1107 | struct bio *bio; | 1107 | struct bio *bio; |
1108 | struct dm_io *io; | 1108 | struct dm_io *io; |
1109 | sector_t sector; | 1109 | sector_t sector; |
1110 | sector_t sector_count; | 1110 | sector_t sector_count; |
1111 | unsigned short idx; | 1111 | unsigned short idx; |
1112 | }; | 1112 | }; |
1113 | 1113 | ||
1114 | static void dm_bio_destructor(struct bio *bio) | 1114 | static void dm_bio_destructor(struct bio *bio) |
1115 | { | 1115 | { |
1116 | struct bio_set *bs = bio->bi_private; | 1116 | struct bio_set *bs = bio->bi_private; |
1117 | 1117 | ||
1118 | bio_free(bio, bs); | 1118 | bio_free(bio, bs); |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | /* | 1121 | /* |
1122 | * Creates a little bio that just does part of a bvec. | 1122 | * Creates a little bio that just does part of a bvec. |
1123 | */ | 1123 | */ |
1124 | static struct bio *split_bvec(struct bio *bio, sector_t sector, | 1124 | static struct bio *split_bvec(struct bio *bio, sector_t sector, |
1125 | unsigned short idx, unsigned int offset, | 1125 | unsigned short idx, unsigned int offset, |
1126 | unsigned int len, struct bio_set *bs) | 1126 | unsigned int len, struct bio_set *bs) |
1127 | { | 1127 | { |
1128 | struct bio *clone; | 1128 | struct bio *clone; |
1129 | struct bio_vec *bv = bio->bi_io_vec + idx; | 1129 | struct bio_vec *bv = bio->bi_io_vec + idx; |
1130 | 1130 | ||
1131 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); | 1131 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); |
1132 | clone->bi_destructor = dm_bio_destructor; | 1132 | clone->bi_destructor = dm_bio_destructor; |
1133 | *clone->bi_io_vec = *bv; | 1133 | *clone->bi_io_vec = *bv; |
1134 | 1134 | ||
1135 | clone->bi_sector = sector; | 1135 | clone->bi_sector = sector; |
1136 | clone->bi_bdev = bio->bi_bdev; | 1136 | clone->bi_bdev = bio->bi_bdev; |
1137 | clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; | 1137 | clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; |
1138 | clone->bi_vcnt = 1; | 1138 | clone->bi_vcnt = 1; |
1139 | clone->bi_size = to_bytes(len); | 1139 | clone->bi_size = to_bytes(len); |
1140 | clone->bi_io_vec->bv_offset = offset; | 1140 | clone->bi_io_vec->bv_offset = offset; |
1141 | clone->bi_io_vec->bv_len = clone->bi_size; | 1141 | clone->bi_io_vec->bv_len = clone->bi_size; |
1142 | clone->bi_flags |= 1 << BIO_CLONED; | 1142 | clone->bi_flags |= 1 << BIO_CLONED; |
1143 | 1143 | ||
1144 | if (bio_integrity(bio)) { | 1144 | if (bio_integrity(bio)) { |
1145 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1145 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); |
1146 | bio_integrity_trim(clone, | 1146 | bio_integrity_trim(clone, |
1147 | bio_sector_offset(bio, idx, offset), len); | 1147 | bio_sector_offset(bio, idx, offset), len); |
1148 | } | 1148 | } |
1149 | 1149 | ||
1150 | return clone; | 1150 | return clone; |
1151 | } | 1151 | } |
1152 | 1152 | ||
1153 | /* | 1153 | /* |
1154 | * Creates a bio that consists of a range of complete bvecs. | 1154 | * Creates a bio that consists of a range of complete bvecs. |
1155 | */ | 1155 | */ |
1156 | static struct bio *clone_bio(struct bio *bio, sector_t sector, | 1156 | static struct bio *clone_bio(struct bio *bio, sector_t sector, |
1157 | unsigned short idx, unsigned short bv_count, | 1157 | unsigned short idx, unsigned short bv_count, |
1158 | unsigned int len, struct bio_set *bs) | 1158 | unsigned int len, struct bio_set *bs) |
1159 | { | 1159 | { |
1160 | struct bio *clone; | 1160 | struct bio *clone; |
1161 | 1161 | ||
1162 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1162 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
1163 | __bio_clone(clone, bio); | 1163 | __bio_clone(clone, bio); |
1164 | clone->bi_rw &= ~REQ_HARDBARRIER; | 1164 | clone->bi_rw &= ~REQ_HARDBARRIER; |
1165 | clone->bi_destructor = dm_bio_destructor; | 1165 | clone->bi_destructor = dm_bio_destructor; |
1166 | clone->bi_sector = sector; | 1166 | clone->bi_sector = sector; |
1167 | clone->bi_idx = idx; | 1167 | clone->bi_idx = idx; |
1168 | clone->bi_vcnt = idx + bv_count; | 1168 | clone->bi_vcnt = idx + bv_count; |
1169 | clone->bi_size = to_bytes(len); | 1169 | clone->bi_size = to_bytes(len); |
1170 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | 1170 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); |
1171 | 1171 | ||
1172 | if (bio_integrity(bio)) { | 1172 | if (bio_integrity(bio)) { |
1173 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1173 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); |
1174 | 1174 | ||
1175 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) | 1175 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) |
1176 | bio_integrity_trim(clone, | 1176 | bio_integrity_trim(clone, |
1177 | bio_sector_offset(bio, idx, 0), len); | 1177 | bio_sector_offset(bio, idx, 0), len); |
1178 | } | 1178 | } |
1179 | 1179 | ||
1180 | return clone; | 1180 | return clone; |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | static struct dm_target_io *alloc_tio(struct clone_info *ci, | 1183 | static struct dm_target_io *alloc_tio(struct clone_info *ci, |
1184 | struct dm_target *ti) | 1184 | struct dm_target *ti) |
1185 | { | 1185 | { |
1186 | struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); | 1186 | struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); |
1187 | 1187 | ||
1188 | tio->io = ci->io; | 1188 | tio->io = ci->io; |
1189 | tio->ti = ti; | 1189 | tio->ti = ti; |
1190 | memset(&tio->info, 0, sizeof(tio->info)); | 1190 | memset(&tio->info, 0, sizeof(tio->info)); |
1191 | 1191 | ||
1192 | return tio; | 1192 | return tio; |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, | 1195 | static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, |
1196 | unsigned request_nr, sector_t len) | 1196 | unsigned request_nr, sector_t len) |
1197 | { | 1197 | { |
1198 | struct dm_target_io *tio = alloc_tio(ci, ti); | 1198 | struct dm_target_io *tio = alloc_tio(ci, ti); |
1199 | struct bio *clone; | 1199 | struct bio *clone; |
1200 | 1200 | ||
1201 | tio->info.target_request_nr = request_nr; | 1201 | tio->info.target_request_nr = request_nr; |
1202 | 1202 | ||
1203 | /* | 1203 | /* |
1204 | * Discard requests require the bio's inline iovecs be initialized. | 1204 | * Discard requests require the bio's inline iovecs be initialized. |
1205 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush | 1205 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush |
1206 | * and discard, so no need for concern about wasted bvec allocations. | 1206 | * and discard, so no need for concern about wasted bvec allocations. |
1207 | */ | 1207 | */ |
1208 | clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); | 1208 | clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); |
1209 | __bio_clone(clone, ci->bio); | 1209 | __bio_clone(clone, ci->bio); |
1210 | clone->bi_destructor = dm_bio_destructor; | 1210 | clone->bi_destructor = dm_bio_destructor; |
1211 | if (len) { | 1211 | if (len) { |
1212 | clone->bi_sector = ci->sector; | 1212 | clone->bi_sector = ci->sector; |
1213 | clone->bi_size = to_bytes(len); | 1213 | clone->bi_size = to_bytes(len); |
1214 | } | 1214 | } |
1215 | 1215 | ||
1216 | __map_bio(ti, clone, tio); | 1216 | __map_bio(ti, clone, tio); |
1217 | } | 1217 | } |
1218 | 1218 | ||
1219 | static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, | 1219 | static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, |
1220 | unsigned num_requests, sector_t len) | 1220 | unsigned num_requests, sector_t len) |
1221 | { | 1221 | { |
1222 | unsigned request_nr; | 1222 | unsigned request_nr; |
1223 | 1223 | ||
1224 | for (request_nr = 0; request_nr < num_requests; request_nr++) | 1224 | for (request_nr = 0; request_nr < num_requests; request_nr++) |
1225 | __issue_target_request(ci, ti, request_nr, len); | 1225 | __issue_target_request(ci, ti, request_nr, len); |
1226 | } | 1226 | } |
1227 | 1227 | ||
1228 | static int __clone_and_map_empty_barrier(struct clone_info *ci) | 1228 | static int __clone_and_map_empty_barrier(struct clone_info *ci) |
1229 | { | 1229 | { |
1230 | unsigned target_nr = 0; | 1230 | unsigned target_nr = 0; |
1231 | struct dm_target *ti; | 1231 | struct dm_target *ti; |
1232 | 1232 | ||
1233 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | 1233 | while ((ti = dm_table_get_target(ci->map, target_nr++))) |
1234 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); | 1234 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); |
1235 | 1235 | ||
1236 | ci->sector_count = 0; | 1236 | ci->sector_count = 0; |
1237 | 1237 | ||
1238 | return 0; | 1238 | return 0; |
1239 | } | 1239 | } |
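So an empty barrier is not split by sector at all: it fans out into ti->num_flush_requests empty clones for every target in the table. Roughly, as a toy loop (illustrative only; the table contents are invented):

#include <stdio.h>

int main(void)
{
    int num_flush_requests[] = { 1, 2, 1 };     /* one entry per pretend target */
    int num_targets = sizeof(num_flush_requests) / sizeof(num_flush_requests[0]);

    for (int t = 0; t < num_targets; t++)
        for (int nr = 0; nr < num_flush_requests[t]; nr++)
            printf("target %d: issue empty flush clone, request_nr %d\n", t, nr);

    return 0;
}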
1240 | 1240 | ||
1241 | /* | 1241 | /* |
1242 | * Perform all io with a single clone. | 1242 | * Perform all io with a single clone. |
1243 | */ | 1243 | */ |
1244 | static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) | 1244 | static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) |
1245 | { | 1245 | { |
1246 | struct bio *clone, *bio = ci->bio; | 1246 | struct bio *clone, *bio = ci->bio; |
1247 | struct dm_target_io *tio; | 1247 | struct dm_target_io *tio; |
1248 | 1248 | ||
1249 | tio = alloc_tio(ci, ti); | 1249 | tio = alloc_tio(ci, ti); |
1250 | clone = clone_bio(bio, ci->sector, ci->idx, | 1250 | clone = clone_bio(bio, ci->sector, ci->idx, |
1251 | bio->bi_vcnt - ci->idx, ci->sector_count, | 1251 | bio->bi_vcnt - ci->idx, ci->sector_count, |
1252 | ci->md->bs); | 1252 | ci->md->bs); |
1253 | __map_bio(ti, clone, tio); | 1253 | __map_bio(ti, clone, tio); |
1254 | ci->sector_count = 0; | 1254 | ci->sector_count = 0; |
1255 | } | 1255 | } |
1256 | 1256 | ||
1257 | static int __clone_and_map_discard(struct clone_info *ci) | 1257 | static int __clone_and_map_discard(struct clone_info *ci) |
1258 | { | 1258 | { |
1259 | struct dm_target *ti; | 1259 | struct dm_target *ti; |
1260 | sector_t len; | 1260 | sector_t len; |
1261 | 1261 | ||
1262 | do { | 1262 | do { |
1263 | ti = dm_table_find_target(ci->map, ci->sector); | 1263 | ti = dm_table_find_target(ci->map, ci->sector); |
1264 | if (!dm_target_is_valid(ti)) | 1264 | if (!dm_target_is_valid(ti)) |
1265 | return -EIO; | 1265 | return -EIO; |
1266 | 1266 | ||
1267 | /* | 1267 | /* |
1268 | * Even though the device advertised discard support, | 1268 | * Even though the device advertised discard support, |
1269 | * reconfiguration might have changed that since the | 1269 | * reconfiguration might have changed that since the |
1270 | * check was performed. | 1270 | * check was performed. |
1271 | */ | 1271 | */ |
1272 | if (!ti->num_discard_requests) | 1272 | if (!ti->num_discard_requests) |
1273 | return -EOPNOTSUPP; | 1273 | return -EOPNOTSUPP; |
1274 | 1274 | ||
1275 | len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); | 1275 | len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); |
1276 | 1276 | ||
1277 | __issue_target_requests(ci, ti, ti->num_discard_requests, len); | 1277 | __issue_target_requests(ci, ti, ti->num_discard_requests, len); |
1278 | 1278 | ||
1279 | ci->sector += len; | 1279 | ci->sector += len; |
1280 | } while (ci->sector_count -= len); | 1280 | } while (ci->sector_count -= len); |
1281 | 1281 | ||
1282 | return 0; | 1282 | return 0; |
1283 | } | 1283 | } |
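The discard loop above walks the requested range and clips each piece at the boundary of the target it starts in, issuing ti->num_discard_requests clones per piece. The same arithmetic in a self-contained sketch (not kernel code; the equal-size two-target layout is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t target_len = 100;            /* pretend table: targets of 100 sectors each */
    uint64_t sector = 70, sector_count = 80;    /* discard spans the first boundary */
    uint64_t len;

    do {
        uint64_t target_offset = sector % target_len;   /* dm_target_offset() for this layout */
        uint64_t boundary = target_len - target_offset; /* max_io_len_target_boundary() */

        len = sector_count < boundary ? sector_count : boundary;

        printf("issue discard: sector %llu, len %llu\n",
               (unsigned long long)sector, (unsigned long long)len);

        sector += len;
    } while (sector_count -= len);

    return 0;
}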
1284 | 1284 | ||
1285 | static int __clone_and_map(struct clone_info *ci) | 1285 | static int __clone_and_map(struct clone_info *ci) |
1286 | { | 1286 | { |
1287 | struct bio *clone, *bio = ci->bio; | 1287 | struct bio *clone, *bio = ci->bio; |
1288 | struct dm_target *ti; | 1288 | struct dm_target *ti; |
1289 | sector_t len = 0, max; | 1289 | sector_t len = 0, max; |
1290 | struct dm_target_io *tio; | 1290 | struct dm_target_io *tio; |
1291 | 1291 | ||
1292 | if (unlikely(bio_empty_barrier(bio))) | 1292 | if (unlikely(bio_empty_barrier(bio))) |
1293 | return __clone_and_map_empty_barrier(ci); | 1293 | return __clone_and_map_empty_barrier(ci); |
1294 | 1294 | ||
1295 | if (unlikely(bio->bi_rw & REQ_DISCARD)) | 1295 | if (unlikely(bio->bi_rw & REQ_DISCARD)) |
1296 | return __clone_and_map_discard(ci); | 1296 | return __clone_and_map_discard(ci); |
1297 | 1297 | ||
1298 | ti = dm_table_find_target(ci->map, ci->sector); | 1298 | ti = dm_table_find_target(ci->map, ci->sector); |
1299 | if (!dm_target_is_valid(ti)) | 1299 | if (!dm_target_is_valid(ti)) |
1300 | return -EIO; | 1300 | return -EIO; |
1301 | 1301 | ||
1302 | max = max_io_len(ci->sector, ti); | 1302 | max = max_io_len(ci->sector, ti); |
1303 | 1303 | ||
1304 | if (ci->sector_count <= max) { | 1304 | if (ci->sector_count <= max) { |
1305 | /* | 1305 | /* |
1306 | * Optimise for the simple case where we can do all of | 1306 | * Optimise for the simple case where we can do all of |
1307 | * the remaining io with a single clone. | 1307 | * the remaining io with a single clone. |
1308 | */ | 1308 | */ |
1309 | __clone_and_map_simple(ci, ti); | 1309 | __clone_and_map_simple(ci, ti); |
1310 | 1310 | ||
1311 | } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { | 1311 | } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { |
1312 | /* | 1312 | /* |
1313 | * There are some bvecs that don't span targets. | 1313 | * There are some bvecs that don't span targets. |
1314 | * Do as many of these as possible. | 1314 | * Do as many of these as possible. |
1315 | */ | 1315 | */ |
1316 | int i; | 1316 | int i; |
1317 | sector_t remaining = max; | 1317 | sector_t remaining = max; |
1318 | sector_t bv_len; | 1318 | sector_t bv_len; |
1319 | 1319 | ||
1320 | for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { | 1320 | for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { |
1321 | bv_len = to_sector(bio->bi_io_vec[i].bv_len); | 1321 | bv_len = to_sector(bio->bi_io_vec[i].bv_len); |
1322 | 1322 | ||
1323 | if (bv_len > remaining) | 1323 | if (bv_len > remaining) |
1324 | break; | 1324 | break; |
1325 | 1325 | ||
1326 | remaining -= bv_len; | 1326 | remaining -= bv_len; |
1327 | len += bv_len; | 1327 | len += bv_len; |
1328 | } | 1328 | } |
1329 | 1329 | ||
1330 | tio = alloc_tio(ci, ti); | 1330 | tio = alloc_tio(ci, ti); |
1331 | clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, | 1331 | clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, |
1332 | ci->md->bs); | 1332 | ci->md->bs); |
1333 | __map_bio(ti, clone, tio); | 1333 | __map_bio(ti, clone, tio); |
1334 | 1334 | ||
1335 | ci->sector += len; | 1335 | ci->sector += len; |
1336 | ci->sector_count -= len; | 1336 | ci->sector_count -= len; |
1337 | ci->idx = i; | 1337 | ci->idx = i; |
1338 | 1338 | ||
1339 | } else { | 1339 | } else { |
1340 | /* | 1340 | /* |
1341 | * Handle a bvec that must be split between two or more targets. | 1341 | * Handle a bvec that must be split between two or more targets. |
1342 | */ | 1342 | */ |
1343 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; | 1343 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; |
1344 | sector_t remaining = to_sector(bv->bv_len); | 1344 | sector_t remaining = to_sector(bv->bv_len); |
1345 | unsigned int offset = 0; | 1345 | unsigned int offset = 0; |
1346 | 1346 | ||
1347 | do { | 1347 | do { |
1348 | if (offset) { | 1348 | if (offset) { |
1349 | ti = dm_table_find_target(ci->map, ci->sector); | 1349 | ti = dm_table_find_target(ci->map, ci->sector); |
1350 | if (!dm_target_is_valid(ti)) | 1350 | if (!dm_target_is_valid(ti)) |
1351 | return -EIO; | 1351 | return -EIO; |
1352 | 1352 | ||
1353 | max = max_io_len(ci->sector, ti); | 1353 | max = max_io_len(ci->sector, ti); |
1354 | } | 1354 | } |
1355 | 1355 | ||
1356 | len = min(remaining, max); | 1356 | len = min(remaining, max); |
1357 | 1357 | ||
1358 | tio = alloc_tio(ci, ti); | 1358 | tio = alloc_tio(ci, ti); |
1359 | clone = split_bvec(bio, ci->sector, ci->idx, | 1359 | clone = split_bvec(bio, ci->sector, ci->idx, |
1360 | bv->bv_offset + offset, len, | 1360 | bv->bv_offset + offset, len, |
1361 | ci->md->bs); | 1361 | ci->md->bs); |
1362 | 1362 | ||
1363 | __map_bio(ti, clone, tio); | 1363 | __map_bio(ti, clone, tio); |
1364 | 1364 | ||
1365 | ci->sector += len; | 1365 | ci->sector += len; |
1366 | ci->sector_count -= len; | 1366 | ci->sector_count -= len; |
1367 | offset += to_bytes(len); | 1367 | offset += to_bytes(len); |
1368 | } while (remaining -= len); | 1368 | } while (remaining -= len); |
1369 | 1369 | ||
1370 | ci->idx++; | 1370 | ci->idx++; |
1371 | } | 1371 | } |
1372 | 1372 | ||
1373 | return 0; | 1373 | return 0; |
1374 | } | 1374 | } |
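Of the three cases above, the middle one (whole bvecs that fit before the target boundary) is the least obvious. Stripped of the cloning, it is just an accumulation loop, sketched here with invented sizes (not kernel code):

#include <stdio.h>

int main(void)
{
    int bv_len[] = { 8, 8, 8, 8 };  /* bvec lengths in sectors (to_sector(bv_len)) */
    int vcnt = 4, idx = 0;
    int max = 20;                   /* sectors until the target boundary */
    int remaining = max, len = 0, i;

    for (i = idx; remaining && i < vcnt; i++) {
        if (bv_len[i] > remaining)
            break;                  /* this bvec would cross the boundary */
        remaining -= bv_len[i];
        len += bv_len[i];
    }

    /* the clone would cover bvecs idx..i-1, i.e. 16 of the 20 allowed sectors */
    printf("clone covers bvecs %d..%d, %d sectors\n", idx, i - 1, len);
    return 0;
}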
1375 | 1375 | ||
1376 | /* | 1376 | /* |
1377 | * Split the bio into several clones and submit it to targets. | 1377 | * Split the bio into several clones and submit it to targets. |
1378 | */ | 1378 | */ |
1379 | static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | 1379 | static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) |
1380 | { | 1380 | { |
1381 | struct clone_info ci; | 1381 | struct clone_info ci; |
1382 | int error = 0; | 1382 | int error = 0; |
1383 | 1383 | ||
1384 | ci.map = dm_get_live_table(md); | 1384 | ci.map = dm_get_live_table(md); |
1385 | if (unlikely(!ci.map)) { | 1385 | if (unlikely(!ci.map)) { |
1386 | if (!(bio->bi_rw & REQ_HARDBARRIER)) | 1386 | if (!(bio->bi_rw & REQ_HARDBARRIER)) |
1387 | bio_io_error(bio); | 1387 | bio_io_error(bio); |
1388 | else | 1388 | else |
1389 | if (!md->barrier_error) | 1389 | if (!md->barrier_error) |
1390 | md->barrier_error = -EIO; | 1390 | md->barrier_error = -EIO; |
1391 | return; | 1391 | return; |
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | ci.md = md; | 1394 | ci.md = md; |
1395 | ci.bio = bio; | 1395 | ci.bio = bio; |
1396 | ci.io = alloc_io(md); | 1396 | ci.io = alloc_io(md); |
1397 | ci.io->error = 0; | 1397 | ci.io->error = 0; |
1398 | atomic_set(&ci.io->io_count, 1); | 1398 | atomic_set(&ci.io->io_count, 1); |
1399 | ci.io->bio = bio; | 1399 | ci.io->bio = bio; |
1400 | ci.io->md = md; | 1400 | ci.io->md = md; |
1401 | spin_lock_init(&ci.io->endio_lock); | 1401 | spin_lock_init(&ci.io->endio_lock); |
1402 | ci.sector = bio->bi_sector; | 1402 | ci.sector = bio->bi_sector; |
1403 | ci.sector_count = bio_sectors(bio); | 1403 | ci.sector_count = bio_sectors(bio); |
1404 | if (unlikely(bio_empty_barrier(bio))) | 1404 | if (unlikely(bio_empty_barrier(bio))) |
1405 | ci.sector_count = 1; | 1405 | ci.sector_count = 1; |
1406 | ci.idx = bio->bi_idx; | 1406 | ci.idx = bio->bi_idx; |
1407 | 1407 | ||
1408 | start_io_acct(ci.io); | 1408 | start_io_acct(ci.io); |
1409 | while (ci.sector_count && !error) | 1409 | while (ci.sector_count && !error) |
1410 | error = __clone_and_map(&ci); | 1410 | error = __clone_and_map(&ci); |
1411 | 1411 | ||
1412 | /* drop the extra reference count */ | 1412 | /* drop the extra reference count */ |
1413 | dec_pending(ci.io, error); | 1413 | dec_pending(ci.io, error); |
1414 | dm_table_put(ci.map); | 1414 | dm_table_put(ci.map); |
1415 | } | 1415 | } |
1416 | /*----------------------------------------------------------------- | 1416 | /*----------------------------------------------------------------- |
1417 | * CRUD END | 1417 | * CRUD END |
1418 | *---------------------------------------------------------------*/ | 1418 | *---------------------------------------------------------------*/ |
1419 | 1419 | ||
1420 | static int dm_merge_bvec(struct request_queue *q, | 1420 | static int dm_merge_bvec(struct request_queue *q, |
1421 | struct bvec_merge_data *bvm, | 1421 | struct bvec_merge_data *bvm, |
1422 | struct bio_vec *biovec) | 1422 | struct bio_vec *biovec) |
1423 | { | 1423 | { |
1424 | struct mapped_device *md = q->queuedata; | 1424 | struct mapped_device *md = q->queuedata; |
1425 | struct dm_table *map = dm_get_live_table(md); | 1425 | struct dm_table *map = dm_get_live_table(md); |
1426 | struct dm_target *ti; | 1426 | struct dm_target *ti; |
1427 | sector_t max_sectors; | 1427 | sector_t max_sectors; |
1428 | int max_size = 0; | 1428 | int max_size = 0; |
1429 | 1429 | ||
1430 | if (unlikely(!map)) | 1430 | if (unlikely(!map)) |
1431 | goto out; | 1431 | goto out; |
1432 | 1432 | ||
1433 | ti = dm_table_find_target(map, bvm->bi_sector); | 1433 | ti = dm_table_find_target(map, bvm->bi_sector); |
1434 | if (!dm_target_is_valid(ti)) | 1434 | if (!dm_target_is_valid(ti)) |
1435 | goto out_table; | 1435 | goto out_table; |
1436 | 1436 | ||
1437 | /* | 1437 | /* |
1438 | * Find maximum amount of I/O that won't need splitting | 1438 | * Find maximum amount of I/O that won't need splitting |
1439 | */ | 1439 | */ |
1440 | max_sectors = min(max_io_len(bvm->bi_sector, ti), | 1440 | max_sectors = min(max_io_len(bvm->bi_sector, ti), |
1441 | (sector_t) BIO_MAX_SECTORS); | 1441 | (sector_t) BIO_MAX_SECTORS); |
1442 | max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; | 1442 | max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; |
1443 | if (max_size < 0) | 1443 | if (max_size < 0) |
1444 | max_size = 0; | 1444 | max_size = 0; |
1445 | 1445 | ||
1446 | /* | 1446 | /* |
1447 | * merge_bvec_fn() returns number of bytes | 1447 | * merge_bvec_fn() returns number of bytes |
1448 | * it can accept at this offset | 1448 | * it can accept at this offset |
1449 | * max is precomputed maximal io size | 1449 | * max is precomputed maximal io size |
1450 | */ | 1450 | */ |
1451 | if (max_size && ti->type->merge) | 1451 | if (max_size && ti->type->merge) |
1452 | max_size = ti->type->merge(ti, bvm, biovec, max_size); | 1452 | max_size = ti->type->merge(ti, bvm, biovec, max_size); |
1453 | /* | 1453 | /* |
1454 | * If the target doesn't support merge method and some of the devices | 1454 | * If the target doesn't support merge method and some of the devices |
1455 | * provided their merge_bvec method (we know this by looking at | 1455 | * provided their merge_bvec method (we know this by looking at |
1456 | * queue_max_hw_sectors), then we can't allow bios with multiple vector | 1456 | * queue_max_hw_sectors), then we can't allow bios with multiple vector |
1457 | * entries. So always set max_size to 0, and the code below allows | 1457 | * entries. So always set max_size to 0, and the code below allows |
1458 | * just one page. | 1458 | * just one page. |
1459 | */ | 1459 | */ |
1460 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) | 1460 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) |
1461 | 1461 | ||
1462 | max_size = 0; | 1462 | max_size = 0; |
1463 | 1463 | ||
1464 | out_table: | 1464 | out_table: |
1465 | dm_table_put(map); | 1465 | dm_table_put(map); |
1466 | 1466 | ||
1467 | out: | 1467 | out: |
1468 | /* | 1468 | /* |
1469 | * Always allow an entire first page | 1469 | * Always allow an entire first page |
1470 | */ | 1470 | */ |
1471 | if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) | 1471 | if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) |
1472 | max_size = biovec->bv_len; | 1472 | max_size = biovec->bv_len; |
1473 | 1473 | ||
1474 | return max_size; | 1474 | return max_size; |
1475 | } | 1475 | } |
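The clamp at the end of dm_merge_bvec() computes how many bytes may still be added at this offset, while an empty bio is always granted its first page. An illustrative standalone version (numbers invented, not kernel code):

#include <stdio.h>

#define SECTOR_SHIFT 9

int main(void)
{
    long max_sectors = 16;      /* sectors the target accepts at this offset */
    long bi_size     = 4096;    /* bytes already gathered in the bio being built */
    long bv_len      = 4096;    /* bytes the new bvec would add */

    long max_size = (max_sectors << SECTOR_SHIFT) - bi_size;
    if (max_size < 0)
        max_size = 0;

    /* always allow an entire first page, as the out: label does */
    if (max_size <= bv_len && !(bi_size >> SECTOR_SHIFT))
        max_size = bv_len;

    printf("merge_bvec may add up to %ld bytes\n", max_size);  /* 4096 here */
    return 0;
}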
1476 | 1476 | ||
1477 | /* | 1477 | /* |
1478 | * The request function that just remaps the bio built up by | 1478 | * The request function that just remaps the bio built up by |
1479 | * dm_merge_bvec. | 1479 | * dm_merge_bvec. |
1480 | */ | 1480 | */ |
1481 | static int _dm_request(struct request_queue *q, struct bio *bio) | 1481 | static int _dm_request(struct request_queue *q, struct bio *bio) |
1482 | { | 1482 | { |
1483 | int rw = bio_data_dir(bio); | 1483 | int rw = bio_data_dir(bio); |
1484 | struct mapped_device *md = q->queuedata; | 1484 | struct mapped_device *md = q->queuedata; |
1485 | int cpu; | 1485 | int cpu; |
1486 | 1486 | ||
1487 | down_read(&md->io_lock); | 1487 | down_read(&md->io_lock); |
1488 | 1488 | ||
1489 | cpu = part_stat_lock(); | 1489 | cpu = part_stat_lock(); |
1490 | part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); | 1490 | part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); |
1491 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | 1491 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); |
1492 | part_stat_unlock(); | 1492 | part_stat_unlock(); |
1493 | 1493 | ||
1494 | /* | 1494 | /* |
1495 | * If we're suspended or the thread is processing barriers | 1495 | * If we're suspended or the thread is processing barriers |
1496 | * we have to queue this io for later. | 1496 | * we have to queue this io for later. |
1497 | */ | 1497 | */ |
1498 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || | 1498 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || |
1499 | unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 1499 | unlikely(bio->bi_rw & REQ_HARDBARRIER)) { |
1500 | up_read(&md->io_lock); | 1500 | up_read(&md->io_lock); |
1501 | 1501 | ||
1502 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && | 1502 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && |
1503 | bio_rw(bio) == READA) { | 1503 | bio_rw(bio) == READA) { |
1504 | bio_io_error(bio); | 1504 | bio_io_error(bio); |
1505 | return 0; | 1505 | return 0; |
1506 | } | 1506 | } |
1507 | 1507 | ||
1508 | queue_io(md, bio); | 1508 | queue_io(md, bio); |
1509 | 1509 | ||
1510 | return 0; | 1510 | return 0; |
1511 | } | 1511 | } |
1512 | 1512 | ||
1513 | __split_and_process_bio(md, bio); | 1513 | __split_and_process_bio(md, bio); |
1514 | up_read(&md->io_lock); | 1514 | up_read(&md->io_lock); |
1515 | return 0; | 1515 | return 0; |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | static int dm_make_request(struct request_queue *q, struct bio *bio) | 1518 | static int dm_make_request(struct request_queue *q, struct bio *bio) |
1519 | { | 1519 | { |
1520 | struct mapped_device *md = q->queuedata; | 1520 | struct mapped_device *md = q->queuedata; |
1521 | 1521 | ||
1522 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ | 1522 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ |
1523 | } | 1523 | } |
1524 | 1524 | ||
1525 | static int dm_request_based(struct mapped_device *md) | 1525 | static int dm_request_based(struct mapped_device *md) |
1526 | { | 1526 | { |
1527 | return blk_queue_stackable(md->queue); | 1527 | return blk_queue_stackable(md->queue); |
1528 | } | 1528 | } |
1529 | 1529 | ||
1530 | static int dm_request(struct request_queue *q, struct bio *bio) | 1530 | static int dm_request(struct request_queue *q, struct bio *bio) |
1531 | { | 1531 | { |
1532 | struct mapped_device *md = q->queuedata; | 1532 | struct mapped_device *md = q->queuedata; |
1533 | 1533 | ||
1534 | if (dm_request_based(md)) | 1534 | if (dm_request_based(md)) |
1535 | return dm_make_request(q, bio); | 1535 | return dm_make_request(q, bio); |
1536 | 1536 | ||
1537 | return _dm_request(q, bio); | 1537 | return _dm_request(q, bio); |
1538 | } | 1538 | } |
1539 | 1539 | ||
1540 | static bool dm_rq_is_flush_request(struct request *rq) | 1540 | static bool dm_rq_is_flush_request(struct request *rq) |
1541 | { | 1541 | { |
1542 | if (rq->cmd_flags & REQ_FLUSH) | 1542 | if (rq->cmd_flags & REQ_FLUSH) |
1543 | return true; | 1543 | return true; |
1544 | else | 1544 | else |
1545 | return false; | 1545 | return false; |
1546 | } | 1546 | } |
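
The helper above reduces to a single flag test; it could equally be written as a one-line boolean conversion (an equivalent sketch, not part of the patch):

    static bool dm_rq_is_flush_request(struct request *rq)
    {
            return rq->cmd_flags & REQ_FLUSH;	/* implicit conversion to bool */
    }
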
1547 | 1547 | ||
1548 | void dm_dispatch_request(struct request *rq) | 1548 | void dm_dispatch_request(struct request *rq) |
1549 | { | 1549 | { |
1550 | int r; | 1550 | int r; |
1551 | 1551 | ||
1552 | if (blk_queue_io_stat(rq->q)) | 1552 | if (blk_queue_io_stat(rq->q)) |
1553 | rq->cmd_flags |= REQ_IO_STAT; | 1553 | rq->cmd_flags |= REQ_IO_STAT; |
1554 | 1554 | ||
1555 | rq->start_time = jiffies; | 1555 | rq->start_time = jiffies; |
1556 | r = blk_insert_cloned_request(rq->q, rq); | 1556 | r = blk_insert_cloned_request(rq->q, rq); |
1557 | if (r) | 1557 | if (r) |
1558 | dm_complete_request(rq, r); | 1558 | dm_complete_request(rq, r); |
1559 | } | 1559 | } |
1560 | EXPORT_SYMBOL_GPL(dm_dispatch_request); | 1560 | EXPORT_SYMBOL_GPL(dm_dispatch_request); |
1561 | 1561 | ||
1562 | static void dm_rq_bio_destructor(struct bio *bio) | 1562 | static void dm_rq_bio_destructor(struct bio *bio) |
1563 | { | 1563 | { |
1564 | struct dm_rq_clone_bio_info *info = bio->bi_private; | 1564 | struct dm_rq_clone_bio_info *info = bio->bi_private; |
1565 | struct mapped_device *md = info->tio->md; | 1565 | struct mapped_device *md = info->tio->md; |
1566 | 1566 | ||
1567 | free_bio_info(info); | 1567 | free_bio_info(info); |
1568 | bio_free(bio, md->bs); | 1568 | bio_free(bio, md->bs); |
1569 | } | 1569 | } |
1570 | 1570 | ||
1571 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | 1571 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, |
1572 | void *data) | 1572 | void *data) |
1573 | { | 1573 | { |
1574 | struct dm_rq_target_io *tio = data; | 1574 | struct dm_rq_target_io *tio = data; |
1575 | struct mapped_device *md = tio->md; | 1575 | struct mapped_device *md = tio->md; |
1576 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); | 1576 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); |
1577 | 1577 | ||
1578 | if (!info) | 1578 | if (!info) |
1579 | return -ENOMEM; | 1579 | return -ENOMEM; |
1580 | 1580 | ||
1581 | info->orig = bio_orig; | 1581 | info->orig = bio_orig; |
1582 | info->tio = tio; | 1582 | info->tio = tio; |
1583 | bio->bi_end_io = end_clone_bio; | 1583 | bio->bi_end_io = end_clone_bio; |
1584 | bio->bi_private = info; | 1584 | bio->bi_private = info; |
1585 | bio->bi_destructor = dm_rq_bio_destructor; | 1585 | bio->bi_destructor = dm_rq_bio_destructor; |
1586 | 1586 | ||
1587 | return 0; | 1587 | return 0; |
1588 | } | 1588 | } |
1589 | 1589 | ||
1590 | static int setup_clone(struct request *clone, struct request *rq, | 1590 | static int setup_clone(struct request *clone, struct request *rq, |
1591 | struct dm_rq_target_io *tio) | 1591 | struct dm_rq_target_io *tio) |
1592 | { | 1592 | { |
1593 | int r; | 1593 | int r; |
1594 | 1594 | ||
1595 | if (dm_rq_is_flush_request(rq)) { | 1595 | if (dm_rq_is_flush_request(rq)) { |
1596 | blk_rq_init(NULL, clone); | 1596 | blk_rq_init(NULL, clone); |
1597 | clone->cmd_type = REQ_TYPE_FS; | 1597 | clone->cmd_type = REQ_TYPE_FS; |
1598 | clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); | 1598 | clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); |
1599 | } else { | 1599 | } else { |
1600 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | 1600 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, |
1601 | dm_rq_bio_constructor, tio); | 1601 | dm_rq_bio_constructor, tio); |
1602 | if (r) | 1602 | if (r) |
1603 | return r; | 1603 | return r; |
1604 | 1604 | ||
1605 | clone->cmd = rq->cmd; | 1605 | clone->cmd = rq->cmd; |
1606 | clone->cmd_len = rq->cmd_len; | 1606 | clone->cmd_len = rq->cmd_len; |
1607 | clone->sense = rq->sense; | 1607 | clone->sense = rq->sense; |
1608 | clone->buffer = rq->buffer; | 1608 | clone->buffer = rq->buffer; |
1609 | } | 1609 | } |
1610 | 1610 | ||
1611 | clone->end_io = end_clone_request; | 1611 | clone->end_io = end_clone_request; |
1612 | clone->end_io_data = tio; | 1612 | clone->end_io_data = tio; |
1613 | 1613 | ||
1614 | return 0; | 1614 | return 0; |
1615 | } | 1615 | } |
1616 | 1616 | ||
1617 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, | 1617 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, |
1618 | gfp_t gfp_mask) | 1618 | gfp_t gfp_mask) |
1619 | { | 1619 | { |
1620 | struct request *clone; | 1620 | struct request *clone; |
1621 | struct dm_rq_target_io *tio; | 1621 | struct dm_rq_target_io *tio; |
1622 | 1622 | ||
1623 | tio = alloc_rq_tio(md, gfp_mask); | 1623 | tio = alloc_rq_tio(md, gfp_mask); |
1624 | if (!tio) | 1624 | if (!tio) |
1625 | return NULL; | 1625 | return NULL; |
1626 | 1626 | ||
1627 | tio->md = md; | 1627 | tio->md = md; |
1628 | tio->ti = NULL; | 1628 | tio->ti = NULL; |
1629 | tio->orig = rq; | 1629 | tio->orig = rq; |
1630 | tio->error = 0; | 1630 | tio->error = 0; |
1631 | memset(&tio->info, 0, sizeof(tio->info)); | 1631 | memset(&tio->info, 0, sizeof(tio->info)); |
1632 | 1632 | ||
1633 | clone = &tio->clone; | 1633 | clone = &tio->clone; |
1634 | if (setup_clone(clone, rq, tio)) { | 1634 | if (setup_clone(clone, rq, tio)) { |
1635 | /* -ENOMEM */ | 1635 | /* -ENOMEM */ |
1636 | free_rq_tio(tio); | 1636 | free_rq_tio(tio); |
1637 | return NULL; | 1637 | return NULL; |
1638 | } | 1638 | } |
1639 | 1639 | ||
1640 | return clone; | 1640 | return clone; |
1641 | } | 1641 | } |
1642 | 1642 | ||
1643 | /* | 1643 | /* |
1644 | * Called with the queue lock held. | 1644 | * Called with the queue lock held. |
1645 | */ | 1645 | */ |
1646 | static int dm_prep_fn(struct request_queue *q, struct request *rq) | 1646 | static int dm_prep_fn(struct request_queue *q, struct request *rq) |
1647 | { | 1647 | { |
1648 | struct mapped_device *md = q->queuedata; | 1648 | struct mapped_device *md = q->queuedata; |
1649 | struct request *clone; | 1649 | struct request *clone; |
1650 | 1650 | ||
1651 | if (unlikely(dm_rq_is_flush_request(rq))) | 1651 | if (unlikely(dm_rq_is_flush_request(rq))) |
1652 | return BLKPREP_OK; | 1652 | return BLKPREP_OK; |
1653 | 1653 | ||
1654 | if (unlikely(rq->special)) { | 1654 | if (unlikely(rq->special)) { |
1655 | DMWARN("Already has something in rq->special."); | 1655 | DMWARN("Already has something in rq->special."); |
1656 | return BLKPREP_KILL; | 1656 | return BLKPREP_KILL; |
1657 | } | 1657 | } |
1658 | 1658 | ||
1659 | clone = clone_rq(rq, md, GFP_ATOMIC); | 1659 | clone = clone_rq(rq, md, GFP_ATOMIC); |
1660 | if (!clone) | 1660 | if (!clone) |
1661 | return BLKPREP_DEFER; | 1661 | return BLKPREP_DEFER; |
1662 | 1662 | ||
1663 | rq->special = clone; | 1663 | rq->special = clone; |
1664 | rq->cmd_flags |= REQ_DONTPREP; | 1664 | rq->cmd_flags |= REQ_DONTPREP; |
1665 | 1665 | ||
1666 | return BLKPREP_OK; | 1666 | return BLKPREP_OK; |
1667 | } | 1667 | } |
1668 | 1668 | ||
1669 | /* | 1669 | /* |
1670 | * Returns: | 1670 | * Returns: |
1671 | * 0 : the request has been processed (not requeued) | 1671 | * 0 : the request has been processed (not requeued) |
1672 | * !0 : the request has been requeued | 1672 | * !0 : the request has been requeued |
1673 | */ | 1673 | */ |
1674 | static int map_request(struct dm_target *ti, struct request *clone, | 1674 | static int map_request(struct dm_target *ti, struct request *clone, |
1675 | struct mapped_device *md) | 1675 | struct mapped_device *md) |
1676 | { | 1676 | { |
1677 | int r, requeued = 0; | 1677 | int r, requeued = 0; |
1678 | struct dm_rq_target_io *tio = clone->end_io_data; | 1678 | struct dm_rq_target_io *tio = clone->end_io_data; |
1679 | 1679 | ||
1680 | /* | 1680 | /* |
1681 | * Hold an md reference here for the in-flight I/O. | 1681 | * Hold an md reference here for the in-flight I/O. |
1682 | * We can't rely on the reference taken by the device opener, | 1682 | * We can't rely on the reference taken by the device opener, |
1683 | * because the device may be closed during request completion, | 1683 | * because the device may be closed during request completion, |
1684 | * once all of its bios have completed. | 1684 | * once all of its bios have completed. |
1685 | * See also the comment in rq_completed(). | 1685 | * See also the comment in rq_completed(). |
1686 | */ | 1686 | */ |
1687 | dm_get(md); | 1687 | dm_get(md); |
1688 | 1688 | ||
1689 | tio->ti = ti; | 1689 | tio->ti = ti; |
1690 | r = ti->type->map_rq(ti, clone, &tio->info); | 1690 | r = ti->type->map_rq(ti, clone, &tio->info); |
1691 | switch (r) { | 1691 | switch (r) { |
1692 | case DM_MAPIO_SUBMITTED: | 1692 | case DM_MAPIO_SUBMITTED: |
1693 | /* The target has taken the I/O to submit by itself later */ | 1693 | /* The target has taken the I/O to submit by itself later */ |
1694 | break; | 1694 | break; |
1695 | case DM_MAPIO_REMAPPED: | 1695 | case DM_MAPIO_REMAPPED: |
1696 | /* The target has remapped the I/O so dispatch it */ | 1696 | /* The target has remapped the I/O so dispatch it */ |
1697 | trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), | 1697 | trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), |
1698 | blk_rq_pos(tio->orig)); | 1698 | blk_rq_pos(tio->orig)); |
1699 | dm_dispatch_request(clone); | 1699 | dm_dispatch_request(clone); |
1700 | break; | 1700 | break; |
1701 | case DM_MAPIO_REQUEUE: | 1701 | case DM_MAPIO_REQUEUE: |
1702 | /* The target wants to requeue the I/O */ | 1702 | /* The target wants to requeue the I/O */ |
1703 | dm_requeue_unmapped_request(clone); | 1703 | dm_requeue_unmapped_request(clone); |
1704 | requeued = 1; | 1704 | requeued = 1; |
1705 | break; | 1705 | break; |
1706 | default: | 1706 | default: |
1707 | if (r > 0) { | 1707 | if (r > 0) { |
1708 | DMWARN("unimplemented target map return value: %d", r); | 1708 | DMWARN("unimplemented target map return value: %d", r); |
1709 | BUG(); | 1709 | BUG(); |
1710 | } | 1710 | } |
1711 | 1711 | ||
1712 | /* The target wants to complete the I/O */ | 1712 | /* The target wants to complete the I/O */ |
1713 | dm_kill_unmapped_request(clone, r); | 1713 | dm_kill_unmapped_request(clone, r); |
1714 | break; | 1714 | break; |
1715 | } | 1715 | } |
1716 | 1716 | ||
1717 | return requeued; | 1717 | return requeued; |
1718 | } | 1718 | } |
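
map_request() above interprets the DM_MAPIO_* return codes of a target's map_rq hook. A hypothetical single-device pass-through target shows that contract from the other side; everything prefixed example_ is invented, and the remapping only roughly follows the pattern request-based targets such as multipath use, so treat it as a sketch rather than their actual code:

    #include <linux/blkdev.h>
    #include <linux/device-mapper.h>

    struct example_ctx {                    /* invented per-target state */
            struct block_device *bdev;      /* the single underlying device */
            bool ready;
    };

    static int example_map_rq(struct dm_target *ti, struct request *clone,
                              union map_info *map_context)
    {
            struct example_ctx *ctx = ti->private;

            if (!ctx->ready)
                    return DM_MAPIO_REQUEUE;  /* map_request() requeues the original */

            /* Point the clone at the underlying queue and let dm dispatch it. */
            clone->q = bdev_get_queue(ctx->bdev);
            clone->rq_disk = ctx->bdev->bd_disk;
            return DM_MAPIO_REMAPPED;
    }
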
1719 | 1719 | ||
1720 | /* | 1720 | /* |
1721 | * q->request_fn for request-based dm. | 1721 | * q->request_fn for request-based dm. |
1722 | * Called with the queue lock held. | 1722 | * Called with the queue lock held. |
1723 | */ | 1723 | */ |
1724 | static void dm_request_fn(struct request_queue *q) | 1724 | static void dm_request_fn(struct request_queue *q) |
1725 | { | 1725 | { |
1726 | struct mapped_device *md = q->queuedata; | 1726 | struct mapped_device *md = q->queuedata; |
1727 | struct dm_table *map = dm_get_live_table(md); | 1727 | struct dm_table *map = dm_get_live_table(md); |
1728 | struct dm_target *ti; | 1728 | struct dm_target *ti; |
1729 | struct request *rq, *clone; | 1729 | struct request *rq, *clone; |
1730 | 1730 | ||
1731 | /* | 1731 | /* |
1732 | * For suspend, check blk_queue_stopped() and increment | 1732 | * For suspend, check blk_queue_stopped() and increment |
1733 | * ->pending while holding a single queue_lock, so that the | 1733 | * ->pending while holding a single queue_lock, so that the |
1734 | * number of in-flight I/Os is not incremented after the queue | 1734 | * number of in-flight I/Os is not incremented after the queue |
1735 | * has been stopped in dm_suspend(). | 1735 | * has been stopped in dm_suspend(). |
1736 | */ | 1736 | */ |
1737 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { | 1737 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { |
1738 | rq = blk_peek_request(q); | 1738 | rq = blk_peek_request(q); |
1739 | if (!rq) | 1739 | if (!rq) |
1740 | goto plug_and_out; | 1740 | goto plug_and_out; |
1741 | 1741 | ||
1742 | if (unlikely(dm_rq_is_flush_request(rq))) { | 1742 | if (unlikely(dm_rq_is_flush_request(rq))) { |
1743 | BUG_ON(md->flush_request); | 1743 | BUG_ON(md->flush_request); |
1744 | md->flush_request = rq; | 1744 | md->flush_request = rq; |
1745 | blk_start_request(rq); | 1745 | blk_start_request(rq); |
1746 | queue_work(md->wq, &md->barrier_work); | 1746 | queue_work(md->wq, &md->barrier_work); |
1747 | goto out; | 1747 | goto out; |
1748 | } | 1748 | } |
1749 | 1749 | ||
1750 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | 1750 | ti = dm_table_find_target(map, blk_rq_pos(rq)); |
1751 | if (ti->type->busy && ti->type->busy(ti)) | 1751 | if (ti->type->busy && ti->type->busy(ti)) |
1752 | goto plug_and_out; | 1752 | goto plug_and_out; |
1753 | 1753 | ||
1754 | blk_start_request(rq); | 1754 | blk_start_request(rq); |
1755 | clone = rq->special; | 1755 | clone = rq->special; |
1756 | atomic_inc(&md->pending[rq_data_dir(clone)]); | 1756 | atomic_inc(&md->pending[rq_data_dir(clone)]); |
1757 | 1757 | ||
1758 | spin_unlock(q->queue_lock); | 1758 | spin_unlock(q->queue_lock); |
1759 | if (map_request(ti, clone, md)) | 1759 | if (map_request(ti, clone, md)) |
1760 | goto requeued; | 1760 | goto requeued; |
1761 | 1761 | ||
1762 | spin_lock_irq(q->queue_lock); | 1762 | spin_lock_irq(q->queue_lock); |
1763 | } | 1763 | } |
1764 | 1764 | ||
1765 | goto out; | 1765 | goto out; |
1766 | 1766 | ||
1767 | requeued: | 1767 | requeued: |
1768 | spin_lock_irq(q->queue_lock); | 1768 | spin_lock_irq(q->queue_lock); |
1769 | 1769 | ||
1770 | plug_and_out: | 1770 | plug_and_out: |
1771 | if (!elv_queue_empty(q)) | 1771 | if (!elv_queue_empty(q)) |
1772 | /* Some requests still remain, retry later */ | 1772 | /* Some requests still remain, retry later */ |
1773 | blk_plug_device(q); | 1773 | blk_plug_device(q); |
1774 | 1774 | ||
1775 | out: | 1775 | out: |
1776 | dm_table_put(map); | 1776 | dm_table_put(map); |
1777 | 1777 | ||
1778 | return; | 1778 | return; |
1779 | } | 1779 | } |
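
dm_request_fn() is an unusually involved request_fn because it has to peek at requests, special-case flushes, and drop the queue lock around map_request(). For contrast, the minimal shape of a request_fn under the same 2.6.36-era API looks roughly like this (illustrative sketch; example_request_fn is an invented name):

    #include <linux/blkdev.h>

    /* Called with q->queue_lock held, hence the __ completion variant. */
    static void example_request_fn(struct request_queue *q)
    {
            struct request *rq;

            while ((rq = blk_fetch_request(q)) != NULL) {
                    /* ...service the request here... */
                    __blk_end_request_all(rq, 0);
            }
    }
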
1780 | 1780 | ||
1781 | int dm_underlying_device_busy(struct request_queue *q) | 1781 | int dm_underlying_device_busy(struct request_queue *q) |
1782 | { | 1782 | { |
1783 | return blk_lld_busy(q); | 1783 | return blk_lld_busy(q); |
1784 | } | 1784 | } |
1785 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); | 1785 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); |
1786 | 1786 | ||
1787 | static int dm_lld_busy(struct request_queue *q) | 1787 | static int dm_lld_busy(struct request_queue *q) |
1788 | { | 1788 | { |
1789 | int r; | 1789 | int r; |
1790 | struct mapped_device *md = q->queuedata; | 1790 | struct mapped_device *md = q->queuedata; |
1791 | struct dm_table *map = dm_get_live_table(md); | 1791 | struct dm_table *map = dm_get_live_table(md); |
1792 | 1792 | ||
1793 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) | 1793 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) |
1794 | r = 1; | 1794 | r = 1; |
1795 | else | 1795 | else |
1796 | r = dm_table_any_busy_target(map); | 1796 | r = dm_table_any_busy_target(map); |
1797 | 1797 | ||
1798 | dm_table_put(map); | 1798 | dm_table_put(map); |
1799 | 1799 | ||
1800 | return r; | 1800 | return r; |
1801 | } | 1801 | } |
1802 | 1802 | ||
1803 | static void dm_unplug_all(struct request_queue *q) | 1803 | static void dm_unplug_all(struct request_queue *q) |
1804 | { | 1804 | { |
1805 | struct mapped_device *md = q->queuedata; | 1805 | struct mapped_device *md = q->queuedata; |
1806 | struct dm_table *map = dm_get_live_table(md); | 1806 | struct dm_table *map = dm_get_live_table(md); |
1807 | 1807 | ||
1808 | if (map) { | 1808 | if (map) { |
1809 | if (dm_request_based(md)) | 1809 | if (dm_request_based(md)) |
1810 | generic_unplug_device(q); | 1810 | generic_unplug_device(q); |
1811 | 1811 | ||
1812 | dm_table_unplug_all(map); | 1812 | dm_table_unplug_all(map); |
1813 | dm_table_put(map); | 1813 | dm_table_put(map); |
1814 | } | 1814 | } |
1815 | } | 1815 | } |
1816 | 1816 | ||
1817 | static int dm_any_congested(void *congested_data, int bdi_bits) | 1817 | static int dm_any_congested(void *congested_data, int bdi_bits) |
1818 | { | 1818 | { |
1819 | int r = bdi_bits; | 1819 | int r = bdi_bits; |
1820 | struct mapped_device *md = congested_data; | 1820 | struct mapped_device *md = congested_data; |
1821 | struct dm_table *map; | 1821 | struct dm_table *map; |
1822 | 1822 | ||
1823 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 1823 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
1824 | map = dm_get_live_table(md); | 1824 | map = dm_get_live_table(md); |
1825 | if (map) { | 1825 | if (map) { |
1826 | /* | 1826 | /* |
1827 | * Request-based dm only cares about its own queue when | 1827 | * Request-based dm only cares about its own queue when |
1828 | * queried for the congestion status of the request_queue | 1828 | * queried for the congestion status of the request_queue |
1829 | */ | 1829 | */ |
1830 | if (dm_request_based(md)) | 1830 | if (dm_request_based(md)) |
1831 | r = md->queue->backing_dev_info.state & | 1831 | r = md->queue->backing_dev_info.state & |
1832 | bdi_bits; | 1832 | bdi_bits; |
1833 | else | 1833 | else |
1834 | r = dm_table_any_congested(map, bdi_bits); | 1834 | r = dm_table_any_congested(map, bdi_bits); |
1835 | 1835 | ||
1836 | dm_table_put(map); | 1836 | dm_table_put(map); |
1837 | } | 1837 | } |
1838 | } | 1838 | } |
1839 | 1839 | ||
1840 | return r; | 1840 | return r; |
1841 | } | 1841 | } |
1842 | 1842 | ||
1843 | /*----------------------------------------------------------------- | 1843 | /*----------------------------------------------------------------- |
1844 | * An IDR is used to keep track of allocated minor numbers. | 1844 | * An IDR is used to keep track of allocated minor numbers. |
1845 | *---------------------------------------------------------------*/ | 1845 | *---------------------------------------------------------------*/ |
1846 | static DEFINE_IDR(_minor_idr); | 1846 | static DEFINE_IDR(_minor_idr); |
1847 | 1847 | ||
1848 | static void free_minor(int minor) | 1848 | static void free_minor(int minor) |
1849 | { | 1849 | { |
1850 | spin_lock(&_minor_lock); | 1850 | spin_lock(&_minor_lock); |
1851 | idr_remove(&_minor_idr, minor); | 1851 | idr_remove(&_minor_idr, minor); |
1852 | spin_unlock(&_minor_lock); | 1852 | spin_unlock(&_minor_lock); |
1853 | } | 1853 | } |
1854 | 1854 | ||
1855 | /* | 1855 | /* |
1856 | * See if the device with a specific minor # is free. | 1856 | * See if the device with a specific minor # is free. |
1857 | */ | 1857 | */ |
1858 | static int specific_minor(int minor) | 1858 | static int specific_minor(int minor) |
1859 | { | 1859 | { |
1860 | int r, m; | 1860 | int r, m; |
1861 | 1861 | ||
1862 | if (minor >= (1 << MINORBITS)) | 1862 | if (minor >= (1 << MINORBITS)) |
1863 | return -EINVAL; | 1863 | return -EINVAL; |
1864 | 1864 | ||
1865 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); | 1865 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); |
1866 | if (!r) | 1866 | if (!r) |
1867 | return -ENOMEM; | 1867 | return -ENOMEM; |
1868 | 1868 | ||
1869 | spin_lock(&_minor_lock); | 1869 | spin_lock(&_minor_lock); |
1870 | 1870 | ||
1871 | if (idr_find(&_minor_idr, minor)) { | 1871 | if (idr_find(&_minor_idr, minor)) { |
1872 | r = -EBUSY; | 1872 | r = -EBUSY; |
1873 | goto out; | 1873 | goto out; |
1874 | } | 1874 | } |
1875 | 1875 | ||
1876 | r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); | 1876 | r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); |
1877 | if (r) | 1877 | if (r) |
1878 | goto out; | 1878 | goto out; |
1879 | 1879 | ||
1880 | if (m != minor) { | 1880 | if (m != minor) { |
1881 | idr_remove(&_minor_idr, m); | 1881 | idr_remove(&_minor_idr, m); |
1882 | r = -EBUSY; | 1882 | r = -EBUSY; |
1883 | goto out; | 1883 | goto out; |
1884 | } | 1884 | } |
1885 | 1885 | ||
1886 | out: | 1886 | out: |
1887 | spin_unlock(&_minor_lock); | 1887 | spin_unlock(&_minor_lock); |
1888 | return r; | 1888 | return r; |
1889 | } | 1889 | } |
1890 | 1890 | ||
1891 | static int next_free_minor(int *minor) | 1891 | static int next_free_minor(int *minor) |
1892 | { | 1892 | { |
1893 | int r, m; | 1893 | int r, m; |
1894 | 1894 | ||
1895 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); | 1895 | r = idr_pre_get(&_minor_idr, GFP_KERNEL); |
1896 | if (!r) | 1896 | if (!r) |
1897 | return -ENOMEM; | 1897 | return -ENOMEM; |
1898 | 1898 | ||
1899 | spin_lock(&_minor_lock); | 1899 | spin_lock(&_minor_lock); |
1900 | 1900 | ||
1901 | r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); | 1901 | r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); |
1902 | if (r) | 1902 | if (r) |
1903 | goto out; | 1903 | goto out; |
1904 | 1904 | ||
1905 | if (m >= (1 << MINORBITS)) { | 1905 | if (m >= (1 << MINORBITS)) { |
1906 | idr_remove(&_minor_idr, m); | 1906 | idr_remove(&_minor_idr, m); |
1907 | r = -ENOSPC; | 1907 | r = -ENOSPC; |
1908 | goto out; | 1908 | goto out; |
1909 | } | 1909 | } |
1910 | 1910 | ||
1911 | *minor = m; | 1911 | *minor = m; |
1912 | 1912 | ||
1913 | out: | 1913 | out: |
1914 | spin_unlock(&_minor_lock); | 1914 | spin_unlock(&_minor_lock); |
1915 | return r; | 1915 | return r; |
1916 | } | 1916 | } |
1917 | 1917 | ||
1918 | static const struct block_device_operations dm_blk_dops; | 1918 | static const struct block_device_operations dm_blk_dops; |
1919 | 1919 | ||
1920 | static void dm_wq_work(struct work_struct *work); | 1920 | static void dm_wq_work(struct work_struct *work); |
1921 | static void dm_rq_barrier_work(struct work_struct *work); | 1921 | static void dm_rq_barrier_work(struct work_struct *work); |
1922 | 1922 | ||
1923 | static void dm_init_md_queue(struct mapped_device *md) | 1923 | static void dm_init_md_queue(struct mapped_device *md) |
1924 | { | 1924 | { |
1925 | /* | 1925 | /* |
1926 | * Request-based dm devices cannot be stacked on top of bio-based dm | 1926 | * Request-based dm devices cannot be stacked on top of bio-based dm |
1927 | * devices. The type of this dm device has not been decided yet. | 1927 | * devices. The type of this dm device has not been decided yet. |
1928 | * The type is decided at the first table loading time. | 1928 | * The type is decided at the first table loading time. |
1929 | * To prevent problematic device stacking, clear the queue flag | 1929 | * To prevent problematic device stacking, clear the queue flag |
1930 | * for request stacking support until then. | 1930 | * for request stacking support until then. |
1931 | * | 1931 | * |
1932 | * This queue is new, so no concurrency on the queue_flags. | 1932 | * This queue is new, so no concurrency on the queue_flags. |
1933 | */ | 1933 | */ |
1934 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | 1934 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); |
1935 | 1935 | ||
1936 | md->queue->queuedata = md; | 1936 | md->queue->queuedata = md; |
1937 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 1937 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
1938 | md->queue->backing_dev_info.congested_data = md; | 1938 | md->queue->backing_dev_info.congested_data = md; |
1939 | blk_queue_make_request(md->queue, dm_request); | 1939 | blk_queue_make_request(md->queue, dm_request); |
1940 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1940 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1941 | md->queue->unplug_fn = dm_unplug_all; | 1941 | md->queue->unplug_fn = dm_unplug_all; |
1942 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1942 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1943 | } | 1943 | } |
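
dm_init_md_queue() wires up the bio-based path: queuedata, the congestion callbacks, a make_request_fn, bounce limits, an unplug hook, and a merge_bvec_fn. Stripped of the dm specifics, the core of that pattern for any bio-based driver is just allocate-queue-plus-make_request. A minimal sketch under the same era's int-returning make_request_fn; the example_ names and driver_data argument are invented:

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    static int example_make_request(struct request_queue *q, struct bio *bio)
    {
            /* ...remap bio->bi_bdev / bi_sector or service the bio here... */
            bio_endio(bio, 0);      /* complete it successfully for the sketch */
            return 0;
    }

    static struct request_queue *example_alloc_queue(void *driver_data)
    {
            struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

            if (!q)
                    return NULL;

            q->queuedata = driver_data;
            blk_queue_make_request(q, example_make_request);
            return q;
    }
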
1944 | 1944 | ||
1945 | /* | 1945 | /* |
1946 | * Allocate and initialise a blank device with a given minor. | 1946 | * Allocate and initialise a blank device with a given minor. |
1947 | */ | 1947 | */ |
1948 | static struct mapped_device *alloc_dev(int minor) | 1948 | static struct mapped_device *alloc_dev(int minor) |
1949 | { | 1949 | { |
1950 | int r; | 1950 | int r; |
1951 | struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); | 1951 | struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); |
1952 | void *old_md; | 1952 | void *old_md; |
1953 | 1953 | ||
1954 | if (!md) { | 1954 | if (!md) { |
1955 | DMWARN("unable to allocate device, out of memory."); | 1955 | DMWARN("unable to allocate device, out of memory."); |
1956 | return NULL; | 1956 | return NULL; |
1957 | } | 1957 | } |
1958 | 1958 | ||
1959 | if (!try_module_get(THIS_MODULE)) | 1959 | if (!try_module_get(THIS_MODULE)) |
1960 | goto bad_module_get; | 1960 | goto bad_module_get; |
1961 | 1961 | ||
1962 | /* get a minor number for the dev */ | 1962 | /* get a minor number for the dev */ |
1963 | if (minor == DM_ANY_MINOR) | 1963 | if (minor == DM_ANY_MINOR) |
1964 | r = next_free_minor(&minor); | 1964 | r = next_free_minor(&minor); |
1965 | else | 1965 | else |
1966 | r = specific_minor(minor); | 1966 | r = specific_minor(minor); |
1967 | if (r < 0) | 1967 | if (r < 0) |
1968 | goto bad_minor; | 1968 | goto bad_minor; |
1969 | 1969 | ||
1970 | md->type = DM_TYPE_NONE; | 1970 | md->type = DM_TYPE_NONE; |
1971 | init_rwsem(&md->io_lock); | 1971 | init_rwsem(&md->io_lock); |
1972 | mutex_init(&md->suspend_lock); | 1972 | mutex_init(&md->suspend_lock); |
1973 | mutex_init(&md->type_lock); | 1973 | mutex_init(&md->type_lock); |
1974 | spin_lock_init(&md->deferred_lock); | 1974 | spin_lock_init(&md->deferred_lock); |
1975 | spin_lock_init(&md->barrier_error_lock); | 1975 | spin_lock_init(&md->barrier_error_lock); |
1976 | rwlock_init(&md->map_lock); | 1976 | rwlock_init(&md->map_lock); |
1977 | atomic_set(&md->holders, 1); | 1977 | atomic_set(&md->holders, 1); |
1978 | atomic_set(&md->open_count, 0); | 1978 | atomic_set(&md->open_count, 0); |
1979 | atomic_set(&md->event_nr, 0); | 1979 | atomic_set(&md->event_nr, 0); |
1980 | atomic_set(&md->uevent_seq, 0); | 1980 | atomic_set(&md->uevent_seq, 0); |
1981 | INIT_LIST_HEAD(&md->uevent_list); | 1981 | INIT_LIST_HEAD(&md->uevent_list); |
1982 | spin_lock_init(&md->uevent_lock); | 1982 | spin_lock_init(&md->uevent_lock); |
1983 | 1983 | ||
1984 | md->queue = blk_alloc_queue(GFP_KERNEL); | 1984 | md->queue = blk_alloc_queue(GFP_KERNEL); |
1985 | if (!md->queue) | 1985 | if (!md->queue) |
1986 | goto bad_queue; | 1986 | goto bad_queue; |
1987 | 1987 | ||
1988 | dm_init_md_queue(md); | 1988 | dm_init_md_queue(md); |
1989 | 1989 | ||
1990 | md->disk = alloc_disk(1); | 1990 | md->disk = alloc_disk(1); |
1991 | if (!md->disk) | 1991 | if (!md->disk) |
1992 | goto bad_disk; | 1992 | goto bad_disk; |
1993 | 1993 | ||
1994 | atomic_set(&md->pending[0], 0); | 1994 | atomic_set(&md->pending[0], 0); |
1995 | atomic_set(&md->pending[1], 0); | 1995 | atomic_set(&md->pending[1], 0); |
1996 | init_waitqueue_head(&md->wait); | 1996 | init_waitqueue_head(&md->wait); |
1997 | INIT_WORK(&md->work, dm_wq_work); | 1997 | INIT_WORK(&md->work, dm_wq_work); |
1998 | INIT_WORK(&md->barrier_work, dm_rq_barrier_work); | 1998 | INIT_WORK(&md->barrier_work, dm_rq_barrier_work); |
1999 | init_waitqueue_head(&md->eventq); | 1999 | init_waitqueue_head(&md->eventq); |
2000 | 2000 | ||
2001 | md->disk->major = _major; | 2001 | md->disk->major = _major; |
2002 | md->disk->first_minor = minor; | 2002 | md->disk->first_minor = minor; |
2003 | md->disk->fops = &dm_blk_dops; | 2003 | md->disk->fops = &dm_blk_dops; |
2004 | md->disk->queue = md->queue; | 2004 | md->disk->queue = md->queue; |
2005 | md->disk->private_data = md; | 2005 | md->disk->private_data = md; |
2006 | sprintf(md->disk->disk_name, "dm-%d", minor); | 2006 | sprintf(md->disk->disk_name, "dm-%d", minor); |
2007 | add_disk(md->disk); | 2007 | add_disk(md->disk); |
2008 | format_dev_t(md->name, MKDEV(_major, minor)); | 2008 | format_dev_t(md->name, MKDEV(_major, minor)); |
2009 | 2009 | ||
2010 | md->wq = create_singlethread_workqueue("kdmflush"); | 2010 | md->wq = create_singlethread_workqueue("kdmflush"); |
2011 | if (!md->wq) | 2011 | if (!md->wq) |
2012 | goto bad_thread; | 2012 | goto bad_thread; |
2013 | 2013 | ||
2014 | md->bdev = bdget_disk(md->disk, 0); | 2014 | md->bdev = bdget_disk(md->disk, 0); |
2015 | if (!md->bdev) | 2015 | if (!md->bdev) |
2016 | goto bad_bdev; | 2016 | goto bad_bdev; |
2017 | 2017 | ||
2018 | /* Populate the mapping, nobody knows we exist yet */ | 2018 | /* Populate the mapping, nobody knows we exist yet */ |
2019 | spin_lock(&_minor_lock); | 2019 | spin_lock(&_minor_lock); |
2020 | old_md = idr_replace(&_minor_idr, md, minor); | 2020 | old_md = idr_replace(&_minor_idr, md, minor); |
2021 | spin_unlock(&_minor_lock); | 2021 | spin_unlock(&_minor_lock); |
2022 | 2022 | ||
2023 | BUG_ON(old_md != MINOR_ALLOCED); | 2023 | BUG_ON(old_md != MINOR_ALLOCED); |
2024 | 2024 | ||
2025 | return md; | 2025 | return md; |
2026 | 2026 | ||
2027 | bad_bdev: | 2027 | bad_bdev: |
2028 | destroy_workqueue(md->wq); | 2028 | destroy_workqueue(md->wq); |
2029 | bad_thread: | 2029 | bad_thread: |
2030 | del_gendisk(md->disk); | 2030 | del_gendisk(md->disk); |
2031 | put_disk(md->disk); | 2031 | put_disk(md->disk); |
2032 | bad_disk: | 2032 | bad_disk: |
2033 | blk_cleanup_queue(md->queue); | 2033 | blk_cleanup_queue(md->queue); |
2034 | bad_queue: | 2034 | bad_queue: |
2035 | free_minor(minor); | 2035 | free_minor(minor); |
2036 | bad_minor: | 2036 | bad_minor: |
2037 | module_put(THIS_MODULE); | 2037 | module_put(THIS_MODULE); |
2038 | bad_module_get: | 2038 | bad_module_get: |
2039 | kfree(md); | 2039 | kfree(md); |
2040 | return NULL; | 2040 | return NULL; |
2041 | } | 2041 | } |
2042 | 2042 | ||
2043 | static void unlock_fs(struct mapped_device *md); | 2043 | static void unlock_fs(struct mapped_device *md); |
2044 | 2044 | ||
2045 | static void free_dev(struct mapped_device *md) | 2045 | static void free_dev(struct mapped_device *md) |
2046 | { | 2046 | { |
2047 | int minor = MINOR(disk_devt(md->disk)); | 2047 | int minor = MINOR(disk_devt(md->disk)); |
2048 | 2048 | ||
2049 | unlock_fs(md); | 2049 | unlock_fs(md); |
2050 | bdput(md->bdev); | 2050 | bdput(md->bdev); |
2051 | destroy_workqueue(md->wq); | 2051 | destroy_workqueue(md->wq); |
2052 | if (md->tio_pool) | 2052 | if (md->tio_pool) |
2053 | mempool_destroy(md->tio_pool); | 2053 | mempool_destroy(md->tio_pool); |
2054 | if (md->io_pool) | 2054 | if (md->io_pool) |
2055 | mempool_destroy(md->io_pool); | 2055 | mempool_destroy(md->io_pool); |
2056 | if (md->bs) | 2056 | if (md->bs) |
2057 | bioset_free(md->bs); | 2057 | bioset_free(md->bs); |
2058 | blk_integrity_unregister(md->disk); | 2058 | blk_integrity_unregister(md->disk); |
2059 | del_gendisk(md->disk); | 2059 | del_gendisk(md->disk); |
2060 | free_minor(minor); | 2060 | free_minor(minor); |
2061 | 2061 | ||
2062 | spin_lock(&_minor_lock); | 2062 | spin_lock(&_minor_lock); |
2063 | md->disk->private_data = NULL; | 2063 | md->disk->private_data = NULL; |
2064 | spin_unlock(&_minor_lock); | 2064 | spin_unlock(&_minor_lock); |
2065 | 2065 | ||
2066 | put_disk(md->disk); | 2066 | put_disk(md->disk); |
2067 | blk_cleanup_queue(md->queue); | 2067 | blk_cleanup_queue(md->queue); |
2068 | module_put(THIS_MODULE); | 2068 | module_put(THIS_MODULE); |
2069 | kfree(md); | 2069 | kfree(md); |
2070 | } | 2070 | } |
2071 | 2071 | ||
2072 | static void __bind_mempools(struct mapped_device *md, struct dm_table *t) | 2072 | static void __bind_mempools(struct mapped_device *md, struct dm_table *t) |
2073 | { | 2073 | { |
2074 | struct dm_md_mempools *p; | 2074 | struct dm_md_mempools *p; |
2075 | 2075 | ||
2076 | if (md->io_pool && md->tio_pool && md->bs) | 2076 | if (md->io_pool && md->tio_pool && md->bs) |
2077 | /* the md already has necessary mempools */ | 2077 | /* the md already has necessary mempools */ |
2078 | goto out; | 2078 | goto out; |
2079 | 2079 | ||
2080 | p = dm_table_get_md_mempools(t); | 2080 | p = dm_table_get_md_mempools(t); |
2081 | BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); | 2081 | BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); |
2082 | 2082 | ||
2083 | md->io_pool = p->io_pool; | 2083 | md->io_pool = p->io_pool; |
2084 | p->io_pool = NULL; | 2084 | p->io_pool = NULL; |
2085 | md->tio_pool = p->tio_pool; | 2085 | md->tio_pool = p->tio_pool; |
2086 | p->tio_pool = NULL; | 2086 | p->tio_pool = NULL; |
2087 | md->bs = p->bs; | 2087 | md->bs = p->bs; |
2088 | p->bs = NULL; | 2088 | p->bs = NULL; |
2089 | 2089 | ||
2090 | out: | 2090 | out: |
2091 | /* mempool bind completed, the table no longer needs any mempools */ | 2091 | /* mempool bind completed, the table no longer needs any mempools */ |
2092 | dm_table_free_md_mempools(t); | 2092 | dm_table_free_md_mempools(t); |
2093 | } | 2093 | } |
2094 | 2094 | ||
2095 | /* | 2095 | /* |
2096 | * Bind a table to the device. | 2096 | * Bind a table to the device. |
2097 | */ | 2097 | */ |
2098 | static void event_callback(void *context) | 2098 | static void event_callback(void *context) |
2099 | { | 2099 | { |
2100 | unsigned long flags; | 2100 | unsigned long flags; |
2101 | LIST_HEAD(uevents); | 2101 | LIST_HEAD(uevents); |
2102 | struct mapped_device *md = (struct mapped_device *) context; | 2102 | struct mapped_device *md = (struct mapped_device *) context; |
2103 | 2103 | ||
2104 | spin_lock_irqsave(&md->uevent_lock, flags); | 2104 | spin_lock_irqsave(&md->uevent_lock, flags); |
2105 | list_splice_init(&md->uevent_list, &uevents); | 2105 | list_splice_init(&md->uevent_list, &uevents); |
2106 | spin_unlock_irqrestore(&md->uevent_lock, flags); | 2106 | spin_unlock_irqrestore(&md->uevent_lock, flags); |
2107 | 2107 | ||
2108 | dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); | 2108 | dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); |
2109 | 2109 | ||
2110 | atomic_inc(&md->event_nr); | 2110 | atomic_inc(&md->event_nr); |
2111 | wake_up(&md->eventq); | 2111 | wake_up(&md->eventq); |
2112 | } | 2112 | } |
2113 | 2113 | ||
2114 | static void __set_size(struct mapped_device *md, sector_t size) | 2114 | static void __set_size(struct mapped_device *md, sector_t size) |
2115 | { | 2115 | { |
2116 | set_capacity(md->disk, size); | 2116 | set_capacity(md->disk, size); |
2117 | 2117 | ||
2118 | mutex_lock(&md->bdev->bd_inode->i_mutex); | 2118 | mutex_lock(&md->bdev->bd_inode->i_mutex); |
2119 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); | 2119 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); |
2120 | mutex_unlock(&md->bdev->bd_inode->i_mutex); | 2120 | mutex_unlock(&md->bdev->bd_inode->i_mutex); |
2121 | } | 2121 | } |
2122 | 2122 | ||
2123 | /* | 2123 | /* |
2124 | * Returns old map, which caller must destroy. | 2124 | * Returns old map, which caller must destroy. |
2125 | */ | 2125 | */ |
2126 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | 2126 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, |
2127 | struct queue_limits *limits) | 2127 | struct queue_limits *limits) |
2128 | { | 2128 | { |
2129 | struct dm_table *old_map; | 2129 | struct dm_table *old_map; |
2130 | struct request_queue *q = md->queue; | 2130 | struct request_queue *q = md->queue; |
2131 | sector_t size; | 2131 | sector_t size; |
2132 | unsigned long flags; | 2132 | unsigned long flags; |
2133 | 2133 | ||
2134 | size = dm_table_get_size(t); | 2134 | size = dm_table_get_size(t); |
2135 | 2135 | ||
2136 | /* | 2136 | /* |
2137 | * Wipe any geometry if the size of the table changed. | 2137 | * Wipe any geometry if the size of the table changed. |
2138 | */ | 2138 | */ |
2139 | if (size != get_capacity(md->disk)) | 2139 | if (size != get_capacity(md->disk)) |
2140 | memset(&md->geometry, 0, sizeof(md->geometry)); | 2140 | memset(&md->geometry, 0, sizeof(md->geometry)); |
2141 | 2141 | ||
2142 | __set_size(md, size); | 2142 | __set_size(md, size); |
2143 | 2143 | ||
2144 | dm_table_event_callback(t, event_callback, md); | 2144 | dm_table_event_callback(t, event_callback, md); |
2145 | 2145 | ||
2146 | /* | 2146 | /* |
2147 | * If the old table type wasn't request-based, the queue was not | 2147 | * If the old table type wasn't request-based, the queue was not |
2148 | * stopped during suspension, so stop it now to prevent I/O from | 2148 | * stopped during suspension, so stop it now to prevent I/O from |
2149 | * being mapped before resume. | 2149 | * being mapped before resume. |
2150 | * This must be done before setting the queue restrictions, | 2150 | * This must be done before setting the queue restrictions, |
2151 | * because request-based dm may start running right after they are set. | 2151 | * because request-based dm may start running right after they are set. |
2152 | */ | 2152 | */ |
2153 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) | 2153 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) |
2154 | stop_queue(q); | 2154 | stop_queue(q); |
2155 | 2155 | ||
2156 | __bind_mempools(md, t); | 2156 | __bind_mempools(md, t); |
2157 | 2157 | ||
2158 | write_lock_irqsave(&md->map_lock, flags); | 2158 | write_lock_irqsave(&md->map_lock, flags); |
2159 | old_map = md->map; | 2159 | old_map = md->map; |
2160 | md->map = t; | 2160 | md->map = t; |
2161 | dm_table_set_restrictions(t, q, limits); | 2161 | dm_table_set_restrictions(t, q, limits); |
2162 | write_unlock_irqrestore(&md->map_lock, flags); | 2162 | write_unlock_irqrestore(&md->map_lock, flags); |
2163 | 2163 | ||
2164 | return old_map; | 2164 | return old_map; |
2165 | } | 2165 | } |
2166 | 2166 | ||
2167 | /* | 2167 | /* |
2168 | * Returns unbound table for the caller to free. | 2168 | * Returns unbound table for the caller to free. |
2169 | */ | 2169 | */ |
2170 | static struct dm_table *__unbind(struct mapped_device *md) | 2170 | static struct dm_table *__unbind(struct mapped_device *md) |
2171 | { | 2171 | { |
2172 | struct dm_table *map = md->map; | 2172 | struct dm_table *map = md->map; |
2173 | unsigned long flags; | 2173 | unsigned long flags; |
2174 | 2174 | ||
2175 | if (!map) | 2175 | if (!map) |
2176 | return NULL; | 2176 | return NULL; |
2177 | 2177 | ||
2178 | dm_table_event_callback(map, NULL, NULL); | 2178 | dm_table_event_callback(map, NULL, NULL); |
2179 | write_lock_irqsave(&md->map_lock, flags); | 2179 | write_lock_irqsave(&md->map_lock, flags); |
2180 | md->map = NULL; | 2180 | md->map = NULL; |
2181 | write_unlock_irqrestore(&md->map_lock, flags); | 2181 | write_unlock_irqrestore(&md->map_lock, flags); |
2182 | 2182 | ||
2183 | return map; | 2183 | return map; |
2184 | } | 2184 | } |
2185 | 2185 | ||
2186 | /* | 2186 | /* |
2187 | * Constructor for a new device. | 2187 | * Constructor for a new device. |
2188 | */ | 2188 | */ |
2189 | int dm_create(int minor, struct mapped_device **result) | 2189 | int dm_create(int minor, struct mapped_device **result) |
2190 | { | 2190 | { |
2191 | struct mapped_device *md; | 2191 | struct mapped_device *md; |
2192 | 2192 | ||
2193 | md = alloc_dev(minor); | 2193 | md = alloc_dev(minor); |
2194 | if (!md) | 2194 | if (!md) |
2195 | return -ENXIO; | 2195 | return -ENXIO; |
2196 | 2196 | ||
2197 | dm_sysfs_init(md); | 2197 | dm_sysfs_init(md); |
2198 | 2198 | ||
2199 | *result = md; | 2199 | *result = md; |
2200 | return 0; | 2200 | return 0; |
2201 | } | 2201 | } |
2202 | 2202 | ||
2203 | /* | 2203 | /* |
2204 | * Functions to manage md->type. | 2204 | * Functions to manage md->type. |
2205 | * All are required to hold md->type_lock. | 2205 | * All are required to hold md->type_lock. |
2206 | */ | 2206 | */ |
2207 | void dm_lock_md_type(struct mapped_device *md) | 2207 | void dm_lock_md_type(struct mapped_device *md) |
2208 | { | 2208 | { |
2209 | mutex_lock(&md->type_lock); | 2209 | mutex_lock(&md->type_lock); |
2210 | } | 2210 | } |
2211 | 2211 | ||
2212 | void dm_unlock_md_type(struct mapped_device *md) | 2212 | void dm_unlock_md_type(struct mapped_device *md) |
2213 | { | 2213 | { |
2214 | mutex_unlock(&md->type_lock); | 2214 | mutex_unlock(&md->type_lock); |
2215 | } | 2215 | } |
2216 | 2216 | ||
2217 | void dm_set_md_type(struct mapped_device *md, unsigned type) | 2217 | void dm_set_md_type(struct mapped_device *md, unsigned type) |
2218 | { | 2218 | { |
2219 | md->type = type; | 2219 | md->type = type; |
2220 | } | 2220 | } |
2221 | 2221 | ||
2222 | unsigned dm_get_md_type(struct mapped_device *md) | 2222 | unsigned dm_get_md_type(struct mapped_device *md) |
2223 | { | 2223 | { |
2224 | return md->type; | 2224 | return md->type; |
2225 | } | 2225 | } |
2226 | 2226 | ||
2227 | /* | 2227 | /* |
2228 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). | 2228 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). |
2229 | */ | 2229 | */ |
2230 | static int dm_init_request_based_queue(struct mapped_device *md) | 2230 | static int dm_init_request_based_queue(struct mapped_device *md) |
2231 | { | 2231 | { |
2232 | struct request_queue *q = NULL; | 2232 | struct request_queue *q = NULL; |
2233 | 2233 | ||
2234 | if (md->queue->elevator) | 2234 | if (md->queue->elevator) |
2235 | return 1; | 2235 | return 1; |
2236 | 2236 | ||
2237 | /* Fully initialize the queue */ | 2237 | /* Fully initialize the queue */ |
2238 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); | 2238 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); |
2239 | if (!q) | 2239 | if (!q) |
2240 | return 0; | 2240 | return 0; |
2241 | 2241 | ||
2242 | md->queue = q; | 2242 | md->queue = q; |
2243 | md->saved_make_request_fn = md->queue->make_request_fn; | 2243 | md->saved_make_request_fn = md->queue->make_request_fn; |
2244 | dm_init_md_queue(md); | 2244 | dm_init_md_queue(md); |
2245 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2245 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
2246 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2246 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
2247 | blk_queue_lld_busy(md->queue, dm_lld_busy); | 2247 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
2248 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); | 2248 | blk_queue_flush(md->queue, REQ_FLUSH); |
2249 | 2249 | ||
2250 | elv_register_queue(md->queue); | 2250 | elv_register_queue(md->queue); |
2251 | 2251 | ||
2252 | return 1; | 2252 | return 1; |
2253 | } | 2253 | } |
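
The hunk above is this file's half of the interface change: the old blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH) call becomes blk_queue_flush(md->queue, REQ_FLUSH). As a minimal sketch of how a driver would advertise its write-cache capabilities through the new helper (illustrative only, not code from this patch; example_setup_flush and has_fua are invented names):

    #include <linux/blkdev.h>

    /*
     * A driver whose device has a volatile write cache passes REQ_FLUSH; if
     * the device can also honour forced-unit-access writes it adds REQ_FUA.
     */
    static void example_setup_flush(struct request_queue *q, bool has_fua)
    {
            unsigned int flush = REQ_FLUSH;

            if (has_fua)
                    flush |= REQ_FUA;

            blk_queue_flush(q, flush);
    }

Request-based dm only sets REQ_FLUSH here; it does not advertise FUA.
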
2254 | 2254 | ||
2255 | /* | 2255 | /* |
2256 | * Setup the DM device's queue based on md's type | 2256 | * Setup the DM device's queue based on md's type |
2257 | */ | 2257 | */ |
2258 | int dm_setup_md_queue(struct mapped_device *md) | 2258 | int dm_setup_md_queue(struct mapped_device *md) |
2259 | { | 2259 | { |
2260 | if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && | 2260 | if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && |
2261 | !dm_init_request_based_queue(md)) { | 2261 | !dm_init_request_based_queue(md)) { |
2262 | DMWARN("Cannot initialize queue for request-based mapped device"); | 2262 | DMWARN("Cannot initialize queue for request-based mapped device"); |
2263 | return -EINVAL; | 2263 | return -EINVAL; |
2264 | } | 2264 | } |
2265 | 2265 | ||
2266 | return 0; | 2266 | return 0; |
2267 | } | 2267 | } |
2268 | 2268 | ||
2269 | static struct mapped_device *dm_find_md(dev_t dev) | 2269 | static struct mapped_device *dm_find_md(dev_t dev) |
2270 | { | 2270 | { |
2271 | struct mapped_device *md; | 2271 | struct mapped_device *md; |
2272 | unsigned minor = MINOR(dev); | 2272 | unsigned minor = MINOR(dev); |
2273 | 2273 | ||
2274 | if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) | 2274 | if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) |
2275 | return NULL; | 2275 | return NULL; |
2276 | 2276 | ||
2277 | spin_lock(&_minor_lock); | 2277 | spin_lock(&_minor_lock); |
2278 | 2278 | ||
2279 | md = idr_find(&_minor_idr, minor); | 2279 | md = idr_find(&_minor_idr, minor); |
2280 | if (md && (md == MINOR_ALLOCED || | 2280 | if (md && (md == MINOR_ALLOCED || |
2281 | (MINOR(disk_devt(dm_disk(md))) != minor) || | 2281 | (MINOR(disk_devt(dm_disk(md))) != minor) || |
2282 | dm_deleting_md(md) || | 2282 | dm_deleting_md(md) || |
2283 | test_bit(DMF_FREEING, &md->flags))) { | 2283 | test_bit(DMF_FREEING, &md->flags))) { |
2284 | md = NULL; | 2284 | md = NULL; |
2285 | goto out; | 2285 | goto out; |
2286 | } | 2286 | } |
2287 | 2287 | ||
2288 | out: | 2288 | out: |
2289 | spin_unlock(&_minor_lock); | 2289 | spin_unlock(&_minor_lock); |
2290 | 2290 | ||
2291 | return md; | 2291 | return md; |
2292 | } | 2292 | } |
2293 | 2293 | ||
2294 | struct mapped_device *dm_get_md(dev_t dev) | 2294 | struct mapped_device *dm_get_md(dev_t dev) |
2295 | { | 2295 | { |
2296 | struct mapped_device *md = dm_find_md(dev); | 2296 | struct mapped_device *md = dm_find_md(dev); |
2297 | 2297 | ||
2298 | if (md) | 2298 | if (md) |
2299 | dm_get(md); | 2299 | dm_get(md); |
2300 | 2300 | ||
2301 | return md; | 2301 | return md; |
2302 | } | 2302 | } |
2303 | 2303 | ||
2304 | void *dm_get_mdptr(struct mapped_device *md) | 2304 | void *dm_get_mdptr(struct mapped_device *md) |
2305 | { | 2305 | { |
2306 | return md->interface_ptr; | 2306 | return md->interface_ptr; |
2307 | } | 2307 | } |
2308 | 2308 | ||
2309 | void dm_set_mdptr(struct mapped_device *md, void *ptr) | 2309 | void dm_set_mdptr(struct mapped_device *md, void *ptr) |
2310 | { | 2310 | { |
2311 | md->interface_ptr = ptr; | 2311 | md->interface_ptr = ptr; |
2312 | } | 2312 | } |
2313 | 2313 | ||
2314 | void dm_get(struct mapped_device *md) | 2314 | void dm_get(struct mapped_device *md) |
2315 | { | 2315 | { |
2316 | atomic_inc(&md->holders); | 2316 | atomic_inc(&md->holders); |
2317 | BUG_ON(test_bit(DMF_FREEING, &md->flags)); | 2317 | BUG_ON(test_bit(DMF_FREEING, &md->flags)); |
2318 | } | 2318 | } |
2319 | 2319 | ||
2320 | const char *dm_device_name(struct mapped_device *md) | 2320 | const char *dm_device_name(struct mapped_device *md) |
2321 | { | 2321 | { |
2322 | return md->name; | 2322 | return md->name; |
2323 | } | 2323 | } |
2324 | EXPORT_SYMBOL_GPL(dm_device_name); | 2324 | EXPORT_SYMBOL_GPL(dm_device_name); |
2325 | 2325 | ||
2326 | static void __dm_destroy(struct mapped_device *md, bool wait) | 2326 | static void __dm_destroy(struct mapped_device *md, bool wait) |
2327 | { | 2327 | { |
2328 | struct dm_table *map; | 2328 | struct dm_table *map; |
2329 | 2329 | ||
2330 | might_sleep(); | 2330 | might_sleep(); |
2331 | 2331 | ||
2332 | spin_lock(&_minor_lock); | 2332 | spin_lock(&_minor_lock); |
2333 | map = dm_get_live_table(md); | 2333 | map = dm_get_live_table(md); |
2334 | idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); | 2334 | idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); |
2335 | set_bit(DMF_FREEING, &md->flags); | 2335 | set_bit(DMF_FREEING, &md->flags); |
2336 | spin_unlock(&_minor_lock); | 2336 | spin_unlock(&_minor_lock); |
2337 | 2337 | ||
2338 | if (!dm_suspended_md(md)) { | 2338 | if (!dm_suspended_md(md)) { |
2339 | dm_table_presuspend_targets(map); | 2339 | dm_table_presuspend_targets(map); |
2340 | dm_table_postsuspend_targets(map); | 2340 | dm_table_postsuspend_targets(map); |
2341 | } | 2341 | } |
2342 | 2342 | ||
2343 | /* | 2343 | /* |
2344 | * Rare, but there may be I/O requests still going to complete, | 2344 | * Rare, but there may be I/O requests still going to complete, |
2345 | * for example. Wait for all references to disappear. | 2345 | * for example. Wait for all references to disappear. |
2346 | * No one should increment the reference count of the mapped_device, | 2346 | * No one should increment the reference count of the mapped_device, |
2347 | * after the mapped_device state becomes DMF_FREEING. | 2347 | * after the mapped_device state becomes DMF_FREEING. |
2348 | */ | 2348 | */ |
2349 | if (wait) | 2349 | if (wait) |
2350 | while (atomic_read(&md->holders)) | 2350 | while (atomic_read(&md->holders)) |
2351 | msleep(1); | 2351 | msleep(1); |
2352 | else if (atomic_read(&md->holders)) | 2352 | else if (atomic_read(&md->holders)) |
2353 | DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)", | 2353 | DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)", |
2354 | dm_device_name(md), atomic_read(&md->holders)); | 2354 | dm_device_name(md), atomic_read(&md->holders)); |
2355 | 2355 | ||
2356 | dm_sysfs_exit(md); | 2356 | dm_sysfs_exit(md); |
2357 | dm_table_put(map); | 2357 | dm_table_put(map); |
2358 | dm_table_destroy(__unbind(md)); | 2358 | dm_table_destroy(__unbind(md)); |
2359 | free_dev(md); | 2359 | free_dev(md); |
2360 | } | 2360 | } |
2361 | 2361 | ||
2362 | void dm_destroy(struct mapped_device *md) | 2362 | void dm_destroy(struct mapped_device *md) |
2363 | { | 2363 | { |
2364 | __dm_destroy(md, true); | 2364 | __dm_destroy(md, true); |
2365 | } | 2365 | } |
2366 | 2366 | ||
2367 | void dm_destroy_immediate(struct mapped_device *md) | 2367 | void dm_destroy_immediate(struct mapped_device *md) |
2368 | { | 2368 | { |
2369 | __dm_destroy(md, false); | 2369 | __dm_destroy(md, false); |
2370 | } | 2370 | } |
2371 | 2371 | ||
2372 | void dm_put(struct mapped_device *md) | 2372 | void dm_put(struct mapped_device *md) |
2373 | { | 2373 | { |
2374 | atomic_dec(&md->holders); | 2374 | atomic_dec(&md->holders); |
2375 | } | 2375 | } |
2376 | EXPORT_SYMBOL_GPL(dm_put); | 2376 | EXPORT_SYMBOL_GPL(dm_put); |
2377 | 2377 | ||
2378 | static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | 2378 | static int dm_wait_for_completion(struct mapped_device *md, int interruptible) |
2379 | { | 2379 | { |
2380 | int r = 0; | 2380 | int r = 0; |
2381 | DECLARE_WAITQUEUE(wait, current); | 2381 | DECLARE_WAITQUEUE(wait, current); |
2382 | 2382 | ||
2383 | dm_unplug_all(md->queue); | 2383 | dm_unplug_all(md->queue); |
2384 | 2384 | ||
2385 | add_wait_queue(&md->wait, &wait); | 2385 | add_wait_queue(&md->wait, &wait); |
2386 | 2386 | ||
2387 | while (1) { | 2387 | while (1) { |
2388 | set_current_state(interruptible); | 2388 | set_current_state(interruptible); |
2389 | 2389 | ||
2390 | smp_mb(); | 2390 | smp_mb(); |
2391 | if (!md_in_flight(md)) | 2391 | if (!md_in_flight(md)) |
2392 | break; | 2392 | break; |
2393 | 2393 | ||
2394 | if (interruptible == TASK_INTERRUPTIBLE && | 2394 | if (interruptible == TASK_INTERRUPTIBLE && |
2395 | signal_pending(current)) { | 2395 | signal_pending(current)) { |
2396 | r = -EINTR; | 2396 | r = -EINTR; |
2397 | break; | 2397 | break; |
2398 | } | 2398 | } |
2399 | 2399 | ||
2400 | io_schedule(); | 2400 | io_schedule(); |
2401 | } | 2401 | } |
2402 | set_current_state(TASK_RUNNING); | 2402 | set_current_state(TASK_RUNNING); |
2403 | 2403 | ||
2404 | remove_wait_queue(&md->wait, &wait); | 2404 | remove_wait_queue(&md->wait, &wait); |
2405 | 2405 | ||
2406 | return r; | 2406 | return r; |
2407 | } | 2407 | } |
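
dm_wait_for_completion() open-codes its wait loop so the caller can choose between TASK_UNINTERRUPTIBLE and TASK_INTERRUPTIBLE at run time. With the task state fixed per call site it would collapse to the stock wait_event helpers, as in this roughly equivalent sketch (not a proposed change; note the differing signal return value):

    #include <linux/wait.h>

    static int example_wait_for_completion(struct mapped_device *md,
                                           bool interruptible)
    {
            if (!interruptible) {
                    wait_event(md->wait, !md_in_flight(md));
                    return 0;
            }
            /* Returns -ERESTARTSYS rather than -EINTR when a signal arrives. */
            return wait_event_interruptible(md->wait, !md_in_flight(md));
    }
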
2408 | 2408 | ||
2409 | static void dm_flush(struct mapped_device *md) | 2409 | static void dm_flush(struct mapped_device *md) |
2410 | { | 2410 | { |
2411 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | 2411 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
2412 | 2412 | ||
2413 | bio_init(&md->barrier_bio); | 2413 | bio_init(&md->barrier_bio); |
2414 | md->barrier_bio.bi_bdev = md->bdev; | 2414 | md->barrier_bio.bi_bdev = md->bdev; |
2415 | md->barrier_bio.bi_rw = WRITE_BARRIER; | 2415 | md->barrier_bio.bi_rw = WRITE_BARRIER; |
2416 | __split_and_process_bio(md, &md->barrier_bio); | 2416 | __split_and_process_bio(md, &md->barrier_bio); |
2417 | 2417 | ||
2418 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | 2418 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
2419 | } | 2419 | } |
2420 | 2420 | ||
2421 | static void process_barrier(struct mapped_device *md, struct bio *bio) | 2421 | static void process_barrier(struct mapped_device *md, struct bio *bio) |
2422 | { | 2422 | { |
2423 | md->barrier_error = 0; | 2423 | md->barrier_error = 0; |
2424 | 2424 | ||
2425 | dm_flush(md); | 2425 | dm_flush(md); |
2426 | 2426 | ||
2427 | if (!bio_empty_barrier(bio)) { | 2427 | if (!bio_empty_barrier(bio)) { |
2428 | __split_and_process_bio(md, bio); | 2428 | __split_and_process_bio(md, bio); |
2429 | /* | 2429 | /* |
2430 | * If the request isn't supported, don't waste time with | 2430 | * If the request isn't supported, don't waste time with |
2431 | * the second flush. | 2431 | * the second flush. |
2432 | */ | 2432 | */ |
2433 | if (md->barrier_error != -EOPNOTSUPP) | 2433 | if (md->barrier_error != -EOPNOTSUPP) |
2434 | dm_flush(md); | 2434 | dm_flush(md); |
2435 | } | 2435 | } |
2436 | 2436 | ||
2437 | if (md->barrier_error != DM_ENDIO_REQUEUE) | 2437 | if (md->barrier_error != DM_ENDIO_REQUEUE) |
2438 | bio_endio(bio, md->barrier_error); | 2438 | bio_endio(bio, md->barrier_error); |
2439 | else { | 2439 | else { |
2440 | spin_lock_irq(&md->deferred_lock); | 2440 | spin_lock_irq(&md->deferred_lock); |
2441 | bio_list_add_head(&md->deferred, bio); | 2441 | bio_list_add_head(&md->deferred, bio); |
2442 | spin_unlock_irq(&md->deferred_lock); | 2442 | spin_unlock_irq(&md->deferred_lock); |
2443 | } | 2443 | } |
2444 | } | 2444 | } |
2445 | 2445 | ||
2446 | /* | 2446 | /* |
2447 | * Process the deferred bios | 2447 | * Process the deferred bios |
2448 | */ | 2448 | */ |
2449 | static void dm_wq_work(struct work_struct *work) | 2449 | static void dm_wq_work(struct work_struct *work) |
2450 | { | 2450 | { |
2451 | struct mapped_device *md = container_of(work, struct mapped_device, | 2451 | struct mapped_device *md = container_of(work, struct mapped_device, |
2452 | work); | 2452 | work); |
2453 | struct bio *c; | 2453 | struct bio *c; |
2454 | 2454 | ||
2455 | down_write(&md->io_lock); | 2455 | down_write(&md->io_lock); |
2456 | 2456 | ||
2457 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 2457 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
2458 | spin_lock_irq(&md->deferred_lock); | 2458 | spin_lock_irq(&md->deferred_lock); |
2459 | c = bio_list_pop(&md->deferred); | 2459 | c = bio_list_pop(&md->deferred); |
2460 | spin_unlock_irq(&md->deferred_lock); | 2460 | spin_unlock_irq(&md->deferred_lock); |
2461 | 2461 | ||
2462 | if (!c) { | 2462 | if (!c) { |
2463 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | 2463 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); |
2464 | break; | 2464 | break; |
2465 | } | 2465 | } |
2466 | 2466 | ||
2467 | up_write(&md->io_lock); | 2467 | up_write(&md->io_lock); |
2468 | 2468 | ||
2469 | if (dm_request_based(md)) | 2469 | if (dm_request_based(md)) |
2470 | generic_make_request(c); | 2470 | generic_make_request(c); |
2471 | else { | 2471 | else { |
2472 | if (c->bi_rw & REQ_HARDBARRIER) | 2472 | if (c->bi_rw & REQ_HARDBARRIER) |
2473 | process_barrier(md, c); | 2473 | process_barrier(md, c); |
2474 | else | 2474 | else |
2475 | __split_and_process_bio(md, c); | 2475 | __split_and_process_bio(md, c); |
2476 | } | 2476 | } |
2477 | 2477 | ||
2478 | down_write(&md->io_lock); | 2478 | down_write(&md->io_lock); |
2479 | } | 2479 | } |
2480 | 2480 | ||
2481 | up_write(&md->io_lock); | 2481 | up_write(&md->io_lock); |
2482 | } | 2482 | } |
2483 | 2483 | ||
2484 | static void dm_queue_flush(struct mapped_device *md) | 2484 | static void dm_queue_flush(struct mapped_device *md) |
2485 | { | 2485 | { |
2486 | clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2486 | clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2487 | smp_mb__after_clear_bit(); | 2487 | smp_mb__after_clear_bit(); |
2488 | queue_work(md->wq, &md->work); | 2488 | queue_work(md->wq, &md->work); |
2489 | } | 2489 | } |
2490 | 2490 | ||
2491 | static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) | 2491 | static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) |
2492 | { | 2492 | { |
2493 | struct dm_rq_target_io *tio = clone->end_io_data; | 2493 | struct dm_rq_target_io *tio = clone->end_io_data; |
2494 | 2494 | ||
2495 | tio->info.target_request_nr = request_nr; | 2495 | tio->info.target_request_nr = request_nr; |
2496 | } | 2496 | } |
2497 | 2497 | ||
2498 | /* Issue barrier requests to targets and wait for their completion. */ | 2498 | /* Issue barrier requests to targets and wait for their completion. */ |
2499 | static int dm_rq_barrier(struct mapped_device *md) | 2499 | static int dm_rq_barrier(struct mapped_device *md) |
2500 | { | 2500 | { |
2501 | int i, j; | 2501 | int i, j; |
2502 | struct dm_table *map = dm_get_live_table(md); | 2502 | struct dm_table *map = dm_get_live_table(md); |
2503 | unsigned num_targets = dm_table_get_num_targets(map); | 2503 | unsigned num_targets = dm_table_get_num_targets(map); |
2504 | struct dm_target *ti; | 2504 | struct dm_target *ti; |
2505 | struct request *clone; | 2505 | struct request *clone; |
2506 | 2506 | ||
2507 | md->barrier_error = 0; | 2507 | md->barrier_error = 0; |
2508 | 2508 | ||
2509 | for (i = 0; i < num_targets; i++) { | 2509 | for (i = 0; i < num_targets; i++) { |
2510 | ti = dm_table_get_target(map, i); | 2510 | ti = dm_table_get_target(map, i); |
2511 | for (j = 0; j < ti->num_flush_requests; j++) { | 2511 | for (j = 0; j < ti->num_flush_requests; j++) { |
2512 | clone = clone_rq(md->flush_request, md, GFP_NOIO); | 2512 | clone = clone_rq(md->flush_request, md, GFP_NOIO); |
2513 | dm_rq_set_target_request_nr(clone, j); | 2513 | dm_rq_set_target_request_nr(clone, j); |
2514 | atomic_inc(&md->pending[rq_data_dir(clone)]); | 2514 | atomic_inc(&md->pending[rq_data_dir(clone)]); |
2515 | map_request(ti, clone, md); | 2515 | map_request(ti, clone, md); |
2516 | } | 2516 | } |
2517 | } | 2517 | } |
2518 | 2518 | ||
2519 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | 2519 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
2520 | dm_table_put(map); | 2520 | dm_table_put(map); |
2521 | 2521 | ||
2522 | return md->barrier_error; | 2522 | return md->barrier_error; |
2523 | } | 2523 | } |
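dm_rq_barrier() above clones md->flush_request once per target and once per ti->num_flush_requests, maps each clone and then waits for all of them to complete. A target only receives such clones if it advertises a non-zero num_flush_requests from its constructor; a minimal, hypothetical constructor fragment (the field name is the one used in this tree, everything else is assumed):

    static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
    {
            /* ... parse arguments and set up per-target state ... */

            /*
             * Ask dm core to send this target one cloned flush request;
             * targets spanning several devices may ask for more.
             */
            ti->num_flush_requests = 1;
            return 0;
    }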
2524 | 2524 | ||
2525 | static void dm_rq_barrier_work(struct work_struct *work) | 2525 | static void dm_rq_barrier_work(struct work_struct *work) |
2526 | { | 2526 | { |
2527 | int error; | 2527 | int error; |
2528 | struct mapped_device *md = container_of(work, struct mapped_device, | 2528 | struct mapped_device *md = container_of(work, struct mapped_device, |
2529 | barrier_work); | 2529 | barrier_work); |
2530 | struct request_queue *q = md->queue; | 2530 | struct request_queue *q = md->queue; |
2531 | struct request *rq; | 2531 | struct request *rq; |
2532 | unsigned long flags; | 2532 | unsigned long flags; |
2533 | 2533 | ||
2534 | /* | 2534 | /* |
2535 | * Take a reference on the md here and drop it only at the end so | 2535 | * Take a reference on the md here and drop it only at the end so |
2536 | * that the md cannot be deleted by a device opener while the | 2536 | * that the md cannot be deleted by a device opener while the |
2537 | * barrier request is completing. | 2537 | * barrier request is completing. |
2538 | */ | 2538 | */ |
2539 | dm_get(md); | 2539 | dm_get(md); |
2540 | 2540 | ||
2541 | error = dm_rq_barrier(md); | 2541 | error = dm_rq_barrier(md); |
2542 | 2542 | ||
2543 | rq = md->flush_request; | 2543 | rq = md->flush_request; |
2544 | md->flush_request = NULL; | 2544 | md->flush_request = NULL; |
2545 | 2545 | ||
2546 | if (error == DM_ENDIO_REQUEUE) { | 2546 | if (error == DM_ENDIO_REQUEUE) { |
2547 | spin_lock_irqsave(q->queue_lock, flags); | 2547 | spin_lock_irqsave(q->queue_lock, flags); |
2548 | blk_requeue_request(q, rq); | 2548 | blk_requeue_request(q, rq); |
2549 | spin_unlock_irqrestore(q->queue_lock, flags); | 2549 | spin_unlock_irqrestore(q->queue_lock, flags); |
2550 | } else | 2550 | } else |
2551 | blk_end_request_all(rq, error); | 2551 | blk_end_request_all(rq, error); |
2552 | 2552 | ||
2553 | blk_run_queue(q); | 2553 | blk_run_queue(q); |
2554 | 2554 | ||
2555 | dm_put(md); | 2555 | dm_put(md); |
2556 | } | 2556 | } |
2557 | 2557 | ||
2558 | /* | 2558 | /* |
2559 | * Swap in a new table, returning the old one for the caller to destroy. | 2559 | * Swap in a new table, returning the old one for the caller to destroy. |
2560 | */ | 2560 | */ |
2561 | struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) | 2561 | struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) |
2562 | { | 2562 | { |
2563 | struct dm_table *map = ERR_PTR(-EINVAL); | 2563 | struct dm_table *map = ERR_PTR(-EINVAL); |
2564 | struct queue_limits limits; | 2564 | struct queue_limits limits; |
2565 | int r; | 2565 | int r; |
2566 | 2566 | ||
2567 | mutex_lock(&md->suspend_lock); | 2567 | mutex_lock(&md->suspend_lock); |
2568 | 2568 | ||
2569 | /* device must be suspended */ | 2569 | /* device must be suspended */ |
2570 | if (!dm_suspended_md(md)) | 2570 | if (!dm_suspended_md(md)) |
2571 | goto out; | 2571 | goto out; |
2572 | 2572 | ||
2573 | r = dm_calculate_queue_limits(table, &limits); | 2573 | r = dm_calculate_queue_limits(table, &limits); |
2574 | if (r) { | 2574 | if (r) { |
2575 | map = ERR_PTR(r); | 2575 | map = ERR_PTR(r); |
2576 | goto out; | 2576 | goto out; |
2577 | } | 2577 | } |
2578 | 2578 | ||
2579 | map = __bind(md, table, &limits); | 2579 | map = __bind(md, table, &limits); |
2580 | 2580 | ||
2581 | out: | 2581 | out: |
2582 | mutex_unlock(&md->suspend_lock); | 2582 | mutex_unlock(&md->suspend_lock); |
2583 | return map; | 2583 | return map; |
2584 | } | 2584 | } |
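dm_swap_table() refuses to run unless the device is suspended, so callers bracket it with dm_suspend()/dm_resume(). A rough caller sketch (error handling trimmed, 'new_table' assumed to have been built and validated elsewhere, flow loosely modeled on the dm-ioctl resume path):

    struct dm_table *old_map;

    if (!dm_suspended_md(md))
            dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);

    old_map = dm_swap_table(md, new_table);
    if (IS_ERR(old_map))
            return PTR_ERR(old_map);

    dm_resume(md);
    if (old_map)
            dm_table_destroy(old_map);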
2585 | 2585 | ||
2586 | /* | 2586 | /* |
2587 | * Functions to lock and unlock any filesystem running on the | 2587 | * Functions to lock and unlock any filesystem running on the |
2588 | * device. | 2588 | * device. |
2589 | */ | 2589 | */ |
2590 | static int lock_fs(struct mapped_device *md) | 2590 | static int lock_fs(struct mapped_device *md) |
2591 | { | 2591 | { |
2592 | int r; | 2592 | int r; |
2593 | 2593 | ||
2594 | WARN_ON(md->frozen_sb); | 2594 | WARN_ON(md->frozen_sb); |
2595 | 2595 | ||
2596 | md->frozen_sb = freeze_bdev(md->bdev); | 2596 | md->frozen_sb = freeze_bdev(md->bdev); |
2597 | if (IS_ERR(md->frozen_sb)) { | 2597 | if (IS_ERR(md->frozen_sb)) { |
2598 | r = PTR_ERR(md->frozen_sb); | 2598 | r = PTR_ERR(md->frozen_sb); |
2599 | md->frozen_sb = NULL; | 2599 | md->frozen_sb = NULL; |
2600 | return r; | 2600 | return r; |
2601 | } | 2601 | } |
2602 | 2602 | ||
2603 | set_bit(DMF_FROZEN, &md->flags); | 2603 | set_bit(DMF_FROZEN, &md->flags); |
2604 | 2604 | ||
2605 | return 0; | 2605 | return 0; |
2606 | } | 2606 | } |
2607 | 2607 | ||
2608 | static void unlock_fs(struct mapped_device *md) | 2608 | static void unlock_fs(struct mapped_device *md) |
2609 | { | 2609 | { |
2610 | if (!test_bit(DMF_FROZEN, &md->flags)) | 2610 | if (!test_bit(DMF_FROZEN, &md->flags)) |
2611 | return; | 2611 | return; |
2612 | 2612 | ||
2613 | thaw_bdev(md->bdev, md->frozen_sb); | 2613 | thaw_bdev(md->bdev, md->frozen_sb); |
2614 | md->frozen_sb = NULL; | 2614 | md->frozen_sb = NULL; |
2615 | clear_bit(DMF_FROZEN, &md->flags); | 2615 | clear_bit(DMF_FROZEN, &md->flags); |
2616 | } | 2616 | } |
2617 | 2617 | ||
2618 | /* | 2618 | /* |
2619 | * We need to be able to change a mapping table under a mounted | 2619 | * We need to be able to change a mapping table under a mounted |
2620 | * filesystem. For example we might want to move some data in | 2620 | * filesystem. For example we might want to move some data in |
2621 | * the background. Before the table can be swapped with | 2621 | * the background. Before the table can be swapped with |
2622 | * dm_bind_table, dm_suspend must be called to flush any in | 2622 | * dm_bind_table, dm_suspend must be called to flush any in |
2623 | * flight bios and ensure that any further io gets deferred. | 2623 | * flight bios and ensure that any further io gets deferred. |
2624 | */ | 2624 | */ |
2625 | /* | 2625 | /* |
2626 | * Suspend mechanism in request-based dm. | 2626 | * Suspend mechanism in request-based dm. |
2627 | * | 2627 | * |
2628 | * 1. Flush all I/Os by lock_fs() if needed. | 2628 | * 1. Flush all I/Os by lock_fs() if needed. |
2629 | * 2. Stop dispatching any I/O by stopping the request_queue. | 2629 | * 2. Stop dispatching any I/O by stopping the request_queue. |
2630 | * 3. Wait for all in-flight I/Os to be completed or requeued. | 2630 | * 3. Wait for all in-flight I/Os to be completed or requeued. |
2631 | * | 2631 | * |
2632 | * To abort suspend, start the request_queue. | 2632 | * To abort suspend, start the request_queue. |
2633 | */ | 2633 | */ |
2634 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 2634 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) |
2635 | { | 2635 | { |
2636 | struct dm_table *map = NULL; | 2636 | struct dm_table *map = NULL; |
2637 | int r = 0; | 2637 | int r = 0; |
2638 | int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; | 2638 | int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; |
2639 | int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; | 2639 | int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; |
2640 | 2640 | ||
2641 | mutex_lock(&md->suspend_lock); | 2641 | mutex_lock(&md->suspend_lock); |
2642 | 2642 | ||
2643 | if (dm_suspended_md(md)) { | 2643 | if (dm_suspended_md(md)) { |
2644 | r = -EINVAL; | 2644 | r = -EINVAL; |
2645 | goto out_unlock; | 2645 | goto out_unlock; |
2646 | } | 2646 | } |
2647 | 2647 | ||
2648 | map = dm_get_live_table(md); | 2648 | map = dm_get_live_table(md); |
2649 | 2649 | ||
2650 | /* | 2650 | /* |
2651 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. | 2651 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. |
2652 | * This flag is cleared before dm_suspend returns. | 2652 | * This flag is cleared before dm_suspend returns. |
2653 | */ | 2653 | */ |
2654 | if (noflush) | 2654 | if (noflush) |
2655 | set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 2655 | set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
2656 | 2656 | ||
2657 | /* This does not get reverted if there's an error later. */ | 2657 | /* This does not get reverted if there's an error later. */ |
2658 | dm_table_presuspend_targets(map); | 2658 | dm_table_presuspend_targets(map); |
2659 | 2659 | ||
2660 | /* | 2660 | /* |
2661 | * Flush I/O to the device. | 2661 | * Flush I/O to the device. |
2662 | * Any I/O submitted after lock_fs() may not be flushed. | 2662 | * Any I/O submitted after lock_fs() may not be flushed. |
2663 | * noflush takes precedence over do_lockfs. | 2663 | * noflush takes precedence over do_lockfs. |
2664 | * (lock_fs() flushes I/Os and waits for them to complete.) | 2664 | * (lock_fs() flushes I/Os and waits for them to complete.) |
2665 | */ | 2665 | */ |
2666 | if (!noflush && do_lockfs) { | 2666 | if (!noflush && do_lockfs) { |
2667 | r = lock_fs(md); | 2667 | r = lock_fs(md); |
2668 | if (r) | 2668 | if (r) |
2669 | goto out; | 2669 | goto out; |
2670 | } | 2670 | } |
2671 | 2671 | ||
2672 | /* | 2672 | /* |
2673 | * Here we must make sure that no processes are submitting requests | 2673 | * Here we must make sure that no processes are submitting requests |
2674 | * to target drivers i.e. no one may be executing | 2674 | * to target drivers i.e. no one may be executing |
2675 | * __split_and_process_bio. This is called from dm_request and | 2675 | * __split_and_process_bio. This is called from dm_request and |
2676 | * dm_wq_work. | 2676 | * dm_wq_work. |
2677 | * | 2677 | * |
2678 | * To get all processes out of __split_and_process_bio in dm_request, | 2678 | * To get all processes out of __split_and_process_bio in dm_request, |
2679 | * we take the write lock. To prevent any process from reentering | 2679 | * we take the write lock. To prevent any process from reentering |
2680 | * __split_and_process_bio from dm_request, we set | 2680 | * __split_and_process_bio from dm_request, we set |
2681 | * DMF_QUEUE_IO_TO_THREAD. | 2681 | * DMF_QUEUE_IO_TO_THREAD. |
2682 | * | 2682 | * |
2683 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND | 2683 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND |
2684 | * and call flush_workqueue(md->wq). flush_workqueue will wait until | 2684 | * and call flush_workqueue(md->wq). flush_workqueue will wait until |
2685 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any | 2685 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any |
2686 | * further calls to __split_and_process_bio from dm_wq_work. | 2686 | * further calls to __split_and_process_bio from dm_wq_work. |
2687 | */ | 2687 | */ |
2688 | down_write(&md->io_lock); | 2688 | down_write(&md->io_lock); |
2689 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2689 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2690 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | 2690 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); |
2691 | up_write(&md->io_lock); | 2691 | up_write(&md->io_lock); |
2692 | 2692 | ||
2693 | /* | 2693 | /* |
2694 | * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which | 2694 | * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which |
2695 | * can be kicked until md->queue is stopped. So stop md->queue before | 2695 | * can be kicked until md->queue is stopped. So stop md->queue before |
2696 | * flushing md->wq. | 2696 | * flushing md->wq. |
2697 | */ | 2697 | */ |
2698 | if (dm_request_based(md)) | 2698 | if (dm_request_based(md)) |
2699 | stop_queue(md->queue); | 2699 | stop_queue(md->queue); |
2700 | 2700 | ||
2701 | flush_workqueue(md->wq); | 2701 | flush_workqueue(md->wq); |
2702 | 2702 | ||
2703 | /* | 2703 | /* |
2704 | * At this point no more requests are entering target request routines. | 2704 | * At this point no more requests are entering target request routines. |
2705 | * We call dm_wait_for_completion to wait for all existing requests | 2705 | * We call dm_wait_for_completion to wait for all existing requests |
2706 | * to finish. | 2706 | * to finish. |
2707 | */ | 2707 | */ |
2708 | r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); | 2708 | r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); |
2709 | 2709 | ||
2710 | down_write(&md->io_lock); | 2710 | down_write(&md->io_lock); |
2711 | if (noflush) | 2711 | if (noflush) |
2712 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | 2712 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); |
2713 | up_write(&md->io_lock); | 2713 | up_write(&md->io_lock); |
2714 | 2714 | ||
2715 | /* were we interrupted ? */ | 2715 | /* were we interrupted ? */ |
2716 | if (r < 0) { | 2716 | if (r < 0) { |
2717 | dm_queue_flush(md); | 2717 | dm_queue_flush(md); |
2718 | 2718 | ||
2719 | if (dm_request_based(md)) | 2719 | if (dm_request_based(md)) |
2720 | start_queue(md->queue); | 2720 | start_queue(md->queue); |
2721 | 2721 | ||
2722 | unlock_fs(md); | 2722 | unlock_fs(md); |
2723 | goto out; /* pushback list is already flushed, so skip flush */ | 2723 | goto out; /* pushback list is already flushed, so skip flush */ |
2724 | } | 2724 | } |
2725 | 2725 | ||
2726 | /* | 2726 | /* |
2727 | * If dm_wait_for_completion returned 0, the device is completely | 2727 | * If dm_wait_for_completion returned 0, the device is completely |
2728 | * quiescent now. There is no request-processing activity. All new | 2728 | * quiescent now. There is no request-processing activity. All new |
2729 | * requests are being added to md->deferred list. | 2729 | * requests are being added to md->deferred list. |
2730 | */ | 2730 | */ |
2731 | 2731 | ||
2732 | set_bit(DMF_SUSPENDED, &md->flags); | 2732 | set_bit(DMF_SUSPENDED, &md->flags); |
2733 | 2733 | ||
2734 | dm_table_postsuspend_targets(map); | 2734 | dm_table_postsuspend_targets(map); |
2735 | 2735 | ||
2736 | out: | 2736 | out: |
2737 | dm_table_put(map); | 2737 | dm_table_put(map); |
2738 | 2738 | ||
2739 | out_unlock: | 2739 | out_unlock: |
2740 | mutex_unlock(&md->suspend_lock); | 2740 | mutex_unlock(&md->suspend_lock); |
2741 | return r; | 2741 | return r; |
2742 | } | 2742 | } |
2743 | 2743 | ||
2744 | int dm_resume(struct mapped_device *md) | 2744 | int dm_resume(struct mapped_device *md) |
2745 | { | 2745 | { |
2746 | int r = -EINVAL; | 2746 | int r = -EINVAL; |
2747 | struct dm_table *map = NULL; | 2747 | struct dm_table *map = NULL; |
2748 | 2748 | ||
2749 | mutex_lock(&md->suspend_lock); | 2749 | mutex_lock(&md->suspend_lock); |
2750 | if (!dm_suspended_md(md)) | 2750 | if (!dm_suspended_md(md)) |
2751 | goto out; | 2751 | goto out; |
2752 | 2752 | ||
2753 | map = dm_get_live_table(md); | 2753 | map = dm_get_live_table(md); |
2754 | if (!map || !dm_table_get_size(map)) | 2754 | if (!map || !dm_table_get_size(map)) |
2755 | goto out; | 2755 | goto out; |
2756 | 2756 | ||
2757 | r = dm_table_resume_targets(map); | 2757 | r = dm_table_resume_targets(map); |
2758 | if (r) | 2758 | if (r) |
2759 | goto out; | 2759 | goto out; |
2760 | 2760 | ||
2761 | dm_queue_flush(md); | 2761 | dm_queue_flush(md); |
2762 | 2762 | ||
2763 | /* | 2763 | /* |
2764 | * Flushing deferred I/Os must be done after targets are resumed | 2764 | * Flushing deferred I/Os must be done after targets are resumed |
2765 | * so that mapping of targets can work correctly. | 2765 | * so that mapping of targets can work correctly. |
2766 | * Request-based dm is queueing the deferred I/Os in its request_queue. | 2766 | * Request-based dm is queueing the deferred I/Os in its request_queue. |
2767 | */ | 2767 | */ |
2768 | if (dm_request_based(md)) | 2768 | if (dm_request_based(md)) |
2769 | start_queue(md->queue); | 2769 | start_queue(md->queue); |
2770 | 2770 | ||
2771 | unlock_fs(md); | 2771 | unlock_fs(md); |
2772 | 2772 | ||
2773 | clear_bit(DMF_SUSPENDED, &md->flags); | 2773 | clear_bit(DMF_SUSPENDED, &md->flags); |
2774 | 2774 | ||
2775 | dm_table_unplug_all(map); | 2775 | dm_table_unplug_all(map); |
2776 | r = 0; | 2776 | r = 0; |
2777 | out: | 2777 | out: |
2778 | dm_table_put(map); | 2778 | dm_table_put(map); |
2779 | mutex_unlock(&md->suspend_lock); | 2779 | mutex_unlock(&md->suspend_lock); |
2780 | 2780 | ||
2781 | return r; | 2781 | return r; |
2782 | } | 2782 | } |
2783 | 2783 | ||
2784 | /*----------------------------------------------------------------- | 2784 | /*----------------------------------------------------------------- |
2785 | * Event notification. | 2785 | * Event notification. |
2786 | *---------------------------------------------------------------*/ | 2786 | *---------------------------------------------------------------*/ |
2787 | int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, | 2787 | int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, |
2788 | unsigned cookie) | 2788 | unsigned cookie) |
2789 | { | 2789 | { |
2790 | char udev_cookie[DM_COOKIE_LENGTH]; | 2790 | char udev_cookie[DM_COOKIE_LENGTH]; |
2791 | char *envp[] = { udev_cookie, NULL }; | 2791 | char *envp[] = { udev_cookie, NULL }; |
2792 | 2792 | ||
2793 | if (!cookie) | 2793 | if (!cookie) |
2794 | return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); | 2794 | return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); |
2795 | else { | 2795 | else { |
2796 | snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", | 2796 | snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", |
2797 | DM_COOKIE_ENV_VAR_NAME, cookie); | 2797 | DM_COOKIE_ENV_VAR_NAME, cookie); |
2798 | return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, | 2798 | return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, |
2799 | action, envp); | 2799 | action, envp); |
2800 | } | 2800 | } |
2801 | } | 2801 | } |
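When the cookie is zero this falls back to a plain uevent; otherwise the event carries a DM_COOKIE=<value> entry in its environment (DM_COOKIE_ENV_VAR_NAME expands to "DM_COOKIE"), which lets userspace such as libdevmapper match the event to the ioctl that triggered it. A typical call site, as a sketch:

    /* cookie arrived from userspace via the ioctl; 0 means "no cookie" */
    dm_kobject_uevent(md, KOBJ_CHANGE, cookie);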
2802 | 2802 | ||
2803 | uint32_t dm_next_uevent_seq(struct mapped_device *md) | 2803 | uint32_t dm_next_uevent_seq(struct mapped_device *md) |
2804 | { | 2804 | { |
2805 | return atomic_add_return(1, &md->uevent_seq); | 2805 | return atomic_add_return(1, &md->uevent_seq); |
2806 | } | 2806 | } |
2807 | 2807 | ||
2808 | uint32_t dm_get_event_nr(struct mapped_device *md) | 2808 | uint32_t dm_get_event_nr(struct mapped_device *md) |
2809 | { | 2809 | { |
2810 | return atomic_read(&md->event_nr); | 2810 | return atomic_read(&md->event_nr); |
2811 | } | 2811 | } |
2812 | 2812 | ||
2813 | int dm_wait_event(struct mapped_device *md, int event_nr) | 2813 | int dm_wait_event(struct mapped_device *md, int event_nr) |
2814 | { | 2814 | { |
2815 | return wait_event_interruptible(md->eventq, | 2815 | return wait_event_interruptible(md->eventq, |
2816 | (event_nr != atomic_read(&md->event_nr))); | 2816 | (event_nr != atomic_read(&md->event_nr))); |
2817 | } | 2817 | } |
2818 | 2818 | ||
2819 | void dm_uevent_add(struct mapped_device *md, struct list_head *elist) | 2819 | void dm_uevent_add(struct mapped_device *md, struct list_head *elist) |
2820 | { | 2820 | { |
2821 | unsigned long flags; | 2821 | unsigned long flags; |
2822 | 2822 | ||
2823 | spin_lock_irqsave(&md->uevent_lock, flags); | 2823 | spin_lock_irqsave(&md->uevent_lock, flags); |
2824 | list_add(elist, &md->uevent_list); | 2824 | list_add(elist, &md->uevent_list); |
2825 | spin_unlock_irqrestore(&md->uevent_lock, flags); | 2825 | spin_unlock_irqrestore(&md->uevent_lock, flags); |
2826 | } | 2826 | } |
2827 | 2827 | ||
2828 | /* | 2828 | /* |
2829 | * The gendisk is only valid as long as you have a reference | 2829 | * The gendisk is only valid as long as you have a reference |
2830 | * count on 'md'. | 2830 | * count on 'md'. |
2831 | */ | 2831 | */ |
2832 | struct gendisk *dm_disk(struct mapped_device *md) | 2832 | struct gendisk *dm_disk(struct mapped_device *md) |
2833 | { | 2833 | { |
2834 | return md->disk; | 2834 | return md->disk; |
2835 | } | 2835 | } |
2836 | 2836 | ||
2837 | struct kobject *dm_kobject(struct mapped_device *md) | 2837 | struct kobject *dm_kobject(struct mapped_device *md) |
2838 | { | 2838 | { |
2839 | return &md->kobj; | 2839 | return &md->kobj; |
2840 | } | 2840 | } |
2841 | 2841 | ||
2842 | /* | 2842 | /* |
2843 | * struct mapped_device should not be exported outside of dm.c | 2843 | * struct mapped_device should not be exported outside of dm.c |
2844 | * so use this check to verify that kobj is part of md structure | 2844 | * so use this check to verify that kobj is part of md structure |
2845 | */ | 2845 | */ |
2846 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | 2846 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) |
2847 | { | 2847 | { |
2848 | struct mapped_device *md; | 2848 | struct mapped_device *md; |
2849 | 2849 | ||
2850 | md = container_of(kobj, struct mapped_device, kobj); | 2850 | md = container_of(kobj, struct mapped_device, kobj); |
2851 | if (&md->kobj != kobj) | 2851 | if (&md->kobj != kobj) |
2852 | return NULL; | 2852 | return NULL; |
2853 | 2853 | ||
2854 | if (test_bit(DMF_FREEING, &md->flags) || | 2854 | if (test_bit(DMF_FREEING, &md->flags) || |
2855 | dm_deleting_md(md)) | 2855 | dm_deleting_md(md)) |
2856 | return NULL; | 2856 | return NULL; |
2857 | 2857 | ||
2858 | dm_get(md); | 2858 | dm_get(md); |
2859 | return md; | 2859 | return md; |
2860 | } | 2860 | } |
2861 | 2861 | ||
2862 | int dm_suspended_md(struct mapped_device *md) | 2862 | int dm_suspended_md(struct mapped_device *md) |
2863 | { | 2863 | { |
2864 | return test_bit(DMF_SUSPENDED, &md->flags); | 2864 | return test_bit(DMF_SUSPENDED, &md->flags); |
2865 | } | 2865 | } |
2866 | 2866 | ||
2867 | int dm_suspended(struct dm_target *ti) | 2867 | int dm_suspended(struct dm_target *ti) |
2868 | { | 2868 | { |
2869 | return dm_suspended_md(dm_table_get_md(ti->table)); | 2869 | return dm_suspended_md(dm_table_get_md(ti->table)); |
2870 | } | 2870 | } |
2871 | EXPORT_SYMBOL_GPL(dm_suspended); | 2871 | EXPORT_SYMBOL_GPL(dm_suspended); |
2872 | 2872 | ||
2873 | int dm_noflush_suspending(struct dm_target *ti) | 2873 | int dm_noflush_suspending(struct dm_target *ti) |
2874 | { | 2874 | { |
2875 | return __noflush_suspending(dm_table_get_md(ti->table)); | 2875 | return __noflush_suspending(dm_table_get_md(ti->table)); |
2876 | } | 2876 | } |
2877 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); | 2877 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); |
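dm_noflush_suspending() is how targets honour a noflush suspend: I/O that cannot make progress is pushed back to dm core instead of being failed, and is retried once the device resumes. A hypothetical bio-based end_io fragment (names assumed, return codes as defined by device-mapper):

    static int example_end_io(struct dm_target *ti, struct bio *bio,
                              int error, union map_info *map_context)
    {
            if (error == -EWOULDBLOCK && dm_noflush_suspending(ti))
                    return DM_ENDIO_REQUEUE;        /* retried after resume */

            return error;
    }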
2878 | 2878 | ||
2879 | struct dm_md_mempools *dm_alloc_md_mempools(unsigned type) | 2879 | struct dm_md_mempools *dm_alloc_md_mempools(unsigned type) |
2880 | { | 2880 | { |
2881 | struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); | 2881 | struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); |
2882 | 2882 | ||
2883 | if (!pools) | 2883 | if (!pools) |
2884 | return NULL; | 2884 | return NULL; |
2885 | 2885 | ||
2886 | pools->io_pool = (type == DM_TYPE_BIO_BASED) ? | 2886 | pools->io_pool = (type == DM_TYPE_BIO_BASED) ? |
2887 | mempool_create_slab_pool(MIN_IOS, _io_cache) : | 2887 | mempool_create_slab_pool(MIN_IOS, _io_cache) : |
2888 | mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); | 2888 | mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); |
2889 | if (!pools->io_pool) | 2889 | if (!pools->io_pool) |
2890 | goto free_pools_and_out; | 2890 | goto free_pools_and_out; |
2891 | 2891 | ||
2892 | pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? | 2892 | pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? |
2893 | mempool_create_slab_pool(MIN_IOS, _tio_cache) : | 2893 | mempool_create_slab_pool(MIN_IOS, _tio_cache) : |
2894 | mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); | 2894 | mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); |
2895 | if (!pools->tio_pool) | 2895 | if (!pools->tio_pool) |
2896 | goto free_io_pool_and_out; | 2896 | goto free_io_pool_and_out; |
2897 | 2897 | ||
2898 | pools->bs = (type == DM_TYPE_BIO_BASED) ? | 2898 | pools->bs = (type == DM_TYPE_BIO_BASED) ? |
2899 | bioset_create(16, 0) : bioset_create(MIN_IOS, 0); | 2899 | bioset_create(16, 0) : bioset_create(MIN_IOS, 0); |
2900 | if (!pools->bs) | 2900 | if (!pools->bs) |
2901 | goto free_tio_pool_and_out; | 2901 | goto free_tio_pool_and_out; |
2902 | 2902 | ||
2903 | return pools; | 2903 | return pools; |
2904 | 2904 | ||
2905 | free_tio_pool_and_out: | 2905 | free_tio_pool_and_out: |
2906 | mempool_destroy(pools->tio_pool); | 2906 | mempool_destroy(pools->tio_pool); |
2907 | 2907 | ||
2908 | free_io_pool_and_out: | 2908 | free_io_pool_and_out: |
2909 | mempool_destroy(pools->io_pool); | 2909 | mempool_destroy(pools->io_pool); |
2910 | 2910 | ||
2911 | free_pools_and_out: | 2911 | free_pools_and_out: |
2912 | kfree(pools); | 2912 | kfree(pools); |
2913 | 2913 | ||
2914 | return NULL; | 2914 | return NULL; |
2915 | } | 2915 | } |
2916 | 2916 | ||
2917 | void dm_free_md_mempools(struct dm_md_mempools *pools) | 2917 | void dm_free_md_mempools(struct dm_md_mempools *pools) |
2918 | { | 2918 | { |
2919 | if (!pools) | 2919 | if (!pools) |
2920 | return; | 2920 | return; |
2921 | 2921 | ||
2922 | if (pools->io_pool) | 2922 | if (pools->io_pool) |
2923 | mempool_destroy(pools->io_pool); | 2923 | mempool_destroy(pools->io_pool); |
2924 | 2924 | ||
2925 | if (pools->tio_pool) | 2925 | if (pools->tio_pool) |
2926 | mempool_destroy(pools->tio_pool); | 2926 | mempool_destroy(pools->tio_pool); |
2927 | 2927 | ||
2928 | if (pools->bs) | 2928 | if (pools->bs) |
2929 | bioset_free(pools->bs); | 2929 | bioset_free(pools->bs); |
2930 | 2930 | ||
2931 | kfree(pools); | 2931 | kfree(pools); |
2932 | } | 2932 | } |
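dm_alloc_md_mempools() and dm_free_md_mempools() are a pair: once the table type (bio-based versus request-based) is known, a pool set is allocated for the mapped device and torn down with it. A minimal usage sketch (the bind step is only hinted at in the comment):

    struct dm_md_mempools *pools;

    pools = dm_alloc_md_mempools(DM_TYPE_BIO_BASED);
    if (!pools)
            return -ENOMEM;

    /* ... attach the pools to the mapped_device during table bind and
     *     service its I/O from them ... */

    dm_free_md_mempools(pools);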
2933 | 2933 | ||
2934 | static const struct block_device_operations dm_blk_dops = { | 2934 | static const struct block_device_operations dm_blk_dops = { |
2935 | .open = dm_blk_open, | 2935 | .open = dm_blk_open, |
2936 | .release = dm_blk_close, | 2936 | .release = dm_blk_close, |
2937 | .ioctl = dm_blk_ioctl, | 2937 | .ioctl = dm_blk_ioctl, |
2938 | .getgeo = dm_blk_getgeo, | 2938 | .getgeo = dm_blk_getgeo, |
2939 | .owner = THIS_MODULE | 2939 | .owner = THIS_MODULE |
2940 | }; | 2940 | }; |
2941 | 2941 | ||
2942 | EXPORT_SYMBOL(dm_get_mapinfo); | 2942 | EXPORT_SYMBOL(dm_get_mapinfo); |
2943 | 2943 | ||
2944 | /* | 2944 | /* |
2945 | * module hooks | 2945 | * module hooks |
2946 | */ | 2946 | */ |
2947 | module_init(dm_init); | 2947 | module_init(dm_init); |
2948 | module_exit(dm_exit); | 2948 | module_exit(dm_exit); |
2949 | 2949 | ||
2950 | module_param(major, uint, 0); | 2950 | module_param(major, uint, 0); |
2951 | MODULE_PARM_DESC(major, "The major number of the device mapper"); | 2951 | MODULE_PARM_DESC(major, "The major number of the device mapper"); |
2952 | MODULE_DESCRIPTION(DM_NAME " driver"); | 2952 | MODULE_DESCRIPTION(DM_NAME " driver"); |
2953 | MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); | 2953 | MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); |
2954 | MODULE_LICENSE("GPL"); | 2954 | MODULE_LICENSE("GPL"); |
2955 | 2955 |
drivers/mmc/card/queue.c
1 | /* | 1 | /* |
2 | * linux/drivers/mmc/card/queue.c | 2 | * linux/drivers/mmc/card/queue.c |
3 | * | 3 | * |
4 | * Copyright (C) 2003 Russell King, All Rights Reserved. | 4 | * Copyright (C) 2003 Russell King, All Rights Reserved. |
5 | * Copyright 2006-2007 Pierre Ossman | 5 | * Copyright 2006-2007 Pierre Ossman |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | * | 10 | * |
11 | */ | 11 | */ |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/blkdev.h> | 14 | #include <linux/blkdev.h> |
15 | #include <linux/freezer.h> | 15 | #include <linux/freezer.h> |
16 | #include <linux/kthread.h> | 16 | #include <linux/kthread.h> |
17 | #include <linux/scatterlist.h> | 17 | #include <linux/scatterlist.h> |
18 | 18 | ||
19 | #include <linux/mmc/card.h> | 19 | #include <linux/mmc/card.h> |
20 | #include <linux/mmc/host.h> | 20 | #include <linux/mmc/host.h> |
21 | #include "queue.h" | 21 | #include "queue.h" |
22 | 22 | ||
23 | #define MMC_QUEUE_BOUNCESZ 65536 | 23 | #define MMC_QUEUE_BOUNCESZ 65536 |
24 | 24 | ||
25 | #define MMC_QUEUE_SUSPENDED (1 << 0) | 25 | #define MMC_QUEUE_SUSPENDED (1 << 0) |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Prepare a MMC request. This just filters out odd stuff. | 28 | * Prepare a MMC request. This just filters out odd stuff. |
29 | */ | 29 | */ |
30 | static int mmc_prep_request(struct request_queue *q, struct request *req) | 30 | static int mmc_prep_request(struct request_queue *q, struct request *req) |
31 | { | 31 | { |
32 | /* | 32 | /* |
33 | * We only like normal block requests and discards. | 33 | * We only like normal block requests and discards. |
34 | */ | 34 | */ |
35 | if (req->cmd_type != REQ_TYPE_FS && !(req->cmd_flags & REQ_DISCARD)) { | 35 | if (req->cmd_type != REQ_TYPE_FS && !(req->cmd_flags & REQ_DISCARD)) { |
36 | blk_dump_rq_flags(req, "MMC bad request"); | 36 | blk_dump_rq_flags(req, "MMC bad request"); |
37 | return BLKPREP_KILL; | 37 | return BLKPREP_KILL; |
38 | } | 38 | } |
39 | 39 | ||
40 | req->cmd_flags |= REQ_DONTPREP; | 40 | req->cmd_flags |= REQ_DONTPREP; |
41 | 41 | ||
42 | return BLKPREP_OK; | 42 | return BLKPREP_OK; |
43 | } | 43 | } |
44 | 44 | ||
45 | static int mmc_queue_thread(void *d) | 45 | static int mmc_queue_thread(void *d) |
46 | { | 46 | { |
47 | struct mmc_queue *mq = d; | 47 | struct mmc_queue *mq = d; |
48 | struct request_queue *q = mq->queue; | 48 | struct request_queue *q = mq->queue; |
49 | 49 | ||
50 | current->flags |= PF_MEMALLOC; | 50 | current->flags |= PF_MEMALLOC; |
51 | 51 | ||
52 | down(&mq->thread_sem); | 52 | down(&mq->thread_sem); |
53 | do { | 53 | do { |
54 | struct request *req = NULL; | 54 | struct request *req = NULL; |
55 | 55 | ||
56 | spin_lock_irq(q->queue_lock); | 56 | spin_lock_irq(q->queue_lock); |
57 | set_current_state(TASK_INTERRUPTIBLE); | 57 | set_current_state(TASK_INTERRUPTIBLE); |
58 | if (!blk_queue_plugged(q)) | 58 | if (!blk_queue_plugged(q)) |
59 | req = blk_fetch_request(q); | 59 | req = blk_fetch_request(q); |
60 | mq->req = req; | 60 | mq->req = req; |
61 | spin_unlock_irq(q->queue_lock); | 61 | spin_unlock_irq(q->queue_lock); |
62 | 62 | ||
63 | if (!req) { | 63 | if (!req) { |
64 | if (kthread_should_stop()) { | 64 | if (kthread_should_stop()) { |
65 | set_current_state(TASK_RUNNING); | 65 | set_current_state(TASK_RUNNING); |
66 | break; | 66 | break; |
67 | } | 67 | } |
68 | up(&mq->thread_sem); | 68 | up(&mq->thread_sem); |
69 | schedule(); | 69 | schedule(); |
70 | down(&mq->thread_sem); | 70 | down(&mq->thread_sem); |
71 | continue; | 71 | continue; |
72 | } | 72 | } |
73 | set_current_state(TASK_RUNNING); | 73 | set_current_state(TASK_RUNNING); |
74 | 74 | ||
75 | mq->issue_fn(mq, req); | 75 | mq->issue_fn(mq, req); |
76 | } while (1); | 76 | } while (1); |
77 | up(&mq->thread_sem); | 77 | up(&mq->thread_sem); |
78 | 78 | ||
79 | return 0; | 79 | return 0; |
80 | } | 80 | } |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Generic MMC request handler. This is called for any queue on a | 83 | * Generic MMC request handler. This is called for any queue on a |
84 | * particular host. When the host is not busy, we look for a request | 84 | * particular host. When the host is not busy, we look for a request |
85 | * on any queue on this host, and attempt to issue it. This may | 85 | * on any queue on this host, and attempt to issue it. This may |
86 | * not be the queue we were asked to process. | 86 | * not be the queue we were asked to process. |
87 | */ | 87 | */ |
88 | static void mmc_request(struct request_queue *q) | 88 | static void mmc_request(struct request_queue *q) |
89 | { | 89 | { |
90 | struct mmc_queue *mq = q->queuedata; | 90 | struct mmc_queue *mq = q->queuedata; |
91 | struct request *req; | 91 | struct request *req; |
92 | 92 | ||
93 | if (!mq) { | 93 | if (!mq) { |
94 | while ((req = blk_fetch_request(q)) != NULL) { | 94 | while ((req = blk_fetch_request(q)) != NULL) { |
95 | req->cmd_flags |= REQ_QUIET; | 95 | req->cmd_flags |= REQ_QUIET; |
96 | __blk_end_request_all(req, -EIO); | 96 | __blk_end_request_all(req, -EIO); |
97 | } | 97 | } |
98 | return; | 98 | return; |
99 | } | 99 | } |
100 | 100 | ||
101 | if (!mq->req) | 101 | if (!mq->req) |
102 | wake_up_process(mq->thread); | 102 | wake_up_process(mq->thread); |
103 | } | 103 | } |
104 | 104 | ||
105 | /** | 105 | /** |
106 | * mmc_init_queue - initialise a queue structure. | 106 | * mmc_init_queue - initialise a queue structure. |
107 | * @mq: mmc queue | 107 | * @mq: mmc queue |
108 | * @card: mmc card to attach this queue | 108 | * @card: mmc card to attach this queue |
109 | * @lock: queue lock | 109 | * @lock: queue lock |
110 | * | 110 | * |
111 | * Initialise a MMC card request queue. | 111 | * Initialise a MMC card request queue. |
112 | */ | 112 | */ |
113 | int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock) | 113 | int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock) |
114 | { | 114 | { |
115 | struct mmc_host *host = card->host; | 115 | struct mmc_host *host = card->host; |
116 | u64 limit = BLK_BOUNCE_HIGH; | 116 | u64 limit = BLK_BOUNCE_HIGH; |
117 | int ret; | 117 | int ret; |
118 | 118 | ||
119 | if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) | 119 | if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) |
120 | limit = *mmc_dev(host)->dma_mask; | 120 | limit = *mmc_dev(host)->dma_mask; |
121 | 121 | ||
122 | mq->card = card; | 122 | mq->card = card; |
123 | mq->queue = blk_init_queue(mmc_request, lock); | 123 | mq->queue = blk_init_queue(mmc_request, lock); |
124 | if (!mq->queue) | 124 | if (!mq->queue) |
125 | return -ENOMEM; | 125 | return -ENOMEM; |
126 | 126 | ||
127 | mq->queue->queuedata = mq; | 127 | mq->queue->queuedata = mq; |
128 | mq->req = NULL; | 128 | mq->req = NULL; |
129 | 129 | ||
130 | blk_queue_prep_rq(mq->queue, mmc_prep_request); | 130 | blk_queue_prep_rq(mq->queue, mmc_prep_request); |
131 | blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); | ||
132 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); | 131 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); |
133 | if (mmc_can_erase(card)) { | 132 | if (mmc_can_erase(card)) { |
134 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); | 133 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); |
135 | mq->queue->limits.max_discard_sectors = UINT_MAX; | 134 | mq->queue->limits.max_discard_sectors = UINT_MAX; |
136 | if (card->erased_byte == 0) | 135 | if (card->erased_byte == 0) |
137 | mq->queue->limits.discard_zeroes_data = 1; | 136 | mq->queue->limits.discard_zeroes_data = 1; |
138 | if (!mmc_can_trim(card) && is_power_of_2(card->erase_size)) { | 137 | if (!mmc_can_trim(card) && is_power_of_2(card->erase_size)) { |
139 | mq->queue->limits.discard_granularity = | 138 | mq->queue->limits.discard_granularity = |
140 | card->erase_size << 9; | 139 | card->erase_size << 9; |
141 | mq->queue->limits.discard_alignment = | 140 | mq->queue->limits.discard_alignment = |
142 | card->erase_size << 9; | 141 | card->erase_size << 9; |
143 | } | 142 | } |
144 | if (mmc_can_secure_erase_trim(card)) | 143 | if (mmc_can_secure_erase_trim(card)) |
145 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, | 144 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, |
146 | mq->queue); | 145 | mq->queue); |
147 | } | 146 | } |
148 | 147 | ||
149 | #ifdef CONFIG_MMC_BLOCK_BOUNCE | 148 | #ifdef CONFIG_MMC_BLOCK_BOUNCE |
150 | if (host->max_hw_segs == 1) { | 149 | if (host->max_hw_segs == 1) { |
151 | unsigned int bouncesz; | 150 | unsigned int bouncesz; |
152 | 151 | ||
153 | bouncesz = MMC_QUEUE_BOUNCESZ; | 152 | bouncesz = MMC_QUEUE_BOUNCESZ; |
154 | 153 | ||
155 | if (bouncesz > host->max_req_size) | 154 | if (bouncesz > host->max_req_size) |
156 | bouncesz = host->max_req_size; | 155 | bouncesz = host->max_req_size; |
157 | if (bouncesz > host->max_seg_size) | 156 | if (bouncesz > host->max_seg_size) |
158 | bouncesz = host->max_seg_size; | 157 | bouncesz = host->max_seg_size; |
159 | if (bouncesz > (host->max_blk_count * 512)) | 158 | if (bouncesz > (host->max_blk_count * 512)) |
160 | bouncesz = host->max_blk_count * 512; | 159 | bouncesz = host->max_blk_count * 512; |
161 | 160 | ||
162 | if (bouncesz > 512) { | 161 | if (bouncesz > 512) { |
163 | mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); | 162 | mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); |
164 | if (!mq->bounce_buf) { | 163 | if (!mq->bounce_buf) { |
165 | printk(KERN_WARNING "%s: unable to " | 164 | printk(KERN_WARNING "%s: unable to " |
166 | "allocate bounce buffer\n", | 165 | "allocate bounce buffer\n", |
167 | mmc_card_name(card)); | 166 | mmc_card_name(card)); |
168 | } | 167 | } |
169 | } | 168 | } |
170 | 169 | ||
171 | if (mq->bounce_buf) { | 170 | if (mq->bounce_buf) { |
172 | blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY); | 171 | blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY); |
173 | blk_queue_max_hw_sectors(mq->queue, bouncesz / 512); | 172 | blk_queue_max_hw_sectors(mq->queue, bouncesz / 512); |
174 | blk_queue_max_segments(mq->queue, bouncesz / 512); | 173 | blk_queue_max_segments(mq->queue, bouncesz / 512); |
175 | blk_queue_max_segment_size(mq->queue, bouncesz); | 174 | blk_queue_max_segment_size(mq->queue, bouncesz); |
176 | 175 | ||
177 | mq->sg = kmalloc(sizeof(struct scatterlist), | 176 | mq->sg = kmalloc(sizeof(struct scatterlist), |
178 | GFP_KERNEL); | 177 | GFP_KERNEL); |
179 | if (!mq->sg) { | 178 | if (!mq->sg) { |
180 | ret = -ENOMEM; | 179 | ret = -ENOMEM; |
181 | goto cleanup_queue; | 180 | goto cleanup_queue; |
182 | } | 181 | } |
183 | sg_init_table(mq->sg, 1); | 182 | sg_init_table(mq->sg, 1); |
184 | 183 | ||
185 | mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * | 184 | mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * |
186 | bouncesz / 512, GFP_KERNEL); | 185 | bouncesz / 512, GFP_KERNEL); |
187 | if (!mq->bounce_sg) { | 186 | if (!mq->bounce_sg) { |
188 | ret = -ENOMEM; | 187 | ret = -ENOMEM; |
189 | goto cleanup_queue; | 188 | goto cleanup_queue; |
190 | } | 189 | } |
191 | sg_init_table(mq->bounce_sg, bouncesz / 512); | 190 | sg_init_table(mq->bounce_sg, bouncesz / 512); |
192 | } | 191 | } |
193 | } | 192 | } |
194 | #endif | 193 | #endif |
195 | 194 | ||
196 | if (!mq->bounce_buf) { | 195 | if (!mq->bounce_buf) { |
197 | blk_queue_bounce_limit(mq->queue, limit); | 196 | blk_queue_bounce_limit(mq->queue, limit); |
198 | blk_queue_max_hw_sectors(mq->queue, | 197 | blk_queue_max_hw_sectors(mq->queue, |
199 | min(host->max_blk_count, host->max_req_size / 512)); | 198 | min(host->max_blk_count, host->max_req_size / 512)); |
200 | blk_queue_max_segments(mq->queue, host->max_hw_segs); | 199 | blk_queue_max_segments(mq->queue, host->max_hw_segs); |
201 | blk_queue_max_segment_size(mq->queue, host->max_seg_size); | 200 | blk_queue_max_segment_size(mq->queue, host->max_seg_size); |
202 | 201 | ||
203 | mq->sg = kmalloc(sizeof(struct scatterlist) * | 202 | mq->sg = kmalloc(sizeof(struct scatterlist) * |
204 | host->max_phys_segs, GFP_KERNEL); | 203 | host->max_phys_segs, GFP_KERNEL); |
205 | if (!mq->sg) { | 204 | if (!mq->sg) { |
206 | ret = -ENOMEM; | 205 | ret = -ENOMEM; |
207 | goto cleanup_queue; | 206 | goto cleanup_queue; |
208 | } | 207 | } |
209 | sg_init_table(mq->sg, host->max_phys_segs); | 208 | sg_init_table(mq->sg, host->max_phys_segs); |
210 | } | 209 | } |
211 | 210 | ||
212 | init_MUTEX(&mq->thread_sem); | 211 | init_MUTEX(&mq->thread_sem); |
213 | 212 | ||
214 | mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); | 213 | mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); |
215 | if (IS_ERR(mq->thread)) { | 214 | if (IS_ERR(mq->thread)) { |
216 | ret = PTR_ERR(mq->thread); | 215 | ret = PTR_ERR(mq->thread); |
217 | goto free_bounce_sg; | 216 | goto free_bounce_sg; |
218 | } | 217 | } |
219 | 218 | ||
220 | return 0; | 219 | return 0; |
221 | free_bounce_sg: | 220 | free_bounce_sg: |
222 | if (mq->bounce_sg) | 221 | if (mq->bounce_sg) |
223 | kfree(mq->bounce_sg); | 222 | kfree(mq->bounce_sg); |
224 | mq->bounce_sg = NULL; | 223 | mq->bounce_sg = NULL; |
225 | cleanup_queue: | 224 | cleanup_queue: |
226 | if (mq->sg) | 225 | if (mq->sg) |
227 | kfree(mq->sg); | 226 | kfree(mq->sg); |
228 | mq->sg = NULL; | 227 | mq->sg = NULL; |
229 | if (mq->bounce_buf) | 228 | if (mq->bounce_buf) |
230 | kfree(mq->bounce_buf); | 229 | kfree(mq->bounce_buf); |
231 | mq->bounce_buf = NULL; | 230 | mq->bounce_buf = NULL; |
232 | blk_cleanup_queue(mq->queue); | 231 | blk_cleanup_queue(mq->queue); |
233 | return ret; | 232 | return ret; |
234 | } | 233 | } |
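The only behavioural change in mmc_init_queue() is the hunk near the top that drops blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN) without adding anything in its place: a freshly initialised queue already behaves as drain-only, so MMC needs no call to the replacement interface. A driver whose device does have a volatile write cache would advertise that instead, roughly as below (sketch, not MMC code; include REQ_FUA only if the hardware honours FUA writes):

    /* write cache that can be flushed, and FUA writes are supported */
    blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

    /* write cache but no FUA: the block layer issues a post-flush instead */
    blk_queue_flush(q, REQ_FLUSH);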
235 | 234 | ||
236 | void mmc_cleanup_queue(struct mmc_queue *mq) | 235 | void mmc_cleanup_queue(struct mmc_queue *mq) |
237 | { | 236 | { |
238 | struct request_queue *q = mq->queue; | 237 | struct request_queue *q = mq->queue; |
239 | unsigned long flags; | 238 | unsigned long flags; |
240 | 239 | ||
241 | /* Make sure the queue isn't suspended, as that will deadlock */ | 240 | /* Make sure the queue isn't suspended, as that will deadlock */ |
242 | mmc_queue_resume(mq); | 241 | mmc_queue_resume(mq); |
243 | 242 | ||
244 | /* Then terminate our worker thread */ | 243 | /* Then terminate our worker thread */ |
245 | kthread_stop(mq->thread); | 244 | kthread_stop(mq->thread); |
246 | 245 | ||
247 | /* Empty the queue */ | 246 | /* Empty the queue */ |
248 | spin_lock_irqsave(q->queue_lock, flags); | 247 | spin_lock_irqsave(q->queue_lock, flags); |
249 | q->queuedata = NULL; | 248 | q->queuedata = NULL; |
250 | blk_start_queue(q); | 249 | blk_start_queue(q); |
251 | spin_unlock_irqrestore(q->queue_lock, flags); | 250 | spin_unlock_irqrestore(q->queue_lock, flags); |
252 | 251 | ||
253 | if (mq->bounce_sg) | 252 | if (mq->bounce_sg) |
254 | kfree(mq->bounce_sg); | 253 | kfree(mq->bounce_sg); |
255 | mq->bounce_sg = NULL; | 254 | mq->bounce_sg = NULL; |
256 | 255 | ||
257 | kfree(mq->sg); | 256 | kfree(mq->sg); |
258 | mq->sg = NULL; | 257 | mq->sg = NULL; |
259 | 258 | ||
260 | if (mq->bounce_buf) | 259 | if (mq->bounce_buf) |
261 | kfree(mq->bounce_buf); | 260 | kfree(mq->bounce_buf); |
262 | mq->bounce_buf = NULL; | 261 | mq->bounce_buf = NULL; |
263 | 262 | ||
264 | mq->card = NULL; | 263 | mq->card = NULL; |
265 | } | 264 | } |
266 | EXPORT_SYMBOL(mmc_cleanup_queue); | 265 | EXPORT_SYMBOL(mmc_cleanup_queue); |
267 | 266 | ||
268 | /** | 267 | /** |
269 | * mmc_queue_suspend - suspend a MMC request queue | 268 | * mmc_queue_suspend - suspend a MMC request queue |
270 | * @mq: MMC queue to suspend | 269 | * @mq: MMC queue to suspend |
271 | * | 270 | * |
272 | * Stop the block request queue, and wait for our thread to | 271 | * Stop the block request queue, and wait for our thread to |
273 | * complete any outstanding requests. This ensures that we | 272 | * complete any outstanding requests. This ensures that we |
274 | * won't suspend while a request is being processed. | 273 | * won't suspend while a request is being processed. |
275 | */ | 274 | */ |
276 | void mmc_queue_suspend(struct mmc_queue *mq) | 275 | void mmc_queue_suspend(struct mmc_queue *mq) |
277 | { | 276 | { |
278 | struct request_queue *q = mq->queue; | 277 | struct request_queue *q = mq->queue; |
279 | unsigned long flags; | 278 | unsigned long flags; |
280 | 279 | ||
281 | if (!(mq->flags & MMC_QUEUE_SUSPENDED)) { | 280 | if (!(mq->flags & MMC_QUEUE_SUSPENDED)) { |
282 | mq->flags |= MMC_QUEUE_SUSPENDED; | 281 | mq->flags |= MMC_QUEUE_SUSPENDED; |
283 | 282 | ||
284 | spin_lock_irqsave(q->queue_lock, flags); | 283 | spin_lock_irqsave(q->queue_lock, flags); |
285 | blk_stop_queue(q); | 284 | blk_stop_queue(q); |
286 | spin_unlock_irqrestore(q->queue_lock, flags); | 285 | spin_unlock_irqrestore(q->queue_lock, flags); |
287 | 286 | ||
288 | down(&mq->thread_sem); | 287 | down(&mq->thread_sem); |
289 | } | 288 | } |
290 | } | 289 | } |
291 | 290 | ||
292 | /** | 291 | /** |
293 | * mmc_queue_resume - resume a previously suspended MMC request queue | 292 | * mmc_queue_resume - resume a previously suspended MMC request queue |
294 | * @mq: MMC queue to resume | 293 | * @mq: MMC queue to resume |
295 | */ | 294 | */ |
296 | void mmc_queue_resume(struct mmc_queue *mq) | 295 | void mmc_queue_resume(struct mmc_queue *mq) |
297 | { | 296 | { |
298 | struct request_queue *q = mq->queue; | 297 | struct request_queue *q = mq->queue; |
299 | unsigned long flags; | 298 | unsigned long flags; |
300 | 299 | ||
301 | if (mq->flags & MMC_QUEUE_SUSPENDED) { | 300 | if (mq->flags & MMC_QUEUE_SUSPENDED) { |
302 | mq->flags &= ~MMC_QUEUE_SUSPENDED; | 301 | mq->flags &= ~MMC_QUEUE_SUSPENDED; |
303 | 302 | ||
304 | up(&mq->thread_sem); | 303 | up(&mq->thread_sem); |
305 | 304 | ||
306 | spin_lock_irqsave(q->queue_lock, flags); | 305 | spin_lock_irqsave(q->queue_lock, flags); |
307 | blk_start_queue(q); | 306 | blk_start_queue(q); |
308 | spin_unlock_irqrestore(q->queue_lock, flags); | 307 | spin_unlock_irqrestore(q->queue_lock, flags); |
309 | } | 308 | } |
310 | } | 309 | } |
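mmc_queue_suspend() and mmc_queue_resume() are intended to be called as a pair from the card driver's power-management hooks; the thread_sem handshake with mmc_queue_thread() guarantees that no request is in flight once suspend returns. A sketch loosely modeled on the MMC block driver (struct and accessor names assumed):

    static int example_blk_suspend(struct mmc_card *card, pm_message_t state)
    {
            struct mmc_blk_data *md = mmc_get_drvdata(card);

            if (md)
                    mmc_queue_suspend(&md->queue);
            return 0;
    }

    static int example_blk_resume(struct mmc_card *card)
    {
            struct mmc_blk_data *md = mmc_get_drvdata(card);

            if (md)
                    mmc_queue_resume(&md->queue);
            return 0;
    }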
311 | 310 | ||
312 | /* | 311 | /* |
313 | * Prepare the sg list(s) to be handed off to the host driver | 312 | * Prepare the sg list(s) to be handed off to the host driver |
314 | */ | 313 | */ |
315 | unsigned int mmc_queue_map_sg(struct mmc_queue *mq) | 314 | unsigned int mmc_queue_map_sg(struct mmc_queue *mq) |
316 | { | 315 | { |
317 | unsigned int sg_len; | 316 | unsigned int sg_len; |
318 | size_t buflen; | 317 | size_t buflen; |
319 | struct scatterlist *sg; | 318 | struct scatterlist *sg; |
320 | int i; | 319 | int i; |
321 | 320 | ||
322 | if (!mq->bounce_buf) | 321 | if (!mq->bounce_buf) |
323 | return blk_rq_map_sg(mq->queue, mq->req, mq->sg); | 322 | return blk_rq_map_sg(mq->queue, mq->req, mq->sg); |
324 | 323 | ||
325 | BUG_ON(!mq->bounce_sg); | 324 | BUG_ON(!mq->bounce_sg); |
326 | 325 | ||
327 | sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); | 326 | sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); |
328 | 327 | ||
329 | mq->bounce_sg_len = sg_len; | 328 | mq->bounce_sg_len = sg_len; |
330 | 329 | ||
331 | buflen = 0; | 330 | buflen = 0; |
332 | for_each_sg(mq->bounce_sg, sg, sg_len, i) | 331 | for_each_sg(mq->bounce_sg, sg, sg_len, i) |
333 | buflen += sg->length; | 332 | buflen += sg->length; |
334 | 333 | ||
335 | sg_init_one(mq->sg, mq->bounce_buf, buflen); | 334 | sg_init_one(mq->sg, mq->bounce_buf, buflen); |
336 | 335 | ||
337 | return 1; | 336 | return 1; |
338 | } | 337 | } |
339 | 338 | ||
340 | /* | 339 | /* |
341 | * If writing, bounce the data to the buffer before the request | 340 | * If writing, bounce the data to the buffer before the request |
342 | * is sent to the host driver | 341 | * is sent to the host driver |
343 | */ | 342 | */ |
344 | void mmc_queue_bounce_pre(struct mmc_queue *mq) | 343 | void mmc_queue_bounce_pre(struct mmc_queue *mq) |
345 | { | 344 | { |
346 | unsigned long flags; | 345 | unsigned long flags; |
347 | 346 | ||
348 | if (!mq->bounce_buf) | 347 | if (!mq->bounce_buf) |
349 | return; | 348 | return; |
350 | 349 | ||
351 | if (rq_data_dir(mq->req) != WRITE) | 350 | if (rq_data_dir(mq->req) != WRITE) |
352 | return; | 351 | return; |
353 | 352 | ||
354 | local_irq_save(flags); | 353 | local_irq_save(flags); |
355 | sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len, | 354 | sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len, |
356 | mq->bounce_buf, mq->sg[0].length); | 355 | mq->bounce_buf, mq->sg[0].length); |
357 | local_irq_restore(flags); | 356 | local_irq_restore(flags); |
358 | } | 357 | } |
359 | 358 | ||
360 | /* | 359 | /* |
361 | * If reading, bounce the data from the buffer after the request | 360 | * If reading, bounce the data from the buffer after the request |
362 | * has been handled by the host driver | 361 | * has been handled by the host driver |
363 | */ | 362 | */ |
364 | void mmc_queue_bounce_post(struct mmc_queue *mq) | 363 | void mmc_queue_bounce_post(struct mmc_queue *mq) |
365 | { | 364 | { |
366 | unsigned long flags; | 365 | unsigned long flags; |
367 | 366 | ||
368 | if (!mq->bounce_buf) | 367 | if (!mq->bounce_buf) |
369 | return; | 368 | return; |
370 | 369 | ||
371 | if (rq_data_dir(mq->req) != READ) | 370 | if (rq_data_dir(mq->req) != READ) |
372 | return; | 371 | return; |
373 | 372 | ||
374 | local_irq_save(flags); | 373 | local_irq_save(flags); |
375 | sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len, | 374 | sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len, |
376 | mq->bounce_buf, mq->sg[0].length); | 375 | mq->bounce_buf, mq->sg[0].length); |
377 | local_irq_restore(flags); | 376 | local_irq_restore(flags); |
378 | } | 377 | } |
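Taken together, mmc_queue_map_sg(), mmc_queue_bounce_pre() and mmc_queue_bounce_post() wrap one request round trip and must be called in that order around the actual host submission. An ordering sketch of the issue path (brq stands in for the driver's per-request structure and is assumed here):

    brq.data.sg = mq->sg;
    brq.data.sg_len = mmc_queue_map_sg(mq);    /* 1: build the sg list  */

    mmc_queue_bounce_pre(mq);                  /* 2: copy write data in */
    mmc_wait_for_req(card->host, &brq.mrq);    /* 3: run the request    */
    mmc_queue_bounce_post(mq);                 /* 4: copy read data out */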
379 | 378 | ||
380 | 379 |
drivers/s390/block/dasd.c
1 | /* | 1 | /* |
2 | * File...........: linux/drivers/s390/block/dasd.c | 2 | * File...........: linux/drivers/s390/block/dasd.c |
3 | * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> | 3 | * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> |
4 | * Horst Hummel <Horst.Hummel@de.ibm.com> | 4 | * Horst Hummel <Horst.Hummel@de.ibm.com> |
5 | * Carsten Otte <Cotte@de.ibm.com> | 5 | * Carsten Otte <Cotte@de.ibm.com> |
6 | * Martin Schwidefsky <schwidefsky@de.ibm.com> | 6 | * Martin Schwidefsky <schwidefsky@de.ibm.com> |
7 | * Bugreports.to..: <Linux390@de.ibm.com> | 7 | * Bugreports.to..: <Linux390@de.ibm.com> |
8 | * Copyright IBM Corp. 1999, 2009 | 8 | * Copyright IBM Corp. 1999, 2009 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #define KMSG_COMPONENT "dasd" | 11 | #define KMSG_COMPONENT "dasd" |
12 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | 12 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
13 | 13 | ||
14 | #include <linux/kmod.h> | 14 | #include <linux/kmod.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/major.h> | 18 | #include <linux/major.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
21 | #include <linux/hdreg.h> | 21 | #include <linux/hdreg.h> |
22 | #include <linux/async.h> | 22 | #include <linux/async.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/smp_lock.h> | 24 | #include <linux/smp_lock.h> |
25 | 25 | ||
26 | #include <asm/ccwdev.h> | 26 | #include <asm/ccwdev.h> |
27 | #include <asm/ebcdic.h> | 27 | #include <asm/ebcdic.h> |
28 | #include <asm/idals.h> | 28 | #include <asm/idals.h> |
29 | #include <asm/itcw.h> | 29 | #include <asm/itcw.h> |
30 | #include <asm/diag.h> | 30 | #include <asm/diag.h> |
31 | 31 | ||
32 | /* This is ugly... */ | 32 | /* This is ugly... */ |
33 | #define PRINTK_HEADER "dasd:" | 33 | #define PRINTK_HEADER "dasd:" |
34 | 34 | ||
35 | #include "dasd_int.h" | 35 | #include "dasd_int.h" |
36 | /* | 36 | /* |
37 | * SECTION: Constant definitions to be used within this file | 37 | * SECTION: Constant definitions to be used within this file |
38 | */ | 38 | */ |
39 | #define DASD_CHANQ_MAX_SIZE 4 | 39 | #define DASD_CHANQ_MAX_SIZE 4 |
40 | 40 | ||
41 | #define DASD_SLEEPON_START_TAG (void *) 1 | 41 | #define DASD_SLEEPON_START_TAG (void *) 1 |
42 | #define DASD_SLEEPON_END_TAG (void *) 2 | 42 | #define DASD_SLEEPON_END_TAG (void *) 2 |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * SECTION: exported variables of dasd.c | 45 | * SECTION: exported variables of dasd.c |
46 | */ | 46 | */ |
47 | debug_info_t *dasd_debug_area; | 47 | debug_info_t *dasd_debug_area; |
48 | struct dasd_discipline *dasd_diag_discipline_pointer; | 48 | struct dasd_discipline *dasd_diag_discipline_pointer; |
49 | void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *); | 49 | void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *); |
50 | 50 | ||
51 | MODULE_AUTHOR("Holger Smolinski <Holger.Smolinski@de.ibm.com>"); | 51 | MODULE_AUTHOR("Holger Smolinski <Holger.Smolinski@de.ibm.com>"); |
52 | MODULE_DESCRIPTION("Linux on S/390 DASD device driver," | 52 | MODULE_DESCRIPTION("Linux on S/390 DASD device driver," |
53 | " Copyright 2000 IBM Corporation"); | 53 | " Copyright 2000 IBM Corporation"); |
54 | MODULE_SUPPORTED_DEVICE("dasd"); | 54 | MODULE_SUPPORTED_DEVICE("dasd"); |
55 | MODULE_LICENSE("GPL"); | 55 | MODULE_LICENSE("GPL"); |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * SECTION: prototypes for static functions of dasd.c | 58 | * SECTION: prototypes for static functions of dasd.c |
59 | */ | 59 | */ |
60 | static int dasd_alloc_queue(struct dasd_block *); | 60 | static int dasd_alloc_queue(struct dasd_block *); |
61 | static void dasd_setup_queue(struct dasd_block *); | 61 | static void dasd_setup_queue(struct dasd_block *); |
62 | static void dasd_free_queue(struct dasd_block *); | 62 | static void dasd_free_queue(struct dasd_block *); |
63 | static void dasd_flush_request_queue(struct dasd_block *); | 63 | static void dasd_flush_request_queue(struct dasd_block *); |
64 | static int dasd_flush_block_queue(struct dasd_block *); | 64 | static int dasd_flush_block_queue(struct dasd_block *); |
65 | static void dasd_device_tasklet(struct dasd_device *); | 65 | static void dasd_device_tasklet(struct dasd_device *); |
66 | static void dasd_block_tasklet(struct dasd_block *); | 66 | static void dasd_block_tasklet(struct dasd_block *); |
67 | static void do_kick_device(struct work_struct *); | 67 | static void do_kick_device(struct work_struct *); |
68 | static void do_restore_device(struct work_struct *); | 68 | static void do_restore_device(struct work_struct *); |
69 | static void do_reload_device(struct work_struct *); | 69 | static void do_reload_device(struct work_struct *); |
70 | static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); | 70 | static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); |
71 | static void dasd_device_timeout(unsigned long); | 71 | static void dasd_device_timeout(unsigned long); |
72 | static void dasd_block_timeout(unsigned long); | 72 | static void dasd_block_timeout(unsigned long); |
73 | static void __dasd_process_erp(struct dasd_device *, struct dasd_ccw_req *); | 73 | static void __dasd_process_erp(struct dasd_device *, struct dasd_ccw_req *); |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * SECTION: Operations on the device structure. | 76 | * SECTION: Operations on the device structure. |
77 | */ | 77 | */ |
78 | static wait_queue_head_t dasd_init_waitq; | 78 | static wait_queue_head_t dasd_init_waitq; |
79 | static wait_queue_head_t dasd_flush_wq; | 79 | static wait_queue_head_t dasd_flush_wq; |
80 | static wait_queue_head_t generic_waitq; | 80 | static wait_queue_head_t generic_waitq; |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Allocate memory for a new device structure. | 83 | * Allocate memory for a new device structure. |
84 | */ | 84 | */ |
85 | struct dasd_device *dasd_alloc_device(void) | 85 | struct dasd_device *dasd_alloc_device(void) |
86 | { | 86 | { |
87 | struct dasd_device *device; | 87 | struct dasd_device *device; |
88 | 88 | ||
89 | device = kzalloc(sizeof(struct dasd_device), GFP_ATOMIC); | 89 | device = kzalloc(sizeof(struct dasd_device), GFP_ATOMIC); |
90 | if (!device) | 90 | if (!device) |
91 | return ERR_PTR(-ENOMEM); | 91 | return ERR_PTR(-ENOMEM); |
92 | 92 | ||
93 | /* Get two pages for normal block device operations. */ | 93 | /* Get two pages for normal block device operations. */ |
94 | device->ccw_mem = (void *) __get_free_pages(GFP_ATOMIC | GFP_DMA, 1); | 94 | device->ccw_mem = (void *) __get_free_pages(GFP_ATOMIC | GFP_DMA, 1); |
95 | if (!device->ccw_mem) { | 95 | if (!device->ccw_mem) { |
96 | kfree(device); | 96 | kfree(device); |
97 | return ERR_PTR(-ENOMEM); | 97 | return ERR_PTR(-ENOMEM); |
98 | } | 98 | } |
99 | /* Get one page for error recovery. */ | 99 | /* Get one page for error recovery. */ |
100 | device->erp_mem = (void *) get_zeroed_page(GFP_ATOMIC | GFP_DMA); | 100 | device->erp_mem = (void *) get_zeroed_page(GFP_ATOMIC | GFP_DMA); |
101 | if (!device->erp_mem) { | 101 | if (!device->erp_mem) { |
102 | free_pages((unsigned long) device->ccw_mem, 1); | 102 | free_pages((unsigned long) device->ccw_mem, 1); |
103 | kfree(device); | 103 | kfree(device); |
104 | return ERR_PTR(-ENOMEM); | 104 | return ERR_PTR(-ENOMEM); |
105 | } | 105 | } |
106 | 106 | ||
107 | dasd_init_chunklist(&device->ccw_chunks, device->ccw_mem, PAGE_SIZE*2); | 107 | dasd_init_chunklist(&device->ccw_chunks, device->ccw_mem, PAGE_SIZE*2); |
108 | dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE); | 108 | dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE); |
109 | spin_lock_init(&device->mem_lock); | 109 | spin_lock_init(&device->mem_lock); |
110 | atomic_set(&device->tasklet_scheduled, 0); | 110 | atomic_set(&device->tasklet_scheduled, 0); |
111 | tasklet_init(&device->tasklet, | 111 | tasklet_init(&device->tasklet, |
112 | (void (*)(unsigned long)) dasd_device_tasklet, | 112 | (void (*)(unsigned long)) dasd_device_tasklet, |
113 | (unsigned long) device); | 113 | (unsigned long) device); |
114 | INIT_LIST_HEAD(&device->ccw_queue); | 114 | INIT_LIST_HEAD(&device->ccw_queue); |
115 | init_timer(&device->timer); | 115 | init_timer(&device->timer); |
116 | device->timer.function = dasd_device_timeout; | 116 | device->timer.function = dasd_device_timeout; |
117 | device->timer.data = (unsigned long) device; | 117 | device->timer.data = (unsigned long) device; |
118 | INIT_WORK(&device->kick_work, do_kick_device); | 118 | INIT_WORK(&device->kick_work, do_kick_device); |
119 | INIT_WORK(&device->restore_device, do_restore_device); | 119 | INIT_WORK(&device->restore_device, do_restore_device); |
120 | INIT_WORK(&device->reload_device, do_reload_device); | 120 | INIT_WORK(&device->reload_device, do_reload_device); |
121 | device->state = DASD_STATE_NEW; | 121 | device->state = DASD_STATE_NEW; |
122 | device->target = DASD_STATE_NEW; | 122 | device->target = DASD_STATE_NEW; |
123 | mutex_init(&device->state_mutex); | 123 | mutex_init(&device->state_mutex); |
124 | 124 | ||
125 | return device; | 125 | return device; |
126 | } | 126 | } |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Free memory of a device structure. | 129 | * Free memory of a device structure. |
130 | */ | 130 | */ |
131 | void dasd_free_device(struct dasd_device *device) | 131 | void dasd_free_device(struct dasd_device *device) |
132 | { | 132 | { |
133 | kfree(device->private); | 133 | kfree(device->private); |
134 | free_page((unsigned long) device->erp_mem); | 134 | free_page((unsigned long) device->erp_mem); |
135 | free_pages((unsigned long) device->ccw_mem, 1); | 135 | free_pages((unsigned long) device->ccw_mem, 1); |
136 | kfree(device); | 136 | kfree(device); |
137 | } | 137 | } |
138 | 138 | ||
139 | /* | 139 | /* |
140 | * Allocate memory for a new device structure. | 140 | * Allocate memory for a new device structure. |
141 | */ | 141 | */ |
142 | struct dasd_block *dasd_alloc_block(void) | 142 | struct dasd_block *dasd_alloc_block(void) |
143 | { | 143 | { |
144 | struct dasd_block *block; | 144 | struct dasd_block *block; |
145 | 145 | ||
146 | block = kzalloc(sizeof(*block), GFP_ATOMIC); | 146 | block = kzalloc(sizeof(*block), GFP_ATOMIC); |
147 | if (!block) | 147 | if (!block) |
148 | return ERR_PTR(-ENOMEM); | 148 | return ERR_PTR(-ENOMEM); |
149 | /* open_count = 0 means device online but not in use */ | 149 | /* open_count = 0 means device online but not in use */ |
150 | atomic_set(&block->open_count, -1); | 150 | atomic_set(&block->open_count, -1); |
151 | 151 | ||
152 | spin_lock_init(&block->request_queue_lock); | 152 | spin_lock_init(&block->request_queue_lock); |
153 | atomic_set(&block->tasklet_scheduled, 0); | 153 | atomic_set(&block->tasklet_scheduled, 0); |
154 | tasklet_init(&block->tasklet, | 154 | tasklet_init(&block->tasklet, |
155 | (void (*)(unsigned long)) dasd_block_tasklet, | 155 | (void (*)(unsigned long)) dasd_block_tasklet, |
156 | (unsigned long) block); | 156 | (unsigned long) block); |
157 | INIT_LIST_HEAD(&block->ccw_queue); | 157 | INIT_LIST_HEAD(&block->ccw_queue); |
158 | spin_lock_init(&block->queue_lock); | 158 | spin_lock_init(&block->queue_lock); |
159 | init_timer(&block->timer); | 159 | init_timer(&block->timer); |
160 | block->timer.function = dasd_block_timeout; | 160 | block->timer.function = dasd_block_timeout; |
161 | block->timer.data = (unsigned long) block; | 161 | block->timer.data = (unsigned long) block; |
162 | 162 | ||
163 | return block; | 163 | return block; |
164 | } | 164 | } |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * Free memory of a device structure. | 167 | * Free memory of a device structure. |
168 | */ | 168 | */ |
169 | void dasd_free_block(struct dasd_block *block) | 169 | void dasd_free_block(struct dasd_block *block) |
170 | { | 170 | { |
171 | kfree(block); | 171 | kfree(block); |
172 | } | 172 | } |
173 | 173 | ||
174 | /* | 174 | /* |
175 | * Make a new device known to the system. | 175 | * Make a new device known to the system. |
176 | */ | 176 | */ |
177 | static int dasd_state_new_to_known(struct dasd_device *device) | 177 | static int dasd_state_new_to_known(struct dasd_device *device) |
178 | { | 178 | { |
179 | int rc; | 179 | int rc; |
180 | 180 | ||
181 | /* | 181 | /* |
182 | * As long as the device is not in state DASD_STATE_NEW we want to | 182 | * As long as the device is not in state DASD_STATE_NEW we want to |
183 | * keep the reference count > 0. | 183 | * keep the reference count > 0. |
184 | */ | 184 | */ |
185 | dasd_get_device(device); | 185 | dasd_get_device(device); |
186 | 186 | ||
187 | if (device->block) { | 187 | if (device->block) { |
188 | rc = dasd_alloc_queue(device->block); | 188 | rc = dasd_alloc_queue(device->block); |
189 | if (rc) { | 189 | if (rc) { |
190 | dasd_put_device(device); | 190 | dasd_put_device(device); |
191 | return rc; | 191 | return rc; |
192 | } | 192 | } |
193 | } | 193 | } |
194 | device->state = DASD_STATE_KNOWN; | 194 | device->state = DASD_STATE_KNOWN; |
195 | return 0; | 195 | return 0; |
196 | } | 196 | } |
197 | 197 | ||
198 | /* | 198 | /* |
199 | * Let the system forget about a device. | 199 | * Let the system forget about a device. |
200 | */ | 200 | */ |
201 | static int dasd_state_known_to_new(struct dasd_device *device) | 201 | static int dasd_state_known_to_new(struct dasd_device *device) |
202 | { | 202 | { |
203 | /* Disable extended error reporting for this device. */ | 203 | /* Disable extended error reporting for this device. */ |
204 | dasd_eer_disable(device); | 204 | dasd_eer_disable(device); |
205 | /* Forget the discipline information. */ | 205 | /* Forget the discipline information. */ |
206 | if (device->discipline) { | 206 | if (device->discipline) { |
207 | if (device->discipline->uncheck_device) | 207 | if (device->discipline->uncheck_device) |
208 | device->discipline->uncheck_device(device); | 208 | device->discipline->uncheck_device(device); |
209 | module_put(device->discipline->owner); | 209 | module_put(device->discipline->owner); |
210 | } | 210 | } |
211 | device->discipline = NULL; | 211 | device->discipline = NULL; |
212 | if (device->base_discipline) | 212 | if (device->base_discipline) |
213 | module_put(device->base_discipline->owner); | 213 | module_put(device->base_discipline->owner); |
214 | device->base_discipline = NULL; | 214 | device->base_discipline = NULL; |
215 | device->state = DASD_STATE_NEW; | 215 | device->state = DASD_STATE_NEW; |
216 | 216 | ||
217 | if (device->block) | 217 | if (device->block) |
218 | dasd_free_queue(device->block); | 218 | dasd_free_queue(device->block); |
219 | 219 | ||
220 | /* Give up reference we took in dasd_state_new_to_known. */ | 220 | /* Give up reference we took in dasd_state_new_to_known. */ |
221 | dasd_put_device(device); | 221 | dasd_put_device(device); |
222 | return 0; | 222 | return 0; |
223 | } | 223 | } |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Request the irq line for the device. | 226 | * Request the irq line for the device. |
227 | */ | 227 | */ |
228 | static int dasd_state_known_to_basic(struct dasd_device *device) | 228 | static int dasd_state_known_to_basic(struct dasd_device *device) |
229 | { | 229 | { |
230 | int rc; | 230 | int rc; |
231 | 231 | ||
232 | /* Allocate and register gendisk structure. */ | 232 | /* Allocate and register gendisk structure. */ |
233 | if (device->block) { | 233 | if (device->block) { |
234 | rc = dasd_gendisk_alloc(device->block); | 234 | rc = dasd_gendisk_alloc(device->block); |
235 | if (rc) | 235 | if (rc) |
236 | return rc; | 236 | return rc; |
237 | } | 237 | } |
238 | /* register 'device' debug area, used for all DBF_DEV_XXX calls */ | 238 | /* register 'device' debug area, used for all DBF_DEV_XXX calls */ |
239 | device->debug_area = debug_register(dev_name(&device->cdev->dev), 4, 1, | 239 | device->debug_area = debug_register(dev_name(&device->cdev->dev), 4, 1, |
240 | 8 * sizeof(long)); | 240 | 8 * sizeof(long)); |
241 | debug_register_view(device->debug_area, &debug_sprintf_view); | 241 | debug_register_view(device->debug_area, &debug_sprintf_view); |
242 | debug_set_level(device->debug_area, DBF_WARNING); | 242 | debug_set_level(device->debug_area, DBF_WARNING); |
243 | DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created"); | 243 | DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created"); |
244 | 244 | ||
245 | device->state = DASD_STATE_BASIC; | 245 | device->state = DASD_STATE_BASIC; |
246 | return 0; | 246 | return 0; |
247 | } | 247 | } |
248 | 248 | ||
249 | /* | 249 | /* |
250 | * Release the irq line for the device. Terminate any running i/o. | 250 | * Release the irq line for the device. Terminate any running i/o. |
251 | */ | 251 | */ |
252 | static int dasd_state_basic_to_known(struct dasd_device *device) | 252 | static int dasd_state_basic_to_known(struct dasd_device *device) |
253 | { | 253 | { |
254 | int rc; | 254 | int rc; |
255 | if (device->block) { | 255 | if (device->block) { |
256 | dasd_gendisk_free(device->block); | 256 | dasd_gendisk_free(device->block); |
257 | dasd_block_clear_timer(device->block); | 257 | dasd_block_clear_timer(device->block); |
258 | } | 258 | } |
259 | rc = dasd_flush_device_queue(device); | 259 | rc = dasd_flush_device_queue(device); |
260 | if (rc) | 260 | if (rc) |
261 | return rc; | 261 | return rc; |
262 | dasd_device_clear_timer(device); | 262 | dasd_device_clear_timer(device); |
263 | 263 | ||
264 | DBF_DEV_EVENT(DBF_EMERG, device, "%p debug area deleted", device); | 264 | DBF_DEV_EVENT(DBF_EMERG, device, "%p debug area deleted", device); |
265 | if (device->debug_area != NULL) { | 265 | if (device->debug_area != NULL) { |
266 | debug_unregister(device->debug_area); | 266 | debug_unregister(device->debug_area); |
267 | device->debug_area = NULL; | 267 | device->debug_area = NULL; |
268 | } | 268 | } |
269 | device->state = DASD_STATE_KNOWN; | 269 | device->state = DASD_STATE_KNOWN; |
270 | return 0; | 270 | return 0; |
271 | } | 271 | } |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * Do the initial analysis. The do_analysis function may return | 274 | * Do the initial analysis. The do_analysis function may return |
275 | * -EAGAIN in which case the device keeps the state DASD_STATE_BASIC | 275 | * -EAGAIN in which case the device keeps the state DASD_STATE_BASIC |
276 | * until the discipline decides to continue the startup sequence | 276 | * until the discipline decides to continue the startup sequence |
277 | * by calling the function dasd_change_state. The eckd discipline | 277 | * by calling the function dasd_change_state. The eckd discipline |
278 | * uses this to start a ccw that detects the format. The completion | 278 | * uses this to start a ccw that detects the format. The completion |
279 | * interrupt for this detection ccw uses the kernel event daemon to | 279 | * interrupt for this detection ccw uses the kernel event daemon to |
280 | * trigger the call to dasd_change_state. All this is done in the | 280 | * trigger the call to dasd_change_state. All this is done in the |
281 | * discipline code, see dasd_eckd.c. | 281 | * discipline code, see dasd_eckd.c. |
282 | * After the analysis ccw is done (do_analysis returned 0) the block | 282 | * After the analysis ccw is done (do_analysis returned 0) the block |
283 | * device is setup. | 283 | * device is setup. |
284 | * In case the analysis returns an error, the device setup is stopped | 284 | * In case the analysis returns an error, the device setup is stopped |
285 | * (a fake disk was already added to allow formatting). | 285 | * (a fake disk was already added to allow formatting). |
286 | */ | 286 | */ |
287 | static int dasd_state_basic_to_ready(struct dasd_device *device) | 287 | static int dasd_state_basic_to_ready(struct dasd_device *device) |
288 | { | 288 | { |
289 | int rc; | 289 | int rc; |
290 | struct dasd_block *block; | 290 | struct dasd_block *block; |
291 | 291 | ||
292 | rc = 0; | 292 | rc = 0; |
293 | block = device->block; | 293 | block = device->block; |
294 | /* make disk known with correct capacity */ | 294 | /* make disk known with correct capacity */ |
295 | if (block) { | 295 | if (block) { |
296 | if (block->base->discipline->do_analysis != NULL) | 296 | if (block->base->discipline->do_analysis != NULL) |
297 | rc = block->base->discipline->do_analysis(block); | 297 | rc = block->base->discipline->do_analysis(block); |
298 | if (rc) { | 298 | if (rc) { |
299 | if (rc != -EAGAIN) | 299 | if (rc != -EAGAIN) |
300 | device->state = DASD_STATE_UNFMT; | 300 | device->state = DASD_STATE_UNFMT; |
301 | return rc; | 301 | return rc; |
302 | } | 302 | } |
303 | dasd_setup_queue(block); | 303 | dasd_setup_queue(block); |
304 | set_capacity(block->gdp, | 304 | set_capacity(block->gdp, |
305 | block->blocks << block->s2b_shift); | 305 | block->blocks << block->s2b_shift); |
306 | device->state = DASD_STATE_READY; | 306 | device->state = DASD_STATE_READY; |
307 | rc = dasd_scan_partitions(block); | 307 | rc = dasd_scan_partitions(block); |
308 | if (rc) | 308 | if (rc) |
309 | device->state = DASD_STATE_BASIC; | 309 | device->state = DASD_STATE_BASIC; |
310 | } else { | 310 | } else { |
311 | device->state = DASD_STATE_READY; | 311 | device->state = DASD_STATE_READY; |
312 | } | 312 | } |
313 | return rc; | 313 | return rc; |
314 | } | 314 | } |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * Remove device from block device layer. Destroy dirty buffers. | 317 | * Remove device from block device layer. Destroy dirty buffers. |
318 | * Forget format information. Check if the target level is basic | 318 | * Forget format information. Check if the target level is basic |
319 | * and if it is create fake disk for formatting. | 319 | * and if it is create fake disk for formatting. |
320 | */ | 320 | */ |
321 | static int dasd_state_ready_to_basic(struct dasd_device *device) | 321 | static int dasd_state_ready_to_basic(struct dasd_device *device) |
322 | { | 322 | { |
323 | int rc; | 323 | int rc; |
324 | 324 | ||
325 | device->state = DASD_STATE_BASIC; | 325 | device->state = DASD_STATE_BASIC; |
326 | if (device->block) { | 326 | if (device->block) { |
327 | struct dasd_block *block = device->block; | 327 | struct dasd_block *block = device->block; |
328 | rc = dasd_flush_block_queue(block); | 328 | rc = dasd_flush_block_queue(block); |
329 | if (rc) { | 329 | if (rc) { |
330 | device->state = DASD_STATE_READY; | 330 | device->state = DASD_STATE_READY; |
331 | return rc; | 331 | return rc; |
332 | } | 332 | } |
333 | dasd_flush_request_queue(block); | 333 | dasd_flush_request_queue(block); |
334 | dasd_destroy_partitions(block); | 334 | dasd_destroy_partitions(block); |
335 | block->blocks = 0; | 335 | block->blocks = 0; |
336 | block->bp_block = 0; | 336 | block->bp_block = 0; |
337 | block->s2b_shift = 0; | 337 | block->s2b_shift = 0; |
338 | } | 338 | } |
339 | return 0; | 339 | return 0; |
340 | } | 340 | } |
341 | 341 | ||
342 | /* | 342 | /* |
343 | * Back to basic. | 343 | * Back to basic. |
344 | */ | 344 | */ |
345 | static int dasd_state_unfmt_to_basic(struct dasd_device *device) | 345 | static int dasd_state_unfmt_to_basic(struct dasd_device *device) |
346 | { | 346 | { |
347 | device->state = DASD_STATE_BASIC; | 347 | device->state = DASD_STATE_BASIC; |
348 | return 0; | 348 | return 0; |
349 | } | 349 | } |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * Make the device online and schedule the bottom half to start | 352 | * Make the device online and schedule the bottom half to start |
353 | * the requeueing of requests from the linux request queue to the | 353 | * the requeueing of requests from the linux request queue to the |
354 | * ccw queue. | 354 | * ccw queue. |
355 | */ | 355 | */ |
356 | static int | 356 | static int |
357 | dasd_state_ready_to_online(struct dasd_device * device) | 357 | dasd_state_ready_to_online(struct dasd_device * device) |
358 | { | 358 | { |
359 | int rc; | 359 | int rc; |
360 | struct gendisk *disk; | 360 | struct gendisk *disk; |
361 | struct disk_part_iter piter; | 361 | struct disk_part_iter piter; |
362 | struct hd_struct *part; | 362 | struct hd_struct *part; |
363 | 363 | ||
364 | if (device->discipline->ready_to_online) { | 364 | if (device->discipline->ready_to_online) { |
365 | rc = device->discipline->ready_to_online(device); | 365 | rc = device->discipline->ready_to_online(device); |
366 | if (rc) | 366 | if (rc) |
367 | return rc; | 367 | return rc; |
368 | } | 368 | } |
369 | device->state = DASD_STATE_ONLINE; | 369 | device->state = DASD_STATE_ONLINE; |
370 | if (device->block) { | 370 | if (device->block) { |
371 | dasd_schedule_block_bh(device->block); | 371 | dasd_schedule_block_bh(device->block); |
372 | disk = device->block->bdev->bd_disk; | 372 | disk = device->block->bdev->bd_disk; |
373 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | 373 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); |
374 | while ((part = disk_part_iter_next(&piter))) | 374 | while ((part = disk_part_iter_next(&piter))) |
375 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); | 375 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); |
376 | disk_part_iter_exit(&piter); | 376 | disk_part_iter_exit(&piter); |
377 | } | 377 | } |
378 | return 0; | 378 | return 0; |
379 | } | 379 | } |
380 | 380 | ||
381 | /* | 381 | /* |
382 | * Stop the requeueing of requests again. | 382 | * Stop the requeueing of requests again. |
383 | */ | 383 | */ |
384 | static int dasd_state_online_to_ready(struct dasd_device *device) | 384 | static int dasd_state_online_to_ready(struct dasd_device *device) |
385 | { | 385 | { |
386 | int rc; | 386 | int rc; |
387 | struct gendisk *disk; | 387 | struct gendisk *disk; |
388 | struct disk_part_iter piter; | 388 | struct disk_part_iter piter; |
389 | struct hd_struct *part; | 389 | struct hd_struct *part; |
390 | 390 | ||
391 | if (device->discipline->online_to_ready) { | 391 | if (device->discipline->online_to_ready) { |
392 | rc = device->discipline->online_to_ready(device); | 392 | rc = device->discipline->online_to_ready(device); |
393 | if (rc) | 393 | if (rc) |
394 | return rc; | 394 | return rc; |
395 | } | 395 | } |
396 | device->state = DASD_STATE_READY; | 396 | device->state = DASD_STATE_READY; |
397 | if (device->block) { | 397 | if (device->block) { |
398 | disk = device->block->bdev->bd_disk; | 398 | disk = device->block->bdev->bd_disk; |
399 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | 399 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); |
400 | while ((part = disk_part_iter_next(&piter))) | 400 | while ((part = disk_part_iter_next(&piter))) |
401 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); | 401 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); |
402 | disk_part_iter_exit(&piter); | 402 | disk_part_iter_exit(&piter); |
403 | } | 403 | } |
404 | return 0; | 404 | return 0; |
405 | } | 405 | } |
406 | 406 | ||
407 | /* | 407 | /* |
408 | * Device startup state changes. | 408 | * Device startup state changes. |
409 | */ | 409 | */ |
410 | static int dasd_increase_state(struct dasd_device *device) | 410 | static int dasd_increase_state(struct dasd_device *device) |
411 | { | 411 | { |
412 | int rc; | 412 | int rc; |
413 | 413 | ||
414 | rc = 0; | 414 | rc = 0; |
415 | if (device->state == DASD_STATE_NEW && | 415 | if (device->state == DASD_STATE_NEW && |
416 | device->target >= DASD_STATE_KNOWN) | 416 | device->target >= DASD_STATE_KNOWN) |
417 | rc = dasd_state_new_to_known(device); | 417 | rc = dasd_state_new_to_known(device); |
418 | 418 | ||
419 | if (!rc && | 419 | if (!rc && |
420 | device->state == DASD_STATE_KNOWN && | 420 | device->state == DASD_STATE_KNOWN && |
421 | device->target >= DASD_STATE_BASIC) | 421 | device->target >= DASD_STATE_BASIC) |
422 | rc = dasd_state_known_to_basic(device); | 422 | rc = dasd_state_known_to_basic(device); |
423 | 423 | ||
424 | if (!rc && | 424 | if (!rc && |
425 | device->state == DASD_STATE_BASIC && | 425 | device->state == DASD_STATE_BASIC && |
426 | device->target >= DASD_STATE_READY) | 426 | device->target >= DASD_STATE_READY) |
427 | rc = dasd_state_basic_to_ready(device); | 427 | rc = dasd_state_basic_to_ready(device); |
428 | 428 | ||
429 | if (!rc && | 429 | if (!rc && |
430 | device->state == DASD_STATE_UNFMT && | 430 | device->state == DASD_STATE_UNFMT && |
431 | device->target > DASD_STATE_UNFMT) | 431 | device->target > DASD_STATE_UNFMT) |
432 | rc = -EPERM; | 432 | rc = -EPERM; |
433 | 433 | ||
434 | if (!rc && | 434 | if (!rc && |
435 | device->state == DASD_STATE_READY && | 435 | device->state == DASD_STATE_READY && |
436 | device->target >= DASD_STATE_ONLINE) | 436 | device->target >= DASD_STATE_ONLINE) |
437 | rc = dasd_state_ready_to_online(device); | 437 | rc = dasd_state_ready_to_online(device); |
438 | 438 | ||
439 | return rc; | 439 | return rc; |
440 | } | 440 | } |
441 | 441 | ||
442 | /* | 442 | /* |
443 | * Device shutdown state changes. | 443 | * Device shutdown state changes. |
444 | */ | 444 | */ |
445 | static int dasd_decrease_state(struct dasd_device *device) | 445 | static int dasd_decrease_state(struct dasd_device *device) |
446 | { | 446 | { |
447 | int rc; | 447 | int rc; |
448 | 448 | ||
449 | rc = 0; | 449 | rc = 0; |
450 | if (device->state == DASD_STATE_ONLINE && | 450 | if (device->state == DASD_STATE_ONLINE && |
451 | device->target <= DASD_STATE_READY) | 451 | device->target <= DASD_STATE_READY) |
452 | rc = dasd_state_online_to_ready(device); | 452 | rc = dasd_state_online_to_ready(device); |
453 | 453 | ||
454 | if (!rc && | 454 | if (!rc && |
455 | device->state == DASD_STATE_READY && | 455 | device->state == DASD_STATE_READY && |
456 | device->target <= DASD_STATE_BASIC) | 456 | device->target <= DASD_STATE_BASIC) |
457 | rc = dasd_state_ready_to_basic(device); | 457 | rc = dasd_state_ready_to_basic(device); |
458 | 458 | ||
459 | if (!rc && | 459 | if (!rc && |
460 | device->state == DASD_STATE_UNFMT && | 460 | device->state == DASD_STATE_UNFMT && |
461 | device->target <= DASD_STATE_BASIC) | 461 | device->target <= DASD_STATE_BASIC) |
462 | rc = dasd_state_unfmt_to_basic(device); | 462 | rc = dasd_state_unfmt_to_basic(device); |
463 | 463 | ||
464 | if (!rc && | 464 | if (!rc && |
465 | device->state == DASD_STATE_BASIC && | 465 | device->state == DASD_STATE_BASIC && |
466 | device->target <= DASD_STATE_KNOWN) | 466 | device->target <= DASD_STATE_KNOWN) |
467 | rc = dasd_state_basic_to_known(device); | 467 | rc = dasd_state_basic_to_known(device); |
468 | 468 | ||
469 | if (!rc && | 469 | if (!rc && |
470 | device->state == DASD_STATE_KNOWN && | 470 | device->state == DASD_STATE_KNOWN && |
471 | device->target <= DASD_STATE_NEW) | 471 | device->target <= DASD_STATE_NEW) |
472 | rc = dasd_state_known_to_new(device); | 472 | rc = dasd_state_known_to_new(device); |
473 | 473 | ||
474 | return rc; | 474 | return rc; |
475 | } | 475 | } |
476 | 476 | ||
477 | /* | 477 | /* |
478 | * This is the main startup/shutdown routine. | 478 | * This is the main startup/shutdown routine. |
479 | */ | 479 | */ |
480 | static void dasd_change_state(struct dasd_device *device) | 480 | static void dasd_change_state(struct dasd_device *device) |
481 | { | 481 | { |
482 | int rc; | 482 | int rc; |
483 | 483 | ||
484 | if (device->state == device->target) | 484 | if (device->state == device->target) |
485 | /* Already where we want to go today... */ | 485 | /* Already where we want to go today... */ |
486 | return; | 486 | return; |
487 | if (device->state < device->target) | 487 | if (device->state < device->target) |
488 | rc = dasd_increase_state(device); | 488 | rc = dasd_increase_state(device); |
489 | else | 489 | else |
490 | rc = dasd_decrease_state(device); | 490 | rc = dasd_decrease_state(device); |
491 | if (rc == -EAGAIN) | 491 | if (rc == -EAGAIN) |
492 | return; | 492 | return; |
493 | if (rc) | 493 | if (rc) |
494 | device->target = device->state; | 494 | device->target = device->state; |
495 | 495 | ||
496 | if (device->state == device->target) | 496 | if (device->state == device->target) |
497 | wake_up(&dasd_init_waitq); | 497 | wake_up(&dasd_init_waitq); |
498 | 498 | ||
499 | /* let user-space know that the device status changed */ | 499 | /* let user-space know that the device status changed */ |
500 | kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); | 500 | kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); |
501 | } | 501 | } |
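The >= and <= comparisons in dasd_increase_state() and dasd_decrease_state() only make sense if the DASD_STATE_* constants form an ascending ladder. A purely illustrative sketch of that ordering, with the meaning of each step taken from the transition helpers above (the real constants are defined in the dasd headers, not here):

/* Illustrative only -- mirrors the ordering that the comparisons in
 * dasd_increase_state()/dasd_decrease_state() rely on; the authoritative
 * definitions live in the dasd header files.
 */
enum example_dasd_state_ladder {
	EX_DASD_STATE_NEW,	/* device structure allocated		*/
	EX_DASD_STATE_KNOWN,	/* request queue allocated		*/
	EX_DASD_STATE_BASIC,	/* gendisk and debug area registered	*/
	EX_DASD_STATE_UNFMT,	/* analysis failed, fake disk only	*/
	EX_DASD_STATE_READY,	/* capacity set, partitions scanned	*/
	EX_DASD_STATE_ONLINE,	/* requests are being processed		*/
};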
502 | 502 | ||
503 | /* | 503 | /* |
504 | * Kick starter for devices that did not complete the startup/shutdown | 504 | * Kick starter for devices that did not complete the startup/shutdown |
505 | * procedure or were sleeping because of a pending state. | 505 | * procedure or were sleeping because of a pending state. |
506 | * dasd_kick_device will schedule a call to do_kick_device to the kernel | 506 | * dasd_kick_device will schedule a call to do_kick_device to the kernel |
507 | * event daemon. | 507 | * event daemon. |
508 | */ | 508 | */ |
509 | static void do_kick_device(struct work_struct *work) | 509 | static void do_kick_device(struct work_struct *work) |
510 | { | 510 | { |
511 | struct dasd_device *device = container_of(work, struct dasd_device, kick_work); | 511 | struct dasd_device *device = container_of(work, struct dasd_device, kick_work); |
512 | mutex_lock(&device->state_mutex); | 512 | mutex_lock(&device->state_mutex); |
513 | dasd_change_state(device); | 513 | dasd_change_state(device); |
514 | mutex_unlock(&device->state_mutex); | 514 | mutex_unlock(&device->state_mutex); |
515 | dasd_schedule_device_bh(device); | 515 | dasd_schedule_device_bh(device); |
516 | dasd_put_device(device); | 516 | dasd_put_device(device); |
517 | } | 517 | } |
518 | 518 | ||
519 | void dasd_kick_device(struct dasd_device *device) | 519 | void dasd_kick_device(struct dasd_device *device) |
520 | { | 520 | { |
521 | dasd_get_device(device); | 521 | dasd_get_device(device); |
522 | /* queue call to dasd_kick_device to the kernel event daemon. */ | 522 | /* queue call to dasd_kick_device to the kernel event daemon. */ |
523 | schedule_work(&device->kick_work); | 523 | schedule_work(&device->kick_work); |
524 | } | 524 | } |
525 | 525 | ||
526 | /* | 526 | /* |
527 | * dasd_reload_device will schedule a call to do_reload_device to the kernel | 527 | * dasd_reload_device will schedule a call to do_reload_device to the kernel |
528 | * event daemon. | 528 | * event daemon. |
529 | */ | 529 | */ |
530 | static void do_reload_device(struct work_struct *work) | 530 | static void do_reload_device(struct work_struct *work) |
531 | { | 531 | { |
532 | struct dasd_device *device = container_of(work, struct dasd_device, | 532 | struct dasd_device *device = container_of(work, struct dasd_device, |
533 | reload_device); | 533 | reload_device); |
534 | device->discipline->reload(device); | 534 | device->discipline->reload(device); |
535 | dasd_put_device(device); | 535 | dasd_put_device(device); |
536 | } | 536 | } |
537 | 537 | ||
538 | void dasd_reload_device(struct dasd_device *device) | 538 | void dasd_reload_device(struct dasd_device *device) |
539 | { | 539 | { |
540 | dasd_get_device(device); | 540 | dasd_get_device(device); |
541 | /* queue call to dasd_reload_device to the kernel event daemon. */ | 541 | /* queue call to dasd_reload_device to the kernel event daemon. */ |
542 | schedule_work(&device->reload_device); | 542 | schedule_work(&device->reload_device); |
543 | } | 543 | } |
544 | EXPORT_SYMBOL(dasd_reload_device); | 544 | EXPORT_SYMBOL(dasd_reload_device); |
545 | 545 | ||
546 | /* | 546 | /* |
547 | * dasd_restore_device will schedule a call to do_restore_device to the kernel | 547 | * dasd_restore_device will schedule a call to do_restore_device to the kernel |
548 | * event daemon. | 548 | * event daemon. |
549 | */ | 549 | */ |
550 | static void do_restore_device(struct work_struct *work) | 550 | static void do_restore_device(struct work_struct *work) |
551 | { | 551 | { |
552 | struct dasd_device *device = container_of(work, struct dasd_device, | 552 | struct dasd_device *device = container_of(work, struct dasd_device, |
553 | restore_device); | 553 | restore_device); |
554 | device->cdev->drv->restore(device->cdev); | 554 | device->cdev->drv->restore(device->cdev); |
555 | dasd_put_device(device); | 555 | dasd_put_device(device); |
556 | } | 556 | } |
557 | 557 | ||
558 | void dasd_restore_device(struct dasd_device *device) | 558 | void dasd_restore_device(struct dasd_device *device) |
559 | { | 559 | { |
560 | dasd_get_device(device); | 560 | dasd_get_device(device); |
561 | /* queue call to dasd_restore_device to the kernel event daemon. */ | 561 | /* queue call to dasd_restore_device to the kernel event daemon. */ |
562 | schedule_work(&device->restore_device); | 562 | schedule_work(&device->restore_device); |
563 | } | 563 | } |
564 | 564 | ||
565 | /* | 565 | /* |
566 | * Set the target state for a device and start the state change. | 566 | * Set the target state for a device and start the state change. |
567 | */ | 567 | */ |
568 | void dasd_set_target_state(struct dasd_device *device, int target) | 568 | void dasd_set_target_state(struct dasd_device *device, int target) |
569 | { | 569 | { |
570 | dasd_get_device(device); | 570 | dasd_get_device(device); |
571 | mutex_lock(&device->state_mutex); | 571 | mutex_lock(&device->state_mutex); |
572 | /* If we are in probeonly mode stop at DASD_STATE_READY. */ | 572 | /* If we are in probeonly mode stop at DASD_STATE_READY. */ |
573 | if (dasd_probeonly && target > DASD_STATE_READY) | 573 | if (dasd_probeonly && target > DASD_STATE_READY) |
574 | target = DASD_STATE_READY; | 574 | target = DASD_STATE_READY; |
575 | if (device->target != target) { | 575 | if (device->target != target) { |
576 | if (device->state == target) | 576 | if (device->state == target) |
577 | wake_up(&dasd_init_waitq); | 577 | wake_up(&dasd_init_waitq); |
578 | device->target = target; | 578 | device->target = target; |
579 | } | 579 | } |
580 | if (device->state != device->target) | 580 | if (device->state != device->target) |
581 | dasd_change_state(device); | 581 | dasd_change_state(device); |
582 | mutex_unlock(&device->state_mutex); | 582 | mutex_unlock(&device->state_mutex); |
583 | dasd_put_device(device); | 583 | dasd_put_device(device); |
584 | } | 584 | } |
585 | 585 | ||
586 | /* | 586 | /* |
587 | * Enable a device and wait until it has reached its target state. | 587 | * Enable a device and wait until it has reached its target state. |
588 | */ | 588 | */ |
589 | static inline int _wait_for_device(struct dasd_device *device) | 589 | static inline int _wait_for_device(struct dasd_device *device) |
590 | { | 590 | { |
591 | return (device->state == device->target); | 591 | return (device->state == device->target); |
592 | } | 592 | } |
593 | 593 | ||
594 | void dasd_enable_device(struct dasd_device *device) | 594 | void dasd_enable_device(struct dasd_device *device) |
595 | { | 595 | { |
596 | dasd_set_target_state(device, DASD_STATE_ONLINE); | 596 | dasd_set_target_state(device, DASD_STATE_ONLINE); |
597 | if (device->state <= DASD_STATE_KNOWN) | 597 | if (device->state <= DASD_STATE_KNOWN) |
598 | /* No discipline for device found. */ | 598 | /* No discipline for device found. */ |
599 | dasd_set_target_state(device, DASD_STATE_NEW); | 599 | dasd_set_target_state(device, DASD_STATE_NEW); |
600 | /* Now wait for the devices to come up. */ | 600 | /* Now wait for the devices to come up. */ |
601 | wait_event(dasd_init_waitq, _wait_for_device(device)); | 601 | wait_event(dasd_init_waitq, _wait_for_device(device)); |
602 | } | 602 | } |
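dasd_enable_device() above is the canonical use of the state machine interface: set a target with dasd_set_target_state() and wait on dasd_init_waitq until device->state has caught up. A minimal sketch of the same pattern in the opposite direction, assuming a caller inside this file (the function name is made up for illustration):

/* Illustrative counterpart to dasd_enable_device(): drive the device
 * back down to DASD_STATE_BASIC and wait for the state machine to
 * reach it.  Not part of the driver.
 */
static void example_dasd_drop_to_basic(struct dasd_device *device)
{
	dasd_set_target_state(device, DASD_STATE_BASIC);
	wait_event(dasd_init_waitq, _wait_for_device(device));
}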
603 | 603 | ||
604 | /* | 604 | /* |
605 | * SECTION: device operation (interrupt handler, start i/o, term i/o ...) | 605 | * SECTION: device operation (interrupt handler, start i/o, term i/o ...) |
606 | */ | 606 | */ |
607 | #ifdef CONFIG_DASD_PROFILE | 607 | #ifdef CONFIG_DASD_PROFILE |
608 | 608 | ||
609 | struct dasd_profile_info_t dasd_global_profile; | 609 | struct dasd_profile_info_t dasd_global_profile; |
610 | unsigned int dasd_profile_level = DASD_PROFILE_OFF; | 610 | unsigned int dasd_profile_level = DASD_PROFILE_OFF; |
611 | 611 | ||
612 | /* | 612 | /* |
613 | * Increments counter in global and local profiling structures. | 613 | * Increments counter in global and local profiling structures. |
614 | */ | 614 | */ |
615 | #define dasd_profile_counter(value, counter, block) \ | 615 | #define dasd_profile_counter(value, counter, block) \ |
616 | { \ | 616 | { \ |
617 | int index; \ | 617 | int index; \ |
618 | for (index = 0; index < 31 && value >> (2+index); index++); \ | 618 | for (index = 0; index < 31 && value >> (2+index); index++); \ |
619 | dasd_global_profile.counter[index]++; \ | 619 | dasd_global_profile.counter[index]++; \ |
620 | block->profile.counter[index]++; \ | 620 | block->profile.counter[index]++; \ |
621 | } | 621 | } |
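The loop in the macro stops at the first index for which value >> (2 + index) is zero, which yields a logarithmic histogram. A few worked values make the bucket layout concrete:

/* Worked examples for dasd_profile_counter():
 *   value  1..3   -> index 0
 *   value  4..7   -> index 1
 *   value  8..15  -> index 2
 *   value 16..31  -> index 3
 *   ...
 * i.e. index is roughly ilog2(value) - 1, never below 0 and capped at 31.
 */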
622 | 622 | ||
623 | /* | 623 | /* |
624 | * Add profiling information for cqr before execution. | 624 | * Add profiling information for cqr before execution. |
625 | */ | 625 | */ |
626 | static void dasd_profile_start(struct dasd_block *block, | 626 | static void dasd_profile_start(struct dasd_block *block, |
627 | struct dasd_ccw_req *cqr, | 627 | struct dasd_ccw_req *cqr, |
628 | struct request *req) | 628 | struct request *req) |
629 | { | 629 | { |
630 | struct list_head *l; | 630 | struct list_head *l; |
631 | unsigned int counter; | 631 | unsigned int counter; |
632 | 632 | ||
633 | if (dasd_profile_level != DASD_PROFILE_ON) | 633 | if (dasd_profile_level != DASD_PROFILE_ON) |
634 | return; | 634 | return; |
635 | 635 | ||
636 | /* count the length of the chanq for statistics */ | 636 | /* count the length of the chanq for statistics */ |
637 | counter = 0; | 637 | counter = 0; |
638 | list_for_each(l, &block->ccw_queue) | 638 | list_for_each(l, &block->ccw_queue) |
639 | if (++counter >= 31) | 639 | if (++counter >= 31) |
640 | break; | 640 | break; |
641 | dasd_global_profile.dasd_io_nr_req[counter]++; | 641 | dasd_global_profile.dasd_io_nr_req[counter]++; |
642 | block->profile.dasd_io_nr_req[counter]++; | 642 | block->profile.dasd_io_nr_req[counter]++; |
643 | } | 643 | } |
644 | 644 | ||
645 | /* | 645 | /* |
646 | * Add profiling information for cqr after execution. | 646 | * Add profiling information for cqr after execution. |
647 | */ | 647 | */ |
648 | static void dasd_profile_end(struct dasd_block *block, | 648 | static void dasd_profile_end(struct dasd_block *block, |
649 | struct dasd_ccw_req *cqr, | 649 | struct dasd_ccw_req *cqr, |
650 | struct request *req) | 650 | struct request *req) |
651 | { | 651 | { |
652 | long strtime, irqtime, endtime, tottime; /* in microseconds */ | 652 | long strtime, irqtime, endtime, tottime; /* in microseconds */ |
653 | long tottimeps, sectors; | 653 | long tottimeps, sectors; |
654 | 654 | ||
655 | if (dasd_profile_level != DASD_PROFILE_ON) | 655 | if (dasd_profile_level != DASD_PROFILE_ON) |
656 | return; | 656 | return; |
657 | 657 | ||
658 | sectors = blk_rq_sectors(req); | 658 | sectors = blk_rq_sectors(req); |
659 | if (!cqr->buildclk || !cqr->startclk || | 659 | if (!cqr->buildclk || !cqr->startclk || |
660 | !cqr->stopclk || !cqr->endclk || | 660 | !cqr->stopclk || !cqr->endclk || |
661 | !sectors) | 661 | !sectors) |
662 | return; | 662 | return; |
663 | 663 | ||
664 | strtime = ((cqr->startclk - cqr->buildclk) >> 12); | 664 | strtime = ((cqr->startclk - cqr->buildclk) >> 12); |
665 | irqtime = ((cqr->stopclk - cqr->startclk) >> 12); | 665 | irqtime = ((cqr->stopclk - cqr->startclk) >> 12); |
666 | endtime = ((cqr->endclk - cqr->stopclk) >> 12); | 666 | endtime = ((cqr->endclk - cqr->stopclk) >> 12); |
667 | tottime = ((cqr->endclk - cqr->buildclk) >> 12); | 667 | tottime = ((cqr->endclk - cqr->buildclk) >> 12); |
668 | tottimeps = tottime / sectors; | 668 | tottimeps = tottime / sectors; |
669 | 669 | ||
670 | if (!dasd_global_profile.dasd_io_reqs) | 670 | if (!dasd_global_profile.dasd_io_reqs) |
671 | memset(&dasd_global_profile, 0, | 671 | memset(&dasd_global_profile, 0, |
672 | sizeof(struct dasd_profile_info_t)); | 672 | sizeof(struct dasd_profile_info_t)); |
673 | dasd_global_profile.dasd_io_reqs++; | 673 | dasd_global_profile.dasd_io_reqs++; |
674 | dasd_global_profile.dasd_io_sects += sectors; | 674 | dasd_global_profile.dasd_io_sects += sectors; |
675 | 675 | ||
676 | if (!block->profile.dasd_io_reqs) | 676 | if (!block->profile.dasd_io_reqs) |
677 | memset(&block->profile, 0, | 677 | memset(&block->profile, 0, |
678 | sizeof(struct dasd_profile_info_t)); | 678 | sizeof(struct dasd_profile_info_t)); |
679 | block->profile.dasd_io_reqs++; | 679 | block->profile.dasd_io_reqs++; |
680 | block->profile.dasd_io_sects += sectors; | 680 | block->profile.dasd_io_sects += sectors; |
681 | 681 | ||
682 | dasd_profile_counter(sectors, dasd_io_secs, block); | 682 | dasd_profile_counter(sectors, dasd_io_secs, block); |
683 | dasd_profile_counter(tottime, dasd_io_times, block); | 683 | dasd_profile_counter(tottime, dasd_io_times, block); |
684 | dasd_profile_counter(tottimeps, dasd_io_timps, block); | 684 | dasd_profile_counter(tottimeps, dasd_io_timps, block); |
685 | dasd_profile_counter(strtime, dasd_io_time1, block); | 685 | dasd_profile_counter(strtime, dasd_io_time1, block); |
686 | dasd_profile_counter(irqtime, dasd_io_time2, block); | 686 | dasd_profile_counter(irqtime, dasd_io_time2, block); |
687 | dasd_profile_counter(irqtime / sectors, dasd_io_time2ps, block); | 687 | dasd_profile_counter(irqtime / sectors, dasd_io_time2ps, block); |
688 | dasd_profile_counter(endtime, dasd_io_time3, block); | 688 | dasd_profile_counter(endtime, dasd_io_time3, block); |
689 | } | 689 | } |
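The right shift by 12 in the calculations above is what turns get_clock() deltas into the microseconds promised by the comment on the local variables: in the s390 TOD clock format bit 51 ticks once per microsecond, so discarding the low twelve bits of a clock difference leaves microseconds.

/* Example: a get_clock() delta of 0x1000 TOD units is exactly
 * 1 microsecond, so (delta >> 12) == 1.
 */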
690 | #else | 690 | #else |
691 | #define dasd_profile_start(block, cqr, req) do {} while (0) | 691 | #define dasd_profile_start(block, cqr, req) do {} while (0) |
692 | #define dasd_profile_end(block, cqr, req) do {} while (0) | 692 | #define dasd_profile_end(block, cqr, req) do {} while (0) |
693 | #endif /* CONFIG_DASD_PROFILE */ | 693 | #endif /* CONFIG_DASD_PROFILE */ |
694 | 694 | ||
695 | /* | 695 | /* |
696 | * Allocate memory for a channel program with 'cplength' channel | 696 | * Allocate memory for a channel program with 'cplength' channel |
697 | * command words and 'datasize' additional space. There are two | 697 | * command words and 'datasize' additional space. There are two |
698 | * variants: 1) dasd_kmalloc_request uses kmalloc to get the needed | 698 | * variants: 1) dasd_kmalloc_request uses kmalloc to get the needed |
699 | * memory and 2) dasd_smalloc_request uses the static ccw memory | 699 | * memory and 2) dasd_smalloc_request uses the static ccw memory |
700 | * that gets allocated for each device. | 700 | * that gets allocated for each device. |
701 | */ | 701 | */ |
702 | struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength, | 702 | struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength, |
703 | int datasize, | 703 | int datasize, |
704 | struct dasd_device *device) | 704 | struct dasd_device *device) |
705 | { | 705 | { |
706 | struct dasd_ccw_req *cqr; | 706 | struct dasd_ccw_req *cqr; |
707 | 707 | ||
708 | /* Sanity checks */ | 708 | /* Sanity checks */ |
709 | BUG_ON(datasize > PAGE_SIZE || | 709 | BUG_ON(datasize > PAGE_SIZE || |
710 | (cplength*sizeof(struct ccw1)) > PAGE_SIZE); | 710 | (cplength*sizeof(struct ccw1)) > PAGE_SIZE); |
711 | 711 | ||
712 | cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC); | 712 | cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC); |
713 | if (cqr == NULL) | 713 | if (cqr == NULL) |
714 | return ERR_PTR(-ENOMEM); | 714 | return ERR_PTR(-ENOMEM); |
715 | cqr->cpaddr = NULL; | 715 | cqr->cpaddr = NULL; |
716 | if (cplength > 0) { | 716 | if (cplength > 0) { |
717 | cqr->cpaddr = kcalloc(cplength, sizeof(struct ccw1), | 717 | cqr->cpaddr = kcalloc(cplength, sizeof(struct ccw1), |
718 | GFP_ATOMIC | GFP_DMA); | 718 | GFP_ATOMIC | GFP_DMA); |
719 | if (cqr->cpaddr == NULL) { | 719 | if (cqr->cpaddr == NULL) { |
720 | kfree(cqr); | 720 | kfree(cqr); |
721 | return ERR_PTR(-ENOMEM); | 721 | return ERR_PTR(-ENOMEM); |
722 | } | 722 | } |
723 | } | 723 | } |
724 | cqr->data = NULL; | 724 | cqr->data = NULL; |
725 | if (datasize > 0) { | 725 | if (datasize > 0) { |
726 | cqr->data = kzalloc(datasize, GFP_ATOMIC | GFP_DMA); | 726 | cqr->data = kzalloc(datasize, GFP_ATOMIC | GFP_DMA); |
727 | if (cqr->data == NULL) { | 727 | if (cqr->data == NULL) { |
728 | kfree(cqr->cpaddr); | 728 | kfree(cqr->cpaddr); |
729 | kfree(cqr); | 729 | kfree(cqr); |
730 | return ERR_PTR(-ENOMEM); | 730 | return ERR_PTR(-ENOMEM); |
731 | } | 731 | } |
732 | } | 732 | } |
733 | cqr->magic = magic; | 733 | cqr->magic = magic; |
734 | set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); | 734 | set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); |
735 | dasd_get_device(device); | 735 | dasd_get_device(device); |
736 | return cqr; | 736 | return cqr; |
737 | } | 737 | } |
738 | 738 | ||
739 | struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, | 739 | struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, |
740 | int datasize, | 740 | int datasize, |
741 | struct dasd_device *device) | 741 | struct dasd_device *device) |
742 | { | 742 | { |
743 | unsigned long flags; | 743 | unsigned long flags; |
744 | struct dasd_ccw_req *cqr; | 744 | struct dasd_ccw_req *cqr; |
745 | char *data; | 745 | char *data; |
746 | int size; | 746 | int size; |
747 | 747 | ||
748 | /* Sanity checks */ | 748 | /* Sanity checks */ |
749 | BUG_ON(datasize > PAGE_SIZE || | 749 | BUG_ON(datasize > PAGE_SIZE || |
750 | (cplength*sizeof(struct ccw1)) > PAGE_SIZE); | 750 | (cplength*sizeof(struct ccw1)) > PAGE_SIZE); |
751 | 751 | ||
752 | size = (sizeof(struct dasd_ccw_req) + 7L) & -8L; | 752 | size = (sizeof(struct dasd_ccw_req) + 7L) & -8L; |
753 | if (cplength > 0) | 753 | if (cplength > 0) |
754 | size += cplength * sizeof(struct ccw1); | 754 | size += cplength * sizeof(struct ccw1); |
755 | if (datasize > 0) | 755 | if (datasize > 0) |
756 | size += datasize; | 756 | size += datasize; |
757 | spin_lock_irqsave(&device->mem_lock, flags); | 757 | spin_lock_irqsave(&device->mem_lock, flags); |
758 | cqr = (struct dasd_ccw_req *) | 758 | cqr = (struct dasd_ccw_req *) |
759 | dasd_alloc_chunk(&device->ccw_chunks, size); | 759 | dasd_alloc_chunk(&device->ccw_chunks, size); |
760 | spin_unlock_irqrestore(&device->mem_lock, flags); | 760 | spin_unlock_irqrestore(&device->mem_lock, flags); |
761 | if (cqr == NULL) | 761 | if (cqr == NULL) |
762 | return ERR_PTR(-ENOMEM); | 762 | return ERR_PTR(-ENOMEM); |
763 | memset(cqr, 0, sizeof(struct dasd_ccw_req)); | 763 | memset(cqr, 0, sizeof(struct dasd_ccw_req)); |
764 | data = (char *) cqr + ((sizeof(struct dasd_ccw_req) + 7L) & -8L); | 764 | data = (char *) cqr + ((sizeof(struct dasd_ccw_req) + 7L) & -8L); |
765 | cqr->cpaddr = NULL; | 765 | cqr->cpaddr = NULL; |
766 | if (cplength > 0) { | 766 | if (cplength > 0) { |
767 | cqr->cpaddr = (struct ccw1 *) data; | 767 | cqr->cpaddr = (struct ccw1 *) data; |
768 | data += cplength*sizeof(struct ccw1); | 768 | data += cplength*sizeof(struct ccw1); |
769 | memset(cqr->cpaddr, 0, cplength*sizeof(struct ccw1)); | 769 | memset(cqr->cpaddr, 0, cplength*sizeof(struct ccw1)); |
770 | } | 770 | } |
771 | cqr->data = NULL; | 771 | cqr->data = NULL; |
772 | if (datasize > 0) { | 772 | if (datasize > 0) { |
773 | cqr->data = data; | 773 | cqr->data = data; |
774 | memset(cqr->data, 0, datasize); | 774 | memset(cqr->data, 0, datasize); |
775 | } | 775 | } |
776 | cqr->magic = magic; | 776 | cqr->magic = magic; |
777 | set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); | 777 | set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); |
778 | dasd_get_device(device); | 778 | dasd_get_device(device); |
779 | return cqr; | 779 | return cqr; |
780 | } | 780 | } |
781 | 781 | ||
782 | /* | 782 | /* |
783 | * Free memory of a channel program. This function needs to free all the | 783 | * Free memory of a channel program. This function needs to free all the |
784 | * idal lists that might have been created by dasd_set_cda and the | 784 | * idal lists that might have been created by dasd_set_cda and the |
785 | * struct dasd_ccw_req itself. | 785 | * struct dasd_ccw_req itself. |
786 | */ | 786 | */ |
787 | void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) | 787 | void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) |
788 | { | 788 | { |
789 | #ifdef CONFIG_64BIT | 789 | #ifdef CONFIG_64BIT |
790 | struct ccw1 *ccw; | 790 | struct ccw1 *ccw; |
791 | 791 | ||
792 | /* Clear any idals used for the request. */ | 792 | /* Clear any idals used for the request. */ |
793 | ccw = cqr->cpaddr; | 793 | ccw = cqr->cpaddr; |
794 | do { | 794 | do { |
795 | clear_normalized_cda(ccw); | 795 | clear_normalized_cda(ccw); |
796 | } while (ccw++->flags & (CCW_FLAG_CC | CCW_FLAG_DC)); | 796 | } while (ccw++->flags & (CCW_FLAG_CC | CCW_FLAG_DC)); |
797 | #endif | 797 | #endif |
798 | kfree(cqr->cpaddr); | 798 | kfree(cqr->cpaddr); |
799 | kfree(cqr->data); | 799 | kfree(cqr->data); |
800 | kfree(cqr); | 800 | kfree(cqr); |
801 | dasd_put_device(device); | 801 | dasd_put_device(device); |
802 | } | 802 | } |
803 | 803 | ||
804 | void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) | 804 | void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) |
805 | { | 805 | { |
806 | unsigned long flags; | 806 | unsigned long flags; |
807 | 807 | ||
808 | spin_lock_irqsave(&device->mem_lock, flags); | 808 | spin_lock_irqsave(&device->mem_lock, flags); |
809 | dasd_free_chunk(&device->ccw_chunks, cqr); | 809 | dasd_free_chunk(&device->ccw_chunks, cqr); |
810 | spin_unlock_irqrestore(&device->mem_lock, flags); | 810 | spin_unlock_irqrestore(&device->mem_lock, flags); |
811 | dasd_put_device(device); | 811 | dasd_put_device(device); |
812 | } | 812 | } |
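A minimal sketch of how the two allocation variants are meant to be paired with their free routines; the magic value, cplength and datasize below are placeholders rather than values taken from a real discipline:

/* Illustrative only: allocate a request from the per-device ccw chunk
 * pool and release it with the matching free routine.  A real
 * discipline would fill in cqr->cpaddr and cqr->data and submit the
 * request instead of freeing it immediately.
 */
static int example_alloc_and_free_cqr(struct dasd_device *device)
{
	struct dasd_ccw_req *cqr;

	cqr = dasd_smalloc_request(0 /* placeholder magic */, 1 /* cplength */,
				   32 /* datasize */, device);
	if (IS_ERR(cqr))
		return PTR_ERR(cqr);
	/* ... build the channel program in cqr->cpaddr / cqr->data ... */
	dasd_sfree_request(cqr, device);
	return 0;
}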
813 | 813 | ||
814 | /* | 814 | /* |
815 | * Check discipline magic in cqr. | 815 | * Check discipline magic in cqr. |
816 | */ | 816 | */ |
817 | static inline int dasd_check_cqr(struct dasd_ccw_req *cqr) | 817 | static inline int dasd_check_cqr(struct dasd_ccw_req *cqr) |
818 | { | 818 | { |
819 | struct dasd_device *device; | 819 | struct dasd_device *device; |
820 | 820 | ||
821 | if (cqr == NULL) | 821 | if (cqr == NULL) |
822 | return -EINVAL; | 822 | return -EINVAL; |
823 | device = cqr->startdev; | 823 | device = cqr->startdev; |
824 | if (strncmp((char *) &cqr->magic, device->discipline->ebcname, 4)) { | 824 | if (strncmp((char *) &cqr->magic, device->discipline->ebcname, 4)) { |
825 | DBF_DEV_EVENT(DBF_WARNING, device, | 825 | DBF_DEV_EVENT(DBF_WARNING, device, |
826 | " dasd_ccw_req 0x%08x magic doesn't match" | 826 | " dasd_ccw_req 0x%08x magic doesn't match" |
827 | " discipline 0x%08x", | 827 | " discipline 0x%08x", |
828 | cqr->magic, | 828 | cqr->magic, |
829 | *(unsigned int *) device->discipline->name); | 829 | *(unsigned int *) device->discipline->name); |
830 | return -EINVAL; | 830 | return -EINVAL; |
831 | } | 831 | } |
832 | return 0; | 832 | return 0; |
833 | } | 833 | } |
834 | 834 | ||
835 | /* | 835 | /* |
836 | * Terminate the current i/o and set the request to clear_pending. | 836 | * Terminate the current i/o and set the request to clear_pending. |
837 | * Timer keeps device running. | 837 | * Timer keeps device running. |
838 | * ccw_device_clear can fail if the i/o subsystem | 838 | * ccw_device_clear can fail if the i/o subsystem |
839 | * is in a bad mood. | 839 | * is in a bad mood. |
840 | */ | 840 | */ |
841 | int dasd_term_IO(struct dasd_ccw_req *cqr) | 841 | int dasd_term_IO(struct dasd_ccw_req *cqr) |
842 | { | 842 | { |
843 | struct dasd_device *device; | 843 | struct dasd_device *device; |
844 | int retries, rc; | 844 | int retries, rc; |
845 | char errorstring[ERRORLENGTH]; | 845 | char errorstring[ERRORLENGTH]; |
846 | 846 | ||
847 | /* Check the cqr */ | 847 | /* Check the cqr */ |
848 | rc = dasd_check_cqr(cqr); | 848 | rc = dasd_check_cqr(cqr); |
849 | if (rc) | 849 | if (rc) |
850 | return rc; | 850 | return rc; |
851 | retries = 0; | 851 | retries = 0; |
852 | device = (struct dasd_device *) cqr->startdev; | 852 | device = (struct dasd_device *) cqr->startdev; |
853 | while ((retries < 5) && (cqr->status == DASD_CQR_IN_IO)) { | 853 | while ((retries < 5) && (cqr->status == DASD_CQR_IN_IO)) { |
854 | rc = ccw_device_clear(device->cdev, (long) cqr); | 854 | rc = ccw_device_clear(device->cdev, (long) cqr); |
855 | switch (rc) { | 855 | switch (rc) { |
856 | case 0: /* termination successful */ | 856 | case 0: /* termination successful */ |
857 | cqr->retries--; | 857 | cqr->retries--; |
858 | cqr->status = DASD_CQR_CLEAR_PENDING; | 858 | cqr->status = DASD_CQR_CLEAR_PENDING; |
859 | cqr->stopclk = get_clock(); | 859 | cqr->stopclk = get_clock(); |
860 | cqr->starttime = 0; | 860 | cqr->starttime = 0; |
861 | DBF_DEV_EVENT(DBF_DEBUG, device, | 861 | DBF_DEV_EVENT(DBF_DEBUG, device, |
862 | "terminate cqr %p successful", | 862 | "terminate cqr %p successful", |
863 | cqr); | 863 | cqr); |
864 | break; | 864 | break; |
865 | case -ENODEV: | 865 | case -ENODEV: |
866 | DBF_DEV_EVENT(DBF_ERR, device, "%s", | 866 | DBF_DEV_EVENT(DBF_ERR, device, "%s", |
867 | "device gone, retry"); | 867 | "device gone, retry"); |
868 | break; | 868 | break; |
869 | case -EIO: | 869 | case -EIO: |
870 | DBF_DEV_EVENT(DBF_ERR, device, "%s", | 870 | DBF_DEV_EVENT(DBF_ERR, device, "%s", |
871 | "I/O error, retry"); | 871 | "I/O error, retry"); |
872 | break; | 872 | break; |
873 | case -EINVAL: | 873 | case -EINVAL: |
874 | case -EBUSY: | 874 | case -EBUSY: |
875 | DBF_DEV_EVENT(DBF_ERR, device, "%s", | 875 | DBF_DEV_EVENT(DBF_ERR, device, "%s", |
876 | "device busy, retry later"); | 876 | "device busy, retry later"); |
877 | break; | 877 | break; |
878 | default: | 878 | default: |
879 | /* internal error 10 - unknown rc*/ | 879 | /* internal error 10 - unknown rc*/ |
880 | snprintf(errorstring, ERRORLENGTH, "10 %d", rc); | 880 | snprintf(errorstring, ERRORLENGTH, "10 %d", rc); |
881 | dev_err(&device->cdev->dev, "An error occurred in the " | 881 | dev_err(&device->cdev->dev, "An error occurred in the " |
882 | "DASD device driver, reason=%s\n", errorstring); | 882 | "DASD device driver, reason=%s\n", errorstring); |
883 | BUG(); | 883 | BUG(); |
884 | break; | 884 | break; |
885 | } | 885 | } |
886 | retries++; | 886 | retries++; |
887 | } | 887 | } |
888 | dasd_schedule_device_bh(device); | 888 | dasd_schedule_device_bh(device); |
889 | return rc; | 889 | return rc; |
890 | } | 890 | } |
891 | 891 | ||
892 | /* | 892 | /* |
893 | * Start the i/o. This start_IO can fail if the channel is really busy. | 893 | * Start the i/o. This start_IO can fail if the channel is really busy. |
894 | * In that case set up a timer to start the request later. | 894 | * In that case set up a timer to start the request later. |
895 | */ | 895 | */ |
896 | int dasd_start_IO(struct dasd_ccw_req *cqr) | 896 | int dasd_start_IO(struct dasd_ccw_req *cqr) |
897 | { | 897 | { |
898 | struct dasd_device *device; | 898 | struct dasd_device *device; |
899 | int rc; | 899 | int rc; |
900 | char errorstring[ERRORLENGTH]; | 900 | char errorstring[ERRORLENGTH]; |
901 | 901 | ||
902 | /* Check the cqr */ | 902 | /* Check the cqr */ |
903 | rc = dasd_check_cqr(cqr); | 903 | rc = dasd_check_cqr(cqr); |
904 | if (rc) { | 904 | if (rc) { |
905 | cqr->intrc = rc; | 905 | cqr->intrc = rc; |
906 | return rc; | 906 | return rc; |
907 | } | 907 | } |
908 | device = (struct dasd_device *) cqr->startdev; | 908 | device = (struct dasd_device *) cqr->startdev; |
909 | if (cqr->retries < 0) { | 909 | if (cqr->retries < 0) { |
910 | /* internal error 14 - start_IO run out of retries */ | 910 | /* internal error 14 - start_IO run out of retries */ |
911 | sprintf(errorstring, "14 %p", cqr); | 911 | sprintf(errorstring, "14 %p", cqr); |
912 | dev_err(&device->cdev->dev, "An error occurred in the DASD " | 912 | dev_err(&device->cdev->dev, "An error occurred in the DASD " |
913 | "device driver, reason=%s\n", errorstring); | 913 | "device driver, reason=%s\n", errorstring); |
914 | cqr->status = DASD_CQR_ERROR; | 914 | cqr->status = DASD_CQR_ERROR; |
915 | return -EIO; | 915 | return -EIO; |
916 | } | 916 | } |
917 | cqr->startclk = get_clock(); | 917 | cqr->startclk = get_clock(); |
918 | cqr->starttime = jiffies; | 918 | cqr->starttime = jiffies; |
919 | cqr->retries--; | 919 | cqr->retries--; |
920 | if (cqr->cpmode == 1) { | 920 | if (cqr->cpmode == 1) { |
921 | rc = ccw_device_tm_start(device->cdev, cqr->cpaddr, | 921 | rc = ccw_device_tm_start(device->cdev, cqr->cpaddr, |
922 | (long) cqr, cqr->lpm); | 922 | (long) cqr, cqr->lpm); |
923 | } else { | 923 | } else { |
924 | rc = ccw_device_start(device->cdev, cqr->cpaddr, | 924 | rc = ccw_device_start(device->cdev, cqr->cpaddr, |
925 | (long) cqr, cqr->lpm, 0); | 925 | (long) cqr, cqr->lpm, 0); |
926 | } | 926 | } |
927 | switch (rc) { | 927 | switch (rc) { |
928 | case 0: | 928 | case 0: |
929 | cqr->status = DASD_CQR_IN_IO; | 929 | cqr->status = DASD_CQR_IN_IO; |
930 | break; | 930 | break; |
931 | case -EBUSY: | 931 | case -EBUSY: |
932 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", | 932 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", |
933 | "start_IO: device busy, retry later"); | 933 | "start_IO: device busy, retry later"); |
934 | break; | 934 | break; |
935 | case -ETIMEDOUT: | 935 | case -ETIMEDOUT: |
936 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", | 936 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", |
937 | "start_IO: request timeout, retry later"); | 937 | "start_IO: request timeout, retry later"); |
938 | break; | 938 | break; |
939 | case -EACCES: | 939 | case -EACCES: |
940 | /* -EACCES indicates that the request used only a | 940 | /* -EACCES indicates that the request used only a |
941 | * subset of the available paths and all these | 941 | * subset of the available paths and all these |
942 | * paths are gone. | 942 | * paths are gone. |
943 | * Do a retry with all available paths. | 943 | * Do a retry with all available paths. |
944 | */ | 944 | */ |
945 | cqr->lpm = LPM_ANYPATH; | 945 | cqr->lpm = LPM_ANYPATH; |
946 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", | 946 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", |
947 | "start_IO: selected pathes gone," | 947 | "start_IO: selected pathes gone," |
948 | " retry on all pathes"); | 948 | " retry on all pathes"); |
949 | break; | 949 | break; |
950 | case -ENODEV: | 950 | case -ENODEV: |
951 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", | 951 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", |
952 | "start_IO: -ENODEV device gone, retry"); | 952 | "start_IO: -ENODEV device gone, retry"); |
953 | break; | 953 | break; |
954 | case -EIO: | 954 | case -EIO: |
955 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", | 955 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", |
956 | "start_IO: -EIO device gone, retry"); | 956 | "start_IO: -EIO device gone, retry"); |
957 | break; | 957 | break; |
958 | case -EINVAL: | 958 | case -EINVAL: |
959 | /* most likely caused in power management context */ | 959 | /* most likely caused in power management context */ |
960 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", | 960 | DBF_DEV_EVENT(DBF_DEBUG, device, "%s", |
961 | "start_IO: -EINVAL device currently " | 961 | "start_IO: -EINVAL device currently " |
962 | "not accessible"); | 962 | "not accessible"); |
963 | break; | 963 | break; |
964 | default: | 964 | default: |
965 | /* internal error 11 - unknown rc */ | 965 | /* internal error 11 - unknown rc */ |
966 | snprintf(errorstring, ERRORLENGTH, "11 %d", rc); | 966 | snprintf(errorstring, ERRORLENGTH, "11 %d", rc); |
967 | dev_err(&device->cdev->dev, | 967 | dev_err(&device->cdev->dev, |
968 | "An error occurred in the DASD device driver, " | 968 | "An error occurred in the DASD device driver, " |
969 | "reason=%s\n", errorstring); | 969 | "reason=%s\n", errorstring); |
970 | BUG(); | 970 | BUG(); |
971 | break; | 971 | break; |
972 | } | 972 | } |
973 | cqr->intrc = rc; | 973 | cqr->intrc = rc; |
974 | return rc; | 974 | return rc; |
975 | } | 975 | } |
976 | 976 | ||
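The switch above defines the contract dasd_start_IO() offers its callers: a return of 0 means the request is now DASD_CQR_IN_IO and the device timer should be armed with the request's expiry, while the error returns leave the request on the queue for a later attempt. A minimal sketch of that caller side, assuming it lives inside dasd.c where the helpers below are in scope; the function name and the half-second retry value are illustrative only (compare __dasd_device_start_head() further down):

/* Illustrative sketch, not part of this commit. */
static void example_start_or_retry(struct dasd_device *device,
				   struct dasd_ccw_req *cqr)
{
	int rc;

	rc = device->discipline->start_IO(cqr);
	if (rc == 0)
		/* started: let the watchdog fire after cqr->expires jiffies */
		dasd_device_set_timer(device, cqr->expires);
	else
		/* not started: arm the timer so the bh retries the queue head later */
		dasd_device_set_timer(device, HZ / 2);
}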
977 | /* | 977 | /* |
978 | * Timeout function for dasd devices. This is used for different purposes | 978 | * Timeout function for dasd devices. This is used for different purposes |
979 | * 1) missing interrupt handler for normal operation | 979 | * 1) missing interrupt handler for normal operation |
980 | * 2) delayed start of request where start_IO failed with -EBUSY | 980 | * 2) delayed start of request where start_IO failed with -EBUSY |
981 | * 3) timeout for missing state change interrupts | 981 | * 3) timeout for missing state change interrupts |
982 | * The head of the ccw queue will have status DASD_CQR_IN_IO for 1), | 982 | * The head of the ccw queue will have status DASD_CQR_IN_IO for 1), |
983 | * DASD_CQR_QUEUED for 2) and 3). | 983 | * DASD_CQR_QUEUED for 2) and 3). |
984 | */ | 984 | */ |
985 | static void dasd_device_timeout(unsigned long ptr) | 985 | static void dasd_device_timeout(unsigned long ptr) |
986 | { | 986 | { |
987 | unsigned long flags; | 987 | unsigned long flags; |
988 | struct dasd_device *device; | 988 | struct dasd_device *device; |
989 | 989 | ||
990 | device = (struct dasd_device *) ptr; | 990 | device = (struct dasd_device *) ptr; |
991 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); | 991 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); |
992 | /* re-activate request queue */ | 992 | /* re-activate request queue */ |
993 | dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); | 993 | dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); |
994 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); | 994 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); |
995 | dasd_schedule_device_bh(device); | 995 | dasd_schedule_device_bh(device); |
996 | } | 996 | } |
997 | 997 | ||
998 | /* | 998 | /* |
999 | * Set up a timeout for a device in jiffies. | 999 | * Set up a timeout for a device in jiffies. |
1000 | */ | 1000 | */ |
1001 | void dasd_device_set_timer(struct dasd_device *device, int expires) | 1001 | void dasd_device_set_timer(struct dasd_device *device, int expires) |
1002 | { | 1002 | { |
1003 | if (expires == 0) | 1003 | if (expires == 0) |
1004 | del_timer(&device->timer); | 1004 | del_timer(&device->timer); |
1005 | else | 1005 | else |
1006 | mod_timer(&device->timer, jiffies + expires); | 1006 | mod_timer(&device->timer, jiffies + expires); |
1007 | } | 1007 | } |
1008 | 1008 | ||
1009 | /* | 1009 | /* |
1010 | * Clear timeout for a device. | 1010 | * Clear timeout for a device. |
1011 | */ | 1011 | */ |
1012 | void dasd_device_clear_timer(struct dasd_device *device) | 1012 | void dasd_device_clear_timer(struct dasd_device *device) |
1013 | { | 1013 | { |
1014 | del_timer(&device->timer); | 1014 | del_timer(&device->timer); |
1015 | } | 1015 | } |
1016 | 1016 | ||
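dasd_device_set_timer() and dasd_device_clear_timer() above are thin wrappers around mod_timer()/del_timer(): the expiry is given in jiffies and an expiry of 0 disarms the timer. A small usage sketch, assuming it sits inside dasd.c where struct dasd_device and these helpers are visible; the function name is made up for illustration:

/* Illustrative sketch, not part of this commit. */
static void example_timer_usage(struct dasd_device *device)
{
	/* arm a 5 second watchdog for the device ... */
	dasd_device_set_timer(device, 5 * HZ);

	/* ... disarm it once the expected interrupt has arrived ... */
	dasd_device_clear_timer(device);

	/* ... or equivalently, pass an expiry of 0 */
	dasd_device_set_timer(device, 0);
}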
1017 | static void dasd_handle_killed_request(struct ccw_device *cdev, | 1017 | static void dasd_handle_killed_request(struct ccw_device *cdev, |
1018 | unsigned long intparm) | 1018 | unsigned long intparm) |
1019 | { | 1019 | { |
1020 | struct dasd_ccw_req *cqr; | 1020 | struct dasd_ccw_req *cqr; |
1021 | struct dasd_device *device; | 1021 | struct dasd_device *device; |
1022 | 1022 | ||
1023 | if (!intparm) | 1023 | if (!intparm) |
1024 | return; | 1024 | return; |
1025 | cqr = (struct dasd_ccw_req *) intparm; | 1025 | cqr = (struct dasd_ccw_req *) intparm; |
1026 | if (cqr->status != DASD_CQR_IN_IO) { | 1026 | if (cqr->status != DASD_CQR_IN_IO) { |
1027 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, | 1027 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, |
1028 | "invalid status in handle_killed_request: " | 1028 | "invalid status in handle_killed_request: " |
1029 | "%02x", cqr->status); | 1029 | "%02x", cqr->status); |
1030 | return; | 1030 | return; |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | device = dasd_device_from_cdev_locked(cdev); | 1033 | device = dasd_device_from_cdev_locked(cdev); |
1034 | if (IS_ERR(device)) { | 1034 | if (IS_ERR(device)) { |
1035 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", | 1035 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", |
1036 | "unable to get device from cdev"); | 1036 | "unable to get device from cdev"); |
1037 | return; | 1037 | return; |
1038 | } | 1038 | } |
1039 | 1039 | ||
1040 | if (!cqr->startdev || | 1040 | if (!cqr->startdev || |
1041 | device != cqr->startdev || | 1041 | device != cqr->startdev || |
1042 | strncmp(cqr->startdev->discipline->ebcname, | 1042 | strncmp(cqr->startdev->discipline->ebcname, |
1043 | (char *) &cqr->magic, 4)) { | 1043 | (char *) &cqr->magic, 4)) { |
1044 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", | 1044 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", |
1045 | "invalid device in request"); | 1045 | "invalid device in request"); |
1046 | dasd_put_device(device); | 1046 | dasd_put_device(device); |
1047 | return; | 1047 | return; |
1048 | } | 1048 | } |
1049 | 1049 | ||
1050 | /* Schedule request to be retried. */ | 1050 | /* Schedule request to be retried. */ |
1051 | cqr->status = DASD_CQR_QUEUED; | 1051 | cqr->status = DASD_CQR_QUEUED; |
1052 | 1052 | ||
1053 | dasd_device_clear_timer(device); | 1053 | dasd_device_clear_timer(device); |
1054 | dasd_schedule_device_bh(device); | 1054 | dasd_schedule_device_bh(device); |
1055 | dasd_put_device(device); | 1055 | dasd_put_device(device); |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | void dasd_generic_handle_state_change(struct dasd_device *device) | 1058 | void dasd_generic_handle_state_change(struct dasd_device *device) |
1059 | { | 1059 | { |
1060 | /* First of all start sense subsystem status request. */ | 1060 | /* First of all start sense subsystem status request. */ |
1061 | dasd_eer_snss(device); | 1061 | dasd_eer_snss(device); |
1062 | 1062 | ||
1063 | dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); | 1063 | dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); |
1064 | dasd_schedule_device_bh(device); | 1064 | dasd_schedule_device_bh(device); |
1065 | if (device->block) | 1065 | if (device->block) |
1066 | dasd_schedule_block_bh(device->block); | 1066 | dasd_schedule_block_bh(device->block); |
1067 | } | 1067 | } |
1068 | 1068 | ||
1069 | /* | 1069 | /* |
1070 | * Interrupt handler for "normal" ssch-io based dasd devices. | 1070 | * Interrupt handler for "normal" ssch-io based dasd devices. |
1071 | */ | 1071 | */ |
1072 | void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, | 1072 | void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, |
1073 | struct irb *irb) | 1073 | struct irb *irb) |
1074 | { | 1074 | { |
1075 | struct dasd_ccw_req *cqr, *next; | 1075 | struct dasd_ccw_req *cqr, *next; |
1076 | struct dasd_device *device; | 1076 | struct dasd_device *device; |
1077 | unsigned long long now; | 1077 | unsigned long long now; |
1078 | int expires; | 1078 | int expires; |
1079 | 1079 | ||
1080 | if (IS_ERR(irb)) { | 1080 | if (IS_ERR(irb)) { |
1081 | switch (PTR_ERR(irb)) { | 1081 | switch (PTR_ERR(irb)) { |
1082 | case -EIO: | 1082 | case -EIO: |
1083 | break; | 1083 | break; |
1084 | case -ETIMEDOUT: | 1084 | case -ETIMEDOUT: |
1085 | DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " | 1085 | DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " |
1086 | "request timed out\n", __func__); | 1086 | "request timed out\n", __func__); |
1087 | break; | 1087 | break; |
1088 | default: | 1088 | default: |
1089 | DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " | 1089 | DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " |
1090 | "unknown error %ld\n", __func__, | 1090 | "unknown error %ld\n", __func__, |
1091 | PTR_ERR(irb)); | 1091 | PTR_ERR(irb)); |
1092 | } | 1092 | } |
1093 | dasd_handle_killed_request(cdev, intparm); | 1093 | dasd_handle_killed_request(cdev, intparm); |
1094 | return; | 1094 | return; |
1095 | } | 1095 | } |
1096 | 1096 | ||
1097 | now = get_clock(); | 1097 | now = get_clock(); |
1098 | 1098 | ||
1099 | /* check for unsolicited interrupts */ | 1099 | /* check for unsolicited interrupts */ |
1100 | cqr = (struct dasd_ccw_req *) intparm; | 1100 | cqr = (struct dasd_ccw_req *) intparm; |
1101 | if (!cqr || ((scsw_cc(&irb->scsw) == 1) && | 1101 | if (!cqr || ((scsw_cc(&irb->scsw) == 1) && |
1102 | (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC) && | 1102 | (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC) && |
1103 | (scsw_stctl(&irb->scsw) & SCSW_STCTL_STATUS_PEND))) { | 1103 | (scsw_stctl(&irb->scsw) & SCSW_STCTL_STATUS_PEND))) { |
1104 | if (cqr && cqr->status == DASD_CQR_IN_IO) | 1104 | if (cqr && cqr->status == DASD_CQR_IN_IO) |
1105 | cqr->status = DASD_CQR_QUEUED; | 1105 | cqr->status = DASD_CQR_QUEUED; |
1106 | device = dasd_device_from_cdev_locked(cdev); | 1106 | device = dasd_device_from_cdev_locked(cdev); |
1107 | if (!IS_ERR(device)) { | 1107 | if (!IS_ERR(device)) { |
1108 | dasd_device_clear_timer(device); | 1108 | dasd_device_clear_timer(device); |
1109 | device->discipline->handle_unsolicited_interrupt(device, | 1109 | device->discipline->handle_unsolicited_interrupt(device, |
1110 | irb); | 1110 | irb); |
1111 | dasd_put_device(device); | 1111 | dasd_put_device(device); |
1112 | } | 1112 | } |
1113 | return; | 1113 | return; |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | device = (struct dasd_device *) cqr->startdev; | 1116 | device = (struct dasd_device *) cqr->startdev; |
1117 | if (!device || | 1117 | if (!device || |
1118 | strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) { | 1118 | strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) { |
1119 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", | 1119 | DBF_EVENT_DEVID(DBF_DEBUG, cdev, "%s", |
1120 | "invalid device in request"); | 1120 | "invalid device in request"); |
1121 | return; | 1121 | return; |
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | /* Check for clear pending */ | 1124 | /* Check for clear pending */ |
1125 | if (cqr->status == DASD_CQR_CLEAR_PENDING && | 1125 | if (cqr->status == DASD_CQR_CLEAR_PENDING && |
1126 | scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) { | 1126 | scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) { |
1127 | cqr->status = DASD_CQR_CLEARED; | 1127 | cqr->status = DASD_CQR_CLEARED; |
1128 | dasd_device_clear_timer(device); | 1128 | dasd_device_clear_timer(device); |
1129 | wake_up(&dasd_flush_wq); | 1129 | wake_up(&dasd_flush_wq); |
1130 | dasd_schedule_device_bh(device); | 1130 | dasd_schedule_device_bh(device); |
1131 | return; | 1131 | return; |
1132 | } | 1132 | } |
1133 | 1133 | ||
1134 | /* check status - the request might have been killed by dyn detach */ | 1134 | /* check status - the request might have been killed by dyn detach */ |
1135 | if (cqr->status != DASD_CQR_IN_IO) { | 1135 | if (cqr->status != DASD_CQR_IN_IO) { |
1136 | DBF_DEV_EVENT(DBF_DEBUG, device, "invalid status: bus_id %s, " | 1136 | DBF_DEV_EVENT(DBF_DEBUG, device, "invalid status: bus_id %s, " |
1137 | "status %02x", dev_name(&cdev->dev), cqr->status); | 1137 | "status %02x", dev_name(&cdev->dev), cqr->status); |
1138 | return; | 1138 | return; |
1139 | } | 1139 | } |
1140 | 1140 | ||
1141 | next = NULL; | 1141 | next = NULL; |
1142 | expires = 0; | 1142 | expires = 0; |
1143 | if (scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) && | 1143 | if (scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) && |
1144 | scsw_cstat(&irb->scsw) == 0) { | 1144 | scsw_cstat(&irb->scsw) == 0) { |
1145 | /* request was completed successfully */ | 1145 | /* request was completed successfully */ |
1146 | cqr->status = DASD_CQR_SUCCESS; | 1146 | cqr->status = DASD_CQR_SUCCESS; |
1147 | cqr->stopclk = now; | 1147 | cqr->stopclk = now; |
1148 | /* Start first request on queue if possible -> fast_io. */ | 1148 | /* Start first request on queue if possible -> fast_io. */ |
1149 | if (cqr->devlist.next != &device->ccw_queue) { | 1149 | if (cqr->devlist.next != &device->ccw_queue) { |
1150 | next = list_entry(cqr->devlist.next, | 1150 | next = list_entry(cqr->devlist.next, |
1151 | struct dasd_ccw_req, devlist); | 1151 | struct dasd_ccw_req, devlist); |
1152 | } | 1152 | } |
1153 | } else { /* error */ | 1153 | } else { /* error */ |
1154 | memcpy(&cqr->irb, irb, sizeof(struct irb)); | 1154 | memcpy(&cqr->irb, irb, sizeof(struct irb)); |
1155 | /* log sense for every failed I/O to s390 debugfeature */ | 1155 | /* log sense for every failed I/O to s390 debugfeature */ |
1156 | dasd_log_sense_dbf(cqr, irb); | 1156 | dasd_log_sense_dbf(cqr, irb); |
1157 | if (device->features & DASD_FEATURE_ERPLOG) { | 1157 | if (device->features & DASD_FEATURE_ERPLOG) { |
1158 | dasd_log_sense(cqr, irb); | 1158 | dasd_log_sense(cqr, irb); |
1159 | } | 1159 | } |
1160 | 1160 | ||
1161 | /* | 1161 | /* |
1162 | * If we don't want complex ERP for this request, then just | 1162 | * If we don't want complex ERP for this request, then just |
1163 | * reset this and retry it in the fastpath | 1163 | * reset this and retry it in the fastpath |
1164 | */ | 1164 | */ |
1165 | if (!test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags) && | 1165 | if (!test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags) && |
1166 | cqr->retries > 0) { | 1166 | cqr->retries > 0) { |
1167 | if (cqr->lpm == LPM_ANYPATH) | 1167 | if (cqr->lpm == LPM_ANYPATH) |
1168 | DBF_DEV_EVENT(DBF_DEBUG, device, | 1168 | DBF_DEV_EVENT(DBF_DEBUG, device, |
1169 | "default ERP in fastpath " | 1169 | "default ERP in fastpath " |
1170 | "(%i retries left)", | 1170 | "(%i retries left)", |
1171 | cqr->retries); | 1171 | cqr->retries); |
1172 | cqr->lpm = LPM_ANYPATH; | 1172 | cqr->lpm = LPM_ANYPATH; |
1173 | cqr->status = DASD_CQR_QUEUED; | 1173 | cqr->status = DASD_CQR_QUEUED; |
1174 | next = cqr; | 1174 | next = cqr; |
1175 | } else | 1175 | } else |
1176 | cqr->status = DASD_CQR_ERROR; | 1176 | cqr->status = DASD_CQR_ERROR; |
1177 | } | 1177 | } |
1178 | if (next && (next->status == DASD_CQR_QUEUED) && | 1178 | if (next && (next->status == DASD_CQR_QUEUED) && |
1179 | (!device->stopped)) { | 1179 | (!device->stopped)) { |
1180 | if (device->discipline->start_IO(next) == 0) | 1180 | if (device->discipline->start_IO(next) == 0) |
1181 | expires = next->expires; | 1181 | expires = next->expires; |
1182 | } | 1182 | } |
1183 | if (expires != 0) | 1183 | if (expires != 0) |
1184 | dasd_device_set_timer(device, expires); | 1184 | dasd_device_set_timer(device, expires); |
1185 | else | 1185 | else |
1186 | dasd_device_clear_timer(device); | 1186 | dasd_device_clear_timer(device); |
1187 | dasd_schedule_device_bh(device); | 1187 | dasd_schedule_device_bh(device); |
1188 | } | 1188 | } |
1189 | 1189 | ||
1190 | enum uc_todo dasd_generic_uc_handler(struct ccw_device *cdev, struct irb *irb) | 1190 | enum uc_todo dasd_generic_uc_handler(struct ccw_device *cdev, struct irb *irb) |
1191 | { | 1191 | { |
1192 | struct dasd_device *device; | 1192 | struct dasd_device *device; |
1193 | 1193 | ||
1194 | device = dasd_device_from_cdev_locked(cdev); | 1194 | device = dasd_device_from_cdev_locked(cdev); |
1195 | 1195 | ||
1196 | if (IS_ERR(device)) | 1196 | if (IS_ERR(device)) |
1197 | goto out; | 1197 | goto out; |
1198 | if (test_bit(DASD_FLAG_OFFLINE, &device->flags) || | 1198 | if (test_bit(DASD_FLAG_OFFLINE, &device->flags) || |
1199 | device->state != device->target || | 1199 | device->state != device->target || |
1200 | !device->discipline->handle_unsolicited_interrupt){ | 1200 | !device->discipline->handle_unsolicited_interrupt){ |
1201 | dasd_put_device(device); | 1201 | dasd_put_device(device); |
1202 | goto out; | 1202 | goto out; |
1203 | } | 1203 | } |
1204 | 1204 | ||
1205 | dasd_device_clear_timer(device); | 1205 | dasd_device_clear_timer(device); |
1206 | device->discipline->handle_unsolicited_interrupt(device, irb); | 1206 | device->discipline->handle_unsolicited_interrupt(device, irb); |
1207 | dasd_put_device(device); | 1207 | dasd_put_device(device); |
1208 | out: | 1208 | out: |
1209 | return UC_TODO_RETRY; | 1209 | return UC_TODO_RETRY; |
1210 | } | 1210 | } |
1211 | EXPORT_SYMBOL_GPL(dasd_generic_uc_handler); | 1211 | EXPORT_SYMBOL_GPL(dasd_generic_uc_handler); |
1212 | 1212 | ||
1213 | /* | 1213 | /* |
1214 | * If we have an error on a dasd_block layer request then we cancel | 1214 | * If we have an error on a dasd_block layer request then we cancel |
1215 | * and return all further requests from the same dasd_block as well. | 1215 | * and return all further requests from the same dasd_block as well. |
1216 | */ | 1216 | */ |
1217 | static void __dasd_device_recovery(struct dasd_device *device, | 1217 | static void __dasd_device_recovery(struct dasd_device *device, |
1218 | struct dasd_ccw_req *ref_cqr) | 1218 | struct dasd_ccw_req *ref_cqr) |
1219 | { | 1219 | { |
1220 | struct list_head *l, *n; | 1220 | struct list_head *l, *n; |
1221 | struct dasd_ccw_req *cqr; | 1221 | struct dasd_ccw_req *cqr; |
1222 | 1222 | ||
1223 | /* | 1223 | /* |
1224 | * only requeue requests that came from the dasd_block layer | 1224 | * only requeue requests that came from the dasd_block layer |
1225 | */ | 1225 | */ |
1226 | if (!ref_cqr->block) | 1226 | if (!ref_cqr->block) |
1227 | return; | 1227 | return; |
1228 | 1228 | ||
1229 | list_for_each_safe(l, n, &device->ccw_queue) { | 1229 | list_for_each_safe(l, n, &device->ccw_queue) { |
1230 | cqr = list_entry(l, struct dasd_ccw_req, devlist); | 1230 | cqr = list_entry(l, struct dasd_ccw_req, devlist); |
1231 | if (cqr->status == DASD_CQR_QUEUED && | 1231 | if (cqr->status == DASD_CQR_QUEUED && |
1232 | ref_cqr->block == cqr->block) { | 1232 | ref_cqr->block == cqr->block) { |
1233 | cqr->status = DASD_CQR_CLEARED; | 1233 | cqr->status = DASD_CQR_CLEARED; |
1234 | } | 1234 | } |
1235 | } | 1235 | } |
1236 | } | 1236 | } |
1237 | 1237 | ||
1238 | /* | 1238 | /* |
1239 | * Remove those ccw requests from the queue that need to be returned | 1239 | * Remove those ccw requests from the queue that need to be returned |
1240 | * to the upper layer. | 1240 | * to the upper layer. |
1241 | */ | 1241 | */ |
1242 | static void __dasd_device_process_ccw_queue(struct dasd_device *device, | 1242 | static void __dasd_device_process_ccw_queue(struct dasd_device *device, |
1243 | struct list_head *final_queue) | 1243 | struct list_head *final_queue) |
1244 | { | 1244 | { |
1245 | struct list_head *l, *n; | 1245 | struct list_head *l, *n; |
1246 | struct dasd_ccw_req *cqr; | 1246 | struct dasd_ccw_req *cqr; |
1247 | 1247 | ||
1248 | /* Process request with final status. */ | 1248 | /* Process request with final status. */ |
1249 | list_for_each_safe(l, n, &device->ccw_queue) { | 1249 | list_for_each_safe(l, n, &device->ccw_queue) { |
1250 | cqr = list_entry(l, struct dasd_ccw_req, devlist); | 1250 | cqr = list_entry(l, struct dasd_ccw_req, devlist); |
1251 | 1251 | ||
1252 | /* Stop list processing at the first non-final request. */ | 1252 | /* Stop list processing at the first non-final request. */ |
1253 | if (cqr->status == DASD_CQR_QUEUED || | 1253 | if (cqr->status == DASD_CQR_QUEUED || |
1254 | cqr->status == DASD_CQR_IN_IO || | 1254 | cqr->status == DASD_CQR_IN_IO || |
1255 | cqr->status == DASD_CQR_CLEAR_PENDING) | 1255 | cqr->status == DASD_CQR_CLEAR_PENDING) |
1256 | break; | 1256 | break; |
1257 | if (cqr->status == DASD_CQR_ERROR) { | 1257 | if (cqr->status == DASD_CQR_ERROR) { |
1258 | __dasd_device_recovery(device, cqr); | 1258 | __dasd_device_recovery(device, cqr); |
1259 | } | 1259 | } |
1260 | /* Rechain finished requests to final queue */ | 1260 | /* Rechain finished requests to final queue */ |
1261 | list_move_tail(&cqr->devlist, final_queue); | 1261 | list_move_tail(&cqr->devlist, final_queue); |
1262 | } | 1262 | } |
1263 | } | 1263 | } |
1264 | 1264 | ||
1265 | /* | 1265 | /* |
1266 | * the cqrs from the final queue are returned to the upper layer | 1266 | * the cqrs from the final queue are returned to the upper layer |
1267 | * by setting a dasd_block state and calling the callback function | 1267 | * by setting a dasd_block state and calling the callback function |
1268 | */ | 1268 | */ |
1269 | static void __dasd_device_process_final_queue(struct dasd_device *device, | 1269 | static void __dasd_device_process_final_queue(struct dasd_device *device, |
1270 | struct list_head *final_queue) | 1270 | struct list_head *final_queue) |
1271 | { | 1271 | { |
1272 | struct list_head *l, *n; | 1272 | struct list_head *l, *n; |
1273 | struct dasd_ccw_req *cqr; | 1273 | struct dasd_ccw_req *cqr; |
1274 | struct dasd_block *block; | 1274 | struct dasd_block *block; |
1275 | void (*callback)(struct dasd_ccw_req *, void *data); | 1275 | void (*callback)(struct dasd_ccw_req *, void *data); |
1276 | void *callback_data; | 1276 | void *callback_data; |
1277 | char errorstring[ERRORLENGTH]; | 1277 | char errorstring[ERRORLENGTH]; |
1278 | 1278 | ||
1279 | list_for_each_safe(l, n, final_queue) { | 1279 | list_for_each_safe(l, n, final_queue) { |
1280 | cqr = list_entry(l, struct dasd_ccw_req, devlist); | 1280 | cqr = list_entry(l, struct dasd_ccw_req, devlist); |
1281 | list_del_init(&cqr->devlist); | 1281 | list_del_init(&cqr->devlist); |
1282 | block = cqr->block; | 1282 | block = cqr->block; |
1283 | callback = cqr->callback; | 1283 | callback = cqr->callback; |
1284 | callback_data = cqr->callback_data; | 1284 | callback_data = cqr->callback_data; |
1285 | if (block) | 1285 | if (block) |
1286 | spin_lock_bh(&block->queue_lock); | 1286 | spin_lock_bh(&block->queue_lock); |
1287 | switch (cqr->status) { | 1287 | switch (cqr->status) { |
1288 | case DASD_CQR_SUCCESS: | 1288 | case DASD_CQR_SUCCESS: |
1289 | cqr->status = DASD_CQR_DONE; | 1289 | cqr->status = DASD_CQR_DONE; |
1290 | break; | 1290 | break; |
1291 | case DASD_CQR_ERROR: | 1291 | case DASD_CQR_ERROR: |
1292 | cqr->status = DASD_CQR_NEED_ERP; | 1292 | cqr->status = DASD_CQR_NEED_ERP; |
1293 | break; | 1293 | break; |
1294 | case DASD_CQR_CLEARED: | 1294 | case DASD_CQR_CLEARED: |
1295 | cqr->status = DASD_CQR_TERMINATED; | 1295 | cqr->status = DASD_CQR_TERMINATED; |
1296 | break; | 1296 | break; |
1297 | default: | 1297 | default: |
1298 | /* internal error 12 - wrong cqr status */ | 1298 | /* internal error 12 - wrong cqr status */ |
1299 | snprintf(errorstring, ERRORLENGTH, "12 %p %02x", cqr, cqr->status); | 1299 | snprintf(errorstring, ERRORLENGTH, "12 %p %02x", cqr, cqr->status); |
1300 | dev_err(&device->cdev->dev, | 1300 | dev_err(&device->cdev->dev, |
1301 | "An error occurred in the DASD device driver, " | 1301 | "An error occurred in the DASD device driver, " |
1302 | "reason=%s\n", errorstring); | 1302 | "reason=%s\n", errorstring); |
1303 | BUG(); | 1303 | BUG(); |
1304 | } | 1304 | } |
1305 | if (cqr->callback != NULL) | 1305 | if (cqr->callback != NULL) |
1306 | (callback)(cqr, callback_data); | 1306 | (callback)(cqr, callback_data); |
1307 | if (block) | 1307 | if (block) |
1308 | spin_unlock_bh(&block->queue_lock); | 1308 | spin_unlock_bh(&block->queue_lock); |
1309 | } | 1309 | } |
1310 | } | 1310 | } |
1311 | 1311 | ||
1312 | /* | 1312 | /* |
1313 | * Take a look at the first request on the ccw queue and check | 1313 | * Take a look at the first request on the ccw queue and check |
1314 | * if it reached its expire time. If so, terminate the IO. | 1314 | * if it reached its expire time. If so, terminate the IO. |
1315 | */ | 1315 | */ |
1316 | static void __dasd_device_check_expire(struct dasd_device *device) | 1316 | static void __dasd_device_check_expire(struct dasd_device *device) |
1317 | { | 1317 | { |
1318 | struct dasd_ccw_req *cqr; | 1318 | struct dasd_ccw_req *cqr; |
1319 | 1319 | ||
1320 | if (list_empty(&device->ccw_queue)) | 1320 | if (list_empty(&device->ccw_queue)) |
1321 | return; | 1321 | return; |
1322 | cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); | 1322 | cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); |
1323 | if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) && | 1323 | if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) && |
1324 | (time_after_eq(jiffies, cqr->expires + cqr->starttime))) { | 1324 | (time_after_eq(jiffies, cqr->expires + cqr->starttime))) { |
1325 | if (device->discipline->term_IO(cqr) != 0) { | 1325 | if (device->discipline->term_IO(cqr) != 0) { |
1326 | /* Hmpf, try again in 5 sec */ | 1326 | /* Hmpf, try again in 5 sec */ |
1327 | dev_err(&device->cdev->dev, | 1327 | dev_err(&device->cdev->dev, |
1328 | "cqr %p timed out (%lus) but cannot be " | 1328 | "cqr %p timed out (%lus) but cannot be " |
1329 | "ended, retrying in 5 s\n", | 1329 | "ended, retrying in 5 s\n", |
1330 | cqr, (cqr->expires/HZ)); | 1330 | cqr, (cqr->expires/HZ)); |
1331 | cqr->expires += 5*HZ; | 1331 | cqr->expires += 5*HZ; |
1332 | dasd_device_set_timer(device, 5*HZ); | 1332 | dasd_device_set_timer(device, 5*HZ); |
1333 | } else { | 1333 | } else { |
1334 | dev_err(&device->cdev->dev, | 1334 | dev_err(&device->cdev->dev, |
1335 | "cqr %p timed out (%lus), %i retries " | 1335 | "cqr %p timed out (%lus), %i retries " |
1336 | "remaining\n", cqr, (cqr->expires/HZ), | 1336 | "remaining\n", cqr, (cqr->expires/HZ), |
1337 | cqr->retries); | 1337 | cqr->retries); |
1338 | } | 1338 | } |
1339 | } | 1339 | } |
1340 | } | 1340 | } |
1341 | 1341 | ||
1342 | /* | 1342 | /* |
1343 | * Take a look at the first request on the ccw queue and check | 1343 | * Take a look at the first request on the ccw queue and check |
1344 | * if it needs to be started. | 1344 | * if it needs to be started. |
1345 | */ | 1345 | */ |
1346 | static void __dasd_device_start_head(struct dasd_device *device) | 1346 | static void __dasd_device_start_head(struct dasd_device *device) |
1347 | { | 1347 | { |
1348 | struct dasd_ccw_req *cqr; | 1348 | struct dasd_ccw_req *cqr; |
1349 | int rc; | 1349 | int rc; |
1350 | 1350 | ||
1351 | if (list_empty(&device->ccw_queue)) | 1351 | if (list_empty(&device->ccw_queue)) |
1352 | return; | 1352 | return; |
1353 | cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); | 1353 | cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); |
1354 | if (cqr->status != DASD_CQR_QUEUED) | 1354 | if (cqr->status != DASD_CQR_QUEUED) |
1355 | return; | 1355 | return; |
1356 | /* when device is stopped, return request to previous layer */ | 1356 | /* when device is stopped, return request to previous layer */ |
1357 | if (device->stopped) { | 1357 | if (device->stopped) { |
1358 | cqr->status = DASD_CQR_CLEARED; | 1358 | cqr->status = DASD_CQR_CLEARED; |
1359 | dasd_schedule_device_bh(device); | 1359 | dasd_schedule_device_bh(device); |
1360 | return; | 1360 | return; |
1361 | } | 1361 | } |
1362 | 1362 | ||
1363 | rc = device->discipline->start_IO(cqr); | 1363 | rc = device->discipline->start_IO(cqr); |
1364 | if (rc == 0) | 1364 | if (rc == 0) |
1365 | dasd_device_set_timer(device, cqr->expires); | 1365 | dasd_device_set_timer(device, cqr->expires); |
1366 | else if (rc == -EACCES) { | 1366 | else if (rc == -EACCES) { |
1367 | dasd_schedule_device_bh(device); | 1367 | dasd_schedule_device_bh(device); |
1368 | } else | 1368 | } else |
1369 | /* Hmpf, try again in 1/2 sec */ | 1369 | /* Hmpf, try again in 1/2 sec */ |
1370 | dasd_device_set_timer(device, 50); | 1370 | dasd_device_set_timer(device, 50); |
1371 | } | 1371 | } |
1372 | 1372 | ||
1373 | /* | 1373 | /* |
1374 | * Go through all requests on the dasd_device request queue, | 1374 | * Go through all requests on the dasd_device request queue, |
1375 | * terminate them on the cdev if necessary, and return them to the | 1375 | * terminate them on the cdev if necessary, and return them to the |
1376 | * submitting layer via callback. | 1376 | * submitting layer via callback. |
1377 | * Note: | 1377 | * Note: |
1378 | * Make sure that all 'submitting layers' still exist when | 1378 | * Make sure that all 'submitting layers' still exist when |
1379 | * this function is called! In other words, when 'device' is a base | 1379 | * this function is called! In other words, when 'device' is a base |
1380 | * device, then all block layer requests must already have been removed | 1380 | * device, then all block layer requests must already have been removed |
1381 | * via dasd_flush_block_queue. | 1381 | * via dasd_flush_block_queue. |
1382 | */ | 1382 | */ |
1383 | int dasd_flush_device_queue(struct dasd_device *device) | 1383 | int dasd_flush_device_queue(struct dasd_device *device) |
1384 | { | 1384 | { |
1385 | struct dasd_ccw_req *cqr, *n; | 1385 | struct dasd_ccw_req *cqr, *n; |
1386 | int rc; | 1386 | int rc; |
1387 | struct list_head flush_queue; | 1387 | struct list_head flush_queue; |
1388 | 1388 | ||
1389 | INIT_LIST_HEAD(&flush_queue); | 1389 | INIT_LIST_HEAD(&flush_queue); |
1390 | spin_lock_irq(get_ccwdev_lock(device->cdev)); | 1390 | spin_lock_irq(get_ccwdev_lock(device->cdev)); |
1391 | rc = 0; | 1391 | rc = 0; |
1392 | list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) { | 1392 | list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) { |
1393 | /* Check status and move request to flush_queue */ | 1393 | /* Check status and move request to flush_queue */ |
1394 | switch (cqr->status) { | 1394 | switch (cqr->status) { |
1395 | case DASD_CQR_IN_IO: | 1395 | case DASD_CQR_IN_IO: |
1396 | rc = device->discipline->term_IO(cqr); | 1396 | rc = device->discipline->term_IO(cqr); |
1397 | if (rc) { | 1397 | if (rc) { |
1398 | /* unable to terminate request */ | 1398 | /* unable to terminate request */ |
1399 | dev_err(&device->cdev->dev, | 1399 | dev_err(&device->cdev->dev, |
1400 | "Flushing the DASD request queue " | 1400 | "Flushing the DASD request queue " |
1401 | "failed for request %p\n", cqr); | 1401 | "failed for request %p\n", cqr); |
1402 | /* stop flush processing */ | 1402 | /* stop flush processing */ |
1403 | goto finished; | 1403 | goto finished; |
1404 | } | 1404 | } |
1405 | break; | 1405 | break; |
1406 | case DASD_CQR_QUEUED: | 1406 | case DASD_CQR_QUEUED: |
1407 | cqr->stopclk = get_clock(); | 1407 | cqr->stopclk = get_clock(); |
1408 | cqr->status = DASD_CQR_CLEARED; | 1408 | cqr->status = DASD_CQR_CLEARED; |
1409 | break; | 1409 | break; |
1410 | default: /* no need to modify the others */ | 1410 | default: /* no need to modify the others */ |
1411 | break; | 1411 | break; |
1412 | } | 1412 | } |
1413 | list_move_tail(&cqr->devlist, &flush_queue); | 1413 | list_move_tail(&cqr->devlist, &flush_queue); |
1414 | } | 1414 | } |
1415 | finished: | 1415 | finished: |
1416 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); | 1416 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); |
1417 | /* | 1417 | /* |
1418 | * After this point all requests must be in state CLEAR_PENDING, | 1418 | * After this point all requests must be in state CLEAR_PENDING, |
1419 | * CLEARED, SUCCESS or ERROR. Now wait for CLEAR_PENDING to become | 1419 | * CLEARED, SUCCESS or ERROR. Now wait for CLEAR_PENDING to become |
1420 | * one of the others. | 1420 | * one of the others. |
1421 | */ | 1421 | */ |
1422 | list_for_each_entry_safe(cqr, n, &flush_queue, devlist) | 1422 | list_for_each_entry_safe(cqr, n, &flush_queue, devlist) |
1423 | wait_event(dasd_flush_wq, | 1423 | wait_event(dasd_flush_wq, |
1424 | (cqr->status != DASD_CQR_CLEAR_PENDING)); | 1424 | (cqr->status != DASD_CQR_CLEAR_PENDING)); |
1425 | /* | 1425 | /* |
1426 | * Now set each request back to TERMINATED, DONE or NEED_ERP | 1426 | * Now set each request back to TERMINATED, DONE or NEED_ERP |
1427 | * and call the callback function of flushed requests | 1427 | * and call the callback function of flushed requests |
1428 | */ | 1428 | */ |
1429 | __dasd_device_process_final_queue(device, &flush_queue); | 1429 | __dasd_device_process_final_queue(device, &flush_queue); |
1430 | return rc; | 1430 | return rc; |
1431 | } | 1431 | } |
1432 | 1432 | ||
1433 | /* | 1433 | /* |
1434 | * Acquire the device lock and process queues for the device. | 1434 | * Acquire the device lock and process queues for the device. |
1435 | */ | 1435 | */ |
1436 | static void dasd_device_tasklet(struct dasd_device *device) | 1436 | static void dasd_device_tasklet(struct dasd_device *device) |
1437 | { | 1437 | { |
1438 | struct list_head final_queue; | 1438 | struct list_head final_queue; |
1439 | 1439 | ||
1440 | atomic_set (&device->tasklet_scheduled, 0); | 1440 | atomic_set (&device->tasklet_scheduled, 0); |
1441 | INIT_LIST_HEAD(&final_queue); | 1441 | INIT_LIST_HEAD(&final_queue); |
1442 | spin_lock_irq(get_ccwdev_lock(device->cdev)); | 1442 | spin_lock_irq(get_ccwdev_lock(device->cdev)); |
1443 | /* Check expire time of first request on the ccw queue. */ | 1443 | /* Check expire time of first request on the ccw queue. */ |
1444 | __dasd_device_check_expire(device); | 1444 | __dasd_device_check_expire(device); |
1445 | /* find final requests on ccw queue */ | 1445 | /* find final requests on ccw queue */ |
1446 | __dasd_device_process_ccw_queue(device, &final_queue); | 1446 | __dasd_device_process_ccw_queue(device, &final_queue); |
1447 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); | 1447 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); |
1448 | /* Now call the callback function of requests with final status */ | 1448 | /* Now call the callback function of requests with final status */ |
1449 | __dasd_device_process_final_queue(device, &final_queue); | 1449 | __dasd_device_process_final_queue(device, &final_queue); |
1450 | spin_lock_irq(get_ccwdev_lock(device->cdev)); | 1450 | spin_lock_irq(get_ccwdev_lock(device->cdev)); |
1451 | /* Now check if the head of the ccw queue needs to be started. */ | 1451 | /* Now check if the head of the ccw queue needs to be started. */ |
1452 | __dasd_device_start_head(device); | 1452 | __dasd_device_start_head(device); |
1453 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); | 1453 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); |
1454 | dasd_put_device(device); | 1454 | dasd_put_device(device); |
1455 | } | 1455 | } |
1456 | 1456 | ||
1457 | /* | 1457 | /* |
1458 | * Schedules a call to dasd_device_tasklet over the device tasklet. | 1458 | * Schedules a call to dasd_device_tasklet over the device tasklet. |
1459 | */ | 1459 | */ |
1460 | void dasd_schedule_device_bh(struct dasd_device *device) | 1460 | void dasd_schedule_device_bh(struct dasd_device *device) |
1461 | { | 1461 | { |
1462 | /* Protect against rescheduling. */ | 1462 | /* Protect against rescheduling. */ |
1463 | if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0) | 1463 | if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0) |
1464 | return; | 1464 | return; |
1465 | dasd_get_device(device); | 1465 | dasd_get_device(device); |
1466 | tasklet_hi_schedule(&device->tasklet); | 1466 | tasklet_hi_schedule(&device->tasklet); |
1467 | } | 1467 | } |
1468 | 1468 | ||
1469 | void dasd_device_set_stop_bits(struct dasd_device *device, int bits) | 1469 | void dasd_device_set_stop_bits(struct dasd_device *device, int bits) |
1470 | { | 1470 | { |
1471 | device->stopped |= bits; | 1471 | device->stopped |= bits; |
1472 | } | 1472 | } |
1473 | EXPORT_SYMBOL_GPL(dasd_device_set_stop_bits); | 1473 | EXPORT_SYMBOL_GPL(dasd_device_set_stop_bits); |
1474 | 1474 | ||
1475 | void dasd_device_remove_stop_bits(struct dasd_device *device, int bits) | 1475 | void dasd_device_remove_stop_bits(struct dasd_device *device, int bits) |
1476 | { | 1476 | { |
1477 | device->stopped &= ~bits; | 1477 | device->stopped &= ~bits; |
1478 | if (!device->stopped) | 1478 | if (!device->stopped) |
1479 | wake_up(&generic_waitq); | 1479 | wake_up(&generic_waitq); |
1480 | } | 1480 | } |
1481 | EXPORT_SYMBOL_GPL(dasd_device_remove_stop_bits); | 1481 | EXPORT_SYMBOL_GPL(dasd_device_remove_stop_bits); |
1482 | 1482 | ||
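The stop-bit helpers above are exported so that disciplines can pause and resume request processing for a device. The usual pattern, mirrored from dasd_device_timeout() earlier in this file, is to manipulate the bits under the ccw device lock and then kick the device tasklet so queued requests are (re)started. A hedged sketch, with an illustrative function name:

/* Illustrative sketch, not part of this commit. */
static void example_pause_and_resume(struct dasd_device *device)
{
	unsigned long flags;

	/* stop starting new requests for this device */
	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
	dasd_device_set_stop_bits(device, DASD_STOPPED_PENDING);
	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);

	/* ... later, e.g. from a timer or state change handler ... */

	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
	dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING);
	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
	/* restart whatever is waiting on the ccw queue */
	dasd_schedule_device_bh(device);
}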
1483 | /* | 1483 | /* |
1484 | * Queue a request to the head of the device ccw_queue. | 1484 | * Queue a request to the head of the device ccw_queue. |
1485 | * Start the I/O if possible. | 1485 | * Start the I/O if possible. |
1486 | */ | 1486 | */ |
1487 | void dasd_add_request_head(struct dasd_ccw_req *cqr) | 1487 | void dasd_add_request_head(struct dasd_ccw_req *cqr) |
1488 | { | 1488 | { |
1489 | struct dasd_device *device; | 1489 | struct dasd_device *device; |
1490 | unsigned long flags; | 1490 | unsigned long flags; |
1491 | 1491 | ||
1492 | device = cqr->startdev; | 1492 | device = cqr->startdev; |
1493 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); | 1493 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); |
1494 | cqr->status = DASD_CQR_QUEUED; | 1494 | cqr->status = DASD_CQR_QUEUED; |
1495 | list_add(&cqr->devlist, &device->ccw_queue); | 1495 | list_add(&cqr->devlist, &device->ccw_queue); |
1496 | /* let the bh start the request to keep them in order */ | 1496 | /* let the bh start the request to keep them in order */ |
1497 | dasd_schedule_device_bh(device); | 1497 | dasd_schedule_device_bh(device); |
1498 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); | 1498 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); |
1499 | } | 1499 | } |
1500 | 1500 | ||
1501 | /* | 1501 | /* |
1502 | * Queue a request to the tail of the device ccw_queue. | 1502 | * Queue a request to the tail of the device ccw_queue. |
1503 | * Start the I/O if possible. | 1503 | * Start the I/O if possible. |
1504 | */ | 1504 | */ |
1505 | void dasd_add_request_tail(struct dasd_ccw_req *cqr) | 1505 | void dasd_add_request_tail(struct dasd_ccw_req *cqr) |
1506 | { | 1506 | { |
1507 | struct dasd_device *device; | 1507 | struct dasd_device *device; |
1508 | unsigned long flags; | 1508 | unsigned long flags; |
1509 | 1509 | ||
1510 | device = cqr->startdev; | 1510 | device = cqr->startdev; |
1511 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); | 1511 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); |
1512 | cqr->status = DASD_CQR_QUEUED; | 1512 | cqr->status = DASD_CQR_QUEUED; |
1513 | list_add_tail(&cqr->devlist, &device->ccw_queue); | 1513 | list_add_tail(&cqr->devlist, &device->ccw_queue); |
1514 | /* let the bh start the request to keep them in order */ | 1514 | /* let the bh start the request to keep them in order */ |
1515 | dasd_schedule_device_bh(device); | 1515 | dasd_schedule_device_bh(device); |
1516 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); | 1516 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); |
1517 | } | 1517 | } |
1518 | 1518 | ||
1519 | /* | 1519 | /* |
1520 | * Wakeup helper for the 'sleep_on' functions. | 1520 | * Wakeup helper for the 'sleep_on' functions. |
1521 | */ | 1521 | */ |
1522 | static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) | 1522 | static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) |
1523 | { | 1523 | { |
1524 | spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev)); | 1524 | spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev)); |
1525 | cqr->callback_data = DASD_SLEEPON_END_TAG; | 1525 | cqr->callback_data = DASD_SLEEPON_END_TAG; |
1526 | spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev)); | 1526 | spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev)); |
1527 | wake_up(&generic_waitq); | 1527 | wake_up(&generic_waitq); |
1528 | } | 1528 | } |
1529 | 1529 | ||
1530 | static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) | 1530 | static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) |
1531 | { | 1531 | { |
1532 | struct dasd_device *device; | 1532 | struct dasd_device *device; |
1533 | int rc; | 1533 | int rc; |
1534 | 1534 | ||
1535 | device = cqr->startdev; | 1535 | device = cqr->startdev; |
1536 | spin_lock_irq(get_ccwdev_lock(device->cdev)); | 1536 | spin_lock_irq(get_ccwdev_lock(device->cdev)); |
1537 | rc = (cqr->callback_data == DASD_SLEEPON_END_TAG); | 1537 | rc = (cqr->callback_data == DASD_SLEEPON_END_TAG); |
1538 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); | 1538 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); |
1539 | return rc; | 1539 | return rc; |
1540 | } | 1540 | } |
1541 | 1541 | ||
1542 | /* | 1542 | /* |
1543 | * checks if error recovery is necessary, returns 1 if yes, 0 otherwise. | 1543 | * checks if error recovery is necessary, returns 1 if yes, 0 otherwise. |
1544 | */ | 1544 | */ |
1545 | static int __dasd_sleep_on_erp(struct dasd_ccw_req *cqr) | 1545 | static int __dasd_sleep_on_erp(struct dasd_ccw_req *cqr) |
1546 | { | 1546 | { |
1547 | struct dasd_device *device; | 1547 | struct dasd_device *device; |
1548 | dasd_erp_fn_t erp_fn; | 1548 | dasd_erp_fn_t erp_fn; |
1549 | 1549 | ||
1550 | if (cqr->status == DASD_CQR_FILLED) | 1550 | if (cqr->status == DASD_CQR_FILLED) |
1551 | return 0; | 1551 | return 0; |
1552 | device = cqr->startdev; | 1552 | device = cqr->startdev; |
1553 | if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { | 1553 | if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { |
1554 | if (cqr->status == DASD_CQR_TERMINATED) { | 1554 | if (cqr->status == DASD_CQR_TERMINATED) { |
1555 | device->discipline->handle_terminated_request(cqr); | 1555 | device->discipline->handle_terminated_request(cqr); |
1556 | return 1; | 1556 | return 1; |
1557 | } | 1557 | } |
1558 | if (cqr->status == DASD_CQR_NEED_ERP) { | 1558 | if (cqr->status == DASD_CQR_NEED_ERP) { |
1559 | erp_fn = device->discipline->erp_action(cqr); | 1559 | erp_fn = device->discipline->erp_action(cqr); |
1560 | erp_fn(cqr); | 1560 | erp_fn(cqr); |
1561 | return 1; | 1561 | return 1; |
1562 | } | 1562 | } |
1563 | if (cqr->status == DASD_CQR_FAILED) | 1563 | if (cqr->status == DASD_CQR_FAILED) |
1564 | dasd_log_sense(cqr, &cqr->irb); | 1564 | dasd_log_sense(cqr, &cqr->irb); |
1565 | if (cqr->refers) { | 1565 | if (cqr->refers) { |
1566 | __dasd_process_erp(device, cqr); | 1566 | __dasd_process_erp(device, cqr); |
1567 | return 1; | 1567 | return 1; |
1568 | } | 1568 | } |
1569 | } | 1569 | } |
1570 | return 0; | 1570 | return 0; |
1571 | } | 1571 | } |
1572 | 1572 | ||
1573 | static int __dasd_sleep_on_loop_condition(struct dasd_ccw_req *cqr) | 1573 | static int __dasd_sleep_on_loop_condition(struct dasd_ccw_req *cqr) |
1574 | { | 1574 | { |
1575 | if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { | 1575 | if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { |
1576 | if (cqr->refers) /* erp is not done yet */ | 1576 | if (cqr->refers) /* erp is not done yet */ |
1577 | return 1; | 1577 | return 1; |
1578 | return ((cqr->status != DASD_CQR_DONE) && | 1578 | return ((cqr->status != DASD_CQR_DONE) && |
1579 | (cqr->status != DASD_CQR_FAILED)); | 1579 | (cqr->status != DASD_CQR_FAILED)); |
1580 | } else | 1580 | } else |
1581 | return (cqr->status == DASD_CQR_FILLED); | 1581 | return (cqr->status == DASD_CQR_FILLED); |
1582 | } | 1582 | } |
1583 | 1583 | ||
1584 | static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) | 1584 | static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) |
1585 | { | 1585 | { |
1586 | struct dasd_device *device; | 1586 | struct dasd_device *device; |
1587 | int rc; | 1587 | int rc; |
1588 | struct list_head ccw_queue; | 1588 | struct list_head ccw_queue; |
1589 | struct dasd_ccw_req *cqr; | 1589 | struct dasd_ccw_req *cqr; |
1590 | 1590 | ||
1591 | INIT_LIST_HEAD(&ccw_queue); | 1591 | INIT_LIST_HEAD(&ccw_queue); |
1592 | maincqr->status = DASD_CQR_FILLED; | 1592 | maincqr->status = DASD_CQR_FILLED; |
1593 | device = maincqr->startdev; | 1593 | device = maincqr->startdev; |
1594 | list_add(&maincqr->blocklist, &ccw_queue); | 1594 | list_add(&maincqr->blocklist, &ccw_queue); |
1595 | for (cqr = maincqr; __dasd_sleep_on_loop_condition(cqr); | 1595 | for (cqr = maincqr; __dasd_sleep_on_loop_condition(cqr); |
1596 | cqr = list_first_entry(&ccw_queue, | 1596 | cqr = list_first_entry(&ccw_queue, |
1597 | struct dasd_ccw_req, blocklist)) { | 1597 | struct dasd_ccw_req, blocklist)) { |
1598 | 1598 | ||
1599 | if (__dasd_sleep_on_erp(cqr)) | 1599 | if (__dasd_sleep_on_erp(cqr)) |
1600 | continue; | 1600 | continue; |
1601 | if (cqr->status != DASD_CQR_FILLED) /* could be failed */ | 1601 | if (cqr->status != DASD_CQR_FILLED) /* could be failed */ |
1602 | continue; | 1602 | continue; |
1603 | 1603 | ||
1604 | /* Non-temporary stop condition will trigger fail fast */ | 1604 | /* Non-temporary stop condition will trigger fail fast */ |
1605 | if (device->stopped & ~DASD_STOPPED_PENDING && | 1605 | if (device->stopped & ~DASD_STOPPED_PENDING && |
1606 | test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && | 1606 | test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && |
1607 | (!dasd_eer_enabled(device))) { | 1607 | (!dasd_eer_enabled(device))) { |
1608 | cqr->status = DASD_CQR_FAILED; | 1608 | cqr->status = DASD_CQR_FAILED; |
1609 | continue; | 1609 | continue; |
1610 | } | 1610 | } |
1611 | 1611 | ||
1612 | /* Don't try to start requests if device is stopped */ | 1612 | /* Don't try to start requests if device is stopped */ |
1613 | if (interruptible) { | 1613 | if (interruptible) { |
1614 | rc = wait_event_interruptible( | 1614 | rc = wait_event_interruptible( |
1615 | generic_waitq, !(device->stopped)); | 1615 | generic_waitq, !(device->stopped)); |
1616 | if (rc == -ERESTARTSYS) { | 1616 | if (rc == -ERESTARTSYS) { |
1617 | cqr->status = DASD_CQR_FAILED; | 1617 | cqr->status = DASD_CQR_FAILED; |
1618 | maincqr->intrc = rc; | 1618 | maincqr->intrc = rc; |
1619 | continue; | 1619 | continue; |
1620 | } | 1620 | } |
1621 | } else | 1621 | } else |
1622 | wait_event(generic_waitq, !(device->stopped)); | 1622 | wait_event(generic_waitq, !(device->stopped)); |
1623 | 1623 | ||
1624 | cqr->callback = dasd_wakeup_cb; | 1624 | cqr->callback = dasd_wakeup_cb; |
1625 | cqr->callback_data = DASD_SLEEPON_START_TAG; | 1625 | cqr->callback_data = DASD_SLEEPON_START_TAG; |
1626 | dasd_add_request_tail(cqr); | 1626 | dasd_add_request_tail(cqr); |
1627 | if (interruptible) { | 1627 | if (interruptible) { |
1628 | rc = wait_event_interruptible( | 1628 | rc = wait_event_interruptible( |
1629 | generic_waitq, _wait_for_wakeup(cqr)); | 1629 | generic_waitq, _wait_for_wakeup(cqr)); |
1630 | if (rc == -ERESTARTSYS) { | 1630 | if (rc == -ERESTARTSYS) { |
1631 | dasd_cancel_req(cqr); | 1631 | dasd_cancel_req(cqr); |
1632 | /* wait (non-interruptible) for final status */ | 1632 | /* wait (non-interruptible) for final status */ |
1633 | wait_event(generic_waitq, | 1633 | wait_event(generic_waitq, |
1634 | _wait_for_wakeup(cqr)); | 1634 | _wait_for_wakeup(cqr)); |
1635 | cqr->status = DASD_CQR_FAILED; | 1635 | cqr->status = DASD_CQR_FAILED; |
1636 | maincqr->intrc = rc; | 1636 | maincqr->intrc = rc; |
1637 | continue; | 1637 | continue; |
1638 | } | 1638 | } |
1639 | } else | 1639 | } else |
1640 | wait_event(generic_waitq, _wait_for_wakeup(cqr)); | 1640 | wait_event(generic_waitq, _wait_for_wakeup(cqr)); |
1641 | } | 1641 | } |
1642 | 1642 | ||
1643 | maincqr->endclk = get_clock(); | 1643 | maincqr->endclk = get_clock(); |
1644 | if ((maincqr->status != DASD_CQR_DONE) && | 1644 | if ((maincqr->status != DASD_CQR_DONE) && |
1645 | (maincqr->intrc != -ERESTARTSYS)) | 1645 | (maincqr->intrc != -ERESTARTSYS)) |
1646 | dasd_log_sense(maincqr, &maincqr->irb); | 1646 | dasd_log_sense(maincqr, &maincqr->irb); |
1647 | if (maincqr->status == DASD_CQR_DONE) | 1647 | if (maincqr->status == DASD_CQR_DONE) |
1648 | rc = 0; | 1648 | rc = 0; |
1649 | else if (maincqr->intrc) | 1649 | else if (maincqr->intrc) |
1650 | rc = maincqr->intrc; | 1650 | rc = maincqr->intrc; |
1651 | else | 1651 | else |
1652 | rc = -EIO; | 1652 | rc = -EIO; |
1653 | return rc; | 1653 | return rc; |
1654 | } | 1654 | } |
1655 | 1655 | ||
1656 | /* | 1656 | /* |
1657 | * Queue a request to the tail of the device ccw_queue and wait for | 1657 | * Queue a request to the tail of the device ccw_queue and wait for |
1658 | * its completion. | 1658 | * its completion. |
1659 | */ | 1659 | */ |
1660 | int dasd_sleep_on(struct dasd_ccw_req *cqr) | 1660 | int dasd_sleep_on(struct dasd_ccw_req *cqr) |
1661 | { | 1661 | { |
1662 | return _dasd_sleep_on(cqr, 0); | 1662 | return _dasd_sleep_on(cqr, 0); |
1663 | } | 1663 | } |
1664 | 1664 | ||
1665 | /* | 1665 | /* |
1666 | * Queue a request to the tail of the device ccw_queue and wait | 1666 | * Queue a request to the tail of the device ccw_queue and wait |
1667 | * interruptibly for its completion. | 1667 | * interruptibly for its completion. |
1668 | */ | 1668 | */ |
1669 | int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr) | 1669 | int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr) |
1670 | { | 1670 | { |
1671 | return _dasd_sleep_on(cqr, 1); | 1671 | return _dasd_sleep_on(cqr, 1); |
1672 | } | 1672 | } |
1673 | 1673 | ||
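dasd_sleep_on() and dasd_sleep_on_interruptible() are the synchronous submission paths: _dasd_sleep_on() above marks the request FILLED, queues it with dasd_wakeup_cb as callback and waits on generic_waitq until a final status is reached. A sketch of how a discipline might submit a prebuilt request this way; building the channel program itself is discipline specific and omitted, and the retry/expiry values are purely illustrative:

/* Illustrative sketch, not part of this commit. */
static int example_sync_submit(struct dasd_device *device,
			       struct dasd_ccw_req *cqr)
{
	int rc;

	cqr->startdev = device;	/* device the I/O is started on */
	cqr->retries = 5;	/* fastpath retries before giving up */
	cqr->expires = 10 * HZ;	/* per-attempt timeout in jiffies */

	/* queue at the tail, start the I/O and wait for completion */
	rc = dasd_sleep_on(cqr);
	if (rc)
		dev_err(&device->cdev->dev,
			"example request failed with rc=%d\n", rc);
	return rc;
}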
1674 | /* | 1674 | /* |
1675 | * Whoa nelly now it gets really hairy. For some functions (e.g. steal lock | 1675 | * Whoa nelly now it gets really hairy. For some functions (e.g. steal lock |
1676 | * for eckd devices) the currently running request has to be terminated | 1676 | * for eckd devices) the currently running request has to be terminated |
1677 | * and be put back to status queued, before the special request is added | 1677 | * and be put back to status queued, before the special request is added |
1678 | * to the head of the queue. Then the special request is waited on normally. | 1678 | * to the head of the queue. Then the special request is waited on normally. |
1679 | */ | 1679 | */ |
1680 | static inline int _dasd_term_running_cqr(struct dasd_device *device) | 1680 | static inline int _dasd_term_running_cqr(struct dasd_device *device) |
1681 | { | 1681 | { |
1682 | struct dasd_ccw_req *cqr; | 1682 | struct dasd_ccw_req *cqr; |
1683 | 1683 | ||
1684 | if (list_empty(&device->ccw_queue)) | 1684 | if (list_empty(&device->ccw_queue)) |
1685 | return 0; | 1685 | return 0; |
1686 | cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); | 1686 | cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); |
1687 | return device->discipline->term_IO(cqr); | 1687 | return device->discipline->term_IO(cqr); |
1688 | } | 1688 | } |
1689 | 1689 | ||
1690 | int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) | 1690 | int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) |
1691 | { | 1691 | { |
1692 | struct dasd_device *device; | 1692 | struct dasd_device *device; |
1693 | int rc; | 1693 | int rc; |
1694 | 1694 | ||
1695 | device = cqr->startdev; | 1695 | device = cqr->startdev; |
1696 | spin_lock_irq(get_ccwdev_lock(device->cdev)); | 1696 | spin_lock_irq(get_ccwdev_lock(device->cdev)); |
1697 | rc = _dasd_term_running_cqr(device); | 1697 | rc = _dasd_term_running_cqr(device); |
1698 | if (rc) { | 1698 | if (rc) { |
1699 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); | 1699 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); |
1700 | return rc; | 1700 | return rc; |
1701 | } | 1701 | } |
1702 | 1702 | ||
1703 | cqr->callback = dasd_wakeup_cb; | 1703 | cqr->callback = dasd_wakeup_cb; |
1704 | cqr->callback_data = DASD_SLEEPON_START_TAG; | 1704 | cqr->callback_data = DASD_SLEEPON_START_TAG; |
1705 | cqr->status = DASD_CQR_QUEUED; | 1705 | cqr->status = DASD_CQR_QUEUED; |
1706 | list_add(&cqr->devlist, &device->ccw_queue); | 1706 | list_add(&cqr->devlist, &device->ccw_queue); |
1707 | 1707 | ||
1708 | /* let the bh start the request to keep them in order */ | 1708 | /* let the bh start the request to keep them in order */ |
1709 | dasd_schedule_device_bh(device); | 1709 | dasd_schedule_device_bh(device); |
1710 | 1710 | ||
1711 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); | 1711 | spin_unlock_irq(get_ccwdev_lock(device->cdev)); |
1712 | 1712 | ||
1713 | wait_event(generic_waitq, _wait_for_wakeup(cqr)); | 1713 | wait_event(generic_waitq, _wait_for_wakeup(cqr)); |
1714 | 1714 | ||
1715 | if (cqr->status == DASD_CQR_DONE) | 1715 | if (cqr->status == DASD_CQR_DONE) |
1716 | rc = 0; | 1716 | rc = 0; |
1717 | else if (cqr->intrc) | 1717 | else if (cqr->intrc) |
1718 | rc = cqr->intrc; | 1718 | rc = cqr->intrc; |
1719 | else | 1719 | else |
1720 | rc = -EIO; | 1720 | rc = -EIO; |
1721 | return rc; | 1721 | return rc; |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | /* | 1724 | /* |
1725 | * Cancels a request that was started with dasd_sleep_on_req. | 1725 | * Cancels a request that was started with dasd_sleep_on_req. |
1726 | * This is useful for timing out requests. The request will be | 1726 | * This is useful for timing out requests. The request will be |
1727 | * terminated if it is currently in i/o. | 1727 | * terminated if it is currently in i/o. |
1728 | * Returns 1 if the request has been terminated. | 1728 | * Returns 1 if the request has been terminated. |
1729 | * 0 if there was no need to terminate the request (not started yet) | 1729 | * 0 if there was no need to terminate the request (not started yet) |
1730 | * negative error code if termination failed | 1730 | * negative error code if termination failed |
1731 | * Cancellation of a request is an asynchronous operation! The calling | 1731 | * Cancellation of a request is an asynchronous operation! The calling |
1732 | * function has to wait until the request is properly returned via callback. | 1732 | * function has to wait until the request is properly returned via callback. |
1733 | */ | 1733 | */ |
1734 | int dasd_cancel_req(struct dasd_ccw_req *cqr) | 1734 | int dasd_cancel_req(struct dasd_ccw_req *cqr) |
1735 | { | 1735 | { |
1736 | struct dasd_device *device = cqr->startdev; | 1736 | struct dasd_device *device = cqr->startdev; |
1737 | unsigned long flags; | 1737 | unsigned long flags; |
1738 | int rc; | 1738 | int rc; |
1739 | 1739 | ||
1740 | rc = 0; | 1740 | rc = 0; |
1741 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); | 1741 | spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); |
1742 | switch (cqr->status) { | 1742 | switch (cqr->status) { |
1743 | case DASD_CQR_QUEUED: | 1743 | case DASD_CQR_QUEUED: |
1744 | /* request was not started - just set to cleared */ | 1744 | /* request was not started - just set to cleared */ |
1745 | cqr->status = DASD_CQR_CLEARED; | 1745 | cqr->status = DASD_CQR_CLEARED; |
1746 | break; | 1746 | break; |
1747 | case DASD_CQR_IN_IO: | 1747 | case DASD_CQR_IN_IO: |
1748 | /* request in IO - terminate IO and release again */ | 1748 | /* request in IO - terminate IO and release again */ |
1749 | rc = device->discipline->term_IO(cqr); | 1749 | rc = device->discipline->term_IO(cqr); |
1750 | if (rc) { | 1750 | if (rc) { |
1751 | dev_err(&device->cdev->dev, | 1751 | dev_err(&device->cdev->dev, |
1752 | "Cancelling request %p failed with rc=%d\n", | 1752 | "Cancelling request %p failed with rc=%d\n", |
1753 | cqr, rc); | 1753 | cqr, rc); |
1754 | } else { | 1754 | } else { |
1755 | cqr->stopclk = get_clock(); | 1755 | cqr->stopclk = get_clock(); |
1756 | } | 1756 | } |
1757 | break; | 1757 | break; |
1758 | default: /* already finished or clear pending - do nothing */ | 1758 | default: /* already finished or clear pending - do nothing */ |
1759 | break; | 1759 | break; |
1760 | } | 1760 | } |
1761 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); | 1761 | spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); |
1762 | dasd_schedule_device_bh(device); | 1762 | dasd_schedule_device_bh(device); |
1763 | return rc; | 1763 | return rc; |
1764 | } | 1764 | } |
1765 | 1765 | ||
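As the comment above stresses, dasd_cancel_req() only initiates the cancellation; the request still completes asynchronously through its callback. For requests submitted via the sleep_on helpers, whose callback is dasd_wakeup_cb, a caller that gives up on a request therefore cancels it and then waits for the final status, exactly as the -ERESTARTSYS path of _dasd_sleep_on() does. A hedged sketch of that pattern, relying on the file-local generic_waitq and _wait_for_wakeup():

/* Illustrative sketch, not part of this commit. */
static void example_abort_and_wait(struct dasd_ccw_req *cqr)
{
	/* terminate the I/O if it is running, or mark a queued request cleared */
	dasd_cancel_req(cqr);
	/* wait (non-interruptible) until the callback reported the final status */
	wait_event(generic_waitq, _wait_for_wakeup(cqr));
}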
1766 | 1766 | ||
1767 | /* | 1767 | /* |
1768 | * SECTION: Operations of the dasd_block layer. | 1768 | * SECTION: Operations of the dasd_block layer. |
1769 | */ | 1769 | */ |
1770 | 1770 | ||
1771 | /* | 1771 | /* |
1772 | * Timeout function for dasd_block. This is used when the block layer | 1772 | * Timeout function for dasd_block. This is used when the block layer |
1773 | * is waiting for something that may not come reliably (e.g. a state | 1773 | * is waiting for something that may not come reliably (e.g. a state |
1774 | * change interrupt) | 1774 | * change interrupt) |
1775 | */ | 1775 | */ |
1776 | static void dasd_block_timeout(unsigned long ptr) | 1776 | static void dasd_block_timeout(unsigned long ptr) |
1777 | { | 1777 | { |
1778 | unsigned long flags; | 1778 | unsigned long flags; |
1779 | struct dasd_block *block; | 1779 | struct dasd_block *block; |
1780 | 1780 | ||
1781 | block = (struct dasd_block *) ptr; | 1781 | block = (struct dasd_block *) ptr; |
1782 | spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags); | 1782 | spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags); |
1783 | /* re-activate request queue */ | 1783 | /* re-activate request queue */ |
1784 | dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING); | 1784 | dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING); |
1785 | spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags); | 1785 | spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags); |
1786 | dasd_schedule_block_bh(block); | 1786 | dasd_schedule_block_bh(block); |
1787 | } | 1787 | } |
1788 | 1788 | ||
1789 | /* | 1789 | /* |
1790 | * Setup timeout for a dasd_block in jiffies. | 1790 | * Setup timeout for a dasd_block in jiffies. |
1791 | */ | 1791 | */ |
1792 | void dasd_block_set_timer(struct dasd_block *block, int expires) | 1792 | void dasd_block_set_timer(struct dasd_block *block, int expires) |
1793 | { | 1793 | { |
1794 | if (expires == 0) | 1794 | if (expires == 0) |
1795 | del_timer(&block->timer); | 1795 | del_timer(&block->timer); |
1796 | else | 1796 | else |
1797 | mod_timer(&block->timer, jiffies + expires); | 1797 | mod_timer(&block->timer, jiffies + expires); |
1798 | } | 1798 | } |
1799 | 1799 | ||
1800 | /* | 1800 | /* |
1801 | * Clear timeout for a dasd_block. | 1801 | * Clear timeout for a dasd_block. |
1802 | */ | 1802 | */ |
1803 | void dasd_block_clear_timer(struct dasd_block *block) | 1803 | void dasd_block_clear_timer(struct dasd_block *block) |
1804 | { | 1804 | { |
1805 | del_timer(&block->timer); | 1805 | del_timer(&block->timer); |
1806 | } | 1806 | } |
1807 | 1807 | ||
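Both timer helpers serve the DASD_STOPPED_* machinery. The typical use, condensed from __dasd_process_request_queue() further down, is to park the block queue for a moment and let dasd_block_timeout() lift the stop bit again:

/* park the block queue for half a second; when the timer fires,
 * dasd_block_timeout() clears DASD_STOPPED_PENDING and reschedules
 * the block tasklet */
dasd_device_set_stop_bits(basedev, DASD_STOPPED_PENDING);
dasd_block_set_timer(block, HZ / 2);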
1808 | /* | 1808 | /* |
1809 | * Process finished error recovery ccw. | 1809 | * Process finished error recovery ccw. |
1810 | */ | 1810 | */ |
1811 | static void __dasd_process_erp(struct dasd_device *device, | 1811 | static void __dasd_process_erp(struct dasd_device *device, |
1812 | struct dasd_ccw_req *cqr) | 1812 | struct dasd_ccw_req *cqr) |
1813 | { | 1813 | { |
1814 | dasd_erp_fn_t erp_fn; | 1814 | dasd_erp_fn_t erp_fn; |
1815 | 1815 | ||
1816 | if (cqr->status == DASD_CQR_DONE) | 1816 | if (cqr->status == DASD_CQR_DONE) |
1817 | DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful"); | 1817 | DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful"); |
1818 | else | 1818 | else |
1819 | dev_err(&device->cdev->dev, "ERP failed for the DASD\n"); | 1819 | dev_err(&device->cdev->dev, "ERP failed for the DASD\n"); |
1820 | erp_fn = device->discipline->erp_postaction(cqr); | 1820 | erp_fn = device->discipline->erp_postaction(cqr); |
1821 | erp_fn(cqr); | 1821 | erp_fn(cqr); |
1822 | } | 1822 | } |
1823 | 1823 | ||
1824 | /* | 1824 | /* |
1825 | * Fetch requests from the block device queue. | 1825 | * Fetch requests from the block device queue. |
1826 | */ | 1826 | */ |
1827 | static void __dasd_process_request_queue(struct dasd_block *block) | 1827 | static void __dasd_process_request_queue(struct dasd_block *block) |
1828 | { | 1828 | { |
1829 | struct request_queue *queue; | 1829 | struct request_queue *queue; |
1830 | struct request *req; | 1830 | struct request *req; |
1831 | struct dasd_ccw_req *cqr; | 1831 | struct dasd_ccw_req *cqr; |
1832 | struct dasd_device *basedev; | 1832 | struct dasd_device *basedev; |
1833 | unsigned long flags; | 1833 | unsigned long flags; |
1834 | queue = block->request_queue; | 1834 | queue = block->request_queue; |
1835 | basedev = block->base; | 1835 | basedev = block->base; |
1836 | /* No queue? Then there is nothing to do. */ | 1836 | /* No queue? Then there is nothing to do. */ |
1837 | if (queue == NULL) | 1837 | if (queue == NULL) |
1838 | return; | 1838 | return; |
1839 | 1839 | ||
1840 | /* | 1840 | /* |
1841 | * We requeue requests from the block device queue to the ccw | 1841 | * We requeue requests from the block device queue to the ccw |
1842 | * queue only in two states. In state DASD_STATE_READY the | 1842 | * queue only in two states. In state DASD_STATE_READY the |
1843 | * partition detection is done and we need to requeue requests | 1843 | * partition detection is done and we need to requeue requests |
1844 | * for that. State DASD_STATE_ONLINE is normal block device | 1844 | * for that. State DASD_STATE_ONLINE is normal block device |
1845 | * operation. | 1845 | * operation. |
1846 | */ | 1846 | */ |
1847 | if (basedev->state < DASD_STATE_READY) { | 1847 | if (basedev->state < DASD_STATE_READY) { |
1848 | while ((req = blk_fetch_request(block->request_queue))) | 1848 | while ((req = blk_fetch_request(block->request_queue))) |
1849 | __blk_end_request_all(req, -EIO); | 1849 | __blk_end_request_all(req, -EIO); |
1850 | return; | 1850 | return; |
1851 | } | 1851 | } |
1852 | /* Now we try to fetch requests from the request queue */ | 1852 | /* Now we try to fetch requests from the request queue */ |
1853 | while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) { | 1853 | while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) { |
1854 | if (basedev->features & DASD_FEATURE_READONLY && | 1854 | if (basedev->features & DASD_FEATURE_READONLY && |
1855 | rq_data_dir(req) == WRITE) { | 1855 | rq_data_dir(req) == WRITE) { |
1856 | DBF_DEV_EVENT(DBF_ERR, basedev, | 1856 | DBF_DEV_EVENT(DBF_ERR, basedev, |
1857 | "Rejecting write request %p", | 1857 | "Rejecting write request %p", |
1858 | req); | 1858 | req); |
1859 | blk_start_request(req); | 1859 | blk_start_request(req); |
1860 | __blk_end_request_all(req, -EIO); | 1860 | __blk_end_request_all(req, -EIO); |
1861 | continue; | 1861 | continue; |
1862 | } | 1862 | } |
1863 | cqr = basedev->discipline->build_cp(basedev, block, req); | 1863 | cqr = basedev->discipline->build_cp(basedev, block, req); |
1864 | if (IS_ERR(cqr)) { | 1864 | if (IS_ERR(cqr)) { |
1865 | if (PTR_ERR(cqr) == -EBUSY) | 1865 | if (PTR_ERR(cqr) == -EBUSY) |
1866 | break; /* normal end condition */ | 1866 | break; /* normal end condition */ |
1867 | if (PTR_ERR(cqr) == -ENOMEM) | 1867 | if (PTR_ERR(cqr) == -ENOMEM) |
1868 | break; /* terminate request queue loop */ | 1868 | break; /* terminate request queue loop */ |
1869 | if (PTR_ERR(cqr) == -EAGAIN) { | 1869 | if (PTR_ERR(cqr) == -EAGAIN) { |
1870 | /* | 1870 | /* |
1871 | * The current request cannot be built right | 1871 | * The current request cannot be built right |
1872 | * now; we have to try again later. If this request | 1872 | * now; we have to try again later. If this request |
1873 | * is the head of the queue, we stop the device | 1873 | * is the head of the queue, we stop the device |
1874 | * for 1/2 second. | 1874 | * for 1/2 second. |
1875 | */ | 1875 | */ |
1876 | if (!list_empty(&block->ccw_queue)) | 1876 | if (!list_empty(&block->ccw_queue)) |
1877 | break; | 1877 | break; |
1878 | spin_lock_irqsave( | 1878 | spin_lock_irqsave( |
1879 | get_ccwdev_lock(basedev->cdev), flags); | 1879 | get_ccwdev_lock(basedev->cdev), flags); |
1880 | dasd_device_set_stop_bits(basedev, | 1880 | dasd_device_set_stop_bits(basedev, |
1881 | DASD_STOPPED_PENDING); | 1881 | DASD_STOPPED_PENDING); |
1882 | spin_unlock_irqrestore( | 1882 | spin_unlock_irqrestore( |
1883 | get_ccwdev_lock(basedev->cdev), flags); | 1883 | get_ccwdev_lock(basedev->cdev), flags); |
1884 | dasd_block_set_timer(block, HZ/2); | 1884 | dasd_block_set_timer(block, HZ/2); |
1885 | break; | 1885 | break; |
1886 | } | 1886 | } |
1887 | DBF_DEV_EVENT(DBF_ERR, basedev, | 1887 | DBF_DEV_EVENT(DBF_ERR, basedev, |
1888 | "CCW creation failed (rc=%ld) " | 1888 | "CCW creation failed (rc=%ld) " |
1889 | "on request %p", | 1889 | "on request %p", |
1890 | PTR_ERR(cqr), req); | 1890 | PTR_ERR(cqr), req); |
1891 | blk_start_request(req); | 1891 | blk_start_request(req); |
1892 | __blk_end_request_all(req, -EIO); | 1892 | __blk_end_request_all(req, -EIO); |
1893 | continue; | 1893 | continue; |
1894 | } | 1894 | } |
1895 | /* | 1895 | /* |
1896 | * Note: callback is set to dasd_return_cqr_cb in | 1896 | * Note: callback is set to dasd_return_cqr_cb in |
1897 | * __dasd_block_start_head to cover erp requests as well | 1897 | * __dasd_block_start_head to cover erp requests as well |
1898 | */ | 1898 | */ |
1899 | cqr->callback_data = (void *) req; | 1899 | cqr->callback_data = (void *) req; |
1900 | cqr->status = DASD_CQR_FILLED; | 1900 | cqr->status = DASD_CQR_FILLED; |
1901 | blk_start_request(req); | 1901 | blk_start_request(req); |
1902 | list_add_tail(&cqr->blocklist, &block->ccw_queue); | 1902 | list_add_tail(&cqr->blocklist, &block->ccw_queue); |
1903 | dasd_profile_start(block, cqr, req); | 1903 | dasd_profile_start(block, cqr, req); |
1904 | } | 1904 | } |
1905 | } | 1905 | } |
1906 | 1906 | ||
1907 | static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) | 1907 | static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) |
1908 | { | 1908 | { |
1909 | struct request *req; | 1909 | struct request *req; |
1910 | int status; | 1910 | int status; |
1911 | int error = 0; | 1911 | int error = 0; |
1912 | 1912 | ||
1913 | req = (struct request *) cqr->callback_data; | 1913 | req = (struct request *) cqr->callback_data; |
1914 | dasd_profile_end(cqr->block, cqr, req); | 1914 | dasd_profile_end(cqr->block, cqr, req); |
1915 | status = cqr->block->base->discipline->free_cp(cqr, req); | 1915 | status = cqr->block->base->discipline->free_cp(cqr, req); |
1916 | if (status <= 0) | 1916 | if (status <= 0) |
1917 | error = status ? status : -EIO; | 1917 | error = status ? status : -EIO; |
1918 | __blk_end_request_all(req, error); | 1918 | __blk_end_request_all(req, error); |
1919 | } | 1919 | } |
1920 | 1920 | ||
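The discipline's free_cp() return value is folded into the error code handed back to the block layer; restated as a small table:

/* free_cp() result        error passed to __blk_end_request_all()
 *   > 0  (success)          0
 *   == 0                    -EIO
 *   < 0                     the negative value itself
 */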
1921 | /* | 1921 | /* |
1922 | * Process ccw request queue. | 1922 | * Process ccw request queue. |
1923 | */ | 1923 | */ |
1924 | static void __dasd_process_block_ccw_queue(struct dasd_block *block, | 1924 | static void __dasd_process_block_ccw_queue(struct dasd_block *block, |
1925 | struct list_head *final_queue) | 1925 | struct list_head *final_queue) |
1926 | { | 1926 | { |
1927 | struct list_head *l, *n; | 1927 | struct list_head *l, *n; |
1928 | struct dasd_ccw_req *cqr; | 1928 | struct dasd_ccw_req *cqr; |
1929 | dasd_erp_fn_t erp_fn; | 1929 | dasd_erp_fn_t erp_fn; |
1930 | unsigned long flags; | 1930 | unsigned long flags; |
1931 | struct dasd_device *base = block->base; | 1931 | struct dasd_device *base = block->base; |
1932 | 1932 | ||
1933 | restart: | 1933 | restart: |
1934 | /* Process request with final status. */ | 1934 | /* Process request with final status. */ |
1935 | list_for_each_safe(l, n, &block->ccw_queue) { | 1935 | list_for_each_safe(l, n, &block->ccw_queue) { |
1936 | cqr = list_entry(l, struct dasd_ccw_req, blocklist); | 1936 | cqr = list_entry(l, struct dasd_ccw_req, blocklist); |
1937 | if (cqr->status != DASD_CQR_DONE && | 1937 | if (cqr->status != DASD_CQR_DONE && |
1938 | cqr->status != DASD_CQR_FAILED && | 1938 | cqr->status != DASD_CQR_FAILED && |
1939 | cqr->status != DASD_CQR_NEED_ERP && | 1939 | cqr->status != DASD_CQR_NEED_ERP && |
1940 | cqr->status != DASD_CQR_TERMINATED) | 1940 | cqr->status != DASD_CQR_TERMINATED) |
1941 | continue; | 1941 | continue; |
1942 | 1942 | ||
1943 | if (cqr->status == DASD_CQR_TERMINATED) { | 1943 | if (cqr->status == DASD_CQR_TERMINATED) { |
1944 | base->discipline->handle_terminated_request(cqr); | 1944 | base->discipline->handle_terminated_request(cqr); |
1945 | goto restart; | 1945 | goto restart; |
1946 | } | 1946 | } |
1947 | 1947 | ||
1948 | /* Process requests that may be recovered */ | 1948 | /* Process requests that may be recovered */ |
1949 | if (cqr->status == DASD_CQR_NEED_ERP) { | 1949 | if (cqr->status == DASD_CQR_NEED_ERP) { |
1950 | erp_fn = base->discipline->erp_action(cqr); | 1950 | erp_fn = base->discipline->erp_action(cqr); |
1951 | if (IS_ERR(erp_fn(cqr))) | 1951 | if (IS_ERR(erp_fn(cqr))) |
1952 | continue; | 1952 | continue; |
1953 | goto restart; | 1953 | goto restart; |
1954 | } | 1954 | } |
1955 | 1955 | ||
1956 | /* log sense for fatal error */ | 1956 | /* log sense for fatal error */ |
1957 | if (cqr->status == DASD_CQR_FAILED) { | 1957 | if (cqr->status == DASD_CQR_FAILED) { |
1958 | dasd_log_sense(cqr, &cqr->irb); | 1958 | dasd_log_sense(cqr, &cqr->irb); |
1959 | } | 1959 | } |
1960 | 1960 | ||
1961 | /* First of all call extended error reporting. */ | 1961 | /* First of all call extended error reporting. */ |
1962 | if (dasd_eer_enabled(base) && | 1962 | if (dasd_eer_enabled(base) && |
1963 | cqr->status == DASD_CQR_FAILED) { | 1963 | cqr->status == DASD_CQR_FAILED) { |
1964 | dasd_eer_write(base, cqr, DASD_EER_FATALERROR); | 1964 | dasd_eer_write(base, cqr, DASD_EER_FATALERROR); |
1965 | 1965 | ||
1966 | /* restart request */ | 1966 | /* restart request */ |
1967 | cqr->status = DASD_CQR_FILLED; | 1967 | cqr->status = DASD_CQR_FILLED; |
1968 | cqr->retries = 255; | 1968 | cqr->retries = 255; |
1969 | spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); | 1969 | spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); |
1970 | dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); | 1970 | dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); |
1971 | spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), | 1971 | spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), |
1972 | flags); | 1972 | flags); |
1973 | goto restart; | 1973 | goto restart; |
1974 | } | 1974 | } |
1975 | 1975 | ||
1976 | /* Process finished ERP request. */ | 1976 | /* Process finished ERP request. */ |
1977 | if (cqr->refers) { | 1977 | if (cqr->refers) { |
1978 | __dasd_process_erp(base, cqr); | 1978 | __dasd_process_erp(base, cqr); |
1979 | goto restart; | 1979 | goto restart; |
1980 | } | 1980 | } |
1981 | 1981 | ||
1982 | /* Rechain finished requests to final queue */ | 1982 | /* Rechain finished requests to final queue */ |
1983 | cqr->endclk = get_clock(); | 1983 | cqr->endclk = get_clock(); |
1984 | list_move_tail(&cqr->blocklist, final_queue); | 1984 | list_move_tail(&cqr->blocklist, final_queue); |
1985 | } | 1985 | } |
1986 | } | 1986 | } |
1987 | 1987 | ||
1988 | static void dasd_return_cqr_cb(struct dasd_ccw_req *cqr, void *data) | 1988 | static void dasd_return_cqr_cb(struct dasd_ccw_req *cqr, void *data) |
1989 | { | 1989 | { |
1990 | dasd_schedule_block_bh(cqr->block); | 1990 | dasd_schedule_block_bh(cqr->block); |
1991 | } | 1991 | } |
1992 | 1992 | ||
1993 | static void __dasd_block_start_head(struct dasd_block *block) | 1993 | static void __dasd_block_start_head(struct dasd_block *block) |
1994 | { | 1994 | { |
1995 | struct dasd_ccw_req *cqr; | 1995 | struct dasd_ccw_req *cqr; |
1996 | 1996 | ||
1997 | if (list_empty(&block->ccw_queue)) | 1997 | if (list_empty(&block->ccw_queue)) |
1998 | return; | 1998 | return; |
1999 | /* We always begin with the first requests on the queue, as some | 1999 | /* We always begin with the first requests on the queue, as some |
2000 | * of the previously started requests have to be enqueued on a | 2000 | * of the previously started requests have to be enqueued on a |
2001 | * dasd_device again for error recovery. | 2001 | * dasd_device again for error recovery. |
2002 | */ | 2002 | */ |
2003 | list_for_each_entry(cqr, &block->ccw_queue, blocklist) { | 2003 | list_for_each_entry(cqr, &block->ccw_queue, blocklist) { |
2004 | if (cqr->status != DASD_CQR_FILLED) | 2004 | if (cqr->status != DASD_CQR_FILLED) |
2005 | continue; | 2005 | continue; |
2006 | /* Non-temporary stop condition will trigger fail fast */ | 2006 | /* Non-temporary stop condition will trigger fail fast */ |
2007 | if (block->base->stopped & ~DASD_STOPPED_PENDING && | 2007 | if (block->base->stopped & ~DASD_STOPPED_PENDING && |
2008 | test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && | 2008 | test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && |
2009 | (!dasd_eer_enabled(block->base))) { | 2009 | (!dasd_eer_enabled(block->base))) { |
2010 | cqr->status = DASD_CQR_FAILED; | 2010 | cqr->status = DASD_CQR_FAILED; |
2011 | dasd_schedule_block_bh(block); | 2011 | dasd_schedule_block_bh(block); |
2012 | continue; | 2012 | continue; |
2013 | } | 2013 | } |
2014 | /* Don't try to start requests if device is stopped */ | 2014 | /* Don't try to start requests if device is stopped */ |
2015 | if (block->base->stopped) | 2015 | if (block->base->stopped) |
2016 | return; | 2016 | return; |
2017 | 2017 | ||
2018 | /* just a fail safe check, should not happen */ | 2018 | /* just a fail safe check, should not happen */ |
2019 | if (!cqr->startdev) | 2019 | if (!cqr->startdev) |
2020 | cqr->startdev = block->base; | 2020 | cqr->startdev = block->base; |
2021 | 2021 | ||
2022 | /* make sure that the requests we submit find their way back */ | 2022 | /* make sure that the requests we submit find their way back */ |
2023 | cqr->callback = dasd_return_cqr_cb; | 2023 | cqr->callback = dasd_return_cqr_cb; |
2024 | 2024 | ||
2025 | dasd_add_request_tail(cqr); | 2025 | dasd_add_request_tail(cqr); |
2026 | } | 2026 | } |
2027 | } | 2027 | } |
2028 | 2028 | ||
2029 | /* | 2029 | /* |
2030 | * Central dasd_block layer routine. Takes requests from the generic | 2030 | * Central dasd_block layer routine. Takes requests from the generic |
2031 | * block layer request queue, creates ccw requests, enqueues them on | 2031 | * block layer request queue, creates ccw requests, enqueues them on |
2032 | * a dasd_device and processes ccw requests that have been returned. | 2032 | * a dasd_device and processes ccw requests that have been returned. |
2033 | */ | 2033 | */ |
2034 | static void dasd_block_tasklet(struct dasd_block *block) | 2034 | static void dasd_block_tasklet(struct dasd_block *block) |
2035 | { | 2035 | { |
2036 | struct list_head final_queue; | 2036 | struct list_head final_queue; |
2037 | struct list_head *l, *n; | 2037 | struct list_head *l, *n; |
2038 | struct dasd_ccw_req *cqr; | 2038 | struct dasd_ccw_req *cqr; |
2039 | 2039 | ||
2040 | atomic_set(&block->tasklet_scheduled, 0); | 2040 | atomic_set(&block->tasklet_scheduled, 0); |
2041 | INIT_LIST_HEAD(&final_queue); | 2041 | INIT_LIST_HEAD(&final_queue); |
2042 | spin_lock(&block->queue_lock); | 2042 | spin_lock(&block->queue_lock); |
2043 | /* Finish off requests on ccw queue */ | 2043 | /* Finish off requests on ccw queue */ |
2044 | __dasd_process_block_ccw_queue(block, &final_queue); | 2044 | __dasd_process_block_ccw_queue(block, &final_queue); |
2045 | spin_unlock(&block->queue_lock); | 2045 | spin_unlock(&block->queue_lock); |
2046 | /* Now call the callback function of requests with final status */ | 2046 | /* Now call the callback function of requests with final status */ |
2047 | spin_lock_irq(&block->request_queue_lock); | 2047 | spin_lock_irq(&block->request_queue_lock); |
2048 | list_for_each_safe(l, n, &final_queue) { | 2048 | list_for_each_safe(l, n, &final_queue) { |
2049 | cqr = list_entry(l, struct dasd_ccw_req, blocklist); | 2049 | cqr = list_entry(l, struct dasd_ccw_req, blocklist); |
2050 | list_del_init(&cqr->blocklist); | 2050 | list_del_init(&cqr->blocklist); |
2051 | __dasd_cleanup_cqr(cqr); | 2051 | __dasd_cleanup_cqr(cqr); |
2052 | } | 2052 | } |
2053 | spin_lock(&block->queue_lock); | 2053 | spin_lock(&block->queue_lock); |
2054 | /* Get new request from the block device request queue */ | 2054 | /* Get new request from the block device request queue */ |
2055 | __dasd_process_request_queue(block); | 2055 | __dasd_process_request_queue(block); |
2056 | /* Now check if the head of the ccw queue needs to be started. */ | 2056 | /* Now check if the head of the ccw queue needs to be started. */ |
2057 | __dasd_block_start_head(block); | 2057 | __dasd_block_start_head(block); |
2058 | spin_unlock(&block->queue_lock); | 2058 | spin_unlock(&block->queue_lock); |
2059 | spin_unlock_irq(&block->request_queue_lock); | 2059 | spin_unlock_irq(&block->request_queue_lock); |
2060 | dasd_put_device(block->base); | 2060 | dasd_put_device(block->base); |
2061 | } | 2061 | } |
2062 | 2062 | ||
2063 | static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data) | 2063 | static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data) |
2064 | { | 2064 | { |
2065 | wake_up(&dasd_flush_wq); | 2065 | wake_up(&dasd_flush_wq); |
2066 | } | 2066 | } |
2067 | 2067 | ||
2068 | /* | 2068 | /* |
2069 | * Go through all requests on the dasd_block request queue, cancel them | 2069 | * Go through all requests on the dasd_block request queue, cancel them |
2070 | * on the respective dasd_device, and return them to the generic | 2070 | * on the respective dasd_device, and return them to the generic |
2071 | * block layer. | 2071 | * block layer. |
2072 | */ | 2072 | */ |
2073 | static int dasd_flush_block_queue(struct dasd_block *block) | 2073 | static int dasd_flush_block_queue(struct dasd_block *block) |
2074 | { | 2074 | { |
2075 | struct dasd_ccw_req *cqr, *n; | 2075 | struct dasd_ccw_req *cqr, *n; |
2076 | int rc, i; | 2076 | int rc, i; |
2077 | struct list_head flush_queue; | 2077 | struct list_head flush_queue; |
2078 | 2078 | ||
2079 | INIT_LIST_HEAD(&flush_queue); | 2079 | INIT_LIST_HEAD(&flush_queue); |
2080 | spin_lock_bh(&block->queue_lock); | 2080 | spin_lock_bh(&block->queue_lock); |
2081 | rc = 0; | 2081 | rc = 0; |
2082 | restart: | 2082 | restart: |
2083 | list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) { | 2083 | list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) { |
2084 | /* if this request is currently owned by a dasd_device, cancel it */ | 2084 | /* if this request is currently owned by a dasd_device, cancel it */ |
2085 | if (cqr->status >= DASD_CQR_QUEUED) | 2085 | if (cqr->status >= DASD_CQR_QUEUED) |
2086 | rc = dasd_cancel_req(cqr); | 2086 | rc = dasd_cancel_req(cqr); |
2087 | if (rc < 0) | 2087 | if (rc < 0) |
2088 | break; | 2088 | break; |
2089 | /* Rechain request (including erp chain) so it won't be | 2089 | /* Rechain request (including erp chain) so it won't be |
2090 | * touched by the dasd_block_tasklet anymore. | 2090 | * touched by the dasd_block_tasklet anymore. |
2091 | * Replace the callback so we notice when the request | 2091 | * Replace the callback so we notice when the request |
2092 | * is returned from the dasd_device layer. | 2092 | * is returned from the dasd_device layer. |
2093 | */ | 2093 | */ |
2094 | cqr->callback = _dasd_wake_block_flush_cb; | 2094 | cqr->callback = _dasd_wake_block_flush_cb; |
2095 | for (i = 0; cqr != NULL; cqr = cqr->refers, i++) | 2095 | for (i = 0; cqr != NULL; cqr = cqr->refers, i++) |
2096 | list_move_tail(&cqr->blocklist, &flush_queue); | 2096 | list_move_tail(&cqr->blocklist, &flush_queue); |
2097 | if (i > 1) | 2097 | if (i > 1) |
2098 | /* moved more than one request - need to restart */ | 2098 | /* moved more than one request - need to restart */ |
2099 | goto restart; | 2099 | goto restart; |
2100 | } | 2100 | } |
2101 | spin_unlock_bh(&block->queue_lock); | 2101 | spin_unlock_bh(&block->queue_lock); |
2102 | /* Now call the callback function of flushed requests */ | 2102 | /* Now call the callback function of flushed requests */ |
2103 | restart_cb: | 2103 | restart_cb: |
2104 | list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) { | 2104 | list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) { |
2105 | wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED)); | 2105 | wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED)); |
2106 | /* Process finished ERP request. */ | 2106 | /* Process finished ERP request. */ |
2107 | if (cqr->refers) { | 2107 | if (cqr->refers) { |
2108 | spin_lock_bh(&block->queue_lock); | 2108 | spin_lock_bh(&block->queue_lock); |
2109 | __dasd_process_erp(block->base, cqr); | 2109 | __dasd_process_erp(block->base, cqr); |
2110 | spin_unlock_bh(&block->queue_lock); | 2110 | spin_unlock_bh(&block->queue_lock); |
2111 | /* restart list_for_xx loop since dasd_process_erp | 2111 | /* restart list_for_xx loop since dasd_process_erp |
2112 | * might remove multiple elements */ | 2112 | * might remove multiple elements */ |
2113 | goto restart_cb; | 2113 | goto restart_cb; |
2114 | } | 2114 | } |
2115 | /* call the callback function */ | 2115 | /* call the callback function */ |
2116 | spin_lock_irq(&block->request_queue_lock); | 2116 | spin_lock_irq(&block->request_queue_lock); |
2117 | cqr->endclk = get_clock(); | 2117 | cqr->endclk = get_clock(); |
2118 | list_del_init(&cqr->blocklist); | 2118 | list_del_init(&cqr->blocklist); |
2119 | __dasd_cleanup_cqr(cqr); | 2119 | __dasd_cleanup_cqr(cqr); |
2120 | spin_unlock_irq(&block->request_queue_lock); | 2120 | spin_unlock_irq(&block->request_queue_lock); |
2121 | } | 2121 | } |
2122 | return rc; | 2122 | return rc; |
2123 | } | 2123 | } |
2124 | 2124 | ||
2125 | /* | 2125 | /* |
2126 | * Schedules a call to dasd_block_tasklet over the block tasklet. | 2126 | * Schedules a call to dasd_block_tasklet over the block tasklet. |
2127 | */ | 2127 | */ |
2128 | void dasd_schedule_block_bh(struct dasd_block *block) | 2128 | void dasd_schedule_block_bh(struct dasd_block *block) |
2129 | { | 2129 | { |
2130 | /* Protect against rescheduling. */ | 2130 | /* Protect against rescheduling. */ |
2131 | if (atomic_cmpxchg(&block->tasklet_scheduled, 0, 1) != 0) | 2131 | if (atomic_cmpxchg(&block->tasklet_scheduled, 0, 1) != 0) |
2132 | return; | 2132 | return; |
2133 | /* life cycle of block is bound to its base device */ | 2133 | /* life cycle of block is bound to its base device */ |
2134 | dasd_get_device(block->base); | 2134 | dasd_get_device(block->base); |
2135 | tasklet_hi_schedule(&block->tasklet); | 2135 | tasklet_hi_schedule(&block->tasklet); |
2136 | } | 2136 | } |
2137 | 2137 | ||
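The reference taken here is dropped at the end of dasd_block_tasklet() above, so the base device (and with it the block) cannot go away while a tasklet run is pending; schematically:

/* life cycle of one scheduled block tasklet (schematic)
 *   dasd_schedule_block_bh():  dasd_get_device(block->base);
 *                              tasklet_hi_schedule(&block->tasklet);
 *   dasd_block_tasklet():      ...process ccw and request queues...
 *                              dasd_put_device(block->base);
 */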
2138 | 2138 | ||
2139 | /* | 2139 | /* |
2140 | * SECTION: external block device operations | 2140 | * SECTION: external block device operations |
2141 | * (request queue handling, open, release, etc.) | 2141 | * (request queue handling, open, release, etc.) |
2142 | */ | 2142 | */ |
2143 | 2143 | ||
2144 | /* | 2144 | /* |
2145 | * Dasd request queue function. Called from ll_rw_blk.c | 2145 | * Dasd request queue function. Called from ll_rw_blk.c |
2146 | */ | 2146 | */ |
2147 | static void do_dasd_request(struct request_queue *queue) | 2147 | static void do_dasd_request(struct request_queue *queue) |
2148 | { | 2148 | { |
2149 | struct dasd_block *block; | 2149 | struct dasd_block *block; |
2150 | 2150 | ||
2151 | block = queue->queuedata; | 2151 | block = queue->queuedata; |
2152 | spin_lock(&block->queue_lock); | 2152 | spin_lock(&block->queue_lock); |
2153 | /* Get new request from the block device request queue */ | 2153 | /* Get new request from the block device request queue */ |
2154 | __dasd_process_request_queue(block); | 2154 | __dasd_process_request_queue(block); |
2155 | /* Now check if the head of the ccw queue needs to be started. */ | 2155 | /* Now check if the head of the ccw queue needs to be started. */ |
2156 | __dasd_block_start_head(block); | 2156 | __dasd_block_start_head(block); |
2157 | spin_unlock(&block->queue_lock); | 2157 | spin_unlock(&block->queue_lock); |
2158 | } | 2158 | } |
2159 | 2159 | ||
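Note the locking difference to dasd_block_tasklet(): the request function is entered with block->request_queue_lock already held by the block layer (it is the lock handed to blk_init_queue() below), so only queue_lock has to be taken here. Both paths end up with the same nesting:

/* lock nesting (schematic) - identical in both entry paths
 *   do_dasd_request():    request_queue_lock (held by the block layer)
 *                           -> queue_lock
 *   dasd_block_tasklet(): request_queue_lock (taken explicitly)
 *                           -> queue_lock
 */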
2160 | /* | 2160 | /* |
2161 | * Allocate and initialize request queue and default I/O scheduler. | 2161 | * Allocate and initialize request queue and default I/O scheduler. |
2162 | */ | 2162 | */ |
2163 | static int dasd_alloc_queue(struct dasd_block *block) | 2163 | static int dasd_alloc_queue(struct dasd_block *block) |
2164 | { | 2164 | { |
2165 | int rc; | 2165 | int rc; |
2166 | 2166 | ||
2167 | block->request_queue = blk_init_queue(do_dasd_request, | 2167 | block->request_queue = blk_init_queue(do_dasd_request, |
2168 | &block->request_queue_lock); | 2168 | &block->request_queue_lock); |
2169 | if (block->request_queue == NULL) | 2169 | if (block->request_queue == NULL) |
2170 | return -ENOMEM; | 2170 | return -ENOMEM; |
2171 | 2171 | ||
2172 | block->request_queue->queuedata = block; | 2172 | block->request_queue->queuedata = block; |
2173 | 2173 | ||
2174 | elevator_exit(block->request_queue->elevator); | 2174 | elevator_exit(block->request_queue->elevator); |
2175 | block->request_queue->elevator = NULL; | 2175 | block->request_queue->elevator = NULL; |
2176 | rc = elevator_init(block->request_queue, "deadline"); | 2176 | rc = elevator_init(block->request_queue, "deadline"); |
2177 | if (rc) { | 2177 | if (rc) { |
2178 | blk_cleanup_queue(block->request_queue); | 2178 | blk_cleanup_queue(block->request_queue); |
2179 | return rc; | 2179 | return rc; |
2180 | } | 2180 | } |
2181 | return 0; | 2181 | return 0; |
2182 | } | 2182 | } |
2183 | 2183 | ||
2184 | /* | 2184 | /* |
2185 | * Set up request queue parameters. | 2185 | * Set up request queue parameters. |
2186 | */ | 2186 | */ |
2187 | static void dasd_setup_queue(struct dasd_block *block) | 2187 | static void dasd_setup_queue(struct dasd_block *block) |
2188 | { | 2188 | { |
2189 | int max; | 2189 | int max; |
2190 | 2190 | ||
2191 | blk_queue_logical_block_size(block->request_queue, block->bp_block); | 2191 | blk_queue_logical_block_size(block->request_queue, block->bp_block); |
2192 | max = block->base->discipline->max_blocks << block->s2b_shift; | 2192 | max = block->base->discipline->max_blocks << block->s2b_shift; |
2193 | blk_queue_max_hw_sectors(block->request_queue, max); | 2193 | blk_queue_max_hw_sectors(block->request_queue, max); |
2194 | blk_queue_max_segments(block->request_queue, -1L); | 2194 | blk_queue_max_segments(block->request_queue, -1L); |
2195 | /* with page-sized segments we can translate each segment into | 2195 | /* with page-sized segments we can translate each segment into |
2196 | * one idaw/tidaw | 2196 | * one idaw/tidaw |
2197 | */ | 2197 | */ |
2198 | blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); | 2198 | blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); |
2199 | blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); | 2199 | blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); |
2200 | blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); | ||
2201 | } | 2200 | } |
2202 | 2201 | ||
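With the blk_queue_ordered(..., QUEUE_ORDERED_DRAIN) call removed, DASD keeps the block layer default of an empty flush mask, which matches the old QUEUE_ORDERED_DRAIN semantics (no cache flushing required). A driver whose hardware does have a volatile write cache would advertise that through the new interface instead; a minimal sketch for a hypothetical device, not DASD:

/* hypothetical queue setup for a device with a volatile write cache
 * that can also execute forced-unit-access writes */
blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

/* a write-through device such as DASD makes no call at all and keeps
 * the default flush mask of 0 */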
2203 | /* | 2202 | /* |
2204 | * Deactivate and free request queue. | 2203 | * Deactivate and free request queue. |
2205 | */ | 2204 | */ |
2206 | static void dasd_free_queue(struct dasd_block *block) | 2205 | static void dasd_free_queue(struct dasd_block *block) |
2207 | { | 2206 | { |
2208 | if (block->request_queue) { | 2207 | if (block->request_queue) { |
2209 | blk_cleanup_queue(block->request_queue); | 2208 | blk_cleanup_queue(block->request_queue); |
2210 | block->request_queue = NULL; | 2209 | block->request_queue = NULL; |
2211 | } | 2210 | } |
2212 | } | 2211 | } |
2213 | 2212 | ||
2214 | /* | 2213 | /* |
2215 | * Flush request on the request queue. | 2214 | * Flush request on the request queue. |
2216 | */ | 2215 | */ |
2217 | static void dasd_flush_request_queue(struct dasd_block *block) | 2216 | static void dasd_flush_request_queue(struct dasd_block *block) |
2218 | { | 2217 | { |
2219 | struct request *req; | 2218 | struct request *req; |
2220 | 2219 | ||
2221 | if (!block->request_queue) | 2220 | if (!block->request_queue) |
2222 | return; | 2221 | return; |
2223 | 2222 | ||
2224 | spin_lock_irq(&block->request_queue_lock); | 2223 | spin_lock_irq(&block->request_queue_lock); |
2225 | while ((req = blk_fetch_request(block->request_queue))) | 2224 | while ((req = blk_fetch_request(block->request_queue))) |
2226 | __blk_end_request_all(req, -EIO); | 2225 | __blk_end_request_all(req, -EIO); |
2227 | spin_unlock_irq(&block->request_queue_lock); | 2226 | spin_unlock_irq(&block->request_queue_lock); |
2228 | } | 2227 | } |
2229 | 2228 | ||
2230 | static int dasd_open(struct block_device *bdev, fmode_t mode) | 2229 | static int dasd_open(struct block_device *bdev, fmode_t mode) |
2231 | { | 2230 | { |
2232 | struct dasd_block *block = bdev->bd_disk->private_data; | 2231 | struct dasd_block *block = bdev->bd_disk->private_data; |
2233 | struct dasd_device *base; | 2232 | struct dasd_device *base; |
2234 | int rc; | 2233 | int rc; |
2235 | 2234 | ||
2236 | if (!block) | 2235 | if (!block) |
2237 | return -ENODEV; | 2236 | return -ENODEV; |
2238 | 2237 | ||
2239 | lock_kernel(); | 2238 | lock_kernel(); |
2240 | base = block->base; | 2239 | base = block->base; |
2241 | atomic_inc(&block->open_count); | 2240 | atomic_inc(&block->open_count); |
2242 | if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) { | 2241 | if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) { |
2243 | rc = -ENODEV; | 2242 | rc = -ENODEV; |
2244 | goto unlock; | 2243 | goto unlock; |
2245 | } | 2244 | } |
2246 | 2245 | ||
2247 | if (!try_module_get(base->discipline->owner)) { | 2246 | if (!try_module_get(base->discipline->owner)) { |
2248 | rc = -EINVAL; | 2247 | rc = -EINVAL; |
2249 | goto unlock; | 2248 | goto unlock; |
2250 | } | 2249 | } |
2251 | 2250 | ||
2252 | if (dasd_probeonly) { | 2251 | if (dasd_probeonly) { |
2253 | dev_info(&base->cdev->dev, | 2252 | dev_info(&base->cdev->dev, |
2254 | "Accessing the DASD failed because it is in " | 2253 | "Accessing the DASD failed because it is in " |
2255 | "probeonly mode\n"); | 2254 | "probeonly mode\n"); |
2256 | rc = -EPERM; | 2255 | rc = -EPERM; |
2257 | goto out; | 2256 | goto out; |
2258 | } | 2257 | } |
2259 | 2258 | ||
2260 | if (base->state <= DASD_STATE_BASIC) { | 2259 | if (base->state <= DASD_STATE_BASIC) { |
2261 | DBF_DEV_EVENT(DBF_ERR, base, " %s", | 2260 | DBF_DEV_EVENT(DBF_ERR, base, " %s", |
2262 | " Cannot open unrecognized device"); | 2261 | " Cannot open unrecognized device"); |
2263 | rc = -ENODEV; | 2262 | rc = -ENODEV; |
2264 | goto out; | 2263 | goto out; |
2265 | } | 2264 | } |
2266 | 2265 | ||
2267 | if ((mode & FMODE_WRITE) && | 2266 | if ((mode & FMODE_WRITE) && |
2268 | (test_bit(DASD_FLAG_DEVICE_RO, &base->flags) || | 2267 | (test_bit(DASD_FLAG_DEVICE_RO, &base->flags) || |
2269 | (base->features & DASD_FEATURE_READONLY))) { | 2268 | (base->features & DASD_FEATURE_READONLY))) { |
2270 | rc = -EROFS; | 2269 | rc = -EROFS; |
2271 | goto out; | 2270 | goto out; |
2272 | } | 2271 | } |
2273 | 2272 | ||
2274 | unlock_kernel(); | 2273 | unlock_kernel(); |
2275 | return 0; | 2274 | return 0; |
2276 | 2275 | ||
2277 | out: | 2276 | out: |
2278 | module_put(base->discipline->owner); | 2277 | module_put(base->discipline->owner); |
2279 | unlock: | 2278 | unlock: |
2280 | atomic_dec(&block->open_count); | 2279 | atomic_dec(&block->open_count); |
2281 | unlock_kernel(); | 2280 | unlock_kernel(); |
2282 | return rc; | 2281 | return rc; |
2283 | } | 2282 | } |
2284 | 2283 | ||
2285 | static int dasd_release(struct gendisk *disk, fmode_t mode) | 2284 | static int dasd_release(struct gendisk *disk, fmode_t mode) |
2286 | { | 2285 | { |
2287 | struct dasd_block *block = disk->private_data; | 2286 | struct dasd_block *block = disk->private_data; |
2288 | 2287 | ||
2289 | lock_kernel(); | 2288 | lock_kernel(); |
2290 | atomic_dec(&block->open_count); | 2289 | atomic_dec(&block->open_count); |
2291 | module_put(block->base->discipline->owner); | 2290 | module_put(block->base->discipline->owner); |
2292 | unlock_kernel(); | 2291 | unlock_kernel(); |
2293 | return 0; | 2292 | return 0; |
2294 | } | 2293 | } |
2295 | 2294 | ||
2296 | /* | 2295 | /* |
2297 | * Return disk geometry. | 2296 | * Return disk geometry. |
2298 | */ | 2297 | */ |
2299 | static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) | 2298 | static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
2300 | { | 2299 | { |
2301 | struct dasd_block *block; | 2300 | struct dasd_block *block; |
2302 | struct dasd_device *base; | 2301 | struct dasd_device *base; |
2303 | 2302 | ||
2304 | block = bdev->bd_disk->private_data; | 2303 | block = bdev->bd_disk->private_data; |
2305 | if (!block) | 2304 | if (!block) |
2306 | return -ENODEV; | 2305 | return -ENODEV; |
2307 | base = block->base; | 2306 | base = block->base; |
2308 | 2307 | ||
2309 | if (!base->discipline || | 2308 | if (!base->discipline || |
2310 | !base->discipline->fill_geometry) | 2309 | !base->discipline->fill_geometry) |
2311 | return -EINVAL; | 2310 | return -EINVAL; |
2312 | 2311 | ||
2313 | base->discipline->fill_geometry(block, geo); | 2312 | base->discipline->fill_geometry(block, geo); |
2314 | geo->start = get_start_sect(bdev) >> block->s2b_shift; | 2313 | geo->start = get_start_sect(bdev) >> block->s2b_shift; |
2315 | return 0; | 2314 | return 0; |
2316 | } | 2315 | } |
2317 | 2316 | ||
2318 | const struct block_device_operations | 2317 | const struct block_device_operations |
2319 | dasd_device_operations = { | 2318 | dasd_device_operations = { |
2320 | .owner = THIS_MODULE, | 2319 | .owner = THIS_MODULE, |
2321 | .open = dasd_open, | 2320 | .open = dasd_open, |
2322 | .release = dasd_release, | 2321 | .release = dasd_release, |
2323 | .ioctl = dasd_ioctl, | 2322 | .ioctl = dasd_ioctl, |
2324 | .compat_ioctl = dasd_ioctl, | 2323 | .compat_ioctl = dasd_ioctl, |
2325 | .getgeo = dasd_getgeo, | 2324 | .getgeo = dasd_getgeo, |
2326 | }; | 2325 | }; |
2327 | 2326 | ||
2328 | /******************************************************************************* | 2327 | /******************************************************************************* |
2329 | * end of block device operations | 2328 | * end of block device operations |
2330 | */ | 2329 | */ |
2331 | 2330 | ||
2332 | static void | 2331 | static void |
2333 | dasd_exit(void) | 2332 | dasd_exit(void) |
2334 | { | 2333 | { |
2335 | #ifdef CONFIG_PROC_FS | 2334 | #ifdef CONFIG_PROC_FS |
2336 | dasd_proc_exit(); | 2335 | dasd_proc_exit(); |
2337 | #endif | 2336 | #endif |
2338 | dasd_eer_exit(); | 2337 | dasd_eer_exit(); |
2339 | if (dasd_page_cache != NULL) { | 2338 | if (dasd_page_cache != NULL) { |
2340 | kmem_cache_destroy(dasd_page_cache); | 2339 | kmem_cache_destroy(dasd_page_cache); |
2341 | dasd_page_cache = NULL; | 2340 | dasd_page_cache = NULL; |
2342 | } | 2341 | } |
2343 | dasd_gendisk_exit(); | 2342 | dasd_gendisk_exit(); |
2344 | dasd_devmap_exit(); | 2343 | dasd_devmap_exit(); |
2345 | if (dasd_debug_area != NULL) { | 2344 | if (dasd_debug_area != NULL) { |
2346 | debug_unregister(dasd_debug_area); | 2345 | debug_unregister(dasd_debug_area); |
2347 | dasd_debug_area = NULL; | 2346 | dasd_debug_area = NULL; |
2348 | } | 2347 | } |
2349 | } | 2348 | } |
2350 | 2349 | ||
2351 | /* | 2350 | /* |
2352 | * SECTION: common functions for ccw_driver use | 2351 | * SECTION: common functions for ccw_driver use |
2353 | */ | 2352 | */ |
2354 | 2353 | ||
2355 | /* | 2354 | /* |
2356 | * Is the device read-only? | 2355 | * Is the device read-only? |
2357 | * Note that this function does not report the setting of the | 2356 | * Note that this function does not report the setting of the |
2358 | * readonly device attribute, but how it is configured in z/VM. | 2357 | * readonly device attribute, but how it is configured in z/VM. |
2359 | */ | 2358 | */ |
2360 | int dasd_device_is_ro(struct dasd_device *device) | 2359 | int dasd_device_is_ro(struct dasd_device *device) |
2361 | { | 2360 | { |
2362 | struct ccw_dev_id dev_id; | 2361 | struct ccw_dev_id dev_id; |
2363 | struct diag210 diag_data; | 2362 | struct diag210 diag_data; |
2364 | int rc; | 2363 | int rc; |
2365 | 2364 | ||
2366 | if (!MACHINE_IS_VM) | 2365 | if (!MACHINE_IS_VM) |
2367 | return 0; | 2366 | return 0; |
2368 | ccw_device_get_id(device->cdev, &dev_id); | 2367 | ccw_device_get_id(device->cdev, &dev_id); |
2369 | memset(&diag_data, 0, sizeof(diag_data)); | 2368 | memset(&diag_data, 0, sizeof(diag_data)); |
2370 | diag_data.vrdcdvno = dev_id.devno; | 2369 | diag_data.vrdcdvno = dev_id.devno; |
2371 | diag_data.vrdclen = sizeof(diag_data); | 2370 | diag_data.vrdclen = sizeof(diag_data); |
2372 | rc = diag210(&diag_data); | 2371 | rc = diag210(&diag_data); |
2373 | if (rc == 0 || rc == 2) { | 2372 | if (rc == 0 || rc == 2) { |
2374 | return diag_data.vrdcvfla & 0x80; | 2373 | return diag_data.vrdcvfla & 0x80; |
2375 | } else { | 2374 | } else { |
2376 | DBF_EVENT(DBF_WARNING, "diag210 failed for dev=%04x with rc=%d", | 2375 | DBF_EVENT(DBF_WARNING, "diag210 failed for dev=%04x with rc=%d", |
2377 | dev_id.devno, rc); | 2376 | dev_id.devno, rc); |
2378 | return 0; | 2377 | return 0; |
2379 | } | 2378 | } |
2380 | } | 2379 | } |
2381 | EXPORT_SYMBOL_GPL(dasd_device_is_ro); | 2380 | EXPORT_SYMBOL_GPL(dasd_device_is_ro); |
2382 | 2381 | ||
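A discipline would typically consult this helper once while the device is brought online and mirror the z/VM setting into the device flags; a hypothetical caller sketch (the flag is the same DASD_FLAG_DEVICE_RO checked in dasd_open() above):

/* hypothetical discipline setup snippet: honour a read-only
 * minidisk definition in z/VM */
if (dasd_device_is_ro(device))
	set_bit(DASD_FLAG_DEVICE_RO, &device->flags);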
2383 | static void dasd_generic_auto_online(void *data, async_cookie_t cookie) | 2382 | static void dasd_generic_auto_online(void *data, async_cookie_t cookie) |
2384 | { | 2383 | { |
2385 | struct ccw_device *cdev = data; | 2384 | struct ccw_device *cdev = data; |
2386 | int ret; | 2385 | int ret; |
2387 | 2386 | ||
2388 | ret = ccw_device_set_online(cdev); | 2387 | ret = ccw_device_set_online(cdev); |
2389 | if (ret) | 2388 | if (ret) |
2390 | pr_warning("%s: Setting the DASD online failed with rc=%d\n", | 2389 | pr_warning("%s: Setting the DASD online failed with rc=%d\n", |
2391 | dev_name(&cdev->dev), ret); | 2390 | dev_name(&cdev->dev), ret); |
2392 | } | 2391 | } |
2393 | 2392 | ||
2394 | /* | 2393 | /* |
2395 | * Initial attempt at a probe function. This can be simplified once | 2394 | * Initial attempt at a probe function. This can be simplified once |
2396 | * the other detection code is gone. | 2395 | * the other detection code is gone. |
2397 | */ | 2396 | */ |
2398 | int dasd_generic_probe(struct ccw_device *cdev, | 2397 | int dasd_generic_probe(struct ccw_device *cdev, |
2399 | struct dasd_discipline *discipline) | 2398 | struct dasd_discipline *discipline) |
2400 | { | 2399 | { |
2401 | int ret; | 2400 | int ret; |
2402 | 2401 | ||
2403 | ret = dasd_add_sysfs_files(cdev); | 2402 | ret = dasd_add_sysfs_files(cdev); |
2404 | if (ret) { | 2403 | if (ret) { |
2405 | DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s", | 2404 | DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s", |
2406 | "dasd_generic_probe: could not add " | 2405 | "dasd_generic_probe: could not add " |
2407 | "sysfs entries"); | 2406 | "sysfs entries"); |
2408 | return ret; | 2407 | return ret; |
2409 | } | 2408 | } |
2410 | cdev->handler = &dasd_int_handler; | 2409 | cdev->handler = &dasd_int_handler; |
2411 | 2410 | ||
2412 | /* | 2411 | /* |
2413 | * Automatically online either all dasd devices (dasd_autodetect) | 2412 | * Automatically online either all dasd devices (dasd_autodetect) |
2414 | * or all devices specified with dasd= parameters during | 2413 | * or all devices specified with dasd= parameters during |
2415 | * initial probe. | 2414 | * initial probe. |
2416 | */ | 2415 | */ |
2417 | if ((dasd_get_feature(cdev, DASD_FEATURE_INITIAL_ONLINE) > 0 ) || | 2416 | if ((dasd_get_feature(cdev, DASD_FEATURE_INITIAL_ONLINE) > 0 ) || |
2418 | (dasd_autodetect && dasd_busid_known(dev_name(&cdev->dev)) != 0)) | 2417 | (dasd_autodetect && dasd_busid_known(dev_name(&cdev->dev)) != 0)) |
2419 | async_schedule(dasd_generic_auto_online, cdev); | 2418 | async_schedule(dasd_generic_auto_online, cdev); |
2420 | return 0; | 2419 | return 0; |
2421 | } | 2420 | } |
2422 | 2421 | ||
2423 | /* | 2422 | /* |
2424 | * This will one day be called from a global not_oper handler. | 2423 | * This will one day be called from a global not_oper handler. |
2425 | * It is also used by driver_unregister during module unload. | 2424 | * It is also used by driver_unregister during module unload. |
2426 | */ | 2425 | */ |
2427 | void dasd_generic_remove(struct ccw_device *cdev) | 2426 | void dasd_generic_remove(struct ccw_device *cdev) |
2428 | { | 2427 | { |
2429 | struct dasd_device *device; | 2428 | struct dasd_device *device; |
2430 | struct dasd_block *block; | 2429 | struct dasd_block *block; |
2431 | 2430 | ||
2432 | cdev->handler = NULL; | 2431 | cdev->handler = NULL; |
2433 | 2432 | ||
2434 | dasd_remove_sysfs_files(cdev); | 2433 | dasd_remove_sysfs_files(cdev); |
2435 | device = dasd_device_from_cdev(cdev); | 2434 | device = dasd_device_from_cdev(cdev); |
2436 | if (IS_ERR(device)) | 2435 | if (IS_ERR(device)) |
2437 | return; | 2436 | return; |
2438 | if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) { | 2437 | if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) { |
2439 | /* Already doing offline processing */ | 2438 | /* Already doing offline processing */ |
2440 | dasd_put_device(device); | 2439 | dasd_put_device(device); |
2441 | return; | 2440 | return; |
2442 | } | 2441 | } |
2443 | /* | 2442 | /* |
2444 | * This device is removed unconditionally. Set offline | 2443 | * This device is removed unconditionally. Set offline |
2445 | * flag to prevent dasd_open from opening it while it is | 2444 | * flag to prevent dasd_open from opening it while it is |
2446 | * not quite down yet. | 2445 | * not quite down yet. |
2447 | */ | 2446 | */ |
2448 | dasd_set_target_state(device, DASD_STATE_NEW); | 2447 | dasd_set_target_state(device, DASD_STATE_NEW); |
2449 | /* dasd_delete_device destroys the device reference. */ | 2448 | /* dasd_delete_device destroys the device reference. */ |
2450 | block = device->block; | 2449 | block = device->block; |
2451 | device->block = NULL; | 2450 | device->block = NULL; |
2452 | dasd_delete_device(device); | 2451 | dasd_delete_device(device); |
2453 | /* | 2452 | /* |
2454 | * life cycle of block is bound to device, so delete it after | 2453 | * life cycle of block is bound to device, so delete it after |
2455 | * device was safely removed | 2454 | * device was safely removed |
2456 | */ | 2455 | */ |
2457 | if (block) | 2456 | if (block) |
2458 | dasd_free_block(block); | 2457 | dasd_free_block(block); |
2459 | } | 2458 | } |
2460 | 2459 | ||
2461 | /* | 2460 | /* |
2462 | * Activate a device. This is called from dasd_{eckd,fba}_probe() when either | 2461 | * Activate a device. This is called from dasd_{eckd,fba}_probe() when either |
2463 | * the device is detected for the first time and is supposed to be used | 2462 | * the device is detected for the first time and is supposed to be used |
2464 | * or the user has started activation through sysfs. | 2463 | * or the user has started activation through sysfs. |
2465 | */ | 2464 | */ |
2466 | int dasd_generic_set_online(struct ccw_device *cdev, | 2465 | int dasd_generic_set_online(struct ccw_device *cdev, |
2467 | struct dasd_discipline *base_discipline) | 2466 | struct dasd_discipline *base_discipline) |
2468 | { | 2467 | { |
2469 | struct dasd_discipline *discipline; | 2468 | struct dasd_discipline *discipline; |
2470 | struct dasd_device *device; | 2469 | struct dasd_device *device; |
2471 | int rc; | 2470 | int rc; |
2472 | 2471 | ||
2473 | /* first online clears initial online feature flag */ | 2472 | /* first online clears initial online feature flag */ |
2474 | dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); | 2473 | dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); |
2475 | device = dasd_create_device(cdev); | 2474 | device = dasd_create_device(cdev); |
2476 | if (IS_ERR(device)) | 2475 | if (IS_ERR(device)) |
2477 | return PTR_ERR(device); | 2476 | return PTR_ERR(device); |
2478 | 2477 | ||
2479 | discipline = base_discipline; | 2478 | discipline = base_discipline; |
2480 | if (device->features & DASD_FEATURE_USEDIAG) { | 2479 | if (device->features & DASD_FEATURE_USEDIAG) { |
2481 | if (!dasd_diag_discipline_pointer) { | 2480 | if (!dasd_diag_discipline_pointer) { |
2482 | pr_warning("%s Setting the DASD online failed because " | 2481 | pr_warning("%s Setting the DASD online failed because " |
2483 | "of missing DIAG discipline\n", | 2482 | "of missing DIAG discipline\n", |
2484 | dev_name(&cdev->dev)); | 2483 | dev_name(&cdev->dev)); |
2485 | dasd_delete_device(device); | 2484 | dasd_delete_device(device); |
2486 | return -ENODEV; | 2485 | return -ENODEV; |
2487 | } | 2486 | } |
2488 | discipline = dasd_diag_discipline_pointer; | 2487 | discipline = dasd_diag_discipline_pointer; |
2489 | } | 2488 | } |
2490 | if (!try_module_get(base_discipline->owner)) { | 2489 | if (!try_module_get(base_discipline->owner)) { |
2491 | dasd_delete_device(device); | 2490 | dasd_delete_device(device); |
2492 | return -EINVAL; | 2491 | return -EINVAL; |
2493 | } | 2492 | } |
2494 | if (!try_module_get(discipline->owner)) { | 2493 | if (!try_module_get(discipline->owner)) { |
2495 | module_put(base_discipline->owner); | 2494 | module_put(base_discipline->owner); |
2496 | dasd_delete_device(device); | 2495 | dasd_delete_device(device); |
2497 | return -EINVAL; | 2496 | return -EINVAL; |
2498 | } | 2497 | } |
2499 | device->base_discipline = base_discipline; | 2498 | device->base_discipline = base_discipline; |
2500 | device->discipline = discipline; | 2499 | device->discipline = discipline; |
2501 | 2500 | ||
2502 | /* check_device will allocate block device if necessary */ | 2501 | /* check_device will allocate block device if necessary */ |
2503 | rc = discipline->check_device(device); | 2502 | rc = discipline->check_device(device); |
2504 | if (rc) { | 2503 | if (rc) { |
2505 | pr_warning("%s Setting the DASD online with discipline %s " | 2504 | pr_warning("%s Setting the DASD online with discipline %s " |
2506 | "failed with rc=%i\n", | 2505 | "failed with rc=%i\n", |
2507 | dev_name(&cdev->dev), discipline->name, rc); | 2506 | dev_name(&cdev->dev), discipline->name, rc); |
2508 | module_put(discipline->owner); | 2507 | module_put(discipline->owner); |
2509 | module_put(base_discipline->owner); | 2508 | module_put(base_discipline->owner); |
2510 | dasd_delete_device(device); | 2509 | dasd_delete_device(device); |
2511 | return rc; | 2510 | return rc; |
2512 | } | 2511 | } |
2513 | 2512 | ||
2514 | dasd_set_target_state(device, DASD_STATE_ONLINE); | 2513 | dasd_set_target_state(device, DASD_STATE_ONLINE); |
2515 | if (device->state <= DASD_STATE_KNOWN) { | 2514 | if (device->state <= DASD_STATE_KNOWN) { |
2516 | pr_warning("%s Setting the DASD online failed because of a " | 2515 | pr_warning("%s Setting the DASD online failed because of a " |
2517 | "missing discipline\n", dev_name(&cdev->dev)); | 2516 | "missing discipline\n", dev_name(&cdev->dev)); |
2518 | rc = -ENODEV; | 2517 | rc = -ENODEV; |
2519 | dasd_set_target_state(device, DASD_STATE_NEW); | 2518 | dasd_set_target_state(device, DASD_STATE_NEW); |
2520 | if (device->block) | 2519 | if (device->block) |
2521 | dasd_free_block(device->block); | 2520 | dasd_free_block(device->block); |
2522 | dasd_delete_device(device); | 2521 | dasd_delete_device(device); |
2523 | } else | 2522 | } else |
2524 | pr_debug("dasd_generic device %s found\n", | 2523 | pr_debug("dasd_generic device %s found\n", |
2525 | dev_name(&cdev->dev)); | 2524 | dev_name(&cdev->dev)); |
2526 | 2525 | ||
2527 | wait_event(dasd_init_waitq, _wait_for_device(device)); | 2526 | wait_event(dasd_init_waitq, _wait_for_device(device)); |
2528 | 2527 | ||
2529 | dasd_put_device(device); | 2528 | dasd_put_device(device); |
2530 | return rc; | 2529 | return rc; |
2531 | } | 2530 | } |
2532 | 2531 | ||
2533 | int dasd_generic_set_offline(struct ccw_device *cdev) | 2532 | int dasd_generic_set_offline(struct ccw_device *cdev) |
2534 | { | 2533 | { |
2535 | struct dasd_device *device; | 2534 | struct dasd_device *device; |
2536 | struct dasd_block *block; | 2535 | struct dasd_block *block; |
2537 | int max_count, open_count; | 2536 | int max_count, open_count; |
2538 | 2537 | ||
2539 | device = dasd_device_from_cdev(cdev); | 2538 | device = dasd_device_from_cdev(cdev); |
2540 | if (IS_ERR(device)) | 2539 | if (IS_ERR(device)) |
2541 | return PTR_ERR(device); | 2540 | return PTR_ERR(device); |
2542 | if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) { | 2541 | if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) { |
2543 | /* Already doing offline processing */ | 2542 | /* Already doing offline processing */ |
2544 | dasd_put_device(device); | 2543 | dasd_put_device(device); |
2545 | return 0; | 2544 | return 0; |
2546 | } | 2545 | } |
2547 | /* | 2546 | /* |
2548 | * We must make sure that this device is currently not in use. | 2547 | * We must make sure that this device is currently not in use. |
2549 | * The open_count is increased for every opener; that includes | 2548 | * The open_count is increased for every opener; that includes |
2550 | * the blkdev_get in dasd_scan_partitions. We are only interested | 2549 | * the blkdev_get in dasd_scan_partitions. We are only interested |
2551 | * in the other openers. | 2550 | * in the other openers. |
2552 | */ | 2551 | */ |
2553 | if (device->block) { | 2552 | if (device->block) { |
2554 | max_count = device->block->bdev ? 0 : -1; | 2553 | max_count = device->block->bdev ? 0 : -1; |
2555 | open_count = atomic_read(&device->block->open_count); | 2554 | open_count = atomic_read(&device->block->open_count); |
2556 | if (open_count > max_count) { | 2555 | if (open_count > max_count) { |
2557 | if (open_count > 0) | 2556 | if (open_count > 0) |
2558 | pr_warning("%s: The DASD cannot be set offline " | 2557 | pr_warning("%s: The DASD cannot be set offline " |
2559 | "with open count %i\n", | 2558 | "with open count %i\n", |
2560 | dev_name(&cdev->dev), open_count); | 2559 | dev_name(&cdev->dev), open_count); |
2561 | else | 2560 | else |
2562 | pr_warning("%s: The DASD cannot be set offline " | 2561 | pr_warning("%s: The DASD cannot be set offline " |
2563 | "while it is in use\n", | 2562 | "while it is in use\n", |
2564 | dev_name(&cdev->dev)); | 2563 | dev_name(&cdev->dev)); |
2565 | clear_bit(DASD_FLAG_OFFLINE, &device->flags); | 2564 | clear_bit(DASD_FLAG_OFFLINE, &device->flags); |
2566 | dasd_put_device(device); | 2565 | dasd_put_device(device); |
2567 | return -EBUSY; | 2566 | return -EBUSY; |
2568 | } | 2567 | } |
2569 | } | 2568 | } |
2570 | dasd_set_target_state(device, DASD_STATE_NEW); | 2569 | dasd_set_target_state(device, DASD_STATE_NEW); |
2571 | /* dasd_delete_device destroys the device reference. */ | 2570 | /* dasd_delete_device destroys the device reference. */ |
2572 | block = device->block; | 2571 | block = device->block; |
2573 | device->block = NULL; | 2572 | device->block = NULL; |
2574 | dasd_delete_device(device); | 2573 | dasd_delete_device(device); |
2575 | /* | 2574 | /* |
2576 | * life cycle of block is bound to device, so delete it after | 2575 | * life cycle of block is bound to device, so delete it after |
2577 | * device was safely removed | 2576 | * device was safely removed |
2578 | */ | 2577 | */ |
2579 | if (block) | 2578 | if (block) |
2580 | dasd_free_block(block); | 2579 | dasd_free_block(block); |
2581 | return 0; | 2580 | return 0; |
2582 | } | 2581 | } |
2583 | 2582 | ||
2584 | int dasd_generic_notify(struct ccw_device *cdev, int event) | 2583 | int dasd_generic_notify(struct ccw_device *cdev, int event) |
2585 | { | 2584 | { |
2586 | struct dasd_device *device; | 2585 | struct dasd_device *device; |
2587 | struct dasd_ccw_req *cqr; | 2586 | struct dasd_ccw_req *cqr; |
2588 | int ret; | 2587 | int ret; |
2589 | 2588 | ||
2590 | device = dasd_device_from_cdev_locked(cdev); | 2589 | device = dasd_device_from_cdev_locked(cdev); |
2591 | if (IS_ERR(device)) | 2590 | if (IS_ERR(device)) |
2592 | return 0; | 2591 | return 0; |
2593 | ret = 0; | 2592 | ret = 0; |
2594 | switch (event) { | 2593 | switch (event) { |
2595 | case CIO_GONE: | 2594 | case CIO_GONE: |
2596 | case CIO_BOXED: | 2595 | case CIO_BOXED: |
2597 | case CIO_NO_PATH: | 2596 | case CIO_NO_PATH: |
2598 | /* First of all call extended error reporting. */ | 2597 | /* First of all call extended error reporting. */ |
2599 | dasd_eer_write(device, NULL, DASD_EER_NOPATH); | 2598 | dasd_eer_write(device, NULL, DASD_EER_NOPATH); |
2600 | 2599 | ||
2601 | if (device->state < DASD_STATE_BASIC) | 2600 | if (device->state < DASD_STATE_BASIC) |
2602 | break; | 2601 | break; |
2603 | /* Device is active. We want to keep it. */ | 2602 | /* Device is active. We want to keep it. */ |
2604 | list_for_each_entry(cqr, &device->ccw_queue, devlist) | 2603 | list_for_each_entry(cqr, &device->ccw_queue, devlist) |
2605 | if (cqr->status == DASD_CQR_IN_IO) { | 2604 | if (cqr->status == DASD_CQR_IN_IO) { |
2606 | cqr->status = DASD_CQR_QUEUED; | 2605 | cqr->status = DASD_CQR_QUEUED; |
2607 | cqr->retries++; | 2606 | cqr->retries++; |
2608 | } | 2607 | } |
2609 | dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); | 2608 | dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); |
2610 | dasd_device_clear_timer(device); | 2609 | dasd_device_clear_timer(device); |
2611 | dasd_schedule_device_bh(device); | 2610 | dasd_schedule_device_bh(device); |
2612 | ret = 1; | 2611 | ret = 1; |
2613 | break; | 2612 | break; |
2614 | case CIO_OPER: | 2613 | case CIO_OPER: |
2615 | /* FIXME: add a sanity check. */ | 2614 | /* FIXME: add a sanity check. */ |
2616 | dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT); | 2615 | dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT); |
2617 | if (device->stopped & DASD_UNRESUMED_PM) { | 2616 | if (device->stopped & DASD_UNRESUMED_PM) { |
2618 | dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM); | 2617 | dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM); |
2619 | dasd_restore_device(device); | 2618 | dasd_restore_device(device); |
2620 | ret = 1; | 2619 | ret = 1; |
2621 | break; | 2620 | break; |
2622 | } | 2621 | } |
2623 | dasd_schedule_device_bh(device); | 2622 | dasd_schedule_device_bh(device); |
2624 | if (device->block) | 2623 | if (device->block) |
2625 | dasd_schedule_block_bh(device->block); | 2624 | dasd_schedule_block_bh(device->block); |
2626 | ret = 1; | 2625 | ret = 1; |
2627 | break; | 2626 | break; |
2628 | } | 2627 | } |
2629 | dasd_put_device(device); | 2628 | dasd_put_device(device); |
2630 | return ret; | 2629 | return ret; |
2631 | } | 2630 | } |
2632 | 2631 | ||
2633 | int dasd_generic_pm_freeze(struct ccw_device *cdev) | 2632 | int dasd_generic_pm_freeze(struct ccw_device *cdev) |
2634 | { | 2633 | { |
2635 | struct dasd_ccw_req *cqr, *n; | 2634 | struct dasd_ccw_req *cqr, *n; |
2636 | int rc; | 2635 | int rc; |
2637 | struct list_head freeze_queue; | 2636 | struct list_head freeze_queue; |
2638 | struct dasd_device *device = dasd_device_from_cdev(cdev); | 2637 | struct dasd_device *device = dasd_device_from_cdev(cdev); |
2639 | 2638 | ||
2640 | if (IS_ERR(device)) | 2639 | if (IS_ERR(device)) |
2641 | return PTR_ERR(device); | 2640 | return PTR_ERR(device); |
2642 | /* disallow new I/O */ | 2641 | /* disallow new I/O */ |
2643 | dasd_device_set_stop_bits(device, DASD_STOPPED_PM); | 2642 | dasd_device_set_stop_bits(device, DASD_STOPPED_PM); |
2644 | /* clear active requests */ | 2643 | /* clear active requests */ |
2645 | INIT_LIST_HEAD(&freeze_queue); | 2644 | INIT_LIST_HEAD(&freeze_queue); |
2646 | spin_lock_irq(get_ccwdev_lock(cdev)); | 2645 | spin_lock_irq(get_ccwdev_lock(cdev)); |
2647 | rc = 0; | 2646 | rc = 0; |
2648 | list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) { | 2647 | list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) { |
2649 | /* Check status and move request to flush_queue */ | 2648 | /* Check status and move request to flush_queue */ |
2650 | if (cqr->status == DASD_CQR_IN_IO) { | 2649 | if (cqr->status == DASD_CQR_IN_IO) { |
2651 | rc = device->discipline->term_IO(cqr); | 2650 | rc = device->discipline->term_IO(cqr); |
2652 | if (rc) { | 2651 | if (rc) { |
2653 | /* unable to terminate request */ | 2652 | /* unable to terminate request */ |
2654 | dev_err(&device->cdev->dev, | 2653 | dev_err(&device->cdev->dev, |
2655 | "Unable to terminate request %p " | 2654 | "Unable to terminate request %p " |
2656 | "on suspend\n", cqr); | 2655 | "on suspend\n", cqr); |
2657 | spin_unlock_irq(get_ccwdev_lock(cdev)); | 2656 | spin_unlock_irq(get_ccwdev_lock(cdev)); |
2658 | dasd_put_device(device); | 2657 | dasd_put_device(device); |
2659 | return rc; | 2658 | return rc; |
2660 | } | 2659 | } |
2661 | } | 2660 | } |
2662 | list_move_tail(&cqr->devlist, &freeze_queue); | 2661 | list_move_tail(&cqr->devlist, &freeze_queue); |
2663 | } | 2662 | } |
2664 | 2663 | ||
2665 | spin_unlock_irq(get_ccwdev_lock(cdev)); | 2664 | spin_unlock_irq(get_ccwdev_lock(cdev)); |
2666 | 2665 | ||
2667 | list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) { | 2666 | list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) { |
2668 | wait_event(dasd_flush_wq, | 2667 | wait_event(dasd_flush_wq, |
2669 | (cqr->status != DASD_CQR_CLEAR_PENDING)); | 2668 | (cqr->status != DASD_CQR_CLEAR_PENDING)); |
2670 | if (cqr->status == DASD_CQR_CLEARED) | 2669 | if (cqr->status == DASD_CQR_CLEARED) |
2671 | cqr->status = DASD_CQR_QUEUED; | 2670 | cqr->status = DASD_CQR_QUEUED; |
2672 | } | 2671 | } |
2673 | /* move freeze_queue to start of the ccw_queue */ | 2672 | /* move freeze_queue to start of the ccw_queue */ |
2674 | spin_lock_irq(get_ccwdev_lock(cdev)); | 2673 | spin_lock_irq(get_ccwdev_lock(cdev)); |
2675 | list_splice_tail(&freeze_queue, &device->ccw_queue); | 2674 | list_splice_tail(&freeze_queue, &device->ccw_queue); |
2676 | spin_unlock_irq(get_ccwdev_lock(cdev)); | 2675 | spin_unlock_irq(get_ccwdev_lock(cdev)); |
2677 | 2676 | ||
2678 | if (device->discipline->freeze) | 2677 | if (device->discipline->freeze) |
2679 | rc = device->discipline->freeze(device); | 2678 | rc = device->discipline->freeze(device); |
2680 | 2679 | ||
2681 | dasd_put_device(device); | 2680 | dasd_put_device(device); |
2682 | return rc; | 2681 | return rc; |
2683 | } | 2682 | } |
2684 | EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze); | 2683 | EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze); |
2685 | 2684 | ||
2686 | int dasd_generic_restore_device(struct ccw_device *cdev) | 2685 | int dasd_generic_restore_device(struct ccw_device *cdev) |
2687 | { | 2686 | { |
2688 | struct dasd_device *device = dasd_device_from_cdev(cdev); | 2687 | struct dasd_device *device = dasd_device_from_cdev(cdev); |
2689 | int rc = 0; | 2688 | int rc = 0; |
2690 | 2689 | ||
2691 | if (IS_ERR(device)) | 2690 | if (IS_ERR(device)) |
2692 | return PTR_ERR(device); | 2691 | return PTR_ERR(device); |
2693 | 2692 | ||
2694 | /* allow new IO again */ | 2693 | /* allow new IO again */ |
2695 | dasd_device_remove_stop_bits(device, | 2694 | dasd_device_remove_stop_bits(device, |
2696 | (DASD_STOPPED_PM | DASD_UNRESUMED_PM)); | 2695 | (DASD_STOPPED_PM | DASD_UNRESUMED_PM)); |
2697 | 2696 | ||
2698 | dasd_schedule_device_bh(device); | 2697 | dasd_schedule_device_bh(device); |
2699 | 2698 | ||
2700 | /* | 2699 | /* |
2701 | * call discipline restore function | 2700 | * call discipline restore function |
2702 | * if the device is stopped, do nothing, e.g. for disconnected devices | 2701 | * if the device is stopped, do nothing, e.g. for disconnected devices |
2703 | */ | 2702 | */ |
2704 | if (device->discipline->restore && !(device->stopped)) | 2703 | if (device->discipline->restore && !(device->stopped)) |
2705 | rc = device->discipline->restore(device); | 2704 | rc = device->discipline->restore(device); |
2706 | if (rc || device->stopped) | 2705 | if (rc || device->stopped) |
2707 | /* | 2706 | /* |
2708 | * if the resume failed for the DASD we put it in | 2707 | * if the resume failed for the DASD we put it in |
2709 | * an UNRESUMED stop state | 2708 | * an UNRESUMED stop state |
2710 | */ | 2709 | */ |
2711 | device->stopped |= DASD_UNRESUMED_PM; | 2710 | device->stopped |= DASD_UNRESUMED_PM; |
2712 | 2711 | ||
2713 | if (device->block) | 2712 | if (device->block) |
2714 | dasd_schedule_block_bh(device->block); | 2713 | dasd_schedule_block_bh(device->block); |
2715 | 2714 | ||
2716 | dasd_put_device(device); | 2715 | dasd_put_device(device); |
2717 | return 0; | 2716 | return 0; |
2718 | } | 2717 | } |
2719 | EXPORT_SYMBOL_GPL(dasd_generic_restore_device); | 2718 | EXPORT_SYMBOL_GPL(dasd_generic_restore_device); |
2720 | 2719 | ||
2721 | static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, | 2720 | static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, |
2722 | void *rdc_buffer, | 2721 | void *rdc_buffer, |
2723 | int rdc_buffer_size, | 2722 | int rdc_buffer_size, |
2724 | int magic) | 2723 | int magic) |
2725 | { | 2724 | { |
2726 | struct dasd_ccw_req *cqr; | 2725 | struct dasd_ccw_req *cqr; |
2727 | struct ccw1 *ccw; | 2726 | struct ccw1 *ccw; |
2728 | unsigned long *idaw; | 2727 | unsigned long *idaw; |
2729 | 2728 | ||
2730 | cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device); | 2729 | cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device); |
2731 | 2730 | ||
2732 | if (IS_ERR(cqr)) { | 2731 | if (IS_ERR(cqr)) { |
2733 | /* internal error 13 - Allocating the RDC request failed */ | 2732 | /* internal error 13 - Allocating the RDC request failed */ |
2734 | dev_err(&device->cdev->dev, | 2733 | dev_err(&device->cdev->dev, |
2735 | "An error occurred in the DASD device driver, " | 2734 | "An error occurred in the DASD device driver, " |
2736 | "reason=%s\n", "13"); | 2735 | "reason=%s\n", "13"); |
2737 | return cqr; | 2736 | return cqr; |
2738 | } | 2737 | } |
2739 | 2738 | ||
2740 | ccw = cqr->cpaddr; | 2739 | ccw = cqr->cpaddr; |
2741 | ccw->cmd_code = CCW_CMD_RDC; | 2740 | ccw->cmd_code = CCW_CMD_RDC; |
2742 | if (idal_is_needed(rdc_buffer, rdc_buffer_size)) { | 2741 | if (idal_is_needed(rdc_buffer, rdc_buffer_size)) { |
2743 | idaw = (unsigned long *) (cqr->data); | 2742 | idaw = (unsigned long *) (cqr->data); |
2744 | ccw->cda = (__u32)(addr_t) idaw; | 2743 | ccw->cda = (__u32)(addr_t) idaw; |
2745 | ccw->flags = CCW_FLAG_IDA; | 2744 | ccw->flags = CCW_FLAG_IDA; |
2746 | idaw = idal_create_words(idaw, rdc_buffer, rdc_buffer_size); | 2745 | idaw = idal_create_words(idaw, rdc_buffer, rdc_buffer_size); |
2747 | } else { | 2746 | } else { |
2748 | ccw->cda = (__u32)(addr_t) rdc_buffer; | 2747 | ccw->cda = (__u32)(addr_t) rdc_buffer; |
2749 | ccw->flags = 0; | 2748 | ccw->flags = 0; |
2750 | } | 2749 | } |
2751 | 2750 | ||
2752 | ccw->count = rdc_buffer_size; | 2751 | ccw->count = rdc_buffer_size; |
2753 | cqr->startdev = device; | 2752 | cqr->startdev = device; |
2754 | cqr->memdev = device; | 2753 | cqr->memdev = device; |
2755 | cqr->expires = 10*HZ; | 2754 | cqr->expires = 10*HZ; |
2756 | cqr->retries = 256; | 2755 | cqr->retries = 256; |
2757 | cqr->buildclk = get_clock(); | 2756 | cqr->buildclk = get_clock(); |
2758 | cqr->status = DASD_CQR_FILLED; | 2757 | cqr->status = DASD_CQR_FILLED; |
2759 | return cqr; | 2758 | return cqr; |
2760 | } | 2759 | } |
2761 | 2760 | ||
2762 | 2761 | ||
2763 | int dasd_generic_read_dev_chars(struct dasd_device *device, int magic, | 2762 | int dasd_generic_read_dev_chars(struct dasd_device *device, int magic, |
2764 | void *rdc_buffer, int rdc_buffer_size) | 2763 | void *rdc_buffer, int rdc_buffer_size) |
2765 | { | 2764 | { |
2766 | int ret; | 2765 | int ret; |
2767 | struct dasd_ccw_req *cqr; | 2766 | struct dasd_ccw_req *cqr; |
2768 | 2767 | ||
2769 | cqr = dasd_generic_build_rdc(device, rdc_buffer, rdc_buffer_size, | 2768 | cqr = dasd_generic_build_rdc(device, rdc_buffer, rdc_buffer_size, |
2770 | magic); | 2769 | magic); |
2771 | if (IS_ERR(cqr)) | 2770 | if (IS_ERR(cqr)) |
2772 | return PTR_ERR(cqr); | 2771 | return PTR_ERR(cqr); |
2773 | 2772 | ||
2774 | ret = dasd_sleep_on(cqr); | 2773 | ret = dasd_sleep_on(cqr); |
2775 | dasd_sfree_request(cqr, cqr->memdev); | 2774 | dasd_sfree_request(cqr, cqr->memdev); |
2776 | return ret; | 2775 | return ret; |
2777 | } | 2776 | } |
2778 | EXPORT_SYMBOL_GPL(dasd_generic_read_dev_chars); | 2777 | EXPORT_SYMBOL_GPL(dasd_generic_read_dev_chars); |
2779 | 2778 | ||
2780 | /* | 2779 | /* |
2781 | * In command mode and transport mode we need to look for sense | 2780 | * In command mode and transport mode we need to look for sense |
2782 | * data in different places. The sense data itself is always | 2781 | * data in different places. The sense data itself is always |
2783 | * an array of 32 bytes, so we can unify the sense data access | 2782 | * an array of 32 bytes, so we can unify the sense data access |
2784 | * for both modes. | 2783 | * for both modes. |
2785 | */ | 2784 | */ |
2786 | char *dasd_get_sense(struct irb *irb) | 2785 | char *dasd_get_sense(struct irb *irb) |
2787 | { | 2786 | { |
2788 | struct tsb *tsb = NULL; | 2787 | struct tsb *tsb = NULL; |
2789 | char *sense = NULL; | 2788 | char *sense = NULL; |
2790 | 2789 | ||
2791 | if (scsw_is_tm(&irb->scsw) && (irb->scsw.tm.fcxs == 0x01)) { | 2790 | if (scsw_is_tm(&irb->scsw) && (irb->scsw.tm.fcxs == 0x01)) { |
2792 | if (irb->scsw.tm.tcw) | 2791 | if (irb->scsw.tm.tcw) |
2793 | tsb = tcw_get_tsb((struct tcw *)(unsigned long) | 2792 | tsb = tcw_get_tsb((struct tcw *)(unsigned long) |
2794 | irb->scsw.tm.tcw); | 2793 | irb->scsw.tm.tcw); |
2795 | if (tsb && tsb->length == 64 && tsb->flags) | 2794 | if (tsb && tsb->length == 64 && tsb->flags) |
2796 | switch (tsb->flags & 0x07) { | 2795 | switch (tsb->flags & 0x07) { |
2797 | case 1: /* tsa_iostat */ | 2796 | case 1: /* tsa_iostat */ |
2798 | sense = tsb->tsa.iostat.sense; | 2797 | sense = tsb->tsa.iostat.sense; |
2799 | break; | 2798 | break; |
2800 | case 2: /* tsa_ddpc */ | 2799 | case 2: /* tsa_ddpc */ |
2801 | sense = tsb->tsa.ddpc.sense; | 2800 | sense = tsb->tsa.ddpc.sense; |
2802 | break; | 2801 | break; |
2803 | default: | 2802 | default: |
2804 | /* currently we don't use interrogate data */ | 2803 | /* currently we don't use interrogate data */ |
2805 | break; | 2804 | break; |
2806 | } | 2805 | } |
2807 | } else if (irb->esw.esw0.erw.cons) { | 2806 | } else if (irb->esw.esw0.erw.cons) { |
2808 | sense = irb->ecw; | 2807 | sense = irb->ecw; |
2809 | } | 2808 | } |
2810 | return sense; | 2809 | return sense; |
2811 | } | 2810 | } |
2812 | EXPORT_SYMBOL_GPL(dasd_get_sense); | 2811 | EXPORT_SYMBOL_GPL(dasd_get_sense); |
2813 | 2812 | ||
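The comment above dasd_get_sense() states the invariant callers rely on: a non-NULL return always points at a 32-byte sense buffer, regardless of whether the request ran in command or transport mode. A minimal, hypothetical caller sketch (the function name and the reporting choice are illustrative only, not code from this commit):

	/* Hypothetical usage sketch -- not part of this diff. */
	static void example_report_sense(struct dasd_device *device, struct irb *irb)
	{
		char *sense = dasd_get_sense(irb);

		if (!sense)
			return;	/* no sense data delivered with this interrupt */

		/* the buffer is always 32 bytes, for both command and transport mode */
		dev_err(&device->cdev->dev, "sense bytes 0-2: %02x %02x %02x\n",
			sense[0] & 0xff, sense[1] & 0xff, sense[2] & 0xff);
	}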
2814 | static int __init dasd_init(void) | 2813 | static int __init dasd_init(void) |
2815 | { | 2814 | { |
2816 | int rc; | 2815 | int rc; |
2817 | 2816 | ||
2818 | init_waitqueue_head(&dasd_init_waitq); | 2817 | init_waitqueue_head(&dasd_init_waitq); |
2819 | init_waitqueue_head(&dasd_flush_wq); | 2818 | init_waitqueue_head(&dasd_flush_wq); |
2820 | init_waitqueue_head(&generic_waitq); | 2819 | init_waitqueue_head(&generic_waitq); |
2821 | 2820 | ||
2822 | /* register 'common' DASD debug area, used for all DBF_XXX calls */ | 2821 | /* register 'common' DASD debug area, used for all DBF_XXX calls */ |
2823 | dasd_debug_area = debug_register("dasd", 1, 1, 8 * sizeof(long)); | 2822 | dasd_debug_area = debug_register("dasd", 1, 1, 8 * sizeof(long)); |
2824 | if (dasd_debug_area == NULL) { | 2823 | if (dasd_debug_area == NULL) { |
2825 | rc = -ENOMEM; | 2824 | rc = -ENOMEM; |
2826 | goto failed; | 2825 | goto failed; |
2827 | } | 2826 | } |
2828 | debug_register_view(dasd_debug_area, &debug_sprintf_view); | 2827 | debug_register_view(dasd_debug_area, &debug_sprintf_view); |
2829 | debug_set_level(dasd_debug_area, DBF_WARNING); | 2828 | debug_set_level(dasd_debug_area, DBF_WARNING); |
2830 | 2829 | ||
2831 | DBF_EVENT(DBF_EMERG, "%s", "debug area created"); | 2830 | DBF_EVENT(DBF_EMERG, "%s", "debug area created"); |
2832 | 2831 | ||
2833 | dasd_diag_discipline_pointer = NULL; | 2832 | dasd_diag_discipline_pointer = NULL; |
2834 | 2833 | ||
2835 | rc = dasd_devmap_init(); | 2834 | rc = dasd_devmap_init(); |
2836 | if (rc) | 2835 | if (rc) |
2837 | goto failed; | 2836 | goto failed; |
2838 | rc = dasd_gendisk_init(); | 2837 | rc = dasd_gendisk_init(); |
2839 | if (rc) | 2838 | if (rc) |
2840 | goto failed; | 2839 | goto failed; |
2841 | rc = dasd_parse(); | 2840 | rc = dasd_parse(); |
2842 | if (rc) | 2841 | if (rc) |
2843 | goto failed; | 2842 | goto failed; |
2844 | rc = dasd_eer_init(); | 2843 | rc = dasd_eer_init(); |
2845 | if (rc) | 2844 | if (rc) |
2846 | goto failed; | 2845 | goto failed; |
2847 | #ifdef CONFIG_PROC_FS | 2846 | #ifdef CONFIG_PROC_FS |
2848 | rc = dasd_proc_init(); | 2847 | rc = dasd_proc_init(); |
2849 | if (rc) | 2848 | if (rc) |
2850 | goto failed; | 2849 | goto failed; |
2851 | #endif | 2850 | #endif |
2852 | 2851 | ||
2853 | return 0; | 2852 | return 0; |
2854 | failed: | 2853 | failed: |
2855 | pr_info("The DASD device driver could not be initialized\n"); | 2854 | pr_info("The DASD device driver could not be initialized\n"); |
2856 | dasd_exit(); | 2855 | dasd_exit(); |
2857 | return rc; | 2856 | return rc; |
2858 | } | 2857 | } |
2859 | 2858 | ||
2860 | module_init(dasd_init); | 2859 | module_init(dasd_init); |
2861 | module_exit(dasd_exit); | 2860 | module_exit(dasd_exit); |
2862 | 2861 | ||
2863 | EXPORT_SYMBOL(dasd_debug_area); | 2862 | EXPORT_SYMBOL(dasd_debug_area); |
2864 | EXPORT_SYMBOL(dasd_diag_discipline_pointer); | 2863 | EXPORT_SYMBOL(dasd_diag_discipline_pointer); |
2865 | 2864 | ||
2866 | EXPORT_SYMBOL(dasd_add_request_head); | 2865 | EXPORT_SYMBOL(dasd_add_request_head); |
2867 | EXPORT_SYMBOL(dasd_add_request_tail); | 2866 | EXPORT_SYMBOL(dasd_add_request_tail); |
2868 | EXPORT_SYMBOL(dasd_cancel_req); | 2867 | EXPORT_SYMBOL(dasd_cancel_req); |
2869 | EXPORT_SYMBOL(dasd_device_clear_timer); | 2868 | EXPORT_SYMBOL(dasd_device_clear_timer); |
2870 | EXPORT_SYMBOL(dasd_block_clear_timer); | 2869 | EXPORT_SYMBOL(dasd_block_clear_timer); |
2871 | EXPORT_SYMBOL(dasd_enable_device); | 2870 | EXPORT_SYMBOL(dasd_enable_device); |
2872 | EXPORT_SYMBOL(dasd_int_handler); | 2871 | EXPORT_SYMBOL(dasd_int_handler); |
2873 | EXPORT_SYMBOL(dasd_kfree_request); | 2872 | EXPORT_SYMBOL(dasd_kfree_request); |
2874 | EXPORT_SYMBOL(dasd_kick_device); | 2873 | EXPORT_SYMBOL(dasd_kick_device); |
2875 | EXPORT_SYMBOL(dasd_kmalloc_request); | 2874 | EXPORT_SYMBOL(dasd_kmalloc_request); |
2876 | EXPORT_SYMBOL(dasd_schedule_device_bh); | 2875 | EXPORT_SYMBOL(dasd_schedule_device_bh); |
2877 | EXPORT_SYMBOL(dasd_schedule_block_bh); | 2876 | EXPORT_SYMBOL(dasd_schedule_block_bh); |
2878 | EXPORT_SYMBOL(dasd_set_target_state); | 2877 | EXPORT_SYMBOL(dasd_set_target_state); |
2879 | EXPORT_SYMBOL(dasd_device_set_timer); | 2878 | EXPORT_SYMBOL(dasd_device_set_timer); |
2880 | EXPORT_SYMBOL(dasd_block_set_timer); | 2879 | EXPORT_SYMBOL(dasd_block_set_timer); |
2881 | EXPORT_SYMBOL(dasd_sfree_request); | 2880 | EXPORT_SYMBOL(dasd_sfree_request); |
2882 | EXPORT_SYMBOL(dasd_sleep_on); | 2881 | EXPORT_SYMBOL(dasd_sleep_on); |
2883 | EXPORT_SYMBOL(dasd_sleep_on_immediatly); | 2882 | EXPORT_SYMBOL(dasd_sleep_on_immediatly); |
2884 | EXPORT_SYMBOL(dasd_sleep_on_interruptible); | 2883 | EXPORT_SYMBOL(dasd_sleep_on_interruptible); |
2885 | EXPORT_SYMBOL(dasd_smalloc_request); | 2884 | EXPORT_SYMBOL(dasd_smalloc_request); |
2886 | EXPORT_SYMBOL(dasd_start_IO); | 2885 | EXPORT_SYMBOL(dasd_start_IO); |
2887 | EXPORT_SYMBOL(dasd_term_IO); | 2886 | EXPORT_SYMBOL(dasd_term_IO); |
2888 | 2887 | ||
2889 | EXPORT_SYMBOL_GPL(dasd_generic_probe); | 2888 | EXPORT_SYMBOL_GPL(dasd_generic_probe); |
2890 | EXPORT_SYMBOL_GPL(dasd_generic_remove); | 2889 | EXPORT_SYMBOL_GPL(dasd_generic_remove); |
2891 | EXPORT_SYMBOL_GPL(dasd_generic_notify); | 2890 | EXPORT_SYMBOL_GPL(dasd_generic_notify); |
2892 | EXPORT_SYMBOL_GPL(dasd_generic_set_online); | 2891 | EXPORT_SYMBOL_GPL(dasd_generic_set_online); |
2893 | EXPORT_SYMBOL_GPL(dasd_generic_set_offline); | 2892 | EXPORT_SYMBOL_GPL(dasd_generic_set_offline); |
2894 | EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); | 2893 | EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); |
2895 | EXPORT_SYMBOL_GPL(dasd_flush_device_queue); | 2894 | EXPORT_SYMBOL_GPL(dasd_flush_device_queue); |
2896 | EXPORT_SYMBOL_GPL(dasd_alloc_block); | 2895 | EXPORT_SYMBOL_GPL(dasd_alloc_block); |
2897 | EXPORT_SYMBOL_GPL(dasd_free_block); | 2896 | EXPORT_SYMBOL_GPL(dasd_free_block); |
2898 | 2897 |
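With blk_queue_ordered() removed, a driver that only needs request draining (no cache to flush) no longer has to declare anything at queue-setup time, while a driver with a volatile write cache declares its capabilities once with blk_queue_flush(). A hedged sketch of the two styles, using a generic queue pointer q rather than this driver's actual setup code:

	/* Sketch only -- illustrative conversion, not the exact hunk from this commit.
	 *
	 * old:	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
	 * new:	nothing at all -- drain-only ordering is the default behaviour.
	 *
	 * A device with a volatile write cache would instead advertise it:
	 */
	blk_queue_flush(q, REQ_FLUSH);	/* or REQ_FLUSH | REQ_FUA if FUA writes work */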
drivers/scsi/sd.c
1 | /* | 1 | /* |
2 | * sd.c Copyright (C) 1992 Drew Eckhardt | 2 | * sd.c Copyright (C) 1992 Drew Eckhardt |
3 | * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale | 3 | * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale |
4 | * | 4 | * |
5 | * Linux scsi disk driver | 5 | * Linux scsi disk driver |
6 | * Initial versions: Drew Eckhardt | 6 | * Initial versions: Drew Eckhardt |
7 | * Subsequent revisions: Eric Youngdale | 7 | * Subsequent revisions: Eric Youngdale |
8 | * Modification history: | 8 | * Modification history: |
9 | * - Drew Eckhardt <drew@colorado.edu> original | 9 | * - Drew Eckhardt <drew@colorado.edu> original |
10 | * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple | 10 | * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple |
11 | * outstanding request, and other enhancements. | 11 | * outstanding request, and other enhancements. |
12 | * Support loadable low-level scsi drivers. | 12 | * Support loadable low-level scsi drivers. |
13 | * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using | 13 | * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using |
14 | * eight major numbers. | 14 | * eight major numbers. |
15 | * - Richard Gooch <rgooch@atnf.csiro.au> support devfs. | 15 | * - Richard Gooch <rgooch@atnf.csiro.au> support devfs. |
16 | * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in | 16 | * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in |
17 | * sd_init and cleanups. | 17 | * sd_init and cleanups. |
18 | * - Alex Davis <letmein@erols.com> Fix problem where partition info | 18 | * - Alex Davis <letmein@erols.com> Fix problem where partition info |
19 | * not being read in sd_open. Fix problem where removable media | 19 | * not being read in sd_open. Fix problem where removable media |
20 | * could be ejected after sd_open. | 20 | * could be ejected after sd_open. |
21 | * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x | 21 | * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x |
22 | * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox | 22 | * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox |
23 | * <willy@debian.org>, Kurt Garloff <garloff@suse.de>: | 23 | * <willy@debian.org>, Kurt Garloff <garloff@suse.de>: |
24 | * Support 32k/1M disks. | 24 | * Support 32k/1M disks. |
25 | * | 25 | * |
26 | * Logging policy (needs CONFIG_SCSI_LOGGING defined): | 26 | * Logging policy (needs CONFIG_SCSI_LOGGING defined): |
27 | * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2 | 27 | * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2 |
28 | * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1 | 28 | * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1 |
29 | * - entering sd_ioctl: SCSI_LOG_IOCTL level 1 | 29 | * - entering sd_ioctl: SCSI_LOG_IOCTL level 1 |
30 | * - entering other commands: SCSI_LOG_HLQUEUE level 3 | 30 | * - entering other commands: SCSI_LOG_HLQUEUE level 3 |
31 | * Note: when the logging level is set by the user, it must be greater | 31 | * Note: when the logging level is set by the user, it must be greater |
32 | * than the level indicated above to trigger output. | 32 | * than the level indicated above to trigger output. |
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
36 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
37 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
38 | #include <linux/mm.h> | 38 | #include <linux/mm.h> |
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/genhd.h> | 40 | #include <linux/genhd.h> |
41 | #include <linux/hdreg.h> | 41 | #include <linux/hdreg.h> |
42 | #include <linux/errno.h> | 42 | #include <linux/errno.h> |
43 | #include <linux/idr.h> | 43 | #include <linux/idr.h> |
44 | #include <linux/interrupt.h> | 44 | #include <linux/interrupt.h> |
45 | #include <linux/init.h> | 45 | #include <linux/init.h> |
46 | #include <linux/blkdev.h> | 46 | #include <linux/blkdev.h> |
47 | #include <linux/blkpg.h> | 47 | #include <linux/blkpg.h> |
48 | #include <linux/delay.h> | 48 | #include <linux/delay.h> |
49 | #include <linux/smp_lock.h> | 49 | #include <linux/smp_lock.h> |
50 | #include <linux/mutex.h> | 50 | #include <linux/mutex.h> |
51 | #include <linux/string_helpers.h> | 51 | #include <linux/string_helpers.h> |
52 | #include <linux/async.h> | 52 | #include <linux/async.h> |
53 | #include <linux/slab.h> | 53 | #include <linux/slab.h> |
54 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
55 | #include <asm/unaligned.h> | 55 | #include <asm/unaligned.h> |
56 | 56 | ||
57 | #include <scsi/scsi.h> | 57 | #include <scsi/scsi.h> |
58 | #include <scsi/scsi_cmnd.h> | 58 | #include <scsi/scsi_cmnd.h> |
59 | #include <scsi/scsi_dbg.h> | 59 | #include <scsi/scsi_dbg.h> |
60 | #include <scsi/scsi_device.h> | 60 | #include <scsi/scsi_device.h> |
61 | #include <scsi/scsi_driver.h> | 61 | #include <scsi/scsi_driver.h> |
62 | #include <scsi/scsi_eh.h> | 62 | #include <scsi/scsi_eh.h> |
63 | #include <scsi/scsi_host.h> | 63 | #include <scsi/scsi_host.h> |
64 | #include <scsi/scsi_ioctl.h> | 64 | #include <scsi/scsi_ioctl.h> |
65 | #include <scsi/scsicam.h> | 65 | #include <scsi/scsicam.h> |
66 | 66 | ||
67 | #include "sd.h" | 67 | #include "sd.h" |
68 | #include "scsi_logging.h" | 68 | #include "scsi_logging.h" |
69 | 69 | ||
70 | MODULE_AUTHOR("Eric Youngdale"); | 70 | MODULE_AUTHOR("Eric Youngdale"); |
71 | MODULE_DESCRIPTION("SCSI disk (sd) driver"); | 71 | MODULE_DESCRIPTION("SCSI disk (sd) driver"); |
72 | MODULE_LICENSE("GPL"); | 72 | MODULE_LICENSE("GPL"); |
73 | 73 | ||
74 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR); | 74 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR); |
75 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR); | 75 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR); |
76 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR); | 76 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR); |
77 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR); | 77 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR); |
78 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR); | 78 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR); |
79 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR); | 79 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR); |
80 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR); | 80 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR); |
81 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR); | 81 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR); |
82 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR); | 82 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR); |
83 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR); | 83 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR); |
84 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR); | 84 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR); |
85 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR); | 85 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR); |
86 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR); | 86 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR); |
87 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR); | 87 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR); |
88 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); | 88 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); |
89 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); | 89 | MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); |
90 | MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); | 90 | MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); |
91 | MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); | 91 | MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); |
92 | MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); | 92 | MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); |
93 | 93 | ||
94 | #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) | 94 | #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) |
95 | #define SD_MINORS 16 | 95 | #define SD_MINORS 16 |
96 | #else | 96 | #else |
97 | #define SD_MINORS 0 | 97 | #define SD_MINORS 0 |
98 | #endif | 98 | #endif |
99 | 99 | ||
100 | static int sd_revalidate_disk(struct gendisk *); | 100 | static int sd_revalidate_disk(struct gendisk *); |
101 | static void sd_unlock_native_capacity(struct gendisk *disk); | 101 | static void sd_unlock_native_capacity(struct gendisk *disk); |
102 | static int sd_probe(struct device *); | 102 | static int sd_probe(struct device *); |
103 | static int sd_remove(struct device *); | 103 | static int sd_remove(struct device *); |
104 | static void sd_shutdown(struct device *); | 104 | static void sd_shutdown(struct device *); |
105 | static int sd_suspend(struct device *, pm_message_t state); | 105 | static int sd_suspend(struct device *, pm_message_t state); |
106 | static int sd_resume(struct device *); | 106 | static int sd_resume(struct device *); |
107 | static void sd_rescan(struct device *); | 107 | static void sd_rescan(struct device *); |
108 | static int sd_done(struct scsi_cmnd *); | 108 | static int sd_done(struct scsi_cmnd *); |
109 | static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); | 109 | static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); |
110 | static void scsi_disk_release(struct device *cdev); | 110 | static void scsi_disk_release(struct device *cdev); |
111 | static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *); | 111 | static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *); |
112 | static void sd_print_result(struct scsi_disk *, int); | 112 | static void sd_print_result(struct scsi_disk *, int); |
113 | 113 | ||
114 | static DEFINE_SPINLOCK(sd_index_lock); | 114 | static DEFINE_SPINLOCK(sd_index_lock); |
115 | static DEFINE_IDA(sd_index_ida); | 115 | static DEFINE_IDA(sd_index_ida); |
116 | 116 | ||
117 | /* This mutex is used to mediate the 0->1 reference get in the | 117 | /* This mutex is used to mediate the 0->1 reference get in the |
118 | * face of object destruction (i.e. we can't allow a get on an | 118 | * face of object destruction (i.e. we can't allow a get on an |
119 | * object after last put) */ | 119 | * object after last put) */ |
120 | static DEFINE_MUTEX(sd_ref_mutex); | 120 | static DEFINE_MUTEX(sd_ref_mutex); |
121 | 121 | ||
122 | static struct kmem_cache *sd_cdb_cache; | 122 | static struct kmem_cache *sd_cdb_cache; |
123 | static mempool_t *sd_cdb_pool; | 123 | static mempool_t *sd_cdb_pool; |
124 | 124 | ||
125 | static const char *sd_cache_types[] = { | 125 | static const char *sd_cache_types[] = { |
126 | "write through", "none", "write back", | 126 | "write through", "none", "write back", |
127 | "write back, no read (daft)" | 127 | "write back, no read (daft)" |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static ssize_t | 130 | static ssize_t |
131 | sd_store_cache_type(struct device *dev, struct device_attribute *attr, | 131 | sd_store_cache_type(struct device *dev, struct device_attribute *attr, |
132 | const char *buf, size_t count) | 132 | const char *buf, size_t count) |
133 | { | 133 | { |
134 | int i, ct = -1, rcd, wce, sp; | 134 | int i, ct = -1, rcd, wce, sp; |
135 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 135 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
136 | struct scsi_device *sdp = sdkp->device; | 136 | struct scsi_device *sdp = sdkp->device; |
137 | char buffer[64]; | 137 | char buffer[64]; |
138 | char *buffer_data; | 138 | char *buffer_data; |
139 | struct scsi_mode_data data; | 139 | struct scsi_mode_data data; |
140 | struct scsi_sense_hdr sshdr; | 140 | struct scsi_sense_hdr sshdr; |
141 | int len; | 141 | int len; |
142 | 142 | ||
143 | if (sdp->type != TYPE_DISK) | 143 | if (sdp->type != TYPE_DISK) |
144 | /* no cache control on RBC devices; theoretically they | 144 | /* no cache control on RBC devices; theoretically they |
145 | * can do it, but there are probably so many exceptions | 145 | * can do it, but there are probably so many exceptions |
146 | * it's not worth the risk */ | 146 | * it's not worth the risk */ |
147 | return -EINVAL; | 147 | return -EINVAL; |
148 | 148 | ||
149 | for (i = 0; i < ARRAY_SIZE(sd_cache_types); i++) { | 149 | for (i = 0; i < ARRAY_SIZE(sd_cache_types); i++) { |
150 | len = strlen(sd_cache_types[i]); | 150 | len = strlen(sd_cache_types[i]); |
151 | if (strncmp(sd_cache_types[i], buf, len) == 0 && | 151 | if (strncmp(sd_cache_types[i], buf, len) == 0 && |
152 | buf[len] == '\n') { | 152 | buf[len] == '\n') { |
153 | ct = i; | 153 | ct = i; |
154 | break; | 154 | break; |
155 | } | 155 | } |
156 | } | 156 | } |
157 | if (ct < 0) | 157 | if (ct < 0) |
158 | return -EINVAL; | 158 | return -EINVAL; |
159 | rcd = ct & 0x01 ? 1 : 0; | 159 | rcd = ct & 0x01 ? 1 : 0; |
160 | wce = ct & 0x02 ? 1 : 0; | 160 | wce = ct & 0x02 ? 1 : 0; |
161 | if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT, | 161 | if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT, |
162 | SD_MAX_RETRIES, &data, NULL)) | 162 | SD_MAX_RETRIES, &data, NULL)) |
163 | return -EINVAL; | 163 | return -EINVAL; |
164 | len = min_t(size_t, sizeof(buffer), data.length - data.header_length - | 164 | len = min_t(size_t, sizeof(buffer), data.length - data.header_length - |
165 | data.block_descriptor_length); | 165 | data.block_descriptor_length); |
166 | buffer_data = buffer + data.header_length + | 166 | buffer_data = buffer + data.header_length + |
167 | data.block_descriptor_length; | 167 | data.block_descriptor_length; |
168 | buffer_data[2] &= ~0x05; | 168 | buffer_data[2] &= ~0x05; |
169 | buffer_data[2] |= wce << 2 | rcd; | 169 | buffer_data[2] |= wce << 2 | rcd; |
170 | sp = buffer_data[0] & 0x80 ? 1 : 0; | 170 | sp = buffer_data[0] & 0x80 ? 1 : 0; |
171 | 171 | ||
172 | if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, | 172 | if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, |
173 | SD_MAX_RETRIES, &data, &sshdr)) { | 173 | SD_MAX_RETRIES, &data, &sshdr)) { |
174 | if (scsi_sense_valid(&sshdr)) | 174 | if (scsi_sense_valid(&sshdr)) |
175 | sd_print_sense_hdr(sdkp, &sshdr); | 175 | sd_print_sense_hdr(sdkp, &sshdr); |
176 | return -EINVAL; | 176 | return -EINVAL; |
177 | } | 177 | } |
178 | revalidate_disk(sdkp->disk); | 178 | revalidate_disk(sdkp->disk); |
179 | return count; | 179 | return count; |
180 | } | 180 | } |
181 | 181 | ||
182 | static ssize_t | 182 | static ssize_t |
183 | sd_store_manage_start_stop(struct device *dev, struct device_attribute *attr, | 183 | sd_store_manage_start_stop(struct device *dev, struct device_attribute *attr, |
184 | const char *buf, size_t count) | 184 | const char *buf, size_t count) |
185 | { | 185 | { |
186 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 186 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
187 | struct scsi_device *sdp = sdkp->device; | 187 | struct scsi_device *sdp = sdkp->device; |
188 | 188 | ||
189 | if (!capable(CAP_SYS_ADMIN)) | 189 | if (!capable(CAP_SYS_ADMIN)) |
190 | return -EACCES; | 190 | return -EACCES; |
191 | 191 | ||
192 | sdp->manage_start_stop = simple_strtoul(buf, NULL, 10); | 192 | sdp->manage_start_stop = simple_strtoul(buf, NULL, 10); |
193 | 193 | ||
194 | return count; | 194 | return count; |
195 | } | 195 | } |
196 | 196 | ||
197 | static ssize_t | 197 | static ssize_t |
198 | sd_store_allow_restart(struct device *dev, struct device_attribute *attr, | 198 | sd_store_allow_restart(struct device *dev, struct device_attribute *attr, |
199 | const char *buf, size_t count) | 199 | const char *buf, size_t count) |
200 | { | 200 | { |
201 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 201 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
202 | struct scsi_device *sdp = sdkp->device; | 202 | struct scsi_device *sdp = sdkp->device; |
203 | 203 | ||
204 | if (!capable(CAP_SYS_ADMIN)) | 204 | if (!capable(CAP_SYS_ADMIN)) |
205 | return -EACCES; | 205 | return -EACCES; |
206 | 206 | ||
207 | if (sdp->type != TYPE_DISK) | 207 | if (sdp->type != TYPE_DISK) |
208 | return -EINVAL; | 208 | return -EINVAL; |
209 | 209 | ||
210 | sdp->allow_restart = simple_strtoul(buf, NULL, 10); | 210 | sdp->allow_restart = simple_strtoul(buf, NULL, 10); |
211 | 211 | ||
212 | return count; | 212 | return count; |
213 | } | 213 | } |
214 | 214 | ||
215 | static ssize_t | 215 | static ssize_t |
216 | sd_show_cache_type(struct device *dev, struct device_attribute *attr, | 216 | sd_show_cache_type(struct device *dev, struct device_attribute *attr, |
217 | char *buf) | 217 | char *buf) |
218 | { | 218 | { |
219 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 219 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
220 | int ct = sdkp->RCD + 2*sdkp->WCE; | 220 | int ct = sdkp->RCD + 2*sdkp->WCE; |
221 | 221 | ||
222 | return snprintf(buf, 40, "%s\n", sd_cache_types[ct]); | 222 | return snprintf(buf, 40, "%s\n", sd_cache_types[ct]); |
223 | } | 223 | } |
224 | 224 | ||
225 | static ssize_t | 225 | static ssize_t |
226 | sd_show_fua(struct device *dev, struct device_attribute *attr, char *buf) | 226 | sd_show_fua(struct device *dev, struct device_attribute *attr, char *buf) |
227 | { | 227 | { |
228 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 228 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
229 | 229 | ||
230 | return snprintf(buf, 20, "%u\n", sdkp->DPOFUA); | 230 | return snprintf(buf, 20, "%u\n", sdkp->DPOFUA); |
231 | } | 231 | } |
232 | 232 | ||
233 | static ssize_t | 233 | static ssize_t |
234 | sd_show_manage_start_stop(struct device *dev, struct device_attribute *attr, | 234 | sd_show_manage_start_stop(struct device *dev, struct device_attribute *attr, |
235 | char *buf) | 235 | char *buf) |
236 | { | 236 | { |
237 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 237 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
238 | struct scsi_device *sdp = sdkp->device; | 238 | struct scsi_device *sdp = sdkp->device; |
239 | 239 | ||
240 | return snprintf(buf, 20, "%u\n", sdp->manage_start_stop); | 240 | return snprintf(buf, 20, "%u\n", sdp->manage_start_stop); |
241 | } | 241 | } |
242 | 242 | ||
243 | static ssize_t | 243 | static ssize_t |
244 | sd_show_allow_restart(struct device *dev, struct device_attribute *attr, | 244 | sd_show_allow_restart(struct device *dev, struct device_attribute *attr, |
245 | char *buf) | 245 | char *buf) |
246 | { | 246 | { |
247 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 247 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
248 | 248 | ||
249 | return snprintf(buf, 40, "%d\n", sdkp->device->allow_restart); | 249 | return snprintf(buf, 40, "%d\n", sdkp->device->allow_restart); |
250 | } | 250 | } |
251 | 251 | ||
252 | static ssize_t | 252 | static ssize_t |
253 | sd_show_protection_type(struct device *dev, struct device_attribute *attr, | 253 | sd_show_protection_type(struct device *dev, struct device_attribute *attr, |
254 | char *buf) | 254 | char *buf) |
255 | { | 255 | { |
256 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 256 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
257 | 257 | ||
258 | return snprintf(buf, 20, "%u\n", sdkp->protection_type); | 258 | return snprintf(buf, 20, "%u\n", sdkp->protection_type); |
259 | } | 259 | } |
260 | 260 | ||
261 | static ssize_t | 261 | static ssize_t |
262 | sd_show_app_tag_own(struct device *dev, struct device_attribute *attr, | 262 | sd_show_app_tag_own(struct device *dev, struct device_attribute *attr, |
263 | char *buf) | 263 | char *buf) |
264 | { | 264 | { |
265 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 265 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
266 | 266 | ||
267 | return snprintf(buf, 20, "%u\n", sdkp->ATO); | 267 | return snprintf(buf, 20, "%u\n", sdkp->ATO); |
268 | } | 268 | } |
269 | 269 | ||
270 | static ssize_t | 270 | static ssize_t |
271 | sd_show_thin_provisioning(struct device *dev, struct device_attribute *attr, | 271 | sd_show_thin_provisioning(struct device *dev, struct device_attribute *attr, |
272 | char *buf) | 272 | char *buf) |
273 | { | 273 | { |
274 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 274 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
275 | 275 | ||
276 | return snprintf(buf, 20, "%u\n", sdkp->thin_provisioning); | 276 | return snprintf(buf, 20, "%u\n", sdkp->thin_provisioning); |
277 | } | 277 | } |
278 | 278 | ||
279 | static struct device_attribute sd_disk_attrs[] = { | 279 | static struct device_attribute sd_disk_attrs[] = { |
280 | __ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type, | 280 | __ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type, |
281 | sd_store_cache_type), | 281 | sd_store_cache_type), |
282 | __ATTR(FUA, S_IRUGO, sd_show_fua, NULL), | 282 | __ATTR(FUA, S_IRUGO, sd_show_fua, NULL), |
283 | __ATTR(allow_restart, S_IRUGO|S_IWUSR, sd_show_allow_restart, | 283 | __ATTR(allow_restart, S_IRUGO|S_IWUSR, sd_show_allow_restart, |
284 | sd_store_allow_restart), | 284 | sd_store_allow_restart), |
285 | __ATTR(manage_start_stop, S_IRUGO|S_IWUSR, sd_show_manage_start_stop, | 285 | __ATTR(manage_start_stop, S_IRUGO|S_IWUSR, sd_show_manage_start_stop, |
286 | sd_store_manage_start_stop), | 286 | sd_store_manage_start_stop), |
287 | __ATTR(protection_type, S_IRUGO, sd_show_protection_type, NULL), | 287 | __ATTR(protection_type, S_IRUGO, sd_show_protection_type, NULL), |
288 | __ATTR(app_tag_own, S_IRUGO, sd_show_app_tag_own, NULL), | 288 | __ATTR(app_tag_own, S_IRUGO, sd_show_app_tag_own, NULL), |
289 | __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL), | 289 | __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL), |
290 | __ATTR_NULL, | 290 | __ATTR_NULL, |
291 | }; | 291 | }; |
292 | 292 | ||
293 | static struct class sd_disk_class = { | 293 | static struct class sd_disk_class = { |
294 | .name = "scsi_disk", | 294 | .name = "scsi_disk", |
295 | .owner = THIS_MODULE, | 295 | .owner = THIS_MODULE, |
296 | .dev_release = scsi_disk_release, | 296 | .dev_release = scsi_disk_release, |
297 | .dev_attrs = sd_disk_attrs, | 297 | .dev_attrs = sd_disk_attrs, |
298 | }; | 298 | }; |
299 | 299 | ||
300 | static struct scsi_driver sd_template = { | 300 | static struct scsi_driver sd_template = { |
301 | .owner = THIS_MODULE, | 301 | .owner = THIS_MODULE, |
302 | .gendrv = { | 302 | .gendrv = { |
303 | .name = "sd", | 303 | .name = "sd", |
304 | .probe = sd_probe, | 304 | .probe = sd_probe, |
305 | .remove = sd_remove, | 305 | .remove = sd_remove, |
306 | .suspend = sd_suspend, | 306 | .suspend = sd_suspend, |
307 | .resume = sd_resume, | 307 | .resume = sd_resume, |
308 | .shutdown = sd_shutdown, | 308 | .shutdown = sd_shutdown, |
309 | }, | 309 | }, |
310 | .rescan = sd_rescan, | 310 | .rescan = sd_rescan, |
311 | .done = sd_done, | 311 | .done = sd_done, |
312 | }; | 312 | }; |
313 | 313 | ||
314 | /* | 314 | /* |
315 | * Device no to disk mapping: | 315 | * Device no to disk mapping: |
316 | * | 316 | * |
317 | * major disc2 disc p1 | 317 | * major disc2 disc p1 |
318 | * |............|.............|....|....| <- dev_t | 318 | * |............|.............|....|....| <- dev_t |
319 | * 31 20 19 8 7 4 3 0 | 319 | * 31 20 19 8 7 4 3 0 |
320 | * | 320 | * |
321 | * Inside a major, we have 16k disks, however mapped non- | 321 | * Inside a major, we have 16k disks, however mapped non- |
322 | * contiguously. The first 16 disks are for major0, the next | 322 | * contiguously. The first 16 disks are for major0, the next |
323 | * ones with major1, ... Disk 256 is for major0 again, disk 272 | 323 | * ones with major1, ... Disk 256 is for major0 again, disk 272 |
324 | * for major1, ... | 324 | * for major1, ... |
325 | * As we stay compatible with our numbering scheme, we can reuse | 325 | * As we stay compatible with our numbering scheme, we can reuse |
326 | * the well-known SCSI majors 8, 65--71, 136--143. | 326 | * the well-known SCSI majors 8, 65--71, 136--143. |
327 | */ | 327 | */ |
328 | static int sd_major(int major_idx) | 328 | static int sd_major(int major_idx) |
329 | { | 329 | { |
330 | switch (major_idx) { | 330 | switch (major_idx) { |
331 | case 0: | 331 | case 0: |
332 | return SCSI_DISK0_MAJOR; | 332 | return SCSI_DISK0_MAJOR; |
333 | case 1 ... 7: | 333 | case 1 ... 7: |
334 | return SCSI_DISK1_MAJOR + major_idx - 1; | 334 | return SCSI_DISK1_MAJOR + major_idx - 1; |
335 | case 8 ... 15: | 335 | case 8 ... 15: |
336 | return SCSI_DISK8_MAJOR + major_idx - 8; | 336 | return SCSI_DISK8_MAJOR + major_idx - 8; |
337 | default: | 337 | default: |
338 | BUG(); | 338 | BUG(); |
339 | return 0; /* shut up gcc */ | 339 | return 0; /* shut up gcc */ |
340 | } | 340 | } |
341 | } | 341 | } |
342 | 342 | ||
343 | static struct scsi_disk *__scsi_disk_get(struct gendisk *disk) | 343 | static struct scsi_disk *__scsi_disk_get(struct gendisk *disk) |
344 | { | 344 | { |
345 | struct scsi_disk *sdkp = NULL; | 345 | struct scsi_disk *sdkp = NULL; |
346 | 346 | ||
347 | if (disk->private_data) { | 347 | if (disk->private_data) { |
348 | sdkp = scsi_disk(disk); | 348 | sdkp = scsi_disk(disk); |
349 | if (scsi_device_get(sdkp->device) == 0) | 349 | if (scsi_device_get(sdkp->device) == 0) |
350 | get_device(&sdkp->dev); | 350 | get_device(&sdkp->dev); |
351 | else | 351 | else |
352 | sdkp = NULL; | 352 | sdkp = NULL; |
353 | } | 353 | } |
354 | return sdkp; | 354 | return sdkp; |
355 | } | 355 | } |
356 | 356 | ||
357 | static struct scsi_disk *scsi_disk_get(struct gendisk *disk) | 357 | static struct scsi_disk *scsi_disk_get(struct gendisk *disk) |
358 | { | 358 | { |
359 | struct scsi_disk *sdkp; | 359 | struct scsi_disk *sdkp; |
360 | 360 | ||
361 | mutex_lock(&sd_ref_mutex); | 361 | mutex_lock(&sd_ref_mutex); |
362 | sdkp = __scsi_disk_get(disk); | 362 | sdkp = __scsi_disk_get(disk); |
363 | mutex_unlock(&sd_ref_mutex); | 363 | mutex_unlock(&sd_ref_mutex); |
364 | return sdkp; | 364 | return sdkp; |
365 | } | 365 | } |
366 | 366 | ||
367 | static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev) | 367 | static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev) |
368 | { | 368 | { |
369 | struct scsi_disk *sdkp; | 369 | struct scsi_disk *sdkp; |
370 | 370 | ||
371 | mutex_lock(&sd_ref_mutex); | 371 | mutex_lock(&sd_ref_mutex); |
372 | sdkp = dev_get_drvdata(dev); | 372 | sdkp = dev_get_drvdata(dev); |
373 | if (sdkp) | 373 | if (sdkp) |
374 | sdkp = __scsi_disk_get(sdkp->disk); | 374 | sdkp = __scsi_disk_get(sdkp->disk); |
375 | mutex_unlock(&sd_ref_mutex); | 375 | mutex_unlock(&sd_ref_mutex); |
376 | return sdkp; | 376 | return sdkp; |
377 | } | 377 | } |
378 | 378 | ||
379 | static void scsi_disk_put(struct scsi_disk *sdkp) | 379 | static void scsi_disk_put(struct scsi_disk *sdkp) |
380 | { | 380 | { |
381 | struct scsi_device *sdev = sdkp->device; | 381 | struct scsi_device *sdev = sdkp->device; |
382 | 382 | ||
383 | mutex_lock(&sd_ref_mutex); | 383 | mutex_lock(&sd_ref_mutex); |
384 | put_device(&sdkp->dev); | 384 | put_device(&sdkp->dev); |
385 | scsi_device_put(sdev); | 385 | scsi_device_put(sdev); |
386 | mutex_unlock(&sd_ref_mutex); | 386 | mutex_unlock(&sd_ref_mutex); |
387 | } | 387 | } |
388 | 388 | ||
389 | static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif) | 389 | static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif) |
390 | { | 390 | { |
391 | unsigned int prot_op = SCSI_PROT_NORMAL; | 391 | unsigned int prot_op = SCSI_PROT_NORMAL; |
392 | unsigned int dix = scsi_prot_sg_count(scmd); | 392 | unsigned int dix = scsi_prot_sg_count(scmd); |
393 | 393 | ||
394 | if (scmd->sc_data_direction == DMA_FROM_DEVICE) { | 394 | if (scmd->sc_data_direction == DMA_FROM_DEVICE) { |
395 | if (dif && dix) | 395 | if (dif && dix) |
396 | prot_op = SCSI_PROT_READ_PASS; | 396 | prot_op = SCSI_PROT_READ_PASS; |
397 | else if (dif && !dix) | 397 | else if (dif && !dix) |
398 | prot_op = SCSI_PROT_READ_STRIP; | 398 | prot_op = SCSI_PROT_READ_STRIP; |
399 | else if (!dif && dix) | 399 | else if (!dif && dix) |
400 | prot_op = SCSI_PROT_READ_INSERT; | 400 | prot_op = SCSI_PROT_READ_INSERT; |
401 | } else { | 401 | } else { |
402 | if (dif && dix) | 402 | if (dif && dix) |
403 | prot_op = SCSI_PROT_WRITE_PASS; | 403 | prot_op = SCSI_PROT_WRITE_PASS; |
404 | else if (dif && !dix) | 404 | else if (dif && !dix) |
405 | prot_op = SCSI_PROT_WRITE_INSERT; | 405 | prot_op = SCSI_PROT_WRITE_INSERT; |
406 | else if (!dif && dix) | 406 | else if (!dif && dix) |
407 | prot_op = SCSI_PROT_WRITE_STRIP; | 407 | prot_op = SCSI_PROT_WRITE_STRIP; |
408 | } | 408 | } |
409 | 409 | ||
410 | scsi_set_prot_op(scmd, prot_op); | 410 | scsi_set_prot_op(scmd, prot_op); |
411 | scsi_set_prot_type(scmd, dif); | 411 | scsi_set_prot_type(scmd, dif); |
412 | } | 412 | } |
413 | 413 | ||
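The branches in sd_prot_op() reduce to a small truth table; summarizing it here may help when reading the DIF/DIX paths later in the file (dif = target formatted with protection information, dix = host-supplied protection buffers, terminology assumed from common DIF/DIX usage):

	/*
	 * dif dix   DMA_FROM_DEVICE (read)     other direction (write)
	 *  1   1    SCSI_PROT_READ_PASS        SCSI_PROT_WRITE_PASS
	 *  1   0    SCSI_PROT_READ_STRIP       SCSI_PROT_WRITE_INSERT
	 *  0   1    SCSI_PROT_READ_INSERT      SCSI_PROT_WRITE_STRIP
	 *  0   0    SCSI_PROT_NORMAL           SCSI_PROT_NORMAL
	 */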
414 | /** | 414 | /** |
415 | * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device | 415 | * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device |
416 | * @sdp: scsi device to operate on | 416 | * @sdp: scsi device to operate on |
417 | * @rq: Request to prepare | 417 | * @rq: Request to prepare |
418 | * | 418 | * |
419 | * Will issue either UNMAP or WRITE SAME(16) depending on preference | 419 | * Will issue either UNMAP or WRITE SAME(16) depending on preference |
420 | * indicated by target device. | 420 | * indicated by target device. |
421 | **/ | 421 | **/ |
422 | static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) | 422 | static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) |
423 | { | 423 | { |
424 | struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); | 424 | struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); |
425 | struct bio *bio = rq->bio; | 425 | struct bio *bio = rq->bio; |
426 | sector_t sector = bio->bi_sector; | 426 | sector_t sector = bio->bi_sector; |
427 | unsigned int nr_sectors = bio_sectors(bio); | 427 | unsigned int nr_sectors = bio_sectors(bio); |
428 | unsigned int len; | 428 | unsigned int len; |
429 | int ret; | 429 | int ret; |
430 | struct page *page; | 430 | struct page *page; |
431 | 431 | ||
432 | if (sdkp->device->sector_size == 4096) { | 432 | if (sdkp->device->sector_size == 4096) { |
433 | sector >>= 3; | 433 | sector >>= 3; |
434 | nr_sectors >>= 3; | 434 | nr_sectors >>= 3; |
435 | } | 435 | } |
436 | 436 | ||
437 | rq->timeout = SD_TIMEOUT; | 437 | rq->timeout = SD_TIMEOUT; |
438 | 438 | ||
439 | memset(rq->cmd, 0, rq->cmd_len); | 439 | memset(rq->cmd, 0, rq->cmd_len); |
440 | 440 | ||
441 | page = alloc_page(GFP_ATOMIC | __GFP_ZERO); | 441 | page = alloc_page(GFP_ATOMIC | __GFP_ZERO); |
442 | if (!page) | 442 | if (!page) |
443 | return BLKPREP_DEFER; | 443 | return BLKPREP_DEFER; |
444 | 444 | ||
445 | if (sdkp->unmap) { | 445 | if (sdkp->unmap) { |
446 | char *buf = page_address(page); | 446 | char *buf = page_address(page); |
447 | 447 | ||
448 | rq->cmd_len = 10; | 448 | rq->cmd_len = 10; |
449 | rq->cmd[0] = UNMAP; | 449 | rq->cmd[0] = UNMAP; |
450 | rq->cmd[8] = 24; | 450 | rq->cmd[8] = 24; |
451 | 451 | ||
452 | put_unaligned_be16(6 + 16, &buf[0]); | 452 | put_unaligned_be16(6 + 16, &buf[0]); |
453 | put_unaligned_be16(16, &buf[2]); | 453 | put_unaligned_be16(16, &buf[2]); |
454 | put_unaligned_be64(sector, &buf[8]); | 454 | put_unaligned_be64(sector, &buf[8]); |
455 | put_unaligned_be32(nr_sectors, &buf[16]); | 455 | put_unaligned_be32(nr_sectors, &buf[16]); |
456 | 456 | ||
457 | len = 24; | 457 | len = 24; |
458 | } else { | 458 | } else { |
459 | rq->cmd_len = 16; | 459 | rq->cmd_len = 16; |
460 | rq->cmd[0] = WRITE_SAME_16; | 460 | rq->cmd[0] = WRITE_SAME_16; |
461 | rq->cmd[1] = 0x8; /* UNMAP */ | 461 | rq->cmd[1] = 0x8; /* UNMAP */ |
462 | put_unaligned_be64(sector, &rq->cmd[2]); | 462 | put_unaligned_be64(sector, &rq->cmd[2]); |
463 | put_unaligned_be32(nr_sectors, &rq->cmd[10]); | 463 | put_unaligned_be32(nr_sectors, &rq->cmd[10]); |
464 | 464 | ||
465 | len = sdkp->device->sector_size; | 465 | len = sdkp->device->sector_size; |
466 | } | 466 | } |
467 | 467 | ||
468 | blk_add_request_payload(rq, page, len); | 468 | blk_add_request_payload(rq, page, len); |
469 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); | 469 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); |
470 | rq->buffer = page_address(page); | 470 | rq->buffer = page_address(page); |
471 | if (ret != BLKPREP_OK) { | 471 | if (ret != BLKPREP_OK) { |
472 | __free_page(page); | 472 | __free_page(page); |
473 | rq->buffer = NULL; | 473 | rq->buffer = NULL; |
474 | } | 474 | } |
475 | return ret; | 475 | return ret; |
476 | } | 476 | } |
477 | 477 | ||
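For reference, the 24-byte buffer built in the sdkp->unmap branch above is a single-descriptor UNMAP parameter list. The layout below is read directly from the put_unaligned_* calls; the field names follow the usual SBC naming and are not spelled out in this diff:

	/*
	 * bytes  0-1 : UNMAP data length              = 22 (6 + 16)
	 * bytes  2-3 : block descriptor data length   = 16
	 * bytes  8-15: LBA of the first block to unmap
	 * bytes 16-19: number of logical blocks to unmap
	 * (all remaining bytes stay zero thanks to __GFP_ZERO)
	 */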
478 | static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) | 478 | static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) |
479 | { | 479 | { |
480 | rq->timeout = SD_TIMEOUT; | 480 | rq->timeout = SD_TIMEOUT; |
481 | rq->retries = SD_MAX_RETRIES; | 481 | rq->retries = SD_MAX_RETRIES; |
482 | rq->cmd[0] = SYNCHRONIZE_CACHE; | 482 | rq->cmd[0] = SYNCHRONIZE_CACHE; |
483 | rq->cmd_len = 10; | 483 | rq->cmd_len = 10; |
484 | 484 | ||
485 | return scsi_setup_blk_pc_cmnd(sdp, rq); | 485 | return scsi_setup_blk_pc_cmnd(sdp, rq); |
486 | } | 486 | } |
487 | 487 | ||
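scsi_setup_flush_cmnd() is what services a REQ_FLUSH request by turning it into SYNCHRONIZE CACHE. For that path to be exercised, the queue has to advertise flush (and optionally FUA) support via blk_queue_flush(). A hedged sketch of such a setup call, reusing the WCE/DPOFUA state sd already tracks; the helper name is hypothetical and this is not necessarily the exact code the commit adds elsewhere in sd.c:

	/* Sketch only: advertise write-cache capabilities with the new interface. */
	static void example_sd_set_flush(struct scsi_disk *sdkp)
	{
		unsigned int flush = 0;

		if (sdkp->WCE) {		/* volatile write cache present */
			flush |= REQ_FLUSH;
			if (sdkp->DPOFUA)	/* device honours FUA writes */
				flush |= REQ_FUA;
		}

		blk_queue_flush(sdkp->disk->queue, flush);
	}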
488 | static void sd_unprep_fn(struct request_queue *q, struct request *rq) | 488 | static void sd_unprep_fn(struct request_queue *q, struct request *rq) |
489 | { | 489 | { |
490 | if (rq->cmd_flags & REQ_DISCARD) { | 490 | if (rq->cmd_flags & REQ_DISCARD) { |
491 | free_page((unsigned long)rq->buffer); | 491 | free_page((unsigned long)rq->buffer); |
492 | rq->buffer = NULL; | 492 | rq->buffer = NULL; |
493 | } | 493 | } |
494 | } | 494 | } |
495 | 495 | ||
496 | /** | 496 | /** |
497 | * sd_init_command - build a scsi (read or write) command from | 497 | * sd_init_command - build a scsi (read or write) command from |
498 | * information in the request structure. | 498 | * information in the request structure. |
499 | * @SCpnt: pointer to mid-level's per scsi command structure that | 499 | * @SCpnt: pointer to mid-level's per scsi command structure that |
500 | * contains request and into which the scsi command is written | 500 | * contains request and into which the scsi command is written |
501 | * | 501 | * |
502 | * Returns 1 if successful and 0 if error (or cannot be done now). | 502 | * Returns 1 if successful and 0 if error (or cannot be done now). |
503 | **/ | 503 | **/ |
504 | static int sd_prep_fn(struct request_queue *q, struct request *rq) | 504 | static int sd_prep_fn(struct request_queue *q, struct request *rq) |
505 | { | 505 | { |
506 | struct scsi_cmnd *SCpnt; | 506 | struct scsi_cmnd *SCpnt; |
507 | struct scsi_device *sdp = q->queuedata; | 507 | struct scsi_device *sdp = q->queuedata; |
508 | struct gendisk *disk = rq->rq_disk; | 508 | struct gendisk *disk = rq->rq_disk; |
509 | struct scsi_disk *sdkp; | 509 | struct scsi_disk *sdkp; |
510 | sector_t block = blk_rq_pos(rq); | 510 | sector_t block = blk_rq_pos(rq); |
511 | sector_t threshold; | 511 | sector_t threshold; |
512 | unsigned int this_count = blk_rq_sectors(rq); | 512 | unsigned int this_count = blk_rq_sectors(rq); |
513 | int ret, host_dif; | 513 | int ret, host_dif; |
514 | unsigned char protect; | 514 | unsigned char protect; |
515 | 515 | ||
516 | /* | 516 | /* |
517 | * Discard requests come in as REQ_TYPE_FS but we turn them into | 517 | * Discard requests come in as REQ_TYPE_FS but we turn them into |
518 | * block PC requests to make life easier. | 518 | * block PC requests to make life easier. |
519 | */ | 519 | */ |
520 | if (rq->cmd_flags & REQ_DISCARD) { | 520 | if (rq->cmd_flags & REQ_DISCARD) { |
521 | ret = scsi_setup_discard_cmnd(sdp, rq); | 521 | ret = scsi_setup_discard_cmnd(sdp, rq); |
522 | goto out; | 522 | goto out; |
523 | } else if (rq->cmd_flags & REQ_FLUSH) { | 523 | } else if (rq->cmd_flags & REQ_FLUSH) { |
524 | ret = scsi_setup_flush_cmnd(sdp, rq); | 524 | ret = scsi_setup_flush_cmnd(sdp, rq); |
525 | goto out; | 525 | goto out; |
526 | } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 526 | } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
527 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); | 527 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); |
528 | goto out; | 528 | goto out; |
529 | } else if (rq->cmd_type != REQ_TYPE_FS) { | 529 | } else if (rq->cmd_type != REQ_TYPE_FS) { |
530 | ret = BLKPREP_KILL; | 530 | ret = BLKPREP_KILL; |
531 | goto out; | 531 | goto out; |
532 | } | 532 | } |
533 | ret = scsi_setup_fs_cmnd(sdp, rq); | 533 | ret = scsi_setup_fs_cmnd(sdp, rq); |
534 | if (ret != BLKPREP_OK) | 534 | if (ret != BLKPREP_OK) |
535 | goto out; | 535 | goto out; |
536 | SCpnt = rq->special; | 536 | SCpnt = rq->special; |
537 | sdkp = scsi_disk(disk); | 537 | sdkp = scsi_disk(disk); |
538 | 538 | ||
539 | /* from here on until we're complete, any goto out | 539 | /* from here on until we're complete, any goto out |
540 | * is used for a killable error condition */ | 540 | * is used for a killable error condition */ |
541 | ret = BLKPREP_KILL; | 541 | ret = BLKPREP_KILL; |
542 | 542 | ||
543 | SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, | 543 | SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, |
544 | "sd_init_command: block=%llu, " | 544 | "sd_init_command: block=%llu, " |
545 | "count=%d\n", | 545 | "count=%d\n", |
546 | (unsigned long long)block, | 546 | (unsigned long long)block, |
547 | this_count)); | 547 | this_count)); |
548 | 548 | ||
549 | if (!sdp || !scsi_device_online(sdp) || | 549 | if (!sdp || !scsi_device_online(sdp) || |
550 | block + blk_rq_sectors(rq) > get_capacity(disk)) { | 550 | block + blk_rq_sectors(rq) > get_capacity(disk)) { |
551 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, | 551 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, |
552 | "Finishing %u sectors\n", | 552 | "Finishing %u sectors\n", |
553 | blk_rq_sectors(rq))); | 553 | blk_rq_sectors(rq))); |
554 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, | 554 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, |
555 | "Retry with 0x%p\n", SCpnt)); | 555 | "Retry with 0x%p\n", SCpnt)); |
556 | goto out; | 556 | goto out; |
557 | } | 557 | } |
558 | 558 | ||
559 | if (sdp->changed) { | 559 | if (sdp->changed) { |
560 | /* | 560 | /* |
561 | * quietly refuse to do anything to a changed disc until | 561 | * quietly refuse to do anything to a changed disc until |
562 | * the changed bit has been reset | 562 | * the changed bit has been reset |
563 | */ | 563 | */ |
564 | /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */ | 564 | /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */ |
565 | goto out; | 565 | goto out; |
566 | } | 566 | } |
567 | 567 | ||
568 | /* | 568 | /* |
569 | * Some SD card readers can't handle multi-sector accesses which touch | 569 | * Some SD card readers can't handle multi-sector accesses which touch |
570 | * the last one or two hardware sectors. Split accesses as needed. | 570 | * the last one or two hardware sectors. Split accesses as needed. |
571 | */ | 571 | */ |
572 | threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS * | 572 | threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS * |
573 | (sdp->sector_size / 512); | 573 | (sdp->sector_size / 512); |
574 | 574 | ||
575 | if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) { | 575 | if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) { |
576 | if (block < threshold) { | 576 | if (block < threshold) { |
577 | /* Access up to the threshold but not beyond */ | 577 | /* Access up to the threshold but not beyond */ |
578 | this_count = threshold - block; | 578 | this_count = threshold - block; |
579 | } else { | 579 | } else { |
580 | /* Access only a single hardware sector */ | 580 | /* Access only a single hardware sector */ |
581 | this_count = sdp->sector_size / 512; | 581 | this_count = sdp->sector_size / 512; |
582 | } | 582 | } |
583 | } | 583 | } |
584 | 584 | ||
585 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n", | 585 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n", |
586 | (unsigned long long)block)); | 586 | (unsigned long long)block)); |
587 | 587 | ||
588 | /* | 588 | /* |
589 | * If we have a 1K hardware sectorsize, prevent access to single | 589 | * If we have a 1K hardware sectorsize, prevent access to single |
590 | * 512 byte sectors. In theory we could handle this - in fact | 590 | * 512 byte sectors. In theory we could handle this - in fact |
591 | * the scsi cdrom driver must be able to handle this because | 591 | * the scsi cdrom driver must be able to handle this because |
592 | * we typically use 1K blocksizes, and cdroms typically have | 592 | * we typically use 1K blocksizes, and cdroms typically have |
593 | * 2K hardware sectorsizes. Of course, things are simpler | 593 | * 2K hardware sectorsizes. Of course, things are simpler |
594 | * with the cdrom, since it is read-only. For performance | 594 | * with the cdrom, since it is read-only. For performance |
595 | * reasons, the filesystems should be able to handle this | 595 | * reasons, the filesystems should be able to handle this |
596 | * and not force the scsi disk driver to use bounce buffers | 596 | * and not force the scsi disk driver to use bounce buffers |
597 | * for this. | 597 | * for this. |
598 | */ | 598 | */ |
599 | if (sdp->sector_size == 1024) { | 599 | if (sdp->sector_size == 1024) { |
600 | if ((block & 1) || (blk_rq_sectors(rq) & 1)) { | 600 | if ((block & 1) || (blk_rq_sectors(rq) & 1)) { |
601 | scmd_printk(KERN_ERR, SCpnt, | 601 | scmd_printk(KERN_ERR, SCpnt, |
602 | "Bad block number requested\n"); | 602 | "Bad block number requested\n"); |
603 | goto out; | 603 | goto out; |
604 | } else { | 604 | } else { |
605 | block = block >> 1; | 605 | block = block >> 1; |
606 | this_count = this_count >> 1; | 606 | this_count = this_count >> 1; |
607 | } | 607 | } |
608 | } | 608 | } |
609 | if (sdp->sector_size == 2048) { | 609 | if (sdp->sector_size == 2048) { |
610 | if ((block & 3) || (blk_rq_sectors(rq) & 3)) { | 610 | if ((block & 3) || (blk_rq_sectors(rq) & 3)) { |
611 | scmd_printk(KERN_ERR, SCpnt, | 611 | scmd_printk(KERN_ERR, SCpnt, |
612 | "Bad block number requested\n"); | 612 | "Bad block number requested\n"); |
613 | goto out; | 613 | goto out; |
614 | } else { | 614 | } else { |
615 | block = block >> 2; | 615 | block = block >> 2; |
616 | this_count = this_count >> 2; | 616 | this_count = this_count >> 2; |
617 | } | 617 | } |
618 | } | 618 | } |
619 | if (sdp->sector_size == 4096) { | 619 | if (sdp->sector_size == 4096) { |
620 | if ((block & 7) || (blk_rq_sectors(rq) & 7)) { | 620 | if ((block & 7) || (blk_rq_sectors(rq) & 7)) { |
621 | scmd_printk(KERN_ERR, SCpnt, | 621 | scmd_printk(KERN_ERR, SCpnt, |
622 | "Bad block number requested\n"); | 622 | "Bad block number requested\n"); |
623 | goto out; | 623 | goto out; |
624 | } else { | 624 | } else { |
625 | block = block >> 3; | 625 | block = block >> 3; |
626 | this_count = this_count >> 3; | 626 | this_count = this_count >> 3; |
627 | } | 627 | } |
628 | } | 628 | } |
629 | if (rq_data_dir(rq) == WRITE) { | 629 | if (rq_data_dir(rq) == WRITE) { |
630 | if (!sdp->writeable) { | 630 | if (!sdp->writeable) { |
631 | goto out; | 631 | goto out; |
632 | } | 632 | } |
633 | SCpnt->cmnd[0] = WRITE_6; | 633 | SCpnt->cmnd[0] = WRITE_6; |
634 | SCpnt->sc_data_direction = DMA_TO_DEVICE; | 634 | SCpnt->sc_data_direction = DMA_TO_DEVICE; |
635 | 635 | ||
636 | if (blk_integrity_rq(rq) && | 636 | if (blk_integrity_rq(rq) && |
637 | sd_dif_prepare(rq, block, sdp->sector_size) == -EIO) | 637 | sd_dif_prepare(rq, block, sdp->sector_size) == -EIO) |
638 | goto out; | 638 | goto out; |
639 | 639 | ||
640 | } else if (rq_data_dir(rq) == READ) { | 640 | } else if (rq_data_dir(rq) == READ) { |
641 | SCpnt->cmnd[0] = READ_6; | 641 | SCpnt->cmnd[0] = READ_6; |
642 | SCpnt->sc_data_direction = DMA_FROM_DEVICE; | 642 | SCpnt->sc_data_direction = DMA_FROM_DEVICE; |
643 | } else { | 643 | } else { |
644 | scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags); | 644 | scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags); |
645 | goto out; | 645 | goto out; |
646 | } | 646 | } |
647 | 647 | ||
648 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, | 648 | SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, |
649 | "%s %d/%u 512 byte blocks.\n", | 649 | "%s %d/%u 512 byte blocks.\n", |
650 | (rq_data_dir(rq) == WRITE) ? | 650 | (rq_data_dir(rq) == WRITE) ? |
651 | "writing" : "reading", this_count, | 651 | "writing" : "reading", this_count, |
652 | blk_rq_sectors(rq))); | 652 | blk_rq_sectors(rq))); |
653 | 653 | ||
654 | /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ | 654 | /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ |
655 | host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); | 655 | host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); |
656 | if (host_dif) | 656 | if (host_dif) |
657 | protect = 1 << 5; | 657 | protect = 1 << 5; |
658 | else | 658 | else |
659 | protect = 0; | 659 | protect = 0; |
660 | 660 | ||
661 | if (host_dif == SD_DIF_TYPE2_PROTECTION) { | 661 | if (host_dif == SD_DIF_TYPE2_PROTECTION) { |
662 | SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); | 662 | SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); |
663 | 663 | ||
664 | if (unlikely(SCpnt->cmnd == NULL)) { | 664 | if (unlikely(SCpnt->cmnd == NULL)) { |
665 | ret = BLKPREP_DEFER; | 665 | ret = BLKPREP_DEFER; |
666 | goto out; | 666 | goto out; |
667 | } | 667 | } |
668 | 668 | ||
669 | SCpnt->cmd_len = SD_EXT_CDB_SIZE; | 669 | SCpnt->cmd_len = SD_EXT_CDB_SIZE; |
670 | memset(SCpnt->cmnd, 0, SCpnt->cmd_len); | 670 | memset(SCpnt->cmnd, 0, SCpnt->cmd_len); |
671 | SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD; | 671 | SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD; |
672 | SCpnt->cmnd[7] = 0x18; | 672 | SCpnt->cmnd[7] = 0x18; |
673 | SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32; | 673 | SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32; |
674 | SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); | 674 | SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); |
675 | 675 | ||
676 | /* LBA */ | 676 | /* LBA */ |
677 | SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; | 677 | SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; |
678 | SCpnt->cmnd[13] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; | 678 | SCpnt->cmnd[13] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; |
679 | SCpnt->cmnd[14] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; | 679 | SCpnt->cmnd[14] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; |
680 | SCpnt->cmnd[15] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0; | 680 | SCpnt->cmnd[15] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0; |
681 | SCpnt->cmnd[16] = (unsigned char) (block >> 24) & 0xff; | 681 | SCpnt->cmnd[16] = (unsigned char) (block >> 24) & 0xff; |
682 | SCpnt->cmnd[17] = (unsigned char) (block >> 16) & 0xff; | 682 | SCpnt->cmnd[17] = (unsigned char) (block >> 16) & 0xff; |
683 | SCpnt->cmnd[18] = (unsigned char) (block >> 8) & 0xff; | 683 | SCpnt->cmnd[18] = (unsigned char) (block >> 8) & 0xff; |
684 | SCpnt->cmnd[19] = (unsigned char) block & 0xff; | 684 | SCpnt->cmnd[19] = (unsigned char) block & 0xff; |
685 | 685 | ||
686 | /* Expected Indirect LBA */ | 686 | /* Expected Indirect LBA */ |
687 | SCpnt->cmnd[20] = (unsigned char) (block >> 24) & 0xff; | 687 | SCpnt->cmnd[20] = (unsigned char) (block >> 24) & 0xff; |
688 | SCpnt->cmnd[21] = (unsigned char) (block >> 16) & 0xff; | 688 | SCpnt->cmnd[21] = (unsigned char) (block >> 16) & 0xff; |
689 | SCpnt->cmnd[22] = (unsigned char) (block >> 8) & 0xff; | 689 | SCpnt->cmnd[22] = (unsigned char) (block >> 8) & 0xff; |
690 | SCpnt->cmnd[23] = (unsigned char) block & 0xff; | 690 | SCpnt->cmnd[23] = (unsigned char) block & 0xff; |
691 | 691 | ||
692 | /* Transfer length */ | 692 | /* Transfer length */ |
693 | SCpnt->cmnd[28] = (unsigned char) (this_count >> 24) & 0xff; | 693 | SCpnt->cmnd[28] = (unsigned char) (this_count >> 24) & 0xff; |
694 | SCpnt->cmnd[29] = (unsigned char) (this_count >> 16) & 0xff; | 694 | SCpnt->cmnd[29] = (unsigned char) (this_count >> 16) & 0xff; |
695 | SCpnt->cmnd[30] = (unsigned char) (this_count >> 8) & 0xff; | 695 | SCpnt->cmnd[30] = (unsigned char) (this_count >> 8) & 0xff; |
696 | SCpnt->cmnd[31] = (unsigned char) this_count & 0xff; | 696 | SCpnt->cmnd[31] = (unsigned char) this_count & 0xff; |
697 | } else if (block > 0xffffffff) { | 697 | } else if (block > 0xffffffff) { |
698 | SCpnt->cmnd[0] += READ_16 - READ_6; | 698 | SCpnt->cmnd[0] += READ_16 - READ_6; |
699 | SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); | 699 | SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); |
700 | SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; | 700 | SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; |
701 | SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; | 701 | SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; |
702 | SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; | 702 | SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0; |
703 | SCpnt->cmnd[5] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0; | 703 | SCpnt->cmnd[5] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0; |
704 | SCpnt->cmnd[6] = (unsigned char) (block >> 24) & 0xff; | 704 | SCpnt->cmnd[6] = (unsigned char) (block >> 24) & 0xff; |
705 | SCpnt->cmnd[7] = (unsigned char) (block >> 16) & 0xff; | 705 | SCpnt->cmnd[7] = (unsigned char) (block >> 16) & 0xff; |
706 | SCpnt->cmnd[8] = (unsigned char) (block >> 8) & 0xff; | 706 | SCpnt->cmnd[8] = (unsigned char) (block >> 8) & 0xff; |
707 | SCpnt->cmnd[9] = (unsigned char) block & 0xff; | 707 | SCpnt->cmnd[9] = (unsigned char) block & 0xff; |
708 | SCpnt->cmnd[10] = (unsigned char) (this_count >> 24) & 0xff; | 708 | SCpnt->cmnd[10] = (unsigned char) (this_count >> 24) & 0xff; |
709 | SCpnt->cmnd[11] = (unsigned char) (this_count >> 16) & 0xff; | 709 | SCpnt->cmnd[11] = (unsigned char) (this_count >> 16) & 0xff; |
710 | SCpnt->cmnd[12] = (unsigned char) (this_count >> 8) & 0xff; | 710 | SCpnt->cmnd[12] = (unsigned char) (this_count >> 8) & 0xff; |
711 | SCpnt->cmnd[13] = (unsigned char) this_count & 0xff; | 711 | SCpnt->cmnd[13] = (unsigned char) this_count & 0xff; |
712 | SCpnt->cmnd[14] = SCpnt->cmnd[15] = 0; | 712 | SCpnt->cmnd[14] = SCpnt->cmnd[15] = 0; |
713 | } else if ((this_count > 0xff) || (block > 0x1fffff) || | 713 | } else if ((this_count > 0xff) || (block > 0x1fffff) || |
714 | scsi_device_protection(SCpnt->device) || | 714 | scsi_device_protection(SCpnt->device) || |
715 | SCpnt->device->use_10_for_rw) { | 715 | SCpnt->device->use_10_for_rw) { |
716 | if (this_count > 0xffff) | 716 | if (this_count > 0xffff) |
717 | this_count = 0xffff; | 717 | this_count = 0xffff; |
718 | 718 | ||
719 | SCpnt->cmnd[0] += READ_10 - READ_6; | 719 | SCpnt->cmnd[0] += READ_10 - READ_6; |
720 | SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); | 720 | SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0); |
721 | SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; | 721 | SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; |
722 | SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; | 722 | SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; |
723 | SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; | 723 | SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; |
724 | SCpnt->cmnd[5] = (unsigned char) block & 0xff; | 724 | SCpnt->cmnd[5] = (unsigned char) block & 0xff; |
725 | SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0; | 725 | SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0; |
726 | SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; | 726 | SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; |
727 | SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; | 727 | SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; |
728 | } else { | 728 | } else { |
729 | if (unlikely(rq->cmd_flags & REQ_FUA)) { | 729 | if (unlikely(rq->cmd_flags & REQ_FUA)) { |
730 | /* | 730 | /* |
731 | * This happens only if this drive failed | 731 | * This happens only if this drive failed |
732 | * a 10-byte rw command with ILLEGAL_REQUEST | 732 | * a 10-byte rw command with ILLEGAL_REQUEST |
733 | * during operation and thus turned off | 733 | * during operation and thus turned off |
734 | * use_10_for_rw. | 734 | * use_10_for_rw. |
735 | */ | 735 | */ |
736 | scmd_printk(KERN_ERR, SCpnt, | 736 | scmd_printk(KERN_ERR, SCpnt, |
737 | "FUA write on READ/WRITE(6) drive\n"); | 737 | "FUA write on READ/WRITE(6) drive\n"); |
738 | goto out; | 738 | goto out; |
739 | } | 739 | } |
740 | 740 | ||
741 | SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); | 741 | SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); |
742 | SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff); | 742 | SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff); |
743 | SCpnt->cmnd[3] = (unsigned char) block & 0xff; | 743 | SCpnt->cmnd[3] = (unsigned char) block & 0xff; |
744 | SCpnt->cmnd[4] = (unsigned char) this_count; | 744 | SCpnt->cmnd[4] = (unsigned char) this_count; |
745 | SCpnt->cmnd[5] = 0; | 745 | SCpnt->cmnd[5] = 0; |
746 | } | 746 | } |
747 | SCpnt->sdb.length = this_count * sdp->sector_size; | 747 | SCpnt->sdb.length = this_count * sdp->sector_size; |
748 | 748 | ||
749 | /* If DIF or DIX is enabled, tell HBA how to handle request */ | 749 | /* If DIF or DIX is enabled, tell HBA how to handle request */ |
750 | if (host_dif || scsi_prot_sg_count(SCpnt)) | 750 | if (host_dif || scsi_prot_sg_count(SCpnt)) |
751 | sd_prot_op(SCpnt, host_dif); | 751 | sd_prot_op(SCpnt, host_dif); |
752 | 752 | ||
753 | /* | 753 | /* |
754 | * We shouldn't disconnect in the middle of a sector, so with a dumb | 754 | * We shouldn't disconnect in the middle of a sector, so with a dumb |
755 | * host adapter, it's safe to assume that we can at least transfer | 755 | * host adapter, it's safe to assume that we can at least transfer |
756 | * this many bytes between each connect / disconnect. | 756 | * this many bytes between each connect / disconnect. |
757 | */ | 757 | */ |
758 | SCpnt->transfersize = sdp->sector_size; | 758 | SCpnt->transfersize = sdp->sector_size; |
759 | SCpnt->underflow = this_count << 9; | 759 | SCpnt->underflow = this_count << 9; |
760 | SCpnt->allowed = SD_MAX_RETRIES; | 760 | SCpnt->allowed = SD_MAX_RETRIES; |
761 | 761 | ||
762 | /* | 762 | /* |
763 | * This indicates that the command is ready from our end to be | 763 | * This indicates that the command is ready from our end to be |
764 | * queued. | 764 | * queued. |
765 | */ | 765 | */ |
766 | ret = BLKPREP_OK; | 766 | ret = BLKPREP_OK; |
767 | out: | 767 | out: |
768 | return scsi_prep_return(q, rq, ret); | 768 | return scsi_prep_return(q, rq, ret); |
769 | } | 769 | } |
770 | 770 | ||
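The prep path above packs REQ_FUA straight into the CDB: protect is 1 << 5 when the host supports DIF, and 0x08 is ORed in for FUA requests, whether that flag byte sits at offset 1 of a READ/WRITE(10)/(16) CDB or at offset 10 of the 32-byte variable-length command. Below is a minimal standalone sketch of that flag-byte packing; the helper name is hypothetical and is not part of sd.c.

#include <stdint.h>
#include <stdio.h>

/* Flag byte as sd_prep_fn builds it: bits 5-7 carry the protect
 * field (RDPROTECT/WRPROTECT), bit 3 is FUA. Hypothetical helper,
 * not a kernel function. */
static uint8_t rw_flag_byte(int host_dif, int fua)
{
        uint8_t protect = host_dif ? (1 << 5) : 0;
        return protect | (fua ? 0x08 : 0);
}

int main(void)
{
        printf("no DIF, FUA : 0x%02x\n", rw_flag_byte(0, 1));  /* 0x08 */
        printf("DIF,    FUA : 0x%02x\n", rw_flag_byte(1, 1));  /* 0x28 */
        printf("DIF,   !FUA : 0x%02x\n", rw_flag_byte(1, 0));  /* 0x20 */
        return 0;
}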
771 | /** | 771 | /** |
772 | * sd_open - open a scsi disk device | 772 | * sd_open - open a scsi disk device |
773 | * @inode: only i_rdev member may be used | 773 | * @inode: only i_rdev member may be used |
774 | * @filp: only f_mode and f_flags may be used | 774 | * @filp: only f_mode and f_flags may be used |
775 | * | 775 | * |
776 | * Returns 0 if successful. Returns a negated errno value in case | 776 | * Returns 0 if successful. Returns a negated errno value in case |
777 | * of error. | 777 | * of error. |
778 | * | 778 | * |
779 | * Note: This can be called from a user context (e.g. fsck(1) ) | 779 | * Note: This can be called from a user context (e.g. fsck(1) ) |
780 | * or from within the kernel (e.g. as a result of a mount(1) ). | 780 | * or from within the kernel (e.g. as a result of a mount(1) ). |
781 | * In the latter case @inode and @filp carry an abridged amount | 781 | * In the latter case @inode and @filp carry an abridged amount |
782 | * of information as noted above. | 782 | * of information as noted above. |
783 | * | 783 | * |
784 | * Locking: called with bdev->bd_mutex held. | 784 | * Locking: called with bdev->bd_mutex held. |
785 | **/ | 785 | **/ |
786 | static int sd_open(struct block_device *bdev, fmode_t mode) | 786 | static int sd_open(struct block_device *bdev, fmode_t mode) |
787 | { | 787 | { |
788 | struct scsi_disk *sdkp = scsi_disk_get(bdev->bd_disk); | 788 | struct scsi_disk *sdkp = scsi_disk_get(bdev->bd_disk); |
789 | struct scsi_device *sdev; | 789 | struct scsi_device *sdev; |
790 | int retval; | 790 | int retval; |
791 | 791 | ||
792 | if (!sdkp) | 792 | if (!sdkp) |
793 | return -ENXIO; | 793 | return -ENXIO; |
794 | 794 | ||
795 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n")); | 795 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n")); |
796 | 796 | ||
797 | sdev = sdkp->device; | 797 | sdev = sdkp->device; |
798 | 798 | ||
799 | retval = scsi_autopm_get_device(sdev); | 799 | retval = scsi_autopm_get_device(sdev); |
800 | if (retval) | 800 | if (retval) |
801 | goto error_autopm; | 801 | goto error_autopm; |
802 | 802 | ||
803 | /* | 803 | /* |
804 | * If the device is in error recovery, wait until it is done. | 804 | * If the device is in error recovery, wait until it is done. |
805 | * If the device is offline, then disallow any access to it. | 805 | * If the device is offline, then disallow any access to it. |
806 | */ | 806 | */ |
807 | retval = -ENXIO; | 807 | retval = -ENXIO; |
808 | if (!scsi_block_when_processing_errors(sdev)) | 808 | if (!scsi_block_when_processing_errors(sdev)) |
809 | goto error_out; | 809 | goto error_out; |
810 | 810 | ||
811 | if (sdev->removable || sdkp->write_prot) | 811 | if (sdev->removable || sdkp->write_prot) |
812 | check_disk_change(bdev); | 812 | check_disk_change(bdev); |
813 | 813 | ||
814 | /* | 814 | /* |
815 | * If the drive is empty, just let the open fail. | 815 | * If the drive is empty, just let the open fail. |
816 | */ | 816 | */ |
817 | retval = -ENOMEDIUM; | 817 | retval = -ENOMEDIUM; |
818 | if (sdev->removable && !sdkp->media_present && !(mode & FMODE_NDELAY)) | 818 | if (sdev->removable && !sdkp->media_present && !(mode & FMODE_NDELAY)) |
819 | goto error_out; | 819 | goto error_out; |
820 | 820 | ||
821 | /* | 821 | /* |
822 | * If the device has the write protect tab set, have the open fail | 822 | * If the device has the write protect tab set, have the open fail |
823 | * if the user expects to be able to write to the thing. | 823 | * if the user expects to be able to write to the thing. |
824 | */ | 824 | */ |
825 | retval = -EROFS; | 825 | retval = -EROFS; |
826 | if (sdkp->write_prot && (mode & FMODE_WRITE)) | 826 | if (sdkp->write_prot && (mode & FMODE_WRITE)) |
827 | goto error_out; | 827 | goto error_out; |
828 | 828 | ||
829 | /* | 829 | /* |
830 | * It is possible that the disk changing stuff resulted in | 830 | * It is possible that the disk changing stuff resulted in |
831 | * the device being taken offline. If this is the case, | 831 | * the device being taken offline. If this is the case, |
832 | * report this to the user, and don't pretend that the | 832 | * report this to the user, and don't pretend that the |
833 | * open actually succeeded. | 833 | * open actually succeeded. |
834 | */ | 834 | */ |
835 | retval = -ENXIO; | 835 | retval = -ENXIO; |
836 | if (!scsi_device_online(sdev)) | 836 | if (!scsi_device_online(sdev)) |
837 | goto error_out; | 837 | goto error_out; |
838 | 838 | ||
839 | if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) { | 839 | if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) { |
840 | if (scsi_block_when_processing_errors(sdev)) | 840 | if (scsi_block_when_processing_errors(sdev)) |
841 | scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); | 841 | scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); |
842 | } | 842 | } |
843 | 843 | ||
844 | return 0; | 844 | return 0; |
845 | 845 | ||
846 | error_out: | 846 | error_out: |
847 | scsi_autopm_put_device(sdev); | 847 | scsi_autopm_put_device(sdev); |
848 | error_autopm: | 848 | error_autopm: |
849 | scsi_disk_put(sdkp); | 849 | scsi_disk_put(sdkp); |
850 | return retval; | 850 | return retval; |
851 | } | 851 | } |
852 | 852 | ||
853 | /** | 853 | /** |
854 | * sd_release - invoked when the (last) close(2) is called on this | 854 | * sd_release - invoked when the (last) close(2) is called on this |
855 | * scsi disk. | 855 | * scsi disk. |
856 | * @inode: only i_rdev member may be used | 856 | * @inode: only i_rdev member may be used |
857 | * @filp: only f_mode and f_flags may be used | 857 | * @filp: only f_mode and f_flags may be used |
858 | * | 858 | * |
859 | * Returns 0. | 859 | * Returns 0. |
860 | * | 860 | * |
861 | * Note: may block (uninterruptible) if error recovery is underway | 861 | * Note: may block (uninterruptible) if error recovery is underway |
862 | * on this disk. | 862 | * on this disk. |
863 | * | 863 | * |
864 | * Locking: called with bdev->bd_mutex held. | 864 | * Locking: called with bdev->bd_mutex held. |
865 | **/ | 865 | **/ |
866 | static int sd_release(struct gendisk *disk, fmode_t mode) | 866 | static int sd_release(struct gendisk *disk, fmode_t mode) |
867 | { | 867 | { |
868 | struct scsi_disk *sdkp = scsi_disk(disk); | 868 | struct scsi_disk *sdkp = scsi_disk(disk); |
869 | struct scsi_device *sdev = sdkp->device; | 869 | struct scsi_device *sdev = sdkp->device; |
870 | 870 | ||
871 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n")); | 871 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n")); |
872 | 872 | ||
873 | if (atomic_dec_return(&sdkp->openers) && sdev->removable) { | 873 | if (atomic_dec_return(&sdkp->openers) && sdev->removable) { |
874 | if (scsi_block_when_processing_errors(sdev)) | 874 | if (scsi_block_when_processing_errors(sdev)) |
875 | scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); | 875 | scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); |
876 | } | 876 | } |
877 | 877 | ||
878 | /* | 878 | /* |
879 | * XXX and what if there are packets in flight and this close() | 879 | * XXX and what if there are packets in flight and this close() |
880 | * XXX is followed by a "rmmod sd_mod"? | 880 | * XXX is followed by a "rmmod sd_mod"? |
881 | */ | 881 | */ |
882 | 882 | ||
883 | scsi_autopm_put_device(sdev); | 883 | scsi_autopm_put_device(sdev); |
884 | scsi_disk_put(sdkp); | 884 | scsi_disk_put(sdkp); |
885 | return 0; | 885 | return 0; |
886 | } | 886 | } |
887 | 887 | ||
888 | static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) | 888 | static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
889 | { | 889 | { |
890 | struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); | 890 | struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); |
891 | struct scsi_device *sdp = sdkp->device; | 891 | struct scsi_device *sdp = sdkp->device; |
892 | struct Scsi_Host *host = sdp->host; | 892 | struct Scsi_Host *host = sdp->host; |
893 | int diskinfo[4]; | 893 | int diskinfo[4]; |
894 | 894 | ||
895 | /* default to most commonly used values */ | 895 | /* default to most commonly used values */ |
896 | diskinfo[0] = 0x40; /* 1 << 6 */ | 896 | diskinfo[0] = 0x40; /* 1 << 6 */ |
897 | diskinfo[1] = 0x20; /* 1 << 5 */ | 897 | diskinfo[1] = 0x20; /* 1 << 5 */ |
898 | diskinfo[2] = sdkp->capacity >> 11; | 898 | diskinfo[2] = sdkp->capacity >> 11; |
899 | 899 | ||
900 | /* override with calculated, extended default, or driver values */ | 900 | /* override with calculated, extended default, or driver values */ |
901 | if (host->hostt->bios_param) | 901 | if (host->hostt->bios_param) |
902 | host->hostt->bios_param(sdp, bdev, sdkp->capacity, diskinfo); | 902 | host->hostt->bios_param(sdp, bdev, sdkp->capacity, diskinfo); |
903 | else | 903 | else |
904 | scsicam_bios_param(bdev, sdkp->capacity, diskinfo); | 904 | scsicam_bios_param(bdev, sdkp->capacity, diskinfo); |
905 | 905 | ||
906 | geo->heads = diskinfo[0]; | 906 | geo->heads = diskinfo[0]; |
907 | geo->sectors = diskinfo[1]; | 907 | geo->sectors = diskinfo[1]; |
908 | geo->cylinders = diskinfo[2]; | 908 | geo->cylinders = diskinfo[2]; |
909 | return 0; | 909 | return 0; |
910 | } | 910 | } |
911 | 911 | ||
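The default geometry sd_getgeo fills in is 64 heads and 32 sectors per track, i.e. 2048 sectors per cylinder, which is why the cylinder default is capacity >> 11 before bios_param or scsicam_bios_param gets a chance to override it. A standalone check of that arithmetic, with an arbitrary example capacity:

#include <stdio.h>

/* sd_getgeo()'s fallback: 0x40 heads x 0x20 sectors = 2048 sectors
 * per cylinder, hence cylinders = capacity >> 11 (rounded down). */
int main(void)
{
        unsigned long long capacity = 976773168ULL; /* example value, in 512-byte sectors */
        unsigned heads = 0x40, sectors = 0x20;
        unsigned long long cylinders = capacity >> 11;

        printf("heads=%u sectors=%u cylinders=%llu\n",
               heads, sectors, cylinders);
        printf("covers %llu of %llu sectors\n",
               cylinders * heads * sectors, capacity);
        return 0;
}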
912 | /** | 912 | /** |
913 | * sd_ioctl - process an ioctl | 913 | * sd_ioctl - process an ioctl |
914 | * @inode: only i_rdev/i_bdev members may be used | 914 | * @inode: only i_rdev/i_bdev members may be used |
915 | * @filp: only f_mode and f_flags may be used | 915 | * @filp: only f_mode and f_flags may be used |
916 | * @cmd: ioctl command number | 916 | * @cmd: ioctl command number |
917 | * @arg: this is third argument given to ioctl(2) system call. | 917 | * @arg: this is third argument given to ioctl(2) system call. |
918 | * Often contains a pointer. | 918 | * Often contains a pointer. |
919 | * | 919 | * |
920 | * Returns 0 if successful (some ioctls return positive numbers on | 920 | * Returns 0 if successful (some ioctls return positive numbers on |
921 | * success as well). Returns a negated errno value in case of error. | 921 | * success as well). Returns a negated errno value in case of error. |
922 | * | 922 | * |
923 | * Note: most ioctls are forwarded onto the block subsystem or further | 923 | * Note: most ioctls are forwarded onto the block subsystem or further |
924 | * down in the scsi subsystem. | 924 | * down in the scsi subsystem. |
925 | **/ | 925 | **/ |
926 | static int sd_ioctl(struct block_device *bdev, fmode_t mode, | 926 | static int sd_ioctl(struct block_device *bdev, fmode_t mode, |
927 | unsigned int cmd, unsigned long arg) | 927 | unsigned int cmd, unsigned long arg) |
928 | { | 928 | { |
929 | struct gendisk *disk = bdev->bd_disk; | 929 | struct gendisk *disk = bdev->bd_disk; |
930 | struct scsi_device *sdp = scsi_disk(disk)->device; | 930 | struct scsi_device *sdp = scsi_disk(disk)->device; |
931 | void __user *p = (void __user *)arg; | 931 | void __user *p = (void __user *)arg; |
932 | int error; | 932 | int error; |
933 | 933 | ||
934 | SCSI_LOG_IOCTL(1, printk("sd_ioctl: disk=%s, cmd=0x%x\n", | 934 | SCSI_LOG_IOCTL(1, printk("sd_ioctl: disk=%s, cmd=0x%x\n", |
935 | disk->disk_name, cmd)); | 935 | disk->disk_name, cmd)); |
936 | 936 | ||
937 | /* | 937 | /* |
938 | * If we are in the middle of error recovery, don't let anyone | 938 | * If we are in the middle of error recovery, don't let anyone |
939 | * else try and use this device. Also, if error recovery fails, it | 939 | * else try and use this device. Also, if error recovery fails, it |
940 | * may try and take the device offline, in which case all further | 940 | * may try and take the device offline, in which case all further |
941 | * access to the device is prohibited. | 941 | * access to the device is prohibited. |
942 | */ | 942 | */ |
943 | error = scsi_nonblockable_ioctl(sdp, cmd, p, | 943 | error = scsi_nonblockable_ioctl(sdp, cmd, p, |
944 | (mode & FMODE_NDELAY) != 0); | 944 | (mode & FMODE_NDELAY) != 0); |
945 | if (!scsi_block_when_processing_errors(sdp) || !error) | 945 | if (!scsi_block_when_processing_errors(sdp) || !error) |
946 | goto out; | 946 | goto out; |
947 | 947 | ||
948 | /* | 948 | /* |
949 | * Send SCSI addressing ioctls directly to mid level, send other | 949 | * Send SCSI addressing ioctls directly to mid level, send other |
950 | * ioctls to block level and then onto mid level if they can't be | 950 | * ioctls to block level and then onto mid level if they can't be |
951 | * resolved. | 951 | * resolved. |
952 | */ | 952 | */ |
953 | switch (cmd) { | 953 | switch (cmd) { |
954 | case SCSI_IOCTL_GET_IDLUN: | 954 | case SCSI_IOCTL_GET_IDLUN: |
955 | case SCSI_IOCTL_GET_BUS_NUMBER: | 955 | case SCSI_IOCTL_GET_BUS_NUMBER: |
956 | error = scsi_ioctl(sdp, cmd, p); | 956 | error = scsi_ioctl(sdp, cmd, p); |
957 | break; | 957 | break; |
958 | default: | 958 | default: |
959 | error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p); | 959 | error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p); |
960 | if (error != -ENOTTY) | 960 | if (error != -ENOTTY) |
961 | break; | 961 | break; |
962 | error = scsi_ioctl(sdp, cmd, p); | 962 | error = scsi_ioctl(sdp, cmd, p); |
963 | break; | 963 | break; |
964 | } | 964 | } |
965 | out: | 965 | out: |
966 | return error; | 966 | return error; |
967 | } | 967 | } |
968 | 968 | ||
969 | static void set_media_not_present(struct scsi_disk *sdkp) | 969 | static void set_media_not_present(struct scsi_disk *sdkp) |
970 | { | 970 | { |
971 | sdkp->media_present = 0; | 971 | sdkp->media_present = 0; |
972 | sdkp->capacity = 0; | 972 | sdkp->capacity = 0; |
973 | sdkp->device->changed = 1; | 973 | sdkp->device->changed = 1; |
974 | } | 974 | } |
975 | 975 | ||
976 | /** | 976 | /** |
977 | * sd_media_changed - check if our medium changed | 977 | * sd_media_changed - check if our medium changed |
978 | * @disk: kernel device descriptor | 978 | * @disk: kernel device descriptor |
979 | * | 979 | * |
980 | * Returns 0 if not applicable or no change; 1 if change | 980 | * Returns 0 if not applicable or no change; 1 if change |
981 | * | 981 | * |
982 | * Note: this function is invoked from the block subsystem. | 982 | * Note: this function is invoked from the block subsystem. |
983 | **/ | 983 | **/ |
984 | static int sd_media_changed(struct gendisk *disk) | 984 | static int sd_media_changed(struct gendisk *disk) |
985 | { | 985 | { |
986 | struct scsi_disk *sdkp = scsi_disk(disk); | 986 | struct scsi_disk *sdkp = scsi_disk(disk); |
987 | struct scsi_device *sdp = sdkp->device; | 987 | struct scsi_device *sdp = sdkp->device; |
988 | struct scsi_sense_hdr *sshdr = NULL; | 988 | struct scsi_sense_hdr *sshdr = NULL; |
989 | int retval; | 989 | int retval; |
990 | 990 | ||
991 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_media_changed\n")); | 991 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_media_changed\n")); |
992 | 992 | ||
993 | if (!sdp->removable) | 993 | if (!sdp->removable) |
994 | return 0; | 994 | return 0; |
995 | 995 | ||
996 | /* | 996 | /* |
997 | * If the device is offline, don't send any commands - just pretend as | 997 | * If the device is offline, don't send any commands - just pretend as |
998 | * if the command failed. If the device ever comes back online, we | 998 | * if the command failed. If the device ever comes back online, we |
999 | * can deal with it then. It is only because of unrecoverable errors | 999 | * can deal with it then. It is only because of unrecoverable errors |
1000 | * that we would ever take a device offline in the first place. | 1000 | * that we would ever take a device offline in the first place. |
1001 | */ | 1001 | */ |
1002 | if (!scsi_device_online(sdp)) { | 1002 | if (!scsi_device_online(sdp)) { |
1003 | set_media_not_present(sdkp); | 1003 | set_media_not_present(sdkp); |
1004 | retval = 1; | 1004 | retval = 1; |
1005 | goto out; | 1005 | goto out; |
1006 | } | 1006 | } |
1007 | 1007 | ||
1008 | /* | 1008 | /* |
1009 | * Using TEST_UNIT_READY enables differentiation between drive with | 1009 | * Using TEST_UNIT_READY enables differentiation between drive with |
1010 | * no cartridge loaded - NOT READY, drive with changed cartridge - | 1010 | * no cartridge loaded - NOT READY, drive with changed cartridge - |
1011 | * UNIT ATTENTION, or with same cartridge - GOOD STATUS. | 1011 | * UNIT ATTENTION, or with same cartridge - GOOD STATUS. |
1012 | * | 1012 | * |
1013 | * Drives that auto spin down, e.g. the iomega jaz 1G, will be started | 1013 | * Drives that auto spin down, e.g. the iomega jaz 1G, will be started |
1014 | * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever | 1014 | * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever |
1015 | * sd_revalidate() is called. | 1015 | * sd_revalidate() is called. |
1016 | */ | 1016 | */ |
1017 | retval = -ENODEV; | 1017 | retval = -ENODEV; |
1018 | 1018 | ||
1019 | if (scsi_block_when_processing_errors(sdp)) { | 1019 | if (scsi_block_when_processing_errors(sdp)) { |
1020 | sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); | 1020 | sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); |
1021 | retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES, | 1021 | retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES, |
1022 | sshdr); | 1022 | sshdr); |
1023 | } | 1023 | } |
1024 | 1024 | ||
1025 | /* | 1025 | /* |
1026 | * Unable to test, unit probably not ready. This usually | 1026 | * Unable to test, unit probably not ready. This usually |
1027 | * means there is no disc in the drive. Mark as changed, | 1027 | * means there is no disc in the drive. Mark as changed, |
1028 | * and we will figure it out later once the drive is | 1028 | * and we will figure it out later once the drive is |
1029 | * available again. | 1029 | * available again. |
1030 | */ | 1030 | */ |
1031 | if (retval || (scsi_sense_valid(sshdr) && | 1031 | if (retval || (scsi_sense_valid(sshdr) && |
1032 | /* 0x3a is medium not present */ | 1032 | /* 0x3a is medium not present */ |
1033 | sshdr->asc == 0x3a)) { | 1033 | sshdr->asc == 0x3a)) { |
1034 | set_media_not_present(sdkp); | 1034 | set_media_not_present(sdkp); |
1035 | retval = 1; | 1035 | retval = 1; |
1036 | goto out; | 1036 | goto out; |
1037 | } | 1037 | } |
1038 | 1038 | ||
1039 | /* | 1039 | /* |
1040 | * For removable scsi disks we have to recognise the presence | 1040 | * For removable scsi disks we have to recognise the presence |
1041 | * of a disk in the drive. This is kept in struct scsi_disk | 1041 | * of a disk in the drive. This is kept in struct scsi_disk |
1042 | * and tested at open! Daniel Roche (dan@lectra.fr) | 1042 | * and tested at open! Daniel Roche (dan@lectra.fr) |
1043 | */ | 1043 | */ |
1044 | sdkp->media_present = 1; | 1044 | sdkp->media_present = 1; |
1045 | 1045 | ||
1046 | retval = sdp->changed; | 1046 | retval = sdp->changed; |
1047 | sdp->changed = 0; | 1047 | sdp->changed = 0; |
1048 | out: | 1048 | out: |
1049 | if (retval != sdkp->previous_state) | 1049 | if (retval != sdkp->previous_state) |
1050 | sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL); | 1050 | sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL); |
1051 | sdkp->previous_state = retval; | 1051 | sdkp->previous_state = retval; |
1052 | kfree(sshdr); | 1052 | kfree(sshdr); |
1053 | return retval; | 1053 | return retval; |
1054 | } | 1054 | } |
1055 | 1055 | ||
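In the poll above, a media change is reported either when TEST UNIT READY cannot be issued or fails, or when valid sense data carries ASC 0x3a (medium not present); an SDEV_EVT_MEDIA_CHANGE event is sent only when the result differs from the previously recorded state. A condensed userspace sketch of that decision, using hypothetical types rather than the kernel's sense structures:

#include <stdio.h>

/* Hypothetical condensed view of the "medium not present" test in
 * sd_media_changed(): either the TUR failed outright, or valid sense
 * data reports ASC 0x3a. */
struct sense { int valid; unsigned char asc; };

static int medium_gone(int tur_result, const struct sense *s)
{
        return tur_result || (s->valid && s->asc == 0x3a);
}

int main(void)
{
        struct sense no_medium = { .valid = 1, .asc = 0x3a };
        struct sense fine      = { .valid = 1, .asc = 0x00 };

        printf("%d %d %d\n",
               medium_gone(0, &no_medium),   /* 1: medium not present */
               medium_gone(-5, &fine),       /* 1: TUR itself failed */
               medium_gone(0, &fine));       /* 0: medium still there */
        return 0;
}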
1056 | static int sd_sync_cache(struct scsi_disk *sdkp) | 1056 | static int sd_sync_cache(struct scsi_disk *sdkp) |
1057 | { | 1057 | { |
1058 | int retries, res; | 1058 | int retries, res; |
1059 | struct scsi_device *sdp = sdkp->device; | 1059 | struct scsi_device *sdp = sdkp->device; |
1060 | struct scsi_sense_hdr sshdr; | 1060 | struct scsi_sense_hdr sshdr; |
1061 | 1061 | ||
1062 | if (!scsi_device_online(sdp)) | 1062 | if (!scsi_device_online(sdp)) |
1063 | return -ENODEV; | 1063 | return -ENODEV; |
1064 | 1064 | ||
1065 | 1065 | ||
1066 | for (retries = 3; retries > 0; --retries) { | 1066 | for (retries = 3; retries > 0; --retries) { |
1067 | unsigned char cmd[10] = { 0 }; | 1067 | unsigned char cmd[10] = { 0 }; |
1068 | 1068 | ||
1069 | cmd[0] = SYNCHRONIZE_CACHE; | 1069 | cmd[0] = SYNCHRONIZE_CACHE; |
1070 | /* | 1070 | /* |
1071 | * Leave the rest of the command zero to indicate | 1071 | * Leave the rest of the command zero to indicate |
1072 | * flush everything. | 1072 | * flush everything. |
1073 | */ | 1073 | */ |
1074 | res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, | 1074 | res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, |
1075 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); | 1075 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); |
1076 | if (res == 0) | 1076 | if (res == 0) |
1077 | break; | 1077 | break; |
1078 | } | 1078 | } |
1079 | 1079 | ||
1080 | if (res) { | 1080 | if (res) { |
1081 | sd_print_result(sdkp, res); | 1081 | sd_print_result(sdkp, res); |
1082 | if (driver_byte(res) & DRIVER_SENSE) | 1082 | if (driver_byte(res) & DRIVER_SENSE) |
1083 | sd_print_sense_hdr(sdkp, &sshdr); | 1083 | sd_print_sense_hdr(sdkp, &sshdr); |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | if (res) | 1086 | if (res) |
1087 | return -EIO; | 1087 | return -EIO; |
1088 | return 0; | 1088 | return 0; |
1089 | } | 1089 | } |
1090 | 1090 | ||
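sd_sync_cache issues SYNCHRONIZE CACHE(10) with everything after the opcode left zero, which asks the device to flush its entire write cache, and retries the command up to three times before reporting -EIO. A standalone sketch of that all-zero CDB (the 0x35 opcode value comes from the SCSI spec):

#include <stdio.h>
#include <string.h>

#define SYNCHRONIZE_CACHE 0x35  /* SYNCHRONIZE CACHE(10) opcode */

/* Build the 10-byte CDB the way sd_sync_cache() does: opcode only,
 * LBA and block count left zero so the whole cache is flushed. */
int main(void)
{
        unsigned char cdb[10];

        memset(cdb, 0, sizeof(cdb));
        cdb[0] = SYNCHRONIZE_CACHE;

        for (int i = 0; i < 10; i++)
                printf("%02x ", cdb[i]);
        printf("\n");
        return 0;
}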
1091 | static void sd_rescan(struct device *dev) | 1091 | static void sd_rescan(struct device *dev) |
1092 | { | 1092 | { |
1093 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); | 1093 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); |
1094 | 1094 | ||
1095 | if (sdkp) { | 1095 | if (sdkp) { |
1096 | revalidate_disk(sdkp->disk); | 1096 | revalidate_disk(sdkp->disk); |
1097 | scsi_disk_put(sdkp); | 1097 | scsi_disk_put(sdkp); |
1098 | } | 1098 | } |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | 1101 | ||
1102 | #ifdef CONFIG_COMPAT | 1102 | #ifdef CONFIG_COMPAT |
1103 | /* | 1103 | /* |
1104 | * This gets directly called from VFS. When the ioctl | 1104 | * This gets directly called from VFS. When the ioctl |
1105 | * is not recognized we go back to the other translation paths. | 1105 | * is not recognized we go back to the other translation paths. |
1106 | */ | 1106 | */ |
1107 | static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode, | 1107 | static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode, |
1108 | unsigned int cmd, unsigned long arg) | 1108 | unsigned int cmd, unsigned long arg) |
1109 | { | 1109 | { |
1110 | struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device; | 1110 | struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device; |
1111 | 1111 | ||
1112 | /* | 1112 | /* |
1113 | * If we are in the middle of error recovery, don't let anyone | 1113 | * If we are in the middle of error recovery, don't let anyone |
1114 | * else try and use this device. Also, if error recovery fails, it | 1114 | * else try and use this device. Also, if error recovery fails, it |
1115 | * may try and take the device offline, in which case all further | 1115 | * may try and take the device offline, in which case all further |
1116 | * access to the device is prohibited. | 1116 | * access to the device is prohibited. |
1117 | */ | 1117 | */ |
1118 | if (!scsi_block_when_processing_errors(sdev)) | 1118 | if (!scsi_block_when_processing_errors(sdev)) |
1119 | return -ENODEV; | 1119 | return -ENODEV; |
1120 | 1120 | ||
1121 | if (sdev->host->hostt->compat_ioctl) { | 1121 | if (sdev->host->hostt->compat_ioctl) { |
1122 | int ret; | 1122 | int ret; |
1123 | 1123 | ||
1124 | ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg); | 1124 | ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg); |
1125 | 1125 | ||
1126 | return ret; | 1126 | return ret; |
1127 | } | 1127 | } |
1128 | 1128 | ||
1129 | /* | 1129 | /* |
1130 | * Let the static ioctl translation table take care of it. | 1130 | * Let the static ioctl translation table take care of it. |
1131 | */ | 1131 | */ |
1132 | return -ENOIOCTLCMD; | 1132 | return -ENOIOCTLCMD; |
1133 | } | 1133 | } |
1134 | #endif | 1134 | #endif |
1135 | 1135 | ||
1136 | static const struct block_device_operations sd_fops = { | 1136 | static const struct block_device_operations sd_fops = { |
1137 | .owner = THIS_MODULE, | 1137 | .owner = THIS_MODULE, |
1138 | .open = sd_open, | 1138 | .open = sd_open, |
1139 | .release = sd_release, | 1139 | .release = sd_release, |
1140 | .ioctl = sd_ioctl, | 1140 | .ioctl = sd_ioctl, |
1141 | .getgeo = sd_getgeo, | 1141 | .getgeo = sd_getgeo, |
1142 | #ifdef CONFIG_COMPAT | 1142 | #ifdef CONFIG_COMPAT |
1143 | .compat_ioctl = sd_compat_ioctl, | 1143 | .compat_ioctl = sd_compat_ioctl, |
1144 | #endif | 1144 | #endif |
1145 | .media_changed = sd_media_changed, | 1145 | .media_changed = sd_media_changed, |
1146 | .revalidate_disk = sd_revalidate_disk, | 1146 | .revalidate_disk = sd_revalidate_disk, |
1147 | .unlock_native_capacity = sd_unlock_native_capacity, | 1147 | .unlock_native_capacity = sd_unlock_native_capacity, |
1148 | }; | 1148 | }; |
1149 | 1149 | ||
1150 | static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) | 1150 | static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) |
1151 | { | 1151 | { |
1152 | u64 start_lba = blk_rq_pos(scmd->request); | 1152 | u64 start_lba = blk_rq_pos(scmd->request); |
1153 | u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512); | 1153 | u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512); |
1154 | u64 bad_lba; | 1154 | u64 bad_lba; |
1155 | int info_valid; | 1155 | int info_valid; |
1156 | 1156 | ||
1157 | if (scmd->request->cmd_type != REQ_TYPE_FS) | 1157 | if (scmd->request->cmd_type != REQ_TYPE_FS) |
1158 | return 0; | 1158 | return 0; |
1159 | 1159 | ||
1160 | info_valid = scsi_get_sense_info_fld(scmd->sense_buffer, | 1160 | info_valid = scsi_get_sense_info_fld(scmd->sense_buffer, |
1161 | SCSI_SENSE_BUFFERSIZE, | 1161 | SCSI_SENSE_BUFFERSIZE, |
1162 | &bad_lba); | 1162 | &bad_lba); |
1163 | if (!info_valid) | 1163 | if (!info_valid) |
1164 | return 0; | 1164 | return 0; |
1165 | 1165 | ||
1166 | if (scsi_bufflen(scmd) <= scmd->device->sector_size) | 1166 | if (scsi_bufflen(scmd) <= scmd->device->sector_size) |
1167 | return 0; | 1167 | return 0; |
1168 | 1168 | ||
1169 | if (scmd->device->sector_size < 512) { | 1169 | if (scmd->device->sector_size < 512) { |
1170 | /* only legitimate sector_size here is 256 */ | 1170 | /* only legitimate sector_size here is 256 */ |
1171 | start_lba <<= 1; | 1171 | start_lba <<= 1; |
1172 | end_lba <<= 1; | 1172 | end_lba <<= 1; |
1173 | } else { | 1173 | } else { |
1174 | /* be careful ... don't want any overflows */ | 1174 | /* be careful ... don't want any overflows */ |
1175 | u64 factor = scmd->device->sector_size / 512; | 1175 | u64 factor = scmd->device->sector_size / 512; |
1176 | do_div(start_lba, factor); | 1176 | do_div(start_lba, factor); |
1177 | do_div(end_lba, factor); | 1177 | do_div(end_lba, factor); |
1178 | } | 1178 | } |
1179 | 1179 | ||
1180 | /* The bad lba was reported incorrectly, we have no idea where | 1180 | /* The bad lba was reported incorrectly, we have no idea where |
1181 | * the error is. | 1181 | * the error is. |
1182 | */ | 1182 | */ |
1183 | if (bad_lba < start_lba || bad_lba >= end_lba) | 1183 | if (bad_lba < start_lba || bad_lba >= end_lba) |
1184 | return 0; | 1184 | return 0; |
1185 | 1185 | ||
1186 | /* This computation should always be done in terms of | 1186 | /* This computation should always be done in terms of |
1187 | * the resolution of the device's medium. | 1187 | * the resolution of the device's medium. |
1188 | */ | 1188 | */ |
1189 | return (bad_lba - start_lba) * scmd->device->sector_size; | 1189 | return (bad_lba - start_lba) * scmd->device->sector_size; |
1190 | } | 1190 | } |
1191 | 1191 | ||
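sd_completed_bytes works in the device's native sector units: the request's 512-byte start and end positions are divided by sector_size / 512 (the 256-byte case is handled separately by shifting left), and the return value is the number of bytes that completed before the reported bad LBA. A standalone sketch of the common >= 512-byte-sector case, with arbitrary example numbers:

#include <stdint.h>
#include <stdio.h>

/* Mirror the arithmetic in sd_completed_bytes(): scale 512-byte
 * request units to the device's sector size, then count the good
 * bytes that precede the bad LBA. */
static unsigned int completed_bytes(uint64_t start_512, uint64_t len_512,
                                    unsigned sector_size, uint64_t bad_lba)
{
        uint64_t factor = sector_size / 512;
        uint64_t start_lba = start_512 / factor;
        uint64_t end_lba = (start_512 + len_512) / factor;

        if (bad_lba < start_lba || bad_lba >= end_lba)
                return 0;                  /* bad LBA outside the request */
        return (bad_lba - start_lba) * sector_size;
}

int main(void)
{
        /* Example: 4K-sector disk, request starts at 512-byte block 800,
         * 64 blocks long, failure reported at device LBA 102. */
        printf("%u bytes completed\n", completed_bytes(800, 64, 4096, 102));
        return 0;
}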
1192 | /** | 1192 | /** |
1193 | * sd_done - bottom half handler: called when the lower level | 1193 | * sd_done - bottom half handler: called when the lower level |
1194 | * driver has completed (successfully or otherwise) a scsi command. | 1194 | * driver has completed (successfully or otherwise) a scsi command. |
1195 | * @SCpnt: mid-level's per command structure. | 1195 | * @SCpnt: mid-level's per command structure. |
1196 | * | 1196 | * |
1197 | * Note: potentially run from within an ISR. Must not block. | 1197 | * Note: potentially run from within an ISR. Must not block. |
1198 | **/ | 1198 | **/ |
1199 | static int sd_done(struct scsi_cmnd *SCpnt) | 1199 | static int sd_done(struct scsi_cmnd *SCpnt) |
1200 | { | 1200 | { |
1201 | int result = SCpnt->result; | 1201 | int result = SCpnt->result; |
1202 | unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); | 1202 | unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); |
1203 | struct scsi_sense_hdr sshdr; | 1203 | struct scsi_sense_hdr sshdr; |
1204 | struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); | 1204 | struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); |
1205 | int sense_valid = 0; | 1205 | int sense_valid = 0; |
1206 | int sense_deferred = 0; | 1206 | int sense_deferred = 0; |
1207 | 1207 | ||
1208 | if (SCpnt->request->cmd_flags & REQ_DISCARD) { | 1208 | if (SCpnt->request->cmd_flags & REQ_DISCARD) { |
1209 | if (!result) | 1209 | if (!result) |
1210 | scsi_set_resid(SCpnt, 0); | 1210 | scsi_set_resid(SCpnt, 0); |
1211 | return good_bytes; | 1211 | return good_bytes; |
1212 | } | 1212 | } |
1213 | 1213 | ||
1214 | if (result) { | 1214 | if (result) { |
1215 | sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); | 1215 | sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); |
1216 | if (sense_valid) | 1216 | if (sense_valid) |
1217 | sense_deferred = scsi_sense_is_deferred(&sshdr); | 1217 | sense_deferred = scsi_sense_is_deferred(&sshdr); |
1218 | } | 1218 | } |
1219 | #ifdef CONFIG_SCSI_LOGGING | 1219 | #ifdef CONFIG_SCSI_LOGGING |
1220 | SCSI_LOG_HLCOMPLETE(1, scsi_print_result(SCpnt)); | 1220 | SCSI_LOG_HLCOMPLETE(1, scsi_print_result(SCpnt)); |
1221 | if (sense_valid) { | 1221 | if (sense_valid) { |
1222 | SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, | 1222 | SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, |
1223 | "sd_done: sb[respc,sk,asc," | 1223 | "sd_done: sb[respc,sk,asc," |
1224 | "ascq]=%x,%x,%x,%x\n", | 1224 | "ascq]=%x,%x,%x,%x\n", |
1225 | sshdr.response_code, | 1225 | sshdr.response_code, |
1226 | sshdr.sense_key, sshdr.asc, | 1226 | sshdr.sense_key, sshdr.asc, |
1227 | sshdr.ascq)); | 1227 | sshdr.ascq)); |
1228 | } | 1228 | } |
1229 | #endif | 1229 | #endif |
1230 | if (driver_byte(result) != DRIVER_SENSE && | 1230 | if (driver_byte(result) != DRIVER_SENSE && |
1231 | (!sense_valid || sense_deferred)) | 1231 | (!sense_valid || sense_deferred)) |
1232 | goto out; | 1232 | goto out; |
1233 | 1233 | ||
1234 | switch (sshdr.sense_key) { | 1234 | switch (sshdr.sense_key) { |
1235 | case HARDWARE_ERROR: | 1235 | case HARDWARE_ERROR: |
1236 | case MEDIUM_ERROR: | 1236 | case MEDIUM_ERROR: |
1237 | good_bytes = sd_completed_bytes(SCpnt); | 1237 | good_bytes = sd_completed_bytes(SCpnt); |
1238 | break; | 1238 | break; |
1239 | case RECOVERED_ERROR: | 1239 | case RECOVERED_ERROR: |
1240 | good_bytes = scsi_bufflen(SCpnt); | 1240 | good_bytes = scsi_bufflen(SCpnt); |
1241 | break; | 1241 | break; |
1242 | case NO_SENSE: | 1242 | case NO_SENSE: |
1243 | /* This indicates a false check condition, so ignore it. An | 1243 | /* This indicates a false check condition, so ignore it. An |
1244 | * unknown amount of data was transferred so treat it as an | 1244 | * unknown amount of data was transferred so treat it as an |
1245 | * error. | 1245 | * error. |
1246 | */ | 1246 | */ |
1247 | scsi_print_sense("sd", SCpnt); | 1247 | scsi_print_sense("sd", SCpnt); |
1248 | SCpnt->result = 0; | 1248 | SCpnt->result = 0; |
1249 | memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | 1249 | memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); |
1250 | break; | 1250 | break; |
1251 | case ABORTED_COMMAND: /* DIF: Target detected corruption */ | 1251 | case ABORTED_COMMAND: /* DIF: Target detected corruption */ |
1252 | case ILLEGAL_REQUEST: /* DIX: Host detected corruption */ | 1252 | case ILLEGAL_REQUEST: /* DIX: Host detected corruption */ |
1253 | if (sshdr.asc == 0x10) | 1253 | if (sshdr.asc == 0x10) |
1254 | good_bytes = sd_completed_bytes(SCpnt); | 1254 | good_bytes = sd_completed_bytes(SCpnt); |
1255 | break; | 1255 | break; |
1256 | default: | 1256 | default: |
1257 | break; | 1257 | break; |
1258 | } | 1258 | } |
1259 | out: | 1259 | out: |
1260 | if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt)) | 1260 | if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt)) |
1261 | sd_dif_complete(SCpnt, good_bytes); | 1261 | sd_dif_complete(SCpnt, good_bytes); |
1262 | 1262 | ||
1263 | if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type) | 1263 | if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type) |
1264 | == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) { | 1264 | == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) { |
1265 | 1265 | ||
1266 | /* We have to print a failed command here as the | 1266 | /* We have to print a failed command here as the |
1267 | * extended CDB gets freed before scsi_io_completion() | 1267 | * extended CDB gets freed before scsi_io_completion() |
1268 | * is called. | 1268 | * is called. |
1269 | */ | 1269 | */ |
1270 | if (result) | 1270 | if (result) |
1271 | scsi_print_command(SCpnt); | 1271 | scsi_print_command(SCpnt); |
1272 | 1272 | ||
1273 | mempool_free(SCpnt->cmnd, sd_cdb_pool); | 1273 | mempool_free(SCpnt->cmnd, sd_cdb_pool); |
1274 | SCpnt->cmnd = NULL; | 1274 | SCpnt->cmnd = NULL; |
1275 | SCpnt->cmd_len = 0; | 1275 | SCpnt->cmd_len = 0; |
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | return good_bytes; | 1278 | return good_bytes; |
1279 | } | 1279 | } |
1280 | 1280 | ||
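The tail of sd_done undoes the Type 2 protection setup made in sd_prep_fn: the swapped-in 32-byte CDB is detected by comparing SCpnt->cmnd with the request's own cmd buffer, printed while still allocated if the command failed, then returned to sd_cdb_pool and cleared. A minimal userspace stand-in for that allocate-in-prep / release-in-done pairing, with malloc/free standing in for the mempool:

#include <stdio.h>
#include <stdlib.h>

#define EXT_CDB_SIZE 32  /* size of the READ/WRITE(32) CDB */

struct cmd {
        unsigned char *cmnd;
        unsigned char inline_cdb[16];
};

/* prep step: swap in an extended CDB (mempool_alloc in the driver) */
static int prep(struct cmd *c)
{
        unsigned char *ext = calloc(1, EXT_CDB_SIZE);
        if (!ext)
                return -1;       /* the driver defers with BLKPREP_DEFER */
        c->cmnd = ext;
        return 0;
}

/* completion step: detect the swap and hand the buffer back */
static void done(struct cmd *c)
{
        if (c->cmnd != c->inline_cdb) {
                free(c->cmnd);   /* mempool_free in the driver */
                c->cmnd = NULL;
        }
}

int main(void)
{
        struct cmd c;

        c.cmnd = c.inline_cdb;   /* normal case: CDB lives inline */
        if (prep(&c) == 0)
                done(&c);
        printf("cmnd cleared: %s\n", c.cmnd ? "no" : "yes");
        return 0;
}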
1281 | static int media_not_present(struct scsi_disk *sdkp, | 1281 | static int media_not_present(struct scsi_disk *sdkp, |
1282 | struct scsi_sense_hdr *sshdr) | 1282 | struct scsi_sense_hdr *sshdr) |
1283 | { | 1283 | { |
1284 | 1284 | ||
1285 | if (!scsi_sense_valid(sshdr)) | 1285 | if (!scsi_sense_valid(sshdr)) |
1286 | return 0; | 1286 | return 0; |
1287 | /* not invoked for commands that could return deferred errors */ | 1287 | /* not invoked for commands that could return deferred errors */ |
1288 | if (sshdr->sense_key != NOT_READY && | 1288 | if (sshdr->sense_key != NOT_READY && |
1289 | sshdr->sense_key != UNIT_ATTENTION) | 1289 | sshdr->sense_key != UNIT_ATTENTION) |
1290 | return 0; | 1290 | return 0; |
1291 | if (sshdr->asc != 0x3A) /* medium not present */ | 1291 | if (sshdr->asc != 0x3A) /* medium not present */ |
1292 | return 0; | 1292 | return 0; |
1293 | 1293 | ||
1294 | set_media_not_present(sdkp); | 1294 | set_media_not_present(sdkp); |
1295 | return 1; | 1295 | return 1; |
1296 | } | 1296 | } |
1297 | 1297 | ||
1298 | /* | 1298 | /* |
1299 | * spinup disk - called only in sd_revalidate_disk() | 1299 | * spinup disk - called only in sd_revalidate_disk() |
1300 | */ | 1300 | */ |
1301 | static void | 1301 | static void |
1302 | sd_spinup_disk(struct scsi_disk *sdkp) | 1302 | sd_spinup_disk(struct scsi_disk *sdkp) |
1303 | { | 1303 | { |
1304 | unsigned char cmd[10]; | 1304 | unsigned char cmd[10]; |
1305 | unsigned long spintime_expire = 0; | 1305 | unsigned long spintime_expire = 0; |
1306 | int retries, spintime; | 1306 | int retries, spintime; |
1307 | unsigned int the_result; | 1307 | unsigned int the_result; |
1308 | struct scsi_sense_hdr sshdr; | 1308 | struct scsi_sense_hdr sshdr; |
1309 | int sense_valid = 0; | 1309 | int sense_valid = 0; |
1310 | 1310 | ||
1311 | spintime = 0; | 1311 | spintime = 0; |
1312 | 1312 | ||
1313 | /* Spin up drives, as required. Only do this at boot time */ | 1313 | /* Spin up drives, as required. Only do this at boot time */ |
1314 | /* Spinup needs to be done for module loads too. */ | 1314 | /* Spinup needs to be done for module loads too. */ |
1315 | do { | 1315 | do { |
1316 | retries = 0; | 1316 | retries = 0; |
1317 | 1317 | ||
1318 | do { | 1318 | do { |
1319 | cmd[0] = TEST_UNIT_READY; | 1319 | cmd[0] = TEST_UNIT_READY; |
1320 | memset((void *) &cmd[1], 0, 9); | 1320 | memset((void *) &cmd[1], 0, 9); |
1321 | 1321 | ||
1322 | the_result = scsi_execute_req(sdkp->device, cmd, | 1322 | the_result = scsi_execute_req(sdkp->device, cmd, |
1323 | DMA_NONE, NULL, 0, | 1323 | DMA_NONE, NULL, 0, |
1324 | &sshdr, SD_TIMEOUT, | 1324 | &sshdr, SD_TIMEOUT, |
1325 | SD_MAX_RETRIES, NULL); | 1325 | SD_MAX_RETRIES, NULL); |
1326 | 1326 | ||
1327 | /* | 1327 | /* |
1328 | * If the drive has indicated to us that it | 1328 | * If the drive has indicated to us that it |
1329 | * doesn't have any media in it, don't bother | 1329 | * doesn't have any media in it, don't bother |
1330 | * with any more polling. | 1330 | * with any more polling. |
1331 | */ | 1331 | */ |
1332 | if (media_not_present(sdkp, &sshdr)) | 1332 | if (media_not_present(sdkp, &sshdr)) |
1333 | return; | 1333 | return; |
1334 | 1334 | ||
1335 | if (the_result) | 1335 | if (the_result) |
1336 | sense_valid = scsi_sense_valid(&sshdr); | 1336 | sense_valid = scsi_sense_valid(&sshdr); |
1337 | retries++; | 1337 | retries++; |
1338 | } while (retries < 3 && | 1338 | } while (retries < 3 && |
1339 | (!scsi_status_is_good(the_result) || | 1339 | (!scsi_status_is_good(the_result) || |
1340 | ((driver_byte(the_result) & DRIVER_SENSE) && | 1340 | ((driver_byte(the_result) & DRIVER_SENSE) && |
1341 | sense_valid && sshdr.sense_key == UNIT_ATTENTION))); | 1341 | sense_valid && sshdr.sense_key == UNIT_ATTENTION))); |
1342 | 1342 | ||
1343 | if ((driver_byte(the_result) & DRIVER_SENSE) == 0) { | 1343 | if ((driver_byte(the_result) & DRIVER_SENSE) == 0) { |
1344 | /* no sense, TUR either succeeded or failed | 1344 | /* no sense, TUR either succeeded or failed |
1345 | * with a status error */ | 1345 | * with a status error */ |
1346 | if(!spintime && !scsi_status_is_good(the_result)) { | 1346 | if(!spintime && !scsi_status_is_good(the_result)) { |
1347 | sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); | 1347 | sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); |
1348 | sd_print_result(sdkp, the_result); | 1348 | sd_print_result(sdkp, the_result); |
1349 | } | 1349 | } |
1350 | break; | 1350 | break; |
1351 | } | 1351 | } |
1352 | 1352 | ||
1353 | /* | 1353 | /* |
1354 | * The device does not want the automatic start to be issued. | 1354 | * The device does not want the automatic start to be issued. |
1355 | */ | 1355 | */ |
1356 | if (sdkp->device->no_start_on_add) | 1356 | if (sdkp->device->no_start_on_add) |
1357 | break; | 1357 | break; |
1358 | 1358 | ||
1359 | if (sense_valid && sshdr.sense_key == NOT_READY) { | 1359 | if (sense_valid && sshdr.sense_key == NOT_READY) { |
1360 | if (sshdr.asc == 4 && sshdr.ascq == 3) | 1360 | if (sshdr.asc == 4 && sshdr.ascq == 3) |
1361 | break; /* manual intervention required */ | 1361 | break; /* manual intervention required */ |
1362 | if (sshdr.asc == 4 && sshdr.ascq == 0xb) | 1362 | if (sshdr.asc == 4 && sshdr.ascq == 0xb) |
1363 | break; /* standby */ | 1363 | break; /* standby */ |
1364 | if (sshdr.asc == 4 && sshdr.ascq == 0xc) | 1364 | if (sshdr.asc == 4 && sshdr.ascq == 0xc) |
1365 | break; /* unavailable */ | 1365 | break; /* unavailable */ |
1366 | /* | 1366 | /* |
1367 | * Issue command to spin up drive when not ready | 1367 | * Issue command to spin up drive when not ready |
1368 | */ | 1368 | */ |
1369 | if (!spintime) { | 1369 | if (!spintime) { |
1370 | sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); | 1370 | sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); |
1371 | cmd[0] = START_STOP; | 1371 | cmd[0] = START_STOP; |
1372 | cmd[1] = 1; /* Return immediately */ | 1372 | cmd[1] = 1; /* Return immediately */ |
1373 | memset((void *) &cmd[2], 0, 8); | 1373 | memset((void *) &cmd[2], 0, 8); |
1374 | cmd[4] = 1; /* Start spin cycle */ | 1374 | cmd[4] = 1; /* Start spin cycle */ |
1375 | if (sdkp->device->start_stop_pwr_cond) | 1375 | if (sdkp->device->start_stop_pwr_cond) |
1376 | cmd[4] |= 1 << 4; | 1376 | cmd[4] |= 1 << 4; |
1377 | scsi_execute_req(sdkp->device, cmd, DMA_NONE, | 1377 | scsi_execute_req(sdkp->device, cmd, DMA_NONE, |
1378 | NULL, 0, &sshdr, | 1378 | NULL, 0, &sshdr, |
1379 | SD_TIMEOUT, SD_MAX_RETRIES, | 1379 | SD_TIMEOUT, SD_MAX_RETRIES, |
1380 | NULL); | 1380 | NULL); |
1381 | spintime_expire = jiffies + 100 * HZ; | 1381 | spintime_expire = jiffies + 100 * HZ; |
1382 | spintime = 1; | 1382 | spintime = 1; |
1383 | } | 1383 | } |
1384 | /* Wait 1 second for next try */ | 1384 | /* Wait 1 second for next try */ |
1385 | msleep(1000); | 1385 | msleep(1000); |
1386 | printk("."); | 1386 | printk("."); |
1387 | 1387 | ||
1388 | /* | 1388 | /* |
1389 | * Wait for USB flash devices with slow firmware. | 1389 | * Wait for USB flash devices with slow firmware. |
1390 | * Yes, this sense key/ASC combination shouldn't | 1390 | * Yes, this sense key/ASC combination shouldn't |
1391 | * occur here. It's characteristic of these devices. | 1391 | * occur here. It's characteristic of these devices. |
1392 | */ | 1392 | */ |
1393 | } else if (sense_valid && | 1393 | } else if (sense_valid && |
1394 | sshdr.sense_key == UNIT_ATTENTION && | 1394 | sshdr.sense_key == UNIT_ATTENTION && |
1395 | sshdr.asc == 0x28) { | 1395 | sshdr.asc == 0x28) { |
1396 | if (!spintime) { | 1396 | if (!spintime) { |
1397 | spintime_expire = jiffies + 5 * HZ; | 1397 | spintime_expire = jiffies + 5 * HZ; |
1398 | spintime = 1; | 1398 | spintime = 1; |
1399 | } | 1399 | } |
1400 | /* Wait 1 second for next try */ | 1400 | /* Wait 1 second for next try */ |
1401 | msleep(1000); | 1401 | msleep(1000); |
1402 | } else { | 1402 | } else { |
1403 | /* we don't understand the sense code, so it's | 1403 | /* we don't understand the sense code, so it's |
1404 | * probably pointless to loop */ | 1404 | * probably pointless to loop */ |
1405 | if(!spintime) { | 1405 | if(!spintime) { |
1406 | sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); | 1406 | sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); |
1407 | sd_print_sense_hdr(sdkp, &sshdr); | 1407 | sd_print_sense_hdr(sdkp, &sshdr); |
1408 | } | 1408 | } |
1409 | break; | 1409 | break; |
1410 | } | 1410 | } |
1411 | 1411 | ||
1412 | } while (spintime && time_before_eq(jiffies, spintime_expire)); | 1412 | } while (spintime && time_before_eq(jiffies, spintime_expire)); |
1413 | 1413 | ||
1414 | if (spintime) { | 1414 | if (spintime) { |
1415 | if (scsi_status_is_good(the_result)) | 1415 | if (scsi_status_is_good(the_result)) |
1416 | printk("ready\n"); | 1416 | printk("ready\n"); |
1417 | else | 1417 | else |
1418 | printk("not responding...\n"); | 1418 | printk("not responding...\n"); |
1419 | } | 1419 | } |
1420 | } | 1420 | } |
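The loop above polls with TEST UNIT READY and, when the drive reports NOT READY, hand-builds a START STOP UNIT CDB before passing it to scsi_execute_req(). A minimal userspace sketch of that CDB construction, assuming only the opcode value and the field layout visible in the code above (the helper name is invented for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define START_STOP	0x1b	/* SCSI START STOP UNIT opcode */

/* Build the CDB the way sd_spinup_disk() does: IMMED so the command
 * returns before the spin-up completes, START = 1 to spin the media
 * up, and the power-condition bit for devices flagged with
 * start_stop_pwr_cond. */
static void build_start_stop_cdb(uint8_t *cdb, int start_stop_pwr_cond)
{
	memset(cdb, 0, 6);
	cdb[0] = START_STOP;
	cdb[1] = 1;			/* return immediately */
	cdb[4] = 1;			/* start spin cycle */
	if (start_stop_pwr_cond)
		cdb[4] |= 1 << 4;	/* power condition: active */
}

int main(void)
{
	uint8_t cdb[6];
	int i;

	build_start_stop_cdb(cdb, 1);
	for (i = 0; i < 6; i++)
		printf("%02x ", cdb[i]);	/* 1b 01 00 00 11 00 */
	printf("\n");
	return 0;
}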
1421 | 1421 | ||
1422 | 1422 | ||
1423 | /* | 1423 | /* |
1424 | * Determine whether disk supports Data Integrity Field. | 1424 | * Determine whether disk supports Data Integrity Field. |
1425 | */ | 1425 | */ |
1426 | static void sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer) | 1426 | static void sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer) |
1427 | { | 1427 | { |
1428 | struct scsi_device *sdp = sdkp->device; | 1428 | struct scsi_device *sdp = sdkp->device; |
1429 | u8 type; | 1429 | u8 type; |
1430 | 1430 | ||
1431 | if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) | 1431 | if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) |
1432 | return; | 1432 | return; |
1433 | 1433 | ||
1434 | type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ | 1434 | type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ |
1435 | 1435 | ||
1436 | if (type == sdkp->protection_type || !sdkp->first_scan) | 1436 | if (type == sdkp->protection_type || !sdkp->first_scan) |
1437 | return; | 1437 | return; |
1438 | 1438 | ||
1439 | sdkp->protection_type = type; | 1439 | sdkp->protection_type = type; |
1440 | 1440 | ||
1441 | if (type > SD_DIF_TYPE3_PROTECTION) { | 1441 | if (type > SD_DIF_TYPE3_PROTECTION) { |
1442 | sd_printk(KERN_ERR, sdkp, "formatted with unsupported " \ | 1442 | sd_printk(KERN_ERR, sdkp, "formatted with unsupported " \ |
1443 | "protection type %u. Disabling disk!\n", type); | 1443 | "protection type %u. Disabling disk!\n", type); |
1444 | sdkp->capacity = 0; | 1444 | sdkp->capacity = 0; |
1445 | return; | 1445 | return; |
1446 | } | 1446 | } |
1447 | 1447 | ||
1448 | if (scsi_host_dif_capable(sdp->host, type)) | 1448 | if (scsi_host_dif_capable(sdp->host, type)) |
1449 | sd_printk(KERN_NOTICE, sdkp, | 1449 | sd_printk(KERN_NOTICE, sdkp, |
1450 | "Enabling DIF Type %u protection\n", type); | 1450 | "Enabling DIF Type %u protection\n", type); |
1451 | else | 1451 | else |
1452 | sd_printk(KERN_NOTICE, sdkp, | 1452 | sd_printk(KERN_NOTICE, sdkp, |
1453 | "Disabling DIF Type %u protection\n", type); | 1453 | "Disabling DIF Type %u protection\n", type); |
1454 | } | 1454 | } |
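The P_TYPE decode above is terse: byte 12 of the READ CAPACITY(16) data carries the protection-enable flag in bit 0 and the protection type in bits 1-3, with P_TYPE 0 meaning DIF Type 1. A small sketch of the same derivation over made-up input (the function name is illustrative):

#include <stdio.h>

/* Returns 0 when protection is disabled, otherwise the DIF type
 * (1-3) exactly as sd_read_protection_type() derives it. */
static int dif_type_from_byte12(unsigned char byte12)
{
	if ((byte12 & 1) == 0)		/* not formatted with protection information */
		return 0;
	return ((byte12 >> 1) & 7) + 1;	/* P_TYPE 0 = Type 1, 1 = Type 2, ... */
}

int main(void)
{
	printf("%d\n", dif_type_from_byte12(0x00));	/* 0: no protection */
	printf("%d\n", dif_type_from_byte12(0x01));	/* 1: Type 1 */
	printf("%d\n", dif_type_from_byte12(0x03));	/* 2: Type 2 */
	return 0;
}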
1455 | 1455 | ||
1456 | static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, | 1456 | static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, |
1457 | struct scsi_sense_hdr *sshdr, int sense_valid, | 1457 | struct scsi_sense_hdr *sshdr, int sense_valid, |
1458 | int the_result) | 1458 | int the_result) |
1459 | { | 1459 | { |
1460 | sd_print_result(sdkp, the_result); | 1460 | sd_print_result(sdkp, the_result); |
1461 | if (driver_byte(the_result) & DRIVER_SENSE) | 1461 | if (driver_byte(the_result) & DRIVER_SENSE) |
1462 | sd_print_sense_hdr(sdkp, sshdr); | 1462 | sd_print_sense_hdr(sdkp, sshdr); |
1463 | else | 1463 | else |
1464 | sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n"); | 1464 | sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n"); |
1465 | 1465 | ||
1466 | /* | 1466 | /* |
1467 | * Set dirty bit for removable devices if not ready - | 1467 | * Set dirty bit for removable devices if not ready - |
1468 | * sometimes drives will not report this properly. | 1468 | * sometimes drives will not report this properly. |
1469 | */ | 1469 | */ |
1470 | if (sdp->removable && | 1470 | if (sdp->removable && |
1471 | sense_valid && sshdr->sense_key == NOT_READY) | 1471 | sense_valid && sshdr->sense_key == NOT_READY) |
1472 | sdp->changed = 1; | 1472 | sdp->changed = 1; |
1473 | 1473 | ||
1474 | /* | 1474 | /* |
1475 | * We used to set media_present to 0 here to indicate no media | 1475 | * We used to set media_present to 0 here to indicate no media |
1476 | * in the drive, but some drives fail read capacity even with | 1476 | * in the drive, but some drives fail read capacity even with |
1477 | * media present, so we can't do that. | 1477 | * media present, so we can't do that. |
1478 | */ | 1478 | */ |
1479 | sdkp->capacity = 0; /* unknown mapped to zero - as usual */ | 1479 | sdkp->capacity = 0; /* unknown mapped to zero - as usual */ |
1480 | } | 1480 | } |
1481 | 1481 | ||
1482 | #define RC16_LEN 32 | 1482 | #define RC16_LEN 32 |
1483 | #if RC16_LEN > SD_BUF_SIZE | 1483 | #if RC16_LEN > SD_BUF_SIZE |
1484 | #error RC16_LEN must not be more than SD_BUF_SIZE | 1484 | #error RC16_LEN must not be more than SD_BUF_SIZE |
1485 | #endif | 1485 | #endif |
1486 | 1486 | ||
1487 | #define READ_CAPACITY_RETRIES_ON_RESET 10 | 1487 | #define READ_CAPACITY_RETRIES_ON_RESET 10 |
1488 | 1488 | ||
1489 | static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, | 1489 | static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, |
1490 | unsigned char *buffer) | 1490 | unsigned char *buffer) |
1491 | { | 1491 | { |
1492 | unsigned char cmd[16]; | 1492 | unsigned char cmd[16]; |
1493 | struct scsi_sense_hdr sshdr; | 1493 | struct scsi_sense_hdr sshdr; |
1494 | int sense_valid = 0; | 1494 | int sense_valid = 0; |
1495 | int the_result; | 1495 | int the_result; |
1496 | int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; | 1496 | int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; |
1497 | unsigned int alignment; | 1497 | unsigned int alignment; |
1498 | unsigned long long lba; | 1498 | unsigned long long lba; |
1499 | unsigned sector_size; | 1499 | unsigned sector_size; |
1500 | 1500 | ||
1501 | do { | 1501 | do { |
1502 | memset(cmd, 0, 16); | 1502 | memset(cmd, 0, 16); |
1503 | cmd[0] = SERVICE_ACTION_IN; | 1503 | cmd[0] = SERVICE_ACTION_IN; |
1504 | cmd[1] = SAI_READ_CAPACITY_16; | 1504 | cmd[1] = SAI_READ_CAPACITY_16; |
1505 | cmd[13] = RC16_LEN; | 1505 | cmd[13] = RC16_LEN; |
1506 | memset(buffer, 0, RC16_LEN); | 1506 | memset(buffer, 0, RC16_LEN); |
1507 | 1507 | ||
1508 | the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, | 1508 | the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, |
1509 | buffer, RC16_LEN, &sshdr, | 1509 | buffer, RC16_LEN, &sshdr, |
1510 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); | 1510 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); |
1511 | 1511 | ||
1512 | if (media_not_present(sdkp, &sshdr)) | 1512 | if (media_not_present(sdkp, &sshdr)) |
1513 | return -ENODEV; | 1513 | return -ENODEV; |
1514 | 1514 | ||
1515 | if (the_result) { | 1515 | if (the_result) { |
1516 | sense_valid = scsi_sense_valid(&sshdr); | 1516 | sense_valid = scsi_sense_valid(&sshdr); |
1517 | if (sense_valid && | 1517 | if (sense_valid && |
1518 | sshdr.sense_key == ILLEGAL_REQUEST && | 1518 | sshdr.sense_key == ILLEGAL_REQUEST && |
1519 | (sshdr.asc == 0x20 || sshdr.asc == 0x24) && | 1519 | (sshdr.asc == 0x20 || sshdr.asc == 0x24) && |
1520 | sshdr.ascq == 0x00) | 1520 | sshdr.ascq == 0x00) |
1521 | /* Invalid Command Operation Code or | 1521 | /* Invalid Command Operation Code or |
1522 | * Invalid Field in CDB, just retry | 1522 | * Invalid Field in CDB, just retry |
1523 | * silently with RC10 */ | 1523 | * silently with RC10 */ |
1524 | return -EINVAL; | 1524 | return -EINVAL; |
1525 | if (sense_valid && | 1525 | if (sense_valid && |
1526 | sshdr.sense_key == UNIT_ATTENTION && | 1526 | sshdr.sense_key == UNIT_ATTENTION && |
1527 | sshdr.asc == 0x29 && sshdr.ascq == 0x00) | 1527 | sshdr.asc == 0x29 && sshdr.ascq == 0x00) |
1528 | /* Device reset might occur several times, | 1528 | /* Device reset might occur several times, |
1529 | * give it one more chance */ | 1529 | * give it one more chance */ |
1530 | if (--reset_retries > 0) | 1530 | if (--reset_retries > 0) |
1531 | continue; | 1531 | continue; |
1532 | } | 1532 | } |
1533 | retries--; | 1533 | retries--; |
1534 | 1534 | ||
1535 | } while (the_result && retries); | 1535 | } while (the_result && retries); |
1536 | 1536 | ||
1537 | if (the_result) { | 1537 | if (the_result) { |
1538 | sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY(16) failed\n"); | 1538 | sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY(16) failed\n"); |
1539 | read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); | 1539 | read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); |
1540 | return -EINVAL; | 1540 | return -EINVAL; |
1541 | } | 1541 | } |
1542 | 1542 | ||
1543 | sector_size = get_unaligned_be32(&buffer[8]); | 1543 | sector_size = get_unaligned_be32(&buffer[8]); |
1544 | lba = get_unaligned_be64(&buffer[0]); | 1544 | lba = get_unaligned_be64(&buffer[0]); |
1545 | 1545 | ||
1546 | sd_read_protection_type(sdkp, buffer); | 1546 | sd_read_protection_type(sdkp, buffer); |
1547 | 1547 | ||
1548 | if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) { | 1548 | if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) { |
1549 | sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " | 1549 | sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " |
1550 | "kernel compiled with support for large block " | 1550 | "kernel compiled with support for large block " |
1551 | "devices.\n"); | 1551 | "devices.\n"); |
1552 | sdkp->capacity = 0; | 1552 | sdkp->capacity = 0; |
1553 | return -EOVERFLOW; | 1553 | return -EOVERFLOW; |
1554 | } | 1554 | } |
1555 | 1555 | ||
1556 | /* Logical blocks per physical block exponent */ | 1556 | /* Logical blocks per physical block exponent */ |
1557 | sdkp->hw_sector_size = (1 << (buffer[13] & 0xf)) * sector_size; | 1557 | sdkp->hw_sector_size = (1 << (buffer[13] & 0xf)) * sector_size; |
1558 | 1558 | ||
1559 | /* Lowest aligned logical block */ | 1559 | /* Lowest aligned logical block */ |
1560 | alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; | 1560 | alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; |
1561 | blk_queue_alignment_offset(sdp->request_queue, alignment); | 1561 | blk_queue_alignment_offset(sdp->request_queue, alignment); |
1562 | if (alignment && sdkp->first_scan) | 1562 | if (alignment && sdkp->first_scan) |
1563 | sd_printk(KERN_NOTICE, sdkp, | 1563 | sd_printk(KERN_NOTICE, sdkp, |
1564 | "physical block alignment offset: %u\n", alignment); | 1564 | "physical block alignment offset: %u\n", alignment); |
1565 | 1565 | ||
1566 | if (buffer[14] & 0x80) { /* TPE */ | 1566 | if (buffer[14] & 0x80) { /* TPE */ |
1567 | struct request_queue *q = sdp->request_queue; | 1567 | struct request_queue *q = sdp->request_queue; |
1568 | 1568 | ||
1569 | sdkp->thin_provisioning = 1; | 1569 | sdkp->thin_provisioning = 1; |
1570 | q->limits.discard_granularity = sdkp->hw_sector_size; | 1570 | q->limits.discard_granularity = sdkp->hw_sector_size; |
1571 | q->limits.max_discard_sectors = 0xffffffff; | 1571 | q->limits.max_discard_sectors = 0xffffffff; |
1572 | 1572 | ||
1573 | if (buffer[14] & 0x40) /* TPRZ */ | 1573 | if (buffer[14] & 0x40) /* TPRZ */ |
1574 | q->limits.discard_zeroes_data = 1; | 1574 | q->limits.discard_zeroes_data = 1; |
1575 | 1575 | ||
1576 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | 1576 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); |
1577 | } | 1577 | } |
1578 | 1578 | ||
1579 | sdkp->capacity = lba + 1; | 1579 | sdkp->capacity = lba + 1; |
1580 | return sector_size; | 1580 | return sector_size; |
1581 | } | 1581 | } |
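read_capacity_16() extracts every field it needs from the raw parameter data with get_unaligned_be*() calls and bit masks. A standalone sketch of the same decoding over hypothetical sample data (the be32()/be64() helpers are local stand-ins, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* Big-endian helpers standing in for get_unaligned_be32()/be64(). */
static uint32_t be32(const uint8_t *p)
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 | p[3];
}

static uint64_t be64(const uint8_t *p)
{
	return (uint64_t)be32(p) << 32 | be32(p + 4);
}

int main(void)
{
	/* Hypothetical READ CAPACITY(16) data: last LBA 0x3fffff,
	 * 512-byte logical blocks, 2^3 logical blocks per physical
	 * block, alignment offset 0, TPE and TPRZ set. */
	uint8_t buf[32] = { 0 };
	buf[5] = 0x3f; buf[6] = 0xff; buf[7] = 0xff;	/* returned LBA, bytes 0-7 */
	buf[10] = 0x02;					/* block length 0x200, bytes 8-11 */
	buf[13] = 0x03;					/* logical-per-physical exponent */
	buf[14] = 0xc0;					/* TPE | TPRZ, alignment high bits 0 */

	uint64_t lba = be64(&buf[0]);
	uint32_t logical = be32(&buf[8]);
	unsigned physical = (1u << (buf[13] & 0xf)) * logical;
	unsigned alignment = ((buf[14] & 0x3f) << 8 | buf[15]) * logical;

	printf("capacity %llu blocks, logical %u, physical %u, "
	       "alignment %u, TPE=%d, TPRZ=%d\n",
	       (unsigned long long)(lba + 1), (unsigned)logical, physical,
	       alignment, !!(buf[14] & 0x80), !!(buf[14] & 0x40));
	/* capacity 4194304 blocks, logical 512, physical 4096, alignment 0 */
	return 0;
}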
1582 | 1582 | ||
1583 | static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, | 1583 | static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, |
1584 | unsigned char *buffer) | 1584 | unsigned char *buffer) |
1585 | { | 1585 | { |
1586 | unsigned char cmd[16]; | 1586 | unsigned char cmd[16]; |
1587 | struct scsi_sense_hdr sshdr; | 1587 | struct scsi_sense_hdr sshdr; |
1588 | int sense_valid = 0; | 1588 | int sense_valid = 0; |
1589 | int the_result; | 1589 | int the_result; |
1590 | int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; | 1590 | int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; |
1591 | sector_t lba; | 1591 | sector_t lba; |
1592 | unsigned sector_size; | 1592 | unsigned sector_size; |
1593 | 1593 | ||
1594 | do { | 1594 | do { |
1595 | cmd[0] = READ_CAPACITY; | 1595 | cmd[0] = READ_CAPACITY; |
1596 | memset(&cmd[1], 0, 9); | 1596 | memset(&cmd[1], 0, 9); |
1597 | memset(buffer, 0, 8); | 1597 | memset(buffer, 0, 8); |
1598 | 1598 | ||
1599 | the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, | 1599 | the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, |
1600 | buffer, 8, &sshdr, | 1600 | buffer, 8, &sshdr, |
1601 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); | 1601 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); |
1602 | 1602 | ||
1603 | if (media_not_present(sdkp, &sshdr)) | 1603 | if (media_not_present(sdkp, &sshdr)) |
1604 | return -ENODEV; | 1604 | return -ENODEV; |
1605 | 1605 | ||
1606 | if (the_result) { | 1606 | if (the_result) { |
1607 | sense_valid = scsi_sense_valid(&sshdr); | 1607 | sense_valid = scsi_sense_valid(&sshdr); |
1608 | if (sense_valid && | 1608 | if (sense_valid && |
1609 | sshdr.sense_key == UNIT_ATTENTION && | 1609 | sshdr.sense_key == UNIT_ATTENTION && |
1610 | sshdr.asc == 0x29 && sshdr.ascq == 0x00) | 1610 | sshdr.asc == 0x29 && sshdr.ascq == 0x00) |
1611 | /* Device reset might occur several times, | 1611 | /* Device reset might occur several times, |
1612 | * give it one more chance */ | 1612 | * give it one more chance */ |
1613 | if (--reset_retries > 0) | 1613 | if (--reset_retries > 0) |
1614 | continue; | 1614 | continue; |
1615 | } | 1615 | } |
1616 | retries--; | 1616 | retries--; |
1617 | 1617 | ||
1618 | } while (the_result && retries); | 1618 | } while (the_result && retries); |
1619 | 1619 | ||
1620 | if (the_result) { | 1620 | if (the_result) { |
1621 | sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY failed\n"); | 1621 | sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY failed\n"); |
1622 | read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); | 1622 | read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); |
1623 | return -EINVAL; | 1623 | return -EINVAL; |
1624 | } | 1624 | } |
1625 | 1625 | ||
1626 | sector_size = get_unaligned_be32(&buffer[4]); | 1626 | sector_size = get_unaligned_be32(&buffer[4]); |
1627 | lba = get_unaligned_be32(&buffer[0]); | 1627 | lba = get_unaligned_be32(&buffer[0]); |
1628 | 1628 | ||
1629 | if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) { | 1629 | if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) { |
1630 | sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " | 1630 | sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " |
1631 | "kernel compiled with support for large block " | 1631 | "kernel compiled with support for large block " |
1632 | "devices.\n"); | 1632 | "devices.\n"); |
1633 | sdkp->capacity = 0; | 1633 | sdkp->capacity = 0; |
1634 | return -EOVERFLOW; | 1634 | return -EOVERFLOW; |
1635 | } | 1635 | } |
1636 | 1636 | ||
1637 | sdkp->capacity = lba + 1; | 1637 | sdkp->capacity = lba + 1; |
1638 | sdkp->hw_sector_size = sector_size; | 1638 | sdkp->hw_sector_size = sector_size; |
1639 | return sector_size; | 1639 | return sector_size; |
1640 | } | 1640 | } |
1641 | 1641 | ||
1642 | static int sd_try_rc16_first(struct scsi_device *sdp) | 1642 | static int sd_try_rc16_first(struct scsi_device *sdp) |
1643 | { | 1643 | { |
1644 | if (sdp->host->max_cmd_len < 16) | 1644 | if (sdp->host->max_cmd_len < 16) |
1645 | return 0; | 1645 | return 0; |
1646 | if (sdp->scsi_level > SCSI_SPC_2) | 1646 | if (sdp->scsi_level > SCSI_SPC_2) |
1647 | return 1; | 1647 | return 1; |
1648 | if (scsi_device_protection(sdp)) | 1648 | if (scsi_device_protection(sdp)) |
1649 | return 1; | 1649 | return 1; |
1650 | return 0; | 1650 | return 0; |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | /* | 1653 | /* |
1654 | * read disk capacity | 1654 | * read disk capacity |
1655 | */ | 1655 | */ |
1656 | static void | 1656 | static void |
1657 | sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) | 1657 | sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) |
1658 | { | 1658 | { |
1659 | int sector_size; | 1659 | int sector_size; |
1660 | struct scsi_device *sdp = sdkp->device; | 1660 | struct scsi_device *sdp = sdkp->device; |
1661 | sector_t old_capacity = sdkp->capacity; | 1661 | sector_t old_capacity = sdkp->capacity; |
1662 | 1662 | ||
1663 | if (sd_try_rc16_first(sdp)) { | 1663 | if (sd_try_rc16_first(sdp)) { |
1664 | sector_size = read_capacity_16(sdkp, sdp, buffer); | 1664 | sector_size = read_capacity_16(sdkp, sdp, buffer); |
1665 | if (sector_size == -EOVERFLOW) | 1665 | if (sector_size == -EOVERFLOW) |
1666 | goto got_data; | 1666 | goto got_data; |
1667 | if (sector_size == -ENODEV) | 1667 | if (sector_size == -ENODEV) |
1668 | return; | 1668 | return; |
1669 | if (sector_size < 0) | 1669 | if (sector_size < 0) |
1670 | sector_size = read_capacity_10(sdkp, sdp, buffer); | 1670 | sector_size = read_capacity_10(sdkp, sdp, buffer); |
1671 | if (sector_size < 0) | 1671 | if (sector_size < 0) |
1672 | return; | 1672 | return; |
1673 | } else { | 1673 | } else { |
1674 | sector_size = read_capacity_10(sdkp, sdp, buffer); | 1674 | sector_size = read_capacity_10(sdkp, sdp, buffer); |
1675 | if (sector_size == -EOVERFLOW) | 1675 | if (sector_size == -EOVERFLOW) |
1676 | goto got_data; | 1676 | goto got_data; |
1677 | if (sector_size < 0) | 1677 | if (sector_size < 0) |
1678 | return; | 1678 | return; |
1679 | if ((sizeof(sdkp->capacity) > 4) && | 1679 | if ((sizeof(sdkp->capacity) > 4) && |
1680 | (sdkp->capacity > 0xffffffffULL)) { | 1680 | (sdkp->capacity > 0xffffffffULL)) { |
1681 | int old_sector_size = sector_size; | 1681 | int old_sector_size = sector_size; |
1682 | sd_printk(KERN_NOTICE, sdkp, "Very big device. " | 1682 | sd_printk(KERN_NOTICE, sdkp, "Very big device. " |
1683 | "Trying to use READ CAPACITY(16).\n"); | 1683 | "Trying to use READ CAPACITY(16).\n"); |
1684 | sector_size = read_capacity_16(sdkp, sdp, buffer); | 1684 | sector_size = read_capacity_16(sdkp, sdp, buffer); |
1685 | if (sector_size < 0) { | 1685 | if (sector_size < 0) { |
1686 | sd_printk(KERN_NOTICE, sdkp, | 1686 | sd_printk(KERN_NOTICE, sdkp, |
1687 | "Using 0xffffffff as device size\n"); | 1687 | "Using 0xffffffff as device size\n"); |
1688 | sdkp->capacity = 1 + (sector_t) 0xffffffff; | 1688 | sdkp->capacity = 1 + (sector_t) 0xffffffff; |
1689 | sector_size = old_sector_size; | 1689 | sector_size = old_sector_size; |
1690 | goto got_data; | 1690 | goto got_data; |
1691 | } | 1691 | } |
1692 | } | 1692 | } |
1693 | } | 1693 | } |
1694 | 1694 | ||
1695 | /* Some devices are known to return the total number of blocks, | 1695 | /* Some devices are known to return the total number of blocks, |
1696 | * not the highest block number. Some devices have versions | 1696 | * not the highest block number. Some devices have versions |
1697 | * which do this and others which do not. Some devices we might | 1697 | * which do this and others which do not. Some devices we might |
1698 | * suspect of doing this but we don't know for certain. | 1698 | * suspect of doing this but we don't know for certain. |
1699 | * | 1699 | * |
1700 | * If we know the reported capacity is wrong, decrement it. If | 1700 | * If we know the reported capacity is wrong, decrement it. If |
1701 | * we can only guess, then assume the number of blocks is even | 1701 | * we can only guess, then assume the number of blocks is even |
1702 | * (usually true but not always) and err on the side of lowering | 1702 | * (usually true but not always) and err on the side of lowering |
1703 | * the capacity. | 1703 | * the capacity. |
1704 | */ | 1704 | */ |
1705 | if (sdp->fix_capacity || | 1705 | if (sdp->fix_capacity || |
1706 | (sdp->guess_capacity && (sdkp->capacity & 0x01))) { | 1706 | (sdp->guess_capacity && (sdkp->capacity & 0x01))) { |
1707 | sd_printk(KERN_INFO, sdkp, "Adjusting the sector count " | 1707 | sd_printk(KERN_INFO, sdkp, "Adjusting the sector count " |
1708 | "from its reported value: %llu\n", | 1708 | "from its reported value: %llu\n", |
1709 | (unsigned long long) sdkp->capacity); | 1709 | (unsigned long long) sdkp->capacity); |
1710 | --sdkp->capacity; | 1710 | --sdkp->capacity; |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | got_data: | 1713 | got_data: |
1714 | if (sector_size == 0) { | 1714 | if (sector_size == 0) { |
1715 | sector_size = 512; | 1715 | sector_size = 512; |
1716 | sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, " | 1716 | sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, " |
1717 | "assuming 512.\n"); | 1717 | "assuming 512.\n"); |
1718 | } | 1718 | } |
1719 | 1719 | ||
1720 | if (sector_size != 512 && | 1720 | if (sector_size != 512 && |
1721 | sector_size != 1024 && | 1721 | sector_size != 1024 && |
1722 | sector_size != 2048 && | 1722 | sector_size != 2048 && |
1723 | sector_size != 4096 && | 1723 | sector_size != 4096 && |
1724 | sector_size != 256) { | 1724 | sector_size != 256) { |
1725 | sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n", | 1725 | sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n", |
1726 | sector_size); | 1726 | sector_size); |
1727 | /* | 1727 | /* |
1728 | * The user might want to re-format the drive with | 1728 | * The user might want to re-format the drive with |
1729 | * a supported sectorsize. Once this happens, it | 1729 | * a supported sectorsize. Once this happens, it |
1730 | * would be relatively trivial to set the thing up. | 1730 | * would be relatively trivial to set the thing up. |
1731 | * For this reason, we leave the thing in the table. | 1731 | * For this reason, we leave the thing in the table. |
1732 | */ | 1732 | */ |
1733 | sdkp->capacity = 0; | 1733 | sdkp->capacity = 0; |
1734 | /* | 1734 | /* |
1735 | * set a bogus sector size so the normal read/write | 1735 | * set a bogus sector size so the normal read/write |
1736 | * logic in the block layer will eventually refuse any | 1736 | * logic in the block layer will eventually refuse any |
1737 | * request on this device without tripping over power | 1737 | * request on this device without tripping over power |
1738 | * of two sector size assumptions | 1738 | * of two sector size assumptions |
1739 | */ | 1739 | */ |
1740 | sector_size = 512; | 1740 | sector_size = 512; |
1741 | } | 1741 | } |
1742 | blk_queue_logical_block_size(sdp->request_queue, sector_size); | 1742 | blk_queue_logical_block_size(sdp->request_queue, sector_size); |
1743 | 1743 | ||
1744 | { | 1744 | { |
1745 | char cap_str_2[10], cap_str_10[10]; | 1745 | char cap_str_2[10], cap_str_10[10]; |
1746 | u64 sz = (u64)sdkp->capacity << ilog2(sector_size); | 1746 | u64 sz = (u64)sdkp->capacity << ilog2(sector_size); |
1747 | 1747 | ||
1748 | string_get_size(sz, STRING_UNITS_2, cap_str_2, | 1748 | string_get_size(sz, STRING_UNITS_2, cap_str_2, |
1749 | sizeof(cap_str_2)); | 1749 | sizeof(cap_str_2)); |
1750 | string_get_size(sz, STRING_UNITS_10, cap_str_10, | 1750 | string_get_size(sz, STRING_UNITS_10, cap_str_10, |
1751 | sizeof(cap_str_10)); | 1751 | sizeof(cap_str_10)); |
1752 | 1752 | ||
1753 | if (sdkp->first_scan || old_capacity != sdkp->capacity) { | 1753 | if (sdkp->first_scan || old_capacity != sdkp->capacity) { |
1754 | sd_printk(KERN_NOTICE, sdkp, | 1754 | sd_printk(KERN_NOTICE, sdkp, |
1755 | "%llu %d-byte logical blocks: (%s/%s)\n", | 1755 | "%llu %d-byte logical blocks: (%s/%s)\n", |
1756 | (unsigned long long)sdkp->capacity, | 1756 | (unsigned long long)sdkp->capacity, |
1757 | sector_size, cap_str_10, cap_str_2); | 1757 | sector_size, cap_str_10, cap_str_2); |
1758 | 1758 | ||
1759 | if (sdkp->hw_sector_size != sector_size) | 1759 | if (sdkp->hw_sector_size != sector_size) |
1760 | sd_printk(KERN_NOTICE, sdkp, | 1760 | sd_printk(KERN_NOTICE, sdkp, |
1761 | "%u-byte physical blocks\n", | 1761 | "%u-byte physical blocks\n", |
1762 | sdkp->hw_sector_size); | 1762 | sdkp->hw_sector_size); |
1763 | } | 1763 | } |
1764 | } | 1764 | } |
1765 | 1765 | ||
1766 | /* Rescale capacity to 512-byte units */ | 1766 | /* Rescale capacity to 512-byte units */ |
1767 | if (sector_size == 4096) | 1767 | if (sector_size == 4096) |
1768 | sdkp->capacity <<= 3; | 1768 | sdkp->capacity <<= 3; |
1769 | else if (sector_size == 2048) | 1769 | else if (sector_size == 2048) |
1770 | sdkp->capacity <<= 2; | 1770 | sdkp->capacity <<= 2; |
1771 | else if (sector_size == 1024) | 1771 | else if (sector_size == 1024) |
1772 | sdkp->capacity <<= 1; | 1772 | sdkp->capacity <<= 1; |
1773 | else if (sector_size == 256) | 1773 | else if (sector_size == 256) |
1774 | sdkp->capacity >>= 1; | 1774 | sdkp->capacity >>= 1; |
1775 | 1775 | ||
1776 | blk_queue_physical_block_size(sdp->request_queue, sdkp->hw_sector_size); | 1776 | blk_queue_physical_block_size(sdp->request_queue, sdkp->hw_sector_size); |
1777 | sdkp->device->sector_size = sector_size; | 1777 | sdkp->device->sector_size = sector_size; |
1778 | } | 1778 | } |
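The rescale at the end of sd_read_capacity() converts the capacity from device blocks to 512-byte units with shifts, one case per supported sector size. A tiny sketch showing that the shifts are just capacity * sector_size / 512 (the function and type names are illustrative):

#include <stdio.h>

typedef unsigned long long sector_cnt_t;	/* stand-in for sector_t */

/* Rescale a capacity in device blocks to 512-byte units the same way
 * sd_read_capacity() does for the sector sizes sd accepts. */
static sector_cnt_t to_512_byte_units(sector_cnt_t capacity, unsigned sector_size)
{
	switch (sector_size) {
	case 4096:	return capacity << 3;
	case 2048:	return capacity << 2;
	case 1024:	return capacity << 1;
	case 256:	return capacity >> 1;
	default:	return capacity;	/* 512 */
	}
}

int main(void)
{
	printf("%llu\n", to_512_byte_units(1000, 4096));	/* 8000 */
	printf("%llu\n", to_512_byte_units(1000, 256));		/* 500 */
	return 0;
}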
1779 | 1779 | ||
1780 | /* called with buffer of length 512 */ | 1780 | /* called with buffer of length 512 */ |
1781 | static inline int | 1781 | static inline int |
1782 | sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage, | 1782 | sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage, |
1783 | unsigned char *buffer, int len, struct scsi_mode_data *data, | 1783 | unsigned char *buffer, int len, struct scsi_mode_data *data, |
1784 | struct scsi_sense_hdr *sshdr) | 1784 | struct scsi_sense_hdr *sshdr) |
1785 | { | 1785 | { |
1786 | return scsi_mode_sense(sdp, dbd, modepage, buffer, len, | 1786 | return scsi_mode_sense(sdp, dbd, modepage, buffer, len, |
1787 | SD_TIMEOUT, SD_MAX_RETRIES, data, | 1787 | SD_TIMEOUT, SD_MAX_RETRIES, data, |
1788 | sshdr); | 1788 | sshdr); |
1789 | } | 1789 | } |
1790 | 1790 | ||
1791 | /* | 1791 | /* |
1792 | * read write protect setting, if possible - called only in sd_revalidate_disk() | 1792 | * read write protect setting, if possible - called only in sd_revalidate_disk() |
1793 | * called with buffer of length SD_BUF_SIZE | 1793 | * called with buffer of length SD_BUF_SIZE |
1794 | */ | 1794 | */ |
1795 | static void | 1795 | static void |
1796 | sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) | 1796 | sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) |
1797 | { | 1797 | { |
1798 | int res; | 1798 | int res; |
1799 | struct scsi_device *sdp = sdkp->device; | 1799 | struct scsi_device *sdp = sdkp->device; |
1800 | struct scsi_mode_data data; | 1800 | struct scsi_mode_data data; |
1801 | int old_wp = sdkp->write_prot; | 1801 | int old_wp = sdkp->write_prot; |
1802 | 1802 | ||
1803 | set_disk_ro(sdkp->disk, 0); | 1803 | set_disk_ro(sdkp->disk, 0); |
1804 | if (sdp->skip_ms_page_3f) { | 1804 | if (sdp->skip_ms_page_3f) { |
1805 | sd_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); | 1805 | sd_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); |
1806 | return; | 1806 | return; |
1807 | } | 1807 | } |
1808 | 1808 | ||
1809 | if (sdp->use_192_bytes_for_3f) { | 1809 | if (sdp->use_192_bytes_for_3f) { |
1810 | res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 192, &data, NULL); | 1810 | res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 192, &data, NULL); |
1811 | } else { | 1811 | } else { |
1812 | /* | 1812 | /* |
1813 | * First attempt: ask for all pages (0x3F), but only 4 bytes. | 1813 | * First attempt: ask for all pages (0x3F), but only 4 bytes. |
1814 | * We have to start carefully: some devices hang if we ask | 1814 | * We have to start carefully: some devices hang if we ask |
1815 | * for more than is available. | 1815 | * for more than is available. |
1816 | */ | 1816 | */ |
1817 | res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 4, &data, NULL); | 1817 | res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 4, &data, NULL); |
1818 | 1818 | ||
1819 | /* | 1819 | /* |
1820 | * Second attempt: ask for page 0. When only page 0 is | 1820 | * Second attempt: ask for page 0. When only page 0 is |
1821 | * implemented, a request for page 3F may return Sense Key | 1821 | * implemented, a request for page 3F may return Sense Key |
1822 | * 5: Illegal Request, Sense Code 24: Invalid field in | 1822 | * 5: Illegal Request, Sense Code 24: Invalid field in |
1823 | * CDB. | 1823 | * CDB. |
1824 | */ | 1824 | */ |
1825 | if (!scsi_status_is_good(res)) | 1825 | if (!scsi_status_is_good(res)) |
1826 | res = sd_do_mode_sense(sdp, 0, 0, buffer, 4, &data, NULL); | 1826 | res = sd_do_mode_sense(sdp, 0, 0, buffer, 4, &data, NULL); |
1827 | 1827 | ||
1828 | /* | 1828 | /* |
1829 | * Third attempt: ask 255 bytes, as we did earlier. | 1829 | * Third attempt: ask 255 bytes, as we did earlier. |
1830 | */ | 1830 | */ |
1831 | if (!scsi_status_is_good(res)) | 1831 | if (!scsi_status_is_good(res)) |
1832 | res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 255, | 1832 | res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 255, |
1833 | &data, NULL); | 1833 | &data, NULL); |
1834 | } | 1834 | } |
1835 | 1835 | ||
1836 | if (!scsi_status_is_good(res)) { | 1836 | if (!scsi_status_is_good(res)) { |
1837 | sd_printk(KERN_WARNING, sdkp, | 1837 | sd_printk(KERN_WARNING, sdkp, |
1838 | "Test WP failed, assume Write Enabled\n"); | 1838 | "Test WP failed, assume Write Enabled\n"); |
1839 | } else { | 1839 | } else { |
1840 | sdkp->write_prot = ((data.device_specific & 0x80) != 0); | 1840 | sdkp->write_prot = ((data.device_specific & 0x80) != 0); |
1841 | set_disk_ro(sdkp->disk, sdkp->write_prot); | 1841 | set_disk_ro(sdkp->disk, sdkp->write_prot); |
1842 | if (sdkp->first_scan || old_wp != sdkp->write_prot) { | 1842 | if (sdkp->first_scan || old_wp != sdkp->write_prot) { |
1843 | sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", | 1843 | sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", |
1844 | sdkp->write_prot ? "on" : "off"); | 1844 | sdkp->write_prot ? "on" : "off"); |
1845 | sd_printk(KERN_DEBUG, sdkp, | 1845 | sd_printk(KERN_DEBUG, sdkp, |
1846 | "Mode Sense: %02x %02x %02x %02x\n", | 1846 | "Mode Sense: %02x %02x %02x %02x\n", |
1847 | buffer[0], buffer[1], buffer[2], buffer[3]); | 1847 | buffer[0], buffer[1], buffer[2], buffer[3]); |
1848 | } | 1848 | } |
1849 | } | 1849 | } |
1850 | } | 1850 | } |
1851 | 1851 | ||
1852 | /* | 1852 | /* |
1853 | * sd_read_cache_type - called only from sd_revalidate_disk() | 1853 | * sd_read_cache_type - called only from sd_revalidate_disk() |
1854 | * called with buffer of length SD_BUF_SIZE | 1854 | * called with buffer of length SD_BUF_SIZE |
1855 | */ | 1855 | */ |
1856 | static void | 1856 | static void |
1857 | sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) | 1857 | sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) |
1858 | { | 1858 | { |
1859 | int len = 0, res; | 1859 | int len = 0, res; |
1860 | struct scsi_device *sdp = sdkp->device; | 1860 | struct scsi_device *sdp = sdkp->device; |
1861 | 1861 | ||
1862 | int dbd; | 1862 | int dbd; |
1863 | int modepage; | 1863 | int modepage; |
1864 | struct scsi_mode_data data; | 1864 | struct scsi_mode_data data; |
1865 | struct scsi_sense_hdr sshdr; | 1865 | struct scsi_sense_hdr sshdr; |
1866 | int old_wce = sdkp->WCE; | 1866 | int old_wce = sdkp->WCE; |
1867 | int old_rcd = sdkp->RCD; | 1867 | int old_rcd = sdkp->RCD; |
1868 | int old_dpofua = sdkp->DPOFUA; | 1868 | int old_dpofua = sdkp->DPOFUA; |
1869 | 1869 | ||
1870 | if (sdp->skip_ms_page_8) | 1870 | if (sdp->skip_ms_page_8) |
1871 | goto defaults; | 1871 | goto defaults; |
1872 | 1872 | ||
1873 | if (sdp->type == TYPE_RBC) { | 1873 | if (sdp->type == TYPE_RBC) { |
1874 | modepage = 6; | 1874 | modepage = 6; |
1875 | dbd = 8; | 1875 | dbd = 8; |
1876 | } else { | 1876 | } else { |
1877 | modepage = 8; | 1877 | modepage = 8; |
1878 | dbd = 0; | 1878 | dbd = 0; |
1879 | } | 1879 | } |
1880 | 1880 | ||
1881 | /* cautiously ask */ | 1881 | /* cautiously ask */ |
1882 | res = sd_do_mode_sense(sdp, dbd, modepage, buffer, 4, &data, &sshdr); | 1882 | res = sd_do_mode_sense(sdp, dbd, modepage, buffer, 4, &data, &sshdr); |
1883 | 1883 | ||
1884 | if (!scsi_status_is_good(res)) | 1884 | if (!scsi_status_is_good(res)) |
1885 | goto bad_sense; | 1885 | goto bad_sense; |
1886 | 1886 | ||
1887 | if (!data.header_length) { | 1887 | if (!data.header_length) { |
1888 | modepage = 6; | 1888 | modepage = 6; |
1889 | sd_printk(KERN_ERR, sdkp, "Missing header in MODE_SENSE response\n"); | 1889 | sd_printk(KERN_ERR, sdkp, "Missing header in MODE_SENSE response\n"); |
1890 | } | 1890 | } |
1891 | 1891 | ||
1892 | /* that went OK, now ask for the proper length */ | 1892 | /* that went OK, now ask for the proper length */ |
1893 | len = data.length; | 1893 | len = data.length; |
1894 | 1894 | ||
1895 | /* | 1895 | /* |
1896 | * We're only interested in the first three bytes, actually. | 1896 | * We're only interested in the first three bytes, actually. |
1897 | * But the data cache page is defined for the first 20. | 1897 | * But the data cache page is defined for the first 20. |
1898 | */ | 1898 | */ |
1899 | if (len < 3) | 1899 | if (len < 3) |
1900 | goto bad_sense; | 1900 | goto bad_sense; |
1901 | if (len > 20) | 1901 | if (len > 20) |
1902 | len = 20; | 1902 | len = 20; |
1903 | 1903 | ||
1904 | /* Take headers and block descriptors into account */ | 1904 | /* Take headers and block descriptors into account */ |
1905 | len += data.header_length + data.block_descriptor_length; | 1905 | len += data.header_length + data.block_descriptor_length; |
1906 | if (len > SD_BUF_SIZE) | 1906 | if (len > SD_BUF_SIZE) |
1907 | goto bad_sense; | 1907 | goto bad_sense; |
1908 | 1908 | ||
1909 | /* Get the data */ | 1909 | /* Get the data */ |
1910 | res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr); | 1910 | res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr); |
1911 | 1911 | ||
1912 | if (scsi_status_is_good(res)) { | 1912 | if (scsi_status_is_good(res)) { |
1913 | int offset = data.header_length + data.block_descriptor_length; | 1913 | int offset = data.header_length + data.block_descriptor_length; |
1914 | 1914 | ||
1915 | if (offset >= SD_BUF_SIZE - 2) { | 1915 | if (offset >= SD_BUF_SIZE - 2) { |
1916 | sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n"); | 1916 | sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n"); |
1917 | goto defaults; | 1917 | goto defaults; |
1918 | } | 1918 | } |
1919 | 1919 | ||
1920 | if ((buffer[offset] & 0x3f) != modepage) { | 1920 | if ((buffer[offset] & 0x3f) != modepage) { |
1921 | sd_printk(KERN_ERR, sdkp, "Got wrong page\n"); | 1921 | sd_printk(KERN_ERR, sdkp, "Got wrong page\n"); |
1922 | goto defaults; | 1922 | goto defaults; |
1923 | } | 1923 | } |
1924 | 1924 | ||
1925 | if (modepage == 8) { | 1925 | if (modepage == 8) { |
1926 | sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); | 1926 | sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); |
1927 | sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0); | 1927 | sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0); |
1928 | } else { | 1928 | } else { |
1929 | sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0); | 1929 | sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0); |
1930 | sdkp->RCD = 0; | 1930 | sdkp->RCD = 0; |
1931 | } | 1931 | } |
1932 | 1932 | ||
1933 | sdkp->DPOFUA = (data.device_specific & 0x10) != 0; | 1933 | sdkp->DPOFUA = (data.device_specific & 0x10) != 0; |
1934 | if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) { | 1934 | if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) { |
1935 | sd_printk(KERN_NOTICE, sdkp, | 1935 | sd_printk(KERN_NOTICE, sdkp, |
1936 | "Uses READ/WRITE(6), disabling FUA\n"); | 1936 | "Uses READ/WRITE(6), disabling FUA\n"); |
1937 | sdkp->DPOFUA = 0; | 1937 | sdkp->DPOFUA = 0; |
1938 | } | 1938 | } |
1939 | 1939 | ||
1940 | if (sdkp->first_scan || old_wce != sdkp->WCE || | 1940 | if (sdkp->first_scan || old_wce != sdkp->WCE || |
1941 | old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA) | 1941 | old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA) |
1942 | sd_printk(KERN_NOTICE, sdkp, | 1942 | sd_printk(KERN_NOTICE, sdkp, |
1943 | "Write cache: %s, read cache: %s, %s\n", | 1943 | "Write cache: %s, read cache: %s, %s\n", |
1944 | sdkp->WCE ? "enabled" : "disabled", | 1944 | sdkp->WCE ? "enabled" : "disabled", |
1945 | sdkp->RCD ? "disabled" : "enabled", | 1945 | sdkp->RCD ? "disabled" : "enabled", |
1946 | sdkp->DPOFUA ? "supports DPO and FUA" | 1946 | sdkp->DPOFUA ? "supports DPO and FUA" |
1947 | : "doesn't support DPO or FUA"); | 1947 | : "doesn't support DPO or FUA"); |
1948 | 1948 | ||
1949 | return; | 1949 | return; |
1950 | } | 1950 | } |
1951 | 1951 | ||
1952 | bad_sense: | 1952 | bad_sense: |
1953 | if (scsi_sense_valid(&sshdr) && | 1953 | if (scsi_sense_valid(&sshdr) && |
1954 | sshdr.sense_key == ILLEGAL_REQUEST && | 1954 | sshdr.sense_key == ILLEGAL_REQUEST && |
1955 | sshdr.asc == 0x24 && sshdr.ascq == 0x0) | 1955 | sshdr.asc == 0x24 && sshdr.ascq == 0x0) |
1956 | /* Invalid field in CDB */ | 1956 | /* Invalid field in CDB */ |
1957 | sd_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n"); | 1957 | sd_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n"); |
1958 | else | 1958 | else |
1959 | sd_printk(KERN_ERR, sdkp, "Asking for cache data failed\n"); | 1959 | sd_printk(KERN_ERR, sdkp, "Asking for cache data failed\n"); |
1960 | 1960 | ||
1961 | defaults: | 1961 | defaults: |
1962 | sd_printk(KERN_ERR, sdkp, "Assuming drive cache: write through\n"); | 1962 | sd_printk(KERN_ERR, sdkp, "Assuming drive cache: write through\n"); |
1963 | sdkp->WCE = 0; | 1963 | sdkp->WCE = 0; |
1964 | sdkp->RCD = 0; | 1964 | sdkp->RCD = 0; |
1965 | sdkp->DPOFUA = 0; | 1965 | sdkp->DPOFUA = 0; |
1966 | } | 1966 | } |
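sd_read_cache_type() reduces the whole mode-sense exchange to three flags, WCE, RCD and DPOFUA, that sd_revalidate_disk() consumes further down. A userspace sketch of the same bit tests with hypothetical inputs (the struct and function names are invented for illustration):

#include <stdio.h>

struct cache_bits { int wce, rcd, dpofua; };

/* Decode cache settings the way sd_read_cache_type() does: "page" is
 * the mode page actually returned (8 for the caching page, 6 for RBC
 * devices), "page_byte2" is the third byte of that page, and
 * "device_specific" is the mode parameter header's device-specific
 * parameter byte. */
static struct cache_bits decode_cache(int page, unsigned char page_byte2,
				      unsigned char device_specific)
{
	struct cache_bits c;

	if (page == 8) {
		c.wce = (page_byte2 & 0x04) != 0;
		c.rcd = (page_byte2 & 0x01) != 0;
	} else {
		c.wce = (page_byte2 & 0x01) == 0;	/* RBC device parameters page */
		c.rcd = 0;
	}
	c.dpofua = (device_specific & 0x10) != 0;
	return c;
}

int main(void)
{
	struct cache_bits c = decode_cache(8, 0x04, 0x10);

	printf("WCE=%d RCD=%d DPOFUA=%d\n", c.wce, c.rcd, c.dpofua);	/* 1 0 1 */
	return 0;
}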
1967 | 1967 | ||
1968 | /* | 1968 | /* |
1969 | * The ATO bit indicates whether the DIF application tag is available | 1969 | * The ATO bit indicates whether the DIF application tag is available |
1970 | * for use by the operating system. | 1970 | * for use by the operating system. |
1971 | */ | 1971 | */ |
1972 | static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) | 1972 | static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) |
1973 | { | 1973 | { |
1974 | int res, offset; | 1974 | int res, offset; |
1975 | struct scsi_device *sdp = sdkp->device; | 1975 | struct scsi_device *sdp = sdkp->device; |
1976 | struct scsi_mode_data data; | 1976 | struct scsi_mode_data data; |
1977 | struct scsi_sense_hdr sshdr; | 1977 | struct scsi_sense_hdr sshdr; |
1978 | 1978 | ||
1979 | if (sdp->type != TYPE_DISK) | 1979 | if (sdp->type != TYPE_DISK) |
1980 | return; | 1980 | return; |
1981 | 1981 | ||
1982 | if (sdkp->protection_type == 0) | 1982 | if (sdkp->protection_type == 0) |
1983 | return; | 1983 | return; |
1984 | 1984 | ||
1985 | res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT, | 1985 | res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT, |
1986 | SD_MAX_RETRIES, &data, &sshdr); | 1986 | SD_MAX_RETRIES, &data, &sshdr); |
1987 | 1987 | ||
1988 | if (!scsi_status_is_good(res) || !data.header_length || | 1988 | if (!scsi_status_is_good(res) || !data.header_length || |
1989 | data.length < 6) { | 1989 | data.length < 6) { |
1990 | sd_printk(KERN_WARNING, sdkp, | 1990 | sd_printk(KERN_WARNING, sdkp, |
1991 | "getting Control mode page failed, assume no ATO\n"); | 1991 | "getting Control mode page failed, assume no ATO\n"); |
1992 | 1992 | ||
1993 | if (scsi_sense_valid(&sshdr)) | 1993 | if (scsi_sense_valid(&sshdr)) |
1994 | sd_print_sense_hdr(sdkp, &sshdr); | 1994 | sd_print_sense_hdr(sdkp, &sshdr); |
1995 | 1995 | ||
1996 | return; | 1996 | return; |
1997 | } | 1997 | } |
1998 | 1998 | ||
1999 | offset = data.header_length + data.block_descriptor_length; | 1999 | offset = data.header_length + data.block_descriptor_length; |
2000 | 2000 | ||
2001 | if ((buffer[offset] & 0x3f) != 0x0a) { | 2001 | if ((buffer[offset] & 0x3f) != 0x0a) { |
2002 | sd_printk(KERN_ERR, sdkp, "ATO Got wrong page\n"); | 2002 | sd_printk(KERN_ERR, sdkp, "ATO Got wrong page\n"); |
2003 | return; | 2003 | return; |
2004 | } | 2004 | } |
2005 | 2005 | ||
2006 | if ((buffer[offset + 5] & 0x80) == 0) | 2006 | if ((buffer[offset + 5] & 0x80) == 0) |
2007 | return; | 2007 | return; |
2008 | 2008 | ||
2009 | sdkp->ATO = 1; | 2009 | sdkp->ATO = 1; |
2010 | 2010 | ||
2011 | return; | 2011 | return; |
2012 | } | 2012 | } |
2013 | 2013 | ||
2014 | /** | 2014 | /** |
2015 | * sd_read_block_limits - Query disk device for preferred I/O sizes. | 2015 | * sd_read_block_limits - Query disk device for preferred I/O sizes. |
2016 | * @disk: disk to query | 2016 | * @disk: disk to query |
2017 | */ | 2017 | */ |
2018 | static void sd_read_block_limits(struct scsi_disk *sdkp) | 2018 | static void sd_read_block_limits(struct scsi_disk *sdkp) |
2019 | { | 2019 | { |
2020 | struct request_queue *q = sdkp->disk->queue; | 2020 | struct request_queue *q = sdkp->disk->queue; |
2021 | unsigned int sector_sz = sdkp->device->sector_size; | 2021 | unsigned int sector_sz = sdkp->device->sector_size; |
2022 | const int vpd_len = 64; | 2022 | const int vpd_len = 64; |
2023 | unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL); | 2023 | unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL); |
2024 | 2024 | ||
2025 | if (!buffer || | 2025 | if (!buffer || |
2026 | /* Block Limits VPD */ | 2026 | /* Block Limits VPD */ |
2027 | scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len)) | 2027 | scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len)) |
2028 | goto out; | 2028 | goto out; |
2029 | 2029 | ||
2030 | blk_queue_io_min(sdkp->disk->queue, | 2030 | blk_queue_io_min(sdkp->disk->queue, |
2031 | get_unaligned_be16(&buffer[6]) * sector_sz); | 2031 | get_unaligned_be16(&buffer[6]) * sector_sz); |
2032 | blk_queue_io_opt(sdkp->disk->queue, | 2032 | blk_queue_io_opt(sdkp->disk->queue, |
2033 | get_unaligned_be32(&buffer[12]) * sector_sz); | 2033 | get_unaligned_be32(&buffer[12]) * sector_sz); |
2034 | 2034 | ||
2035 | /* Thin provisioning enabled and page length indicates TP support */ | 2035 | /* Thin provisioning enabled and page length indicates TP support */ |
2036 | if (sdkp->thin_provisioning && buffer[3] == 0x3c) { | 2036 | if (sdkp->thin_provisioning && buffer[3] == 0x3c) { |
2037 | unsigned int lba_count, desc_count, granularity; | 2037 | unsigned int lba_count, desc_count, granularity; |
2038 | 2038 | ||
2039 | lba_count = get_unaligned_be32(&buffer[20]); | 2039 | lba_count = get_unaligned_be32(&buffer[20]); |
2040 | desc_count = get_unaligned_be32(&buffer[24]); | 2040 | desc_count = get_unaligned_be32(&buffer[24]); |
2041 | 2041 | ||
2042 | if (lba_count) { | 2042 | if (lba_count) { |
2043 | q->limits.max_discard_sectors = | 2043 | q->limits.max_discard_sectors = |
2044 | lba_count * sector_sz >> 9; | 2044 | lba_count * sector_sz >> 9; |
2045 | 2045 | ||
2046 | if (desc_count) | 2046 | if (desc_count) |
2047 | sdkp->unmap = 1; | 2047 | sdkp->unmap = 1; |
2048 | } | 2048 | } |
2049 | 2049 | ||
2050 | granularity = get_unaligned_be32(&buffer[28]); | 2050 | granularity = get_unaligned_be32(&buffer[28]); |
2051 | 2051 | ||
2052 | if (granularity) | 2052 | if (granularity) |
2053 | q->limits.discard_granularity = granularity * sector_sz; | 2053 | q->limits.discard_granularity = granularity * sector_sz; |
2054 | 2054 | ||
2055 | if (buffer[32] & 0x80) | 2055 | if (buffer[32] & 0x80) |
2056 | q->limits.discard_alignment = | 2056 | q->limits.discard_alignment = |
2057 | get_unaligned_be32(&buffer[32]) & ~(1 << 31); | 2057 | get_unaligned_be32(&buffer[32]) & ~(1 << 31); |
2058 | } | 2058 | } |
2059 | 2059 | ||
2060 | out: | 2060 | out: |
2061 | kfree(buffer); | 2061 | kfree(buffer); |
2062 | } | 2062 | } |
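sd_read_block_limits() converts the Block Limits VPD fields from device-block units into the byte and 512-byte-sector units the queue limits expect. A sketch of the two less obvious conversions, fed with made-up values (all names here are illustrative):

#include <stdint.h>
#include <stdio.h>

/* The LBA count at offset 20 of the page is in device blocks; the
 * queue limit wants 512-byte sectors, so multiply by the block size
 * and shift down by 9, exactly as the code above does. */
static unsigned int max_discard_sectors(uint32_t lba_count,
					unsigned int sector_sz)
{
	return lba_count * sector_sz >> 9;
}

/* The 4-byte field at offset 32 only carries a value when its top bit
 * is set; that bit is masked off before the rest is used as the
 * discard alignment. */
static uint32_t discard_alignment(uint32_t raw_field)
{
	if (!(raw_field & (1u << 31)))
		return 0;			/* field not valid */
	return raw_field & ~(1u << 31);
}

int main(void)
{
	printf("%u\n", max_discard_sectors(0x10000, 4096));		/* 524288 */
	printf("%u\n", (unsigned)discard_alignment(0x80000008));	/* 8 */
	return 0;
}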
2063 | 2063 | ||
2064 | /** | 2064 | /** |
2065 | * sd_read_block_characteristics - Query block dev. characteristics | 2065 | * sd_read_block_characteristics - Query block dev. characteristics |
2066 | * @disk: disk to query | 2066 | * @disk: disk to query |
2067 | */ | 2067 | */ |
2068 | static void sd_read_block_characteristics(struct scsi_disk *sdkp) | 2068 | static void sd_read_block_characteristics(struct scsi_disk *sdkp) |
2069 | { | 2069 | { |
2070 | unsigned char *buffer; | 2070 | unsigned char *buffer; |
2071 | u16 rot; | 2071 | u16 rot; |
2072 | const int vpd_len = 64; | 2072 | const int vpd_len = 64; |
2073 | 2073 | ||
2074 | buffer = kmalloc(vpd_len, GFP_KERNEL); | 2074 | buffer = kmalloc(vpd_len, GFP_KERNEL); |
2075 | 2075 | ||
2076 | if (!buffer || | 2076 | if (!buffer || |
2077 | /* Block Device Characteristics VPD */ | 2077 | /* Block Device Characteristics VPD */ |
2078 | scsi_get_vpd_page(sdkp->device, 0xb1, buffer, vpd_len)) | 2078 | scsi_get_vpd_page(sdkp->device, 0xb1, buffer, vpd_len)) |
2079 | goto out; | 2079 | goto out; |
2080 | 2080 | ||
2081 | rot = get_unaligned_be16(&buffer[4]); | 2081 | rot = get_unaligned_be16(&buffer[4]); |
2082 | 2082 | ||
2083 | if (rot == 1) | 2083 | if (rot == 1) |
2084 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, sdkp->disk->queue); | 2084 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, sdkp->disk->queue); |
2085 | 2085 | ||
2086 | out: | 2086 | out: |
2087 | kfree(buffer); | 2087 | kfree(buffer); |
2088 | } | 2088 | } |
2089 | 2089 | ||
2090 | static int sd_try_extended_inquiry(struct scsi_device *sdp) | 2090 | static int sd_try_extended_inquiry(struct scsi_device *sdp) |
2091 | { | 2091 | { |
2092 | /* | 2092 | /* |
2093 | * Although VPD inquiries can go to SCSI-2 type devices, | 2093 | * Although VPD inquiries can go to SCSI-2 type devices, |
2094 | * some USB ones crash on receiving them, and the pages | 2094 | * some USB ones crash on receiving them, and the pages |
2095 | * we currently ask for are for SPC-3 and beyond | 2095 | * we currently ask for are for SPC-3 and beyond |
2096 | */ | 2096 | */ |
2097 | if (sdp->scsi_level > SCSI_SPC_2) | 2097 | if (sdp->scsi_level > SCSI_SPC_2) |
2098 | return 1; | 2098 | return 1; |
2099 | return 0; | 2099 | return 0; |
2100 | } | 2100 | } |
2101 | 2101 | ||
2102 | /** | 2102 | /** |
2103 | * sd_revalidate_disk - called the first time a new disk is seen, | 2103 | * sd_revalidate_disk - called the first time a new disk is seen, |
2104 | * performs disk spin up, read_capacity, etc. | 2104 | * performs disk spin up, read_capacity, etc. |
2105 | * @disk: struct gendisk we care about | 2105 | * @disk: struct gendisk we care about |
2106 | **/ | 2106 | **/ |
2107 | static int sd_revalidate_disk(struct gendisk *disk) | 2107 | static int sd_revalidate_disk(struct gendisk *disk) |
2108 | { | 2108 | { |
2109 | struct scsi_disk *sdkp = scsi_disk(disk); | 2109 | struct scsi_disk *sdkp = scsi_disk(disk); |
2110 | struct scsi_device *sdp = sdkp->device; | 2110 | struct scsi_device *sdp = sdkp->device; |
2111 | unsigned char *buffer; | 2111 | unsigned char *buffer; |
2112 | unsigned ordered; | 2112 | unsigned flush = 0; |
2113 | 2113 | ||
2114 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, | 2114 | SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, |
2115 | "sd_revalidate_disk\n")); | 2115 | "sd_revalidate_disk\n")); |
2116 | 2116 | ||
2117 | /* | 2117 | /* |
2118 | * If the device is offline, don't try and read capacity or any | 2118 | * If the device is offline, don't try and read capacity or any |
2119 | * of the other niceties. | 2119 | * of the other niceties. |
2120 | */ | 2120 | */ |
2121 | if (!scsi_device_online(sdp)) | 2121 | if (!scsi_device_online(sdp)) |
2122 | goto out; | 2122 | goto out; |
2123 | 2123 | ||
2124 | buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); | 2124 | buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); |
2125 | if (!buffer) { | 2125 | if (!buffer) { |
2126 | sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " | 2126 | sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " |
2127 | "allocation failure.\n"); | 2127 | "allocation failure.\n"); |
2128 | goto out; | 2128 | goto out; |
2129 | } | 2129 | } |
2130 | 2130 | ||
2131 | sd_spinup_disk(sdkp); | 2131 | sd_spinup_disk(sdkp); |
2132 | 2132 | ||
2133 | /* | 2133 | /* |
2134 | * Without media there is no reason to ask; moreover, some devices | 2134 | * Without media there is no reason to ask; moreover, some devices |
2135 | * react badly if we do. | 2135 | * react badly if we do. |
2136 | */ | 2136 | */ |
2137 | if (sdkp->media_present) { | 2137 | if (sdkp->media_present) { |
2138 | sd_read_capacity(sdkp, buffer); | 2138 | sd_read_capacity(sdkp, buffer); |
2139 | 2139 | ||
2140 | if (sd_try_extended_inquiry(sdp)) { | 2140 | if (sd_try_extended_inquiry(sdp)) { |
2141 | sd_read_block_limits(sdkp); | 2141 | sd_read_block_limits(sdkp); |
2142 | sd_read_block_characteristics(sdkp); | 2142 | sd_read_block_characteristics(sdkp); |
2143 | } | 2143 | } |
2144 | 2144 | ||
2145 | sd_read_write_protect_flag(sdkp, buffer); | 2145 | sd_read_write_protect_flag(sdkp, buffer); |
2146 | sd_read_cache_type(sdkp, buffer); | 2146 | sd_read_cache_type(sdkp, buffer); |
2147 | sd_read_app_tag_own(sdkp, buffer); | 2147 | sd_read_app_tag_own(sdkp, buffer); |
2148 | } | 2148 | } |
2149 | 2149 | ||
2150 | sdkp->first_scan = 0; | 2150 | sdkp->first_scan = 0; |
2151 | 2151 | ||
2152 | /* | 2152 | /* |
2153 | * We now have all cache related info, determine how we deal | 2153 | * We now have all cache related info, determine how we deal |
2154 | * with ordered requests. | 2154 | * with flush requests. |
2155 | */ | 2155 | */ |
2156 | if (sdkp->WCE) | 2156 | if (sdkp->WCE) { |
2157 | ordered = sdkp->DPOFUA | 2157 | flush |= REQ_FLUSH; |
2158 | ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; | 2158 | if (sdkp->DPOFUA) |
2159 | else | 2159 | flush |= REQ_FUA; |
2160 | ordered = QUEUE_ORDERED_DRAIN; | 2160 | } |
2161 | 2161 | ||
2162 | blk_queue_ordered(sdkp->disk->queue, ordered); | 2162 | blk_queue_flush(sdkp->disk->queue, flush); |
2163 | 2163 | ||
2164 | set_capacity(disk, sdkp->capacity); | 2164 | set_capacity(disk, sdkp->capacity); |
2165 | kfree(buffer); | 2165 | kfree(buffer); |
2166 | 2166 | ||
2167 | out: | 2167 | out: |
2168 | return 0; | 2168 | return 0; |
2169 | } | 2169 | } |
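The hunk above is the sd.c side of this conversion: rather than selecting a QUEUE_ORDERED_* drain mode, sd_revalidate_disk() now reports the disk's flush capabilities directly. A standalone sketch of that decision (the EX_REQ_* values are illustrative placeholders, not the kernel's REQ_FLUSH/REQ_FUA definitions):

#include <stdio.h>

/* Illustrative stand-ins for the flush capability bits. */
#define EX_REQ_FLUSH	(1u << 0)
#define EX_REQ_FUA	(1u << 1)

/* Map the cache-mode-page results to the flags passed to
 * blk_queue_flush(): a write cache needs FLUSH, and FUA is only
 * advertised when the device also honours DPO/FUA. */
static unsigned int sd_flush_flags(int wce, int dpofua)
{
	unsigned int flush = 0;

	if (wce) {
		flush |= EX_REQ_FLUSH;
		if (dpofua)
			flush |= EX_REQ_FUA;
	}
	return flush;
}

int main(void)
{
	printf("WCE=0 DPOFUA=0 -> 0x%x\n", sd_flush_flags(0, 0));	/* 0x0 */
	printf("WCE=1 DPOFUA=0 -> 0x%x\n", sd_flush_flags(1, 0));	/* FLUSH */
	printf("WCE=1 DPOFUA=1 -> 0x%x\n", sd_flush_flags(1, 1));	/* FLUSH | FUA */
	return 0;
}

Drives without a write cache end up with no flags at all; blk_queue_flush() is still called, just with 0.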
2170 | 2170 | ||
2171 | /** | 2171 | /** |
2172 | * sd_unlock_native_capacity - unlock native capacity | 2172 | * sd_unlock_native_capacity - unlock native capacity |
2173 | * @disk: struct gendisk to set capacity for | 2173 | * @disk: struct gendisk to set capacity for |
2174 | * | 2174 | * |
2175 | * Block layer calls this function if it detects that partitions | 2175 | * Block layer calls this function if it detects that partitions |
2176 | * on @disk reach beyond the end of the device. If the SCSI host | 2176 | * on @disk reach beyond the end of the device. If the SCSI host |
2177 | * implements ->unlock_native_capacity() method, it's invoked to | 2177 | * implements ->unlock_native_capacity() method, it's invoked to |
2178 | * give it a chance to adjust the device capacity. | 2178 | * give it a chance to adjust the device capacity. |
2179 | * | 2179 | * |
2180 | * CONTEXT: | 2180 | * CONTEXT: |
2181 | * Defined by block layer. Might sleep. | 2181 | * Defined by block layer. Might sleep. |
2182 | */ | 2182 | */ |
2183 | static void sd_unlock_native_capacity(struct gendisk *disk) | 2183 | static void sd_unlock_native_capacity(struct gendisk *disk) |
2184 | { | 2184 | { |
2185 | struct scsi_device *sdev = scsi_disk(disk)->device; | 2185 | struct scsi_device *sdev = scsi_disk(disk)->device; |
2186 | 2186 | ||
2187 | if (sdev->host->hostt->unlock_native_capacity) | 2187 | if (sdev->host->hostt->unlock_native_capacity) |
2188 | sdev->host->hostt->unlock_native_capacity(sdev); | 2188 | sdev->host->hostt->unlock_native_capacity(sdev); |
2189 | } | 2189 | } |
2190 | 2190 | ||
2191 | /** | 2191 | /** |
2192 | * sd_format_disk_name - format disk name | 2192 | * sd_format_disk_name - format disk name |
2193 | * @prefix: name prefix - ie. "sd" for SCSI disks | 2193 | * @prefix: name prefix - ie. "sd" for SCSI disks |
2194 | * @index: index of the disk to format name for | 2194 | * @index: index of the disk to format name for |
2195 | * @buf: output buffer | 2195 | * @buf: output buffer |
2196 | * @buflen: length of the output buffer | 2196 | * @buflen: length of the output buffer |
2197 | * | 2197 | * |
2198 | * SCSI disk names start at sda. The 26th device is sdz and the | 2198 | * SCSI disk names start at sda. The 26th device is sdz and the |
2199 | * 27th is sdaa. The last one for two lettered suffix is sdzz | 2199 | * 27th is sdaa. The last one for two lettered suffix is sdzz |
2200 | * which is followed by sdaaa. | 2200 | * which is followed by sdaaa. |
2201 | * | 2201 | * |
2202 | * This is basically base-26 counting with one extra 'nil' entry | 2202 | * This is basically base-26 counting with one extra 'nil' entry |
2203 | * for every digit from the second on, and can be computed like | 2203 | * for every digit from the second on, and can be computed like |
2204 | * an ordinary base-26 conversion with the index shifted down by | 2204 | * an ordinary base-26 conversion with the index shifted down by |
2205 | * one after each digit is produced. | 2205 | * one after each digit is produced. |
2206 | * | 2206 | * |
2207 | * CONTEXT: | 2207 | * CONTEXT: |
2208 | * Don't care. | 2208 | * Don't care. |
2209 | * | 2209 | * |
2210 | * RETURNS: | 2210 | * RETURNS: |
2211 | * 0 on success, -errno on failure. | 2211 | * 0 on success, -errno on failure. |
2212 | */ | 2212 | */ |
2213 | static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) | 2213 | static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) |
2214 | { | 2214 | { |
2215 | const int base = 'z' - 'a' + 1; | 2215 | const int base = 'z' - 'a' + 1; |
2216 | char *begin = buf + strlen(prefix); | 2216 | char *begin = buf + strlen(prefix); |
2217 | char *end = buf + buflen; | 2217 | char *end = buf + buflen; |
2218 | char *p; | 2218 | char *p; |
2219 | int unit; | 2219 | int unit; |
2220 | 2220 | ||
2221 | p = end - 1; | 2221 | p = end - 1; |
2222 | *p = '\0'; | 2222 | *p = '\0'; |
2223 | unit = base; | 2223 | unit = base; |
2224 | do { | 2224 | do { |
2225 | if (p == begin) | 2225 | if (p == begin) |
2226 | return -EINVAL; | 2226 | return -EINVAL; |
2227 | *--p = 'a' + (index % unit); | 2227 | *--p = 'a' + (index % unit); |
2228 | index = (index / unit) - 1; | 2228 | index = (index / unit) - 1; |
2229 | } while (index >= 0); | 2229 | } while (index >= 0); |
2230 | 2230 | ||
2231 | memmove(begin, p, end - p); | 2231 | memmove(begin, p, end - p); |
2232 | memcpy(buf, prefix, strlen(prefix)); | 2232 | memcpy(buf, prefix, strlen(prefix)); |
2233 | 2233 | ||
2234 | return 0; | 2234 | return 0; |
2235 | } | 2235 | } |
2236 | 2236 | ||
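The naming scheme documented above is easiest to see by running the same loop in isolation; the following standalone userspace sketch (not part of the driver) reproduces sd_format_disk_name()'s arithmetic for a few sample indexes:

#include <stdio.h>
#include <string.h>

/* Same "base 26 with a nil entry" loop as sd_format_disk_name(),
 * compiled outside the kernel purely to show the mapping. */
static int format_disk_name(const char *prefix, int index, char *buf, int buflen)
{
        const int base = 'z' - 'a' + 1;
        char *begin = buf + strlen(prefix);
        char *end = buf + buflen;
        char *p = end - 1;

        *p = '\0';
        do {
                if (p == begin)
                        return -1;              /* buffer too small */
                *--p = 'a' + (index % base);
                index = (index / base) - 1;     /* shift by -1 per digit */
        } while (index >= 0);

        memmove(begin, p, end - p);
        memcpy(buf, prefix, strlen(prefix));
        return 0;
}

int main(void)
{
        int samples[] = { 0, 25, 26, 701, 702 };
        char name[32];
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                format_disk_name("sd", samples[i], name, sizeof(name));
                printf("index %3d -> %s\n", samples[i], name);
        }
        return 0;       /* prints sda, sdz, sdaa, sdzz, sdaaa */
}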
2237 | /* | 2237 | /* |
2238 | * The asynchronous part of sd_probe | 2238 | * The asynchronous part of sd_probe |
2239 | */ | 2239 | */ |
2240 | static void sd_probe_async(void *data, async_cookie_t cookie) | 2240 | static void sd_probe_async(void *data, async_cookie_t cookie) |
2241 | { | 2241 | { |
2242 | struct scsi_disk *sdkp = data; | 2242 | struct scsi_disk *sdkp = data; |
2243 | struct scsi_device *sdp; | 2243 | struct scsi_device *sdp; |
2244 | struct gendisk *gd; | 2244 | struct gendisk *gd; |
2245 | u32 index; | 2245 | u32 index; |
2246 | struct device *dev; | 2246 | struct device *dev; |
2247 | 2247 | ||
2248 | sdp = sdkp->device; | 2248 | sdp = sdkp->device; |
2249 | gd = sdkp->disk; | 2249 | gd = sdkp->disk; |
2250 | index = sdkp->index; | 2250 | index = sdkp->index; |
2251 | dev = &sdp->sdev_gendev; | 2251 | dev = &sdp->sdev_gendev; |
2252 | 2252 | ||
2253 | if (index < SD_MAX_DISKS) { | 2253 | if (index < SD_MAX_DISKS) { |
2254 | gd->major = sd_major((index & 0xf0) >> 4); | 2254 | gd->major = sd_major((index & 0xf0) >> 4); |
2255 | gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); | 2255 | gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); |
2256 | gd->minors = SD_MINORS; | 2256 | gd->minors = SD_MINORS; |
2257 | } | 2257 | } |
2258 | gd->fops = &sd_fops; | 2258 | gd->fops = &sd_fops; |
2259 | gd->private_data = &sdkp->driver; | 2259 | gd->private_data = &sdkp->driver; |
2260 | gd->queue = sdkp->device->request_queue; | 2260 | gd->queue = sdkp->device->request_queue; |
2261 | 2261 | ||
2262 | /* defaults, until the device tells us otherwise */ | 2262 | /* defaults, until the device tells us otherwise */ |
2263 | sdp->sector_size = 512; | 2263 | sdp->sector_size = 512; |
2264 | sdkp->capacity = 0; | 2264 | sdkp->capacity = 0; |
2265 | sdkp->media_present = 1; | 2265 | sdkp->media_present = 1; |
2266 | sdkp->write_prot = 0; | 2266 | sdkp->write_prot = 0; |
2267 | sdkp->WCE = 0; | 2267 | sdkp->WCE = 0; |
2268 | sdkp->RCD = 0; | 2268 | sdkp->RCD = 0; |
2269 | sdkp->ATO = 0; | 2269 | sdkp->ATO = 0; |
2270 | sdkp->first_scan = 1; | 2270 | sdkp->first_scan = 1; |
2271 | 2271 | ||
2272 | sd_revalidate_disk(gd); | 2272 | sd_revalidate_disk(gd); |
2273 | 2273 | ||
2274 | blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); | 2274 | blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); |
2275 | blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn); | 2275 | blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn); |
2276 | 2276 | ||
2277 | gd->driverfs_dev = &sdp->sdev_gendev; | 2277 | gd->driverfs_dev = &sdp->sdev_gendev; |
2278 | gd->flags = GENHD_FL_EXT_DEVT; | 2278 | gd->flags = GENHD_FL_EXT_DEVT; |
2279 | if (sdp->removable) | 2279 | if (sdp->removable) |
2280 | gd->flags |= GENHD_FL_REMOVABLE; | 2280 | gd->flags |= GENHD_FL_REMOVABLE; |
2281 | 2281 | ||
2282 | add_disk(gd); | 2282 | add_disk(gd); |
2283 | sd_dif_config_host(sdkp); | 2283 | sd_dif_config_host(sdkp); |
2284 | 2284 | ||
2285 | sd_revalidate_disk(gd); | 2285 | sd_revalidate_disk(gd); |
2286 | 2286 | ||
2287 | sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", | 2287 | sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", |
2288 | sdp->removable ? "removable " : ""); | 2288 | sdp->removable ? "removable " : ""); |
2289 | scsi_autopm_put_device(sdp); | 2289 | scsi_autopm_put_device(sdp); |
2290 | put_device(&sdkp->dev); | 2290 | put_device(&sdkp->dev); |
2291 | } | 2291 | } |
2292 | 2292 | ||
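For the major/minor split in sd_probe_async(), only the bit arithmetic is visible in this hunk (sd_major()'s mapping table lives elsewhere in sd.c), so the sketch below merely decomposes an index the same way; treat the numbers as illustrative:

#include <stdio.h>

/* Bits 7..4 of the index select the argument passed to sd_major(), the
 * low nibble is shifted up four bits to leave room for per-disk
 * partition minors, and higher bits ride along for the extended-devt
 * range. */
int main(void)
{
        unsigned int samples[] = { 0, 1, 15, 16, 255, 256 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                unsigned int index = samples[i];
                unsigned int major_slot  = (index & 0xf0) >> 4;
                unsigned int first_minor = ((index & 0xf) << 4) | (index & 0xfff00);

                printf("index %3u -> sd_major(%2u), first_minor %6u\n",
                       index, major_slot, first_minor);
        }
        return 0;
}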
2293 | /** | 2293 | /** |
2294 | * sd_probe - called during driver initialization and whenever a | 2294 | * sd_probe - called during driver initialization and whenever a |
2295 | * new scsi device is attached to the system. It is called once | 2295 | * new scsi device is attached to the system. It is called once |
2296 | * for each scsi device (not just disks) present. | 2296 | * for each scsi device (not just disks) present. |
2297 | * @dev: pointer to device object | 2297 | * @dev: pointer to device object |
2298 | * | 2298 | * |
2299 | * Returns 0 if successful (or not interested in this scsi device | 2299 | * Returns 0 if successful (or not interested in this scsi device |
2300 | * (e.g. scanner)); 1 when there is an error. | 2300 | * (e.g. scanner)); 1 when there is an error. |
2301 | * | 2301 | * |
2302 | * Note: this function is invoked from the scsi mid-level. | 2302 | * Note: this function is invoked from the scsi mid-level. |
2303 | * This function sets up the mapping between a given | 2303 | * This function sets up the mapping between a given |
2304 | * <host,channel,id,lun> (found in sdp) and new device name | 2304 | * <host,channel,id,lun> (found in sdp) and new device name |
2305 | * (e.g. /dev/sda). More precisely it is the block device major | 2305 | * (e.g. /dev/sda). More precisely it is the block device major |
2306 | * and minor number that is chosen here. | 2306 | * and minor number that is chosen here. |
2307 | * | 2307 | * |
2308 | * Assume sd_attach is not re-entrant (for the time being). | 2308 | * Assume sd_attach is not re-entrant (for the time being). |
2309 | * Also think about sd_attach() and sd_remove() running concurrently. | 2309 | * Also think about sd_attach() and sd_remove() running concurrently. |
2310 | **/ | 2310 | **/ |
2311 | static int sd_probe(struct device *dev) | 2311 | static int sd_probe(struct device *dev) |
2312 | { | 2312 | { |
2313 | struct scsi_device *sdp = to_scsi_device(dev); | 2313 | struct scsi_device *sdp = to_scsi_device(dev); |
2314 | struct scsi_disk *sdkp; | 2314 | struct scsi_disk *sdkp; |
2315 | struct gendisk *gd; | 2315 | struct gendisk *gd; |
2316 | int index; | 2316 | int index; |
2317 | int error; | 2317 | int error; |
2318 | 2318 | ||
2319 | error = -ENODEV; | 2319 | error = -ENODEV; |
2320 | if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC) | 2320 | if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC) |
2321 | goto out; | 2321 | goto out; |
2322 | 2322 | ||
2323 | SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, | 2323 | SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, |
2324 | "sd_attach\n")); | 2324 | "sd_attach\n")); |
2325 | 2325 | ||
2326 | error = -ENOMEM; | 2326 | error = -ENOMEM; |
2327 | sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL); | 2327 | sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL); |
2328 | if (!sdkp) | 2328 | if (!sdkp) |
2329 | goto out; | 2329 | goto out; |
2330 | 2330 | ||
2331 | gd = alloc_disk(SD_MINORS); | 2331 | gd = alloc_disk(SD_MINORS); |
2332 | if (!gd) | 2332 | if (!gd) |
2333 | goto out_free; | 2333 | goto out_free; |
2334 | 2334 | ||
2335 | do { | 2335 | do { |
2336 | if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) | 2336 | if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) |
2337 | goto out_put; | 2337 | goto out_put; |
2338 | 2338 | ||
2339 | spin_lock(&sd_index_lock); | 2339 | spin_lock(&sd_index_lock); |
2340 | error = ida_get_new(&sd_index_ida, &index); | 2340 | error = ida_get_new(&sd_index_ida, &index); |
2341 | spin_unlock(&sd_index_lock); | 2341 | spin_unlock(&sd_index_lock); |
2342 | } while (error == -EAGAIN); | 2342 | } while (error == -EAGAIN); |
2343 | 2343 | ||
2344 | if (error) | 2344 | if (error) |
2345 | goto out_put; | 2345 | goto out_put; |
2346 | 2346 | ||
2347 | error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); | 2347 | error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); |
2348 | if (error) | 2348 | if (error) |
2349 | goto out_free_index; | 2349 | goto out_free_index; |
2350 | 2350 | ||
2351 | sdkp->device = sdp; | 2351 | sdkp->device = sdp; |
2352 | sdkp->driver = &sd_template; | 2352 | sdkp->driver = &sd_template; |
2353 | sdkp->disk = gd; | 2353 | sdkp->disk = gd; |
2354 | sdkp->index = index; | 2354 | sdkp->index = index; |
2355 | atomic_set(&sdkp->openers, 0); | 2355 | atomic_set(&sdkp->openers, 0); |
2356 | sdkp->previous_state = 1; | 2356 | sdkp->previous_state = 1; |
2357 | 2357 | ||
2358 | if (!sdp->request_queue->rq_timeout) { | 2358 | if (!sdp->request_queue->rq_timeout) { |
2359 | if (sdp->type != TYPE_MOD) | 2359 | if (sdp->type != TYPE_MOD) |
2360 | blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); | 2360 | blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); |
2361 | else | 2361 | else |
2362 | blk_queue_rq_timeout(sdp->request_queue, | 2362 | blk_queue_rq_timeout(sdp->request_queue, |
2363 | SD_MOD_TIMEOUT); | 2363 | SD_MOD_TIMEOUT); |
2364 | } | 2364 | } |
2365 | 2365 | ||
2366 | device_initialize(&sdkp->dev); | 2366 | device_initialize(&sdkp->dev); |
2367 | sdkp->dev.parent = dev; | 2367 | sdkp->dev.parent = dev; |
2368 | sdkp->dev.class = &sd_disk_class; | 2368 | sdkp->dev.class = &sd_disk_class; |
2369 | dev_set_name(&sdkp->dev, dev_name(dev)); | 2369 | dev_set_name(&sdkp->dev, dev_name(dev)); |
2370 | 2370 | ||
2371 | if (device_add(&sdkp->dev)) | 2371 | if (device_add(&sdkp->dev)) |
2372 | goto out_free_index; | 2372 | goto out_free_index; |
2373 | 2373 | ||
2374 | get_device(dev); | 2374 | get_device(dev); |
2375 | dev_set_drvdata(dev, sdkp); | 2375 | dev_set_drvdata(dev, sdkp); |
2376 | 2376 | ||
2377 | get_device(&sdkp->dev); /* prevent release before async_schedule */ | 2377 | get_device(&sdkp->dev); /* prevent release before async_schedule */ |
2378 | async_schedule(sd_probe_async, sdkp); | 2378 | async_schedule(sd_probe_async, sdkp); |
2379 | 2379 | ||
2380 | return 0; | 2380 | return 0; |
2381 | 2381 | ||
2382 | out_free_index: | 2382 | out_free_index: |
2383 | spin_lock(&sd_index_lock); | 2383 | spin_lock(&sd_index_lock); |
2384 | ida_remove(&sd_index_ida, index); | 2384 | ida_remove(&sd_index_ida, index); |
2385 | spin_unlock(&sd_index_lock); | 2385 | spin_unlock(&sd_index_lock); |
2386 | out_put: | 2386 | out_put: |
2387 | put_disk(gd); | 2387 | put_disk(gd); |
2388 | out_free: | 2388 | out_free: |
2389 | kfree(sdkp); | 2389 | kfree(sdkp); |
2390 | out: | 2390 | out: |
2391 | return error; | 2391 | return error; |
2392 | } | 2392 | } |
2393 | 2393 | ||
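The index allocation in sd_probe() follows the classic ida pre-get/retry idiom; a minimal sketch of the same pattern, factored into a helper purely for illustration (nothing new is introduced, the names mirror the driver):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/errno.h>

/* Preallocate ida memory outside the spinlock (ida_pre_get() may sleep),
 * then take the lock only around the actual id allocation and retry
 * while the ida reports -EAGAIN. */
static int example_alloc_index(struct ida *ida, spinlock_t *lock, int *index)
{
        int error;

        do {
                if (!ida_pre_get(ida, GFP_KERNEL))
                        return -ENOMEM;

                spin_lock(lock);
                error = ida_get_new(ida, index);
                spin_unlock(lock);
        } while (error == -EAGAIN);

        return error;
}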
2394 | /** | 2394 | /** |
2395 | * sd_remove - called whenever a scsi disk (previously recognized by | 2395 | * sd_remove - called whenever a scsi disk (previously recognized by |
2396 | * sd_probe) is detached from the system. It is called (potentially | 2396 | * sd_probe) is detached from the system. It is called (potentially |
2397 | * multiple times) during sd module unload. | 2397 | * multiple times) during sd module unload. |
2398 | * @sdp: pointer to mid level scsi device object | 2398 | * @sdp: pointer to mid level scsi device object |
2399 | * | 2399 | * |
2400 | * Note: this function is invoked from the scsi mid-level. | 2400 | * Note: this function is invoked from the scsi mid-level. |
2401 | * This function potentially frees up a device name (e.g. /dev/sdc) | 2401 | * This function potentially frees up a device name (e.g. /dev/sdc) |
2402 | * that could be re-used by a subsequent sd_probe(). | 2402 | * that could be re-used by a subsequent sd_probe(). |
2403 | * This function is not called when the built-in sd driver is "exit-ed". | 2403 | * This function is not called when the built-in sd driver is "exit-ed". |
2404 | **/ | 2404 | **/ |
2405 | static int sd_remove(struct device *dev) | 2405 | static int sd_remove(struct device *dev) |
2406 | { | 2406 | { |
2407 | struct scsi_disk *sdkp; | 2407 | struct scsi_disk *sdkp; |
2408 | 2408 | ||
2409 | sdkp = dev_get_drvdata(dev); | 2409 | sdkp = dev_get_drvdata(dev); |
2410 | scsi_autopm_get_device(sdkp->device); | 2410 | scsi_autopm_get_device(sdkp->device); |
2411 | 2411 | ||
2412 | async_synchronize_full(); | 2412 | async_synchronize_full(); |
2413 | blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn); | 2413 | blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn); |
2414 | blk_queue_unprep_rq(sdkp->device->request_queue, NULL); | 2414 | blk_queue_unprep_rq(sdkp->device->request_queue, NULL); |
2415 | device_del(&sdkp->dev); | 2415 | device_del(&sdkp->dev); |
2416 | del_gendisk(sdkp->disk); | 2416 | del_gendisk(sdkp->disk); |
2417 | sd_shutdown(dev); | 2417 | sd_shutdown(dev); |
2418 | 2418 | ||
2419 | mutex_lock(&sd_ref_mutex); | 2419 | mutex_lock(&sd_ref_mutex); |
2420 | dev_set_drvdata(dev, NULL); | 2420 | dev_set_drvdata(dev, NULL); |
2421 | put_device(&sdkp->dev); | 2421 | put_device(&sdkp->dev); |
2422 | mutex_unlock(&sd_ref_mutex); | 2422 | mutex_unlock(&sd_ref_mutex); |
2423 | 2423 | ||
2424 | return 0; | 2424 | return 0; |
2425 | } | 2425 | } |
2426 | 2426 | ||
2427 | /** | 2427 | /** |
2428 | * scsi_disk_release - Called to free the scsi_disk structure | 2428 | * scsi_disk_release - Called to free the scsi_disk structure |
2429 | * @dev: pointer to embedded class device | 2429 | * @dev: pointer to embedded class device |
2430 | * | 2430 | * |
2431 | * sd_ref_mutex must be held entering this routine. Because it is | 2431 | * sd_ref_mutex must be held entering this routine. Because it is |
2432 | * called on last put, you should always use the scsi_disk_get() and | 2432 | * called on last put, you should always use the scsi_disk_get() and |
2433 | * scsi_disk_put() helpers, which manipulate the mutex directly, | 2433 | * scsi_disk_put() helpers, which manipulate the mutex directly, |
2434 | * and never do a direct put_device. | 2434 | * and never do a direct put_device. |
2435 | **/ | 2435 | **/ |
2436 | static void scsi_disk_release(struct device *dev) | 2436 | static void scsi_disk_release(struct device *dev) |
2437 | { | 2437 | { |
2438 | struct scsi_disk *sdkp = to_scsi_disk(dev); | 2438 | struct scsi_disk *sdkp = to_scsi_disk(dev); |
2439 | struct gendisk *disk = sdkp->disk; | 2439 | struct gendisk *disk = sdkp->disk; |
2440 | 2440 | ||
2441 | spin_lock(&sd_index_lock); | 2441 | spin_lock(&sd_index_lock); |
2442 | ida_remove(&sd_index_ida, sdkp->index); | 2442 | ida_remove(&sd_index_ida, sdkp->index); |
2443 | spin_unlock(&sd_index_lock); | 2443 | spin_unlock(&sd_index_lock); |
2444 | 2444 | ||
2445 | disk->private_data = NULL; | 2445 | disk->private_data = NULL; |
2446 | put_disk(disk); | 2446 | put_disk(disk); |
2447 | put_device(&sdkp->device->sdev_gendev); | 2447 | put_device(&sdkp->device->sdev_gendev); |
2448 | 2448 | ||
2449 | kfree(sdkp); | 2449 | kfree(sdkp); |
2450 | } | 2450 | } |
2451 | 2451 | ||
2452 | static int sd_start_stop_device(struct scsi_disk *sdkp, int start) | 2452 | static int sd_start_stop_device(struct scsi_disk *sdkp, int start) |
2453 | { | 2453 | { |
2454 | unsigned char cmd[6] = { START_STOP }; /* START_VALID */ | 2454 | unsigned char cmd[6] = { START_STOP }; /* START_VALID */ |
2455 | struct scsi_sense_hdr sshdr; | 2455 | struct scsi_sense_hdr sshdr; |
2456 | struct scsi_device *sdp = sdkp->device; | 2456 | struct scsi_device *sdp = sdkp->device; |
2457 | int res; | 2457 | int res; |
2458 | 2458 | ||
2459 | if (start) | 2459 | if (start) |
2460 | cmd[4] |= 1; /* START */ | 2460 | cmd[4] |= 1; /* START */ |
2461 | 2461 | ||
2462 | if (sdp->start_stop_pwr_cond) | 2462 | if (sdp->start_stop_pwr_cond) |
2463 | cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */ | 2463 | cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */ |
2464 | 2464 | ||
2465 | if (!scsi_device_online(sdp)) | 2465 | if (!scsi_device_online(sdp)) |
2466 | return -ENODEV; | 2466 | return -ENODEV; |
2467 | 2467 | ||
2468 | res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, | 2468 | res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, |
2469 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); | 2469 | SD_TIMEOUT, SD_MAX_RETRIES, NULL); |
2470 | if (res) { | 2470 | if (res) { |
2471 | sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n"); | 2471 | sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n"); |
2472 | sd_print_result(sdkp, res); | 2472 | sd_print_result(sdkp, res); |
2473 | if (driver_byte(res) & DRIVER_SENSE) | 2473 | if (driver_byte(res) & DRIVER_SENSE) |
2474 | sd_print_sense_hdr(sdkp, &sshdr); | 2474 | sd_print_sense_hdr(sdkp, &sshdr); |
2475 | } | 2475 | } |
2476 | 2476 | ||
2477 | return res; | 2477 | return res; |
2478 | } | 2478 | } |
2479 | 2479 | ||
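Byte 4 of the START STOP UNIT CDB assembled above carries two things at once; the breakdown below is shown for reference and follows the SCSI spec rather than anything else in this hunk:

/* START STOP UNIT, CDB byte 4 (per SPC/SBC, reference only):
 *   bit 0      START           1 = spin up, 0 = spin down
 *   bits 7..4  POWER CONDITION used when start_stop_pwr_cond is set:
 *              0x1 = ACTIVE (start), 0x3 = STANDBY (stop)
 */
unsigned char byte4_start = 0x01 | (0x1 << 4);  /* 0x11 */
unsigned char byte4_stop  = 0x00 | (0x3 << 4);  /* 0x30 */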
2480 | /* | 2480 | /* |
2481 | * Send a SYNCHRONIZE CACHE instruction down to the device through | 2481 | * Send a SYNCHRONIZE CACHE instruction down to the device through |
2482 | * the normal SCSI command structure. Wait for the command to | 2482 | * the normal SCSI command structure. Wait for the command to |
2483 | * complete. | 2483 | * complete. |
2484 | */ | 2484 | */ |
2485 | static void sd_shutdown(struct device *dev) | 2485 | static void sd_shutdown(struct device *dev) |
2486 | { | 2486 | { |
2487 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); | 2487 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); |
2488 | 2488 | ||
2489 | if (!sdkp) | 2489 | if (!sdkp) |
2490 | return; /* this can happen */ | 2490 | return; /* this can happen */ |
2491 | 2491 | ||
2492 | if (sdkp->WCE) { | 2492 | if (sdkp->WCE) { |
2493 | sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); | 2493 | sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); |
2494 | sd_sync_cache(sdkp); | 2494 | sd_sync_cache(sdkp); |
2495 | } | 2495 | } |
2496 | 2496 | ||
2497 | if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { | 2497 | if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { |
2498 | sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); | 2498 | sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); |
2499 | sd_start_stop_device(sdkp, 0); | 2499 | sd_start_stop_device(sdkp, 0); |
2500 | } | 2500 | } |
2501 | 2501 | ||
2502 | scsi_disk_put(sdkp); | 2502 | scsi_disk_put(sdkp); |
2503 | } | 2503 | } |
2504 | 2504 | ||
2505 | static int sd_suspend(struct device *dev, pm_message_t mesg) | 2505 | static int sd_suspend(struct device *dev, pm_message_t mesg) |
2506 | { | 2506 | { |
2507 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); | 2507 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); |
2508 | int ret = 0; | 2508 | int ret = 0; |
2509 | 2509 | ||
2510 | if (!sdkp) | 2510 | if (!sdkp) |
2511 | return 0; /* this can happen */ | 2511 | return 0; /* this can happen */ |
2512 | 2512 | ||
2513 | if (sdkp->WCE) { | 2513 | if (sdkp->WCE) { |
2514 | sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); | 2514 | sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); |
2515 | ret = sd_sync_cache(sdkp); | 2515 | ret = sd_sync_cache(sdkp); |
2516 | if (ret) | 2516 | if (ret) |
2517 | goto done; | 2517 | goto done; |
2518 | } | 2518 | } |
2519 | 2519 | ||
2520 | if ((mesg.event & PM_EVENT_SLEEP) && sdkp->device->manage_start_stop) { | 2520 | if ((mesg.event & PM_EVENT_SLEEP) && sdkp->device->manage_start_stop) { |
2521 | sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); | 2521 | sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); |
2522 | ret = sd_start_stop_device(sdkp, 0); | 2522 | ret = sd_start_stop_device(sdkp, 0); |
2523 | } | 2523 | } |
2524 | 2524 | ||
2525 | done: | 2525 | done: |
2526 | scsi_disk_put(sdkp); | 2526 | scsi_disk_put(sdkp); |
2527 | return ret; | 2527 | return ret; |
2528 | } | 2528 | } |
2529 | 2529 | ||
2530 | static int sd_resume(struct device *dev) | 2530 | static int sd_resume(struct device *dev) |
2531 | { | 2531 | { |
2532 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); | 2532 | struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); |
2533 | int ret = 0; | 2533 | int ret = 0; |
2534 | 2534 | ||
2535 | if (!sdkp->device->manage_start_stop) | 2535 | if (!sdkp->device->manage_start_stop) |
2536 | goto done; | 2536 | goto done; |
2537 | 2537 | ||
2538 | sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); | 2538 | sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); |
2539 | ret = sd_start_stop_device(sdkp, 1); | 2539 | ret = sd_start_stop_device(sdkp, 1); |
2540 | 2540 | ||
2541 | done: | 2541 | done: |
2542 | scsi_disk_put(sdkp); | 2542 | scsi_disk_put(sdkp); |
2543 | return ret; | 2543 | return ret; |
2544 | } | 2544 | } |
2545 | 2545 | ||
2546 | /** | 2546 | /** |
2547 | * init_sd - entry point for this driver (both when built in or when | 2547 | * init_sd - entry point for this driver (both when built in or when |
2548 | * a module). | 2548 | * a module). |
2549 | * | 2549 | * |
2550 | * Note: this function registers this driver with the scsi mid-level. | 2550 | * Note: this function registers this driver with the scsi mid-level. |
2551 | **/ | 2551 | **/ |
2552 | static int __init init_sd(void) | 2552 | static int __init init_sd(void) |
2553 | { | 2553 | { |
2554 | int majors = 0, i, err; | 2554 | int majors = 0, i, err; |
2555 | 2555 | ||
2556 | SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); | 2556 | SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); |
2557 | 2557 | ||
2558 | for (i = 0; i < SD_MAJORS; i++) | 2558 | for (i = 0; i < SD_MAJORS; i++) |
2559 | if (register_blkdev(sd_major(i), "sd") == 0) | 2559 | if (register_blkdev(sd_major(i), "sd") == 0) |
2560 | majors++; | 2560 | majors++; |
2561 | 2561 | ||
2562 | if (!majors) | 2562 | if (!majors) |
2563 | return -ENODEV; | 2563 | return -ENODEV; |
2564 | 2564 | ||
2565 | err = class_register(&sd_disk_class); | 2565 | err = class_register(&sd_disk_class); |
2566 | if (err) | 2566 | if (err) |
2567 | goto err_out; | 2567 | goto err_out; |
2568 | 2568 | ||
2569 | err = scsi_register_driver(&sd_template.gendrv); | 2569 | err = scsi_register_driver(&sd_template.gendrv); |
2570 | if (err) | 2570 | if (err) |
2571 | goto err_out_class; | 2571 | goto err_out_class; |
2572 | 2572 | ||
2573 | sd_cdb_cache = kmem_cache_create("sd_ext_cdb", SD_EXT_CDB_SIZE, | 2573 | sd_cdb_cache = kmem_cache_create("sd_ext_cdb", SD_EXT_CDB_SIZE, |
2574 | 0, 0, NULL); | 2574 | 0, 0, NULL); |
2575 | if (!sd_cdb_cache) { | 2575 | if (!sd_cdb_cache) { |
2576 | printk(KERN_ERR "sd: can't init extended cdb cache\n"); | 2576 | printk(KERN_ERR "sd: can't init extended cdb cache\n"); |
2577 | goto err_out_class; | 2577 | goto err_out_class; |
2578 | } | 2578 | } |
2579 | 2579 | ||
2580 | sd_cdb_pool = mempool_create_slab_pool(SD_MEMPOOL_SIZE, sd_cdb_cache); | 2580 | sd_cdb_pool = mempool_create_slab_pool(SD_MEMPOOL_SIZE, sd_cdb_cache); |
2581 | if (!sd_cdb_pool) { | 2581 | if (!sd_cdb_pool) { |
2582 | printk(KERN_ERR "sd: can't init extended cdb pool\n"); | 2582 | printk(KERN_ERR "sd: can't init extended cdb pool\n"); |
2583 | goto err_out_cache; | 2583 | goto err_out_cache; |
2584 | } | 2584 | } |
2585 | 2585 | ||
2586 | return 0; | 2586 | return 0; |
2587 | 2587 | ||
2588 | err_out_cache: | 2588 | err_out_cache: |
2589 | kmem_cache_destroy(sd_cdb_cache); | 2589 | kmem_cache_destroy(sd_cdb_cache); |
2590 | 2590 | ||
2591 | err_out_class: | 2591 | err_out_class: |
2592 | class_unregister(&sd_disk_class); | 2592 | class_unregister(&sd_disk_class); |
2593 | err_out: | 2593 | err_out: |
2594 | for (i = 0; i < SD_MAJORS; i++) | 2594 | for (i = 0; i < SD_MAJORS; i++) |
2595 | unregister_blkdev(sd_major(i), "sd"); | 2595 | unregister_blkdev(sd_major(i), "sd"); |
2596 | return err; | 2596 | return err; |
2597 | } | 2597 | } |
2598 | 2598 | ||
2599 | /** | 2599 | /** |
2600 | * exit_sd - exit point for this driver (when it is a module). | 2600 | * exit_sd - exit point for this driver (when it is a module). |
2601 | * | 2601 | * |
2602 | * Note: this function unregisters this driver from the scsi mid-level. | 2602 | * Note: this function unregisters this driver from the scsi mid-level. |
2603 | **/ | 2603 | **/ |
2604 | static void __exit exit_sd(void) | 2604 | static void __exit exit_sd(void) |
2605 | { | 2605 | { |
2606 | int i; | 2606 | int i; |
2607 | 2607 | ||
2608 | SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); | 2608 | SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); |
2609 | 2609 | ||
2610 | mempool_destroy(sd_cdb_pool); | 2610 | mempool_destroy(sd_cdb_pool); |
2611 | kmem_cache_destroy(sd_cdb_cache); | 2611 | kmem_cache_destroy(sd_cdb_cache); |
2612 | 2612 | ||
2613 | scsi_unregister_driver(&sd_template.gendrv); | 2613 | scsi_unregister_driver(&sd_template.gendrv); |
2614 | class_unregister(&sd_disk_class); | 2614 | class_unregister(&sd_disk_class); |
2615 | 2615 | ||
2616 | for (i = 0; i < SD_MAJORS; i++) | 2616 | for (i = 0; i < SD_MAJORS; i++) |
2617 | unregister_blkdev(sd_major(i), "sd"); | 2617 | unregister_blkdev(sd_major(i), "sd"); |
2618 | } | 2618 | } |
2619 | 2619 | ||
2620 | module_init(init_sd); | 2620 | module_init(init_sd); |
2621 | module_exit(exit_sd); | 2621 | module_exit(exit_sd); |
2622 | 2622 | ||
2623 | static void sd_print_sense_hdr(struct scsi_disk *sdkp, | 2623 | static void sd_print_sense_hdr(struct scsi_disk *sdkp, |
2624 | struct scsi_sense_hdr *sshdr) | 2624 | struct scsi_sense_hdr *sshdr) |
2625 | { | 2625 | { |
2626 | sd_printk(KERN_INFO, sdkp, ""); | 2626 | sd_printk(KERN_INFO, sdkp, ""); |
2627 | scsi_show_sense_hdr(sshdr); | 2627 | scsi_show_sense_hdr(sshdr); |
2628 | sd_printk(KERN_INFO, sdkp, ""); | 2628 | sd_printk(KERN_INFO, sdkp, ""); |
2629 | scsi_show_extd_sense(sshdr->asc, sshdr->ascq); | 2629 | scsi_show_extd_sense(sshdr->asc, sshdr->ascq); |
2630 | } | 2630 | } |
2631 | 2631 | ||
2632 | static void sd_print_result(struct scsi_disk *sdkp, int result) | 2632 | static void sd_print_result(struct scsi_disk *sdkp, int result) |
2633 | { | 2633 | { |
2634 | sd_printk(KERN_INFO, sdkp, ""); | 2634 | sd_printk(KERN_INFO, sdkp, ""); |
2635 | scsi_show_result(result); | 2635 | scsi_show_result(result); |
2636 | } | 2636 | } |
2637 | 2637 | ||
2638 | 2638 |
include/linux/blkdev.h
1 | #ifndef _LINUX_BLKDEV_H | 1 | #ifndef _LINUX_BLKDEV_H |
2 | #define _LINUX_BLKDEV_H | 2 | #define _LINUX_BLKDEV_H |
3 | 3 | ||
4 | #ifdef CONFIG_BLOCK | 4 | #ifdef CONFIG_BLOCK |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/major.h> | 7 | #include <linux/major.h> |
8 | #include <linux/genhd.h> | 8 | #include <linux/genhd.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/timer.h> | 10 | #include <linux/timer.h> |
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
15 | #include <linux/mempool.h> | 15 | #include <linux/mempool.h> |
16 | #include <linux/bio.h> | 16 | #include <linux/bio.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/stringify.h> | 18 | #include <linux/stringify.h> |
19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
20 | #include <linux/bsg.h> | 20 | #include <linux/bsg.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | 22 | ||
23 | #include <asm/scatterlist.h> | 23 | #include <asm/scatterlist.h> |
24 | 24 | ||
25 | struct scsi_ioctl_command; | 25 | struct scsi_ioctl_command; |
26 | 26 | ||
27 | struct request_queue; | 27 | struct request_queue; |
28 | struct elevator_queue; | 28 | struct elevator_queue; |
29 | struct request_pm_state; | 29 | struct request_pm_state; |
30 | struct blk_trace; | 30 | struct blk_trace; |
31 | struct request; | 31 | struct request; |
32 | struct sg_io_hdr; | 32 | struct sg_io_hdr; |
33 | 33 | ||
34 | #define BLKDEV_MIN_RQ 4 | 34 | #define BLKDEV_MIN_RQ 4 |
35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
36 | 36 | ||
37 | struct request; | 37 | struct request; |
38 | typedef void (rq_end_io_fn)(struct request *, int); | 38 | typedef void (rq_end_io_fn)(struct request *, int); |
39 | 39 | ||
40 | struct request_list { | 40 | struct request_list { |
41 | /* | 41 | /* |
42 | * count[], starved[], and wait[] are indexed by | 42 | * count[], starved[], and wait[] are indexed by |
43 | * BLK_RW_SYNC/BLK_RW_ASYNC | 43 | * BLK_RW_SYNC/BLK_RW_ASYNC |
44 | */ | 44 | */ |
45 | int count[2]; | 45 | int count[2]; |
46 | int starved[2]; | 46 | int starved[2]; |
47 | int elvpriv; | 47 | int elvpriv; |
48 | mempool_t *rq_pool; | 48 | mempool_t *rq_pool; |
49 | wait_queue_head_t wait[2]; | 49 | wait_queue_head_t wait[2]; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * request command types | 53 | * request command types |
54 | */ | 54 | */ |
55 | enum rq_cmd_type_bits { | 55 | enum rq_cmd_type_bits { |
56 | REQ_TYPE_FS = 1, /* fs request */ | 56 | REQ_TYPE_FS = 1, /* fs request */ |
57 | REQ_TYPE_BLOCK_PC, /* scsi command */ | 57 | REQ_TYPE_BLOCK_PC, /* scsi command */ |
58 | REQ_TYPE_SENSE, /* sense request */ | 58 | REQ_TYPE_SENSE, /* sense request */ |
59 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | 59 | REQ_TYPE_PM_SUSPEND, /* suspend request */ |
60 | REQ_TYPE_PM_RESUME, /* resume request */ | 60 | REQ_TYPE_PM_RESUME, /* resume request */ |
61 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | 61 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ |
62 | REQ_TYPE_SPECIAL, /* driver defined type */ | 62 | REQ_TYPE_SPECIAL, /* driver defined type */ |
63 | /* | 63 | /* |
64 | * for ATA/ATAPI devices. this really doesn't belong here, ide should | 64 | * for ATA/ATAPI devices. this really doesn't belong here, ide should |
65 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver | 65 | * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver |
66 | * private REQ_LB opcodes to differentiate what type of request this is | 66 | * private REQ_LB opcodes to differentiate what type of request this is |
67 | */ | 67 | */ |
68 | REQ_TYPE_ATA_TASKFILE, | 68 | REQ_TYPE_ATA_TASKFILE, |
69 | REQ_TYPE_ATA_PC, | 69 | REQ_TYPE_ATA_PC, |
70 | }; | 70 | }; |
71 | 71 | ||
72 | #define BLK_MAX_CDB 16 | 72 | #define BLK_MAX_CDB 16 |
73 | 73 | ||
74 | /* | 74 | /* |
75 | * try to put the fields that are referenced together in the same cacheline. | 75 | * try to put the fields that are referenced together in the same cacheline. |
76 | * if you modify this structure, be sure to check block/blk-core.c:rq_init() | 76 | * if you modify this structure, be sure to check block/blk-core.c:rq_init() |
77 | * as well! | 77 | * as well! |
78 | */ | 78 | */ |
79 | struct request { | 79 | struct request { |
80 | struct list_head queuelist; | 80 | struct list_head queuelist; |
81 | struct call_single_data csd; | 81 | struct call_single_data csd; |
82 | 82 | ||
83 | struct request_queue *q; | 83 | struct request_queue *q; |
84 | 84 | ||
85 | unsigned int cmd_flags; | 85 | unsigned int cmd_flags; |
86 | enum rq_cmd_type_bits cmd_type; | 86 | enum rq_cmd_type_bits cmd_type; |
87 | unsigned long atomic_flags; | 87 | unsigned long atomic_flags; |
88 | 88 | ||
89 | int cpu; | 89 | int cpu; |
90 | 90 | ||
91 | /* the following two fields are internal, NEVER access directly */ | 91 | /* the following two fields are internal, NEVER access directly */ |
92 | unsigned int __data_len; /* total data len */ | 92 | unsigned int __data_len; /* total data len */ |
93 | sector_t __sector; /* sector cursor */ | 93 | sector_t __sector; /* sector cursor */ |
94 | 94 | ||
95 | struct bio *bio; | 95 | struct bio *bio; |
96 | struct bio *biotail; | 96 | struct bio *biotail; |
97 | 97 | ||
98 | struct hlist_node hash; /* merge hash */ | 98 | struct hlist_node hash; /* merge hash */ |
99 | /* | 99 | /* |
100 | * The rb_node is only used inside the io scheduler, requests | 100 | * The rb_node is only used inside the io scheduler, requests |
101 | * are pruned when moved to the dispatch queue. So let the | 101 | * are pruned when moved to the dispatch queue. So let the |
102 | * completion_data share space with the rb_node. | 102 | * completion_data share space with the rb_node. |
103 | */ | 103 | */ |
104 | union { | 104 | union { |
105 | struct rb_node rb_node; /* sort/lookup */ | 105 | struct rb_node rb_node; /* sort/lookup */ |
106 | void *completion_data; | 106 | void *completion_data; |
107 | }; | 107 | }; |
108 | 108 | ||
109 | /* | 109 | /* |
110 | * Three pointers are available for the IO schedulers, if they need | 110 | * Three pointers are available for the IO schedulers, if they need |
111 | * more they have to dynamically allocate it. | 111 | * more they have to dynamically allocate it. |
112 | */ | 112 | */ |
113 | void *elevator_private; | 113 | void *elevator_private; |
114 | void *elevator_private2; | 114 | void *elevator_private2; |
115 | void *elevator_private3; | 115 | void *elevator_private3; |
116 | 116 | ||
117 | struct gendisk *rq_disk; | 117 | struct gendisk *rq_disk; |
118 | unsigned long start_time; | 118 | unsigned long start_time; |
119 | #ifdef CONFIG_BLK_CGROUP | 119 | #ifdef CONFIG_BLK_CGROUP |
120 | unsigned long long start_time_ns; | 120 | unsigned long long start_time_ns; |
121 | unsigned long long io_start_time_ns; /* when passed to hardware */ | 121 | unsigned long long io_start_time_ns; /* when passed to hardware */ |
122 | #endif | 122 | #endif |
123 | /* Number of scatter-gather DMA addr+len pairs after | 123 | /* Number of scatter-gather DMA addr+len pairs after |
124 | * physical address coalescing is performed. | 124 | * physical address coalescing is performed. |
125 | */ | 125 | */ |
126 | unsigned short nr_phys_segments; | 126 | unsigned short nr_phys_segments; |
127 | 127 | ||
128 | unsigned short ioprio; | 128 | unsigned short ioprio; |
129 | 129 | ||
130 | int ref_count; | 130 | int ref_count; |
131 | 131 | ||
132 | void *special; /* opaque pointer available for LLD use */ | 132 | void *special; /* opaque pointer available for LLD use */ |
133 | char *buffer; /* kaddr of the current segment if available */ | 133 | char *buffer; /* kaddr of the current segment if available */ |
134 | 134 | ||
135 | int tag; | 135 | int tag; |
136 | int errors; | 136 | int errors; |
137 | 137 | ||
138 | /* | 138 | /* |
139 | * when request is used as a packet command carrier | 139 | * when request is used as a packet command carrier |
140 | */ | 140 | */ |
141 | unsigned char __cmd[BLK_MAX_CDB]; | 141 | unsigned char __cmd[BLK_MAX_CDB]; |
142 | unsigned char *cmd; | 142 | unsigned char *cmd; |
143 | unsigned short cmd_len; | 143 | unsigned short cmd_len; |
144 | 144 | ||
145 | unsigned int extra_len; /* length of alignment and padding */ | 145 | unsigned int extra_len; /* length of alignment and padding */ |
146 | unsigned int sense_len; | 146 | unsigned int sense_len; |
147 | unsigned int resid_len; /* residual count */ | 147 | unsigned int resid_len; /* residual count */ |
148 | void *sense; | 148 | void *sense; |
149 | 149 | ||
150 | unsigned long deadline; | 150 | unsigned long deadline; |
151 | struct list_head timeout_list; | 151 | struct list_head timeout_list; |
152 | unsigned int timeout; | 152 | unsigned int timeout; |
153 | int retries; | 153 | int retries; |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * completion callback. | 156 | * completion callback. |
157 | */ | 157 | */ |
158 | rq_end_io_fn *end_io; | 158 | rq_end_io_fn *end_io; |
159 | void *end_io_data; | 159 | void *end_io_data; |
160 | 160 | ||
161 | /* for bidi */ | 161 | /* for bidi */ |
162 | struct request *next_rq; | 162 | struct request *next_rq; |
163 | }; | 163 | }; |
164 | 164 | ||
165 | static inline unsigned short req_get_ioprio(struct request *req) | 165 | static inline unsigned short req_get_ioprio(struct request *req) |
166 | { | 166 | { |
167 | return req->ioprio; | 167 | return req->ioprio; |
168 | } | 168 | } |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | 171 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME |
172 | * requests. Some step values could eventually be made generic. | 172 | * requests. Some step values could eventually be made generic. |
173 | */ | 173 | */ |
174 | struct request_pm_state | 174 | struct request_pm_state |
175 | { | 175 | { |
176 | /* PM state machine step value, currently driver specific */ | 176 | /* PM state machine step value, currently driver specific */ |
177 | int pm_step; | 177 | int pm_step; |
178 | /* requested PM state value (S1, S2, S3, S4, ...) */ | 178 | /* requested PM state value (S1, S2, S3, S4, ...) */ |
179 | u32 pm_state; | 179 | u32 pm_state; |
180 | void* data; /* for driver use */ | 180 | void* data; /* for driver use */ |
181 | }; | 181 | }; |
182 | 182 | ||
183 | #include <linux/elevator.h> | 183 | #include <linux/elevator.h> |
184 | 184 | ||
185 | typedef void (request_fn_proc) (struct request_queue *q); | 185 | typedef void (request_fn_proc) (struct request_queue *q); |
186 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); | 186 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); |
187 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 187 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
188 | typedef void (unprep_rq_fn) (struct request_queue *, struct request *); | 188 | typedef void (unprep_rq_fn) (struct request_queue *, struct request *); |
189 | typedef void (unplug_fn) (struct request_queue *); | 189 | typedef void (unplug_fn) (struct request_queue *); |
190 | 190 | ||
191 | struct bio_vec; | 191 | struct bio_vec; |
192 | struct bvec_merge_data { | 192 | struct bvec_merge_data { |
193 | struct block_device *bi_bdev; | 193 | struct block_device *bi_bdev; |
194 | sector_t bi_sector; | 194 | sector_t bi_sector; |
195 | unsigned bi_size; | 195 | unsigned bi_size; |
196 | unsigned long bi_rw; | 196 | unsigned long bi_rw; |
197 | }; | 197 | }; |
198 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, | 198 | typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, |
199 | struct bio_vec *); | 199 | struct bio_vec *); |
200 | typedef void (softirq_done_fn)(struct request *); | 200 | typedef void (softirq_done_fn)(struct request *); |
201 | typedef int (dma_drain_needed_fn)(struct request *); | 201 | typedef int (dma_drain_needed_fn)(struct request *); |
202 | typedef int (lld_busy_fn) (struct request_queue *q); | 202 | typedef int (lld_busy_fn) (struct request_queue *q); |
203 | 203 | ||
204 | enum blk_eh_timer_return { | 204 | enum blk_eh_timer_return { |
205 | BLK_EH_NOT_HANDLED, | 205 | BLK_EH_NOT_HANDLED, |
206 | BLK_EH_HANDLED, | 206 | BLK_EH_HANDLED, |
207 | BLK_EH_RESET_TIMER, | 207 | BLK_EH_RESET_TIMER, |
208 | }; | 208 | }; |
209 | 209 | ||
210 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); | 210 | typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); |
211 | 211 | ||
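A driver-side timeout handler of the rq_timed_out_fn type above typically chooses between granting the request more time and handing it back to the block layer; a hedged sketch (the retry condition is purely illustrative):

static enum blk_eh_timer_return example_rq_timed_out(struct request *rq)
{
        /* give the command another timeout period while retries remain,
         * otherwise let normal error handling take over */
        if (rq->retries-- > 0)
                return BLK_EH_RESET_TIMER;

        return BLK_EH_NOT_HANDLED;
}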
212 | enum blk_queue_state { | 212 | enum blk_queue_state { |
213 | Queue_down, | 213 | Queue_down, |
214 | Queue_up, | 214 | Queue_up, |
215 | }; | 215 | }; |
216 | 216 | ||
217 | struct blk_queue_tag { | 217 | struct blk_queue_tag { |
218 | struct request **tag_index; /* map of busy tags */ | 218 | struct request **tag_index; /* map of busy tags */ |
219 | unsigned long *tag_map; /* bit map of free/busy tags */ | 219 | unsigned long *tag_map; /* bit map of free/busy tags */ |
220 | int busy; /* current depth */ | 220 | int busy; /* current depth */ |
221 | int max_depth; /* what we will send to device */ | 221 | int max_depth; /* what we will send to device */ |
222 | int real_max_depth; /* what the array can hold */ | 222 | int real_max_depth; /* what the array can hold */ |
223 | atomic_t refcnt; /* map can be shared */ | 223 | atomic_t refcnt; /* map can be shared */ |
224 | }; | 224 | }; |
225 | 225 | ||
226 | #define BLK_SCSI_MAX_CMDS (256) | 226 | #define BLK_SCSI_MAX_CMDS (256) |
227 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | 227 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
228 | 228 | ||
229 | struct queue_limits { | 229 | struct queue_limits { |
230 | unsigned long bounce_pfn; | 230 | unsigned long bounce_pfn; |
231 | unsigned long seg_boundary_mask; | 231 | unsigned long seg_boundary_mask; |
232 | 232 | ||
233 | unsigned int max_hw_sectors; | 233 | unsigned int max_hw_sectors; |
234 | unsigned int max_sectors; | 234 | unsigned int max_sectors; |
235 | unsigned int max_segment_size; | 235 | unsigned int max_segment_size; |
236 | unsigned int physical_block_size; | 236 | unsigned int physical_block_size; |
237 | unsigned int alignment_offset; | 237 | unsigned int alignment_offset; |
238 | unsigned int io_min; | 238 | unsigned int io_min; |
239 | unsigned int io_opt; | 239 | unsigned int io_opt; |
240 | unsigned int max_discard_sectors; | 240 | unsigned int max_discard_sectors; |
241 | unsigned int discard_granularity; | 241 | unsigned int discard_granularity; |
242 | unsigned int discard_alignment; | 242 | unsigned int discard_alignment; |
243 | 243 | ||
244 | unsigned short logical_block_size; | 244 | unsigned short logical_block_size; |
245 | unsigned short max_segments; | 245 | unsigned short max_segments; |
246 | 246 | ||
247 | unsigned char misaligned; | 247 | unsigned char misaligned; |
248 | unsigned char discard_misaligned; | 248 | unsigned char discard_misaligned; |
249 | unsigned char no_cluster; | 249 | unsigned char no_cluster; |
250 | signed char discard_zeroes_data; | 250 | signed char discard_zeroes_data; |
251 | }; | 251 | }; |
252 | 252 | ||
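Drivers normally populate queue_limits through the blk_queue_* setters rather than writing the structure directly; a brief sketch with arbitrary example values (the setter names are existing block-layer API, the numbers are not from this commit):

#include <linux/blkdev.h>

static void example_set_limits(struct request_queue *q)
{
        blk_queue_logical_block_size(q, 512);
        blk_queue_max_hw_sectors(q, 1024);      /* 512 KiB per request */
        blk_queue_io_min(q, 4096);              /* preferred minimum I/O */
        blk_queue_io_opt(q, 128 * 1024);        /* optimal I/O size hint */
}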
253 | struct request_queue | 253 | struct request_queue |
254 | { | 254 | { |
255 | /* | 255 | /* |
256 | * Together with queue_head for cacheline sharing | 256 | * Together with queue_head for cacheline sharing |
257 | */ | 257 | */ |
258 | struct list_head queue_head; | 258 | struct list_head queue_head; |
259 | struct request *last_merge; | 259 | struct request *last_merge; |
260 | struct elevator_queue *elevator; | 260 | struct elevator_queue *elevator; |
261 | 261 | ||
262 | /* | 262 | /* |
263 | * the queue request freelist, one for reads and one for writes | 263 | * the queue request freelist, one for reads and one for writes |
264 | */ | 264 | */ |
265 | struct request_list rq; | 265 | struct request_list rq; |
266 | 266 | ||
267 | request_fn_proc *request_fn; | 267 | request_fn_proc *request_fn; |
268 | make_request_fn *make_request_fn; | 268 | make_request_fn *make_request_fn; |
269 | prep_rq_fn *prep_rq_fn; | 269 | prep_rq_fn *prep_rq_fn; |
270 | unprep_rq_fn *unprep_rq_fn; | 270 | unprep_rq_fn *unprep_rq_fn; |
271 | unplug_fn *unplug_fn; | 271 | unplug_fn *unplug_fn; |
272 | merge_bvec_fn *merge_bvec_fn; | 272 | merge_bvec_fn *merge_bvec_fn; |
273 | softirq_done_fn *softirq_done_fn; | 273 | softirq_done_fn *softirq_done_fn; |
274 | rq_timed_out_fn *rq_timed_out_fn; | 274 | rq_timed_out_fn *rq_timed_out_fn; |
275 | dma_drain_needed_fn *dma_drain_needed; | 275 | dma_drain_needed_fn *dma_drain_needed; |
276 | lld_busy_fn *lld_busy_fn; | 276 | lld_busy_fn *lld_busy_fn; |
277 | 277 | ||
278 | /* | 278 | /* |
279 | * Dispatch queue sorting | 279 | * Dispatch queue sorting |
280 | */ | 280 | */ |
281 | sector_t end_sector; | 281 | sector_t end_sector; |
282 | struct request *boundary_rq; | 282 | struct request *boundary_rq; |
283 | 283 | ||
284 | /* | 284 | /* |
285 | * Auto-unplugging state | 285 | * Auto-unplugging state |
286 | */ | 286 | */ |
287 | struct timer_list unplug_timer; | 287 | struct timer_list unplug_timer; |
288 | int unplug_thresh; /* After this many requests */ | 288 | int unplug_thresh; /* After this many requests */ |
289 | unsigned long unplug_delay; /* After this many jiffies */ | 289 | unsigned long unplug_delay; /* After this many jiffies */ |
290 | struct work_struct unplug_work; | 290 | struct work_struct unplug_work; |
291 | 291 | ||
292 | struct backing_dev_info backing_dev_info; | 292 | struct backing_dev_info backing_dev_info; |
293 | 293 | ||
294 | /* | 294 | /* |
295 | * The queue owner gets to use this for whatever they like. | 295 | * The queue owner gets to use this for whatever they like. |
296 | * ll_rw_blk doesn't touch it. | 296 | * ll_rw_blk doesn't touch it. |
297 | */ | 297 | */ |
298 | void *queuedata; | 298 | void *queuedata; |
299 | 299 | ||
300 | /* | 300 | /* |
301 | * queue needs bounce pages for pages above this limit | 301 | * queue needs bounce pages for pages above this limit |
302 | */ | 302 | */ |
303 | gfp_t bounce_gfp; | 303 | gfp_t bounce_gfp; |
304 | 304 | ||
305 | /* | 305 | /* |
306 | * various queue flags, see QUEUE_* below | 306 | * various queue flags, see QUEUE_* below |
307 | */ | 307 | */ |
308 | unsigned long queue_flags; | 308 | unsigned long queue_flags; |
309 | 309 | ||
310 | /* | 310 | /* |
311 | * protects queue structures from reentrancy. ->__queue_lock should | 311 | * protects queue structures from reentrancy. ->__queue_lock should |
312 | * _never_ be used directly, it is queue private. always use | 312 | * _never_ be used directly, it is queue private. always use |
313 | * ->queue_lock. | 313 | * ->queue_lock. |
314 | */ | 314 | */ |
315 | spinlock_t __queue_lock; | 315 | spinlock_t __queue_lock; |
316 | spinlock_t *queue_lock; | 316 | spinlock_t *queue_lock; |
317 | 317 | ||
318 | /* | 318 | /* |
319 | * queue kobject | 319 | * queue kobject |
320 | */ | 320 | */ |
321 | struct kobject kobj; | 321 | struct kobject kobj; |
322 | 322 | ||
323 | /* | 323 | /* |
324 | * queue settings | 324 | * queue settings |
325 | */ | 325 | */ |
326 | unsigned long nr_requests; /* Max # of requests */ | 326 | unsigned long nr_requests; /* Max # of requests */ |
327 | unsigned int nr_congestion_on; | 327 | unsigned int nr_congestion_on; |
328 | unsigned int nr_congestion_off; | 328 | unsigned int nr_congestion_off; |
329 | unsigned int nr_batching; | 329 | unsigned int nr_batching; |
330 | 330 | ||
331 | void *dma_drain_buffer; | 331 | void *dma_drain_buffer; |
332 | unsigned int dma_drain_size; | 332 | unsigned int dma_drain_size; |
333 | unsigned int dma_pad_mask; | 333 | unsigned int dma_pad_mask; |
334 | unsigned int dma_alignment; | 334 | unsigned int dma_alignment; |
335 | 335 | ||
336 | struct blk_queue_tag *queue_tags; | 336 | struct blk_queue_tag *queue_tags; |
337 | struct list_head tag_busy_list; | 337 | struct list_head tag_busy_list; |
338 | 338 | ||
339 | unsigned int nr_sorted; | 339 | unsigned int nr_sorted; |
340 | unsigned int in_flight[2]; | 340 | unsigned int in_flight[2]; |
341 | 341 | ||
342 | unsigned int rq_timeout; | 342 | unsigned int rq_timeout; |
343 | struct timer_list timeout; | 343 | struct timer_list timeout; |
344 | struct list_head timeout_list; | 344 | struct list_head timeout_list; |
345 | 345 | ||
346 | struct queue_limits limits; | 346 | struct queue_limits limits; |
347 | 347 | ||
348 | /* | 348 | /* |
349 | * sg stuff | 349 | * sg stuff |
350 | */ | 350 | */ |
351 | unsigned int sg_timeout; | 351 | unsigned int sg_timeout; |
352 | unsigned int sg_reserved_size; | 352 | unsigned int sg_reserved_size; |
353 | int node; | 353 | int node; |
354 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 354 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
355 | struct blk_trace *blk_trace; | 355 | struct blk_trace *blk_trace; |
356 | #endif | 356 | #endif |
357 | /* | 357 | /* |
358 | * reserved for flush operations | 358 | * for flush operations |
359 | */ | 359 | */ |
360 | unsigned int flush_flags; | ||
361 | |||
360 | unsigned int ordered, next_ordered, ordseq; | 362 | unsigned int ordered, next_ordered, ordseq; |
361 | int orderr, ordcolor; | 363 | int orderr, ordcolor; |
362 | struct request pre_flush_rq, bar_rq, post_flush_rq; | 364 | struct request pre_flush_rq, bar_rq, post_flush_rq; |
363 | struct request *orig_bar_rq; | 365 | struct request *orig_bar_rq; |
364 | 366 | ||
365 | struct mutex sysfs_lock; | 367 | struct mutex sysfs_lock; |
366 | 368 | ||
367 | #if defined(CONFIG_BLK_DEV_BSG) | 369 | #if defined(CONFIG_BLK_DEV_BSG) |
368 | struct bsg_class_device bsg_dev; | 370 | struct bsg_class_device bsg_dev; |
369 | #endif | 371 | #endif |
370 | }; | 372 | }; |
371 | 373 | ||
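The new flush_flags word added to struct request_queue above is the storage behind the blk_queue_flush() interface this commit introduces; a hypothetical driver-side use, assuming a device with a volatile write cache that also honours FUA writes:

#include <linux/blkdev.h>

static void example_init_flush(struct request_queue *q)
{
        /* advertise a flushable write cache plus FUA support;
         * the combination ends up in q->flush_flags */
        blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
}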
372 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ | 374 | #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ |
373 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 375 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
374 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ | 376 | #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ |
375 | #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ | 377 | #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ |
376 | #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ | 378 | #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ |
377 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 379 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
378 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ | 380 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ |
379 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ | 381 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ |
380 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ | 382 | #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ |
381 | #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ | 383 | #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ |
382 | #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ | 384 | #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ |
383 | #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ | 385 | #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ |
384 | #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ | 386 | #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ |
385 | #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ | 387 | #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ |
386 | #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ | 388 | #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ |
387 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ | 389 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ |
388 | #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ | 390 | #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ |
389 | #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ | 391 | #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ |
390 | #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ | 392 | #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ |
391 | #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ | 393 | #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ |
392 | #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ | 394 | #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ |
393 | 395 | ||
394 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 396 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
395 | (1 << QUEUE_FLAG_CLUSTER) | \ | 397 | (1 << QUEUE_FLAG_CLUSTER) | \ |
396 | (1 << QUEUE_FLAG_STACKABLE) | \ | 398 | (1 << QUEUE_FLAG_STACKABLE) | \ |
397 | (1 << QUEUE_FLAG_SAME_COMP) | \ | 399 | (1 << QUEUE_FLAG_SAME_COMP) | \ |
398 | (1 << QUEUE_FLAG_ADD_RANDOM)) | 400 | (1 << QUEUE_FLAG_ADD_RANDOM)) |
399 | 401 | ||
400 | static inline int queue_is_locked(struct request_queue *q) | 402 | static inline int queue_is_locked(struct request_queue *q) |
401 | { | 403 | { |
402 | #ifdef CONFIG_SMP | 404 | #ifdef CONFIG_SMP |
403 | spinlock_t *lock = q->queue_lock; | 405 | spinlock_t *lock = q->queue_lock; |
404 | return lock && spin_is_locked(lock); | 406 | return lock && spin_is_locked(lock); |
405 | #else | 407 | #else |
406 | return 1; | 408 | return 1; |
407 | #endif | 409 | #endif |
408 | } | 410 | } |
409 | 411 | ||
410 | static inline void queue_flag_set_unlocked(unsigned int flag, | 412 | static inline void queue_flag_set_unlocked(unsigned int flag, |
411 | struct request_queue *q) | 413 | struct request_queue *q) |
412 | { | 414 | { |
413 | __set_bit(flag, &q->queue_flags); | 415 | __set_bit(flag, &q->queue_flags); |
414 | } | 416 | } |
415 | 417 | ||
416 | static inline int queue_flag_test_and_clear(unsigned int flag, | 418 | static inline int queue_flag_test_and_clear(unsigned int flag, |
417 | struct request_queue *q) | 419 | struct request_queue *q) |
418 | { | 420 | { |
419 | WARN_ON_ONCE(!queue_is_locked(q)); | 421 | WARN_ON_ONCE(!queue_is_locked(q)); |
420 | 422 | ||
421 | if (test_bit(flag, &q->queue_flags)) { | 423 | if (test_bit(flag, &q->queue_flags)) { |
422 | __clear_bit(flag, &q->queue_flags); | 424 | __clear_bit(flag, &q->queue_flags); |
423 | return 1; | 425 | return 1; |
424 | } | 426 | } |
425 | 427 | ||
426 | return 0; | 428 | return 0; |
427 | } | 429 | } |
428 | 430 | ||
429 | static inline int queue_flag_test_and_set(unsigned int flag, | 431 | static inline int queue_flag_test_and_set(unsigned int flag, |
430 | struct request_queue *q) | 432 | struct request_queue *q) |
431 | { | 433 | { |
432 | WARN_ON_ONCE(!queue_is_locked(q)); | 434 | WARN_ON_ONCE(!queue_is_locked(q)); |
433 | 435 | ||
434 | if (!test_bit(flag, &q->queue_flags)) { | 436 | if (!test_bit(flag, &q->queue_flags)) { |
435 | __set_bit(flag, &q->queue_flags); | 437 | __set_bit(flag, &q->queue_flags); |
436 | return 0; | 438 | return 0; |
437 | } | 439 | } |
438 | 440 | ||
439 | return 1; | 441 | return 1; |
440 | } | 442 | } |
441 | 443 | ||
442 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) | 444 | static inline void queue_flag_set(unsigned int flag, struct request_queue *q) |
443 | { | 445 | { |
444 | WARN_ON_ONCE(!queue_is_locked(q)); | 446 | WARN_ON_ONCE(!queue_is_locked(q)); |
445 | __set_bit(flag, &q->queue_flags); | 447 | __set_bit(flag, &q->queue_flags); |
446 | } | 448 | } |
447 | 449 | ||
448 | static inline void queue_flag_clear_unlocked(unsigned int flag, | 450 | static inline void queue_flag_clear_unlocked(unsigned int flag, |
449 | struct request_queue *q) | 451 | struct request_queue *q) |
450 | { | 452 | { |
451 | __clear_bit(flag, &q->queue_flags); | 453 | __clear_bit(flag, &q->queue_flags); |
452 | } | 454 | } |
453 | 455 | ||
454 | static inline int queue_in_flight(struct request_queue *q) | 456 | static inline int queue_in_flight(struct request_queue *q) |
455 | { | 457 | { |
456 | return q->in_flight[0] + q->in_flight[1]; | 458 | return q->in_flight[0] + q->in_flight[1]; |
457 | } | 459 | } |
458 | 460 | ||
459 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | 461 | static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) |
460 | { | 462 | { |
461 | WARN_ON_ONCE(!queue_is_locked(q)); | 463 | WARN_ON_ONCE(!queue_is_locked(q)); |
462 | __clear_bit(flag, &q->queue_flags); | 464 | __clear_bit(flag, &q->queue_flags); |
463 | } | 465 | } |
464 | 466 | ||
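The locked and unlocked flag helpers above split along a simple line: once a queue is live, flags that the I/O path also inspects are flipped under queue_lock, while the *_unlocked variants are meant for setup paths before the queue is exposed. A small illustrative sketch:

static void example_mark_nonrot(struct request_queue *q)
{
        /* runtime change: take the queue lock so queue_is_locked() holds */
        spin_lock_irq(q->queue_lock);
        queue_flag_set(QUEUE_FLAG_NONROT, q);
        spin_unlock_irq(q->queue_lock);
}

static void example_setup_flags(struct request_queue *q)
{
        /* during initialisation, before any requests can be queued */
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
}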
465 | enum { | 467 | enum { |
466 | /* | 468 | /* |
467 | * Hardbarrier is supported with one of the following methods. | 469 | * Hardbarrier is supported with one of the following methods. |
468 | * | 470 | * |
469 | * NONE : hardbarrier unsupported | 471 | * NONE : hardbarrier unsupported |
470 | * DRAIN : ordering by draining is enough | 472 | * DRAIN : ordering by draining is enough |
471 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes | 473 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes |
472 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write | 474 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write |
473 | */ | 475 | */ |
474 | QUEUE_ORDERED_DO_PREFLUSH = 0x10, | 476 | QUEUE_ORDERED_DO_PREFLUSH = 0x10, |
475 | QUEUE_ORDERED_DO_BAR = 0x20, | 477 | QUEUE_ORDERED_DO_BAR = 0x20, |
476 | QUEUE_ORDERED_DO_POSTFLUSH = 0x40, | 478 | QUEUE_ORDERED_DO_POSTFLUSH = 0x40, |
477 | QUEUE_ORDERED_DO_FUA = 0x80, | 479 | QUEUE_ORDERED_DO_FUA = 0x80, |
478 | 480 | ||
479 | QUEUE_ORDERED_NONE = 0x00, | 481 | QUEUE_ORDERED_NONE = 0x00, |
480 | 482 | ||
481 | QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, | 483 | QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, |
482 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | | 484 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | |
483 | QUEUE_ORDERED_DO_PREFLUSH | | 485 | QUEUE_ORDERED_DO_PREFLUSH | |
484 | QUEUE_ORDERED_DO_POSTFLUSH, | 486 | QUEUE_ORDERED_DO_POSTFLUSH, |
485 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | | 487 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | |
486 | QUEUE_ORDERED_DO_PREFLUSH | | 488 | QUEUE_ORDERED_DO_PREFLUSH | |
487 | QUEUE_ORDERED_DO_FUA, | 489 | QUEUE_ORDERED_DO_FUA, |
488 | 490 | ||
489 | /* | 491 | /* |
490 | * Ordered operation sequence | 492 | * Ordered operation sequence |
491 | */ | 493 | */ |
492 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ | 494 | QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ |
493 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ | 495 | QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ |
494 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ | 496 | QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ |
495 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ | 497 | QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ |
496 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ | 498 | QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ |
497 | QUEUE_ORDSEQ_DONE = 0x20, | 499 | QUEUE_ORDSEQ_DONE = 0x20, |
498 | }; | 500 | }; |
499 | 501 | ||
500 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 502 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
501 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 503 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
502 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 504 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
503 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) | 505 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) |
504 | #define blk_queue_noxmerges(q) \ | 506 | #define blk_queue_noxmerges(q) \ |
505 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) | 507 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) |
506 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) | 508 | #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) |
507 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) | 509 | #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) |
508 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) | 510 | #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) |
509 | #define blk_queue_stackable(q) \ | 511 | #define blk_queue_stackable(q) \ |
510 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) | 512 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) |
511 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) | 513 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) |
512 | #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ | 514 | #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ |
513 | test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) | 515 | test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) |
514 | 516 | ||
515 | #define blk_noretry_request(rq) \ | 517 | #define blk_noretry_request(rq) \ |
516 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ | 518 | ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ |
517 | REQ_FAILFAST_DRIVER)) | 519 | REQ_FAILFAST_DRIVER)) |
518 | 520 | ||
519 | #define blk_account_rq(rq) \ | 521 | #define blk_account_rq(rq) \ |
520 | (((rq)->cmd_flags & REQ_STARTED) && \ | 522 | (((rq)->cmd_flags & REQ_STARTED) && \ |
521 | ((rq)->cmd_type == REQ_TYPE_FS || \ | 523 | ((rq)->cmd_type == REQ_TYPE_FS || \ |
522 | ((rq)->cmd_flags & REQ_DISCARD))) | 524 | ((rq)->cmd_flags & REQ_DISCARD))) |
523 | 525 | ||
524 | #define blk_pm_request(rq) \ | 526 | #define blk_pm_request(rq) \ |
525 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ | 527 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ |
526 | (rq)->cmd_type == REQ_TYPE_PM_RESUME) | 528 | (rq)->cmd_type == REQ_TYPE_PM_RESUME) |
527 | 529 | ||
528 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) | 530 | #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) |
529 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) | 531 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
530 | /* rq->queuelist of dequeued request must be list_empty() */ | 532 | /* rq->queuelist of dequeued request must be list_empty() */ |
531 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) | 533 | #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) |
532 | 534 | ||
533 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 535 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
534 | 536 | ||
535 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 537 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) |
536 | 538 | ||
537 | /* | 539 | /* |
538 | * We regard a request as sync, if either a read or a sync write | 540 | * We regard a request as sync, if either a read or a sync write |
539 | */ | 541 | */ |
540 | static inline bool rw_is_sync(unsigned int rw_flags) | 542 | static inline bool rw_is_sync(unsigned int rw_flags) |
541 | { | 543 | { |
542 | return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); | 544 | return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); |
543 | } | 545 | } |
544 | 546 | ||
545 | static inline bool rq_is_sync(struct request *rq) | 547 | static inline bool rq_is_sync(struct request *rq) |
546 | { | 548 | { |
547 | return rw_is_sync(rq->cmd_flags); | 549 | return rw_is_sync(rq->cmd_flags); |
548 | } | 550 | } |
549 | 551 | ||
550 | static inline int blk_queue_full(struct request_queue *q, int sync) | 552 | static inline int blk_queue_full(struct request_queue *q, int sync) |
551 | { | 553 | { |
552 | if (sync) | 554 | if (sync) |
553 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); | 555 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); |
554 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); | 556 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); |
555 | } | 557 | } |
556 | 558 | ||
557 | static inline void blk_set_queue_full(struct request_queue *q, int sync) | 559 | static inline void blk_set_queue_full(struct request_queue *q, int sync) |
558 | { | 560 | { |
559 | if (sync) | 561 | if (sync) |
560 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); | 562 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); |
561 | else | 563 | else |
562 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); | 564 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); |
563 | } | 565 | } |
564 | 566 | ||
565 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) | 567 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) |
566 | { | 568 | { |
567 | if (sync) | 569 | if (sync) |
568 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); | 570 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); |
569 | else | 571 | else |
570 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); | 572 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); |
571 | } | 573 | } |
572 | 574 | ||
573 | 575 | ||
574 | /* | 576 | /* |
575 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may | 577 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may |
576 | * it already be started by driver. | 578 | * it already be started by driver. |
577 | */ | 579 | */ |
578 | #define RQ_NOMERGE_FLAGS \ | 580 | #define RQ_NOMERGE_FLAGS \ |
579 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) | 581 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) |
580 | #define rq_mergeable(rq) \ | 582 | #define rq_mergeable(rq) \ |
581 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 583 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ |
582 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 584 | (((rq)->cmd_flags & REQ_DISCARD) || \ |
583 | (rq)->cmd_type == REQ_TYPE_FS)) | 585 | (rq)->cmd_type == REQ_TYPE_FS)) |
584 | 586 | ||
585 | /* | 587 | /* |
586 | * q->prep_rq_fn return values | 588 | * q->prep_rq_fn return values |
587 | */ | 589 | */ |
588 | #define BLKPREP_OK 0 /* serve it */ | 590 | #define BLKPREP_OK 0 /* serve it */ |
589 | #define BLKPREP_KILL 1 /* fatal error, kill */ | 591 | #define BLKPREP_KILL 1 /* fatal error, kill */ |
590 | #define BLKPREP_DEFER 2 /* leave on queue */ | 592 | #define BLKPREP_DEFER 2 /* leave on queue */ |
591 | 593 | ||
592 | extern unsigned long blk_max_low_pfn, blk_max_pfn; | 594 | extern unsigned long blk_max_low_pfn, blk_max_pfn; |
593 | 595 | ||
594 | /* | 596 | /* |
595 | * standard bounce addresses: | 597 | * standard bounce addresses: |
596 | * | 598 | * |
597 | * BLK_BOUNCE_HIGH : bounce all highmem pages | 599 | * BLK_BOUNCE_HIGH : bounce all highmem pages |
598 | * BLK_BOUNCE_ANY : don't bounce anything | 600 | * BLK_BOUNCE_ANY : don't bounce anything |
599 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary | 601 | * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary |
600 | */ | 602 | */ |
601 | 603 | ||
602 | #if BITS_PER_LONG == 32 | 604 | #if BITS_PER_LONG == 32 |
603 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) | 605 | #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) |
604 | #else | 606 | #else |
605 | #define BLK_BOUNCE_HIGH -1ULL | 607 | #define BLK_BOUNCE_HIGH -1ULL |
606 | #endif | 608 | #endif |
607 | #define BLK_BOUNCE_ANY (-1ULL) | 609 | #define BLK_BOUNCE_ANY (-1ULL) |
608 | #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) | 610 | #define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) |
609 | 611 | ||
610 | /* | 612 | /* |
611 | * default timeout for SG_IO if none specified | 613 | * default timeout for SG_IO if none specified |
612 | */ | 614 | */ |
613 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) | 615 | #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) |
614 | #define BLK_MIN_SG_TIMEOUT (7 * HZ) | 616 | #define BLK_MIN_SG_TIMEOUT (7 * HZ) |
615 | 617 | ||
616 | #ifdef CONFIG_BOUNCE | 618 | #ifdef CONFIG_BOUNCE |
617 | extern int init_emergency_isa_pool(void); | 619 | extern int init_emergency_isa_pool(void); |
618 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); | 620 | extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); |
619 | #else | 621 | #else |
620 | static inline int init_emergency_isa_pool(void) | 622 | static inline int init_emergency_isa_pool(void) |
621 | { | 623 | { |
622 | return 0; | 624 | return 0; |
623 | } | 625 | } |
624 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) | 626 | static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) |
625 | { | 627 | { |
626 | } | 628 | } |
627 | #endif /* CONFIG_BOUNCE */ | 629 | #endif /* CONFIG_BOUNCE */ |
628 | 630 | ||
629 | struct rq_map_data { | 631 | struct rq_map_data { |
630 | struct page **pages; | 632 | struct page **pages; |
631 | int page_order; | 633 | int page_order; |
632 | int nr_entries; | 634 | int nr_entries; |
633 | unsigned long offset; | 635 | unsigned long offset; |
634 | int null_mapped; | 636 | int null_mapped; |
635 | int from_user; | 637 | int from_user; |
636 | }; | 638 | }; |
637 | 639 | ||
638 | struct req_iterator { | 640 | struct req_iterator { |
639 | int i; | 641 | int i; |
640 | struct bio *bio; | 642 | struct bio *bio; |
641 | }; | 643 | }; |
642 | 644 | ||
643 | /* This should not be used directly - use rq_for_each_segment */ | 645 | /* This should not be used directly - use rq_for_each_segment */ |
644 | #define for_each_bio(_bio) \ | 646 | #define for_each_bio(_bio) \ |
645 | for (; _bio; _bio = _bio->bi_next) | 647 | for (; _bio; _bio = _bio->bi_next) |
646 | #define __rq_for_each_bio(_bio, rq) \ | 648 | #define __rq_for_each_bio(_bio, rq) \ |
647 | if ((rq->bio)) \ | 649 | if ((rq->bio)) \ |
648 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) | 650 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
649 | 651 | ||
650 | #define rq_for_each_segment(bvl, _rq, _iter) \ | 652 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
651 | __rq_for_each_bio(_iter.bio, _rq) \ | 653 | __rq_for_each_bio(_iter.bio, _rq) \ |
652 | bio_for_each_segment(bvl, _iter.bio, _iter.i) | 654 | bio_for_each_segment(bvl, _iter.bio, _iter.i) |
653 | 655 | ||
654 | #define rq_iter_last(rq, _iter) \ | 656 | #define rq_iter_last(rq, _iter) \ |
655 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) | 657 | (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) |
656 | 658 | ||
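As a hedged sketch of the iterator macros above, a simple driver could walk every data segment of a request like this; example_walk_request() and copy_segment_to_device() are illustrative names, the latter standing in for whatever actually moves the data:

    #include <linux/blkdev.h>
    #include <linux/bio.h>

    /* hypothetical stand-in for the real data-transfer routine */
    static void copy_segment_to_device(struct page *page, unsigned int off,
                                       unsigned int len)
    {
    }

    static void example_walk_request(struct request *rq)
    {
            struct req_iterator iter;
            struct bio_vec *bvec;

            rq_for_each_segment(bvec, rq, iter) {
                    /* each segment is bvec->bv_len bytes starting at
                     * bvec->bv_offset within bvec->bv_page */
                    copy_segment_to_device(bvec->bv_page, bvec->bv_offset,
                                           bvec->bv_len);
            }
    }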
657 | #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 659 | #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
658 | # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" | 660 | # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" |
659 | #endif | 661 | #endif |
660 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 662 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
661 | extern void rq_flush_dcache_pages(struct request *rq); | 663 | extern void rq_flush_dcache_pages(struct request *rq); |
662 | #else | 664 | #else |
663 | static inline void rq_flush_dcache_pages(struct request *rq) | 665 | static inline void rq_flush_dcache_pages(struct request *rq) |
664 | { | 666 | { |
665 | } | 667 | } |
666 | #endif | 668 | #endif |
667 | 669 | ||
668 | extern int blk_register_queue(struct gendisk *disk); | 670 | extern int blk_register_queue(struct gendisk *disk); |
669 | extern void blk_unregister_queue(struct gendisk *disk); | 671 | extern void blk_unregister_queue(struct gendisk *disk); |
670 | extern void register_disk(struct gendisk *dev); | 672 | extern void register_disk(struct gendisk *dev); |
671 | extern void generic_make_request(struct bio *bio); | 673 | extern void generic_make_request(struct bio *bio); |
672 | extern void blk_rq_init(struct request_queue *q, struct request *rq); | 674 | extern void blk_rq_init(struct request_queue *q, struct request *rq); |
673 | extern void blk_put_request(struct request *); | 675 | extern void blk_put_request(struct request *); |
674 | extern void __blk_put_request(struct request_queue *, struct request *); | 676 | extern void __blk_put_request(struct request_queue *, struct request *); |
675 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); | 677 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
676 | extern struct request *blk_make_request(struct request_queue *, struct bio *, | 678 | extern struct request *blk_make_request(struct request_queue *, struct bio *, |
677 | gfp_t); | 679 | gfp_t); |
678 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | 680 | extern void blk_insert_request(struct request_queue *, struct request *, int, void *); |
679 | extern void blk_requeue_request(struct request_queue *, struct request *); | 681 | extern void blk_requeue_request(struct request_queue *, struct request *); |
680 | extern void blk_add_request_payload(struct request *rq, struct page *page, | 682 | extern void blk_add_request_payload(struct request *rq, struct page *page, |
681 | unsigned int len); | 683 | unsigned int len); |
682 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); | 684 | extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); |
683 | extern int blk_lld_busy(struct request_queue *q); | 685 | extern int blk_lld_busy(struct request_queue *q); |
684 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 686 | extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
685 | struct bio_set *bs, gfp_t gfp_mask, | 687 | struct bio_set *bs, gfp_t gfp_mask, |
686 | int (*bio_ctr)(struct bio *, struct bio *, void *), | 688 | int (*bio_ctr)(struct bio *, struct bio *, void *), |
687 | void *data); | 689 | void *data); |
688 | extern void blk_rq_unprep_clone(struct request *rq); | 690 | extern void blk_rq_unprep_clone(struct request *rq); |
689 | extern int blk_insert_cloned_request(struct request_queue *q, | 691 | extern int blk_insert_cloned_request(struct request_queue *q, |
690 | struct request *rq); | 692 | struct request *rq); |
691 | extern void blk_plug_device(struct request_queue *); | 693 | extern void blk_plug_device(struct request_queue *); |
692 | extern void blk_plug_device_unlocked(struct request_queue *); | 694 | extern void blk_plug_device_unlocked(struct request_queue *); |
693 | extern int blk_remove_plug(struct request_queue *); | 695 | extern int blk_remove_plug(struct request_queue *); |
694 | extern void blk_recount_segments(struct request_queue *, struct bio *); | 696 | extern void blk_recount_segments(struct request_queue *, struct bio *); |
695 | extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, | 697 | extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, |
696 | unsigned int, void __user *); | 698 | unsigned int, void __user *); |
697 | extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, | 699 | extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, |
698 | struct scsi_ioctl_command __user *); | 700 | struct scsi_ioctl_command __user *); |
699 | 701 | ||
700 | /* | 702 | /* |
701 | * A queue has just exited congestion. Note this in the global counter of | 703 | * A queue has just exited congestion. Note this in the global counter of |
702 | * congested queues, and wake up anyone who was waiting for requests to be | 704 | * congested queues, and wake up anyone who was waiting for requests to be |
703 | * put back. | 705 | * put back. |
704 | */ | 706 | */ |
705 | static inline void blk_clear_queue_congested(struct request_queue *q, int sync) | 707 | static inline void blk_clear_queue_congested(struct request_queue *q, int sync) |
706 | { | 708 | { |
707 | clear_bdi_congested(&q->backing_dev_info, sync); | 709 | clear_bdi_congested(&q->backing_dev_info, sync); |
708 | } | 710 | } |
709 | 711 | ||
710 | /* | 712 | /* |
711 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 713 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
712 | * state flags and increment the global counter of congested queues. | 714 | * state flags and increment the global counter of congested queues. |
713 | */ | 715 | */ |
714 | static inline void blk_set_queue_congested(struct request_queue *q, int sync) | 716 | static inline void blk_set_queue_congested(struct request_queue *q, int sync) |
715 | { | 717 | { |
716 | set_bdi_congested(&q->backing_dev_info, sync); | 718 | set_bdi_congested(&q->backing_dev_info, sync); |
717 | } | 719 | } |
718 | 720 | ||
719 | extern void blk_start_queue(struct request_queue *q); | 721 | extern void blk_start_queue(struct request_queue *q); |
720 | extern void blk_stop_queue(struct request_queue *q); | 722 | extern void blk_stop_queue(struct request_queue *q); |
721 | extern void blk_sync_queue(struct request_queue *q); | 723 | extern void blk_sync_queue(struct request_queue *q); |
722 | extern void __blk_stop_queue(struct request_queue *q); | 724 | extern void __blk_stop_queue(struct request_queue *q); |
723 | extern void __blk_run_queue(struct request_queue *); | 725 | extern void __blk_run_queue(struct request_queue *); |
724 | extern void blk_run_queue(struct request_queue *); | 726 | extern void blk_run_queue(struct request_queue *); |
725 | extern int blk_rq_map_user(struct request_queue *, struct request *, | 727 | extern int blk_rq_map_user(struct request_queue *, struct request *, |
726 | struct rq_map_data *, void __user *, unsigned long, | 728 | struct rq_map_data *, void __user *, unsigned long, |
727 | gfp_t); | 729 | gfp_t); |
728 | extern int blk_rq_unmap_user(struct bio *); | 730 | extern int blk_rq_unmap_user(struct bio *); |
729 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); | 731 | extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); |
730 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, | 732 | extern int blk_rq_map_user_iov(struct request_queue *, struct request *, |
731 | struct rq_map_data *, struct sg_iovec *, int, | 733 | struct rq_map_data *, struct sg_iovec *, int, |
732 | unsigned int, gfp_t); | 734 | unsigned int, gfp_t); |
733 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, | 735 | extern int blk_execute_rq(struct request_queue *, struct gendisk *, |
734 | struct request *, int); | 736 | struct request *, int); |
735 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, | 737 | extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, |
736 | struct request *, int, rq_end_io_fn *); | 738 | struct request *, int, rq_end_io_fn *); |
737 | extern void blk_unplug(struct request_queue *q); | 739 | extern void blk_unplug(struct request_queue *q); |
738 | 740 | ||
739 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) | 741 | static inline struct request_queue *bdev_get_queue(struct block_device *bdev) |
740 | { | 742 | { |
741 | return bdev->bd_disk->queue; | 743 | return bdev->bd_disk->queue; |
742 | } | 744 | } |
743 | 745 | ||
744 | /* | 746 | /* |
745 | * blk_rq_pos() : the current sector | 747 | * blk_rq_pos() : the current sector |
746 | * blk_rq_bytes() : bytes left in the entire request | 748 | * blk_rq_bytes() : bytes left in the entire request |
747 | * blk_rq_cur_bytes() : bytes left in the current segment | 749 | * blk_rq_cur_bytes() : bytes left in the current segment |
748 | * blk_rq_err_bytes() : bytes left till the next error boundary | 750 | * blk_rq_err_bytes() : bytes left till the next error boundary |
749 | * blk_rq_sectors() : sectors left in the entire request | 751 | * blk_rq_sectors() : sectors left in the entire request |
750 | * blk_rq_cur_sectors() : sectors left in the current segment | 752 | * blk_rq_cur_sectors() : sectors left in the current segment |
751 | */ | 753 | */ |
752 | static inline sector_t blk_rq_pos(const struct request *rq) | 754 | static inline sector_t blk_rq_pos(const struct request *rq) |
753 | { | 755 | { |
754 | return rq->__sector; | 756 | return rq->__sector; |
755 | } | 757 | } |
756 | 758 | ||
757 | static inline unsigned int blk_rq_bytes(const struct request *rq) | 759 | static inline unsigned int blk_rq_bytes(const struct request *rq) |
758 | { | 760 | { |
759 | return rq->__data_len; | 761 | return rq->__data_len; |
760 | } | 762 | } |
761 | 763 | ||
762 | static inline int blk_rq_cur_bytes(const struct request *rq) | 764 | static inline int blk_rq_cur_bytes(const struct request *rq) |
763 | { | 765 | { |
764 | return rq->bio ? bio_cur_bytes(rq->bio) : 0; | 766 | return rq->bio ? bio_cur_bytes(rq->bio) : 0; |
765 | } | 767 | } |
766 | 768 | ||
767 | extern unsigned int blk_rq_err_bytes(const struct request *rq); | 769 | extern unsigned int blk_rq_err_bytes(const struct request *rq); |
768 | 770 | ||
769 | static inline unsigned int blk_rq_sectors(const struct request *rq) | 771 | static inline unsigned int blk_rq_sectors(const struct request *rq) |
770 | { | 772 | { |
771 | return blk_rq_bytes(rq) >> 9; | 773 | return blk_rq_bytes(rq) >> 9; |
772 | } | 774 | } |
773 | 775 | ||
774 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | 776 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) |
775 | { | 777 | { |
776 | return blk_rq_cur_bytes(rq) >> 9; | 778 | return blk_rq_cur_bytes(rq) >> 9; |
777 | } | 779 | } |
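A hedged illustration of the accessors described above; example_inspect() is an illustrative name and the pr_debug() is only there to have something consume the values:

    #include <linux/kernel.h>
    #include <linux/blkdev.h>

    static void example_inspect(struct request *rq)
    {
            sector_t lba = blk_rq_pos(rq);           /* current sector */
            unsigned int nsect = blk_rq_sectors(rq); /* sectors left in rq */
            int is_write = rq_data_dir(rq);          /* 0 = read, non-zero = write */

            pr_debug("%s: sector %llu, %u sectors\n",
                     is_write ? "write" : "read",
                     (unsigned long long)lba, nsect);
    }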
778 | 780 | ||
779 | /* | 781 | /* |
780 | * Request issue related functions. | 782 | * Request issue related functions. |
781 | */ | 783 | */ |
782 | extern struct request *blk_peek_request(struct request_queue *q); | 784 | extern struct request *blk_peek_request(struct request_queue *q); |
783 | extern void blk_start_request(struct request *rq); | 785 | extern void blk_start_request(struct request *rq); |
784 | extern struct request *blk_fetch_request(struct request_queue *q); | 786 | extern struct request *blk_fetch_request(struct request_queue *q); |
785 | 787 | ||
786 | /* | 788 | /* |
787 | * Request completion related functions. | 789 | * Request completion related functions. |
788 | * | 790 | * |
789 | * blk_update_request() completes given number of bytes and updates | 791 | * blk_update_request() completes given number of bytes and updates |
790 | * the request without completing it. | 792 | * the request without completing it. |
791 | * | 793 | * |
792 | * blk_end_request() and friends. __blk_end_request() must be called | 794 | * blk_end_request() and friends. __blk_end_request() must be called |
793 | * with the request queue spinlock acquired. | 795 | * with the request queue spinlock acquired. |
794 | * | 796 | * |
795 | * Several drivers define their own end_request and call | 797 | * Several drivers define their own end_request and call |
796 | * blk_end_request() for parts of the original function. | 798 | * blk_end_request() for parts of the original function. |
797 | * This prevents code duplication in drivers. | 799 | * This prevents code duplication in drivers. |
798 | */ | 800 | */ |
799 | extern bool blk_update_request(struct request *rq, int error, | 801 | extern bool blk_update_request(struct request *rq, int error, |
800 | unsigned int nr_bytes); | 802 | unsigned int nr_bytes); |
801 | extern bool blk_end_request(struct request *rq, int error, | 803 | extern bool blk_end_request(struct request *rq, int error, |
802 | unsigned int nr_bytes); | 804 | unsigned int nr_bytes); |
803 | extern void blk_end_request_all(struct request *rq, int error); | 805 | extern void blk_end_request_all(struct request *rq, int error); |
804 | extern bool blk_end_request_cur(struct request *rq, int error); | 806 | extern bool blk_end_request_cur(struct request *rq, int error); |
805 | extern bool blk_end_request_err(struct request *rq, int error); | 807 | extern bool blk_end_request_err(struct request *rq, int error); |
806 | extern bool __blk_end_request(struct request *rq, int error, | 808 | extern bool __blk_end_request(struct request *rq, int error, |
807 | unsigned int nr_bytes); | 809 | unsigned int nr_bytes); |
808 | extern void __blk_end_request_all(struct request *rq, int error); | 810 | extern void __blk_end_request_all(struct request *rq, int error); |
809 | extern bool __blk_end_request_cur(struct request *rq, int error); | 811 | extern bool __blk_end_request_cur(struct request *rq, int error); |
810 | extern bool __blk_end_request_err(struct request *rq, int error); | 812 | extern bool __blk_end_request_err(struct request *rq, int error); |
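A hedged sketch of the classic fetch-and-complete loop built from the declarations above. A ->request_fn is invoked with the queue lock held, so the __blk_end_request_all() variant is the right one inside it; example_request_fn() and do_one_request() are illustrative names, the latter standing in for the actual transfer and returning 0 or a negative errno:

    #include <linux/blkdev.h>

    /* hypothetical transfer routine */
    static int do_one_request(struct request *rq)
    {
            return 0;
    }

    static void example_request_fn(struct request_queue *q)
    {
            struct request *rq;

            while ((rq = blk_fetch_request(q)) != NULL) {
                    int error = do_one_request(rq);

                    /* queue lock is still held here */
                    __blk_end_request_all(rq, error);
            }
    }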
811 | 813 | ||
812 | extern void blk_complete_request(struct request *); | 814 | extern void blk_complete_request(struct request *); |
813 | extern void __blk_complete_request(struct request *); | 815 | extern void __blk_complete_request(struct request *); |
814 | extern void blk_abort_request(struct request *); | 816 | extern void blk_abort_request(struct request *); |
815 | extern void blk_abort_queue(struct request_queue *); | 817 | extern void blk_abort_queue(struct request_queue *); |
816 | extern void blk_unprep_request(struct request *); | 818 | extern void blk_unprep_request(struct request *); |
817 | 819 | ||
818 | /* | 820 | /* |
819 | * Access functions for manipulating queue properties | 821 | * Access functions for manipulating queue properties |
820 | */ | 822 | */ |
821 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, | 823 | extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, |
822 | spinlock_t *lock, int node_id); | 824 | spinlock_t *lock, int node_id); |
823 | extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, | 825 | extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, |
824 | request_fn_proc *, | 826 | request_fn_proc *, |
825 | spinlock_t *, int node_id); | 827 | spinlock_t *, int node_id); |
826 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); | 828 | extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); |
827 | extern struct request_queue *blk_init_allocated_queue(struct request_queue *, | 829 | extern struct request_queue *blk_init_allocated_queue(struct request_queue *, |
828 | request_fn_proc *, spinlock_t *); | 830 | request_fn_proc *, spinlock_t *); |
829 | extern void blk_cleanup_queue(struct request_queue *); | 831 | extern void blk_cleanup_queue(struct request_queue *); |
830 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); | 832 | extern void blk_queue_make_request(struct request_queue *, make_request_fn *); |
831 | extern void blk_queue_bounce_limit(struct request_queue *, u64); | 833 | extern void blk_queue_bounce_limit(struct request_queue *, u64); |
832 | extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); | 834 | extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); |
833 | extern void blk_queue_max_segments(struct request_queue *, unsigned short); | 835 | extern void blk_queue_max_segments(struct request_queue *, unsigned short); |
834 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 836 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
835 | extern void blk_queue_max_discard_sectors(struct request_queue *q, | 837 | extern void blk_queue_max_discard_sectors(struct request_queue *q, |
836 | unsigned int max_discard_sectors); | 838 | unsigned int max_discard_sectors); |
837 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); | 839 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); |
838 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); | 840 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); |
839 | extern void blk_queue_alignment_offset(struct request_queue *q, | 841 | extern void blk_queue_alignment_offset(struct request_queue *q, |
840 | unsigned int alignment); | 842 | unsigned int alignment); |
841 | extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); | 843 | extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); |
842 | extern void blk_queue_io_min(struct request_queue *q, unsigned int min); | 844 | extern void blk_queue_io_min(struct request_queue *q, unsigned int min); |
843 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); | 845 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); |
844 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); | 846 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); |
845 | extern void blk_set_default_limits(struct queue_limits *lim); | 847 | extern void blk_set_default_limits(struct queue_limits *lim); |
846 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | 848 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
847 | sector_t offset); | 849 | sector_t offset); |
848 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, | 850 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, |
849 | sector_t offset); | 851 | sector_t offset); |
850 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | 852 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, |
851 | sector_t offset); | 853 | sector_t offset); |
852 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 854 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
853 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); | 855 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); |
854 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); | 856 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); |
855 | extern int blk_queue_dma_drain(struct request_queue *q, | 857 | extern int blk_queue_dma_drain(struct request_queue *q, |
856 | dma_drain_needed_fn *dma_drain_needed, | 858 | dma_drain_needed_fn *dma_drain_needed, |
857 | void *buf, unsigned int size); | 859 | void *buf, unsigned int size); |
858 | extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); | 860 | extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); |
859 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); | 861 | extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); |
860 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); | 862 | extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); |
861 | extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); | 863 | extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); |
862 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | 864 | extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); |
863 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 865 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
864 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); | 866 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); |
865 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 867 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
866 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); | 868 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); |
867 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); | 869 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); |
870 | extern void blk_queue_flush(struct request_queue *q, unsigned int flush); | ||
868 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 871 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
869 | extern int blk_queue_ordered(struct request_queue *, unsigned); | ||
870 | extern bool blk_do_ordered(struct request_queue *, struct request **); | 872 | extern bool blk_do_ordered(struct request_queue *, struct request **); |
871 | extern unsigned blk_ordered_cur_seq(struct request_queue *); | 873 | extern unsigned blk_ordered_cur_seq(struct request_queue *); |
872 | extern unsigned blk_ordered_req_seq(struct request *); | 874 | extern unsigned blk_ordered_req_seq(struct request *); |
873 | extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); | 875 | extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); |
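blk_queue_flush(), newly declared above in place of blk_queue_ordered(), takes a mask of REQ_FLUSH and REQ_FUA describing what the hardware can do with its write cache. A hedged setup sketch under that assumption; example_setup_queue(), wce and fua are illustrative names:

    #include <linux/blkdev.h>

    static void example_setup_queue(struct request_queue *q, bool wce, bool fua)
    {
            unsigned int flush = 0;

            if (wce)                /* volatile write cache that can be flushed */
                    flush |= REQ_FLUSH;
            if (wce && fua)         /* forced-unit-access writes honoured */
                    flush |= REQ_FUA;

            blk_queue_flush(q, flush);
    }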
874 | 876 | ||
875 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); | 877 | extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); |
876 | extern void blk_dump_rq_flags(struct request *, char *); | 878 | extern void blk_dump_rq_flags(struct request *, char *); |
877 | extern void generic_unplug_device(struct request_queue *); | 879 | extern void generic_unplug_device(struct request_queue *); |
878 | extern long nr_blockdev_pages(void); | 880 | extern long nr_blockdev_pages(void); |
879 | 881 | ||
880 | int blk_get_queue(struct request_queue *); | 882 | int blk_get_queue(struct request_queue *); |
881 | struct request_queue *blk_alloc_queue(gfp_t); | 883 | struct request_queue *blk_alloc_queue(gfp_t); |
882 | struct request_queue *blk_alloc_queue_node(gfp_t, int); | 884 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
883 | extern void blk_put_queue(struct request_queue *); | 885 | extern void blk_put_queue(struct request_queue *); |
884 | 886 | ||
885 | /* | 887 | /* |
886 | * tag stuff | 888 | * tag stuff |
887 | */ | 889 | */ |
888 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 890 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) |
889 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 891 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
890 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 892 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
891 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 893 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
892 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); | 894 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); |
893 | extern void blk_queue_free_tags(struct request_queue *); | 895 | extern void blk_queue_free_tags(struct request_queue *); |
894 | extern int blk_queue_resize_tags(struct request_queue *, int); | 896 | extern int blk_queue_resize_tags(struct request_queue *, int); |
895 | extern void blk_queue_invalidate_tags(struct request_queue *); | 897 | extern void blk_queue_invalidate_tags(struct request_queue *); |
896 | extern struct blk_queue_tag *blk_init_tags(int); | 898 | extern struct blk_queue_tag *blk_init_tags(int); |
897 | extern void blk_free_tags(struct blk_queue_tag *); | 899 | extern void blk_free_tags(struct blk_queue_tag *); |
898 | 900 | ||
899 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | 901 | static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, |
900 | int tag) | 902 | int tag) |
901 | { | 903 | { |
902 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 904 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
903 | return NULL; | 905 | return NULL; |
904 | return bqt->tag_index[tag]; | 906 | return bqt->tag_index[tag]; |
905 | } | 907 | } |
906 | enum{ | 908 | enum{ |
907 | BLKDEV_WAIT, /* wait for completion */ | 909 | BLKDEV_WAIT, /* wait for completion */ |
908 | BLKDEV_BARRIER, /* issue request with barrier */ | 910 | BLKDEV_BARRIER, /* issue request with barrier */ |
909 | BLKDEV_SECURE, /* secure discard */ | 911 | BLKDEV_SECURE, /* secure discard */ |
910 | }; | 912 | }; |
911 | #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) | 913 | #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) |
912 | #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) | 914 | #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) |
913 | #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) | 915 | #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) |
914 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, | 916 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, |
915 | unsigned long); | 917 | unsigned long); |
916 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | 918 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, |
917 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 919 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
918 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 920 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
919 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 921 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
920 | static inline int sb_issue_discard(struct super_block *sb, | 922 | static inline int sb_issue_discard(struct super_block *sb, |
921 | sector_t block, sector_t nr_blocks) | 923 | sector_t block, sector_t nr_blocks) |
922 | { | 924 | { |
923 | block <<= (sb->s_blocksize_bits - 9); | 925 | block <<= (sb->s_blocksize_bits - 9); |
924 | nr_blocks <<= (sb->s_blocksize_bits - 9); | 926 | nr_blocks <<= (sb->s_blocksize_bits - 9); |
925 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, | 927 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, |
926 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | 928 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
927 | } | 929 | } |
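A hedged sketch of a synchronous cache flush using blkdev_issue_flush() as declared above; example_flush_cache() is an illustrative name, and passing NULL for the error-sector pointer is an assumption that the caller does not need to know the failing sector:

    #include <linux/blkdev.h>

    static int example_flush_cache(struct block_device *bdev)
    {
            return blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
    }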
928 | 930 | ||
929 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); | 931 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); |
930 | 932 | ||
931 | enum blk_default_limits { | 933 | enum blk_default_limits { |
932 | BLK_MAX_SEGMENTS = 128, | 934 | BLK_MAX_SEGMENTS = 128, |
933 | BLK_SAFE_MAX_SECTORS = 255, | 935 | BLK_SAFE_MAX_SECTORS = 255, |
934 | BLK_DEF_MAX_SECTORS = 1024, | 936 | BLK_DEF_MAX_SECTORS = 1024, |
935 | BLK_MAX_SEGMENT_SIZE = 65536, | 937 | BLK_MAX_SEGMENT_SIZE = 65536, |
936 | BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, | 938 | BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, |
937 | }; | 939 | }; |
938 | 940 | ||
939 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) | 941 | #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) |
940 | 942 | ||
941 | static inline unsigned long queue_bounce_pfn(struct request_queue *q) | 943 | static inline unsigned long queue_bounce_pfn(struct request_queue *q) |
942 | { | 944 | { |
943 | return q->limits.bounce_pfn; | 945 | return q->limits.bounce_pfn; |
944 | } | 946 | } |
945 | 947 | ||
946 | static inline unsigned long queue_segment_boundary(struct request_queue *q) | 948 | static inline unsigned long queue_segment_boundary(struct request_queue *q) |
947 | { | 949 | { |
948 | return q->limits.seg_boundary_mask; | 950 | return q->limits.seg_boundary_mask; |
949 | } | 951 | } |
950 | 952 | ||
951 | static inline unsigned int queue_max_sectors(struct request_queue *q) | 953 | static inline unsigned int queue_max_sectors(struct request_queue *q) |
952 | { | 954 | { |
953 | return q->limits.max_sectors; | 955 | return q->limits.max_sectors; |
954 | } | 956 | } |
955 | 957 | ||
956 | static inline unsigned int queue_max_hw_sectors(struct request_queue *q) | 958 | static inline unsigned int queue_max_hw_sectors(struct request_queue *q) |
957 | { | 959 | { |
958 | return q->limits.max_hw_sectors; | 960 | return q->limits.max_hw_sectors; |
959 | } | 961 | } |
960 | 962 | ||
961 | static inline unsigned short queue_max_segments(struct request_queue *q) | 963 | static inline unsigned short queue_max_segments(struct request_queue *q) |
962 | { | 964 | { |
963 | return q->limits.max_segments; | 965 | return q->limits.max_segments; |
964 | } | 966 | } |
965 | 967 | ||
966 | static inline unsigned int queue_max_segment_size(struct request_queue *q) | 968 | static inline unsigned int queue_max_segment_size(struct request_queue *q) |
967 | { | 969 | { |
968 | return q->limits.max_segment_size; | 970 | return q->limits.max_segment_size; |
969 | } | 971 | } |
970 | 972 | ||
971 | static inline unsigned short queue_logical_block_size(struct request_queue *q) | 973 | static inline unsigned short queue_logical_block_size(struct request_queue *q) |
972 | { | 974 | { |
973 | int retval = 512; | 975 | int retval = 512; |
974 | 976 | ||
975 | if (q && q->limits.logical_block_size) | 977 | if (q && q->limits.logical_block_size) |
976 | retval = q->limits.logical_block_size; | 978 | retval = q->limits.logical_block_size; |
977 | 979 | ||
978 | return retval; | 980 | return retval; |
979 | } | 981 | } |
980 | 982 | ||
981 | static inline unsigned short bdev_logical_block_size(struct block_device *bdev) | 983 | static inline unsigned short bdev_logical_block_size(struct block_device *bdev) |
982 | { | 984 | { |
983 | return queue_logical_block_size(bdev_get_queue(bdev)); | 985 | return queue_logical_block_size(bdev_get_queue(bdev)); |
984 | } | 986 | } |
985 | 987 | ||
986 | static inline unsigned int queue_physical_block_size(struct request_queue *q) | 988 | static inline unsigned int queue_physical_block_size(struct request_queue *q) |
987 | { | 989 | { |
988 | return q->limits.physical_block_size; | 990 | return q->limits.physical_block_size; |
989 | } | 991 | } |
990 | 992 | ||
991 | static inline int bdev_physical_block_size(struct block_device *bdev) | 993 | static inline int bdev_physical_block_size(struct block_device *bdev) |
992 | { | 994 | { |
993 | return queue_physical_block_size(bdev_get_queue(bdev)); | 995 | return queue_physical_block_size(bdev_get_queue(bdev)); |
994 | } | 996 | } |
995 | 997 | ||
996 | static inline unsigned int queue_io_min(struct request_queue *q) | 998 | static inline unsigned int queue_io_min(struct request_queue *q) |
997 | { | 999 | { |
998 | return q->limits.io_min; | 1000 | return q->limits.io_min; |
999 | } | 1001 | } |
1000 | 1002 | ||
1001 | static inline int bdev_io_min(struct block_device *bdev) | 1003 | static inline int bdev_io_min(struct block_device *bdev) |
1002 | { | 1004 | { |
1003 | return queue_io_min(bdev_get_queue(bdev)); | 1005 | return queue_io_min(bdev_get_queue(bdev)); |
1004 | } | 1006 | } |
1005 | 1007 | ||
1006 | static inline unsigned int queue_io_opt(struct request_queue *q) | 1008 | static inline unsigned int queue_io_opt(struct request_queue *q) |
1007 | { | 1009 | { |
1008 | return q->limits.io_opt; | 1010 | return q->limits.io_opt; |
1009 | } | 1011 | } |
1010 | 1012 | ||
1011 | static inline int bdev_io_opt(struct block_device *bdev) | 1013 | static inline int bdev_io_opt(struct block_device *bdev) |
1012 | { | 1014 | { |
1013 | return queue_io_opt(bdev_get_queue(bdev)); | 1015 | return queue_io_opt(bdev_get_queue(bdev)); |
1014 | } | 1016 | } |
1015 | 1017 | ||
1016 | static inline int queue_alignment_offset(struct request_queue *q) | 1018 | static inline int queue_alignment_offset(struct request_queue *q) |
1017 | { | 1019 | { |
1018 | if (q->limits.misaligned) | 1020 | if (q->limits.misaligned) |
1019 | return -1; | 1021 | return -1; |
1020 | 1022 | ||
1021 | return q->limits.alignment_offset; | 1023 | return q->limits.alignment_offset; |
1022 | } | 1024 | } |
1023 | 1025 | ||
1024 | static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) | 1026 | static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) |
1025 | { | 1027 | { |
1026 | unsigned int granularity = max(lim->physical_block_size, lim->io_min); | 1028 | unsigned int granularity = max(lim->physical_block_size, lim->io_min); |
1027 | unsigned int alignment = (sector << 9) & (granularity - 1); | 1029 | unsigned int alignment = (sector << 9) & (granularity - 1); |
1028 | 1030 | ||
1029 | return (granularity + lim->alignment_offset - alignment) | 1031 | return (granularity + lim->alignment_offset - alignment) |
1030 | & (granularity - 1); | 1032 | & (granularity - 1); |
1031 | } | 1033 | } |
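A quick worked example of the arithmetic above, assuming a granularity of 4096 bytes (physical block size, with io_min no larger) and an alignment_offset of 0: sector 7 sits at byte offset 7 << 9 = 3584 within its 4096-byte window, so the helper returns (4096 + 0 - 3584) & 4095 = 512, the number of bytes from that position to the next naturally aligned boundary; an already aligned sector yields 0.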
1032 | 1034 | ||
1033 | static inline int bdev_alignment_offset(struct block_device *bdev) | 1035 | static inline int bdev_alignment_offset(struct block_device *bdev) |
1034 | { | 1036 | { |
1035 | struct request_queue *q = bdev_get_queue(bdev); | 1037 | struct request_queue *q = bdev_get_queue(bdev); |
1036 | 1038 | ||
1037 | if (q->limits.misaligned) | 1039 | if (q->limits.misaligned) |
1038 | return -1; | 1040 | return -1; |
1039 | 1041 | ||
1040 | if (bdev != bdev->bd_contains) | 1042 | if (bdev != bdev->bd_contains) |
1041 | return bdev->bd_part->alignment_offset; | 1043 | return bdev->bd_part->alignment_offset; |
1042 | 1044 | ||
1043 | return q->limits.alignment_offset; | 1045 | return q->limits.alignment_offset; |
1044 | } | 1046 | } |
1045 | 1047 | ||
1046 | static inline int queue_discard_alignment(struct request_queue *q) | 1048 | static inline int queue_discard_alignment(struct request_queue *q) |
1047 | { | 1049 | { |
1048 | if (q->limits.discard_misaligned) | 1050 | if (q->limits.discard_misaligned) |
1049 | return -1; | 1051 | return -1; |
1050 | 1052 | ||
1051 | return q->limits.discard_alignment; | 1053 | return q->limits.discard_alignment; |
1052 | } | 1054 | } |
1053 | 1055 | ||
1054 | static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) | 1056 | static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) |
1055 | { | 1057 | { |
1056 | unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); | 1058 | unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); |
1057 | 1059 | ||
1058 | return (lim->discard_granularity + lim->discard_alignment - alignment) | 1060 | return (lim->discard_granularity + lim->discard_alignment - alignment) |
1059 | & (lim->discard_granularity - 1); | 1061 | & (lim->discard_granularity - 1); |
1060 | } | 1062 | } |
1061 | 1063 | ||
1062 | static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) | 1064 | static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) |
1063 | { | 1065 | { |
1064 | if (q->limits.discard_zeroes_data == 1) | 1066 | if (q->limits.discard_zeroes_data == 1) |
1065 | return 1; | 1067 | return 1; |
1066 | 1068 | ||
1067 | return 0; | 1069 | return 0; |
1068 | } | 1070 | } |
1069 | 1071 | ||
1070 | static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) | 1072 | static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) |
1071 | { | 1073 | { |
1072 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); | 1074 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); |
1073 | } | 1075 | } |
1074 | 1076 | ||
1075 | static inline int queue_dma_alignment(struct request_queue *q) | 1077 | static inline int queue_dma_alignment(struct request_queue *q) |
1076 | { | 1078 | { |
1077 | return q ? q->dma_alignment : 511; | 1079 | return q ? q->dma_alignment : 511; |
1078 | } | 1080 | } |
1079 | 1081 | ||
1080 | static inline int blk_rq_aligned(struct request_queue *q, void *addr, | 1082 | static inline int blk_rq_aligned(struct request_queue *q, void *addr, |
1081 | unsigned int len) | 1083 | unsigned int len) |
1082 | { | 1084 | { |
1083 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; | 1085 | unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; |
1084 | return !((unsigned long)addr & alignment) && !(len & alignment); | 1086 | return !((unsigned long)addr & alignment) && !(len & alignment); |
1085 | } | 1087 | } |
1086 | 1088 | ||
1087 | /* assumes size > 256 */ | 1089 | /* assumes size > 256 */ |
1088 | static inline unsigned int blksize_bits(unsigned int size) | 1090 | static inline unsigned int blksize_bits(unsigned int size) |
1089 | { | 1091 | { |
1090 | unsigned int bits = 8; | 1092 | unsigned int bits = 8; |
1091 | do { | 1093 | do { |
1092 | bits++; | 1094 | bits++; |
1093 | size >>= 1; | 1095 | size >>= 1; |
1094 | } while (size > 256); | 1096 | } while (size > 256); |
1095 | return bits; | 1097 | return bits; |
1096 | } | 1098 | } |
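For example, blksize_bits(4096) shifts the size down through 2048, 1024, 512 and 256 while bumping bits from 8 to 12 and returns 12 (2^12 = 4096); blksize_bits(512) returns 9.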
1097 | 1099 | ||
1098 | static inline unsigned int block_size(struct block_device *bdev) | 1100 | static inline unsigned int block_size(struct block_device *bdev) |
1099 | { | 1101 | { |
1100 | return bdev->bd_block_size; | 1102 | return bdev->bd_block_size; |
1101 | } | 1103 | } |
1102 | 1104 | ||
1103 | typedef struct {struct page *v;} Sector; | 1105 | typedef struct {struct page *v;} Sector; |
1104 | 1106 | ||
1105 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); | 1107 | unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); |
1106 | 1108 | ||
1107 | static inline void put_dev_sector(Sector p) | 1109 | static inline void put_dev_sector(Sector p) |
1108 | { | 1110 | { |
1109 | page_cache_release(p.v); | 1111 | page_cache_release(p.v); |
1110 | } | 1112 | } |
1111 | 1113 | ||
1112 | struct work_struct; | 1114 | struct work_struct; |
1113 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1115 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
1114 | 1116 | ||
1115 | #ifdef CONFIG_BLK_CGROUP | 1117 | #ifdef CONFIG_BLK_CGROUP |
1116 | /* | 1118 | /* |
1117 | * This should not be using sched_clock(). A real patch is in progress | 1119 | * This should not be using sched_clock(). A real patch is in progress |
1118 | * to fix this up, until that is in place we need to disable preemption | 1120 | * to fix this up, until that is in place we need to disable preemption |
1119 | * around sched_clock() in this function and set_io_start_time_ns(). | 1121 | * around sched_clock() in this function and set_io_start_time_ns(). |
1120 | */ | 1122 | */ |
1121 | static inline void set_start_time_ns(struct request *req) | 1123 | static inline void set_start_time_ns(struct request *req) |
1122 | { | 1124 | { |
1123 | preempt_disable(); | 1125 | preempt_disable(); |
1124 | req->start_time_ns = sched_clock(); | 1126 | req->start_time_ns = sched_clock(); |
1125 | preempt_enable(); | 1127 | preempt_enable(); |
1126 | } | 1128 | } |
1127 | 1129 | ||
1128 | static inline void set_io_start_time_ns(struct request *req) | 1130 | static inline void set_io_start_time_ns(struct request *req) |
1129 | { | 1131 | { |
1130 | preempt_disable(); | 1132 | preempt_disable(); |
1131 | req->io_start_time_ns = sched_clock(); | 1133 | req->io_start_time_ns = sched_clock(); |
1132 | preempt_enable(); | 1134 | preempt_enable(); |
1133 | } | 1135 | } |
1134 | 1136 | ||
1135 | static inline uint64_t rq_start_time_ns(struct request *req) | 1137 | static inline uint64_t rq_start_time_ns(struct request *req) |
1136 | { | 1138 | { |
1137 | return req->start_time_ns; | 1139 | return req->start_time_ns; |
1138 | } | 1140 | } |
1139 | 1141 | ||
1140 | static inline uint64_t rq_io_start_time_ns(struct request *req) | 1142 | static inline uint64_t rq_io_start_time_ns(struct request *req) |
1141 | { | 1143 | { |
1142 | return req->io_start_time_ns; | 1144 | return req->io_start_time_ns; |
1143 | } | 1145 | } |
1144 | #else | 1146 | #else |
1145 | static inline void set_start_time_ns(struct request *req) {} | 1147 | static inline void set_start_time_ns(struct request *req) {} |
1146 | static inline void set_io_start_time_ns(struct request *req) {} | 1148 | static inline void set_io_start_time_ns(struct request *req) {} |
1147 | static inline uint64_t rq_start_time_ns(struct request *req) | 1149 | static inline uint64_t rq_start_time_ns(struct request *req) |
1148 | { | 1150 | { |
1149 | return 0; | 1151 | return 0; |
1150 | } | 1152 | } |
1151 | static inline uint64_t rq_io_start_time_ns(struct request *req) | 1153 | static inline uint64_t rq_io_start_time_ns(struct request *req) |
1152 | { | 1154 | { |
1153 | return 0; | 1155 | return 0; |
1154 | } | 1156 | } |
1155 | #endif | 1157 | #endif |
1156 | 1158 | ||
1157 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 1159 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
1158 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 1160 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
1159 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 1161 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
1160 | MODULE_ALIAS("block-major-" __stringify(major) "-*") | 1162 | MODULE_ALIAS("block-major-" __stringify(major) "-*") |
1161 | 1163 | ||
1162 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 1164 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
1163 | 1165 | ||
1164 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ | 1166 | #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ |
1165 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ | 1167 | #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ |
1166 | 1168 | ||
1167 | struct blk_integrity_exchg { | 1169 | struct blk_integrity_exchg { |
1168 | void *prot_buf; | 1170 | void *prot_buf; |
1169 | void *data_buf; | 1171 | void *data_buf; |
1170 | sector_t sector; | 1172 | sector_t sector; |
1171 | unsigned int data_size; | 1173 | unsigned int data_size; |
1172 | unsigned short sector_size; | 1174 | unsigned short sector_size; |
1173 | const char *disk_name; | 1175 | const char *disk_name; |
1174 | }; | 1176 | }; |
1175 | 1177 | ||
1176 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); | 1178 | typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); |
1177 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); | 1179 | typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); |
1178 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); | 1180 | typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); |
1179 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); | 1181 | typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); |
1180 | 1182 | ||
1181 | struct blk_integrity { | 1183 | struct blk_integrity { |
1182 | integrity_gen_fn *generate_fn; | 1184 | integrity_gen_fn *generate_fn; |
1183 | integrity_vrfy_fn *verify_fn; | 1185 | integrity_vrfy_fn *verify_fn; |
1184 | integrity_set_tag_fn *set_tag_fn; | 1186 | integrity_set_tag_fn *set_tag_fn; |
1185 | integrity_get_tag_fn *get_tag_fn; | 1187 | integrity_get_tag_fn *get_tag_fn; |
1186 | 1188 | ||
1187 | unsigned short flags; | 1189 | unsigned short flags; |
1188 | unsigned short tuple_size; | 1190 | unsigned short tuple_size; |
1189 | unsigned short sector_size; | 1191 | unsigned short sector_size; |
1190 | unsigned short tag_size; | 1192 | unsigned short tag_size; |
1191 | 1193 | ||
1192 | const char *name; | 1194 | const char *name; |
1193 | 1195 | ||
1194 | struct kobject kobj; | 1196 | struct kobject kobj; |
1195 | }; | 1197 | }; |
1196 | 1198 | ||
1197 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); | 1199 | extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); |
1198 | extern void blk_integrity_unregister(struct gendisk *); | 1200 | extern void blk_integrity_unregister(struct gendisk *); |
1199 | extern int blk_integrity_compare(struct gendisk *, struct gendisk *); | 1201 | extern int blk_integrity_compare(struct gendisk *, struct gendisk *); |
1200 | extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); | 1202 | extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); |
1201 | extern int blk_rq_count_integrity_sg(struct request *); | 1203 | extern int blk_rq_count_integrity_sg(struct request *); |
1202 | 1204 | ||
1203 | static inline | 1205 | static inline |
1204 | struct blk_integrity *bdev_get_integrity(struct block_device *bdev) | 1206 | struct blk_integrity *bdev_get_integrity(struct block_device *bdev) |
1205 | { | 1207 | { |
1206 | return bdev->bd_disk->integrity; | 1208 | return bdev->bd_disk->integrity; |
1207 | } | 1209 | } |
1208 | 1210 | ||
1209 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | 1211 | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) |
1210 | { | 1212 | { |
1211 | return disk->integrity; | 1213 | return disk->integrity; |
1212 | } | 1214 | } |
1213 | 1215 | ||
1214 | static inline int blk_integrity_rq(struct request *rq) | 1216 | static inline int blk_integrity_rq(struct request *rq) |
1215 | { | 1217 | { |
1216 | if (rq->bio == NULL) | 1218 | if (rq->bio == NULL) |
1217 | return 0; | 1219 | return 0; |
1218 | 1220 | ||
1219 | return bio_integrity(rq->bio); | 1221 | return bio_integrity(rq->bio); |
1220 | } | 1222 | } |
1221 | 1223 | ||
1222 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | 1224 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
1223 | 1225 | ||
1224 | #define blk_integrity_rq(rq) (0) | 1226 | #define blk_integrity_rq(rq) (0) |
1225 | #define blk_rq_count_integrity_sg(a) (0) | 1227 | #define blk_rq_count_integrity_sg(a) (0) |
1226 | #define blk_rq_map_integrity_sg(a, b) (0) | 1228 | #define blk_rq_map_integrity_sg(a, b) (0) |
1227 | #define bdev_get_integrity(a) (0) | 1229 | #define bdev_get_integrity(a) (0) |
1228 | #define blk_get_integrity(a) (0) | 1230 | #define blk_get_integrity(a) (0) |
1229 | #define blk_integrity_compare(a, b) (0) | 1231 | #define blk_integrity_compare(a, b) (0) |
1230 | #define blk_integrity_register(a, b) (0) | 1232 | #define blk_integrity_register(a, b) (0) |
1231 | #define blk_integrity_unregister(a) do { } while (0); | 1233 | #define blk_integrity_unregister(a) do { } while (0); |
1232 | 1234 | ||
1233 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 1235 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
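
A hypothetical sketch of how a driver of this era would use the integrity hooks declared above; the example_* callbacks, the profile name and the tuple size are invented for illustration and are not taken from this patch.

	/* Sketch: register a DIF-style integrity profile for a disk.
	 * Callback signatures follow integrity_gen_fn/integrity_vrfy_fn above. */
	static void example_generate(struct blk_integrity_exchg *bix)
	{
		/* compute protection info for bix->data_buf into bix->prot_buf */
	}

	static int example_verify(struct blk_integrity_exchg *bix)
	{
		/* return 0 if bix->prot_buf matches bix->data_buf, -EIO otherwise */
		return 0;
	}

	static struct blk_integrity example_integrity = {
		.name		= "EXAMPLE-DIF-TYPE1",	/* illustrative name */
		.generate_fn	= example_generate,
		.verify_fn	= example_verify,
		.tuple_size	= 8,			/* assumed 8-byte tuple */
		.tag_size	= 0,
	};

	static int example_attach_integrity(struct gendisk *disk)
	{
		return blk_integrity_register(disk, &example_integrity);
	}
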
1234 | 1236 | ||
1235 | struct block_device_operations { | 1237 | struct block_device_operations { |
1236 | int (*open) (struct block_device *, fmode_t); | 1238 | int (*open) (struct block_device *, fmode_t); |
1237 | int (*release) (struct gendisk *, fmode_t); | 1239 | int (*release) (struct gendisk *, fmode_t); |
1238 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1240 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1239 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1241 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1240 | int (*direct_access) (struct block_device *, sector_t, | 1242 | int (*direct_access) (struct block_device *, sector_t, |
1241 | void **, unsigned long *); | 1243 | void **, unsigned long *); |
1242 | int (*media_changed) (struct gendisk *); | 1244 | int (*media_changed) (struct gendisk *); |
1243 | void (*unlock_native_capacity) (struct gendisk *); | 1245 | void (*unlock_native_capacity) (struct gendisk *); |
1244 | int (*revalidate_disk) (struct gendisk *); | 1246 | int (*revalidate_disk) (struct gendisk *); |
1245 | int (*getgeo)(struct block_device *, struct hd_geometry *); | 1247 | int (*getgeo)(struct block_device *, struct hd_geometry *); |
1246 | /* this callback is with swap_lock and sometimes page table lock held */ | 1248 | /* this callback is with swap_lock and sometimes page table lock held */ |
1247 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 1249 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
1248 | struct module *owner; | 1250 | struct module *owner; |
1249 | }; | 1251 | }; |
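
For reference, a minimal driver-side initializer for the block_device_operations table above; the example_* callbacks and the geometry values are made up for illustration, and hooks a driver does not need may simply be left NULL.

	/* Sketch: minimal fops for a simple block driver of this era. */
	static int example_open(struct block_device *bdev, fmode_t mode)
	{
		return 0;
	}

	static int example_getgeo(struct block_device *bdev, struct hd_geometry *geo)
	{
		/* fabricated geometry, as many RAM/virtual disks report */
		geo->heads = 4;
		geo->sectors = 16;
		geo->cylinders = get_capacity(bdev->bd_disk) >> 6;
		return 0;
	}

	static const struct block_device_operations example_fops = {
		.owner	= THIS_MODULE,
		.open	= example_open,
		.getgeo	= example_getgeo,
	};
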
1250 | 1252 | ||
1251 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | 1253 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, |
1252 | unsigned long); | 1254 | unsigned long); |
1253 | #else /* CONFIG_BLOCK */ | 1255 | #else /* CONFIG_BLOCK */ |
1254 | /* | 1256 | /* |
1255 | * stubs for when the block layer is configured out | 1257 | * stubs for when the block layer is configured out |
1256 | */ | 1258 | */ |
1257 | #define buffer_heads_over_limit 0 | 1259 | #define buffer_heads_over_limit 0 |
1258 | 1260 | ||
1259 | static inline long nr_blockdev_pages(void) | 1261 | static inline long nr_blockdev_pages(void) |
1260 | { | 1262 | { |
1261 | return 0; | 1263 | return 0; |
1262 | } | 1264 | } |
1263 | 1265 | ||
1264 | #endif /* CONFIG_BLOCK */ | 1266 | #endif /* CONFIG_BLOCK */ |
1265 | 1267 | ||
1266 | #endif | 1268 | #endif |
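
Tying the header back to the interface change in this commit, a driver's queue setup would now advertise its cache behaviour with blk_queue_flush() instead of blk_queue_ordered(); the exact call site below is illustrative only.

	/* Sketch: device has a volatile write cache it can flush and also
	 * honours FUA writes, so advertise both capabilities. */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

	/* A write-through / cacheless device passes 0, which is the default,
	 * or can skip the call entirely. */
	blk_queue_flush(q, 0);
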