Commit 6878c32e5cc0e40980abe51d1f02fb453e27493e

Authored by Konrad Rzeszutek Wilk
1 parent 8c9ce606a6

xen/blkfront: Add WARN to deal with misbehaving backends.

Part of the ring structure is the 'id' field which is under
control of the frontend. The frontend stamps it with "some"
value (in this implementation, a value less than
BLK_RING_SIZE), and when it gets a response it expects said
value to be in the response structure. We have a check
for the id field when spooling new requests but not when
de-spooling responses.

We also add an extra check in add_id_to_freelist to make
sure that the 'struct request' is not NULL, as we cannot
pass a NULL to __blk_end_request_all - that would crash
(and every operation the response handler deals with ends
up calling __blk_end_request_all).

Lastly we also print the name of the operation that failed.

[v1: s/BUG/WARN/ suggested by Stefano]
[v2: Add extra check in add_id_to_freelist]
[v3: Redid op_name per Jan's suggestion]
[v4: add const * and add WARN on failure returns]
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
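
For a quick orientation before reading the full diff, here is a condensed
sketch of the two validation paths this patch introduces, taken from the
hunks below (the surrounding response loop and locking are elided;
op_name() is the new helper that maps a BLKIF_OP_* value to a printable
name):

    /* Sketch only - see add_id_to_freelist() and blkif_interrupt() below. */
    static int add_id_to_freelist(struct blkfront_info *info,
                                  unsigned long id)
    {
            if (info->shadow[id].req.u.rw.id != id)
                    return -EINVAL;   /* backend replayed or corrupted the id */
            if (info->shadow[id].request == NULL)
                    return -EINVAL;   /* nothing to hand to __blk_end_request_all() */
            info->shadow[id].req.u.rw.id = info->shadow_free;
            info->shadow[id].request = NULL;
            info->shadow_free = id;
            return 0;
    }

    /* In the response loop of blkif_interrupt(): */
            if (id >= BLK_RING_SIZE) {
                    WARN(1, "%s: response to %s has incorrect id (%ld)\n",
                         info->gd->disk_name, op_name(bret->operation), id);
                    continue;   /* id is busted; skip rather than crash */
            }
            ...
            if (add_id_to_freelist(info, id)) {
                    WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
                         info->gd->disk_name, op_name(bret->operation), id);
                    continue;
            }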

Showing 1 changed file with 46 additions and 12 deletions

drivers/block/xen-blkfront.c
1 /* 1 /*
2 * blkfront.c 2 * blkfront.c
3 * 3 *
4 * XenLinux virtual block device driver. 4 * XenLinux virtual block device driver.
5 * 5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach 8 * Copyright (c) 2004, Christian Limpach
9 * Copyright (c) 2004, Andrew Warfield 9 * Copyright (c) 2004, Andrew Warfield
10 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2005, Christopher Clark
11 * Copyright (c) 2005, XenSource Ltd 11 * Copyright (c) 2005, XenSource Ltd
12 * 12 *
13 * This program is free software; you can redistribute it and/or 13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version 2 14 * modify it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation; or, when distributed 15 * as published by the Free Software Foundation; or, when distributed
16 * separately from the Linux kernel or incorporated into other 16 * separately from the Linux kernel or incorporated into other
17 * software packages, subject to the following license: 17 * software packages, subject to the following license:
18 * 18 *
19 * Permission is hereby granted, free of charge, to any person obtaining a copy 19 * Permission is hereby granted, free of charge, to any person obtaining a copy
20 * of this source file (the "Software"), to deal in the Software without 20 * of this source file (the "Software"), to deal in the Software without
21 * restriction, including without limitation the rights to use, copy, modify, 21 * restriction, including without limitation the rights to use, copy, modify,
22 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23 * and to permit persons to whom the Software is furnished to do so, subject to 23 * and to permit persons to whom the Software is furnished to do so, subject to
24 * the following conditions: 24 * the following conditions:
25 * 25 *
26 * The above copyright notice and this permission notice shall be included in 26 * The above copyright notice and this permission notice shall be included in
27 * all copies or substantial portions of the Software. 27 * all copies or substantial portions of the Software.
28 * 28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 * IN THE SOFTWARE. 35 * IN THE SOFTWARE.
36 */ 36 */
37 37
38 #include <linux/interrupt.h> 38 #include <linux/interrupt.h>
39 #include <linux/blkdev.h> 39 #include <linux/blkdev.h>
40 #include <linux/hdreg.h> 40 #include <linux/hdreg.h>
41 #include <linux/cdrom.h> 41 #include <linux/cdrom.h>
42 #include <linux/module.h> 42 #include <linux/module.h>
43 #include <linux/slab.h> 43 #include <linux/slab.h>
44 #include <linux/mutex.h> 44 #include <linux/mutex.h>
45 #include <linux/scatterlist.h> 45 #include <linux/scatterlist.h>
46 #include <linux/bitmap.h> 46 #include <linux/bitmap.h>
47 47
48 #include <xen/xen.h> 48 #include <xen/xen.h>
49 #include <xen/xenbus.h> 49 #include <xen/xenbus.h>
50 #include <xen/grant_table.h> 50 #include <xen/grant_table.h>
51 #include <xen/events.h> 51 #include <xen/events.h>
52 #include <xen/page.h> 52 #include <xen/page.h>
53 #include <xen/platform_pci.h> 53 #include <xen/platform_pci.h>
54 54
55 #include <xen/interface/grant_table.h> 55 #include <xen/interface/grant_table.h>
56 #include <xen/interface/io/blkif.h> 56 #include <xen/interface/io/blkif.h>
57 #include <xen/interface/io/protocols.h> 57 #include <xen/interface/io/protocols.h>
58 58
59 #include <asm/xen/hypervisor.h> 59 #include <asm/xen/hypervisor.h>
60 60
61 enum blkif_state { 61 enum blkif_state {
62 BLKIF_STATE_DISCONNECTED, 62 BLKIF_STATE_DISCONNECTED,
63 BLKIF_STATE_CONNECTED, 63 BLKIF_STATE_CONNECTED,
64 BLKIF_STATE_SUSPENDED, 64 BLKIF_STATE_SUSPENDED,
65 }; 65 };
66 66
67 struct blk_shadow { 67 struct blk_shadow {
68 struct blkif_request req; 68 struct blkif_request req;
69 struct request *request; 69 struct request *request;
70 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 70 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
71 }; 71 };
72 72
73 static DEFINE_MUTEX(blkfront_mutex); 73 static DEFINE_MUTEX(blkfront_mutex);
74 static const struct block_device_operations xlvbd_block_fops; 74 static const struct block_device_operations xlvbd_block_fops;
75 75
76 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 76 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
77 77
78 /* 78 /*
79 * We have one of these per vbd, whether ide, scsi or 'other'. They 79 * We have one of these per vbd, whether ide, scsi or 'other'. They
80 * hang in private_data off the gendisk structure. We may end up 80 * hang in private_data off the gendisk structure. We may end up
81 * putting all kinds of interesting stuff here :-) 81 * putting all kinds of interesting stuff here :-)
82 */ 82 */
83 struct blkfront_info 83 struct blkfront_info
84 { 84 {
85 spinlock_t io_lock; 85 spinlock_t io_lock;
86 struct mutex mutex; 86 struct mutex mutex;
87 struct xenbus_device *xbdev; 87 struct xenbus_device *xbdev;
88 struct gendisk *gd; 88 struct gendisk *gd;
89 int vdevice; 89 int vdevice;
90 blkif_vdev_t handle; 90 blkif_vdev_t handle;
91 enum blkif_state connected; 91 enum blkif_state connected;
92 int ring_ref; 92 int ring_ref;
93 struct blkif_front_ring ring; 93 struct blkif_front_ring ring;
94 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 94 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
95 unsigned int evtchn, irq; 95 unsigned int evtchn, irq;
96 struct request_queue *rq; 96 struct request_queue *rq;
97 struct work_struct work; 97 struct work_struct work;
98 struct gnttab_free_callback callback; 98 struct gnttab_free_callback callback;
99 struct blk_shadow shadow[BLK_RING_SIZE]; 99 struct blk_shadow shadow[BLK_RING_SIZE];
100 unsigned long shadow_free; 100 unsigned long shadow_free;
101 unsigned int feature_flush; 101 unsigned int feature_flush;
102 unsigned int flush_op; 102 unsigned int flush_op;
103 unsigned int feature_discard:1; 103 unsigned int feature_discard:1;
104 unsigned int feature_secdiscard:1; 104 unsigned int feature_secdiscard:1;
105 unsigned int discard_granularity; 105 unsigned int discard_granularity;
106 unsigned int discard_alignment; 106 unsigned int discard_alignment;
107 int is_ready; 107 int is_ready;
108 }; 108 };
109 109
110 static unsigned int nr_minors; 110 static unsigned int nr_minors;
111 static unsigned long *minors; 111 static unsigned long *minors;
112 static DEFINE_SPINLOCK(minor_lock); 112 static DEFINE_SPINLOCK(minor_lock);
113 113
114 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 114 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
115 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 115 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
116 #define GRANT_INVALID_REF 0 116 #define GRANT_INVALID_REF 0
117 117
118 #define PARTS_PER_DISK 16 118 #define PARTS_PER_DISK 16
119 #define PARTS_PER_EXT_DISK 256 119 #define PARTS_PER_EXT_DISK 256
120 120
121 #define BLKIF_MAJOR(dev) ((dev)>>8) 121 #define BLKIF_MAJOR(dev) ((dev)>>8)
122 #define BLKIF_MINOR(dev) ((dev) & 0xff) 122 #define BLKIF_MINOR(dev) ((dev) & 0xff)
123 123
124 #define EXT_SHIFT 28 124 #define EXT_SHIFT 28
125 #define EXTENDED (1<<EXT_SHIFT) 125 #define EXTENDED (1<<EXT_SHIFT)
126 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 126 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
127 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 127 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
128 #define EMULATED_HD_DISK_MINOR_OFFSET (0) 128 #define EMULATED_HD_DISK_MINOR_OFFSET (0)
129 #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) 129 #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
130 #define EMULATED_SD_DISK_MINOR_OFFSET (0) 130 #define EMULATED_SD_DISK_MINOR_OFFSET (0)
131 #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) 131 #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
132 132
133 #define DEV_NAME "xvd" /* name in /dev */ 133 #define DEV_NAME "xvd" /* name in /dev */
134 134
135 static int get_id_from_freelist(struct blkfront_info *info) 135 static int get_id_from_freelist(struct blkfront_info *info)
136 { 136 {
137 unsigned long free = info->shadow_free; 137 unsigned long free = info->shadow_free;
138 BUG_ON(free >= BLK_RING_SIZE); 138 BUG_ON(free >= BLK_RING_SIZE);
139 info->shadow_free = info->shadow[free].req.u.rw.id; 139 info->shadow_free = info->shadow[free].req.u.rw.id;
140 info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ 140 info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
141 return free; 141 return free;
142 } 142 }
143 143
144 static void add_id_to_freelist(struct blkfront_info *info, 144 static int add_id_to_freelist(struct blkfront_info *info,
145 unsigned long id) 145 unsigned long id)
146 { 146 {
147 if (info->shadow[id].req.u.rw.id != id)
148 return -EINVAL;
149 if (info->shadow[id].request == NULL)
150 return -EINVAL;
147 info->shadow[id].req.u.rw.id = info->shadow_free; 151 info->shadow[id].req.u.rw.id = info->shadow_free;
148 info->shadow[id].request = NULL; 152 info->shadow[id].request = NULL;
149 info->shadow_free = id; 153 info->shadow_free = id;
154 return 0;
150 } 155 }
151 156
157 static const char *op_name(int op)
158 {
159 static const char *const names[] = {
160 [BLKIF_OP_READ] = "read",
161 [BLKIF_OP_WRITE] = "write",
162 [BLKIF_OP_WRITE_BARRIER] = "barrier",
163 [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
164 [BLKIF_OP_DISCARD] = "discard" };
165
166 if (op < 0 || op >= ARRAY_SIZE(names))
167 return "unknown";
168
169 if (!names[op])
170 return "reserved";
171
172 return names[op];
173 }
152 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) 174 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
153 { 175 {
154 unsigned int end = minor + nr; 176 unsigned int end = minor + nr;
155 int rc; 177 int rc;
156 178
157 if (end > nr_minors) { 179 if (end > nr_minors) {
158 unsigned long *bitmap, *old; 180 unsigned long *bitmap, *old;
159 181
160 bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap), 182 bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
161 GFP_KERNEL); 183 GFP_KERNEL);
162 if (bitmap == NULL) 184 if (bitmap == NULL)
163 return -ENOMEM; 185 return -ENOMEM;
164 186
165 spin_lock(&minor_lock); 187 spin_lock(&minor_lock);
166 if (end > nr_minors) { 188 if (end > nr_minors) {
167 old = minors; 189 old = minors;
168 memcpy(bitmap, minors, 190 memcpy(bitmap, minors,
169 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); 191 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
170 minors = bitmap; 192 minors = bitmap;
171 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; 193 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
172 } else 194 } else
173 old = bitmap; 195 old = bitmap;
174 spin_unlock(&minor_lock); 196 spin_unlock(&minor_lock);
175 kfree(old); 197 kfree(old);
176 } 198 }
177 199
178 spin_lock(&minor_lock); 200 spin_lock(&minor_lock);
179 if (find_next_bit(minors, end, minor) >= end) { 201 if (find_next_bit(minors, end, minor) >= end) {
180 bitmap_set(minors, minor, nr); 202 bitmap_set(minors, minor, nr);
181 rc = 0; 203 rc = 0;
182 } else 204 } else
183 rc = -EBUSY; 205 rc = -EBUSY;
184 spin_unlock(&minor_lock); 206 spin_unlock(&minor_lock);
185 207
186 return rc; 208 return rc;
187 } 209 }
188 210
189 static void xlbd_release_minors(unsigned int minor, unsigned int nr) 211 static void xlbd_release_minors(unsigned int minor, unsigned int nr)
190 { 212 {
191 unsigned int end = minor + nr; 213 unsigned int end = minor + nr;
192 214
193 BUG_ON(end > nr_minors); 215 BUG_ON(end > nr_minors);
194 spin_lock(&minor_lock); 216 spin_lock(&minor_lock);
195 bitmap_clear(minors, minor, nr); 217 bitmap_clear(minors, minor, nr);
196 spin_unlock(&minor_lock); 218 spin_unlock(&minor_lock);
197 } 219 }
198 220
199 static void blkif_restart_queue_callback(void *arg) 221 static void blkif_restart_queue_callback(void *arg)
200 { 222 {
201 struct blkfront_info *info = (struct blkfront_info *)arg; 223 struct blkfront_info *info = (struct blkfront_info *)arg;
202 schedule_work(&info->work); 224 schedule_work(&info->work);
203 } 225 }
204 226
205 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) 227 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
206 { 228 {
207 /* We don't have real geometry info, but let's at least return 229 /* We don't have real geometry info, but let's at least return
208 values consistent with the size of the device */ 230 values consistent with the size of the device */
209 sector_t nsect = get_capacity(bd->bd_disk); 231 sector_t nsect = get_capacity(bd->bd_disk);
210 sector_t cylinders = nsect; 232 sector_t cylinders = nsect;
211 233
212 hg->heads = 0xff; 234 hg->heads = 0xff;
213 hg->sectors = 0x3f; 235 hg->sectors = 0x3f;
214 sector_div(cylinders, hg->heads * hg->sectors); 236 sector_div(cylinders, hg->heads * hg->sectors);
215 hg->cylinders = cylinders; 237 hg->cylinders = cylinders;
216 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) 238 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
217 hg->cylinders = 0xffff; 239 hg->cylinders = 0xffff;
218 return 0; 240 return 0;
219 } 241 }
220 242
221 static int blkif_ioctl(struct block_device *bdev, fmode_t mode, 243 static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
222 unsigned command, unsigned long argument) 244 unsigned command, unsigned long argument)
223 { 245 {
224 struct blkfront_info *info = bdev->bd_disk->private_data; 246 struct blkfront_info *info = bdev->bd_disk->private_data;
225 int i; 247 int i;
226 248
227 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", 249 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
228 command, (long)argument); 250 command, (long)argument);
229 251
230 switch (command) { 252 switch (command) {
231 case CDROMMULTISESSION: 253 case CDROMMULTISESSION:
232 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); 254 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
233 for (i = 0; i < sizeof(struct cdrom_multisession); i++) 255 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
234 if (put_user(0, (char __user *)(argument + i))) 256 if (put_user(0, (char __user *)(argument + i)))
235 return -EFAULT; 257 return -EFAULT;
236 return 0; 258 return 0;
237 259
238 case CDROM_GET_CAPABILITY: { 260 case CDROM_GET_CAPABILITY: {
239 struct gendisk *gd = info->gd; 261 struct gendisk *gd = info->gd;
240 if (gd->flags & GENHD_FL_CD) 262 if (gd->flags & GENHD_FL_CD)
241 return 0; 263 return 0;
242 return -EINVAL; 264 return -EINVAL;
243 } 265 }
244 266
245 default: 267 default:
246 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 268 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
247 command);*/ 269 command);*/
248 return -EINVAL; /* same return as native Linux */ 270 return -EINVAL; /* same return as native Linux */
249 } 271 }
250 272
251 return 0; 273 return 0;
252 } 274 }
253 275
254 /* 276 /*
255 * Generate a Xen blkfront IO request from a blk layer request. Reads 277 * Generate a Xen blkfront IO request from a blk layer request. Reads
256 * and writes are handled as expected. 278 * and writes are handled as expected.
257 * 279 *
258 * @req: a request struct 280 * @req: a request struct
259 */ 281 */
260 static int blkif_queue_request(struct request *req) 282 static int blkif_queue_request(struct request *req)
261 { 283 {
262 struct blkfront_info *info = req->rq_disk->private_data; 284 struct blkfront_info *info = req->rq_disk->private_data;
263 unsigned long buffer_mfn; 285 unsigned long buffer_mfn;
264 struct blkif_request *ring_req; 286 struct blkif_request *ring_req;
265 unsigned long id; 287 unsigned long id;
266 unsigned int fsect, lsect; 288 unsigned int fsect, lsect;
267 int i, ref; 289 int i, ref;
268 grant_ref_t gref_head; 290 grant_ref_t gref_head;
269 struct scatterlist *sg; 291 struct scatterlist *sg;
270 292
271 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 293 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
272 return 1; 294 return 1;
273 295
274 if (gnttab_alloc_grant_references( 296 if (gnttab_alloc_grant_references(
275 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 297 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
276 gnttab_request_free_callback( 298 gnttab_request_free_callback(
277 &info->callback, 299 &info->callback,
278 blkif_restart_queue_callback, 300 blkif_restart_queue_callback,
279 info, 301 info,
280 BLKIF_MAX_SEGMENTS_PER_REQUEST); 302 BLKIF_MAX_SEGMENTS_PER_REQUEST);
281 return 1; 303 return 1;
282 } 304 }
283 305
284 /* Fill out a communications ring structure. */ 306 /* Fill out a communications ring structure. */
285 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 307 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
286 id = get_id_from_freelist(info); 308 id = get_id_from_freelist(info);
287 info->shadow[id].request = req; 309 info->shadow[id].request = req;
288 310
289 ring_req->u.rw.id = id; 311 ring_req->u.rw.id = id;
290 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 312 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
291 ring_req->u.rw.handle = info->handle; 313 ring_req->u.rw.handle = info->handle;
292 314
293 ring_req->operation = rq_data_dir(req) ? 315 ring_req->operation = rq_data_dir(req) ?
294 BLKIF_OP_WRITE : BLKIF_OP_READ; 316 BLKIF_OP_WRITE : BLKIF_OP_READ;
295 317
296 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { 318 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
297 /* 319 /*
298 * Ideally we can do an unordered flush-to-disk. In case the 320 * Ideally we can do an unordered flush-to-disk. In case the
299 * backend onlysupports barriers, use that. A barrier request 321 * backend onlysupports barriers, use that. A barrier request
300 * a superset of FUA, so we can implement it the same 322 * a superset of FUA, so we can implement it the same
301 * way. (It's also a FLUSH+FUA, since it is 323 * way. (It's also a FLUSH+FUA, since it is
302 * guaranteed ordered WRT previous writes.) 324 * guaranteed ordered WRT previous writes.)
303 */ 325 */
304 ring_req->operation = info->flush_op; 326 ring_req->operation = info->flush_op;
305 } 327 }
306 328
307 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { 329 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
308 /* id, sector_number and handle are set above. */ 330 /* id, sector_number and handle are set above. */
309 ring_req->operation = BLKIF_OP_DISCARD; 331 ring_req->operation = BLKIF_OP_DISCARD;
310 ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 332 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
311 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) 333 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
312 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; 334 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
313 else 335 else
314 ring_req->u.discard.flag = 0; 336 ring_req->u.discard.flag = 0;
315 } else { 337 } else {
316 ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, 338 ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
317 info->sg); 339 info->sg);
318 BUG_ON(ring_req->u.rw.nr_segments > 340 BUG_ON(ring_req->u.rw.nr_segments >
319 BLKIF_MAX_SEGMENTS_PER_REQUEST); 341 BLKIF_MAX_SEGMENTS_PER_REQUEST);
320 342
321 for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { 343 for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
322 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); 344 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
323 fsect = sg->offset >> 9; 345 fsect = sg->offset >> 9;
324 lsect = fsect + (sg->length >> 9) - 1; 346 lsect = fsect + (sg->length >> 9) - 1;
325 /* install a grant reference. */ 347 /* install a grant reference. */
326 ref = gnttab_claim_grant_reference(&gref_head); 348 ref = gnttab_claim_grant_reference(&gref_head);
327 BUG_ON(ref == -ENOSPC); 349 BUG_ON(ref == -ENOSPC);
328 350
329 gnttab_grant_foreign_access_ref( 351 gnttab_grant_foreign_access_ref(
330 ref, 352 ref,
331 info->xbdev->otherend_id, 353 info->xbdev->otherend_id,
332 buffer_mfn, 354 buffer_mfn,
333 rq_data_dir(req)); 355 rq_data_dir(req));
334 356
335 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 357 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
336 ring_req->u.rw.seg[i] = 358 ring_req->u.rw.seg[i] =
337 (struct blkif_request_segment) { 359 (struct blkif_request_segment) {
338 .gref = ref, 360 .gref = ref,
339 .first_sect = fsect, 361 .first_sect = fsect,
340 .last_sect = lsect }; 362 .last_sect = lsect };
341 } 363 }
342 } 364 }
343 365
344 info->ring.req_prod_pvt++; 366 info->ring.req_prod_pvt++;
345 367
346 /* Keep a private copy so we can reissue requests when recovering. */ 368 /* Keep a private copy so we can reissue requests when recovering. */
347 info->shadow[id].req = *ring_req; 369 info->shadow[id].req = *ring_req;
348 370
349 gnttab_free_grant_references(gref_head); 371 gnttab_free_grant_references(gref_head);
350 372
351 return 0; 373 return 0;
352 } 374 }
353 375
354 376
355 static inline void flush_requests(struct blkfront_info *info) 377 static inline void flush_requests(struct blkfront_info *info)
356 { 378 {
357 int notify; 379 int notify;
358 380
359 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 381 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
360 382
361 if (notify) 383 if (notify)
362 notify_remote_via_irq(info->irq); 384 notify_remote_via_irq(info->irq);
363 } 385 }
364 386
365 /* 387 /*
366 * do_blkif_request 388 * do_blkif_request
367 * read a block; request is in a request queue 389 * read a block; request is in a request queue
368 */ 390 */
369 static void do_blkif_request(struct request_queue *rq) 391 static void do_blkif_request(struct request_queue *rq)
370 { 392 {
371 struct blkfront_info *info = NULL; 393 struct blkfront_info *info = NULL;
372 struct request *req; 394 struct request *req;
373 int queued; 395 int queued;
374 396
375 pr_debug("Entered do_blkif_request\n"); 397 pr_debug("Entered do_blkif_request\n");
376 398
377 queued = 0; 399 queued = 0;
378 400
379 while ((req = blk_peek_request(rq)) != NULL) { 401 while ((req = blk_peek_request(rq)) != NULL) {
380 info = req->rq_disk->private_data; 402 info = req->rq_disk->private_data;
381 403
382 if (RING_FULL(&info->ring)) 404 if (RING_FULL(&info->ring))
383 goto wait; 405 goto wait;
384 406
385 blk_start_request(req); 407 blk_start_request(req);
386 408
387 if ((req->cmd_type != REQ_TYPE_FS) || 409 if ((req->cmd_type != REQ_TYPE_FS) ||
388 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && 410 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
389 !info->flush_op)) { 411 !info->flush_op)) {
390 __blk_end_request_all(req, -EIO); 412 __blk_end_request_all(req, -EIO);
391 continue; 413 continue;
392 } 414 }
393 415
394 pr_debug("do_blk_req %p: cmd %p, sec %lx, " 416 pr_debug("do_blk_req %p: cmd %p, sec %lx, "
395 "(%u/%u) buffer:%p [%s]\n", 417 "(%u/%u) buffer:%p [%s]\n",
396 req, req->cmd, (unsigned long)blk_rq_pos(req), 418 req, req->cmd, (unsigned long)blk_rq_pos(req),
397 blk_rq_cur_sectors(req), blk_rq_sectors(req), 419 blk_rq_cur_sectors(req), blk_rq_sectors(req),
398 req->buffer, rq_data_dir(req) ? "write" : "read"); 420 req->buffer, rq_data_dir(req) ? "write" : "read");
399 421
400 if (blkif_queue_request(req)) { 422 if (blkif_queue_request(req)) {
401 blk_requeue_request(rq, req); 423 blk_requeue_request(rq, req);
402 wait: 424 wait:
403 /* Avoid pointless unplugs. */ 425 /* Avoid pointless unplugs. */
404 blk_stop_queue(rq); 426 blk_stop_queue(rq);
405 break; 427 break;
406 } 428 }
407 429
408 queued++; 430 queued++;
409 } 431 }
410 432
411 if (queued != 0) 433 if (queued != 0)
412 flush_requests(info); 434 flush_requests(info);
413 } 435 }
414 436
415 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 437 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
416 { 438 {
417 struct request_queue *rq; 439 struct request_queue *rq;
418 struct blkfront_info *info = gd->private_data; 440 struct blkfront_info *info = gd->private_data;
419 441
420 rq = blk_init_queue(do_blkif_request, &info->io_lock); 442 rq = blk_init_queue(do_blkif_request, &info->io_lock);
421 if (rq == NULL) 443 if (rq == NULL)
422 return -1; 444 return -1;
423 445
424 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); 446 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
425 447
426 if (info->feature_discard) { 448 if (info->feature_discard) {
427 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); 449 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
428 blk_queue_max_discard_sectors(rq, get_capacity(gd)); 450 blk_queue_max_discard_sectors(rq, get_capacity(gd));
429 rq->limits.discard_granularity = info->discard_granularity; 451 rq->limits.discard_granularity = info->discard_granularity;
430 rq->limits.discard_alignment = info->discard_alignment; 452 rq->limits.discard_alignment = info->discard_alignment;
431 if (info->feature_secdiscard) 453 if (info->feature_secdiscard)
432 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); 454 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
433 } 455 }
434 456
435 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 457 /* Hard sector size and max sectors impersonate the equiv. hardware. */
436 blk_queue_logical_block_size(rq, sector_size); 458 blk_queue_logical_block_size(rq, sector_size);
437 blk_queue_max_hw_sectors(rq, 512); 459 blk_queue_max_hw_sectors(rq, 512);
438 460
439 /* Each segment in a request is up to an aligned page in size. */ 461 /* Each segment in a request is up to an aligned page in size. */
440 blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 462 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
441 blk_queue_max_segment_size(rq, PAGE_SIZE); 463 blk_queue_max_segment_size(rq, PAGE_SIZE);
442 464
443 /* Ensure a merged request will fit in a single I/O ring slot. */ 465 /* Ensure a merged request will fit in a single I/O ring slot. */
444 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); 466 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
445 467
446 /* Make sure buffer addresses are sector-aligned. */ 468 /* Make sure buffer addresses are sector-aligned. */
447 blk_queue_dma_alignment(rq, 511); 469 blk_queue_dma_alignment(rq, 511);
448 470
449 /* Make sure we don't use bounce buffers. */ 471 /* Make sure we don't use bounce buffers. */
450 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); 472 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
451 473
452 gd->queue = rq; 474 gd->queue = rq;
453 475
454 return 0; 476 return 0;
455 } 477 }
456 478
457 479
458 static void xlvbd_flush(struct blkfront_info *info) 480 static void xlvbd_flush(struct blkfront_info *info)
459 { 481 {
460 blk_queue_flush(info->rq, info->feature_flush); 482 blk_queue_flush(info->rq, info->feature_flush);
461 printk(KERN_INFO "blkfront: %s: %s: %s\n", 483 printk(KERN_INFO "blkfront: %s: %s: %s\n",
462 info->gd->disk_name, 484 info->gd->disk_name,
463 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 485 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
464 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? 486 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
465 "flush diskcache" : "barrier or flush"), 487 "flush diskcache" : "barrier or flush"),
466 info->feature_flush ? "enabled" : "disabled"); 488 info->feature_flush ? "enabled" : "disabled");
467 } 489 }
468 490
469 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) 491 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
470 { 492 {
471 int major; 493 int major;
472 major = BLKIF_MAJOR(vdevice); 494 major = BLKIF_MAJOR(vdevice);
473 *minor = BLKIF_MINOR(vdevice); 495 *minor = BLKIF_MINOR(vdevice);
474 switch (major) { 496 switch (major) {
475 case XEN_IDE0_MAJOR: 497 case XEN_IDE0_MAJOR:
476 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; 498 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
477 *minor = ((*minor / 64) * PARTS_PER_DISK) + 499 *minor = ((*minor / 64) * PARTS_PER_DISK) +
478 EMULATED_HD_DISK_MINOR_OFFSET; 500 EMULATED_HD_DISK_MINOR_OFFSET;
479 break; 501 break;
480 case XEN_IDE1_MAJOR: 502 case XEN_IDE1_MAJOR:
481 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; 503 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
482 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + 504 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
483 EMULATED_HD_DISK_MINOR_OFFSET; 505 EMULATED_HD_DISK_MINOR_OFFSET;
484 break; 506 break;
485 case XEN_SCSI_DISK0_MAJOR: 507 case XEN_SCSI_DISK0_MAJOR:
486 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; 508 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
487 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; 509 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
488 break; 510 break;
489 case XEN_SCSI_DISK1_MAJOR: 511 case XEN_SCSI_DISK1_MAJOR:
490 case XEN_SCSI_DISK2_MAJOR: 512 case XEN_SCSI_DISK2_MAJOR:
491 case XEN_SCSI_DISK3_MAJOR: 513 case XEN_SCSI_DISK3_MAJOR:
492 case XEN_SCSI_DISK4_MAJOR: 514 case XEN_SCSI_DISK4_MAJOR:
493 case XEN_SCSI_DISK5_MAJOR: 515 case XEN_SCSI_DISK5_MAJOR:
494 case XEN_SCSI_DISK6_MAJOR: 516 case XEN_SCSI_DISK6_MAJOR:
495 case XEN_SCSI_DISK7_MAJOR: 517 case XEN_SCSI_DISK7_MAJOR:
496 *offset = (*minor / PARTS_PER_DISK) + 518 *offset = (*minor / PARTS_PER_DISK) +
497 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + 519 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
498 EMULATED_SD_DISK_NAME_OFFSET; 520 EMULATED_SD_DISK_NAME_OFFSET;
499 *minor = *minor + 521 *minor = *minor +
500 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + 522 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
501 EMULATED_SD_DISK_MINOR_OFFSET; 523 EMULATED_SD_DISK_MINOR_OFFSET;
502 break; 524 break;
503 case XEN_SCSI_DISK8_MAJOR: 525 case XEN_SCSI_DISK8_MAJOR:
504 case XEN_SCSI_DISK9_MAJOR: 526 case XEN_SCSI_DISK9_MAJOR:
505 case XEN_SCSI_DISK10_MAJOR: 527 case XEN_SCSI_DISK10_MAJOR:
506 case XEN_SCSI_DISK11_MAJOR: 528 case XEN_SCSI_DISK11_MAJOR:
507 case XEN_SCSI_DISK12_MAJOR: 529 case XEN_SCSI_DISK12_MAJOR:
508 case XEN_SCSI_DISK13_MAJOR: 530 case XEN_SCSI_DISK13_MAJOR:
509 case XEN_SCSI_DISK14_MAJOR: 531 case XEN_SCSI_DISK14_MAJOR:
510 case XEN_SCSI_DISK15_MAJOR: 532 case XEN_SCSI_DISK15_MAJOR:
511 *offset = (*minor / PARTS_PER_DISK) + 533 *offset = (*minor / PARTS_PER_DISK) +
512 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + 534 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
513 EMULATED_SD_DISK_NAME_OFFSET; 535 EMULATED_SD_DISK_NAME_OFFSET;
514 *minor = *minor + 536 *minor = *minor +
515 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + 537 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
516 EMULATED_SD_DISK_MINOR_OFFSET; 538 EMULATED_SD_DISK_MINOR_OFFSET;
517 break; 539 break;
518 case XENVBD_MAJOR: 540 case XENVBD_MAJOR:
519 *offset = *minor / PARTS_PER_DISK; 541 *offset = *minor / PARTS_PER_DISK;
520 break; 542 break;
521 default: 543 default:
522 printk(KERN_WARNING "blkfront: your disk configuration is " 544 printk(KERN_WARNING "blkfront: your disk configuration is "
523 "incorrect, please use an xvd device instead\n"); 545 "incorrect, please use an xvd device instead\n");
524 return -ENODEV; 546 return -ENODEV;
525 } 547 }
526 return 0; 548 return 0;
527 } 549 }
528 550
529 static char *encode_disk_name(char *ptr, unsigned int n) 551 static char *encode_disk_name(char *ptr, unsigned int n)
530 { 552 {
531 if (n >= 26) 553 if (n >= 26)
532 ptr = encode_disk_name(ptr, n / 26 - 1); 554 ptr = encode_disk_name(ptr, n / 26 - 1);
533 *ptr = 'a' + n % 26; 555 *ptr = 'a' + n % 26;
534 return ptr + 1; 556 return ptr + 1;
535 } 557 }
536 558
537 static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 559 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
538 struct blkfront_info *info, 560 struct blkfront_info *info,
539 u16 vdisk_info, u16 sector_size) 561 u16 vdisk_info, u16 sector_size)
540 { 562 {
541 struct gendisk *gd; 563 struct gendisk *gd;
542 int nr_minors = 1; 564 int nr_minors = 1;
543 int err; 565 int err;
544 unsigned int offset; 566 unsigned int offset;
545 int minor; 567 int minor;
546 int nr_parts; 568 int nr_parts;
547 char *ptr; 569 char *ptr;
548 570
549 BUG_ON(info->gd != NULL); 571 BUG_ON(info->gd != NULL);
550 BUG_ON(info->rq != NULL); 572 BUG_ON(info->rq != NULL);
551 573
552 if ((info->vdevice>>EXT_SHIFT) > 1) { 574 if ((info->vdevice>>EXT_SHIFT) > 1) {
553 /* this is above the extended range; something is wrong */ 575 /* this is above the extended range; something is wrong */
554 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); 576 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
555 return -ENODEV; 577 return -ENODEV;
556 } 578 }
557 579
558 if (!VDEV_IS_EXTENDED(info->vdevice)) { 580 if (!VDEV_IS_EXTENDED(info->vdevice)) {
559 err = xen_translate_vdev(info->vdevice, &minor, &offset); 581 err = xen_translate_vdev(info->vdevice, &minor, &offset);
560 if (err) 582 if (err)
561 return err; 583 return err;
562 nr_parts = PARTS_PER_DISK; 584 nr_parts = PARTS_PER_DISK;
563 } else { 585 } else {
564 minor = BLKIF_MINOR_EXT(info->vdevice); 586 minor = BLKIF_MINOR_EXT(info->vdevice);
565 nr_parts = PARTS_PER_EXT_DISK; 587 nr_parts = PARTS_PER_EXT_DISK;
566 offset = minor / nr_parts; 588 offset = minor / nr_parts;
567 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) 589 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
568 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " 590 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
569 "emulated IDE disks,\n\t choose an xvd device name" 591 "emulated IDE disks,\n\t choose an xvd device name"
570 "from xvde on\n", info->vdevice); 592 "from xvde on\n", info->vdevice);
571 } 593 }
572 if (minor >> MINORBITS) { 594 if (minor >> MINORBITS) {
573 pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n", 595 pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n",
574 info->vdevice, minor); 596 info->vdevice, minor);
575 return -ENODEV; 597 return -ENODEV;
576 } 598 }
577 599
578 if ((minor % nr_parts) == 0) 600 if ((minor % nr_parts) == 0)
579 nr_minors = nr_parts; 601 nr_minors = nr_parts;
580 602
581 err = xlbd_reserve_minors(minor, nr_minors); 603 err = xlbd_reserve_minors(minor, nr_minors);
582 if (err) 604 if (err)
583 goto out; 605 goto out;
584 err = -ENODEV; 606 err = -ENODEV;
585 607
586 gd = alloc_disk(nr_minors); 608 gd = alloc_disk(nr_minors);
587 if (gd == NULL) 609 if (gd == NULL)
588 goto release; 610 goto release;
589 611
590 strcpy(gd->disk_name, DEV_NAME); 612 strcpy(gd->disk_name, DEV_NAME);
591 ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); 613 ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
592 BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN); 614 BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN);
593 if (nr_minors > 1) 615 if (nr_minors > 1)
594 *ptr = 0; 616 *ptr = 0;
595 else 617 else
596 snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr, 618 snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr,
597 "%d", minor & (nr_parts - 1)); 619 "%d", minor & (nr_parts - 1));
598 620
599 gd->major = XENVBD_MAJOR; 621 gd->major = XENVBD_MAJOR;
600 gd->first_minor = minor; 622 gd->first_minor = minor;
601 gd->fops = &xlvbd_block_fops; 623 gd->fops = &xlvbd_block_fops;
602 gd->private_data = info; 624 gd->private_data = info;
603 gd->driverfs_dev = &(info->xbdev->dev); 625 gd->driverfs_dev = &(info->xbdev->dev);
604 set_capacity(gd, capacity); 626 set_capacity(gd, capacity);
605 627
606 if (xlvbd_init_blk_queue(gd, sector_size)) { 628 if (xlvbd_init_blk_queue(gd, sector_size)) {
607 del_gendisk(gd); 629 del_gendisk(gd);
608 goto release; 630 goto release;
609 } 631 }
610 632
611 info->rq = gd->queue; 633 info->rq = gd->queue;
612 info->gd = gd; 634 info->gd = gd;
613 635
614 xlvbd_flush(info); 636 xlvbd_flush(info);
615 637
616 if (vdisk_info & VDISK_READONLY) 638 if (vdisk_info & VDISK_READONLY)
617 set_disk_ro(gd, 1); 639 set_disk_ro(gd, 1);
618 640
619 if (vdisk_info & VDISK_REMOVABLE) 641 if (vdisk_info & VDISK_REMOVABLE)
620 gd->flags |= GENHD_FL_REMOVABLE; 642 gd->flags |= GENHD_FL_REMOVABLE;
621 643
622 if (vdisk_info & VDISK_CDROM) 644 if (vdisk_info & VDISK_CDROM)
623 gd->flags |= GENHD_FL_CD; 645 gd->flags |= GENHD_FL_CD;
624 646
625 return 0; 647 return 0;
626 648
627 release: 649 release:
628 xlbd_release_minors(minor, nr_minors); 650 xlbd_release_minors(minor, nr_minors);
629 out: 651 out:
630 return err; 652 return err;
631 } 653 }
632 654
633 static void xlvbd_release_gendisk(struct blkfront_info *info) 655 static void xlvbd_release_gendisk(struct blkfront_info *info)
634 { 656 {
635 unsigned int minor, nr_minors; 657 unsigned int minor, nr_minors;
636 unsigned long flags; 658 unsigned long flags;
637 659
638 if (info->rq == NULL) 660 if (info->rq == NULL)
639 return; 661 return;
640 662
641 spin_lock_irqsave(&info->io_lock, flags); 663 spin_lock_irqsave(&info->io_lock, flags);
642 664
643 /* No more blkif_request(). */ 665 /* No more blkif_request(). */
644 blk_stop_queue(info->rq); 666 blk_stop_queue(info->rq);
645 667
646 /* No more gnttab callback work. */ 668 /* No more gnttab callback work. */
647 gnttab_cancel_free_callback(&info->callback); 669 gnttab_cancel_free_callback(&info->callback);
648 spin_unlock_irqrestore(&info->io_lock, flags); 670 spin_unlock_irqrestore(&info->io_lock, flags);
649 671
650 /* Flush gnttab callback work. Must be done with no locks held. */ 672 /* Flush gnttab callback work. Must be done with no locks held. */
651 flush_work_sync(&info->work); 673 flush_work_sync(&info->work);
652 674
653 del_gendisk(info->gd); 675 del_gendisk(info->gd);
654 676
655 minor = info->gd->first_minor; 677 minor = info->gd->first_minor;
656 nr_minors = info->gd->minors; 678 nr_minors = info->gd->minors;
657 xlbd_release_minors(minor, nr_minors); 679 xlbd_release_minors(minor, nr_minors);
658 680
659 blk_cleanup_queue(info->rq); 681 blk_cleanup_queue(info->rq);
660 info->rq = NULL; 682 info->rq = NULL;
661 683
662 put_disk(info->gd); 684 put_disk(info->gd);
663 info->gd = NULL; 685 info->gd = NULL;
664 } 686 }
665 687
666 static void kick_pending_request_queues(struct blkfront_info *info) 688 static void kick_pending_request_queues(struct blkfront_info *info)
667 { 689 {
668 if (!RING_FULL(&info->ring)) { 690 if (!RING_FULL(&info->ring)) {
669 /* Re-enable calldowns. */ 691 /* Re-enable calldowns. */
670 blk_start_queue(info->rq); 692 blk_start_queue(info->rq);
671 /* Kick things off immediately. */ 693 /* Kick things off immediately. */
672 do_blkif_request(info->rq); 694 do_blkif_request(info->rq);
673 } 695 }
674 } 696 }
675 697
676 static void blkif_restart_queue(struct work_struct *work) 698 static void blkif_restart_queue(struct work_struct *work)
677 { 699 {
678 struct blkfront_info *info = container_of(work, struct blkfront_info, work); 700 struct blkfront_info *info = container_of(work, struct blkfront_info, work);
679 701
680 spin_lock_irq(&info->io_lock); 702 spin_lock_irq(&info->io_lock);
681 if (info->connected == BLKIF_STATE_CONNECTED) 703 if (info->connected == BLKIF_STATE_CONNECTED)
682 kick_pending_request_queues(info); 704 kick_pending_request_queues(info);
683 spin_unlock_irq(&info->io_lock); 705 spin_unlock_irq(&info->io_lock);
684 } 706 }
685 707
686 static void blkif_free(struct blkfront_info *info, int suspend) 708 static void blkif_free(struct blkfront_info *info, int suspend)
687 { 709 {
688 /* Prevent new requests being issued until we fix things up. */ 710 /* Prevent new requests being issued until we fix things up. */
689 spin_lock_irq(&info->io_lock); 711 spin_lock_irq(&info->io_lock);
690 info->connected = suspend ? 712 info->connected = suspend ?
691 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 713 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
692 /* No more blkif_request(). */ 714 /* No more blkif_request(). */
693 if (info->rq) 715 if (info->rq)
694 blk_stop_queue(info->rq); 716 blk_stop_queue(info->rq);
695 /* No more gnttab callback work. */ 717 /* No more gnttab callback work. */
696 gnttab_cancel_free_callback(&info->callback); 718 gnttab_cancel_free_callback(&info->callback);
697 spin_unlock_irq(&info->io_lock); 719 spin_unlock_irq(&info->io_lock);
698 720
699 /* Flush gnttab callback work. Must be done with no locks held. */ 721 /* Flush gnttab callback work. Must be done with no locks held. */
700 flush_work_sync(&info->work); 722 flush_work_sync(&info->work);
701 723
702 /* Free resources associated with old device channel. */ 724 /* Free resources associated with old device channel. */
703 if (info->ring_ref != GRANT_INVALID_REF) { 725 if (info->ring_ref != GRANT_INVALID_REF) {
704 gnttab_end_foreign_access(info->ring_ref, 0, 726 gnttab_end_foreign_access(info->ring_ref, 0,
705 (unsigned long)info->ring.sring); 727 (unsigned long)info->ring.sring);
706 info->ring_ref = GRANT_INVALID_REF; 728 info->ring_ref = GRANT_INVALID_REF;
707 info->ring.sring = NULL; 729 info->ring.sring = NULL;
708 } 730 }
709 if (info->irq) 731 if (info->irq)
710 unbind_from_irqhandler(info->irq, info); 732 unbind_from_irqhandler(info->irq, info);
711 info->evtchn = info->irq = 0; 733 info->evtchn = info->irq = 0;
712 734
713 } 735 }
714 736
715 static void blkif_completion(struct blk_shadow *s) 737 static void blkif_completion(struct blk_shadow *s)
716 { 738 {
717 int i; 739 int i;
718 /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place 740 /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
719 * flag. */ 741 * flag. */
720 for (i = 0; i < s->req.u.rw.nr_segments; i++) 742 for (i = 0; i < s->req.u.rw.nr_segments; i++)
721 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); 743 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
722 } 744 }
723 745
724 static irqreturn_t blkif_interrupt(int irq, void *dev_id) 746 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
725 { 747 {
726 struct request *req; 748 struct request *req;
727 struct blkif_response *bret; 749 struct blkif_response *bret;
728 RING_IDX i, rp; 750 RING_IDX i, rp;
729 unsigned long flags; 751 unsigned long flags;
730 struct blkfront_info *info = (struct blkfront_info *)dev_id; 752 struct blkfront_info *info = (struct blkfront_info *)dev_id;
731 int error; 753 int error;
732 754
733 spin_lock_irqsave(&info->io_lock, flags); 755 spin_lock_irqsave(&info->io_lock, flags);
734 756
735 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 757 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
736 spin_unlock_irqrestore(&info->io_lock, flags); 758 spin_unlock_irqrestore(&info->io_lock, flags);
737 return IRQ_HANDLED; 759 return IRQ_HANDLED;
738 } 760 }
739 761
740 again: 762 again:
741 rp = info->ring.sring->rsp_prod; 763 rp = info->ring.sring->rsp_prod;
742 rmb(); /* Ensure we see queued responses up to 'rp'. */ 764 rmb(); /* Ensure we see queued responses up to 'rp'. */
743 765
744 for (i = info->ring.rsp_cons; i != rp; i++) { 766 for (i = info->ring.rsp_cons; i != rp; i++) {
745 unsigned long id; 767 unsigned long id;
746 768
747 bret = RING_GET_RESPONSE(&info->ring, i); 769 bret = RING_GET_RESPONSE(&info->ring, i);
748 id = bret->id; 770 id = bret->id;
771 /*
772 * The backend has messed up and given us an id that we would
773 * never have given to it (we stamp it up to BLK_RING_SIZE -
774 * look in get_id_from_freelist.
775 */
776 if (id >= BLK_RING_SIZE) {
777 WARN(1, "%s: response to %s has incorrect id (%ld)\n",
778 info->gd->disk_name, op_name(bret->operation), id);
779 /* We can't safely get the 'struct request' as
780 * the id is busted. */
781 continue;
782 }
749 req = info->shadow[id].request; 783 req = info->shadow[id].request;
750 784
751 if (bret->operation != BLKIF_OP_DISCARD) 785 if (bret->operation != BLKIF_OP_DISCARD)
752 blkif_completion(&info->shadow[id]); 786 blkif_completion(&info->shadow[id]);
753 787
754 add_id_to_freelist(info, id); 788 if (add_id_to_freelist(info, id)) {
789 WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
790 info->gd->disk_name, op_name(bret->operation), id);
791 continue;
792 }
755 793
756 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 794 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
757 switch (bret->operation) { 795 switch (bret->operation) {
758 case BLKIF_OP_DISCARD: 796 case BLKIF_OP_DISCARD:
759 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 797 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
760 struct request_queue *rq = info->rq; 798 struct request_queue *rq = info->rq;
761 printk(KERN_WARNING "blkfront: %s: discard op failed\n", 799 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
762 info->gd->disk_name); 800 info->gd->disk_name, op_name(bret->operation));
763 error = -EOPNOTSUPP; 801 error = -EOPNOTSUPP;
764 info->feature_discard = 0; 802 info->feature_discard = 0;
765 info->feature_secdiscard = 0; 803 info->feature_secdiscard = 0;
766 queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 804 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
767 queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); 805 queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
768 } 806 }
769 __blk_end_request_all(req, error); 807 __blk_end_request_all(req, error);
770 break; 808 break;
771 case BLKIF_OP_FLUSH_DISKCACHE: 809 case BLKIF_OP_FLUSH_DISKCACHE:
772 case BLKIF_OP_WRITE_BARRIER: 810 case BLKIF_OP_WRITE_BARRIER:
773 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 811 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
774 printk(KERN_WARNING "blkfront: %s: write %s op failed\n", 812 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
775 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 813 info->gd->disk_name, op_name(bret->operation));
776 "barrier" : "flush disk cache",
777 info->gd->disk_name);
778 error = -EOPNOTSUPP; 814 error = -EOPNOTSUPP;
779 } 815 }
780 if (unlikely(bret->status == BLKIF_RSP_ERROR && 816 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
781 info->shadow[id].req.u.rw.nr_segments == 0)) { 817 info->shadow[id].req.u.rw.nr_segments == 0)) {
782 printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", 818 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
783 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 819 info->gd->disk_name, op_name(bret->operation));
784 "barrier" : "flush disk cache",
785 info->gd->disk_name);
786 error = -EOPNOTSUPP; 820 error = -EOPNOTSUPP;
787 } 821 }
788 if (unlikely(error)) { 822 if (unlikely(error)) {
789 if (error == -EOPNOTSUPP) 823 if (error == -EOPNOTSUPP)
790 error = 0; 824 error = 0;
791 info->feature_flush = 0; 825 info->feature_flush = 0;
792 info->flush_op = 0; 826 info->flush_op = 0;
793 xlvbd_flush(info); 827 xlvbd_flush(info);
794 } 828 }
795 /* fall through */ 829 /* fall through */
796 case BLKIF_OP_READ: 830 case BLKIF_OP_READ:
797 case BLKIF_OP_WRITE: 831 case BLKIF_OP_WRITE:
798 if (unlikely(bret->status != BLKIF_RSP_OKAY)) 832 if (unlikely(bret->status != BLKIF_RSP_OKAY))
799 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " 833 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
800 "request: %x\n", bret->status); 834 "request: %x\n", bret->status);
801 835
802 __blk_end_request_all(req, error); 836 __blk_end_request_all(req, error);
803 break; 837 break;
804 default: 838 default:
805 BUG(); 839 BUG();
806 } 840 }
807 } 841 }
808 842
809 info->ring.rsp_cons = i; 843 info->ring.rsp_cons = i;
810 844
811 if (i != info->ring.req_prod_pvt) { 845 if (i != info->ring.req_prod_pvt) {
812 int more_to_do; 846 int more_to_do;
813 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 847 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
814 if (more_to_do) 848 if (more_to_do)
815 goto again; 849 goto again;
816 } else 850 } else
817 info->ring.sring->rsp_event = i + 1; 851 info->ring.sring->rsp_event = i + 1;
818 852
819 kick_pending_request_queues(info); 853 kick_pending_request_queues(info);
820 854
821 spin_unlock_irqrestore(&info->io_lock, flags); 855 spin_unlock_irqrestore(&info->io_lock, flags);
822 856
823 return IRQ_HANDLED; 857 return IRQ_HANDLED;
824 } 858 }
825 859
826 860
827 static int setup_blkring(struct xenbus_device *dev, 861 static int setup_blkring(struct xenbus_device *dev,
828 struct blkfront_info *info) 862 struct blkfront_info *info)
829 { 863 {
830 struct blkif_sring *sring; 864 struct blkif_sring *sring;
831 int err; 865 int err;
832 866
833 info->ring_ref = GRANT_INVALID_REF; 867 info->ring_ref = GRANT_INVALID_REF;
834 868
835 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); 869 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
836 if (!sring) { 870 if (!sring) {
837 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 871 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
838 return -ENOMEM; 872 return -ENOMEM;
839 } 873 }
840 SHARED_RING_INIT(sring); 874 SHARED_RING_INIT(sring);
841 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 875 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
842 876
843 sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); 877 sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
844 878
845 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); 879 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
846 if (err < 0) { 880 if (err < 0) {
847 free_page((unsigned long)sring); 881 free_page((unsigned long)sring);
848 info->ring.sring = NULL; 882 info->ring.sring = NULL;
849 goto fail; 883 goto fail;
850 } 884 }
851 info->ring_ref = err; 885 info->ring_ref = err;
852 886
853 err = xenbus_alloc_evtchn(dev, &info->evtchn); 887 err = xenbus_alloc_evtchn(dev, &info->evtchn);
854 if (err) 888 if (err)
855 goto fail; 889 goto fail;
856 890
857 err = bind_evtchn_to_irqhandler(info->evtchn, 891 err = bind_evtchn_to_irqhandler(info->evtchn,
858 blkif_interrupt, 892 blkif_interrupt,
859 IRQF_SAMPLE_RANDOM, "blkif", info); 893 IRQF_SAMPLE_RANDOM, "blkif", info);
860 if (err <= 0) { 894 if (err <= 0) {
861 xenbus_dev_fatal(dev, err, 895 xenbus_dev_fatal(dev, err,
862 "bind_evtchn_to_irqhandler failed"); 896 "bind_evtchn_to_irqhandler failed");
863 goto fail; 897 goto fail;
864 } 898 }
865 info->irq = err; 899 info->irq = err;
866 900
867 return 0; 901 return 0;
868 fail: 902 fail:
869 blkif_free(info, 0); 903 blkif_free(info, 0);
870 return err; 904 return err;
871 } 905 }
872 906
873 907
874 /* Common code used when first setting up, and when resuming. */ 908 /* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
			   struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, dev->nodename,
			    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
			    "event-channel", "%u", info->evtchn);
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
			    XEN_IO_PROTO_ABI_NATIVE);
	if (err) {
		message = "writing protocol";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}

	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffer for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Initialised state.
 */
static int blkfront_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
			   "virtual-device", "%i", &vdevice);
	if (err != 1) {
		/* go looking in the extended area instead */
		err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
				   "%i", &vdevice);
		if (err != 1) {
			xenbus_dev_fatal(dev, err, "reading virtual-device");
			return err;
		}
	}

	if (xen_hvm_domain()) {
		char *type;
		int len;
		/* no unplug has been done: do not hook devices != xen vbds */
		if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
			int major;

			if (!VDEV_IS_EXTENDED(vdevice))
				major = BLKIF_MAJOR(vdevice);
			else
				major = XENVBD_MAJOR;

			if (major != XENVBD_MAJOR) {
				printk(KERN_INFO
					"%s: HVM does not support vbd %d as xen block device\n",
					__FUNCTION__, vdevice);
				return -ENODEV;
			}
		}
		/* do not create a PV cdrom device if we are an HVM guest */
		type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
		if (IS_ERR(type))
			return -ENODEV;
		if (strncmp(type, "cdrom", 5) == 0) {
			kfree(type);
			return -ENODEV;
		}
		kfree(type);
	}
	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
		return -ENOMEM;
	}

	mutex_init(&info->mutex);
	spin_lock_init(&info->io_lock);
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;
	INIT_WORK(&info->work, blkif_restart_queue);

	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.u.rw.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
	dev_set_drvdata(&dev->dev, info);

	err = talk_to_blkback(dev, info);
	if (err) {
		kfree(info);
		dev_set_drvdata(&dev->dev, NULL);
		return err;
	}

	return 0;
}


static int blkif_recover(struct blkfront_info *info)
{
	int i;
	struct blkif_request *req;
	struct blk_shadow *copy;
	int j;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = kmalloc(sizeof(info->shadow),
		       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
	if (!copy)
		return -ENOMEM;
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.u.rw.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (!copy[i].request)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->u.rw.id = get_id_from_freelist(info);
		memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));

		if (req->operation != BLKIF_OP_DISCARD) {
		/* Rewrite any grant references invalidated by susp/resume. */
			for (j = 0; j < req->u.rw.nr_segments; j++)
				gnttab_grant_foreign_access_ref(
					req->u.rw.seg[j].gref,
					info->xbdev->otherend_id,
					pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
					rq_data_dir(info->shadow[req->u.rw.id].request));
		}
		info->shadow[req->u.rw.id].req = *req;

		info->ring.req_prod_pvt++;
	}

	kfree(copy);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	spin_lock_irq(&info->io_lock);

	/* Now safe for us to use the shared ring */
	info->connected = BLKIF_STATE_CONNECTED;

	/* Send off requeued requests */
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);

	spin_unlock_irq(&info->io_lock);

	return 0;
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our blkif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
	int err;

	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);

	err = talk_to_blkback(dev, info);
	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
		err = blkif_recover(info);

	return err;
}

static void
blkfront_closing(struct blkfront_info *info)
{
	struct xenbus_device *xbdev = info->xbdev;
	struct block_device *bdev = NULL;

	mutex_lock(&info->mutex);

	if (xbdev->state == XenbusStateClosing) {
		mutex_unlock(&info->mutex);
		return;
	}

	if (info->gd)
		bdev = bdget_disk(info->gd, 0);

	mutex_unlock(&info->mutex);

	if (!bdev) {
		xenbus_frontend_closed(xbdev);
		return;
	}

	mutex_lock(&bdev->bd_mutex);

	if (bdev->bd_openers) {
		xenbus_dev_error(xbdev, -EBUSY,
				 "Device in use; refusing to close");
		xenbus_switch_state(xbdev, XenbusStateClosing);
	} else {
		xlvbd_release_gendisk(info);
		xenbus_frontend_closed(xbdev);
	}

	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
}

static void blkfront_setup_discard(struct blkfront_info *info)
{
	int err;
	char *type;
	unsigned int discard_granularity;
	unsigned int discard_alignment;
	unsigned int discard_secure;

	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
	if (IS_ERR(type))
		return;

	info->feature_secdiscard = 0;
	if (strncmp(type, "phy", 3) == 0) {
		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			"discard-granularity", "%u", &discard_granularity,
			"discard-alignment", "%u", &discard_alignment,
			NULL);
		if (!err) {
			info->feature_discard = 1;
			info->discard_granularity = discard_granularity;
			info->discard_alignment = discard_alignment;
		}
		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			"discard-secure", "%d", &discard_secure,
			NULL);
		if (!err)
			info->feature_secdiscard = discard_secure;

	} else if (strncmp(type, "file", 4) == 0)
		info->feature_discard = 1;

	kfree(type);
}

/*
 * Invoked when the backend is finally 'ready' (and has told produced
 * the details about the physical device - #sectors, size, etc).
 */
static void blkfront_connect(struct blkfront_info *info)
{
	unsigned long long sectors;
	unsigned long sector_size;
	unsigned int binfo;
	int err;
	int barrier, flush, discard;

	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:
		/*
		 * Potentially, the back-end may be signalling
		 * a capacity change; update the capacity.
		 */
		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				   "sectors", "%Lu", &sectors);
		if (XENBUS_EXIST_ERR(err))
			return;
		printk(KERN_INFO "Setting capacity to %Lu\n",
		       sectors);
		set_capacity(info->gd, sectors);
		revalidate_disk(info->gd);

		/* fall through */
	case BLKIF_STATE_SUSPENDED:
		return;

	default:
		break;
	}

	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
		__func__, info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%llu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}

	info->feature_flush = 0;
	info->flush_op = 0;

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%d", &barrier,
			    NULL);

	/*
	 * If there's no "feature-barrier" defined, then it means
	 * we're dealing with a very old backend which writes
	 * synchronously; nothing to do.
	 *
	 * If there are barriers, then we use flush.
	 */
	if (!err && barrier) {
		info->feature_flush = REQ_FLUSH | REQ_FUA;
		info->flush_op = BLKIF_OP_WRITE_BARRIER;
	}
	/*
	 * And if there is "feature-flush-cache" use that above
	 * barriers.
	 */
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-flush-cache", "%d", &flush,
			    NULL);

	if (!err && flush) {
		info->feature_flush = REQ_FLUSH;
		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
	}

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-discard", "%d", &discard,
			    NULL);

	if (!err && discard)
		blkfront_setup_discard(info);

	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
				 info->xbdev->otherend);
		return;
	}

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	spin_lock_irq(&info->io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	spin_unlock_irq(&info->io_lock);

	add_disk(info->gd);

	info->is_ready = 1;
}

/**
 * Callback received when the backend's state changes.
 */
static void blkback_changed(struct xenbus_device *dev,
			    enum xenbus_state backend_state)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);

	dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);

	switch (backend_state) {
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateReconfiguring:
	case XenbusStateReconfigured:
	case XenbusStateUnknown:
	case XenbusStateClosed:
		break;

	case XenbusStateConnected:
		blkfront_connect(info);
		break;

	case XenbusStateClosing:
		blkfront_closing(info);
		break;
	}
}

static int blkfront_remove(struct xenbus_device *xbdev)
{
	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
	struct block_device *bdev = NULL;
	struct gendisk *disk;

	dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);

	blkif_free(info, 0);

	mutex_lock(&info->mutex);

	disk = info->gd;
	if (disk)
		bdev = bdget_disk(disk, 0);

	info->xbdev = NULL;
	mutex_unlock(&info->mutex);

	if (!bdev) {
		kfree(info);
		return 0;
	}

	/*
	 * The xbdev was removed before we reached the Closed
	 * state. See if it's safe to remove the disk. If the bdev
	 * isn't closed yet, we let release take care of it.
	 */

	mutex_lock(&bdev->bd_mutex);
	info = disk->private_data;

	dev_warn(disk_to_dev(disk),
		 "%s was hot-unplugged, %d stale handles\n",
		 xbdev->nodename, bdev->bd_openers);

	if (info && !bdev->bd_openers) {
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
	}

	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);

	return 0;
}

static int blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);

	return info->is_ready && info->xbdev;
}

static int blkif_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct blkfront_info *info;
	int err = 0;

	mutex_lock(&blkfront_mutex);

	info = disk->private_data;
	if (!info) {
		/* xbdev gone */
		err = -ERESTARTSYS;
		goto out;
	}

	mutex_lock(&info->mutex);

	if (!info->gd)
		/* xbdev is closed */
		err = -ERESTARTSYS;

	mutex_unlock(&info->mutex);

out:
	mutex_unlock(&blkfront_mutex);
	return err;
}

static int blkif_release(struct gendisk *disk, fmode_t mode)
{
	struct blkfront_info *info = disk->private_data;
	struct block_device *bdev;
	struct xenbus_device *xbdev;

	mutex_lock(&blkfront_mutex);

	bdev = bdget_disk(disk, 0);

	if (bdev->bd_openers)
		goto out;

	/*
	 * Check if we have been instructed to close. We will have
	 * deferred this request, because the bdev was still open.
	 */

	mutex_lock(&info->mutex);
	xbdev = info->xbdev;

	if (xbdev && xbdev->state == XenbusStateClosing) {
		/* pending switch to state closed */
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		xenbus_frontend_closed(info->xbdev);
	}

	mutex_unlock(&info->mutex);

	if (!xbdev) {
		/* sudden device removal */
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
	}

out:
	bdput(bdev);
	mutex_unlock(&blkfront_mutex);
	return 0;
}

static const struct block_device_operations xlvbd_block_fops =
{
	.owner = THIS_MODULE,
	.open = blkif_open,
	.release = blkif_release,
	.getgeo = blkif_getgeo,
	.ioctl = blkif_ioctl,
};


static const struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};

static DEFINE_XENBUS_DRIVER(blkfront, ,
	.probe = blkfront_probe,
	.remove = blkfront_remove,
	.resume = blkfront_resume,
	.otherend_changed = blkback_changed,
	.is_ready = blkfront_is_ready,
);

static int __init xlblk_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	if (xen_hvm_domain() && !xen_platform_pci_unplug)
		return -ENODEV;

	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
		printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
		       XENVBD_MAJOR, DEV_NAME);
		return -ENODEV;
	}

	ret = xenbus_register_frontend(&blkfront_driver);
	if (ret) {
		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
		return ret;
	}

	return 0;
}
module_init(xlblk_init);


static void __exit xlblk_exit(void)
{
	xenbus_unregister_driver(&blkfront_driver);
	unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
	kfree(minors);
}
module_exit(xlblk_exit);

MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");