Commit e75aa85892b2ee78c79edac720868cbef16e62eb

Authored by Tejun Heo
Committed by Linus Torvalds
1 parent d9b68e5e88

block_dev: always serialize exclusive open attempts

bd_prepare_to_claim() incorrectly allowed multiple exclusive open
attempts to proceed in parallel when the attempting holders were
identical.  This triggered the BUG_ON() in __bd_abort_claiming(), as
reported in the following bug:

  https://bugzilla.kernel.org/show_bug.cgi?id=16393

__bd_abort_claiming() is used to finish a claiming block and doesn't
work if multiple openers are inside the claiming block.  Allowing
multiple open attempts to proceed in parallel gains nothing anyway,
as they are serialized further down the call chain.  Fix it by always
allowing only a single open attempt inside a claiming block.

This problem can easily be reproduced by adding a delay after
bd_prepare_to_claim() and attempting to mount two partitions of a
disk.
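
As an illustration, below is a minimal user-space sketch of the race
(an illustrative model, not kernel code: the names mirror the
kernel's, and bd_start_claiming()'s assignment of bd_claiming is
folded into the prepare step).  In the mount path the holder is the
filesystem type, so mounting two partitions of the same disk presents
identical holders:

  /* Hypothetical stand-alone model of the pre-fix behaviour. */
  #include <assert.h>
  #include <stddef.h>

  static void *bd_claiming;               /* models whole->bd_claiming */

  static int prepare_old(void *holder)
  {
          /* pre-fix test: waves a second, identical holder through */
          if (bd_claiming && bd_claiming != holder)
                  return -1;              /* kernel would sleep and retry */
          bd_claiming = holder;           /* enter the claiming block */
          return 0;
  }

  static void finish(void *holder)        /* models __bd_abort_claiming() */
  {
          assert(bd_claiming == holder);  /* the kernel's BUG_ON() */
          bd_claiming = NULL;
  }

  int main(void)
  {
          int fs_type;                    /* one holder shared by both openers */

          prepare_old(&fs_type);          /* opener A enters the claiming block */
          prepare_old(&fs_type);          /* opener B slips in past the old test */
          finish(&fs_type);               /* A finishes and clears bd_claiming */
          finish(&fs_type);               /* B trips the assert, like the BUG_ON() */
          return 0;
  }

With the fixed test, if (whole->bd_claiming), opener B would wait
until A leaves the claiming block, so at most one open attempt is
ever inside it.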

stable: only applicable to v2.6.35

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file, fs/block_dev.c, with 2 additions and 2 deletions.
Inline diff follows; the changed lines are marked with - (old) and + (new):

1 /* 1 /*
2 * linux/fs/block_dev.c 2 * linux/fs/block_dev.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 5 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
6 */ 6 */
7 7
8 #include <linux/init.h> 8 #include <linux/init.h>
9 #include <linux/mm.h> 9 #include <linux/mm.h>
10 #include <linux/fcntl.h> 10 #include <linux/fcntl.h>
11 #include <linux/slab.h> 11 #include <linux/slab.h>
12 #include <linux/kmod.h> 12 #include <linux/kmod.h>
13 #include <linux/major.h> 13 #include <linux/major.h>
14 #include <linux/smp_lock.h> 14 #include <linux/smp_lock.h>
15 #include <linux/device_cgroup.h> 15 #include <linux/device_cgroup.h>
16 #include <linux/highmem.h> 16 #include <linux/highmem.h>
17 #include <linux/blkdev.h> 17 #include <linux/blkdev.h>
18 #include <linux/module.h> 18 #include <linux/module.h>
19 #include <linux/blkpg.h> 19 #include <linux/blkpg.h>
20 #include <linux/buffer_head.h> 20 #include <linux/buffer_head.h>
21 #include <linux/pagevec.h> 21 #include <linux/pagevec.h>
22 #include <linux/writeback.h> 22 #include <linux/writeback.h>
23 #include <linux/mpage.h> 23 #include <linux/mpage.h>
24 #include <linux/mount.h> 24 #include <linux/mount.h>
25 #include <linux/uio.h> 25 #include <linux/uio.h>
26 #include <linux/namei.h> 26 #include <linux/namei.h>
27 #include <linux/log2.h> 27 #include <linux/log2.h>
28 #include <linux/kmemleak.h> 28 #include <linux/kmemleak.h>
29 #include <asm/uaccess.h> 29 #include <asm/uaccess.h>
30 #include "internal.h" 30 #include "internal.h"
31 31
32 struct bdev_inode { 32 struct bdev_inode {
33 struct block_device bdev; 33 struct block_device bdev;
34 struct inode vfs_inode; 34 struct inode vfs_inode;
35 }; 35 };
36 36
37 static const struct address_space_operations def_blk_aops; 37 static const struct address_space_operations def_blk_aops;
38 38
39 static inline struct bdev_inode *BDEV_I(struct inode *inode) 39 static inline struct bdev_inode *BDEV_I(struct inode *inode)
40 { 40 {
41 return container_of(inode, struct bdev_inode, vfs_inode); 41 return container_of(inode, struct bdev_inode, vfs_inode);
42 } 42 }
43 43
44 inline struct block_device *I_BDEV(struct inode *inode) 44 inline struct block_device *I_BDEV(struct inode *inode)
45 { 45 {
46 return &BDEV_I(inode)->bdev; 46 return &BDEV_I(inode)->bdev;
47 } 47 }
48 48
49 EXPORT_SYMBOL(I_BDEV); 49 EXPORT_SYMBOL(I_BDEV);
50 50
51 static sector_t max_block(struct block_device *bdev) 51 static sector_t max_block(struct block_device *bdev)
52 { 52 {
53 sector_t retval = ~((sector_t)0); 53 sector_t retval = ~((sector_t)0);
54 loff_t sz = i_size_read(bdev->bd_inode); 54 loff_t sz = i_size_read(bdev->bd_inode);
55 55
56 if (sz) { 56 if (sz) {
57 unsigned int size = block_size(bdev); 57 unsigned int size = block_size(bdev);
58 unsigned int sizebits = blksize_bits(size); 58 unsigned int sizebits = blksize_bits(size);
59 retval = (sz >> sizebits); 59 retval = (sz >> sizebits);
60 } 60 }
61 return retval; 61 return retval;
62 } 62 }
63 63
64 /* Kill _all_ buffers and pagecache , dirty or not.. */ 64 /* Kill _all_ buffers and pagecache , dirty or not.. */
65 static void kill_bdev(struct block_device *bdev) 65 static void kill_bdev(struct block_device *bdev)
66 { 66 {
67 if (bdev->bd_inode->i_mapping->nrpages == 0) 67 if (bdev->bd_inode->i_mapping->nrpages == 0)
68 return; 68 return;
69 invalidate_bh_lrus(); 69 invalidate_bh_lrus();
70 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 70 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
71 } 71 }
72 72
73 int set_blocksize(struct block_device *bdev, int size) 73 int set_blocksize(struct block_device *bdev, int size)
74 { 74 {
75 /* Size must be a power of two, and between 512 and PAGE_SIZE */ 75 /* Size must be a power of two, and between 512 and PAGE_SIZE */
76 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) 76 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
77 return -EINVAL; 77 return -EINVAL;
78 78
79 /* Size cannot be smaller than the size supported by the device */ 79 /* Size cannot be smaller than the size supported by the device */
80 if (size < bdev_logical_block_size(bdev)) 80 if (size < bdev_logical_block_size(bdev))
81 return -EINVAL; 81 return -EINVAL;
82 82
83 /* Don't change the size if it is same as current */ 83 /* Don't change the size if it is same as current */
84 if (bdev->bd_block_size != size) { 84 if (bdev->bd_block_size != size) {
85 sync_blockdev(bdev); 85 sync_blockdev(bdev);
86 bdev->bd_block_size = size; 86 bdev->bd_block_size = size;
87 bdev->bd_inode->i_blkbits = blksize_bits(size); 87 bdev->bd_inode->i_blkbits = blksize_bits(size);
88 kill_bdev(bdev); 88 kill_bdev(bdev);
89 } 89 }
90 return 0; 90 return 0;
91 } 91 }
92 92
93 EXPORT_SYMBOL(set_blocksize); 93 EXPORT_SYMBOL(set_blocksize);
94 94
95 int sb_set_blocksize(struct super_block *sb, int size) 95 int sb_set_blocksize(struct super_block *sb, int size)
96 { 96 {
97 if (set_blocksize(sb->s_bdev, size)) 97 if (set_blocksize(sb->s_bdev, size))
98 return 0; 98 return 0;
99 /* If we get here, we know size is power of two 99 /* If we get here, we know size is power of two
100 * and it's value is between 512 and PAGE_SIZE */ 100 * and it's value is between 512 and PAGE_SIZE */
101 sb->s_blocksize = size; 101 sb->s_blocksize = size;
102 sb->s_blocksize_bits = blksize_bits(size); 102 sb->s_blocksize_bits = blksize_bits(size);
103 return sb->s_blocksize; 103 return sb->s_blocksize;
104 } 104 }
105 105
106 EXPORT_SYMBOL(sb_set_blocksize); 106 EXPORT_SYMBOL(sb_set_blocksize);
107 107
108 int sb_min_blocksize(struct super_block *sb, int size) 108 int sb_min_blocksize(struct super_block *sb, int size)
109 { 109 {
110 int minsize = bdev_logical_block_size(sb->s_bdev); 110 int minsize = bdev_logical_block_size(sb->s_bdev);
111 if (size < minsize) 111 if (size < minsize)
112 size = minsize; 112 size = minsize;
113 return sb_set_blocksize(sb, size); 113 return sb_set_blocksize(sb, size);
114 } 114 }
115 115
116 EXPORT_SYMBOL(sb_min_blocksize); 116 EXPORT_SYMBOL(sb_min_blocksize);
117 117
118 static int 118 static int
119 blkdev_get_block(struct inode *inode, sector_t iblock, 119 blkdev_get_block(struct inode *inode, sector_t iblock,
120 struct buffer_head *bh, int create) 120 struct buffer_head *bh, int create)
121 { 121 {
122 if (iblock >= max_block(I_BDEV(inode))) { 122 if (iblock >= max_block(I_BDEV(inode))) {
123 if (create) 123 if (create)
124 return -EIO; 124 return -EIO;
125 125
126 /* 126 /*
127 * for reads, we're just trying to fill a partial page. 127 * for reads, we're just trying to fill a partial page.
128 * return a hole, they will have to call get_block again 128 * return a hole, they will have to call get_block again
129 * before they can fill it, and they will get -EIO at that 129 * before they can fill it, and they will get -EIO at that
130 * time 130 * time
131 */ 131 */
132 return 0; 132 return 0;
133 } 133 }
134 bh->b_bdev = I_BDEV(inode); 134 bh->b_bdev = I_BDEV(inode);
135 bh->b_blocknr = iblock; 135 bh->b_blocknr = iblock;
136 set_buffer_mapped(bh); 136 set_buffer_mapped(bh);
137 return 0; 137 return 0;
138 } 138 }
139 139
140 static int 140 static int
141 blkdev_get_blocks(struct inode *inode, sector_t iblock, 141 blkdev_get_blocks(struct inode *inode, sector_t iblock,
142 struct buffer_head *bh, int create) 142 struct buffer_head *bh, int create)
143 { 143 {
144 sector_t end_block = max_block(I_BDEV(inode)); 144 sector_t end_block = max_block(I_BDEV(inode));
145 unsigned long max_blocks = bh->b_size >> inode->i_blkbits; 145 unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
146 146
147 if ((iblock + max_blocks) > end_block) { 147 if ((iblock + max_blocks) > end_block) {
148 max_blocks = end_block - iblock; 148 max_blocks = end_block - iblock;
149 if ((long)max_blocks <= 0) { 149 if ((long)max_blocks <= 0) {
150 if (create) 150 if (create)
151 return -EIO; /* write fully beyond EOF */ 151 return -EIO; /* write fully beyond EOF */
152 /* 152 /*
153 * It is a read which is fully beyond EOF. We return 153 * It is a read which is fully beyond EOF. We return
154 * a !buffer_mapped buffer 154 * a !buffer_mapped buffer
155 */ 155 */
156 max_blocks = 0; 156 max_blocks = 0;
157 } 157 }
158 } 158 }
159 159
160 bh->b_bdev = I_BDEV(inode); 160 bh->b_bdev = I_BDEV(inode);
161 bh->b_blocknr = iblock; 161 bh->b_blocknr = iblock;
162 bh->b_size = max_blocks << inode->i_blkbits; 162 bh->b_size = max_blocks << inode->i_blkbits;
163 if (max_blocks) 163 if (max_blocks)
164 set_buffer_mapped(bh); 164 set_buffer_mapped(bh);
165 return 0; 165 return 0;
166 } 166 }
167 167
168 static ssize_t 168 static ssize_t
169 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 169 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
170 loff_t offset, unsigned long nr_segs) 170 loff_t offset, unsigned long nr_segs)
171 { 171 {
172 struct file *file = iocb->ki_filp; 172 struct file *file = iocb->ki_filp;
173 struct inode *inode = file->f_mapping->host; 173 struct inode *inode = file->f_mapping->host;
174 174
175 return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode, 175 return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
176 I_BDEV(inode), iov, offset, nr_segs, 176 I_BDEV(inode), iov, offset, nr_segs,
177 blkdev_get_blocks, NULL); 177 blkdev_get_blocks, NULL);
178 } 178 }
179 179
180 int __sync_blockdev(struct block_device *bdev, int wait) 180 int __sync_blockdev(struct block_device *bdev, int wait)
181 { 181 {
182 if (!bdev) 182 if (!bdev)
183 return 0; 183 return 0;
184 if (!wait) 184 if (!wait)
185 return filemap_flush(bdev->bd_inode->i_mapping); 185 return filemap_flush(bdev->bd_inode->i_mapping);
186 return filemap_write_and_wait(bdev->bd_inode->i_mapping); 186 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
187 } 187 }
188 188
189 /* 189 /*
190 * Write out and wait upon all the dirty data associated with a block 190 * Write out and wait upon all the dirty data associated with a block
191 * device via its mapping. Does not take the superblock lock. 191 * device via its mapping. Does not take the superblock lock.
192 */ 192 */
193 int sync_blockdev(struct block_device *bdev) 193 int sync_blockdev(struct block_device *bdev)
194 { 194 {
195 return __sync_blockdev(bdev, 1); 195 return __sync_blockdev(bdev, 1);
196 } 196 }
197 EXPORT_SYMBOL(sync_blockdev); 197 EXPORT_SYMBOL(sync_blockdev);
198 198
199 /* 199 /*
200 * Write out and wait upon all dirty data associated with this 200 * Write out and wait upon all dirty data associated with this
201 * device. Filesystem data as well as the underlying block 201 * device. Filesystem data as well as the underlying block
202 * device. Takes the superblock lock. 202 * device. Takes the superblock lock.
203 */ 203 */
204 int fsync_bdev(struct block_device *bdev) 204 int fsync_bdev(struct block_device *bdev)
205 { 205 {
206 struct super_block *sb = get_super(bdev); 206 struct super_block *sb = get_super(bdev);
207 if (sb) { 207 if (sb) {
208 int res = sync_filesystem(sb); 208 int res = sync_filesystem(sb);
209 drop_super(sb); 209 drop_super(sb);
210 return res; 210 return res;
211 } 211 }
212 return sync_blockdev(bdev); 212 return sync_blockdev(bdev);
213 } 213 }
214 EXPORT_SYMBOL(fsync_bdev); 214 EXPORT_SYMBOL(fsync_bdev);
215 215
216 /** 216 /**
217 * freeze_bdev -- lock a filesystem and force it into a consistent state 217 * freeze_bdev -- lock a filesystem and force it into a consistent state
218 * @bdev: blockdevice to lock 218 * @bdev: blockdevice to lock
219 * 219 *
220 * If a superblock is found on this device, we take the s_umount semaphore 220 * If a superblock is found on this device, we take the s_umount semaphore
221 * on it to make sure nobody unmounts until the snapshot creation is done. 221 * on it to make sure nobody unmounts until the snapshot creation is done.
222 * The reference counter (bd_fsfreeze_count) guarantees that only the last 222 * The reference counter (bd_fsfreeze_count) guarantees that only the last
223 * unfreeze process can unfreeze the frozen filesystem actually when multiple 223 * unfreeze process can unfreeze the frozen filesystem actually when multiple
224 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and 224 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
225 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze 225 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
226 * actually. 226 * actually.
227 */ 227 */
228 struct super_block *freeze_bdev(struct block_device *bdev) 228 struct super_block *freeze_bdev(struct block_device *bdev)
229 { 229 {
230 struct super_block *sb; 230 struct super_block *sb;
231 int error = 0; 231 int error = 0;
232 232
233 mutex_lock(&bdev->bd_fsfreeze_mutex); 233 mutex_lock(&bdev->bd_fsfreeze_mutex);
234 if (++bdev->bd_fsfreeze_count > 1) { 234 if (++bdev->bd_fsfreeze_count > 1) {
235 /* 235 /*
236 * We don't even need to grab a reference - the first call 236 * We don't even need to grab a reference - the first call
237 * to freeze_bdev grab an active reference and only the last 237 * to freeze_bdev grab an active reference and only the last
238 * thaw_bdev drops it. 238 * thaw_bdev drops it.
239 */ 239 */
240 sb = get_super(bdev); 240 sb = get_super(bdev);
241 drop_super(sb); 241 drop_super(sb);
242 mutex_unlock(&bdev->bd_fsfreeze_mutex); 242 mutex_unlock(&bdev->bd_fsfreeze_mutex);
243 return sb; 243 return sb;
244 } 244 }
245 245
246 sb = get_active_super(bdev); 246 sb = get_active_super(bdev);
247 if (!sb) 247 if (!sb)
248 goto out; 248 goto out;
249 error = freeze_super(sb); 249 error = freeze_super(sb);
250 if (error) { 250 if (error) {
251 deactivate_super(sb); 251 deactivate_super(sb);
252 bdev->bd_fsfreeze_count--; 252 bdev->bd_fsfreeze_count--;
253 mutex_unlock(&bdev->bd_fsfreeze_mutex); 253 mutex_unlock(&bdev->bd_fsfreeze_mutex);
254 return ERR_PTR(error); 254 return ERR_PTR(error);
255 } 255 }
256 deactivate_super(sb); 256 deactivate_super(sb);
257 out: 257 out:
258 sync_blockdev(bdev); 258 sync_blockdev(bdev);
259 mutex_unlock(&bdev->bd_fsfreeze_mutex); 259 mutex_unlock(&bdev->bd_fsfreeze_mutex);
260 return sb; /* thaw_bdev releases s->s_umount */ 260 return sb; /* thaw_bdev releases s->s_umount */
261 } 261 }
262 EXPORT_SYMBOL(freeze_bdev); 262 EXPORT_SYMBOL(freeze_bdev);
263 263
264 /** 264 /**
265 * thaw_bdev -- unlock filesystem 265 * thaw_bdev -- unlock filesystem
266 * @bdev: blockdevice to unlock 266 * @bdev: blockdevice to unlock
267 * @sb: associated superblock 267 * @sb: associated superblock
268 * 268 *
269 * Unlocks the filesystem and marks it writeable again after freeze_bdev(). 269 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
270 */ 270 */
271 int thaw_bdev(struct block_device *bdev, struct super_block *sb) 271 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
272 { 272 {
273 int error = -EINVAL; 273 int error = -EINVAL;
274 274
275 mutex_lock(&bdev->bd_fsfreeze_mutex); 275 mutex_lock(&bdev->bd_fsfreeze_mutex);
276 if (!bdev->bd_fsfreeze_count) 276 if (!bdev->bd_fsfreeze_count)
277 goto out; 277 goto out;
278 278
279 error = 0; 279 error = 0;
280 if (--bdev->bd_fsfreeze_count > 0) 280 if (--bdev->bd_fsfreeze_count > 0)
281 goto out; 281 goto out;
282 282
283 if (!sb) 283 if (!sb)
284 goto out; 284 goto out;
285 285
286 error = thaw_super(sb); 286 error = thaw_super(sb);
287 if (error) { 287 if (error) {
288 bdev->bd_fsfreeze_count++; 288 bdev->bd_fsfreeze_count++;
289 mutex_unlock(&bdev->bd_fsfreeze_mutex); 289 mutex_unlock(&bdev->bd_fsfreeze_mutex);
290 return error; 290 return error;
291 } 291 }
292 out: 292 out:
293 mutex_unlock(&bdev->bd_fsfreeze_mutex); 293 mutex_unlock(&bdev->bd_fsfreeze_mutex);
294 return 0; 294 return 0;
295 } 295 }
296 EXPORT_SYMBOL(thaw_bdev); 296 EXPORT_SYMBOL(thaw_bdev);
297 297
298 static int blkdev_writepage(struct page *page, struct writeback_control *wbc) 298 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
299 { 299 {
300 return block_write_full_page(page, blkdev_get_block, wbc); 300 return block_write_full_page(page, blkdev_get_block, wbc);
301 } 301 }
302 302
303 static int blkdev_readpage(struct file * file, struct page * page) 303 static int blkdev_readpage(struct file * file, struct page * page)
304 { 304 {
305 return block_read_full_page(page, blkdev_get_block); 305 return block_read_full_page(page, blkdev_get_block);
306 } 306 }
307 307
308 static int blkdev_write_begin(struct file *file, struct address_space *mapping, 308 static int blkdev_write_begin(struct file *file, struct address_space *mapping,
309 loff_t pos, unsigned len, unsigned flags, 309 loff_t pos, unsigned len, unsigned flags,
310 struct page **pagep, void **fsdata) 310 struct page **pagep, void **fsdata)
311 { 311 {
312 *pagep = NULL; 312 *pagep = NULL;
313 return block_write_begin_newtrunc(file, mapping, pos, len, flags, 313 return block_write_begin_newtrunc(file, mapping, pos, len, flags,
314 pagep, fsdata, blkdev_get_block); 314 pagep, fsdata, blkdev_get_block);
315 } 315 }
316 316
317 static int blkdev_write_end(struct file *file, struct address_space *mapping, 317 static int blkdev_write_end(struct file *file, struct address_space *mapping,
318 loff_t pos, unsigned len, unsigned copied, 318 loff_t pos, unsigned len, unsigned copied,
319 struct page *page, void *fsdata) 319 struct page *page, void *fsdata)
320 { 320 {
321 int ret; 321 int ret;
322 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); 322 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
323 323
324 unlock_page(page); 324 unlock_page(page);
325 page_cache_release(page); 325 page_cache_release(page);
326 326
327 return ret; 327 return ret;
328 } 328 }
329 329
330 /* 330 /*
331 * private llseek: 331 * private llseek:
332 * for a block special file file->f_path.dentry->d_inode->i_size is zero 332 * for a block special file file->f_path.dentry->d_inode->i_size is zero
333 * so we compute the size by hand (just as in block_read/write above) 333 * so we compute the size by hand (just as in block_read/write above)
334 */ 334 */
335 static loff_t block_llseek(struct file *file, loff_t offset, int origin) 335 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
336 { 336 {
337 struct inode *bd_inode = file->f_mapping->host; 337 struct inode *bd_inode = file->f_mapping->host;
338 loff_t size; 338 loff_t size;
339 loff_t retval; 339 loff_t retval;
340 340
341 mutex_lock(&bd_inode->i_mutex); 341 mutex_lock(&bd_inode->i_mutex);
342 size = i_size_read(bd_inode); 342 size = i_size_read(bd_inode);
343 343
344 switch (origin) { 344 switch (origin) {
345 case 2: 345 case 2:
346 offset += size; 346 offset += size;
347 break; 347 break;
348 case 1: 348 case 1:
349 offset += file->f_pos; 349 offset += file->f_pos;
350 } 350 }
351 retval = -EINVAL; 351 retval = -EINVAL;
352 if (offset >= 0 && offset <= size) { 352 if (offset >= 0 && offset <= size) {
353 if (offset != file->f_pos) { 353 if (offset != file->f_pos) {
354 file->f_pos = offset; 354 file->f_pos = offset;
355 } 355 }
356 retval = offset; 356 retval = offset;
357 } 357 }
358 mutex_unlock(&bd_inode->i_mutex); 358 mutex_unlock(&bd_inode->i_mutex);
359 return retval; 359 return retval;
360 } 360 }
361 361
362 int blkdev_fsync(struct file *filp, int datasync) 362 int blkdev_fsync(struct file *filp, int datasync)
363 { 363 {
364 struct inode *bd_inode = filp->f_mapping->host; 364 struct inode *bd_inode = filp->f_mapping->host;
365 struct block_device *bdev = I_BDEV(bd_inode); 365 struct block_device *bdev = I_BDEV(bd_inode);
366 int error; 366 int error;
367 367
368 /* 368 /*
369 * There is no need to serialise calls to blkdev_issue_flush with 369 * There is no need to serialise calls to blkdev_issue_flush with
370 * i_mutex and doing so causes performance issues with concurrent 370 * i_mutex and doing so causes performance issues with concurrent
371 * O_SYNC writers to a block device. 371 * O_SYNC writers to a block device.
372 */ 372 */
373 mutex_unlock(&bd_inode->i_mutex); 373 mutex_unlock(&bd_inode->i_mutex);
374 374
375 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); 375 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
376 if (error == -EOPNOTSUPP) 376 if (error == -EOPNOTSUPP)
377 error = 0; 377 error = 0;
378 378
379 mutex_lock(&bd_inode->i_mutex); 379 mutex_lock(&bd_inode->i_mutex);
380 380
381 return error; 381 return error;
382 } 382 }
383 EXPORT_SYMBOL(blkdev_fsync); 383 EXPORT_SYMBOL(blkdev_fsync);
384 384
385 /* 385 /*
386 * pseudo-fs 386 * pseudo-fs
387 */ 387 */
388 388
389 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); 389 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
390 static struct kmem_cache * bdev_cachep __read_mostly; 390 static struct kmem_cache * bdev_cachep __read_mostly;
391 391
392 static struct inode *bdev_alloc_inode(struct super_block *sb) 392 static struct inode *bdev_alloc_inode(struct super_block *sb)
393 { 393 {
394 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); 394 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
395 if (!ei) 395 if (!ei)
396 return NULL; 396 return NULL;
397 return &ei->vfs_inode; 397 return &ei->vfs_inode;
398 } 398 }
399 399
400 static void bdev_destroy_inode(struct inode *inode) 400 static void bdev_destroy_inode(struct inode *inode)
401 { 401 {
402 struct bdev_inode *bdi = BDEV_I(inode); 402 struct bdev_inode *bdi = BDEV_I(inode);
403 403
404 kmem_cache_free(bdev_cachep, bdi); 404 kmem_cache_free(bdev_cachep, bdi);
405 } 405 }
406 406
407 static void init_once(void *foo) 407 static void init_once(void *foo)
408 { 408 {
409 struct bdev_inode *ei = (struct bdev_inode *) foo; 409 struct bdev_inode *ei = (struct bdev_inode *) foo;
410 struct block_device *bdev = &ei->bdev; 410 struct block_device *bdev = &ei->bdev;
411 411
412 memset(bdev, 0, sizeof(*bdev)); 412 memset(bdev, 0, sizeof(*bdev));
413 mutex_init(&bdev->bd_mutex); 413 mutex_init(&bdev->bd_mutex);
414 INIT_LIST_HEAD(&bdev->bd_inodes); 414 INIT_LIST_HEAD(&bdev->bd_inodes);
415 INIT_LIST_HEAD(&bdev->bd_list); 415 INIT_LIST_HEAD(&bdev->bd_list);
416 #ifdef CONFIG_SYSFS 416 #ifdef CONFIG_SYSFS
417 INIT_LIST_HEAD(&bdev->bd_holder_list); 417 INIT_LIST_HEAD(&bdev->bd_holder_list);
418 #endif 418 #endif
419 inode_init_once(&ei->vfs_inode); 419 inode_init_once(&ei->vfs_inode);
420 /* Initialize mutex for freeze. */ 420 /* Initialize mutex for freeze. */
421 mutex_init(&bdev->bd_fsfreeze_mutex); 421 mutex_init(&bdev->bd_fsfreeze_mutex);
422 } 422 }
423 423
424 static inline void __bd_forget(struct inode *inode) 424 static inline void __bd_forget(struct inode *inode)
425 { 425 {
426 list_del_init(&inode->i_devices); 426 list_del_init(&inode->i_devices);
427 inode->i_bdev = NULL; 427 inode->i_bdev = NULL;
428 inode->i_mapping = &inode->i_data; 428 inode->i_mapping = &inode->i_data;
429 } 429 }
430 430
431 static void bdev_clear_inode(struct inode *inode) 431 static void bdev_clear_inode(struct inode *inode)
432 { 432 {
433 struct block_device *bdev = &BDEV_I(inode)->bdev; 433 struct block_device *bdev = &BDEV_I(inode)->bdev;
434 struct list_head *p; 434 struct list_head *p;
435 spin_lock(&bdev_lock); 435 spin_lock(&bdev_lock);
436 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { 436 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
437 __bd_forget(list_entry(p, struct inode, i_devices)); 437 __bd_forget(list_entry(p, struct inode, i_devices));
438 } 438 }
439 list_del_init(&bdev->bd_list); 439 list_del_init(&bdev->bd_list);
440 spin_unlock(&bdev_lock); 440 spin_unlock(&bdev_lock);
441 } 441 }
442 442
443 static const struct super_operations bdev_sops = { 443 static const struct super_operations bdev_sops = {
444 .statfs = simple_statfs, 444 .statfs = simple_statfs,
445 .alloc_inode = bdev_alloc_inode, 445 .alloc_inode = bdev_alloc_inode,
446 .destroy_inode = bdev_destroy_inode, 446 .destroy_inode = bdev_destroy_inode,
447 .drop_inode = generic_delete_inode, 447 .drop_inode = generic_delete_inode,
448 .clear_inode = bdev_clear_inode, 448 .clear_inode = bdev_clear_inode,
449 }; 449 };
450 450
451 static int bd_get_sb(struct file_system_type *fs_type, 451 static int bd_get_sb(struct file_system_type *fs_type,
452 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 452 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
453 { 453 {
454 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); 454 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
455 } 455 }
456 456
457 static struct file_system_type bd_type = { 457 static struct file_system_type bd_type = {
458 .name = "bdev", 458 .name = "bdev",
459 .get_sb = bd_get_sb, 459 .get_sb = bd_get_sb,
460 .kill_sb = kill_anon_super, 460 .kill_sb = kill_anon_super,
461 }; 461 };
462 462
463 struct super_block *blockdev_superblock __read_mostly; 463 struct super_block *blockdev_superblock __read_mostly;
464 464
465 void __init bdev_cache_init(void) 465 void __init bdev_cache_init(void)
466 { 466 {
467 int err; 467 int err;
468 struct vfsmount *bd_mnt; 468 struct vfsmount *bd_mnt;
469 469
470 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 470 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
471 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 471 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
472 SLAB_MEM_SPREAD|SLAB_PANIC), 472 SLAB_MEM_SPREAD|SLAB_PANIC),
473 init_once); 473 init_once);
474 err = register_filesystem(&bd_type); 474 err = register_filesystem(&bd_type);
475 if (err) 475 if (err)
476 panic("Cannot register bdev pseudo-fs"); 476 panic("Cannot register bdev pseudo-fs");
477 bd_mnt = kern_mount(&bd_type); 477 bd_mnt = kern_mount(&bd_type);
478 if (IS_ERR(bd_mnt)) 478 if (IS_ERR(bd_mnt))
479 panic("Cannot create bdev pseudo-fs"); 479 panic("Cannot create bdev pseudo-fs");
480 /* 480 /*
481 * This vfsmount structure is only used to obtain the 481 * This vfsmount structure is only used to obtain the
482 * blockdev_superblock, so tell kmemleak not to report it. 482 * blockdev_superblock, so tell kmemleak not to report it.
483 */ 483 */
484 kmemleak_not_leak(bd_mnt); 484 kmemleak_not_leak(bd_mnt);
485 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ 485 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
486 } 486 }
487 487
488 /* 488 /*
489 * Most likely _very_ bad one - but then it's hardly critical for small 489 * Most likely _very_ bad one - but then it's hardly critical for small
490 * /dev and can be fixed when somebody will need really large one. 490 * /dev and can be fixed when somebody will need really large one.
491 * Keep in mind that it will be fed through icache hash function too. 491 * Keep in mind that it will be fed through icache hash function too.
492 */ 492 */
493 static inline unsigned long hash(dev_t dev) 493 static inline unsigned long hash(dev_t dev)
494 { 494 {
495 return MAJOR(dev)+MINOR(dev); 495 return MAJOR(dev)+MINOR(dev);
496 } 496 }
497 497
498 static int bdev_test(struct inode *inode, void *data) 498 static int bdev_test(struct inode *inode, void *data)
499 { 499 {
500 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; 500 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
501 } 501 }
502 502
503 static int bdev_set(struct inode *inode, void *data) 503 static int bdev_set(struct inode *inode, void *data)
504 { 504 {
505 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; 505 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
506 return 0; 506 return 0;
507 } 507 }
508 508
509 static LIST_HEAD(all_bdevs); 509 static LIST_HEAD(all_bdevs);
510 510
511 struct block_device *bdget(dev_t dev) 511 struct block_device *bdget(dev_t dev)
512 { 512 {
513 struct block_device *bdev; 513 struct block_device *bdev;
514 struct inode *inode; 514 struct inode *inode;
515 515
516 inode = iget5_locked(blockdev_superblock, hash(dev), 516 inode = iget5_locked(blockdev_superblock, hash(dev),
517 bdev_test, bdev_set, &dev); 517 bdev_test, bdev_set, &dev);
518 518
519 if (!inode) 519 if (!inode)
520 return NULL; 520 return NULL;
521 521
522 bdev = &BDEV_I(inode)->bdev; 522 bdev = &BDEV_I(inode)->bdev;
523 523
524 if (inode->i_state & I_NEW) { 524 if (inode->i_state & I_NEW) {
525 bdev->bd_contains = NULL; 525 bdev->bd_contains = NULL;
526 bdev->bd_inode = inode; 526 bdev->bd_inode = inode;
527 bdev->bd_block_size = (1 << inode->i_blkbits); 527 bdev->bd_block_size = (1 << inode->i_blkbits);
528 bdev->bd_part_count = 0; 528 bdev->bd_part_count = 0;
529 bdev->bd_invalidated = 0; 529 bdev->bd_invalidated = 0;
530 inode->i_mode = S_IFBLK; 530 inode->i_mode = S_IFBLK;
531 inode->i_rdev = dev; 531 inode->i_rdev = dev;
532 inode->i_bdev = bdev; 532 inode->i_bdev = bdev;
533 inode->i_data.a_ops = &def_blk_aops; 533 inode->i_data.a_ops = &def_blk_aops;
534 mapping_set_gfp_mask(&inode->i_data, GFP_USER); 534 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
535 inode->i_data.backing_dev_info = &default_backing_dev_info; 535 inode->i_data.backing_dev_info = &default_backing_dev_info;
536 spin_lock(&bdev_lock); 536 spin_lock(&bdev_lock);
537 list_add(&bdev->bd_list, &all_bdevs); 537 list_add(&bdev->bd_list, &all_bdevs);
538 spin_unlock(&bdev_lock); 538 spin_unlock(&bdev_lock);
539 unlock_new_inode(inode); 539 unlock_new_inode(inode);
540 } 540 }
541 return bdev; 541 return bdev;
542 } 542 }
543 543
544 EXPORT_SYMBOL(bdget); 544 EXPORT_SYMBOL(bdget);
545 545
546 /** 546 /**
547 * bdgrab -- Grab a reference to an already referenced block device 547 * bdgrab -- Grab a reference to an already referenced block device
548 * @bdev: Block device to grab a reference to. 548 * @bdev: Block device to grab a reference to.
549 */ 549 */
550 struct block_device *bdgrab(struct block_device *bdev) 550 struct block_device *bdgrab(struct block_device *bdev)
551 { 551 {
552 atomic_inc(&bdev->bd_inode->i_count); 552 atomic_inc(&bdev->bd_inode->i_count);
553 return bdev; 553 return bdev;
554 } 554 }
555 555
556 long nr_blockdev_pages(void) 556 long nr_blockdev_pages(void)
557 { 557 {
558 struct block_device *bdev; 558 struct block_device *bdev;
559 long ret = 0; 559 long ret = 0;
560 spin_lock(&bdev_lock); 560 spin_lock(&bdev_lock);
561 list_for_each_entry(bdev, &all_bdevs, bd_list) { 561 list_for_each_entry(bdev, &all_bdevs, bd_list) {
562 ret += bdev->bd_inode->i_mapping->nrpages; 562 ret += bdev->bd_inode->i_mapping->nrpages;
563 } 563 }
564 spin_unlock(&bdev_lock); 564 spin_unlock(&bdev_lock);
565 return ret; 565 return ret;
566 } 566 }
567 567
568 void bdput(struct block_device *bdev) 568 void bdput(struct block_device *bdev)
569 { 569 {
570 iput(bdev->bd_inode); 570 iput(bdev->bd_inode);
571 } 571 }
572 572
573 EXPORT_SYMBOL(bdput); 573 EXPORT_SYMBOL(bdput);
574 574
575 static struct block_device *bd_acquire(struct inode *inode) 575 static struct block_device *bd_acquire(struct inode *inode)
576 { 576 {
577 struct block_device *bdev; 577 struct block_device *bdev;
578 578
579 spin_lock(&bdev_lock); 579 spin_lock(&bdev_lock);
580 bdev = inode->i_bdev; 580 bdev = inode->i_bdev;
581 if (bdev) { 581 if (bdev) {
582 atomic_inc(&bdev->bd_inode->i_count); 582 atomic_inc(&bdev->bd_inode->i_count);
583 spin_unlock(&bdev_lock); 583 spin_unlock(&bdev_lock);
584 return bdev; 584 return bdev;
585 } 585 }
586 spin_unlock(&bdev_lock); 586 spin_unlock(&bdev_lock);
587 587
588 bdev = bdget(inode->i_rdev); 588 bdev = bdget(inode->i_rdev);
589 if (bdev) { 589 if (bdev) {
590 spin_lock(&bdev_lock); 590 spin_lock(&bdev_lock);
591 if (!inode->i_bdev) { 591 if (!inode->i_bdev) {
592 /* 592 /*
593 * We take an additional bd_inode->i_count for inode, 593 * We take an additional bd_inode->i_count for inode,
594 * and it's released in clear_inode() of inode. 594 * and it's released in clear_inode() of inode.
595 * So, we can access it via ->i_mapping always 595 * So, we can access it via ->i_mapping always
596 * without igrab(). 596 * without igrab().
597 */ 597 */
598 atomic_inc(&bdev->bd_inode->i_count); 598 atomic_inc(&bdev->bd_inode->i_count);
599 inode->i_bdev = bdev; 599 inode->i_bdev = bdev;
600 inode->i_mapping = bdev->bd_inode->i_mapping; 600 inode->i_mapping = bdev->bd_inode->i_mapping;
601 list_add(&inode->i_devices, &bdev->bd_inodes); 601 list_add(&inode->i_devices, &bdev->bd_inodes);
602 } 602 }
603 spin_unlock(&bdev_lock); 603 spin_unlock(&bdev_lock);
604 } 604 }
605 return bdev; 605 return bdev;
606 } 606 }
607 607
608 /* Call when you free inode */ 608 /* Call when you free inode */
609 609
610 void bd_forget(struct inode *inode) 610 void bd_forget(struct inode *inode)
611 { 611 {
612 struct block_device *bdev = NULL; 612 struct block_device *bdev = NULL;
613 613
614 spin_lock(&bdev_lock); 614 spin_lock(&bdev_lock);
615 if (inode->i_bdev) { 615 if (inode->i_bdev) {
616 if (!sb_is_blkdev_sb(inode->i_sb)) 616 if (!sb_is_blkdev_sb(inode->i_sb))
617 bdev = inode->i_bdev; 617 bdev = inode->i_bdev;
618 __bd_forget(inode); 618 __bd_forget(inode);
619 } 619 }
620 spin_unlock(&bdev_lock); 620 spin_unlock(&bdev_lock);
621 621
622 if (bdev) 622 if (bdev)
623 iput(bdev->bd_inode); 623 iput(bdev->bd_inode);
624 } 624 }
625 625
626 /** 626 /**
627 * bd_may_claim - test whether a block device can be claimed 627 * bd_may_claim - test whether a block device can be claimed
628 * @bdev: block device of interest 628 * @bdev: block device of interest
629 * @whole: whole block device containing @bdev, may equal @bdev 629 * @whole: whole block device containing @bdev, may equal @bdev
630 * @holder: holder trying to claim @bdev 630 * @holder: holder trying to claim @bdev
631 * 631 *
632 * Test whther @bdev can be claimed by @holder. 632 * Test whther @bdev can be claimed by @holder.
633 * 633 *
634 * CONTEXT: 634 * CONTEXT:
635 * spin_lock(&bdev_lock). 635 * spin_lock(&bdev_lock).
636 * 636 *
637 * RETURNS: 637 * RETURNS:
638 * %true if @bdev can be claimed, %false otherwise. 638 * %true if @bdev can be claimed, %false otherwise.
639 */ 639 */
640 static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, 640 static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
641 void *holder) 641 void *holder)
642 { 642 {
643 if (bdev->bd_holder == holder) 643 if (bdev->bd_holder == holder)
644 return true; /* already a holder */ 644 return true; /* already a holder */
645 else if (bdev->bd_holder != NULL) 645 else if (bdev->bd_holder != NULL)
646 return false; /* held by someone else */ 646 return false; /* held by someone else */
647 else if (bdev->bd_contains == bdev) 647 else if (bdev->bd_contains == bdev)
648 return true; /* is a whole device which isn't held */ 648 return true; /* is a whole device which isn't held */
649 649
650 else if (whole->bd_holder == bd_claim) 650 else if (whole->bd_holder == bd_claim)
651 return true; /* is a partition of a device that is being partitioned */ 651 return true; /* is a partition of a device that is being partitioned */
652 else if (whole->bd_holder != NULL) 652 else if (whole->bd_holder != NULL)
653 return false; /* is a partition of a held device */ 653 return false; /* is a partition of a held device */
654 else 654 else
655 return true; /* is a partition of an un-held device */ 655 return true; /* is a partition of an un-held device */
656 } 656 }
657 657
658 /** 658 /**
659 * bd_prepare_to_claim - prepare to claim a block device 659 * bd_prepare_to_claim - prepare to claim a block device
660 * @bdev: block device of interest 660 * @bdev: block device of interest
661 * @whole: the whole device containing @bdev, may equal @bdev 661 * @whole: the whole device containing @bdev, may equal @bdev
662 * @holder: holder trying to claim @bdev 662 * @holder: holder trying to claim @bdev
663 * 663 *
664 * Prepare to claim @bdev. This function fails if @bdev is already 664 * Prepare to claim @bdev. This function fails if @bdev is already
665 * claimed by another holder and waits if another claiming is in 665 * claimed by another holder and waits if another claiming is in
666 * progress. This function doesn't actually claim. On successful 666 * progress. This function doesn't actually claim. On successful
667 * return, the caller has ownership of bd_claiming and bd_holder[s]. 667 * return, the caller has ownership of bd_claiming and bd_holder[s].
668 * 668 *
669 * CONTEXT: 669 * CONTEXT:
670 * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab 670 * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab
671 * it multiple times. 671 * it multiple times.
672 * 672 *
673 * RETURNS: 673 * RETURNS:
674 * 0 if @bdev can be claimed, -EBUSY otherwise. 674 * 0 if @bdev can be claimed, -EBUSY otherwise.
675 */ 675 */
676 static int bd_prepare_to_claim(struct block_device *bdev, 676 static int bd_prepare_to_claim(struct block_device *bdev,
677 struct block_device *whole, void *holder) 677 struct block_device *whole, void *holder)
678 { 678 {
679 retry: 679 retry:
680 /* if someone else claimed, fail */ 680 /* if someone else claimed, fail */
681 if (!bd_may_claim(bdev, whole, holder)) 681 if (!bd_may_claim(bdev, whole, holder))
682 return -EBUSY; 682 return -EBUSY;
683 683
684 /* if someone else is claiming, wait for it to finish */ 684 /* if claiming is already in progress, wait for it to finish */
685 if (whole->bd_claiming && whole->bd_claiming != holder) { 685 if (whole->bd_claiming) {
686 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); 686 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
687 DEFINE_WAIT(wait); 687 DEFINE_WAIT(wait);
688 688
689 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); 689 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
690 spin_unlock(&bdev_lock); 690 spin_unlock(&bdev_lock);
691 schedule(); 691 schedule();
692 finish_wait(wq, &wait); 692 finish_wait(wq, &wait);
693 spin_lock(&bdev_lock); 693 spin_lock(&bdev_lock);
694 goto retry; 694 goto retry;
695 } 695 }
696 696
697 /* yay, all mine */ 697 /* yay, all mine */
698 return 0; 698 return 0;
699 } 699 }
700 700
701 /** 701 /**
702 * bd_start_claiming - start claiming a block device 702 * bd_start_claiming - start claiming a block device
703 * @bdev: block device of interest 703 * @bdev: block device of interest
704 * @holder: holder trying to claim @bdev 704 * @holder: holder trying to claim @bdev
705 * 705 *
706 * @bdev is about to be opened exclusively. Check @bdev can be opened 706 * @bdev is about to be opened exclusively. Check @bdev can be opened
707 * exclusively and mark that an exclusive open is in progress. Each 707 * exclusively and mark that an exclusive open is in progress. Each
708 * successful call to this function must be matched with a call to 708 * successful call to this function must be matched with a call to
709 * either bd_finish_claiming() or bd_abort_claiming() (which do not 709 * either bd_finish_claiming() or bd_abort_claiming() (which do not
710 * fail). 710 * fail).
711 * 711 *
712 * This function is used to gain exclusive access to the block device 712 * This function is used to gain exclusive access to the block device
713 * without actually causing other exclusive open attempts to fail. It 713 * without actually causing other exclusive open attempts to fail. It
714 * should be used when the open sequence itself requires exclusive 714 * should be used when the open sequence itself requires exclusive
715 * access but may subsequently fail. 715 * access but may subsequently fail.
716 * 716 *
717 * CONTEXT: 717 * CONTEXT:
718 * Might sleep. 718 * Might sleep.
719 * 719 *
720 * RETURNS: 720 * RETURNS:
721 * Pointer to the block device containing @bdev on success, ERR_PTR() 721 * Pointer to the block device containing @bdev on success, ERR_PTR()
722 * value on failure. 722 * value on failure.
723 */ 723 */
724 static struct block_device *bd_start_claiming(struct block_device *bdev, 724 static struct block_device *bd_start_claiming(struct block_device *bdev,
725 void *holder) 725 void *holder)
726 { 726 {
727 struct gendisk *disk; 727 struct gendisk *disk;
728 struct block_device *whole; 728 struct block_device *whole;
729 int partno, err; 729 int partno, err;
730 730
731 might_sleep(); 731 might_sleep();
732 732
733 /* 733 /*
734 * @bdev might not have been initialized properly yet, look up 734 * @bdev might not have been initialized properly yet, look up
735 * and grab the outer block device the hard way. 735 * and grab the outer block device the hard way.
736 */ 736 */
737 disk = get_gendisk(bdev->bd_dev, &partno); 737 disk = get_gendisk(bdev->bd_dev, &partno);
738 if (!disk) 738 if (!disk)
739 return ERR_PTR(-ENXIO); 739 return ERR_PTR(-ENXIO);
740 740
741 whole = bdget_disk(disk, 0); 741 whole = bdget_disk(disk, 0);
742 module_put(disk->fops->owner); 742 module_put(disk->fops->owner);
743 put_disk(disk); 743 put_disk(disk);
744 if (!whole) 744 if (!whole)
745 return ERR_PTR(-ENOMEM); 745 return ERR_PTR(-ENOMEM);
746 746
747 /* prepare to claim, if successful, mark claiming in progress */ 747 /* prepare to claim, if successful, mark claiming in progress */
748 spin_lock(&bdev_lock); 748 spin_lock(&bdev_lock);
749 749
750 err = bd_prepare_to_claim(bdev, whole, holder); 750 err = bd_prepare_to_claim(bdev, whole, holder);
751 if (err == 0) { 751 if (err == 0) {
752 whole->bd_claiming = holder; 752 whole->bd_claiming = holder;
753 spin_unlock(&bdev_lock); 753 spin_unlock(&bdev_lock);
754 return whole; 754 return whole;
755 } else { 755 } else {
756 spin_unlock(&bdev_lock); 756 spin_unlock(&bdev_lock);
757 bdput(whole); 757 bdput(whole);
758 return ERR_PTR(err); 758 return ERR_PTR(err);
759 } 759 }
760 } 760 }
761 761
762 /* releases bdev_lock */ 762 /* releases bdev_lock */
763 static void __bd_abort_claiming(struct block_device *whole, void *holder) 763 static void __bd_abort_claiming(struct block_device *whole, void *holder)
764 { 764 {
765 BUG_ON(whole->bd_claiming != holder); 765 BUG_ON(whole->bd_claiming != holder);
766 whole->bd_claiming = NULL; 766 whole->bd_claiming = NULL;
767 wake_up_bit(&whole->bd_claiming, 0); 767 wake_up_bit(&whole->bd_claiming, 0);
768 768
769 spin_unlock(&bdev_lock); 769 spin_unlock(&bdev_lock);
770 bdput(whole); 770 bdput(whole);
771 } 771 }
772 772
773 /** 773 /**
774 * bd_abort_claiming - abort claiming a block device 774 * bd_abort_claiming - abort claiming a block device
775 * @whole: whole block device returned by bd_start_claiming() 775 * @whole: whole block device returned by bd_start_claiming()
776 * @holder: holder trying to claim @bdev 776 * @holder: holder trying to claim @bdev
777 * 777 *
778 * Abort a claiming block started by bd_start_claiming(). Note that 778 * Abort a claiming block started by bd_start_claiming(). Note that
779 * @whole is not the block device to be claimed but the whole device 779 * @whole is not the block device to be claimed but the whole device
780 * returned by bd_start_claiming(). 780 * returned by bd_start_claiming().
781 * 781 *
782 * CONTEXT: 782 * CONTEXT:
783 * Grabs and releases bdev_lock. 783 * Grabs and releases bdev_lock.
784 */ 784 */
785 static void bd_abort_claiming(struct block_device *whole, void *holder) 785 static void bd_abort_claiming(struct block_device *whole, void *holder)
786 { 786 {
787 spin_lock(&bdev_lock); 787 spin_lock(&bdev_lock);
788 __bd_abort_claiming(whole, holder); /* releases bdev_lock */ 788 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
789 } 789 }
790 790
791 /* increment holders when we have a legitimate claim. requires bdev_lock */ 791 /* increment holders when we have a legitimate claim. requires bdev_lock */
792 static void __bd_claim(struct block_device *bdev, struct block_device *whole, 792 static void __bd_claim(struct block_device *bdev, struct block_device *whole,
793 void *holder) 793 void *holder)
794 { 794 {
795 /* note that for a whole device bd_holders 795 /* note that for a whole device bd_holders
796 * will be incremented twice, and bd_holder will 796 * will be incremented twice, and bd_holder will
797 * be set to bd_claim before being set to holder 797 * be set to bd_claim before being set to holder
798 */ 798 */
799 whole->bd_holders++; 799 whole->bd_holders++;
800 whole->bd_holder = bd_claim; 800 whole->bd_holder = bd_claim;
801 bdev->bd_holders++; 801 bdev->bd_holders++;
802 bdev->bd_holder = holder; 802 bdev->bd_holder = holder;
803 } 803 }
804 804
805 /** 805 /**
806 * bd_finish_claiming - finish claiming a block device 806 * bd_finish_claiming - finish claiming a block device
807 * @bdev: block device of interest (passed to bd_start_claiming()) 807 * @bdev: block device of interest (passed to bd_start_claiming())
808 * @whole: whole block device returned by bd_start_claiming() 808 * @whole: whole block device returned by bd_start_claiming()
809 * @holder: holder trying to claim @bdev 809 * @holder: holder trying to claim @bdev
810 * 810 *
811 * Finish a claiming block started by bd_start_claiming(). 811 * Finish a claiming block started by bd_start_claiming().
812 * 812 *
813 * CONTEXT: 813 * CONTEXT:
814 * Grabs and releases bdev_lock. 814 * Grabs and releases bdev_lock.
815 */ 815 */
816 static void bd_finish_claiming(struct block_device *bdev, 816 static void bd_finish_claiming(struct block_device *bdev,
817 struct block_device *whole, void *holder) 817 struct block_device *whole, void *holder)
818 { 818 {
819 spin_lock(&bdev_lock); 819 spin_lock(&bdev_lock);
820 BUG_ON(!bd_may_claim(bdev, whole, holder)); 820 BUG_ON(!bd_may_claim(bdev, whole, holder));
821 __bd_claim(bdev, whole, holder); 821 __bd_claim(bdev, whole, holder);
822 __bd_abort_claiming(whole, holder); /* not actually an abort */ 822 __bd_abort_claiming(whole, holder); /* not actually an abort */
823 } 823 }
824 824
825 /** 825 /**
826 * bd_claim - claim a block device 826 * bd_claim - claim a block device
827 * @bdev: block device to claim 827 * @bdev: block device to claim
828 * @holder: holder trying to claim @bdev 828 * @holder: holder trying to claim @bdev
829 * 829 *
830 * Try to claim @bdev which must have been opened successfully. 830 * Try to claim @bdev which must have been opened successfully.
831 * 831 *
832 * CONTEXT: 832 * CONTEXT:
833 * Might sleep. 833 * Might sleep.
834 * 834 *
835 * RETURNS: 835 * RETURNS:
836 * 0 if successful, -EBUSY if @bdev is already claimed. 836 * 0 if successful, -EBUSY if @bdev is already claimed.
837 */ 837 */
838 int bd_claim(struct block_device *bdev, void *holder) 838 int bd_claim(struct block_device *bdev, void *holder)
839 { 839 {
840 struct block_device *whole = bdev->bd_contains; 840 struct block_device *whole = bdev->bd_contains;
841 int res; 841 int res;
842 842
843 might_sleep(); 843 might_sleep();
844 844
845 spin_lock(&bdev_lock); 845 spin_lock(&bdev_lock);
846 res = bd_prepare_to_claim(bdev, whole, holder); 846 res = bd_prepare_to_claim(bdev, whole, holder);
847 if (res == 0) 847 if (res == 0)
848 __bd_claim(bdev, whole, holder); 848 __bd_claim(bdev, whole, holder);
849 spin_unlock(&bdev_lock); 849 spin_unlock(&bdev_lock);
850 850
851 return res; 851 return res;
852 } 852 }
853 EXPORT_SYMBOL(bd_claim); 853 EXPORT_SYMBOL(bd_claim);
854 854
855 void bd_release(struct block_device *bdev) 855 void bd_release(struct block_device *bdev)
856 { 856 {
857 spin_lock(&bdev_lock); 857 spin_lock(&bdev_lock);
858 if (!--bdev->bd_contains->bd_holders) 858 if (!--bdev->bd_contains->bd_holders)
859 bdev->bd_contains->bd_holder = NULL; 859 bdev->bd_contains->bd_holder = NULL;
860 if (!--bdev->bd_holders) 860 if (!--bdev->bd_holders)
861 bdev->bd_holder = NULL; 861 bdev->bd_holder = NULL;
862 spin_unlock(&bdev_lock); 862 spin_unlock(&bdev_lock);
863 } 863 }
864 864
865 EXPORT_SYMBOL(bd_release); 865 EXPORT_SYMBOL(bd_release);
866 866
867 #ifdef CONFIG_SYSFS 867 #ifdef CONFIG_SYSFS
868 /* 868 /*
869 * Functions for bd_claim_by_kobject / bd_release_from_kobject 869 * Functions for bd_claim_by_kobject / bd_release_from_kobject
870 * 870 *
871 * If a kobject is passed to bd_claim_by_kobject() 871 * If a kobject is passed to bd_claim_by_kobject()
872 * and the kobject has a parent directory, 872 * and the kobject has a parent directory,
873 * following symlinks are created: 873 * following symlinks are created:
874 * o from the kobject to the claimed bdev 874 * o from the kobject to the claimed bdev
875 * o from "holders" directory of the bdev to the parent of the kobject 875 * o from "holders" directory of the bdev to the parent of the kobject
876 * bd_release_from_kobject() removes these symlinks. 876 * bd_release_from_kobject() removes these symlinks.
877 * 877 *
878 * Example: 878 * Example:
879 * If /dev/dm-0 maps to /dev/sda, kobject corresponding to 879 * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
880 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: 880 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
881 * /sys/block/dm-0/slaves/sda --> /sys/block/sda 881 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
882 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 882 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
883 */ 883 */
884 884
885 static int add_symlink(struct kobject *from, struct kobject *to) 885 static int add_symlink(struct kobject *from, struct kobject *to)
886 { 886 {
887 if (!from || !to) 887 if (!from || !to)
888 return 0; 888 return 0;
889 return sysfs_create_link(from, to, kobject_name(to)); 889 return sysfs_create_link(from, to, kobject_name(to));
890 } 890 }
891 891
892 static void del_symlink(struct kobject *from, struct kobject *to) 892 static void del_symlink(struct kobject *from, struct kobject *to)
893 { 893 {
894 if (!from || !to) 894 if (!from || !to)
895 return; 895 return;
896 sysfs_remove_link(from, kobject_name(to)); 896 sysfs_remove_link(from, kobject_name(to));
897 } 897 }
898 898
899 /* 899 /*
900 * 'struct bd_holder' contains pointers to kobjects symlinked by 900 * 'struct bd_holder' contains pointers to kobjects symlinked by
901 * bd_claim_by_kobject. 901 * bd_claim_by_kobject.
902 * It's connected to bd_holder_list which is protected by bdev->bd_sem. 902 * It's connected to bd_holder_list which is protected by bdev->bd_sem.
903 */ 903 */
904 struct bd_holder { 904 struct bd_holder {
905 struct list_head list; /* chain of holders of the bdev */ 905 struct list_head list; /* chain of holders of the bdev */
906 int count; /* references from the holder */ 906 int count; /* references from the holder */
907 struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ 907 struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
908 struct kobject *hdev; /* e.g. "/block/dm-0" */ 908 struct kobject *hdev; /* e.g. "/block/dm-0" */
909 struct kobject *hdir; /* e.g. "/block/sda/holders" */ 909 struct kobject *hdir; /* e.g. "/block/sda/holders" */
910 struct kobject *sdev; /* e.g. "/block/sda" */ 910 struct kobject *sdev; /* e.g. "/block/sda" */
911 }; 911 };
912 912
913 /* 913 /*
914 * Get references of related kobjects at once. 914 * Get references of related kobjects at once.
915 * Returns 1 on success. 0 on failure. 915 * Returns 1 on success. 0 on failure.
916 * 916 *
917 * Should call bd_holder_release_dirs() after successful use. 917 * Should call bd_holder_release_dirs() after successful use.
918 */ 918 */
919 static int bd_holder_grab_dirs(struct block_device *bdev, 919 static int bd_holder_grab_dirs(struct block_device *bdev,
920 struct bd_holder *bo) 920 struct bd_holder *bo)
921 { 921 {
922 if (!bdev || !bo) 922 if (!bdev || !bo)
923 return 0; 923 return 0;
924 924
925 bo->sdir = kobject_get(bo->sdir); 925 bo->sdir = kobject_get(bo->sdir);
926 if (!bo->sdir) 926 if (!bo->sdir)
927 return 0; 927 return 0;
928 928
929 bo->hdev = kobject_get(bo->sdir->parent); 929 bo->hdev = kobject_get(bo->sdir->parent);
930 if (!bo->hdev) 930 if (!bo->hdev)
931 goto fail_put_sdir; 931 goto fail_put_sdir;
932 932
933 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); 933 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
934 if (!bo->sdev) 934 if (!bo->sdev)
935 goto fail_put_hdev; 935 goto fail_put_hdev;
936 936
937 bo->hdir = kobject_get(bdev->bd_part->holder_dir); 937 bo->hdir = kobject_get(bdev->bd_part->holder_dir);
938 if (!bo->hdir) 938 if (!bo->hdir)
939 goto fail_put_sdev; 939 goto fail_put_sdev;
940 940
941 return 1; 941 return 1;
942 942
943 fail_put_sdev: 943 fail_put_sdev:
944 kobject_put(bo->sdev); 944 kobject_put(bo->sdev);
945 fail_put_hdev: 945 fail_put_hdev:
946 kobject_put(bo->hdev); 946 kobject_put(bo->hdev);
947 fail_put_sdir: 947 fail_put_sdir:
948 kobject_put(bo->sdir); 948 kobject_put(bo->sdir);
949 949
950 return 0; 950 return 0;
951 } 951 }
952 952
953 /* Put references of related kobjects at once. */ 953 /* Put references of related kobjects at once. */
954 static void bd_holder_release_dirs(struct bd_holder *bo) 954 static void bd_holder_release_dirs(struct bd_holder *bo)
955 { 955 {
956 kobject_put(bo->hdir); 956 kobject_put(bo->hdir);
957 kobject_put(bo->sdev); 957 kobject_put(bo->sdev);
958 kobject_put(bo->hdev); 958 kobject_put(bo->hdev);
959 kobject_put(bo->sdir); 959 kobject_put(bo->sdir);
960 } 960 }
961 961
962 static struct bd_holder *alloc_bd_holder(struct kobject *kobj) 962 static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
963 { 963 {
964 struct bd_holder *bo; 964 struct bd_holder *bo;
965 965
966 bo = kzalloc(sizeof(*bo), GFP_KERNEL); 966 bo = kzalloc(sizeof(*bo), GFP_KERNEL);
967 if (!bo) 967 if (!bo)
968 return NULL; 968 return NULL;
969 969
970 bo->count = 1; 970 bo->count = 1;
971 bo->sdir = kobj; 971 bo->sdir = kobj;
972 972
973 return bo; 973 return bo;
974 } 974 }
975 975
976 static void free_bd_holder(struct bd_holder *bo) 976 static void free_bd_holder(struct bd_holder *bo)
977 { 977 {
978 kfree(bo); 978 kfree(bo);
979 } 979 }
980 980
981 /** 981 /**
982 * find_bd_holder - find matching struct bd_holder from the block device 982 * find_bd_holder - find matching struct bd_holder from the block device
983 * 983 *
984 * @bdev: struct block device to be searched 984 * @bdev: struct block device to be searched
985 * @bo: target struct bd_holder 985 * @bo: target struct bd_holder
986 * 986 *
987 * Returns matching entry with @bo in @bdev->bd_holder_list. 987 * Returns matching entry with @bo in @bdev->bd_holder_list.
988 * If found, increment the reference count and return the pointer. 988 * If found, increment the reference count and return the pointer.
989 * If not found, returns NULL. 989 * If not found, returns NULL.
990 */ 990 */
991 static struct bd_holder *find_bd_holder(struct block_device *bdev, 991 static struct bd_holder *find_bd_holder(struct block_device *bdev,
992 struct bd_holder *bo) 992 struct bd_holder *bo)
993 { 993 {
994 struct bd_holder *tmp; 994 struct bd_holder *tmp;
995 995
996 list_for_each_entry(tmp, &bdev->bd_holder_list, list) 996 list_for_each_entry(tmp, &bdev->bd_holder_list, list)
997 if (tmp->sdir == bo->sdir) { 997 if (tmp->sdir == bo->sdir) {
998 tmp->count++; 998 tmp->count++;
999 return tmp; 999 return tmp;
1000 } 1000 }
1001 1001
1002 return NULL; 1002 return NULL;
1003 } 1003 }
1004 1004
1005 /** 1005 /**
1006 * add_bd_holder - create sysfs symlinks for bd_claim() relationship 1006 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
1007 * 1007 *
1008 * @bdev: block device to be bd_claimed 1008 * @bdev: block device to be bd_claimed
1009 * @bo: preallocated and initialized by alloc_bd_holder() 1009 * @bo: preallocated and initialized by alloc_bd_holder()
1010 * 1010 *
1011 * Add @bo to @bdev->bd_holder_list, create symlinks. 1011 * Add @bo to @bdev->bd_holder_list, create symlinks.
1012 * 1012 *
1013 * Returns 0 if symlinks are created. 1013 * Returns 0 if symlinks are created.
1014 * Returns a negative errno if something fails. 1014 * Returns a negative errno if something fails.
1015 */ 1015 */
1016 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) 1016 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
1017 { 1017 {
1018 int err; 1018 int err;
1019 1019
1020 if (!bo) 1020 if (!bo)
1021 return -EINVAL; 1021 return -EINVAL;
1022 1022
1023 if (!bd_holder_grab_dirs(bdev, bo)) 1023 if (!bd_holder_grab_dirs(bdev, bo))
1024 return -EBUSY; 1024 return -EBUSY;
1025 1025
1026 err = add_symlink(bo->sdir, bo->sdev); 1026 err = add_symlink(bo->sdir, bo->sdev);
1027 if (err) 1027 if (err)
1028 return err; 1028 return err;
1029 1029
1030 err = add_symlink(bo->hdir, bo->hdev); 1030 err = add_symlink(bo->hdir, bo->hdev);
1031 if (err) { 1031 if (err) {
1032 del_symlink(bo->sdir, bo->sdev); 1032 del_symlink(bo->sdir, bo->sdev);
1033 return err; 1033 return err;
1034 } 1034 }
1035 1035
1036 list_add_tail(&bo->list, &bdev->bd_holder_list); 1036 list_add_tail(&bo->list, &bdev->bd_holder_list);
1037 return 0; 1037 return 0;
1038 } 1038 }
1039 1039
1040 /** 1040 /**
1041 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship 1041 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
1042 * 1042 *
1043 * @bdev: block device to be bd_claimed 1043 * @bdev: block device to be bd_claimed
1044 * @kobj: holder's kobject 1044 * @kobj: holder's kobject
1045 * 1045 *
1046 * If there is a matching entry with @kobj in @bdev->bd_holder_list 1046 * If there is a matching entry with @kobj in @bdev->bd_holder_list
1047 * and no other bd_claim() from the same kobject, 1047 * and no other bd_claim() from the same kobject,
1048 * remove the struct bd_holder from the list and delete its symlinks. 1048 * remove the struct bd_holder from the list and delete its symlinks.
1049 * 1049 *
1050 * Returns a pointer to the struct bd_holder when it's removed from the list 1050 * Returns a pointer to the struct bd_holder when it's removed from the list
1051 * and ready to be freed. 1051 * and ready to be freed.
1052 * Returns NULL if a matching claim isn't found or there is another bd_claim() 1052 * Returns NULL if a matching claim isn't found or there is another bd_claim()
1053 * by the same kobject. 1053 * by the same kobject.
1054 */ 1054 */
1055 static struct bd_holder *del_bd_holder(struct block_device *bdev, 1055 static struct bd_holder *del_bd_holder(struct block_device *bdev,
1056 struct kobject *kobj) 1056 struct kobject *kobj)
1057 { 1057 {
1058 struct bd_holder *bo; 1058 struct bd_holder *bo;
1059 1059
1060 list_for_each_entry(bo, &bdev->bd_holder_list, list) { 1060 list_for_each_entry(bo, &bdev->bd_holder_list, list) {
1061 if (bo->sdir == kobj) { 1061 if (bo->sdir == kobj) {
1062 bo->count--; 1062 bo->count--;
1063 BUG_ON(bo->count < 0); 1063 BUG_ON(bo->count < 0);
1064 if (!bo->count) { 1064 if (!bo->count) {
1065 list_del(&bo->list); 1065 list_del(&bo->list);
1066 del_symlink(bo->sdir, bo->sdev); 1066 del_symlink(bo->sdir, bo->sdev);
1067 del_symlink(bo->hdir, bo->hdev); 1067 del_symlink(bo->hdir, bo->hdev);
1068 bd_holder_release_dirs(bo); 1068 bd_holder_release_dirs(bo);
1069 return bo; 1069 return bo;
1070 } 1070 }
1071 break; 1071 break;
1072 } 1072 }
1073 } 1073 }
1074 1074
1075 return NULL; 1075 return NULL;
1076 } 1076 }
1077 1077
1078 /** 1078 /**
1079 * bd_claim_by_kobject - bd_claim() with additional kobject signature 1079 * bd_claim_by_kobject - bd_claim() with additional kobject signature
1080 * 1080 *
1081 * @bdev: block device to be claimed 1081 * @bdev: block device to be claimed
1082 * @holder: holder's signature 1082 * @holder: holder's signature
1083 * @kobj: holder's kobject 1083 * @kobj: holder's kobject
1084 * 1084 *
1085 * Do bd_claim() and if it succeeds, create sysfs symlinks between 1085 * Do bd_claim() and if it succeeds, create sysfs symlinks between
1086 * the bdev and the holder's kobject. 1086 * the bdev and the holder's kobject.
1087 * Use bd_release_from_kobject() when releasing the claimed bdev. 1087 * Use bd_release_from_kobject() when releasing the claimed bdev.
1088 * 1088 *
1089 * Returns 0 on success. (same as bd_claim()) 1089 * Returns 0 on success. (same as bd_claim())
1090 * Returns errno on failure. 1090 * Returns errno on failure.
1091 */ 1091 */
1092 static int bd_claim_by_kobject(struct block_device *bdev, void *holder, 1092 static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
1093 struct kobject *kobj) 1093 struct kobject *kobj)
1094 { 1094 {
1095 int err; 1095 int err;
1096 struct bd_holder *bo, *found; 1096 struct bd_holder *bo, *found;
1097 1097
1098 if (!kobj) 1098 if (!kobj)
1099 return -EINVAL; 1099 return -EINVAL;
1100 1100
1101 bo = alloc_bd_holder(kobj); 1101 bo = alloc_bd_holder(kobj);
1102 if (!bo) 1102 if (!bo)
1103 return -ENOMEM; 1103 return -ENOMEM;
1104 1104
1105 mutex_lock(&bdev->bd_mutex); 1105 mutex_lock(&bdev->bd_mutex);
1106 1106
1107 err = bd_claim(bdev, holder); 1107 err = bd_claim(bdev, holder);
1108 if (err) 1108 if (err)
1109 goto fail; 1109 goto fail;
1110 1110
1111 found = find_bd_holder(bdev, bo); 1111 found = find_bd_holder(bdev, bo);
1112 if (found) 1112 if (found)
1113 goto fail; 1113 goto fail;
1114 1114
1115 err = add_bd_holder(bdev, bo); 1115 err = add_bd_holder(bdev, bo);
1116 if (err) 1116 if (err)
1117 bd_release(bdev); 1117 bd_release(bdev);
1118 else 1118 else
1119 bo = NULL; 1119 bo = NULL;
1120 fail: 1120 fail:
1121 mutex_unlock(&bdev->bd_mutex); 1121 mutex_unlock(&bdev->bd_mutex);
1122 free_bd_holder(bo); 1122 free_bd_holder(bo);
1123 return err; 1123 return err;
1124 } 1124 }
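
A second bd_claim_by_kobject() with the same @kobj takes the find_bd_holder() branch above: the existing holder's count is bumped, the newly allocated bo is freed, and 0 is returned. The sysfs symlinks are therefore created exactly once per (bdev, kobj) pair and removed only when del_bd_holder() drops the count back to zero.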
1125 1125
1126 /** 1126 /**
1127 * bd_release_from_kobject - bd_release() with additional kobject signature 1127 * bd_release_from_kobject - bd_release() with additional kobject signature
1128 * 1128 *
1129 * @bdev: block device to be released 1129 * @bdev: block device to be released
1130 * @kobj: holder's kobject 1130 * @kobj: holder's kobject
1131 * 1131 *
1132 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). 1132 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
1133 */ 1133 */
1134 static void bd_release_from_kobject(struct block_device *bdev, 1134 static void bd_release_from_kobject(struct block_device *bdev,
1135 struct kobject *kobj) 1135 struct kobject *kobj)
1136 { 1136 {
1137 if (!kobj) 1137 if (!kobj)
1138 return; 1138 return;
1139 1139
1140 mutex_lock(&bdev->bd_mutex); 1140 mutex_lock(&bdev->bd_mutex);
1141 bd_release(bdev); 1141 bd_release(bdev);
1142 free_bd_holder(del_bd_holder(bdev, kobj)); 1142 free_bd_holder(del_bd_holder(bdev, kobj));
1143 mutex_unlock(&bdev->bd_mutex); 1143 mutex_unlock(&bdev->bd_mutex);
1144 } 1144 }
1145 1145
1146 /** 1146 /**
1147 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() 1147 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
1148 * 1148 *
1149 * @bdev: block device to be claimed 1149 * @bdev: block device to be claimed
1150 * @holder: holder's signature 1150 * @holder: holder's signature
1151 * @disk: holder's gendisk 1151 * @disk: holder's gendisk
1152 * 1152 *
1153 * Call bd_claim_by_kobject() after getting a reference to @disk->slave_dir. 1153 * Call bd_claim_by_kobject() after getting a reference to @disk->slave_dir.
1154 */ 1154 */
1155 int bd_claim_by_disk(struct block_device *bdev, void *holder, 1155 int bd_claim_by_disk(struct block_device *bdev, void *holder,
1156 struct gendisk *disk) 1156 struct gendisk *disk)
1157 { 1157 {
1158 return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); 1158 return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
1159 } 1159 }
1160 EXPORT_SYMBOL_GPL(bd_claim_by_disk); 1160 EXPORT_SYMBOL_GPL(bd_claim_by_disk);
1161 1161
1162 /** 1162 /**
1163 * bd_release_from_disk - wrapper function for bd_release_from_kobject() 1163 * bd_release_from_disk - wrapper function for bd_release_from_kobject()
1164 * 1164 *
1165 * @bdev: block device to be released 1165 * @bdev: block device to be released
1166 * @disk: holder's gendisk 1166 * @disk: holder's gendisk
1167 * 1167 *
1168 * Call bd_release_from_kobject() and put @disk->slave_dir. 1168 * Call bd_release_from_kobject() and put @disk->slave_dir.
1169 */ 1169 */
1170 void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) 1170 void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
1171 { 1171 {
1172 bd_release_from_kobject(bdev, disk->slave_dir); 1172 bd_release_from_kobject(bdev, disk->slave_dir);
1173 kobject_put(disk->slave_dir); 1173 kobject_put(disk->slave_dir);
1174 } 1174 }
1175 EXPORT_SYMBOL_GPL(bd_release_from_disk); 1175 EXPORT_SYMBOL_GPL(bd_release_from_disk);
1176 #endif 1176 #endif
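
bd_claim_by_disk() is the sysfs-aware claim used by stacking drivers such as md and dm: on top of the ordinary bd_claim(), it cross-links the holder's slaves/ directory and the claimed device's holders/ directory. A minimal sketch of the pairing, assuming the component device was already opened with blkdev_get(); the example_* names are made up, and the gendisk doubles as the opaque holder signature only for brevity:

	/* Claim @bdev on behalf of @holder_disk and create the
	 * slaves/holders symlinks in one call. */
	static int example_bind(struct block_device *bdev,
				struct gendisk *holder_disk)
	{
		int err;

		err = bd_claim_by_disk(bdev, holder_disk, holder_disk);
		if (err)
			return err;	/* e.g. -EBUSY if claimed by someone else */

		/* /sys/block/<holder>/slaves/... and .../holders/... now exist */
		return 0;
	}

	static void example_unbind(struct block_device *bdev,
				   struct gendisk *holder_disk)
	{
		bd_release_from_disk(bdev, holder_disk);
	}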
1177 1177
1178 /* 1178 /*
1179 * Tries to open a block device by device number. Use it ONLY if you 1179 * Tries to open a block device by device number. Use it ONLY if you
1180 * really do not have anything better - i.e. when you are behind a 1180 * really do not have anything better - i.e. when you are behind a
1181 * truly sucky interface and all you are given is a device number. _Never_ 1181 * truly sucky interface and all you are given is a device number. _Never_
1182 * to be used for internal purposes. If you ever need it - reconsider 1182 * to be used for internal purposes. If you ever need it - reconsider
1183 * your API. 1183 * your API.
1184 */ 1184 */
1185 struct block_device *open_by_devnum(dev_t dev, fmode_t mode) 1185 struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
1186 { 1186 {
1187 struct block_device *bdev = bdget(dev); 1187 struct block_device *bdev = bdget(dev);
1188 int err = -ENOMEM; 1188 int err = -ENOMEM;
1189 if (bdev) 1189 if (bdev)
1190 err = blkdev_get(bdev, mode); 1190 err = blkdev_get(bdev, mode);
1191 return err ? ERR_PTR(err) : bdev; 1191 return err ? ERR_PTR(err) : bdev;
1192 } 1192 }
1193 1193
1194 EXPORT_SYMBOL(open_by_devnum); 1194 EXPORT_SYMBOL(open_by_devnum);
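
The calling convention matters despite the warning: on failure the errno comes back as an ERR_PTR() and nothing needs to be cleaned up (note that __blkdev_get() below bdput()s on failure), while on success the device is open and must eventually be released with blkdev_put(). A minimal sketch; the dev_t is made up (8:0 is conventionally /dev/sda):

	static int example_probe_by_devnum(void)
	{
		struct block_device *bdev;

		bdev = open_by_devnum(MKDEV(8, 0), FMODE_READ);
		if (IS_ERR(bdev))
			return PTR_ERR(bdev);	/* nothing to undo */

		/* ... inspect the device ... */

		blkdev_put(bdev, FMODE_READ);
		return 0;
	}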
1195 1195
1196 /** 1196 /**
1197 * flush_disk - invalidates all buffer-cache entries on a disk 1197 * flush_disk - invalidates all buffer-cache entries on a disk
1198 * 1198 *
1199 * @bdev: struct block device to be flushed 1199 * @bdev: struct block device to be flushed
1200 * 1200 *
1201 * Invalidates all buffer-cache entries on a disk. It should be called 1201 * Invalidates all buffer-cache entries on a disk. It should be called
1202 * when a disk has been changed -- either by a media change or online 1202 * when a disk has been changed -- either by a media change or online
1203 * resize. 1203 * resize.
1204 */ 1204 */
1205 static void flush_disk(struct block_device *bdev) 1205 static void flush_disk(struct block_device *bdev)
1206 { 1206 {
1207 if (__invalidate_device(bdev)) { 1207 if (__invalidate_device(bdev)) {
1208 char name[BDEVNAME_SIZE] = ""; 1208 char name[BDEVNAME_SIZE] = "";
1209 1209
1210 if (bdev->bd_disk) 1210 if (bdev->bd_disk)
1211 disk_name(bdev->bd_disk, 0, name); 1211 disk_name(bdev->bd_disk, 0, name);
1212 printk(KERN_WARNING "VFS: busy inodes on changed media or " 1212 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1213 "resized disk %s\n", name); 1213 "resized disk %s\n", name);
1214 } 1214 }
1215 1215
1216 if (!bdev->bd_disk) 1216 if (!bdev->bd_disk)
1217 return; 1217 return;
1218 if (disk_partitionable(bdev->bd_disk)) 1218 if (disk_partitionable(bdev->bd_disk))
1219 bdev->bd_invalidated = 1; 1219 bdev->bd_invalidated = 1;
1220 } 1220 }
1221 1221
1222 /** 1222 /**
1223 * check_disk_size_change - checks for disk size change and adjusts bdev size. 1223 * check_disk_size_change - checks for disk size change and adjusts bdev size.
1224 * @disk: struct gendisk to check 1224 * @disk: struct gendisk to check
1225 * @bdev: struct bdev to adjust. 1225 * @bdev: struct bdev to adjust.
1226 * 1226 *
1227 * This routine checks whether the bdev size matches the disk size 1227 * This routine checks whether the bdev size matches the disk size
1228 * and adjusts the bdev size if they differ. 1228 * and adjusts the bdev size if they differ.
1229 */ 1229 */
1230 void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) 1230 void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
1231 { 1231 {
1232 loff_t disk_size, bdev_size; 1232 loff_t disk_size, bdev_size;
1233 1233
1234 disk_size = (loff_t)get_capacity(disk) << 9; 1234 disk_size = (loff_t)get_capacity(disk) << 9;
1235 bdev_size = i_size_read(bdev->bd_inode); 1235 bdev_size = i_size_read(bdev->bd_inode);
1236 if (disk_size != bdev_size) { 1236 if (disk_size != bdev_size) {
1237 char name[BDEVNAME_SIZE]; 1237 char name[BDEVNAME_SIZE];
1238 1238
1239 disk_name(disk, 0, name); 1239 disk_name(disk, 0, name);
1240 printk(KERN_INFO 1240 printk(KERN_INFO
1241 "%s: detected capacity change from %lld to %lld\n", 1241 "%s: detected capacity change from %lld to %lld\n",
1242 name, bdev_size, disk_size); 1242 name, bdev_size, disk_size);
1243 i_size_write(bdev->bd_inode, disk_size); 1243 i_size_write(bdev->bd_inode, disk_size);
1244 flush_disk(bdev); 1244 flush_disk(bdev);
1245 } 1245 }
1246 } 1246 }
1247 EXPORT_SYMBOL(check_disk_size_change); 1247 EXPORT_SYMBOL(check_disk_size_change);
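
The shift by 9 converts get_capacity()'s 512-byte sectors into bytes. For example, a disk reporting 8388608 sectors gives disk_size = 8388608 << 9 = 4294967296 bytes (4 GiB); if i_size_read() still returns the pre-resize value, the capacity-change message is logged and i_size_write() plus flush_disk() bring the bdev back in line.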
1248 1248
1249 /** 1249 /**
1250 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back 1250 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
1251 * @disk: struct gendisk to be revalidated 1251 * @disk: struct gendisk to be revalidated
1252 * 1252 *
1253 * This routine is a wrapper for lower-level driver's revalidate_disk 1253 * This routine is a wrapper for lower-level driver's revalidate_disk
1254 * call-backs. It is used to do common pre and post operations needed 1254 * call-backs. It is used to do common pre and post operations needed
1255 * for all revalidate_disk operations. 1255 * for all revalidate_disk operations.
1256 */ 1256 */
1257 int revalidate_disk(struct gendisk *disk) 1257 int revalidate_disk(struct gendisk *disk)
1258 { 1258 {
1259 struct block_device *bdev; 1259 struct block_device *bdev;
1260 int ret = 0; 1260 int ret = 0;
1261 1261
1262 if (disk->fops->revalidate_disk) 1262 if (disk->fops->revalidate_disk)
1263 ret = disk->fops->revalidate_disk(disk); 1263 ret = disk->fops->revalidate_disk(disk);
1264 1264
1265 bdev = bdget_disk(disk, 0); 1265 bdev = bdget_disk(disk, 0);
1266 if (!bdev) 1266 if (!bdev)
1267 return ret; 1267 return ret;
1268 1268
1269 mutex_lock(&bdev->bd_mutex); 1269 mutex_lock(&bdev->bd_mutex);
1270 check_disk_size_change(disk, bdev); 1270 check_disk_size_change(disk, bdev);
1271 mutex_unlock(&bdev->bd_mutex); 1271 mutex_unlock(&bdev->bd_mutex);
1272 bdput(bdev); 1272 bdput(bdev);
1273 return ret; 1273 return ret;
1274 } 1274 }
1275 EXPORT_SYMBOL(revalidate_disk); 1275 EXPORT_SYMBOL(revalidate_disk);
1276 1276
1277 /* 1277 /*
1278 * This routine checks whether the removable media has been changed, 1278 * This routine checks whether the removable media has been changed,
1279 * and invalidates all buffer-cache entries in that case. This 1279 * and invalidates all buffer-cache entries in that case. This
1280 * is a relatively slow routine, so we have to try to minimize using 1280 * is a relatively slow routine, so we have to try to minimize using
1281 * it. Thus it is called only upon a 'mount' or 'open'. This 1281 * it. Thus it is called only upon a 'mount' or 'open'. This
1282 * is the best way of combining speed and utility, I think. 1282 * is the best way of combining speed and utility, I think.
1283 * People changing diskettes in the middle of an operation deserve 1283 * People changing diskettes in the middle of an operation deserve
1284 * to lose :-) 1284 * to lose :-)
1285 */ 1285 */
1286 int check_disk_change(struct block_device *bdev) 1286 int check_disk_change(struct block_device *bdev)
1287 { 1287 {
1288 struct gendisk *disk = bdev->bd_disk; 1288 struct gendisk *disk = bdev->bd_disk;
1289 const struct block_device_operations *bdops = disk->fops; 1289 const struct block_device_operations *bdops = disk->fops;
1290 1290
1291 if (!bdops->media_changed) 1291 if (!bdops->media_changed)
1292 return 0; 1292 return 0;
1293 if (!bdops->media_changed(bdev->bd_disk)) 1293 if (!bdops->media_changed(bdev->bd_disk))
1294 return 0; 1294 return 0;
1295 1295
1296 flush_disk(bdev); 1296 flush_disk(bdev);
1297 if (bdops->revalidate_disk) 1297 if (bdops->revalidate_disk)
1298 bdops->revalidate_disk(bdev->bd_disk); 1298 bdops->revalidate_disk(bdev->bd_disk);
1299 return 1; 1299 return 1;
1300 } 1300 }
1301 1301
1302 EXPORT_SYMBOL(check_disk_change); 1302 EXPORT_SYMBOL(check_disk_change);
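
On the driver side, check_disk_change() relies on the ->media_changed and ->revalidate_disk methods of struct block_device_operations. A sketch of how a removable-media driver might wire them up; struct example_dev and its fields are hypothetical:

	#include <linux/blkdev.h>
	#include <linux/genhd.h>
	#include <linux/module.h>

	struct example_dev {
		struct gendisk *gd;
		int media_changed;	/* set from the driver's interrupt handler */
		sector_t new_capacity;	/* size of the newly inserted medium */
	};

	static int example_media_changed(struct gendisk *disk)
	{
		struct example_dev *dev = disk->private_data;

		return dev->media_changed;
	}

	static int example_revalidate(struct gendisk *disk)
	{
		struct example_dev *dev = disk->private_data;

		dev->media_changed = 0;
		set_capacity(disk, dev->new_capacity);
		return 0;
	}

	static int example_open(struct block_device *bdev, fmode_t mode)
	{
		/* flushes stale buffers and calls the methods above
		 * when the medium was swapped */
		check_disk_change(bdev);
		return 0;
	}

	static const struct block_device_operations example_ops = {
		.owner		 = THIS_MODULE,
		.open		 = example_open,
		.media_changed	 = example_media_changed,
		.revalidate_disk = example_revalidate,
	};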
1303 1303
1304 void bd_set_size(struct block_device *bdev, loff_t size) 1304 void bd_set_size(struct block_device *bdev, loff_t size)
1305 { 1305 {
1306 unsigned bsize = bdev_logical_block_size(bdev); 1306 unsigned bsize = bdev_logical_block_size(bdev);
1307 1307
1308 bdev->bd_inode->i_size = size; 1308 bdev->bd_inode->i_size = size;
1309 while (bsize < PAGE_CACHE_SIZE) { 1309 while (bsize < PAGE_CACHE_SIZE) {
1310 if (size & bsize) 1310 if (size & bsize)
1311 break; 1311 break;
1312 bsize <<= 1; 1312 bsize <<= 1;
1313 } 1313 }
1314 bdev->bd_block_size = bsize; 1314 bdev->bd_block_size = bsize;
1315 bdev->bd_inode->i_blkbits = blksize_bits(bsize); 1315 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1316 } 1316 }
1317 EXPORT_SYMBOL(bd_set_size); 1317 EXPORT_SYMBOL(bd_set_size);
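
The loop selects the largest power-of-two block size that both divides @size and does not exceed PAGE_CACHE_SIZE, starting from the logical block size. For example, with 512-byte logical blocks and a device whose size is an odd multiple of 2048 bytes, bits 9 and 10 of @size are clear, so bsize doubles past 512 and 1024 and stops at 2048; bd_block_size ends up as 2048.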
1318 1318
1319 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); 1319 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1320 1320
1321 /* 1321 /*
1322 * bd_mutex locking: 1322 * bd_mutex locking:
1323 * 1323 *
1324 * mutex_lock(part->bd_mutex) 1324 * mutex_lock(part->bd_mutex)
1325 * mutex_lock_nested(whole->bd_mutex, 1) 1325 * mutex_lock_nested(whole->bd_mutex, 1)
1326 */ 1326 */
1327 1327
1328 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) 1328 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1329 { 1329 {
1330 struct gendisk *disk; 1330 struct gendisk *disk;
1331 int ret; 1331 int ret;
1332 int partno; 1332 int partno;
1333 int perm = 0; 1333 int perm = 0;
1334 1334
1335 if (mode & FMODE_READ) 1335 if (mode & FMODE_READ)
1336 perm |= MAY_READ; 1336 perm |= MAY_READ;
1337 if (mode & FMODE_WRITE) 1337 if (mode & FMODE_WRITE)
1338 perm |= MAY_WRITE; 1338 perm |= MAY_WRITE;
1339 /* 1339 /*
1340 * hooks: /n/, see "layering violations". 1340 * hooks: /n/, see "layering violations".
1341 */ 1341 */
1342 ret = devcgroup_inode_permission(bdev->bd_inode, perm); 1342 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1343 if (ret != 0) { 1343 if (ret != 0) {
1344 bdput(bdev); 1344 bdput(bdev);
1345 return ret; 1345 return ret;
1346 } 1346 }
1347 1347
1348 lock_kernel(); 1348 lock_kernel();
1349 restart: 1349 restart:
1350 1350
1351 ret = -ENXIO; 1351 ret = -ENXIO;
1352 disk = get_gendisk(bdev->bd_dev, &partno); 1352 disk = get_gendisk(bdev->bd_dev, &partno);
1353 if (!disk) 1353 if (!disk)
1354 goto out_unlock_kernel; 1354 goto out_unlock_kernel;
1355 1355
1356 mutex_lock_nested(&bdev->bd_mutex, for_part); 1356 mutex_lock_nested(&bdev->bd_mutex, for_part);
1357 if (!bdev->bd_openers) { 1357 if (!bdev->bd_openers) {
1358 bdev->bd_disk = disk; 1358 bdev->bd_disk = disk;
1359 bdev->bd_contains = bdev; 1359 bdev->bd_contains = bdev;
1360 if (!partno) { 1360 if (!partno) {
1361 struct backing_dev_info *bdi; 1361 struct backing_dev_info *bdi;
1362 1362
1363 ret = -ENXIO; 1363 ret = -ENXIO;
1364 bdev->bd_part = disk_get_part(disk, partno); 1364 bdev->bd_part = disk_get_part(disk, partno);
1365 if (!bdev->bd_part) 1365 if (!bdev->bd_part)
1366 goto out_clear; 1366 goto out_clear;
1367 1367
1368 if (disk->fops->open) { 1368 if (disk->fops->open) {
1369 ret = disk->fops->open(bdev, mode); 1369 ret = disk->fops->open(bdev, mode);
1370 if (ret == -ERESTARTSYS) { 1370 if (ret == -ERESTARTSYS) {
1371 /* Lost a race with 'disk' being 1371 /* Lost a race with 'disk' being
1372 * deleted, try again. 1372 * deleted, try again.
1373 * See md.c 1373 * See md.c
1374 */ 1374 */
1375 disk_put_part(bdev->bd_part); 1375 disk_put_part(bdev->bd_part);
1376 bdev->bd_part = NULL; 1376 bdev->bd_part = NULL;
1377 module_put(disk->fops->owner); 1377 module_put(disk->fops->owner);
1378 put_disk(disk); 1378 put_disk(disk);
1379 bdev->bd_disk = NULL; 1379 bdev->bd_disk = NULL;
1380 mutex_unlock(&bdev->bd_mutex); 1380 mutex_unlock(&bdev->bd_mutex);
1381 goto restart; 1381 goto restart;
1382 } 1382 }
1383 if (ret) 1383 if (ret)
1384 goto out_clear; 1384 goto out_clear;
1385 } 1385 }
1386 if (!bdev->bd_openers) { 1386 if (!bdev->bd_openers) {
1387 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1387 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1388 bdi = blk_get_backing_dev_info(bdev); 1388 bdi = blk_get_backing_dev_info(bdev);
1389 if (bdi == NULL) 1389 if (bdi == NULL)
1390 bdi = &default_backing_dev_info; 1390 bdi = &default_backing_dev_info;
1391 bdev->bd_inode->i_data.backing_dev_info = bdi; 1391 bdev->bd_inode->i_data.backing_dev_info = bdi;
1392 } 1392 }
1393 if (bdev->bd_invalidated) 1393 if (bdev->bd_invalidated)
1394 rescan_partitions(disk, bdev); 1394 rescan_partitions(disk, bdev);
1395 } else { 1395 } else {
1396 struct block_device *whole; 1396 struct block_device *whole;
1397 whole = bdget_disk(disk, 0); 1397 whole = bdget_disk(disk, 0);
1398 ret = -ENOMEM; 1398 ret = -ENOMEM;
1399 if (!whole) 1399 if (!whole)
1400 goto out_clear; 1400 goto out_clear;
1401 BUG_ON(for_part); 1401 BUG_ON(for_part);
1402 ret = __blkdev_get(whole, mode, 1); 1402 ret = __blkdev_get(whole, mode, 1);
1403 if (ret) 1403 if (ret)
1404 goto out_clear; 1404 goto out_clear;
1405 bdev->bd_contains = whole; 1405 bdev->bd_contains = whole;
1406 bdev->bd_inode->i_data.backing_dev_info = 1406 bdev->bd_inode->i_data.backing_dev_info =
1407 whole->bd_inode->i_data.backing_dev_info; 1407 whole->bd_inode->i_data.backing_dev_info;
1408 bdev->bd_part = disk_get_part(disk, partno); 1408 bdev->bd_part = disk_get_part(disk, partno);
1409 if (!(disk->flags & GENHD_FL_UP) || 1409 if (!(disk->flags & GENHD_FL_UP) ||
1410 !bdev->bd_part || !bdev->bd_part->nr_sects) { 1410 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1411 ret = -ENXIO; 1411 ret = -ENXIO;
1412 goto out_clear; 1412 goto out_clear;
1413 } 1413 }
1414 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1414 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1415 } 1415 }
1416 } else { 1416 } else {
1417 module_put(disk->fops->owner); 1417 module_put(disk->fops->owner);
1418 put_disk(disk); 1418 put_disk(disk);
1419 disk = NULL; 1419 disk = NULL;
1420 if (bdev->bd_contains == bdev) { 1420 if (bdev->bd_contains == bdev) {
1421 if (bdev->bd_disk->fops->open) { 1421 if (bdev->bd_disk->fops->open) {
1422 ret = bdev->bd_disk->fops->open(bdev, mode); 1422 ret = bdev->bd_disk->fops->open(bdev, mode);
1423 if (ret) 1423 if (ret)
1424 goto out_unlock_bdev; 1424 goto out_unlock_bdev;
1425 } 1425 }
1426 if (bdev->bd_invalidated) 1426 if (bdev->bd_invalidated)
1427 rescan_partitions(bdev->bd_disk, bdev); 1427 rescan_partitions(bdev->bd_disk, bdev);
1428 } 1428 }
1429 } 1429 }
1430 bdev->bd_openers++; 1430 bdev->bd_openers++;
1431 if (for_part) 1431 if (for_part)
1432 bdev->bd_part_count++; 1432 bdev->bd_part_count++;
1433 mutex_unlock(&bdev->bd_mutex); 1433 mutex_unlock(&bdev->bd_mutex);
1434 unlock_kernel(); 1434 unlock_kernel();
1435 return 0; 1435 return 0;
1436 1436
1437 out_clear: 1437 out_clear:
1438 disk_put_part(bdev->bd_part); 1438 disk_put_part(bdev->bd_part);
1439 bdev->bd_disk = NULL; 1439 bdev->bd_disk = NULL;
1440 bdev->bd_part = NULL; 1440 bdev->bd_part = NULL;
1441 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1441 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1442 if (bdev != bdev->bd_contains) 1442 if (bdev != bdev->bd_contains)
1443 __blkdev_put(bdev->bd_contains, mode, 1); 1443 __blkdev_put(bdev->bd_contains, mode, 1);
1444 bdev->bd_contains = NULL; 1444 bdev->bd_contains = NULL;
1445 out_unlock_bdev: 1445 out_unlock_bdev:
1446 mutex_unlock(&bdev->bd_mutex); 1446 mutex_unlock(&bdev->bd_mutex);
1447 out_unlock_kernel: 1447 out_unlock_kernel:
1448 unlock_kernel(); 1448 unlock_kernel();
1449 1449
1450 if (disk) 1450 if (disk)
1451 module_put(disk->fops->owner); 1451 module_put(disk->fops->owner);
1452 put_disk(disk); 1452 put_disk(disk);
1453 bdput(bdev); 1453 bdput(bdev);
1454 1454
1455 return ret; 1455 return ret;
1456 } 1456 }
1457 1457
1458 int blkdev_get(struct block_device *bdev, fmode_t mode) 1458 int blkdev_get(struct block_device *bdev, fmode_t mode)
1459 { 1459 {
1460 return __blkdev_get(bdev, mode, 0); 1460 return __blkdev_get(bdev, mode, 0);
1461 } 1461 }
1462 EXPORT_SYMBOL(blkdev_get); 1462 EXPORT_SYMBOL(blkdev_get);
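
Note the asymmetric reference handling visible in __blkdev_get() above: every error path ends in bdput(), so the reference obtained from bdget() is consumed whether the open succeeds or fails. A minimal, illustrative sketch of the resulting calling convention:

	static int example_touch(dev_t devt)
	{
		struct block_device *bdev = bdget(devt);
		int err;

		if (!bdev)
			return -ENOMEM;

		err = blkdev_get(bdev, FMODE_READ);
		if (err)
			return err;	/* blkdev_get() already dropped the ref */

		/* ... use the open device ... */

		blkdev_put(bdev, FMODE_READ);	/* closes and drops the ref */
		return 0;
	}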
1463 1463
1464 static int blkdev_open(struct inode * inode, struct file * filp) 1464 static int blkdev_open(struct inode * inode, struct file * filp)
1465 { 1465 {
1466 struct block_device *whole = NULL; 1466 struct block_device *whole = NULL;
1467 struct block_device *bdev; 1467 struct block_device *bdev;
1468 int res; 1468 int res;
1469 1469
1470 /* 1470 /*
1471 * Preserve backwards compatibility and allow large file access 1471 * Preserve backwards compatibility and allow large file access
1472 * even if userspace doesn't ask for it explicitly. Some mkfs 1472 * even if userspace doesn't ask for it explicitly. Some mkfs
1473 * binaries need it. We might want to drop this workaround 1473 * binaries need it. We might want to drop this workaround
1474 * during an unstable branch. 1474 * during an unstable branch.
1475 */ 1475 */
1476 filp->f_flags |= O_LARGEFILE; 1476 filp->f_flags |= O_LARGEFILE;
1477 1477
1478 if (filp->f_flags & O_NDELAY) 1478 if (filp->f_flags & O_NDELAY)
1479 filp->f_mode |= FMODE_NDELAY; 1479 filp->f_mode |= FMODE_NDELAY;
1480 if (filp->f_flags & O_EXCL) 1480 if (filp->f_flags & O_EXCL)
1481 filp->f_mode |= FMODE_EXCL; 1481 filp->f_mode |= FMODE_EXCL;
1482 if ((filp->f_flags & O_ACCMODE) == 3) 1482 if ((filp->f_flags & O_ACCMODE) == 3)
1483 filp->f_mode |= FMODE_WRITE_IOCTL; 1483 filp->f_mode |= FMODE_WRITE_IOCTL;
1484 1484
1485 bdev = bd_acquire(inode); 1485 bdev = bd_acquire(inode);
1486 if (bdev == NULL) 1486 if (bdev == NULL)
1487 return -ENOMEM; 1487 return -ENOMEM;
1488 1488
1489 if (filp->f_mode & FMODE_EXCL) { 1489 if (filp->f_mode & FMODE_EXCL) {
1490 whole = bd_start_claiming(bdev, filp); 1490 whole = bd_start_claiming(bdev, filp);
1491 if (IS_ERR(whole)) { 1491 if (IS_ERR(whole)) {
1492 bdput(bdev); 1492 bdput(bdev);
1493 return PTR_ERR(whole); 1493 return PTR_ERR(whole);
1494 } 1494 }
1495 } 1495 }
1496 1496
1497 filp->f_mapping = bdev->bd_inode->i_mapping; 1497 filp->f_mapping = bdev->bd_inode->i_mapping;
1498 1498
1499 res = blkdev_get(bdev, filp->f_mode); 1499 res = blkdev_get(bdev, filp->f_mode);
1500 1500
1501 if (whole) { 1501 if (whole) {
1502 if (res == 0) 1502 if (res == 0)
1503 bd_finish_claiming(bdev, whole, filp); 1503 bd_finish_claiming(bdev, whole, filp);
1504 else 1504 else
1505 bd_abort_claiming(whole, filp); 1505 bd_abort_claiming(whole, filp);
1506 } 1506 }
1507 1507
1508 return res; 1508 return res;
1509 } 1509 }
1510 1510
1511 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) 1511 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1512 { 1512 {
1513 int ret = 0; 1513 int ret = 0;
1514 struct gendisk *disk = bdev->bd_disk; 1514 struct gendisk *disk = bdev->bd_disk;
1515 struct block_device *victim = NULL; 1515 struct block_device *victim = NULL;
1516 1516
1517 mutex_lock_nested(&bdev->bd_mutex, for_part); 1517 mutex_lock_nested(&bdev->bd_mutex, for_part);
1518 lock_kernel(); 1518 lock_kernel();
1519 if (for_part) 1519 if (for_part)
1520 bdev->bd_part_count--; 1520 bdev->bd_part_count--;
1521 1521
1522 if (!--bdev->bd_openers) { 1522 if (!--bdev->bd_openers) {
1523 sync_blockdev(bdev); 1523 sync_blockdev(bdev);
1524 kill_bdev(bdev); 1524 kill_bdev(bdev);
1525 } 1525 }
1526 if (bdev->bd_contains == bdev) { 1526 if (bdev->bd_contains == bdev) {
1527 if (disk->fops->release) 1527 if (disk->fops->release)
1528 ret = disk->fops->release(disk, mode); 1528 ret = disk->fops->release(disk, mode);
1529 } 1529 }
1530 if (!bdev->bd_openers) { 1530 if (!bdev->bd_openers) {
1531 struct module *owner = disk->fops->owner; 1531 struct module *owner = disk->fops->owner;
1532 1532
1533 put_disk(disk); 1533 put_disk(disk);
1534 module_put(owner); 1534 module_put(owner);
1535 disk_put_part(bdev->bd_part); 1535 disk_put_part(bdev->bd_part);
1536 bdev->bd_part = NULL; 1536 bdev->bd_part = NULL;
1537 bdev->bd_disk = NULL; 1537 bdev->bd_disk = NULL;
1538 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1538 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1539 if (bdev != bdev->bd_contains) 1539 if (bdev != bdev->bd_contains)
1540 victim = bdev->bd_contains; 1540 victim = bdev->bd_contains;
1541 bdev->bd_contains = NULL; 1541 bdev->bd_contains = NULL;
1542 } 1542 }
1543 unlock_kernel(); 1543 unlock_kernel();
1544 mutex_unlock(&bdev->bd_mutex); 1544 mutex_unlock(&bdev->bd_mutex);
1545 bdput(bdev); 1545 bdput(bdev);
1546 if (victim) 1546 if (victim)
1547 __blkdev_put(victim, mode, 1); 1547 __blkdev_put(victim, mode, 1);
1548 return ret; 1548 return ret;
1549 } 1549 }
1550 1550
1551 int blkdev_put(struct block_device *bdev, fmode_t mode) 1551 int blkdev_put(struct block_device *bdev, fmode_t mode)
1552 { 1552 {
1553 return __blkdev_put(bdev, mode, 0); 1553 return __blkdev_put(bdev, mode, 0);
1554 } 1554 }
1555 EXPORT_SYMBOL(blkdev_put); 1555 EXPORT_SYMBOL(blkdev_put);
1556 1556
1557 static int blkdev_close(struct inode * inode, struct file * filp) 1557 static int blkdev_close(struct inode * inode, struct file * filp)
1558 { 1558 {
1559 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1559 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1560 if (bdev->bd_holder == filp) 1560 if (bdev->bd_holder == filp)
1561 bd_release(bdev); 1561 bd_release(bdev);
1562 return blkdev_put(bdev, filp->f_mode); 1562 return blkdev_put(bdev, filp->f_mode);
1563 } 1563 }
1564 1564
1565 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1565 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1566 { 1566 {
1567 struct block_device *bdev = I_BDEV(file->f_mapping->host); 1567 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1568 fmode_t mode = file->f_mode; 1568 fmode_t mode = file->f_mode;
1569 1569
1570 /* 1570 /*
1571 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have 1571 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
1572 * to update it before every ioctl. 1572 * to update it before every ioctl.
1573 */ 1573 */
1574 if (file->f_flags & O_NDELAY) 1574 if (file->f_flags & O_NDELAY)
1575 mode |= FMODE_NDELAY; 1575 mode |= FMODE_NDELAY;
1576 else 1576 else
1577 mode &= ~FMODE_NDELAY; 1577 mode &= ~FMODE_NDELAY;
1578 1578
1579 return blkdev_ioctl(bdev, mode, cmd, arg); 1579 return blkdev_ioctl(bdev, mode, cmd, arg);
1580 } 1580 }
1581 1581
1582 /* 1582 /*
1583 * Write data to the block device. Only intended for the block device itself 1583 * Write data to the block device. Only intended for the block device itself
1584 * and the raw driver, which is basically a fake block device. 1584 * and the raw driver, which is basically a fake block device.
1585 * 1585 *
1586 * Does not take i_mutex for the write and thus is not for general purpose 1586 * Does not take i_mutex for the write and thus is not for general purpose
1587 * use. 1587 * use.
1588 */ 1588 */
1589 ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 1589 ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1590 unsigned long nr_segs, loff_t pos) 1590 unsigned long nr_segs, loff_t pos)
1591 { 1591 {
1592 struct file *file = iocb->ki_filp; 1592 struct file *file = iocb->ki_filp;
1593 ssize_t ret; 1593 ssize_t ret;
1594 1594
1595 BUG_ON(iocb->ki_pos != pos); 1595 BUG_ON(iocb->ki_pos != pos);
1596 1596
1597 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 1597 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1598 if (ret > 0 || ret == -EIOCBQUEUED) { 1598 if (ret > 0 || ret == -EIOCBQUEUED) {
1599 ssize_t err; 1599 ssize_t err;
1600 1600
1601 err = generic_write_sync(file, pos, ret); 1601 err = generic_write_sync(file, pos, ret);
1602 if (err < 0 && ret > 0) 1602 if (err < 0 && ret > 0)
1603 ret = err; 1603 ret = err;
1604 } 1604 }
1605 return ret; 1605 return ret;
1606 } 1606 }
1607 EXPORT_SYMBOL_GPL(blkdev_aio_write); 1607 EXPORT_SYMBOL_GPL(blkdev_aio_write);
1608 1608
1609 /* 1609 /*
1610 * Try to release a page associated with block device when the system 1610 * Try to release a page associated with block device when the system
1611 * is under memory pressure. 1611 * is under memory pressure.
1612 */ 1612 */
1613 static int blkdev_releasepage(struct page *page, gfp_t wait) 1613 static int blkdev_releasepage(struct page *page, gfp_t wait)
1614 { 1614 {
1615 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; 1615 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1616 1616
1617 if (super && super->s_op->bdev_try_to_free_page) 1617 if (super && super->s_op->bdev_try_to_free_page)
1618 return super->s_op->bdev_try_to_free_page(super, page, wait); 1618 return super->s_op->bdev_try_to_free_page(super, page, wait);
1619 1619
1620 return try_to_free_buffers(page); 1620 return try_to_free_buffers(page);
1621 } 1621 }
1622 1622
1623 static const struct address_space_operations def_blk_aops = { 1623 static const struct address_space_operations def_blk_aops = {
1624 .readpage = blkdev_readpage, 1624 .readpage = blkdev_readpage,
1625 .writepage = blkdev_writepage, 1625 .writepage = blkdev_writepage,
1626 .sync_page = block_sync_page, 1626 .sync_page = block_sync_page,
1627 .write_begin = blkdev_write_begin, 1627 .write_begin = blkdev_write_begin,
1628 .write_end = blkdev_write_end, 1628 .write_end = blkdev_write_end,
1629 .writepages = generic_writepages, 1629 .writepages = generic_writepages,
1630 .releasepage = blkdev_releasepage, 1630 .releasepage = blkdev_releasepage,
1631 .direct_IO = blkdev_direct_IO, 1631 .direct_IO = blkdev_direct_IO,
1632 }; 1632 };
1633 1633
1634 const struct file_operations def_blk_fops = { 1634 const struct file_operations def_blk_fops = {
1635 .open = blkdev_open, 1635 .open = blkdev_open,
1636 .release = blkdev_close, 1636 .release = blkdev_close,
1637 .llseek = block_llseek, 1637 .llseek = block_llseek,
1638 .read = do_sync_read, 1638 .read = do_sync_read,
1639 .write = do_sync_write, 1639 .write = do_sync_write,
1640 .aio_read = generic_file_aio_read, 1640 .aio_read = generic_file_aio_read,
1641 .aio_write = blkdev_aio_write, 1641 .aio_write = blkdev_aio_write,
1642 .mmap = generic_file_mmap, 1642 .mmap = generic_file_mmap,
1643 .fsync = blkdev_fsync, 1643 .fsync = blkdev_fsync,
1644 .unlocked_ioctl = block_ioctl, 1644 .unlocked_ioctl = block_ioctl,
1645 #ifdef CONFIG_COMPAT 1645 #ifdef CONFIG_COMPAT
1646 .compat_ioctl = compat_blkdev_ioctl, 1646 .compat_ioctl = compat_blkdev_ioctl,
1647 #endif 1647 #endif
1648 .splice_read = generic_file_splice_read, 1648 .splice_read = generic_file_splice_read,
1649 .splice_write = generic_file_splice_write, 1649 .splice_write = generic_file_splice_write,
1650 }; 1650 };
1651 1651
1652 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1652 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1653 { 1653 {
1654 int res; 1654 int res;
1655 mm_segment_t old_fs = get_fs(); 1655 mm_segment_t old_fs = get_fs();
1656 set_fs(KERNEL_DS); 1656 set_fs(KERNEL_DS);
1657 res = blkdev_ioctl(bdev, 0, cmd, arg); 1657 res = blkdev_ioctl(bdev, 0, cmd, arg);
1658 set_fs(old_fs); 1658 set_fs(old_fs);
1659 return res; 1659 return res;
1660 } 1660 }
1661 1661
1662 EXPORT_SYMBOL(ioctl_by_bdev); 1662 EXPORT_SYMBOL(ioctl_by_bdev);
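
The set_fs(KERNEL_DS) dance lets a kernel buffer pass the user-copy checks inside the ioctl path, which is how in-kernel callers such as the partition parsers use this helper. A short sketch; the wrapper function is illustrative, while HDIO_GETGEO and struct hd_geometry come from <linux/hdreg.h>:

	#include <linux/hdreg.h>

	static int example_get_geometry(struct block_device *bdev,
					struct hd_geometry *geo)
	{
		/* @geo is a kernel pointer, but that is fine: the ioctl
		 * runs under KERNEL_DS inside ioctl_by_bdev() */
		return ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo);
	}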
1663 1663
1664 /** 1664 /**
1665 * lookup_bdev - lookup a struct block_device by name 1665 * lookup_bdev - lookup a struct block_device by name
1666 * @pathname: special file representing the block device 1666 * @pathname: special file representing the block device
1667 * 1667 *
1668 * Get a reference to the blockdevice at @pathname in the current 1668 * Get a reference to the blockdevice at @pathname in the current
1669 * namespace if possible and return it. Return ERR_PTR(error) 1669 * namespace if possible and return it. Return ERR_PTR(error)
1670 * otherwise. 1670 * otherwise.
1671 */ 1671 */
1672 struct block_device *lookup_bdev(const char *pathname) 1672 struct block_device *lookup_bdev(const char *pathname)
1673 { 1673 {
1674 struct block_device *bdev; 1674 struct block_device *bdev;
1675 struct inode *inode; 1675 struct inode *inode;
1676 struct path path; 1676 struct path path;
1677 int error; 1677 int error;
1678 1678
1679 if (!pathname || !*pathname) 1679 if (!pathname || !*pathname)
1680 return ERR_PTR(-EINVAL); 1680 return ERR_PTR(-EINVAL);
1681 1681
1682 error = kern_path(pathname, LOOKUP_FOLLOW, &path); 1682 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1683 if (error) 1683 if (error)
1684 return ERR_PTR(error); 1684 return ERR_PTR(error);
1685 1685
1686 inode = path.dentry->d_inode; 1686 inode = path.dentry->d_inode;
1687 error = -ENOTBLK; 1687 error = -ENOTBLK;
1688 if (!S_ISBLK(inode->i_mode)) 1688 if (!S_ISBLK(inode->i_mode))
1689 goto fail; 1689 goto fail;
1690 error = -EACCES; 1690 error = -EACCES;
1691 if (path.mnt->mnt_flags & MNT_NODEV) 1691 if (path.mnt->mnt_flags & MNT_NODEV)
1692 goto fail; 1692 goto fail;
1693 error = -ENOMEM; 1693 error = -ENOMEM;
1694 bdev = bd_acquire(inode); 1694 bdev = bd_acquire(inode);
1695 if (!bdev) 1695 if (!bdev)
1696 goto fail; 1696 goto fail;
1697 out: 1697 out:
1698 path_put(&path); 1698 path_put(&path);
1699 return bdev; 1699 return bdev;
1700 fail: 1700 fail:
1701 bdev = ERR_PTR(error); 1701 bdev = ERR_PTR(error);
1702 goto out; 1702 goto out;
1703 } 1703 }
1704 EXPORT_SYMBOL(lookup_bdev); 1704 EXPORT_SYMBOL(lookup_bdev);
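
A minimal sketch of the calling convention; the path is illustrative. The reference obtained via bd_acquire() is dropped with bdput() once the caller is done:

	static dev_t example_name_to_devt(const char *path)
	{
		struct block_device *bdev;
		dev_t devt;

		bdev = lookup_bdev(path);	/* e.g. "/dev/sda1" */
		if (IS_ERR(bdev))
			return 0;

		devt = bdev->bd_dev;
		bdput(bdev);
		return devt;
	}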
1705 1705
1706 /** 1706 /**
1707 * open_bdev_exclusive - open a block device by name and set it up for use 1707 * open_bdev_exclusive - open a block device by name and set it up for use
1708 * 1708 *
1709 * @path: special file representing the block device 1709 * @path: special file representing the block device
1710 * @mode: FMODE_... combination to be used 1710 * @mode: FMODE_... combination to be used
1711 * @holder: owner for exclusion 1711 * @holder: owner for exclusion
1712 * 1712 *
1713 * Open the blockdevice described by the special file at @path, claim it 1713 * Open the blockdevice described by the special file at @path, claim it
1714 * for @holder. 1714 * for @holder.
1715 */ 1715 */
1716 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) 1716 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1717 { 1717 {
1718 struct block_device *bdev, *whole; 1718 struct block_device *bdev, *whole;
1719 int error; 1719 int error;
1720 1720
1721 bdev = lookup_bdev(path); 1721 bdev = lookup_bdev(path);
1722 if (IS_ERR(bdev)) 1722 if (IS_ERR(bdev))
1723 return bdev; 1723 return bdev;
1724 1724
1725 whole = bd_start_claiming(bdev, holder); 1725 whole = bd_start_claiming(bdev, holder);
1726 if (IS_ERR(whole)) { 1726 if (IS_ERR(whole)) {
1727 bdput(bdev); 1727 bdput(bdev);
1728 return whole; 1728 return whole;
1729 } 1729 }
1730 1730
1731 error = blkdev_get(bdev, mode); 1731 error = blkdev_get(bdev, mode);
1732 if (error) 1732 if (error)
1733 goto out_abort_claiming; 1733 goto out_abort_claiming;
1734 1734
1735 error = -EACCES; 1735 error = -EACCES;
1736 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) 1736 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1737 goto out_blkdev_put; 1737 goto out_blkdev_put;
1738 1738
1739 bd_finish_claiming(bdev, whole, holder); 1739 bd_finish_claiming(bdev, whole, holder);
1740 return bdev; 1740 return bdev;
1741 1741
1742 out_blkdev_put: 1742 out_blkdev_put:
1743 blkdev_put(bdev, mode); 1743 blkdev_put(bdev, mode);
1744 out_abort_claiming: 1744 out_abort_claiming:
1745 bd_abort_claiming(whole, holder); 1745 bd_abort_claiming(whole, holder);
1746 return ERR_PTR(error); 1746 return ERR_PTR(error);
1747 } 1747 }
1748 1748
1749 EXPORT_SYMBOL(open_bdev_exclusive); 1749 EXPORT_SYMBOL(open_bdev_exclusive);
1750 1750
1751 /** 1751 /**
1752 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() 1752 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1753 * 1753 *
1754 * @bdev: blockdevice to close 1754 * @bdev: blockdevice to close
1755 * @mode: mode, must match that used to open. 1755 * @mode: mode, must match that used to open.
1756 * 1756 *
1757 * This is the counterpart to open_bdev_exclusive(). 1757 * This is the counterpart to open_bdev_exclusive().
1758 */ 1758 */
1759 void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) 1759 void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1760 { 1760 {
1761 bd_release(bdev); 1761 bd_release(bdev);
1762 blkdev_put(bdev, mode); 1762 blkdev_put(bdev, mode);
1763 } 1763 }
1764 1764
1765 EXPORT_SYMBOL(close_bdev_exclusive); 1765 EXPORT_SYMBOL(close_bdev_exclusive);
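
This pair is how filesystems open their backing device at mount time (cf. get_sb_bdev() in fs/super.c, which passes the file_system_type as the holder). A condensed, illustrative sketch of that pattern:

	static struct block_device *example_open_backing_dev(
			struct file_system_type *fs_type,
			int flags, const char *dev_name)
	{
		fmode_t mode = FMODE_READ;
		struct block_device *bdev;

		if (!(flags & MS_RDONLY))
			mode |= FMODE_WRITE;

		bdev = open_bdev_exclusive(dev_name, mode, fs_type);
		if (IS_ERR(bdev))
			return bdev;

		/* ... read and verify the superblock; on any later
		 * failure, undo with close_bdev_exclusive(bdev, mode) */
		return bdev;
	}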
1766 1766
1767 int __invalidate_device(struct block_device *bdev) 1767 int __invalidate_device(struct block_device *bdev)
1768 { 1768 {
1769 struct super_block *sb = get_super(bdev); 1769 struct super_block *sb = get_super(bdev);
1770 int res = 0; 1770 int res = 0;
1771 1771
1772 if (sb) { 1772 if (sb) {
1773 /* 1773 /*
1774 * no need to lock the super, get_super holds the 1774 * no need to lock the super, get_super holds the
1775 * read mutex so the filesystem cannot go away 1775 * read mutex so the filesystem cannot go away
1776 * under us (->put_super runs with the write lock 1776 * under us (->put_super runs with the write lock
1777 * held). 1777 * held).
1778 */ 1778 */
1779 shrink_dcache_sb(sb); 1779 shrink_dcache_sb(sb);
1780 res = invalidate_inodes(sb); 1780 res = invalidate_inodes(sb);
1781 drop_super(sb); 1781 drop_super(sb);
1782 } 1782 }
1783 invalidate_bdev(bdev); 1783 invalidate_bdev(bdev);
1784 return res; 1784 return res;
1785 } 1785 }
1786 EXPORT_SYMBOL(__invalidate_device); 1786 EXPORT_SYMBOL(__invalidate_device);
1787 1787