Commit 5cee5815d1564bbbd505fea86f4550f1efdb5cd0

Authored by Jan Kara
Committed by Al Viro
1 parent 429479f031

vfs: Make sys_sync() use fsync_super() (version 4)

It is unnecessarily fragile to have two places (fsync_super() and do_sync())
doing a data integrity sync of the filesystem. Alter __fsync_super() to
accommodate the needs of both callers and use it in both places. After this
patch, __fsync_super() is the only place where we gather all the calls needed
to properly send all data on a filesystem to disk.

A nice bonus is that we get complete livelock avoidance, and write_supers()
is now used only for periodic writeback of superblocks.

sync_blockdevs(), introduced a couple of patches ago, is gone now.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 7 changed files with 51 additions and 135 deletions
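The new __sync_blockdev() helper in the hunk below takes a wait flag: with
wait == 0 it only starts writeback (filemap_flush()), with wait == 1 it also
waits for completion (filemap_write_and_wait()). That flag is what enables the
livelock avoidance mentioned above: a sys_sync()-style caller can first kick
off writeback everywhere without blocking, then make a second pass that waits.
A minimal sketch of that two-pass pattern follows; the real fs/super.c side of
this patch is not part of the truncated listing below, and for_each_bdev() is
a hypothetical iterator used only for illustration:

/*
 * Illustrative sketch only: how a sys_sync()-style caller can use the
 * new helper in two passes.  for_each_bdev() is a hypothetical iterator,
 * not an API introduced by this patch.
 */
static void sync_all_bdevs_sketch(void)
{
        struct block_device *bdev;

        for_each_bdev(bdev)
                __sync_blockdev(bdev, 0);       /* start writeback, don't wait */
        for_each_bdev(bdev)
                __sync_blockdev(bdev, 1);       /* second pass: wait for I/O */
}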

fs/block_dev.c

/*
 * linux/fs/block_dev.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/kmemleak.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
        struct block_device bdev;
        struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
        return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
        return &BDEV_I(inode)->bdev;
}

EXPORT_SYMBOL(I_BDEV);

static sector_t max_block(struct block_device *bdev)
{
        sector_t retval = ~((sector_t)0);
        loff_t sz = i_size_read(bdev->bd_inode);

        if (sz) {
                unsigned int size = block_size(bdev);
                unsigned int sizebits = blksize_bits(size);
                retval = (sz >> sizebits);
        }
        return retval;
}

/* Kill _all_ buffers and pagecache , dirty or not.. */
static void kill_bdev(struct block_device *bdev)
{
        if (bdev->bd_inode->i_mapping->nrpages == 0)
                return;
        invalidate_bh_lrus();
        truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
}

int set_blocksize(struct block_device *bdev, int size)
{
        /* Size must be a power of two, and between 512 and PAGE_SIZE */
        if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
                return -EINVAL;

        /* Size cannot be smaller than the size supported by the device */
        if (size < bdev_logical_block_size(bdev))
                return -EINVAL;

        /* Don't change the size if it is same as current */
        if (bdev->bd_block_size != size) {
                sync_blockdev(bdev);
                bdev->bd_block_size = size;
                bdev->bd_inode->i_blkbits = blksize_bits(size);
                kill_bdev(bdev);
        }
        return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
        if (set_blocksize(sb->s_bdev, size))
                return 0;
        /* If we get here, we know size is power of two
         * and it's value is between 512 and PAGE_SIZE */
        sb->s_blocksize = size;
        sb->s_blocksize_bits = blksize_bits(size);
        return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
        int minsize = bdev_logical_block_size(sb->s_bdev);
        if (size < minsize)
                size = minsize;
        return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
                struct buffer_head *bh, int create)
{
        if (iblock >= max_block(I_BDEV(inode))) {
                if (create)
                        return -EIO;

                /*
                 * for reads, we're just trying to fill a partial page.
                 * return a hole, they will have to call get_block again
                 * before they can fill it, and they will get -EIO at that
                 * time
                 */
                return 0;
        }
        bh->b_bdev = I_BDEV(inode);
        bh->b_blocknr = iblock;
        set_buffer_mapped(bh);
        return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
                struct buffer_head *bh, int create)
{
        sector_t end_block = max_block(I_BDEV(inode));
        unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

        if ((iblock + max_blocks) > end_block) {
                max_blocks = end_block - iblock;
                if ((long)max_blocks <= 0) {
                        if (create)
                                return -EIO;    /* write fully beyond EOF */
                        /*
                         * It is a read which is fully beyond EOF. We return
                         * a !buffer_mapped buffer
                         */
                        max_blocks = 0;
                }
        }

        bh->b_bdev = I_BDEV(inode);
        bh->b_blocknr = iblock;
        bh->b_size = max_blocks << inode->i_blkbits;
        if (max_blocks)
                set_buffer_mapped(bh);
        return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                        loff_t offset, unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;

        return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
                                iov, offset, nr_segs, blkdev_get_blocks, NULL);
}

+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+        if (!bdev)
+                return 0;
+        if (!wait)
+                return filemap_flush(bdev->bd_inode->i_mapping);
+        return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
 /*
  * Write out and wait upon all the dirty data associated with a block
  * device via its mapping. Does not take the superblock lock.
  */
 int sync_blockdev(struct block_device *bdev)
 {
-        int ret = 0;
-
-        if (bdev)
-                ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-        return ret;
+        return __sync_blockdev(bdev, 1);
 }
 EXPORT_SYMBOL(sync_blockdev);
192 197
193 /* 198 /*
194 * Write out and wait upon all dirty data associated with this 199 * Write out and wait upon all dirty data associated with this
195 * device. Filesystem data as well as the underlying block 200 * device. Filesystem data as well as the underlying block
196 * device. Takes the superblock lock. 201 * device. Takes the superblock lock.
197 */ 202 */
198 int fsync_bdev(struct block_device *bdev) 203 int fsync_bdev(struct block_device *bdev)
199 { 204 {
200 struct super_block *sb = get_super(bdev); 205 struct super_block *sb = get_super(bdev);
201 if (sb) { 206 if (sb) {
202 int res = fsync_super(sb); 207 int res = fsync_super(sb);
203 drop_super(sb); 208 drop_super(sb);
204 return res; 209 return res;
205 } 210 }
206 return sync_blockdev(bdev); 211 return sync_blockdev(bdev);
207 } 212 }
208 EXPORT_SYMBOL(fsync_bdev); 213 EXPORT_SYMBOL(fsync_bdev);
209 214
210 /** 215 /**
211 * freeze_bdev -- lock a filesystem and force it into a consistent state 216 * freeze_bdev -- lock a filesystem and force it into a consistent state
212 * @bdev: blockdevice to lock 217 * @bdev: blockdevice to lock
213 * 218 *
214 * This takes the block device bd_mount_sem to make sure no new mounts 219 * This takes the block device bd_mount_sem to make sure no new mounts
215 * happen on bdev until thaw_bdev() is called. 220 * happen on bdev until thaw_bdev() is called.
216 * If a superblock is found on this device, we take the s_umount semaphore 221 * If a superblock is found on this device, we take the s_umount semaphore
217 * on it to make sure nobody unmounts until the snapshot creation is done. 222 * on it to make sure nobody unmounts until the snapshot creation is done.
218 * The reference counter (bd_fsfreeze_count) guarantees that only the last 223 * The reference counter (bd_fsfreeze_count) guarantees that only the last
219 * unfreeze process can unfreeze the frozen filesystem actually when multiple 224 * unfreeze process can unfreeze the frozen filesystem actually when multiple
220 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and 225 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
221 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze 226 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
222 * actually. 227 * actually.
223 */ 228 */
224 struct super_block *freeze_bdev(struct block_device *bdev) 229 struct super_block *freeze_bdev(struct block_device *bdev)
225 { 230 {
226 struct super_block *sb; 231 struct super_block *sb;
227 int error = 0; 232 int error = 0;
228 233
229 mutex_lock(&bdev->bd_fsfreeze_mutex); 234 mutex_lock(&bdev->bd_fsfreeze_mutex);
230 if (bdev->bd_fsfreeze_count > 0) { 235 if (bdev->bd_fsfreeze_count > 0) {
231 bdev->bd_fsfreeze_count++; 236 bdev->bd_fsfreeze_count++;
232 sb = get_super(bdev); 237 sb = get_super(bdev);
233 mutex_unlock(&bdev->bd_fsfreeze_mutex); 238 mutex_unlock(&bdev->bd_fsfreeze_mutex);
234 return sb; 239 return sb;
235 } 240 }
236 bdev->bd_fsfreeze_count++; 241 bdev->bd_fsfreeze_count++;
237 242
238 down(&bdev->bd_mount_sem); 243 down(&bdev->bd_mount_sem);
239 sb = get_super(bdev); 244 sb = get_super(bdev);
240 if (sb && !(sb->s_flags & MS_RDONLY)) { 245 if (sb && !(sb->s_flags & MS_RDONLY)) {
241 sb->s_frozen = SB_FREEZE_WRITE; 246 sb->s_frozen = SB_FREEZE_WRITE;
242 smp_wmb(); 247 smp_wmb();
243 248
244 fsync_super(sb); 249 fsync_super(sb);
245 250
246 sb->s_frozen = SB_FREEZE_TRANS; 251 sb->s_frozen = SB_FREEZE_TRANS;
247 smp_wmb(); 252 smp_wmb();
248 253
249 sync_blockdev(sb->s_bdev); 254 sync_blockdev(sb->s_bdev);
250 255
251 if (sb->s_op->freeze_fs) { 256 if (sb->s_op->freeze_fs) {
252 error = sb->s_op->freeze_fs(sb); 257 error = sb->s_op->freeze_fs(sb);
253 if (error) { 258 if (error) {
254 printk(KERN_ERR 259 printk(KERN_ERR
255 "VFS:Filesystem freeze failed\n"); 260 "VFS:Filesystem freeze failed\n");
256 sb->s_frozen = SB_UNFROZEN; 261 sb->s_frozen = SB_UNFROZEN;
257 drop_super(sb); 262 drop_super(sb);
258 up(&bdev->bd_mount_sem); 263 up(&bdev->bd_mount_sem);
259 bdev->bd_fsfreeze_count--; 264 bdev->bd_fsfreeze_count--;
260 mutex_unlock(&bdev->bd_fsfreeze_mutex); 265 mutex_unlock(&bdev->bd_fsfreeze_mutex);
261 return ERR_PTR(error); 266 return ERR_PTR(error);
262 } 267 }
263 } 268 }
264 } 269 }
265 270
266 sync_blockdev(bdev); 271 sync_blockdev(bdev);
267 mutex_unlock(&bdev->bd_fsfreeze_mutex); 272 mutex_unlock(&bdev->bd_fsfreeze_mutex);
268 273
269 return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ 274 return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
270 } 275 }
271 EXPORT_SYMBOL(freeze_bdev); 276 EXPORT_SYMBOL(freeze_bdev);
272 277
273 /** 278 /**
274 * thaw_bdev -- unlock filesystem 279 * thaw_bdev -- unlock filesystem
275 * @bdev: blockdevice to unlock 280 * @bdev: blockdevice to unlock
276 * @sb: associated superblock 281 * @sb: associated superblock
277 * 282 *
278 * Unlocks the filesystem and marks it writeable again after freeze_bdev(). 283 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
279 */ 284 */
280 int thaw_bdev(struct block_device *bdev, struct super_block *sb) 285 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
281 { 286 {
282 int error = 0; 287 int error = 0;
283 288
284 mutex_lock(&bdev->bd_fsfreeze_mutex); 289 mutex_lock(&bdev->bd_fsfreeze_mutex);
285 if (!bdev->bd_fsfreeze_count) { 290 if (!bdev->bd_fsfreeze_count) {
286 mutex_unlock(&bdev->bd_fsfreeze_mutex); 291 mutex_unlock(&bdev->bd_fsfreeze_mutex);
287 return -EINVAL; 292 return -EINVAL;
288 } 293 }
289 294
290 bdev->bd_fsfreeze_count--; 295 bdev->bd_fsfreeze_count--;
291 if (bdev->bd_fsfreeze_count > 0) { 296 if (bdev->bd_fsfreeze_count > 0) {
292 if (sb) 297 if (sb)
293 drop_super(sb); 298 drop_super(sb);
294 mutex_unlock(&bdev->bd_fsfreeze_mutex); 299 mutex_unlock(&bdev->bd_fsfreeze_mutex);
295 return 0; 300 return 0;
296 } 301 }
297 302
298 if (sb) { 303 if (sb) {
299 BUG_ON(sb->s_bdev != bdev); 304 BUG_ON(sb->s_bdev != bdev);
300 if (!(sb->s_flags & MS_RDONLY)) { 305 if (!(sb->s_flags & MS_RDONLY)) {
301 if (sb->s_op->unfreeze_fs) { 306 if (sb->s_op->unfreeze_fs) {
302 error = sb->s_op->unfreeze_fs(sb); 307 error = sb->s_op->unfreeze_fs(sb);
303 if (error) { 308 if (error) {
304 printk(KERN_ERR 309 printk(KERN_ERR
305 "VFS:Filesystem thaw failed\n"); 310 "VFS:Filesystem thaw failed\n");
306 sb->s_frozen = SB_FREEZE_TRANS; 311 sb->s_frozen = SB_FREEZE_TRANS;
307 bdev->bd_fsfreeze_count++; 312 bdev->bd_fsfreeze_count++;
308 mutex_unlock(&bdev->bd_fsfreeze_mutex); 313 mutex_unlock(&bdev->bd_fsfreeze_mutex);
309 return error; 314 return error;
310 } 315 }
311 } 316 }
312 sb->s_frozen = SB_UNFROZEN; 317 sb->s_frozen = SB_UNFROZEN;
313 smp_wmb(); 318 smp_wmb();
314 wake_up(&sb->s_wait_unfrozen); 319 wake_up(&sb->s_wait_unfrozen);
315 } 320 }
316 drop_super(sb); 321 drop_super(sb);
317 } 322 }
318 323
319 up(&bdev->bd_mount_sem); 324 up(&bdev->bd_mount_sem);
320 mutex_unlock(&bdev->bd_fsfreeze_mutex); 325 mutex_unlock(&bdev->bd_fsfreeze_mutex);
321 return 0; 326 return 0;
322 } 327 }
323 EXPORT_SYMBOL(thaw_bdev); 328 EXPORT_SYMBOL(thaw_bdev);
324 329
325 static int blkdev_writepage(struct page *page, struct writeback_control *wbc) 330 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
326 { 331 {
327 return block_write_full_page(page, blkdev_get_block, wbc); 332 return block_write_full_page(page, blkdev_get_block, wbc);
328 } 333 }
329 334
330 static int blkdev_readpage(struct file * file, struct page * page) 335 static int blkdev_readpage(struct file * file, struct page * page)
331 { 336 {
332 return block_read_full_page(page, blkdev_get_block); 337 return block_read_full_page(page, blkdev_get_block);
333 } 338 }
334 339
335 static int blkdev_write_begin(struct file *file, struct address_space *mapping, 340 static int blkdev_write_begin(struct file *file, struct address_space *mapping,
336 loff_t pos, unsigned len, unsigned flags, 341 loff_t pos, unsigned len, unsigned flags,
337 struct page **pagep, void **fsdata) 342 struct page **pagep, void **fsdata)
338 { 343 {
339 *pagep = NULL; 344 *pagep = NULL;
340 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 345 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
341 blkdev_get_block); 346 blkdev_get_block);
342 } 347 }
343 348
344 static int blkdev_write_end(struct file *file, struct address_space *mapping, 349 static int blkdev_write_end(struct file *file, struct address_space *mapping,
345 loff_t pos, unsigned len, unsigned copied, 350 loff_t pos, unsigned len, unsigned copied,
346 struct page *page, void *fsdata) 351 struct page *page, void *fsdata)
347 { 352 {
348 int ret; 353 int ret;
349 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); 354 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
350 355
351 unlock_page(page); 356 unlock_page(page);
352 page_cache_release(page); 357 page_cache_release(page);
353 358
354 return ret; 359 return ret;
355 } 360 }
356 361
357 /* 362 /*
358 * private llseek: 363 * private llseek:
359 * for a block special file file->f_path.dentry->d_inode->i_size is zero 364 * for a block special file file->f_path.dentry->d_inode->i_size is zero
360 * so we compute the size by hand (just as in block_read/write above) 365 * so we compute the size by hand (just as in block_read/write above)
361 */ 366 */
362 static loff_t block_llseek(struct file *file, loff_t offset, int origin) 367 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
363 { 368 {
364 struct inode *bd_inode = file->f_mapping->host; 369 struct inode *bd_inode = file->f_mapping->host;
365 loff_t size; 370 loff_t size;
366 loff_t retval; 371 loff_t retval;
367 372
368 mutex_lock(&bd_inode->i_mutex); 373 mutex_lock(&bd_inode->i_mutex);
369 size = i_size_read(bd_inode); 374 size = i_size_read(bd_inode);
370 375
371 switch (origin) { 376 switch (origin) {
372 case 2: 377 case 2:
373 offset += size; 378 offset += size;
374 break; 379 break;
375 case 1: 380 case 1:
376 offset += file->f_pos; 381 offset += file->f_pos;
377 } 382 }
378 retval = -EINVAL; 383 retval = -EINVAL;
379 if (offset >= 0 && offset <= size) { 384 if (offset >= 0 && offset <= size) {
380 if (offset != file->f_pos) { 385 if (offset != file->f_pos) {
381 file->f_pos = offset; 386 file->f_pos = offset;
382 } 387 }
383 retval = offset; 388 retval = offset;
384 } 389 }
385 mutex_unlock(&bd_inode->i_mutex); 390 mutex_unlock(&bd_inode->i_mutex);
386 return retval; 391 return retval;
387 } 392 }
388 393
389 /* 394 /*
390 * Filp is never NULL; the only case when ->fsync() is called with 395 * Filp is never NULL; the only case when ->fsync() is called with
391 * NULL first argument is nfsd_sync_dir() and that's not a directory. 396 * NULL first argument is nfsd_sync_dir() and that's not a directory.
392 */ 397 */
393 398
394 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) 399 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
395 { 400 {
396 return sync_blockdev(I_BDEV(filp->f_mapping->host)); 401 return sync_blockdev(I_BDEV(filp->f_mapping->host));
397 } 402 }
398 403
399 /* 404 /*
400 * pseudo-fs 405 * pseudo-fs
401 */ 406 */
402 407
403 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); 408 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
404 static struct kmem_cache * bdev_cachep __read_mostly; 409 static struct kmem_cache * bdev_cachep __read_mostly;
405 410
406 static struct inode *bdev_alloc_inode(struct super_block *sb) 411 static struct inode *bdev_alloc_inode(struct super_block *sb)
407 { 412 {
408 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); 413 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
409 if (!ei) 414 if (!ei)
410 return NULL; 415 return NULL;
411 return &ei->vfs_inode; 416 return &ei->vfs_inode;
412 } 417 }
413 418
414 static void bdev_destroy_inode(struct inode *inode) 419 static void bdev_destroy_inode(struct inode *inode)
415 { 420 {
416 struct bdev_inode *bdi = BDEV_I(inode); 421 struct bdev_inode *bdi = BDEV_I(inode);
417 422
418 bdi->bdev.bd_inode_backing_dev_info = NULL; 423 bdi->bdev.bd_inode_backing_dev_info = NULL;
419 kmem_cache_free(bdev_cachep, bdi); 424 kmem_cache_free(bdev_cachep, bdi);
420 } 425 }
421 426
422 static void init_once(void *foo) 427 static void init_once(void *foo)
423 { 428 {
424 struct bdev_inode *ei = (struct bdev_inode *) foo; 429 struct bdev_inode *ei = (struct bdev_inode *) foo;
425 struct block_device *bdev = &ei->bdev; 430 struct block_device *bdev = &ei->bdev;
426 431
427 memset(bdev, 0, sizeof(*bdev)); 432 memset(bdev, 0, sizeof(*bdev));
428 mutex_init(&bdev->bd_mutex); 433 mutex_init(&bdev->bd_mutex);
429 sema_init(&bdev->bd_mount_sem, 1); 434 sema_init(&bdev->bd_mount_sem, 1);
430 INIT_LIST_HEAD(&bdev->bd_inodes); 435 INIT_LIST_HEAD(&bdev->bd_inodes);
431 INIT_LIST_HEAD(&bdev->bd_list); 436 INIT_LIST_HEAD(&bdev->bd_list);
432 #ifdef CONFIG_SYSFS 437 #ifdef CONFIG_SYSFS
433 INIT_LIST_HEAD(&bdev->bd_holder_list); 438 INIT_LIST_HEAD(&bdev->bd_holder_list);
434 #endif 439 #endif
435 inode_init_once(&ei->vfs_inode); 440 inode_init_once(&ei->vfs_inode);
436 /* Initialize mutex for freeze. */ 441 /* Initialize mutex for freeze. */
437 mutex_init(&bdev->bd_fsfreeze_mutex); 442 mutex_init(&bdev->bd_fsfreeze_mutex);
438 } 443 }
439 444
440 static inline void __bd_forget(struct inode *inode) 445 static inline void __bd_forget(struct inode *inode)
441 { 446 {
442 list_del_init(&inode->i_devices); 447 list_del_init(&inode->i_devices);
443 inode->i_bdev = NULL; 448 inode->i_bdev = NULL;
444 inode->i_mapping = &inode->i_data; 449 inode->i_mapping = &inode->i_data;
445 } 450 }
446 451
447 static void bdev_clear_inode(struct inode *inode) 452 static void bdev_clear_inode(struct inode *inode)
448 { 453 {
449 struct block_device *bdev = &BDEV_I(inode)->bdev; 454 struct block_device *bdev = &BDEV_I(inode)->bdev;
450 struct list_head *p; 455 struct list_head *p;
451 spin_lock(&bdev_lock); 456 spin_lock(&bdev_lock);
452 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { 457 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
453 __bd_forget(list_entry(p, struct inode, i_devices)); 458 __bd_forget(list_entry(p, struct inode, i_devices));
454 } 459 }
455 list_del_init(&bdev->bd_list); 460 list_del_init(&bdev->bd_list);
456 spin_unlock(&bdev_lock); 461 spin_unlock(&bdev_lock);
457 } 462 }
458 463
459 static const struct super_operations bdev_sops = { 464 static const struct super_operations bdev_sops = {
460 .statfs = simple_statfs, 465 .statfs = simple_statfs,
461 .alloc_inode = bdev_alloc_inode, 466 .alloc_inode = bdev_alloc_inode,
462 .destroy_inode = bdev_destroy_inode, 467 .destroy_inode = bdev_destroy_inode,
463 .drop_inode = generic_delete_inode, 468 .drop_inode = generic_delete_inode,
464 .clear_inode = bdev_clear_inode, 469 .clear_inode = bdev_clear_inode,
465 }; 470 };
466 471
467 static int bd_get_sb(struct file_system_type *fs_type, 472 static int bd_get_sb(struct file_system_type *fs_type,
468 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 473 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
469 { 474 {
470 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); 475 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
471 } 476 }
472 477
473 static struct file_system_type bd_type = { 478 static struct file_system_type bd_type = {
474 .name = "bdev", 479 .name = "bdev",
475 .get_sb = bd_get_sb, 480 .get_sb = bd_get_sb,
476 .kill_sb = kill_anon_super, 481 .kill_sb = kill_anon_super,
477 }; 482 };
478 483
479 struct super_block *blockdev_superblock __read_mostly; 484 struct super_block *blockdev_superblock __read_mostly;
480 485
481 void __init bdev_cache_init(void) 486 void __init bdev_cache_init(void)
482 { 487 {
483 int err; 488 int err;
484 struct vfsmount *bd_mnt; 489 struct vfsmount *bd_mnt;
485 490
486 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 491 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
487 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 492 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
488 SLAB_MEM_SPREAD|SLAB_PANIC), 493 SLAB_MEM_SPREAD|SLAB_PANIC),
489 init_once); 494 init_once);
490 err = register_filesystem(&bd_type); 495 err = register_filesystem(&bd_type);
491 if (err) 496 if (err)
492 panic("Cannot register bdev pseudo-fs"); 497 panic("Cannot register bdev pseudo-fs");
493 bd_mnt = kern_mount(&bd_type); 498 bd_mnt = kern_mount(&bd_type);
494 if (IS_ERR(bd_mnt)) 499 if (IS_ERR(bd_mnt))
495 panic("Cannot create bdev pseudo-fs"); 500 panic("Cannot create bdev pseudo-fs");
496 /* 501 /*
497 * This vfsmount structure is only used to obtain the 502 * This vfsmount structure is only used to obtain the
498 * blockdev_superblock, so tell kmemleak not to report it. 503 * blockdev_superblock, so tell kmemleak not to report it.
499 */ 504 */
500 kmemleak_not_leak(bd_mnt); 505 kmemleak_not_leak(bd_mnt);
501 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ 506 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
502 } 507 }
503 508
504 /* 509 /*
505 * Most likely _very_ bad one - but then it's hardly critical for small 510 * Most likely _very_ bad one - but then it's hardly critical for small
506 * /dev and can be fixed when somebody will need really large one. 511 * /dev and can be fixed when somebody will need really large one.
507 * Keep in mind that it will be fed through icache hash function too. 512 * Keep in mind that it will be fed through icache hash function too.
508 */ 513 */
509 static inline unsigned long hash(dev_t dev) 514 static inline unsigned long hash(dev_t dev)
510 { 515 {
511 return MAJOR(dev)+MINOR(dev); 516 return MAJOR(dev)+MINOR(dev);
512 } 517 }
513 518
514 static int bdev_test(struct inode *inode, void *data) 519 static int bdev_test(struct inode *inode, void *data)
515 { 520 {
516 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; 521 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
517 } 522 }
518 523
519 static int bdev_set(struct inode *inode, void *data) 524 static int bdev_set(struct inode *inode, void *data)
520 { 525 {
521 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; 526 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
522 return 0; 527 return 0;
523 } 528 }
524 529
525 static LIST_HEAD(all_bdevs); 530 static LIST_HEAD(all_bdevs);
526 531
527 struct block_device *bdget(dev_t dev) 532 struct block_device *bdget(dev_t dev)
528 { 533 {
529 struct block_device *bdev; 534 struct block_device *bdev;
530 struct inode *inode; 535 struct inode *inode;
531 536
532 inode = iget5_locked(blockdev_superblock, hash(dev), 537 inode = iget5_locked(blockdev_superblock, hash(dev),
533 bdev_test, bdev_set, &dev); 538 bdev_test, bdev_set, &dev);
534 539
535 if (!inode) 540 if (!inode)
536 return NULL; 541 return NULL;
537 542
538 bdev = &BDEV_I(inode)->bdev; 543 bdev = &BDEV_I(inode)->bdev;
539 544
540 if (inode->i_state & I_NEW) { 545 if (inode->i_state & I_NEW) {
541 bdev->bd_contains = NULL; 546 bdev->bd_contains = NULL;
542 bdev->bd_inode = inode; 547 bdev->bd_inode = inode;
543 bdev->bd_block_size = (1 << inode->i_blkbits); 548 bdev->bd_block_size = (1 << inode->i_blkbits);
544 bdev->bd_part_count = 0; 549 bdev->bd_part_count = 0;
545 bdev->bd_invalidated = 0; 550 bdev->bd_invalidated = 0;
546 inode->i_mode = S_IFBLK; 551 inode->i_mode = S_IFBLK;
547 inode->i_rdev = dev; 552 inode->i_rdev = dev;
548 inode->i_bdev = bdev; 553 inode->i_bdev = bdev;
549 inode->i_data.a_ops = &def_blk_aops; 554 inode->i_data.a_ops = &def_blk_aops;
550 mapping_set_gfp_mask(&inode->i_data, GFP_USER); 555 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
551 inode->i_data.backing_dev_info = &default_backing_dev_info; 556 inode->i_data.backing_dev_info = &default_backing_dev_info;
552 spin_lock(&bdev_lock); 557 spin_lock(&bdev_lock);
553 list_add(&bdev->bd_list, &all_bdevs); 558 list_add(&bdev->bd_list, &all_bdevs);
554 spin_unlock(&bdev_lock); 559 spin_unlock(&bdev_lock);
555 unlock_new_inode(inode); 560 unlock_new_inode(inode);
556 } 561 }
557 return bdev; 562 return bdev;
558 } 563 }
559 564
560 EXPORT_SYMBOL(bdget); 565 EXPORT_SYMBOL(bdget);
561 566
562 long nr_blockdev_pages(void) 567 long nr_blockdev_pages(void)
563 { 568 {
564 struct block_device *bdev; 569 struct block_device *bdev;
565 long ret = 0; 570 long ret = 0;
566 spin_lock(&bdev_lock); 571 spin_lock(&bdev_lock);
567 list_for_each_entry(bdev, &all_bdevs, bd_list) { 572 list_for_each_entry(bdev, &all_bdevs, bd_list) {
568 ret += bdev->bd_inode->i_mapping->nrpages; 573 ret += bdev->bd_inode->i_mapping->nrpages;
569 } 574 }
570 spin_unlock(&bdev_lock); 575 spin_unlock(&bdev_lock);
571 return ret; 576 return ret;
572 } 577 }
573 578
574 void bdput(struct block_device *bdev) 579 void bdput(struct block_device *bdev)
575 { 580 {
576 iput(bdev->bd_inode); 581 iput(bdev->bd_inode);
577 } 582 }
578 583
579 EXPORT_SYMBOL(bdput); 584 EXPORT_SYMBOL(bdput);
580 585
581 static struct block_device *bd_acquire(struct inode *inode) 586 static struct block_device *bd_acquire(struct inode *inode)
582 { 587 {
583 struct block_device *bdev; 588 struct block_device *bdev;
584 589
585 spin_lock(&bdev_lock); 590 spin_lock(&bdev_lock);
586 bdev = inode->i_bdev; 591 bdev = inode->i_bdev;
587 if (bdev) { 592 if (bdev) {
588 atomic_inc(&bdev->bd_inode->i_count); 593 atomic_inc(&bdev->bd_inode->i_count);
589 spin_unlock(&bdev_lock); 594 spin_unlock(&bdev_lock);
590 return bdev; 595 return bdev;
591 } 596 }
592 spin_unlock(&bdev_lock); 597 spin_unlock(&bdev_lock);
593 598
594 bdev = bdget(inode->i_rdev); 599 bdev = bdget(inode->i_rdev);
595 if (bdev) { 600 if (bdev) {
596 spin_lock(&bdev_lock); 601 spin_lock(&bdev_lock);
597 if (!inode->i_bdev) { 602 if (!inode->i_bdev) {
598 /* 603 /*
599 * We take an additional bd_inode->i_count for inode, 604 * We take an additional bd_inode->i_count for inode,
600 * and it's released in clear_inode() of inode. 605 * and it's released in clear_inode() of inode.
601 * So, we can access it via ->i_mapping always 606 * So, we can access it via ->i_mapping always
602 * without igrab(). 607 * without igrab().
603 */ 608 */
604 atomic_inc(&bdev->bd_inode->i_count); 609 atomic_inc(&bdev->bd_inode->i_count);
605 inode->i_bdev = bdev; 610 inode->i_bdev = bdev;
606 inode->i_mapping = bdev->bd_inode->i_mapping; 611 inode->i_mapping = bdev->bd_inode->i_mapping;
607 list_add(&inode->i_devices, &bdev->bd_inodes); 612 list_add(&inode->i_devices, &bdev->bd_inodes);
608 } 613 }
609 spin_unlock(&bdev_lock); 614 spin_unlock(&bdev_lock);
610 } 615 }
611 return bdev; 616 return bdev;
612 } 617 }
613 618
614 /* Call when you free inode */ 619 /* Call when you free inode */
615 620
616 void bd_forget(struct inode *inode) 621 void bd_forget(struct inode *inode)
617 { 622 {
618 struct block_device *bdev = NULL; 623 struct block_device *bdev = NULL;
619 624
620 spin_lock(&bdev_lock); 625 spin_lock(&bdev_lock);
621 if (inode->i_bdev) { 626 if (inode->i_bdev) {
622 if (!sb_is_blkdev_sb(inode->i_sb)) 627 if (!sb_is_blkdev_sb(inode->i_sb))
623 bdev = inode->i_bdev; 628 bdev = inode->i_bdev;
624 __bd_forget(inode); 629 __bd_forget(inode);
625 } 630 }
626 spin_unlock(&bdev_lock); 631 spin_unlock(&bdev_lock);
627 632
628 if (bdev) 633 if (bdev)
629 iput(bdev->bd_inode); 634 iput(bdev->bd_inode);
630 } 635 }
631 636
632 int bd_claim(struct block_device *bdev, void *holder) 637 int bd_claim(struct block_device *bdev, void *holder)
633 { 638 {
634 int res; 639 int res;
635 spin_lock(&bdev_lock); 640 spin_lock(&bdev_lock);
636 641
637 /* first decide result */ 642 /* first decide result */
638 if (bdev->bd_holder == holder) 643 if (bdev->bd_holder == holder)
639 res = 0; /* already a holder */ 644 res = 0; /* already a holder */
640 else if (bdev->bd_holder != NULL) 645 else if (bdev->bd_holder != NULL)
641 res = -EBUSY; /* held by someone else */ 646 res = -EBUSY; /* held by someone else */
642 else if (bdev->bd_contains == bdev) 647 else if (bdev->bd_contains == bdev)
643 res = 0; /* is a whole device which isn't held */ 648 res = 0; /* is a whole device which isn't held */
644 649
645 else if (bdev->bd_contains->bd_holder == bd_claim) 650 else if (bdev->bd_contains->bd_holder == bd_claim)
646 res = 0; /* is a partition of a device that is being partitioned */ 651 res = 0; /* is a partition of a device that is being partitioned */
647 else if (bdev->bd_contains->bd_holder != NULL) 652 else if (bdev->bd_contains->bd_holder != NULL)
648 res = -EBUSY; /* is a partition of a held device */ 653 res = -EBUSY; /* is a partition of a held device */
649 else 654 else
650 res = 0; /* is a partition of an un-held device */ 655 res = 0; /* is a partition of an un-held device */
651 656
652 /* now impose change */ 657 /* now impose change */
653 if (res==0) { 658 if (res==0) {
654 /* note that for a whole device bd_holders 659 /* note that for a whole device bd_holders
655 * will be incremented twice, and bd_holder will 660 * will be incremented twice, and bd_holder will
656 * be set to bd_claim before being set to holder 661 * be set to bd_claim before being set to holder
657 */ 662 */
658 bdev->bd_contains->bd_holders ++; 663 bdev->bd_contains->bd_holders ++;
659 bdev->bd_contains->bd_holder = bd_claim; 664 bdev->bd_contains->bd_holder = bd_claim;
660 bdev->bd_holders++; 665 bdev->bd_holders++;
661 bdev->bd_holder = holder; 666 bdev->bd_holder = holder;
662 } 667 }
663 spin_unlock(&bdev_lock); 668 spin_unlock(&bdev_lock);
664 return res; 669 return res;
665 } 670 }
666 671
667 EXPORT_SYMBOL(bd_claim); 672 EXPORT_SYMBOL(bd_claim);
668 673
669 void bd_release(struct block_device *bdev) 674 void bd_release(struct block_device *bdev)
670 { 675 {
671 spin_lock(&bdev_lock); 676 spin_lock(&bdev_lock);
672 if (!--bdev->bd_contains->bd_holders) 677 if (!--bdev->bd_contains->bd_holders)
673 bdev->bd_contains->bd_holder = NULL; 678 bdev->bd_contains->bd_holder = NULL;
674 if (!--bdev->bd_holders) 679 if (!--bdev->bd_holders)
675 bdev->bd_holder = NULL; 680 bdev->bd_holder = NULL;
676 spin_unlock(&bdev_lock); 681 spin_unlock(&bdev_lock);
677 } 682 }
678 683
679 EXPORT_SYMBOL(bd_release); 684 EXPORT_SYMBOL(bd_release);
680 685
681 #ifdef CONFIG_SYSFS 686 #ifdef CONFIG_SYSFS
682 /* 687 /*
683 * Functions for bd_claim_by_kobject / bd_release_from_kobject 688 * Functions for bd_claim_by_kobject / bd_release_from_kobject
684 * 689 *
685 * If a kobject is passed to bd_claim_by_kobject() 690 * If a kobject is passed to bd_claim_by_kobject()
686 * and the kobject has a parent directory, 691 * and the kobject has a parent directory,
687 * following symlinks are created: 692 * following symlinks are created:
688 * o from the kobject to the claimed bdev 693 * o from the kobject to the claimed bdev
689 * o from "holders" directory of the bdev to the parent of the kobject 694 * o from "holders" directory of the bdev to the parent of the kobject
690 * bd_release_from_kobject() removes these symlinks. 695 * bd_release_from_kobject() removes these symlinks.
691 * 696 *
692 * Example: 697 * Example:
693 * If /dev/dm-0 maps to /dev/sda, kobject corresponding to 698 * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
694 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: 699 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
695 * /sys/block/dm-0/slaves/sda --> /sys/block/sda 700 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
696 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 701 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
697 */ 702 */
698 703
699 static int add_symlink(struct kobject *from, struct kobject *to) 704 static int add_symlink(struct kobject *from, struct kobject *to)
700 { 705 {
701 if (!from || !to) 706 if (!from || !to)
702 return 0; 707 return 0;
703 return sysfs_create_link(from, to, kobject_name(to)); 708 return sysfs_create_link(from, to, kobject_name(to));
704 } 709 }
705 710
706 static void del_symlink(struct kobject *from, struct kobject *to) 711 static void del_symlink(struct kobject *from, struct kobject *to)
707 { 712 {
708 if (!from || !to) 713 if (!from || !to)
709 return; 714 return;
710 sysfs_remove_link(from, kobject_name(to)); 715 sysfs_remove_link(from, kobject_name(to));
711 } 716 }
712 717
713 /* 718 /*
714 * 'struct bd_holder' contains pointers to kobjects symlinked by 719 * 'struct bd_holder' contains pointers to kobjects symlinked by
715 * bd_claim_by_kobject. 720 * bd_claim_by_kobject.
716 * It's connected to bd_holder_list which is protected by bdev->bd_sem. 721 * It's connected to bd_holder_list which is protected by bdev->bd_sem.
717 */ 722 */
718 struct bd_holder { 723 struct bd_holder {
719 struct list_head list; /* chain of holders of the bdev */ 724 struct list_head list; /* chain of holders of the bdev */
720 int count; /* references from the holder */ 725 int count; /* references from the holder */
721 struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ 726 struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
722 struct kobject *hdev; /* e.g. "/block/dm-0" */ 727 struct kobject *hdev; /* e.g. "/block/dm-0" */
723 struct kobject *hdir; /* e.g. "/block/sda/holders" */ 728 struct kobject *hdir; /* e.g. "/block/sda/holders" */
724 struct kobject *sdev; /* e.g. "/block/sda" */ 729 struct kobject *sdev; /* e.g. "/block/sda" */
725 }; 730 };
726 731
727 /* 732 /*
728 * Get references of related kobjects at once. 733 * Get references of related kobjects at once.
729 * Returns 1 on success. 0 on failure. 734 * Returns 1 on success. 0 on failure.
730 * 735 *
731 * Should call bd_holder_release_dirs() after successful use. 736 * Should call bd_holder_release_dirs() after successful use.
732 */ 737 */
733 static int bd_holder_grab_dirs(struct block_device *bdev, 738 static int bd_holder_grab_dirs(struct block_device *bdev,
734 struct bd_holder *bo) 739 struct bd_holder *bo)
735 { 740 {
736 if (!bdev || !bo) 741 if (!bdev || !bo)
737 return 0; 742 return 0;
738 743
739 bo->sdir = kobject_get(bo->sdir); 744 bo->sdir = kobject_get(bo->sdir);
740 if (!bo->sdir) 745 if (!bo->sdir)
741 return 0; 746 return 0;
742 747
743 bo->hdev = kobject_get(bo->sdir->parent); 748 bo->hdev = kobject_get(bo->sdir->parent);
744 if (!bo->hdev) 749 if (!bo->hdev)
745 goto fail_put_sdir; 750 goto fail_put_sdir;
746 751
747 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); 752 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
748 if (!bo->sdev) 753 if (!bo->sdev)
749 goto fail_put_hdev; 754 goto fail_put_hdev;
750 755
751 bo->hdir = kobject_get(bdev->bd_part->holder_dir); 756 bo->hdir = kobject_get(bdev->bd_part->holder_dir);
752 if (!bo->hdir) 757 if (!bo->hdir)
753 goto fail_put_sdev; 758 goto fail_put_sdev;
754 759
755 return 1; 760 return 1;
756 761
757 fail_put_sdev: 762 fail_put_sdev:
758 kobject_put(bo->sdev); 763 kobject_put(bo->sdev);
759 fail_put_hdev: 764 fail_put_hdev:
760 kobject_put(bo->hdev); 765 kobject_put(bo->hdev);
761 fail_put_sdir: 766 fail_put_sdir:
762 kobject_put(bo->sdir); 767 kobject_put(bo->sdir);
763 768
764 return 0; 769 return 0;
765 } 770 }
766 771
767 /* Put references of related kobjects at once. */ 772 /* Put references of related kobjects at once. */
768 static void bd_holder_release_dirs(struct bd_holder *bo) 773 static void bd_holder_release_dirs(struct bd_holder *bo)
769 { 774 {
770 kobject_put(bo->hdir); 775 kobject_put(bo->hdir);
771 kobject_put(bo->sdev); 776 kobject_put(bo->sdev);
772 kobject_put(bo->hdev); 777 kobject_put(bo->hdev);
773 kobject_put(bo->sdir); 778 kobject_put(bo->sdir);
774 } 779 }
775 780
776 static struct bd_holder *alloc_bd_holder(struct kobject *kobj) 781 static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
777 { 782 {
778 struct bd_holder *bo; 783 struct bd_holder *bo;
779 784
780 bo = kzalloc(sizeof(*bo), GFP_KERNEL); 785 bo = kzalloc(sizeof(*bo), GFP_KERNEL);
781 if (!bo) 786 if (!bo)
782 return NULL; 787 return NULL;
783 788
784 bo->count = 1; 789 bo->count = 1;
785 bo->sdir = kobj; 790 bo->sdir = kobj;
786 791
787 return bo; 792 return bo;
788 } 793 }
789 794
790 static void free_bd_holder(struct bd_holder *bo) 795 static void free_bd_holder(struct bd_holder *bo)
791 { 796 {
792 kfree(bo); 797 kfree(bo);
793 } 798 }
794 799
795 /** 800 /**
796 * find_bd_holder - find matching struct bd_holder from the block device 801 * find_bd_holder - find matching struct bd_holder from the block device
797 * 802 *
798 * @bdev: struct block device to be searched 803 * @bdev: struct block device to be searched
799 * @bo: target struct bd_holder 804 * @bo: target struct bd_holder
800 * 805 *
801 * Returns matching entry with @bo in @bdev->bd_holder_list. 806 * Returns matching entry with @bo in @bdev->bd_holder_list.
802 * If found, increment the reference count and return the pointer. 807 * If found, increment the reference count and return the pointer.
803 * If not found, returns NULL. 808 * If not found, returns NULL.
804 */ 809 */
805 static struct bd_holder *find_bd_holder(struct block_device *bdev, 810 static struct bd_holder *find_bd_holder(struct block_device *bdev,
806 struct bd_holder *bo) 811 struct bd_holder *bo)
807 { 812 {
808 struct bd_holder *tmp; 813 struct bd_holder *tmp;
809 814
810 list_for_each_entry(tmp, &bdev->bd_holder_list, list) 815 list_for_each_entry(tmp, &bdev->bd_holder_list, list)
811 if (tmp->sdir == bo->sdir) { 816 if (tmp->sdir == bo->sdir) {
812 tmp->count++; 817 tmp->count++;
813 return tmp; 818 return tmp;
814 } 819 }
815 820
816 return NULL; 821 return NULL;
817 } 822 }
818 823
819 /** 824 /**
820 * add_bd_holder - create sysfs symlinks for bd_claim() relationship 825 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
821 * 826 *
822 * @bdev: block device to be bd_claimed 827 * @bdev: block device to be bd_claimed
823 * @bo: preallocated and initialized by alloc_bd_holder() 828 * @bo: preallocated and initialized by alloc_bd_holder()
824 * 829 *
825 * Add @bo to @bdev->bd_holder_list, create symlinks. 830 * Add @bo to @bdev->bd_holder_list, create symlinks.
826 * 831 *
827 * Returns 0 if symlinks are created. 832 * Returns 0 if symlinks are created.
828 * Returns -ve if something fails. 833 * Returns -ve if something fails.
829 */ 834 */
830 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) 835 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
831 { 836 {
832 int err; 837 int err;
833 838
834 if (!bo) 839 if (!bo)
835 return -EINVAL; 840 return -EINVAL;
836 841
837 if (!bd_holder_grab_dirs(bdev, bo)) 842 if (!bd_holder_grab_dirs(bdev, bo))
838 return -EBUSY; 843 return -EBUSY;
839 844
840 err = add_symlink(bo->sdir, bo->sdev); 845 err = add_symlink(bo->sdir, bo->sdev);
841 if (err) 846 if (err)
842 return err; 847 return err;
843 848
844 err = add_symlink(bo->hdir, bo->hdev); 849 err = add_symlink(bo->hdir, bo->hdev);
845 if (err) { 850 if (err) {
846 del_symlink(bo->sdir, bo->sdev); 851 del_symlink(bo->sdir, bo->sdev);
847 return err; 852 return err;
848 } 853 }
849 854
850 list_add_tail(&bo->list, &bdev->bd_holder_list); 855 list_add_tail(&bo->list, &bdev->bd_holder_list);
851 return 0; 856 return 0;
852 } 857 }
853 858
854 /** 859 /**
855 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship 860 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
856 * 861 *
857 * @bdev: block device to be bd_claimed 862 * @bdev: block device to be bd_claimed
858 * @kobj: holder's kobject 863 * @kobj: holder's kobject
859 * 864 *
860 * If there is matching entry with @kobj in @bdev->bd_holder_list 865 * If there is matching entry with @kobj in @bdev->bd_holder_list
861 * and no other bd_claim() from the same kobject, 866 * and no other bd_claim() from the same kobject,
862 * remove the struct bd_holder from the list, delete symlinks for it. 867 * remove the struct bd_holder from the list, delete symlinks for it.
863 * 868 *
864 * Returns a pointer to the struct bd_holder when it's removed from the list 869 * Returns a pointer to the struct bd_holder when it's removed from the list
865 * and ready to be freed. 870 * and ready to be freed.
866 * Returns NULL if matching claim isn't found or there is other bd_claim() 871 * Returns NULL if matching claim isn't found or there is other bd_claim()
867 * by the same kobject. 872 * by the same kobject.
868 */ 873 */
869 static struct bd_holder *del_bd_holder(struct block_device *bdev, 874 static struct bd_holder *del_bd_holder(struct block_device *bdev,
870 struct kobject *kobj) 875 struct kobject *kobj)
871 { 876 {
872 struct bd_holder *bo; 877 struct bd_holder *bo;
873 878
874 list_for_each_entry(bo, &bdev->bd_holder_list, list) { 879 list_for_each_entry(bo, &bdev->bd_holder_list, list) {
875 if (bo->sdir == kobj) { 880 if (bo->sdir == kobj) {
876 bo->count--; 881 bo->count--;
877 BUG_ON(bo->count < 0); 882 BUG_ON(bo->count < 0);
878 if (!bo->count) { 883 if (!bo->count) {
879 list_del(&bo->list); 884 list_del(&bo->list);
880 del_symlink(bo->sdir, bo->sdev); 885 del_symlink(bo->sdir, bo->sdev);
881 del_symlink(bo->hdir, bo->hdev); 886 del_symlink(bo->hdir, bo->hdev);
882 bd_holder_release_dirs(bo); 887 bd_holder_release_dirs(bo);
883 return bo; 888 return bo;
884 } 889 }
885 break; 890 break;
886 } 891 }
887 } 892 }
888 893
889 return NULL; 894 return NULL;
890 } 895 }
891 896
892 /** 897 /**
893 * bd_claim_by_kobject - bd_claim() with additional kobject signature 898 * bd_claim_by_kobject - bd_claim() with additional kobject signature
894 * 899 *
895 * @bdev: block device to be claimed 900 * @bdev: block device to be claimed
896 * @holder: holder's signature 901 * @holder: holder's signature
897 * @kobj: holder's kobject 902 * @kobj: holder's kobject
898 * 903 *
899 * Do bd_claim() and if it succeeds, create sysfs symlinks between 904 * Do bd_claim() and if it succeeds, create sysfs symlinks between
900 * the bdev and the holder's kobject. 905 * the bdev and the holder's kobject.
901 * Use bd_release_from_kobject() when relesing the claimed bdev. 906 * Use bd_release_from_kobject() when relesing the claimed bdev.
902 * 907 *
903 * Returns 0 on success. (same as bd_claim()) 908 * Returns 0 on success. (same as bd_claim())
904 * Returns errno on failure. 909 * Returns errno on failure.
905 */ 910 */
906 static int bd_claim_by_kobject(struct block_device *bdev, void *holder, 911 static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
907 struct kobject *kobj) 912 struct kobject *kobj)
908 { 913 {
909 int err; 914 int err;
910 struct bd_holder *bo, *found; 915 struct bd_holder *bo, *found;
911 916
912 if (!kobj) 917 if (!kobj)
913 return -EINVAL; 918 return -EINVAL;
914 919
915 bo = alloc_bd_holder(kobj); 920 bo = alloc_bd_holder(kobj);
916 if (!bo) 921 if (!bo)
917 return -ENOMEM; 922 return -ENOMEM;
918 923
919 mutex_lock(&bdev->bd_mutex); 924 mutex_lock(&bdev->bd_mutex);
920 925
921 err = bd_claim(bdev, holder); 926 err = bd_claim(bdev, holder);
922 if (err) 927 if (err)
923 goto fail; 928 goto fail;
924 929
925 found = find_bd_holder(bdev, bo); 930 found = find_bd_holder(bdev, bo);
926 if (found) 931 if (found)
927 goto fail; 932 goto fail;
928 933
929 err = add_bd_holder(bdev, bo); 934 err = add_bd_holder(bdev, bo);
930 if (err) 935 if (err)
931 bd_release(bdev); 936 bd_release(bdev);
932 else 937 else
933 bo = NULL; 938 bo = NULL;
934 fail: 939 fail:
935 mutex_unlock(&bdev->bd_mutex); 940 mutex_unlock(&bdev->bd_mutex);
936 free_bd_holder(bo); 941 free_bd_holder(bo);
937 return err; 942 return err;
938 } 943 }
939 944
940 /** 945 /**
941 * bd_release_from_kobject - bd_release() with additional kobject signature 946 * bd_release_from_kobject - bd_release() with additional kobject signature
942 * 947 *
943 * @bdev: block device to be released 948 * @bdev: block device to be released
944 * @kobj: holder's kobject 949 * @kobj: holder's kobject
945 * 950 *
946 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). 951 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
947 */ 952 */
948 static void bd_release_from_kobject(struct block_device *bdev, 953 static void bd_release_from_kobject(struct block_device *bdev,
949 struct kobject *kobj) 954 struct kobject *kobj)
950 { 955 {
951 if (!kobj) 956 if (!kobj)
952 return; 957 return;
953 958
954 mutex_lock(&bdev->bd_mutex); 959 mutex_lock(&bdev->bd_mutex);
955 bd_release(bdev); 960 bd_release(bdev);
956 free_bd_holder(del_bd_holder(bdev, kobj)); 961 free_bd_holder(del_bd_holder(bdev, kobj));
957 mutex_unlock(&bdev->bd_mutex); 962 mutex_unlock(&bdev->bd_mutex);
958 } 963 }
959 964
960 /** 965 /**
961 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() 966 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
962 * 967 *
963 * @bdev: block device to be claimed 968 * @bdev: block device to be claimed
964 * @holder: holder's signature 969 * @holder: holder's signature
965 * @disk: holder's gendisk 970 * @disk: holder's gendisk
966 * 971 *
967 * Call bd_claim_by_kobject() with getting @disk->slave_dir. 972 * Call bd_claim_by_kobject() with getting @disk->slave_dir.
968 */ 973 */
969 int bd_claim_by_disk(struct block_device *bdev, void *holder, 974 int bd_claim_by_disk(struct block_device *bdev, void *holder,
970 struct gendisk *disk) 975 struct gendisk *disk)
971 { 976 {
972 return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); 977 return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
973 } 978 }
974 EXPORT_SYMBOL_GPL(bd_claim_by_disk); 979 EXPORT_SYMBOL_GPL(bd_claim_by_disk);
975 980
976 /** 981 /**
977 * bd_release_from_disk - wrapper function for bd_release_from_kobject() 982 * bd_release_from_disk - wrapper function for bd_release_from_kobject()
978 * 983 *
979 * @bdev: block device to be claimed 984 * @bdev: block device to be claimed
980 * @disk: holder's gendisk 985 * @disk: holder's gendisk
981 * 986 *
982 * Call bd_release_from_kobject() and put @disk->slave_dir. 987 * Call bd_release_from_kobject() and put @disk->slave_dir.
983 */ 988 */
984 void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) 989 void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
985 { 990 {
986 bd_release_from_kobject(bdev, disk->slave_dir); 991 bd_release_from_kobject(bdev, disk->slave_dir);
987 kobject_put(disk->slave_dir); 992 kobject_put(disk->slave_dir);
988 } 993 }
989 EXPORT_SYMBOL_GPL(bd_release_from_disk); 994 EXPORT_SYMBOL_GPL(bd_release_from_disk);
990 #endif 995 #endif
991 996
992 /* 997 /*
993 * Tries to open block device by device number. Use it ONLY if you 998 * Tries to open block device by device number. Use it ONLY if you
994 * really do not have anything better - i.e. when you are behind a 999 * really do not have anything better - i.e. when you are behind a
995 * truly sucky interface and all you are given is a device number. _Never_ 1000 * truly sucky interface and all you are given is a device number. _Never_
996 * to be used for internal purposes. If you ever need it - reconsider 1001 * to be used for internal purposes. If you ever need it - reconsider
997 * your API. 1002 * your API.
998 */ 1003 */
999 struct block_device *open_by_devnum(dev_t dev, fmode_t mode) 1004 struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
1000 { 1005 {
1001 struct block_device *bdev = bdget(dev); 1006 struct block_device *bdev = bdget(dev);
1002 int err = -ENOMEM; 1007 int err = -ENOMEM;
1003 if (bdev) 1008 if (bdev)
1004 err = blkdev_get(bdev, mode); 1009 err = blkdev_get(bdev, mode);
1005 return err ? ERR_PTR(err) : bdev; 1010 return err ? ERR_PTR(err) : bdev;
1006 } 1011 }
1007 1012
1008 EXPORT_SYMBOL(open_by_devnum); 1013 EXPORT_SYMBOL(open_by_devnum);
1009 1014
1010 /** 1015 /**
1011 * flush_disk - invalidates all buffer-cache entries on a disk 1016 * flush_disk - invalidates all buffer-cache entries on a disk
1012 * 1017 *
1013 * @bdev: struct block device to be flushed 1018 * @bdev: struct block device to be flushed
1014 * 1019 *
1015 * Invalidates all buffer-cache entries on a disk. It should be called 1020 * Invalidates all buffer-cache entries on a disk. It should be called
1016 * when a disk has been changed -- either by a media change or online 1021 * when a disk has been changed -- either by a media change or online
1017 * resize. 1022 * resize.
1018 */ 1023 */
1019 static void flush_disk(struct block_device *bdev) 1024 static void flush_disk(struct block_device *bdev)
1020 { 1025 {
1021 if (__invalidate_device(bdev)) { 1026 if (__invalidate_device(bdev)) {
1022 char name[BDEVNAME_SIZE] = ""; 1027 char name[BDEVNAME_SIZE] = "";
1023 1028
1024 if (bdev->bd_disk) 1029 if (bdev->bd_disk)
1025 disk_name(bdev->bd_disk, 0, name); 1030 disk_name(bdev->bd_disk, 0, name);
1026 printk(KERN_WARNING "VFS: busy inodes on changed media or " 1031 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1027 "resized disk %s\n", name); 1032 "resized disk %s\n", name);
1028 } 1033 }
1029 1034
1030 if (!bdev->bd_disk) 1035 if (!bdev->bd_disk)
1031 return; 1036 return;
1032 if (disk_partitionable(bdev->bd_disk)) 1037 if (disk_partitionable(bdev->bd_disk))
1033 bdev->bd_invalidated = 1; 1038 bdev->bd_invalidated = 1;
1034 } 1039 }
1035 1040
1036 /** 1041 /**
1037 * check_disk_size_change - checks for disk size change and adjusts bdev size. 1042 * check_disk_size_change - checks for disk size change and adjusts bdev size.
1038 * @disk: struct gendisk to check 1043 * @disk: struct gendisk to check
1039 * @bdev: struct bdev to adjust. 1044 * @bdev: struct bdev to adjust.
1040 * 1045 *
 1041 * This routine checks whether the bdev size matches the disk size 1046 * This routine checks whether the bdev size matches the disk size
 1042 * and adjusts the bdev size if they differ. 1047 * and adjusts the bdev size if they differ.
1043 */ 1048 */
1044 void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) 1049 void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
1045 { 1050 {
1046 loff_t disk_size, bdev_size; 1051 loff_t disk_size, bdev_size;
1047 1052
1048 disk_size = (loff_t)get_capacity(disk) << 9; 1053 disk_size = (loff_t)get_capacity(disk) << 9;
1049 bdev_size = i_size_read(bdev->bd_inode); 1054 bdev_size = i_size_read(bdev->bd_inode);
1050 if (disk_size != bdev_size) { 1055 if (disk_size != bdev_size) {
1051 char name[BDEVNAME_SIZE]; 1056 char name[BDEVNAME_SIZE];
1052 1057
1053 disk_name(disk, 0, name); 1058 disk_name(disk, 0, name);
1054 printk(KERN_INFO 1059 printk(KERN_INFO
1055 "%s: detected capacity change from %lld to %lld\n", 1060 "%s: detected capacity change from %lld to %lld\n",
1056 name, bdev_size, disk_size); 1061 name, bdev_size, disk_size);
1057 i_size_write(bdev->bd_inode, disk_size); 1062 i_size_write(bdev->bd_inode, disk_size);
1058 flush_disk(bdev); 1063 flush_disk(bdev);
1059 } 1064 }
1060 } 1065 }
1061 EXPORT_SYMBOL(check_disk_size_change); 1066 EXPORT_SYMBOL(check_disk_size_change);
1062 1067
1063 /** 1068 /**
1064 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back 1069 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
1065 * @disk: struct gendisk to be revalidated 1070 * @disk: struct gendisk to be revalidated
1066 * 1071 *
1067 * This routine is a wrapper for lower-level driver's revalidate_disk 1072 * This routine is a wrapper for lower-level driver's revalidate_disk
1068 * call-backs. It is used to do common pre and post operations needed 1073 * call-backs. It is used to do common pre and post operations needed
1069 * for all revalidate_disk operations. 1074 * for all revalidate_disk operations.
1070 */ 1075 */
1071 int revalidate_disk(struct gendisk *disk) 1076 int revalidate_disk(struct gendisk *disk)
1072 { 1077 {
1073 struct block_device *bdev; 1078 struct block_device *bdev;
1074 int ret = 0; 1079 int ret = 0;
1075 1080
1076 if (disk->fops->revalidate_disk) 1081 if (disk->fops->revalidate_disk)
1077 ret = disk->fops->revalidate_disk(disk); 1082 ret = disk->fops->revalidate_disk(disk);
1078 1083
1079 bdev = bdget_disk(disk, 0); 1084 bdev = bdget_disk(disk, 0);
1080 if (!bdev) 1085 if (!bdev)
1081 return ret; 1086 return ret;
1082 1087
1083 mutex_lock(&bdev->bd_mutex); 1088 mutex_lock(&bdev->bd_mutex);
1084 check_disk_size_change(disk, bdev); 1089 check_disk_size_change(disk, bdev);
1085 mutex_unlock(&bdev->bd_mutex); 1090 mutex_unlock(&bdev->bd_mutex);
1086 bdput(bdev); 1091 bdput(bdev);
1087 return ret; 1092 return ret;
1088 } 1093 }
1089 EXPORT_SYMBOL(revalidate_disk); 1094 EXPORT_SYMBOL(revalidate_disk);
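/*
 * Example (editorial sketch; the mydrv_* names are hypothetical): a driver
 * wires up ->revalidate_disk in its block_device_operations and lets
 * revalidate_disk() handle the locking and size propagation:
 *
 *	static int mydrv_revalidate_disk(struct gendisk *disk)
 *	{
 *		struct mydrv *dev = disk->private_data;
 *
 *		set_capacity(disk, mydrv_read_capacity(dev));
 *		return 0;
 *	}
 *
 * After the callback returns, revalidate_disk() takes bd_mutex and calls
 * check_disk_size_change() so the bdev inode sees the new capacity.
 */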
1090 1095
1091 /* 1096 /*
1092 * This routine checks whether a removable media has been changed, 1097 * This routine checks whether a removable media has been changed,
1093 * and invalidates all buffer-cache-entries in that case. This 1098 * and invalidates all buffer-cache-entries in that case. This
1094 * is a relatively slow routine, so we have to try to minimize using 1099 * is a relatively slow routine, so we have to try to minimize using
1095 * it. Thus it is called only upon a 'mount' or 'open'. This 1100 * it. Thus it is called only upon a 'mount' or 'open'. This
1096 * is the best way of combining speed and utility, I think. 1101 * is the best way of combining speed and utility, I think.
1097 * People changing diskettes in the middle of an operation deserve 1102 * People changing diskettes in the middle of an operation deserve
1098 * to lose :-) 1103 * to lose :-)
1099 */ 1104 */
1100 int check_disk_change(struct block_device *bdev) 1105 int check_disk_change(struct block_device *bdev)
1101 { 1106 {
1102 struct gendisk *disk = bdev->bd_disk; 1107 struct gendisk *disk = bdev->bd_disk;
1103 struct block_device_operations * bdops = disk->fops; 1108 struct block_device_operations * bdops = disk->fops;
1104 1109
1105 if (!bdops->media_changed) 1110 if (!bdops->media_changed)
1106 return 0; 1111 return 0;
1107 if (!bdops->media_changed(bdev->bd_disk)) 1112 if (!bdops->media_changed(bdev->bd_disk))
1108 return 0; 1113 return 0;
1109 1114
1110 flush_disk(bdev); 1115 flush_disk(bdev);
1111 if (bdops->revalidate_disk) 1116 if (bdops->revalidate_disk)
1112 bdops->revalidate_disk(bdev->bd_disk); 1117 bdops->revalidate_disk(bdev->bd_disk);
1113 return 1; 1118 return 1;
1114 } 1119 }
1115 1120
1116 EXPORT_SYMBOL(check_disk_change); 1121 EXPORT_SYMBOL(check_disk_change);
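/*
 * Example (editorial sketch; mydrv_open is hypothetical): removable-media
 * drivers typically invoke check_disk_change() from their ->open method, so
 * stale buffer-cache entries are dropped before the new medium is used:
 *
 *	static int mydrv_open(struct block_device *bdev, fmode_t mode)
 *	{
 *		check_disk_change(bdev);
 *		return 0;
 *	}
 *
 * The driver must also supply ->media_changed (and usually
 * ->revalidate_disk) in its block_device_operations, or the call is a no-op.
 */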
1117 1122
1118 void bd_set_size(struct block_device *bdev, loff_t size) 1123 void bd_set_size(struct block_device *bdev, loff_t size)
1119 { 1124 {
1120 unsigned bsize = bdev_logical_block_size(bdev); 1125 unsigned bsize = bdev_logical_block_size(bdev);
1121 1126
1122 bdev->bd_inode->i_size = size; 1127 bdev->bd_inode->i_size = size;
1123 while (bsize < PAGE_CACHE_SIZE) { 1128 while (bsize < PAGE_CACHE_SIZE) {
1124 if (size & bsize) 1129 if (size & bsize)
1125 break; 1130 break;
1126 bsize <<= 1; 1131 bsize <<= 1;
1127 } 1132 }
1128 bdev->bd_block_size = bsize; 1133 bdev->bd_block_size = bsize;
1129 bdev->bd_inode->i_blkbits = blksize_bits(bsize); 1134 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1130 } 1135 }
1131 EXPORT_SYMBOL(bd_set_size); 1136 EXPORT_SYMBOL(bd_set_size);
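/*
 * Editorial note on the loop above: it selects the largest power-of-two
 * block size, capped at PAGE_CACHE_SIZE, that still divides the device
 * size. For example, with a 512-byte logical block size and size == 12288
 * (12KiB), bsize grows 512 -> 1024 -> 2048 -> 4096 and stops there
 * (assuming a 4KiB PAGE_CACHE_SIZE), so bd_block_size becomes 4096.
 */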
1132 1137
1133 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); 1138 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1134 1139
1135 /* 1140 /*
1136 * bd_mutex locking: 1141 * bd_mutex locking:
1137 * 1142 *
1138 * mutex_lock(part->bd_mutex) 1143 * mutex_lock(part->bd_mutex)
1139 * mutex_lock_nested(whole->bd_mutex, 1) 1144 * mutex_lock_nested(whole->bd_mutex, 1)
1140 */ 1145 */
1141 1146
1142 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) 1147 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1143 { 1148 {
1144 struct gendisk *disk; 1149 struct gendisk *disk;
1145 int ret; 1150 int ret;
1146 int partno; 1151 int partno;
1147 int perm = 0; 1152 int perm = 0;
1148 1153
1149 if (mode & FMODE_READ) 1154 if (mode & FMODE_READ)
1150 perm |= MAY_READ; 1155 perm |= MAY_READ;
1151 if (mode & FMODE_WRITE) 1156 if (mode & FMODE_WRITE)
1152 perm |= MAY_WRITE; 1157 perm |= MAY_WRITE;
1153 /* 1158 /*
1154 * hooks: /n/, see "layering violations". 1159 * hooks: /n/, see "layering violations".
1155 */ 1160 */
1156 ret = devcgroup_inode_permission(bdev->bd_inode, perm); 1161 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1157 if (ret != 0) { 1162 if (ret != 0) {
1158 bdput(bdev); 1163 bdput(bdev);
1159 return ret; 1164 return ret;
1160 } 1165 }
1161 1166
1162 lock_kernel(); 1167 lock_kernel();
1163 restart: 1168 restart:
1164 1169
1165 ret = -ENXIO; 1170 ret = -ENXIO;
1166 disk = get_gendisk(bdev->bd_dev, &partno); 1171 disk = get_gendisk(bdev->bd_dev, &partno);
1167 if (!disk) 1172 if (!disk)
1168 goto out_unlock_kernel; 1173 goto out_unlock_kernel;
1169 1174
1170 mutex_lock_nested(&bdev->bd_mutex, for_part); 1175 mutex_lock_nested(&bdev->bd_mutex, for_part);
1171 if (!bdev->bd_openers) { 1176 if (!bdev->bd_openers) {
1172 bdev->bd_disk = disk; 1177 bdev->bd_disk = disk;
1173 bdev->bd_contains = bdev; 1178 bdev->bd_contains = bdev;
1174 if (!partno) { 1179 if (!partno) {
1175 struct backing_dev_info *bdi; 1180 struct backing_dev_info *bdi;
1176 1181
1177 ret = -ENXIO; 1182 ret = -ENXIO;
1178 bdev->bd_part = disk_get_part(disk, partno); 1183 bdev->bd_part = disk_get_part(disk, partno);
1179 if (!bdev->bd_part) 1184 if (!bdev->bd_part)
1180 goto out_clear; 1185 goto out_clear;
1181 1186
1182 if (disk->fops->open) { 1187 if (disk->fops->open) {
1183 ret = disk->fops->open(bdev, mode); 1188 ret = disk->fops->open(bdev, mode);
1184 if (ret == -ERESTARTSYS) { 1189 if (ret == -ERESTARTSYS) {
1185 /* Lost a race with 'disk' being 1190 /* Lost a race with 'disk' being
1186 * deleted, try again. 1191 * deleted, try again.
1187 * See md.c 1192 * See md.c
1188 */ 1193 */
1189 disk_put_part(bdev->bd_part); 1194 disk_put_part(bdev->bd_part);
1190 bdev->bd_part = NULL; 1195 bdev->bd_part = NULL;
1191 module_put(disk->fops->owner); 1196 module_put(disk->fops->owner);
1192 put_disk(disk); 1197 put_disk(disk);
1193 bdev->bd_disk = NULL; 1198 bdev->bd_disk = NULL;
1194 mutex_unlock(&bdev->bd_mutex); 1199 mutex_unlock(&bdev->bd_mutex);
1195 goto restart; 1200 goto restart;
1196 } 1201 }
1197 if (ret) 1202 if (ret)
1198 goto out_clear; 1203 goto out_clear;
1199 } 1204 }
1200 if (!bdev->bd_openers) { 1205 if (!bdev->bd_openers) {
1201 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1206 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1202 bdi = blk_get_backing_dev_info(bdev); 1207 bdi = blk_get_backing_dev_info(bdev);
1203 if (bdi == NULL) 1208 if (bdi == NULL)
1204 bdi = &default_backing_dev_info; 1209 bdi = &default_backing_dev_info;
1205 bdev->bd_inode->i_data.backing_dev_info = bdi; 1210 bdev->bd_inode->i_data.backing_dev_info = bdi;
1206 } 1211 }
1207 if (bdev->bd_invalidated) 1212 if (bdev->bd_invalidated)
1208 rescan_partitions(disk, bdev); 1213 rescan_partitions(disk, bdev);
1209 } else { 1214 } else {
1210 struct block_device *whole; 1215 struct block_device *whole;
1211 whole = bdget_disk(disk, 0); 1216 whole = bdget_disk(disk, 0);
1212 ret = -ENOMEM; 1217 ret = -ENOMEM;
1213 if (!whole) 1218 if (!whole)
1214 goto out_clear; 1219 goto out_clear;
1215 BUG_ON(for_part); 1220 BUG_ON(for_part);
1216 ret = __blkdev_get(whole, mode, 1); 1221 ret = __blkdev_get(whole, mode, 1);
1217 if (ret) 1222 if (ret)
1218 goto out_clear; 1223 goto out_clear;
1219 bdev->bd_contains = whole; 1224 bdev->bd_contains = whole;
1220 bdev->bd_inode->i_data.backing_dev_info = 1225 bdev->bd_inode->i_data.backing_dev_info =
1221 whole->bd_inode->i_data.backing_dev_info; 1226 whole->bd_inode->i_data.backing_dev_info;
1222 bdev->bd_part = disk_get_part(disk, partno); 1227 bdev->bd_part = disk_get_part(disk, partno);
1223 if (!(disk->flags & GENHD_FL_UP) || 1228 if (!(disk->flags & GENHD_FL_UP) ||
1224 !bdev->bd_part || !bdev->bd_part->nr_sects) { 1229 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1225 ret = -ENXIO; 1230 ret = -ENXIO;
1226 goto out_clear; 1231 goto out_clear;
1227 } 1232 }
1228 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1233 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1229 } 1234 }
1230 } else { 1235 } else {
1231 put_disk(disk); 1236 put_disk(disk);
1232 module_put(disk->fops->owner); 1237 module_put(disk->fops->owner);
1233 disk = NULL; 1238 disk = NULL;
1234 if (bdev->bd_contains == bdev) { 1239 if (bdev->bd_contains == bdev) {
1235 if (bdev->bd_disk->fops->open) { 1240 if (bdev->bd_disk->fops->open) {
1236 ret = bdev->bd_disk->fops->open(bdev, mode); 1241 ret = bdev->bd_disk->fops->open(bdev, mode);
1237 if (ret) 1242 if (ret)
1238 goto out_unlock_bdev; 1243 goto out_unlock_bdev;
1239 } 1244 }
1240 if (bdev->bd_invalidated) 1245 if (bdev->bd_invalidated)
1241 rescan_partitions(bdev->bd_disk, bdev); 1246 rescan_partitions(bdev->bd_disk, bdev);
1242 } 1247 }
1243 } 1248 }
1244 bdev->bd_openers++; 1249 bdev->bd_openers++;
1245 if (for_part) 1250 if (for_part)
1246 bdev->bd_part_count++; 1251 bdev->bd_part_count++;
1247 mutex_unlock(&bdev->bd_mutex); 1252 mutex_unlock(&bdev->bd_mutex);
1248 unlock_kernel(); 1253 unlock_kernel();
1249 return 0; 1254 return 0;
1250 1255
1251 out_clear: 1256 out_clear:
1252 disk_put_part(bdev->bd_part); 1257 disk_put_part(bdev->bd_part);
1253 bdev->bd_disk = NULL; 1258 bdev->bd_disk = NULL;
1254 bdev->bd_part = NULL; 1259 bdev->bd_part = NULL;
1255 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1260 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1256 if (bdev != bdev->bd_contains) 1261 if (bdev != bdev->bd_contains)
1257 __blkdev_put(bdev->bd_contains, mode, 1); 1262 __blkdev_put(bdev->bd_contains, mode, 1);
1258 bdev->bd_contains = NULL; 1263 bdev->bd_contains = NULL;
1259 out_unlock_bdev: 1264 out_unlock_bdev:
1260 mutex_unlock(&bdev->bd_mutex); 1265 mutex_unlock(&bdev->bd_mutex);
1261 out_unlock_kernel: 1266 out_unlock_kernel:
1262 unlock_kernel(); 1267 unlock_kernel();
1263 1268
1264 if (disk) 1269 if (disk)
1265 module_put(disk->fops->owner); 1270 module_put(disk->fops->owner);
1266 put_disk(disk); 1271 put_disk(disk);
1267 bdput(bdev); 1272 bdput(bdev);
1268 1273
1269 return ret; 1274 return ret;
1270 } 1275 }
1271 1276
1272 int blkdev_get(struct block_device *bdev, fmode_t mode) 1277 int blkdev_get(struct block_device *bdev, fmode_t mode)
1273 { 1278 {
1274 return __blkdev_get(bdev, mode, 0); 1279 return __blkdev_get(bdev, mode, 0);
1275 } 1280 }
1276 EXPORT_SYMBOL(blkdev_get); 1281 EXPORT_SYMBOL(blkdev_get);
1277 1282
1278 static int blkdev_open(struct inode * inode, struct file * filp) 1283 static int blkdev_open(struct inode * inode, struct file * filp)
1279 { 1284 {
1280 struct block_device *bdev; 1285 struct block_device *bdev;
1281 int res; 1286 int res;
1282 1287
1283 /* 1288 /*
1284 * Preserve backwards compatibility and allow large file access 1289 * Preserve backwards compatibility and allow large file access
1285 * even if userspace doesn't ask for it explicitly. Some mkfs 1290 * even if userspace doesn't ask for it explicitly. Some mkfs
1286 * binary needs it. We might want to drop this workaround 1291 * binary needs it. We might want to drop this workaround
1287 * during an unstable branch. 1292 * during an unstable branch.
1288 */ 1293 */
1289 filp->f_flags |= O_LARGEFILE; 1294 filp->f_flags |= O_LARGEFILE;
1290 1295
1291 if (filp->f_flags & O_NDELAY) 1296 if (filp->f_flags & O_NDELAY)
1292 filp->f_mode |= FMODE_NDELAY; 1297 filp->f_mode |= FMODE_NDELAY;
1293 if (filp->f_flags & O_EXCL) 1298 if (filp->f_flags & O_EXCL)
1294 filp->f_mode |= FMODE_EXCL; 1299 filp->f_mode |= FMODE_EXCL;
1295 if ((filp->f_flags & O_ACCMODE) == 3) 1300 if ((filp->f_flags & O_ACCMODE) == 3)
1296 filp->f_mode |= FMODE_WRITE_IOCTL; 1301 filp->f_mode |= FMODE_WRITE_IOCTL;
1297 1302
1298 bdev = bd_acquire(inode); 1303 bdev = bd_acquire(inode);
1299 if (bdev == NULL) 1304 if (bdev == NULL)
1300 return -ENOMEM; 1305 return -ENOMEM;
1301 1306
1302 filp->f_mapping = bdev->bd_inode->i_mapping; 1307 filp->f_mapping = bdev->bd_inode->i_mapping;
1303 1308
1304 res = blkdev_get(bdev, filp->f_mode); 1309 res = blkdev_get(bdev, filp->f_mode);
1305 if (res) 1310 if (res)
1306 return res; 1311 return res;
1307 1312
1308 if (filp->f_mode & FMODE_EXCL) { 1313 if (filp->f_mode & FMODE_EXCL) {
1309 res = bd_claim(bdev, filp); 1314 res = bd_claim(bdev, filp);
1310 if (res) 1315 if (res)
1311 goto out_blkdev_put; 1316 goto out_blkdev_put;
1312 } 1317 }
1313 1318
1314 return 0; 1319 return 0;
1315 1320
1316 out_blkdev_put: 1321 out_blkdev_put:
1317 blkdev_put(bdev, filp->f_mode); 1322 blkdev_put(bdev, filp->f_mode);
1318 return res; 1323 return res;
1319 } 1324 }
1320 1325
1321 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) 1326 static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1322 { 1327 {
1323 int ret = 0; 1328 int ret = 0;
1324 struct gendisk *disk = bdev->bd_disk; 1329 struct gendisk *disk = bdev->bd_disk;
1325 struct block_device *victim = NULL; 1330 struct block_device *victim = NULL;
1326 1331
1327 mutex_lock_nested(&bdev->bd_mutex, for_part); 1332 mutex_lock_nested(&bdev->bd_mutex, for_part);
1328 lock_kernel(); 1333 lock_kernel();
1329 if (for_part) 1334 if (for_part)
1330 bdev->bd_part_count--; 1335 bdev->bd_part_count--;
1331 1336
1332 if (!--bdev->bd_openers) { 1337 if (!--bdev->bd_openers) {
1333 sync_blockdev(bdev); 1338 sync_blockdev(bdev);
1334 kill_bdev(bdev); 1339 kill_bdev(bdev);
1335 } 1340 }
1336 if (bdev->bd_contains == bdev) { 1341 if (bdev->bd_contains == bdev) {
1337 if (disk->fops->release) 1342 if (disk->fops->release)
1338 ret = disk->fops->release(disk, mode); 1343 ret = disk->fops->release(disk, mode);
1339 } 1344 }
1340 if (!bdev->bd_openers) { 1345 if (!bdev->bd_openers) {
1341 struct module *owner = disk->fops->owner; 1346 struct module *owner = disk->fops->owner;
1342 1347
1343 put_disk(disk); 1348 put_disk(disk);
1344 module_put(owner); 1349 module_put(owner);
1345 disk_put_part(bdev->bd_part); 1350 disk_put_part(bdev->bd_part);
1346 bdev->bd_part = NULL; 1351 bdev->bd_part = NULL;
1347 bdev->bd_disk = NULL; 1352 bdev->bd_disk = NULL;
1348 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1353 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1349 if (bdev != bdev->bd_contains) 1354 if (bdev != bdev->bd_contains)
1350 victim = bdev->bd_contains; 1355 victim = bdev->bd_contains;
1351 bdev->bd_contains = NULL; 1356 bdev->bd_contains = NULL;
1352 } 1357 }
1353 unlock_kernel(); 1358 unlock_kernel();
1354 mutex_unlock(&bdev->bd_mutex); 1359 mutex_unlock(&bdev->bd_mutex);
1355 bdput(bdev); 1360 bdput(bdev);
1356 if (victim) 1361 if (victim)
1357 __blkdev_put(victim, mode, 1); 1362 __blkdev_put(victim, mode, 1);
1358 return ret; 1363 return ret;
1359 } 1364 }
1360 1365
1361 int blkdev_put(struct block_device *bdev, fmode_t mode) 1366 int blkdev_put(struct block_device *bdev, fmode_t mode)
1362 { 1367 {
1363 return __blkdev_put(bdev, mode, 0); 1368 return __blkdev_put(bdev, mode, 0);
1364 } 1369 }
1365 EXPORT_SYMBOL(blkdev_put); 1370 EXPORT_SYMBOL(blkdev_put);
1366 1371
1367 static int blkdev_close(struct inode * inode, struct file * filp) 1372 static int blkdev_close(struct inode * inode, struct file * filp)
1368 { 1373 {
1369 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1374 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1370 if (bdev->bd_holder == filp) 1375 if (bdev->bd_holder == filp)
1371 bd_release(bdev); 1376 bd_release(bdev);
1372 return blkdev_put(bdev, filp->f_mode); 1377 return blkdev_put(bdev, filp->f_mode);
1373 } 1378 }
1374 1379
1375 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1380 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1376 { 1381 {
1377 struct block_device *bdev = I_BDEV(file->f_mapping->host); 1382 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1378 fmode_t mode = file->f_mode; 1383 fmode_t mode = file->f_mode;
1379 1384
1380 /* 1385 /*
1381 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have 1386 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
 1382 * to update it before every ioctl. 1387 * to update it before every ioctl.
1383 */ 1388 */
1384 if (file->f_flags & O_NDELAY) 1389 if (file->f_flags & O_NDELAY)
1385 mode |= FMODE_NDELAY; 1390 mode |= FMODE_NDELAY;
1386 else 1391 else
1387 mode &= ~FMODE_NDELAY; 1392 mode &= ~FMODE_NDELAY;
1388 1393
1389 return blkdev_ioctl(bdev, mode, cmd, arg); 1394 return blkdev_ioctl(bdev, mode, cmd, arg);
1390 } 1395 }
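/*
 * Editorial sketch from the userspace side (hypothetical, not part of this
 * patch): the per-call fixup above exists because a program can flip
 * O_NDELAY long after open, and the very next ioctl must observe it:
 *
 *	int fd = open("/dev/sr0", O_RDONLY);
 *
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NDELAY);
 *	(subsequent ioctls on fd now run with FMODE_NDELAY set)
 */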
1391 1396
1392 /* 1397 /*
1393 * Try to release a page associated with block device when the system 1398 * Try to release a page associated with block device when the system
1394 * is under memory pressure. 1399 * is under memory pressure.
1395 */ 1400 */
1396 static int blkdev_releasepage(struct page *page, gfp_t wait) 1401 static int blkdev_releasepage(struct page *page, gfp_t wait)
1397 { 1402 {
1398 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; 1403 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1399 1404
1400 if (super && super->s_op->bdev_try_to_free_page) 1405 if (super && super->s_op->bdev_try_to_free_page)
1401 return super->s_op->bdev_try_to_free_page(super, page, wait); 1406 return super->s_op->bdev_try_to_free_page(super, page, wait);
1402 1407
1403 return try_to_free_buffers(page); 1408 return try_to_free_buffers(page);
1404 } 1409 }
1405 1410
1406 static const struct address_space_operations def_blk_aops = { 1411 static const struct address_space_operations def_blk_aops = {
1407 .readpage = blkdev_readpage, 1412 .readpage = blkdev_readpage,
1408 .writepage = blkdev_writepage, 1413 .writepage = blkdev_writepage,
1409 .sync_page = block_sync_page, 1414 .sync_page = block_sync_page,
1410 .write_begin = blkdev_write_begin, 1415 .write_begin = blkdev_write_begin,
1411 .write_end = blkdev_write_end, 1416 .write_end = blkdev_write_end,
1412 .writepages = generic_writepages, 1417 .writepages = generic_writepages,
1413 .releasepage = blkdev_releasepage, 1418 .releasepage = blkdev_releasepage,
1414 .direct_IO = blkdev_direct_IO, 1419 .direct_IO = blkdev_direct_IO,
1415 }; 1420 };
1416 1421
1417 const struct file_operations def_blk_fops = { 1422 const struct file_operations def_blk_fops = {
1418 .open = blkdev_open, 1423 .open = blkdev_open,
1419 .release = blkdev_close, 1424 .release = blkdev_close,
1420 .llseek = block_llseek, 1425 .llseek = block_llseek,
1421 .read = do_sync_read, 1426 .read = do_sync_read,
1422 .write = do_sync_write, 1427 .write = do_sync_write,
1423 .aio_read = generic_file_aio_read, 1428 .aio_read = generic_file_aio_read,
1424 .aio_write = generic_file_aio_write_nolock, 1429 .aio_write = generic_file_aio_write_nolock,
1425 .mmap = generic_file_mmap, 1430 .mmap = generic_file_mmap,
1426 .fsync = block_fsync, 1431 .fsync = block_fsync,
1427 .unlocked_ioctl = block_ioctl, 1432 .unlocked_ioctl = block_ioctl,
1428 #ifdef CONFIG_COMPAT 1433 #ifdef CONFIG_COMPAT
1429 .compat_ioctl = compat_blkdev_ioctl, 1434 .compat_ioctl = compat_blkdev_ioctl,
1430 #endif 1435 #endif
1431 .splice_read = generic_file_splice_read, 1436 .splice_read = generic_file_splice_read,
1432 .splice_write = generic_file_splice_write, 1437 .splice_write = generic_file_splice_write,
1433 }; 1438 };
1434 1439
1435 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1440 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1436 { 1441 {
1437 int res; 1442 int res;
1438 mm_segment_t old_fs = get_fs(); 1443 mm_segment_t old_fs = get_fs();
1439 set_fs(KERNEL_DS); 1444 set_fs(KERNEL_DS);
1440 res = blkdev_ioctl(bdev, 0, cmd, arg); 1445 res = blkdev_ioctl(bdev, 0, cmd, arg);
1441 set_fs(old_fs); 1446 set_fs(old_fs);
1442 return res; 1447 return res;
1443 } 1448 }
1444 1449
1445 EXPORT_SYMBOL(ioctl_by_bdev); 1450 EXPORT_SYMBOL(ioctl_by_bdev);
1446 1451
1447 /** 1452 /**
1448 * lookup_bdev - lookup a struct block_device by name 1453 * lookup_bdev - lookup a struct block_device by name
1449 * @pathname: special file representing the block device 1454 * @pathname: special file representing the block device
1450 * 1455 *
1451 * Get a reference to the blockdevice at @pathname in the current 1456 * Get a reference to the blockdevice at @pathname in the current
1452 * namespace if possible and return it. Return ERR_PTR(error) 1457 * namespace if possible and return it. Return ERR_PTR(error)
1453 * otherwise. 1458 * otherwise.
1454 */ 1459 */
1455 struct block_device *lookup_bdev(const char *pathname) 1460 struct block_device *lookup_bdev(const char *pathname)
1456 { 1461 {
1457 struct block_device *bdev; 1462 struct block_device *bdev;
1458 struct inode *inode; 1463 struct inode *inode;
1459 struct path path; 1464 struct path path;
1460 int error; 1465 int error;
1461 1466
1462 if (!pathname || !*pathname) 1467 if (!pathname || !*pathname)
1463 return ERR_PTR(-EINVAL); 1468 return ERR_PTR(-EINVAL);
1464 1469
1465 error = kern_path(pathname, LOOKUP_FOLLOW, &path); 1470 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1466 if (error) 1471 if (error)
1467 return ERR_PTR(error); 1472 return ERR_PTR(error);
1468 1473
1469 inode = path.dentry->d_inode; 1474 inode = path.dentry->d_inode;
1470 error = -ENOTBLK; 1475 error = -ENOTBLK;
1471 if (!S_ISBLK(inode->i_mode)) 1476 if (!S_ISBLK(inode->i_mode))
1472 goto fail; 1477 goto fail;
1473 error = -EACCES; 1478 error = -EACCES;
1474 if (path.mnt->mnt_flags & MNT_NODEV) 1479 if (path.mnt->mnt_flags & MNT_NODEV)
1475 goto fail; 1480 goto fail;
1476 error = -ENOMEM; 1481 error = -ENOMEM;
1477 bdev = bd_acquire(inode); 1482 bdev = bd_acquire(inode);
1478 if (!bdev) 1483 if (!bdev)
1479 goto fail; 1484 goto fail;
1480 out: 1485 out:
1481 path_put(&path); 1486 path_put(&path);
1482 return bdev; 1487 return bdev;
1483 fail: 1488 fail:
1484 bdev = ERR_PTR(error); 1489 bdev = ERR_PTR(error);
1485 goto out; 1490 goto out;
1486 } 1491 }
1487 EXPORT_SYMBOL(lookup_bdev); 1492 EXPORT_SYMBOL(lookup_bdev);
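/*
 * Example (editorial sketch, not part of this patch; the path shown is
 * illustrative): resolve a device path to its dev_t and drop the
 * reference with bdput() when done:
 *
 *	struct block_device *bdev = lookup_bdev("/dev/sda1");
 *	dev_t dev;
 *
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	dev = bdev->bd_dev;
 *	bdput(bdev);
 */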
1488 1493
1489 /** 1494 /**
1490 * open_bdev_exclusive - open a block device by name and set it up for use 1495 * open_bdev_exclusive - open a block device by name and set it up for use
1491 * 1496 *
1492 * @path: special file representing the block device 1497 * @path: special file representing the block device
 1493 * @mode: FMODE_... combination to be used 1498 * @mode: FMODE_... combination to be used
1494 * @holder: owner for exclusion 1499 * @holder: owner for exclusion
1495 * 1500 *
1496 * Open the blockdevice described by the special file at @path, claim it 1501 * Open the blockdevice described by the special file at @path, claim it
1497 * for the @holder. 1502 * for the @holder.
1498 */ 1503 */
1499 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) 1504 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1500 { 1505 {
1501 struct block_device *bdev; 1506 struct block_device *bdev;
1502 int error = 0; 1507 int error = 0;
1503 1508
1504 bdev = lookup_bdev(path); 1509 bdev = lookup_bdev(path);
1505 if (IS_ERR(bdev)) 1510 if (IS_ERR(bdev))
1506 return bdev; 1511 return bdev;
1507 1512
1508 error = blkdev_get(bdev, mode); 1513 error = blkdev_get(bdev, mode);
1509 if (error) 1514 if (error)
1510 return ERR_PTR(error); 1515 return ERR_PTR(error);
1511 error = -EACCES; 1516 error = -EACCES;
1512 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) 1517 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1513 goto blkdev_put; 1518 goto blkdev_put;
1514 error = bd_claim(bdev, holder); 1519 error = bd_claim(bdev, holder);
1515 if (error) 1520 if (error)
1516 goto blkdev_put; 1521 goto blkdev_put;
1517 1522
1518 return bdev; 1523 return bdev;
1519 1524
1520 blkdev_put: 1525 blkdev_put:
1521 blkdev_put(bdev, mode); 1526 blkdev_put(bdev, mode);
1522 return ERR_PTR(error); 1527 return ERR_PTR(error);
1523 } 1528 }
1524 1529
1525 EXPORT_SYMBOL(open_bdev_exclusive); 1530 EXPORT_SYMBOL(open_bdev_exclusive);
1526 1531
1527 /** 1532 /**
1528 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() 1533 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1529 * 1534 *
1530 * @bdev: blockdevice to close 1535 * @bdev: blockdevice to close
1531 * @mode: mode, must match that used to open. 1536 * @mode: mode, must match that used to open.
1532 * 1537 *
1533 * This is the counterpart to open_bdev_exclusive(). 1538 * This is the counterpart to open_bdev_exclusive().
1534 */ 1539 */
1535 void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) 1540 void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1536 { 1541 {
1537 bd_release(bdev); 1542 bd_release(bdev);
1538 blkdev_put(bdev, mode); 1543 blkdev_put(bdev, mode);
1539 } 1544 }
1540 1545
1541 EXPORT_SYMBOL(close_bdev_exclusive); 1546 EXPORT_SYMBOL(close_bdev_exclusive);
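/*
 * Example (editorial sketch, not part of this patch): a filesystem pairs
 * the two calls around the lifetime of its claim on the device, passing a
 * holder cookie to open_bdev_exclusive() (get_sb_bdev uses the fs_type):
 *
 *	struct block_device *bdev;
 *
 *	bdev = open_bdev_exclusive(dev_name, FMODE_READ | FMODE_WRITE, fs_type);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	... fill the superblock from bdev ...
 *	close_bdev_exclusive(bdev, FMODE_READ | FMODE_WRITE);
 */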
1542 1547
1543 int __invalidate_device(struct block_device *bdev) 1548 int __invalidate_device(struct block_device *bdev)
1544 { 1549 {
1545 struct super_block *sb = get_super(bdev); 1550 struct super_block *sb = get_super(bdev);
1546 int res = 0; 1551 int res = 0;
1547 1552
1548 if (sb) { 1553 if (sb) {
1549 /* 1554 /*
1550 * no need to lock the super, get_super holds the 1555 * no need to lock the super, get_super holds the
1551 * read mutex so the filesystem cannot go away 1556 * read mutex so the filesystem cannot go away
1552 * under us (->put_super runs with the write lock 1557 * under us (->put_super runs with the write lock
 1553 * held). 1558 * held).
1554 */ 1559 */
1555 shrink_dcache_sb(sb); 1560 shrink_dcache_sb(sb);
1556 res = invalidate_inodes(sb); 1561 res = invalidate_inodes(sb);
1557 drop_super(sb); 1562 drop_super(sb);
1558 } 1563 }
1559 invalidate_bdev(bdev); 1564 invalidate_bdev(bdev);
1 /* 1 /*
2 * fs/fs-writeback.c 2 * fs/fs-writeback.c
3 * 3 *
4 * Copyright (C) 2002, Linus Torvalds. 4 * Copyright (C) 2002, Linus Torvalds.
5 * 5 *
6 * Contains all the functions related to writing back and waiting 6 * Contains all the functions related to writing back and waiting
7 * upon dirty inodes against superblocks, and writing back dirty 7 * upon dirty inodes against superblocks, and writing back dirty
 8 * pages against inodes, i.e., data writeback. Writeout of the 8 * pages against inodes, i.e., data writeback. Writeout of the
9 * inode itself is not handled here. 9 * inode itself is not handled here.
10 * 10 *
11 * 10Apr2002 Andrew Morton 11 * 10Apr2002 Andrew Morton
12 * Split out of fs/inode.c 12 * Split out of fs/inode.c
13 * Additions for address_space-based writeback 13 * Additions for address_space-based writeback
14 */ 14 */
15 15
16 #include <linux/kernel.h> 16 #include <linux/kernel.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/spinlock.h> 18 #include <linux/spinlock.h>
19 #include <linux/sched.h> 19 #include <linux/sched.h>
20 #include <linux/fs.h> 20 #include <linux/fs.h>
21 #include <linux/mm.h> 21 #include <linux/mm.h>
22 #include <linux/writeback.h> 22 #include <linux/writeback.h>
23 #include <linux/blkdev.h> 23 #include <linux/blkdev.h>
24 #include <linux/backing-dev.h> 24 #include <linux/backing-dev.h>
25 #include <linux/buffer_head.h> 25 #include <linux/buffer_head.h>
26 #include "internal.h" 26 #include "internal.h"
27 27
28 28
29 /** 29 /**
30 * writeback_acquire - attempt to get exclusive writeback access to a device 30 * writeback_acquire - attempt to get exclusive writeback access to a device
31 * @bdi: the device's backing_dev_info structure 31 * @bdi: the device's backing_dev_info structure
32 * 32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on 33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained 34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state. 35 * via a flag in the request_queue's backing_dev_info.state.
36 * 36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info, 37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this 38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices. 39 * may prevent concurrent writeback against multiple devices.
40 */ 40 */
41 static int writeback_acquire(struct backing_dev_info *bdi) 41 static int writeback_acquire(struct backing_dev_info *bdi)
42 { 42 {
43 return !test_and_set_bit(BDI_pdflush, &bdi->state); 43 return !test_and_set_bit(BDI_pdflush, &bdi->state);
44 } 44 }
45 45
46 /** 46 /**
47 * writeback_in_progress - determine whether there is writeback in progress 47 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure. 48 * @bdi: the device's backing_dev_info structure.
49 * 49 *
50 * Determine whether there is writeback in progress against a backing device. 50 * Determine whether there is writeback in progress against a backing device.
51 */ 51 */
52 int writeback_in_progress(struct backing_dev_info *bdi) 52 int writeback_in_progress(struct backing_dev_info *bdi)
53 { 53 {
54 return test_bit(BDI_pdflush, &bdi->state); 54 return test_bit(BDI_pdflush, &bdi->state);
55 } 55 }
56 56
57 /** 57 /**
58 * writeback_release - relinquish exclusive writeback access against a device. 58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure 59 * @bdi: the device's backing_dev_info structure
60 */ 60 */
61 static void writeback_release(struct backing_dev_info *bdi) 61 static void writeback_release(struct backing_dev_info *bdi)
62 { 62 {
63 BUG_ON(!writeback_in_progress(bdi)); 63 BUG_ON(!writeback_in_progress(bdi));
64 clear_bit(BDI_pdflush, &bdi->state); 64 clear_bit(BDI_pdflush, &bdi->state);
65 } 65 }
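/*
 * Example (editorial sketch, not part of this patch): a pdflush-style
 * writer brackets its work with the two helpers, skipping a queue that
 * another thread is already servicing:
 *
 *	if (writeback_acquire(bdi)) {
 *		... write back dirty inodes against bdi ...
 *		writeback_release(bdi);
 *	}
 */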
66 66
67 /** 67 /**
68 * __mark_inode_dirty - internal function 68 * __mark_inode_dirty - internal function
69 * @inode: inode to mark 69 * @inode: inode to mark
70 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) 70 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
71 * Mark an inode as dirty. Callers should use mark_inode_dirty or 71 * Mark an inode as dirty. Callers should use mark_inode_dirty or
72 * mark_inode_dirty_sync. 72 * mark_inode_dirty_sync.
73 * 73 *
74 * Put the inode on the super block's dirty list. 74 * Put the inode on the super block's dirty list.
75 * 75 *
76 * CAREFUL! We mark it dirty unconditionally, but move it onto the 76 * CAREFUL! We mark it dirty unconditionally, but move it onto the
77 * dirty list only if it is hashed or if it refers to a blockdev. 77 * dirty list only if it is hashed or if it refers to a blockdev.
78 * If it was not hashed, it will never be added to the dirty list 78 * If it was not hashed, it will never be added to the dirty list
79 * even if it is later hashed, as it will have been marked dirty already. 79 * even if it is later hashed, as it will have been marked dirty already.
80 * 80 *
81 * In short, make sure you hash any inodes _before_ you start marking 81 * In short, make sure you hash any inodes _before_ you start marking
82 * them dirty. 82 * them dirty.
83 * 83 *
84 * This function *must* be atomic for the I_DIRTY_PAGES case - 84 * This function *must* be atomic for the I_DIRTY_PAGES case -
85 * set_page_dirty() is called under spinlock in several places. 85 * set_page_dirty() is called under spinlock in several places.
86 * 86 *
87 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of 87 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
88 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of 88 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
89 * the kernel-internal blockdev inode represents the dirtying time of the 89 * the kernel-internal blockdev inode represents the dirtying time of the
90 * blockdev's pages. This is why for I_DIRTY_PAGES we always use 90 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
91 * page->mapping->host, so the page-dirtying time is recorded in the internal 91 * page->mapping->host, so the page-dirtying time is recorded in the internal
92 * blockdev inode. 92 * blockdev inode.
93 */ 93 */
94 void __mark_inode_dirty(struct inode *inode, int flags) 94 void __mark_inode_dirty(struct inode *inode, int flags)
95 { 95 {
96 struct super_block *sb = inode->i_sb; 96 struct super_block *sb = inode->i_sb;
97 97
98 /* 98 /*
99 * Don't do this for I_DIRTY_PAGES - that doesn't actually 99 * Don't do this for I_DIRTY_PAGES - that doesn't actually
100 * dirty the inode itself 100 * dirty the inode itself
101 */ 101 */
102 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 102 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
103 if (sb->s_op->dirty_inode) 103 if (sb->s_op->dirty_inode)
104 sb->s_op->dirty_inode(inode); 104 sb->s_op->dirty_inode(inode);
105 } 105 }
106 106
107 /* 107 /*
108 * make sure that changes are seen by all cpus before we test i_state 108 * make sure that changes are seen by all cpus before we test i_state
109 * -- mikulas 109 * -- mikulas
110 */ 110 */
111 smp_mb(); 111 smp_mb();
112 112
113 /* avoid the locking if we can */ 113 /* avoid the locking if we can */
114 if ((inode->i_state & flags) == flags) 114 if ((inode->i_state & flags) == flags)
115 return; 115 return;
116 116
117 if (unlikely(block_dump)) { 117 if (unlikely(block_dump)) {
118 struct dentry *dentry = NULL; 118 struct dentry *dentry = NULL;
119 const char *name = "?"; 119 const char *name = "?";
120 120
121 if (!list_empty(&inode->i_dentry)) { 121 if (!list_empty(&inode->i_dentry)) {
122 dentry = list_entry(inode->i_dentry.next, 122 dentry = list_entry(inode->i_dentry.next,
123 struct dentry, d_alias); 123 struct dentry, d_alias);
124 if (dentry && dentry->d_name.name) 124 if (dentry && dentry->d_name.name)
125 name = (const char *) dentry->d_name.name; 125 name = (const char *) dentry->d_name.name;
126 } 126 }
127 127
128 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) 128 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
129 printk(KERN_DEBUG 129 printk(KERN_DEBUG
130 "%s(%d): dirtied inode %lu (%s) on %s\n", 130 "%s(%d): dirtied inode %lu (%s) on %s\n",
131 current->comm, task_pid_nr(current), inode->i_ino, 131 current->comm, task_pid_nr(current), inode->i_ino,
132 name, inode->i_sb->s_id); 132 name, inode->i_sb->s_id);
133 } 133 }
134 134
135 spin_lock(&inode_lock); 135 spin_lock(&inode_lock);
136 if ((inode->i_state & flags) != flags) { 136 if ((inode->i_state & flags) != flags) {
137 const int was_dirty = inode->i_state & I_DIRTY; 137 const int was_dirty = inode->i_state & I_DIRTY;
138 138
139 inode->i_state |= flags; 139 inode->i_state |= flags;
140 140
141 /* 141 /*
142 * If the inode is being synced, just update its dirty state. 142 * If the inode is being synced, just update its dirty state.
143 * The unlocker will place the inode on the appropriate 143 * The unlocker will place the inode on the appropriate
144 * superblock list, based upon its state. 144 * superblock list, based upon its state.
145 */ 145 */
146 if (inode->i_state & I_SYNC) 146 if (inode->i_state & I_SYNC)
147 goto out; 147 goto out;
148 148
149 /* 149 /*
150 * Only add valid (hashed) inodes to the superblock's 150 * Only add valid (hashed) inodes to the superblock's
151 * dirty list. Add blockdev inodes as well. 151 * dirty list. Add blockdev inodes as well.
152 */ 152 */
153 if (!S_ISBLK(inode->i_mode)) { 153 if (!S_ISBLK(inode->i_mode)) {
154 if (hlist_unhashed(&inode->i_hash)) 154 if (hlist_unhashed(&inode->i_hash))
155 goto out; 155 goto out;
156 } 156 }
157 if (inode->i_state & (I_FREEING|I_CLEAR)) 157 if (inode->i_state & (I_FREEING|I_CLEAR))
158 goto out; 158 goto out;
159 159
160 /* 160 /*
161 * If the inode was already on s_dirty/s_io/s_more_io, don't 161 * If the inode was already on s_dirty/s_io/s_more_io, don't
162 * reposition it (that would break s_dirty time-ordering). 162 * reposition it (that would break s_dirty time-ordering).
163 */ 163 */
164 if (!was_dirty) { 164 if (!was_dirty) {
165 inode->dirtied_when = jiffies; 165 inode->dirtied_when = jiffies;
166 list_move(&inode->i_list, &sb->s_dirty); 166 list_move(&inode->i_list, &sb->s_dirty);
167 } 167 }
168 } 168 }
169 out: 169 out:
170 spin_unlock(&inode_lock); 170 spin_unlock(&inode_lock);
171 } 171 }
172 172
173 EXPORT_SYMBOL(__mark_inode_dirty); 173 EXPORT_SYMBOL(__mark_inode_dirty);
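/*
 * Example (editorial sketch, not part of this patch): filesystems reach
 * this function through the wrappers. A metadata update looks like:
 *
 *	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 *	mark_inode_dirty_sync(inode);	(== __mark_inode_dirty(inode, I_DIRTY_SYNC))
 *
 * while page dirtying arrives as I_DIRTY_PAGES via set_page_dirty(),
 * using page->mapping->host as described above.
 */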
174 174
175 static int write_inode(struct inode *inode, int sync) 175 static int write_inode(struct inode *inode, int sync)
176 { 176 {
177 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 177 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
178 return inode->i_sb->s_op->write_inode(inode, sync); 178 return inode->i_sb->s_op->write_inode(inode, sync);
179 return 0; 179 return 0;
180 } 180 }
181 181
182 /* 182 /*
183 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the 183 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
184 * furthest end of its superblock's dirty-inode list. 184 * furthest end of its superblock's dirty-inode list.
185 * 185 *
186 * Before stamping the inode's ->dirtied_when, we check to see whether it is 186 * Before stamping the inode's ->dirtied_when, we check to see whether it is
187 * already the most-recently-dirtied inode on the s_dirty list. If that is 187 * already the most-recently-dirtied inode on the s_dirty list. If that is
188 * the case then the inode must have been redirtied while it was being written 188 * the case then the inode must have been redirtied while it was being written
189 * out and we don't reset its dirtied_when. 189 * out and we don't reset its dirtied_when.
190 */ 190 */
191 static void redirty_tail(struct inode *inode) 191 static void redirty_tail(struct inode *inode)
192 { 192 {
193 struct super_block *sb = inode->i_sb; 193 struct super_block *sb = inode->i_sb;
194 194
195 if (!list_empty(&sb->s_dirty)) { 195 if (!list_empty(&sb->s_dirty)) {
196 struct inode *tail_inode; 196 struct inode *tail_inode;
197 197
198 tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); 198 tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
199 if (time_before(inode->dirtied_when, 199 if (time_before(inode->dirtied_when,
200 tail_inode->dirtied_when)) 200 tail_inode->dirtied_when))
201 inode->dirtied_when = jiffies; 201 inode->dirtied_when = jiffies;
202 } 202 }
203 list_move(&inode->i_list, &sb->s_dirty); 203 list_move(&inode->i_list, &sb->s_dirty);
204 } 204 }
205 205
206 /* 206 /*
207 * requeue inode for re-scanning after sb->s_io list is exhausted. 207 * requeue inode for re-scanning after sb->s_io list is exhausted.
208 */ 208 */
209 static void requeue_io(struct inode *inode) 209 static void requeue_io(struct inode *inode)
210 { 210 {
211 list_move(&inode->i_list, &inode->i_sb->s_more_io); 211 list_move(&inode->i_list, &inode->i_sb->s_more_io);
212 } 212 }
213 213
214 static void inode_sync_complete(struct inode *inode) 214 static void inode_sync_complete(struct inode *inode)
215 { 215 {
216 /* 216 /*
217 * Prevent speculative execution through spin_unlock(&inode_lock); 217 * Prevent speculative execution through spin_unlock(&inode_lock);
218 */ 218 */
219 smp_mb(); 219 smp_mb();
220 wake_up_bit(&inode->i_state, __I_SYNC); 220 wake_up_bit(&inode->i_state, __I_SYNC);
221 } 221 }
222 222
223 static bool inode_dirtied_after(struct inode *inode, unsigned long t) 223 static bool inode_dirtied_after(struct inode *inode, unsigned long t)
224 { 224 {
225 bool ret = time_after(inode->dirtied_when, t); 225 bool ret = time_after(inode->dirtied_when, t);
226 #ifndef CONFIG_64BIT 226 #ifndef CONFIG_64BIT
227 /* 227 /*
228 * For inodes being constantly redirtied, dirtied_when can get stuck. 228 * For inodes being constantly redirtied, dirtied_when can get stuck.
229 * It _appears_ to be in the future, but is actually in distant past. 229 * It _appears_ to be in the future, but is actually in distant past.
230 * This test is necessary to prevent such wrapped-around relative times 230 * This test is necessary to prevent such wrapped-around relative times
231 * from permanently stopping the whole pdflush writeback. 231 * from permanently stopping the whole pdflush writeback.
232 */ 232 */
233 ret = ret && time_before_eq(inode->dirtied_when, jiffies); 233 ret = ret && time_before_eq(inode->dirtied_when, jiffies);
234 #endif 234 #endif
235 return ret; 235 return ret;
236 } 236 }
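/*
 * Editorial note: on 32-bit, jiffies wraps roughly every 49.7 days at
 * HZ=1000, so an inode redirtied just before the wrap can carry a
 * dirtied_when that time_after() keeps reporting as "in the future".
 * The extra time_before_eq(dirtied_when, jiffies) test above treats such
 * a stuck timestamp as expired instead of skipping the inode forever.
 */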
237 237
238 /* 238 /*
239 * Move expired dirty inodes from @delaying_queue to @dispatch_queue. 239 * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
240 */ 240 */
241 static void move_expired_inodes(struct list_head *delaying_queue, 241 static void move_expired_inodes(struct list_head *delaying_queue,
242 struct list_head *dispatch_queue, 242 struct list_head *dispatch_queue,
243 unsigned long *older_than_this) 243 unsigned long *older_than_this)
244 { 244 {
245 while (!list_empty(delaying_queue)) { 245 while (!list_empty(delaying_queue)) {
246 struct inode *inode = list_entry(delaying_queue->prev, 246 struct inode *inode = list_entry(delaying_queue->prev,
247 struct inode, i_list); 247 struct inode, i_list);
248 if (older_than_this && 248 if (older_than_this &&
249 inode_dirtied_after(inode, *older_than_this)) 249 inode_dirtied_after(inode, *older_than_this))
250 break; 250 break;
251 list_move(&inode->i_list, dispatch_queue); 251 list_move(&inode->i_list, dispatch_queue);
252 } 252 }
253 } 253 }
254 254
255 /* 255 /*
256 * Queue all expired dirty inodes for io, eldest first. 256 * Queue all expired dirty inodes for io, eldest first.
257 */ 257 */
258 static void queue_io(struct super_block *sb, 258 static void queue_io(struct super_block *sb,
259 unsigned long *older_than_this) 259 unsigned long *older_than_this)
260 { 260 {
261 list_splice_init(&sb->s_more_io, sb->s_io.prev); 261 list_splice_init(&sb->s_more_io, sb->s_io.prev);
262 move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); 262 move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
263 } 263 }
264 264
265 int sb_has_dirty_inodes(struct super_block *sb) 265 int sb_has_dirty_inodes(struct super_block *sb)
266 { 266 {
267 return !list_empty(&sb->s_dirty) || 267 return !list_empty(&sb->s_dirty) ||
268 !list_empty(&sb->s_io) || 268 !list_empty(&sb->s_io) ||
269 !list_empty(&sb->s_more_io); 269 !list_empty(&sb->s_more_io);
270 } 270 }
271 EXPORT_SYMBOL(sb_has_dirty_inodes); 271 EXPORT_SYMBOL(sb_has_dirty_inodes);
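/*
 * Example (editorial sketch, not part of this patch): a data-integrity
 * pass over one superblock builds a writeback_control and hands it to
 * generic_sync_sb_inodes() below, roughly:
 *
 *	struct writeback_control wbc = {
 *		.sync_mode	= WB_SYNC_ALL,
 *		.range_start	= 0,
 *		.range_end	= LLONG_MAX,
 *		.nr_to_write	= LONG_MAX,
 *	};
 *
 *	generic_sync_sb_inodes(sb, &wbc);
 */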
272 272
273 /* 273 /*
274 * Write a single inode's dirty pages and inode data out to disk. 274 * Write a single inode's dirty pages and inode data out to disk.
275 * If `wait' is set, wait on the writeout. 275 * If `wait' is set, wait on the writeout.
276 * 276 *
277 * The whole writeout design is quite complex and fragile. We want to avoid 277 * The whole writeout design is quite complex and fragile. We want to avoid
278 * starvation of particular inodes when others are being redirtied, prevent 278 * starvation of particular inodes when others are being redirtied, prevent
279 * livelocks, etc. 279 * livelocks, etc.
280 * 280 *
281 * Called under inode_lock. 281 * Called under inode_lock.
282 */ 282 */
283 static int 283 static int
284 __sync_single_inode(struct inode *inode, struct writeback_control *wbc) 284 __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
285 { 285 {
286 unsigned dirty; 286 unsigned dirty;
287 struct address_space *mapping = inode->i_mapping; 287 struct address_space *mapping = inode->i_mapping;
288 int wait = wbc->sync_mode == WB_SYNC_ALL; 288 int wait = wbc->sync_mode == WB_SYNC_ALL;
289 int ret; 289 int ret;
290 290
291 BUG_ON(inode->i_state & I_SYNC); 291 BUG_ON(inode->i_state & I_SYNC);
292 WARN_ON(inode->i_state & I_NEW); 292 WARN_ON(inode->i_state & I_NEW);
293 293
294 /* Set I_SYNC, reset I_DIRTY */ 294 /* Set I_SYNC, reset I_DIRTY */
295 dirty = inode->i_state & I_DIRTY; 295 dirty = inode->i_state & I_DIRTY;
296 inode->i_state |= I_SYNC; 296 inode->i_state |= I_SYNC;
297 inode->i_state &= ~I_DIRTY; 297 inode->i_state &= ~I_DIRTY;
298 298
299 spin_unlock(&inode_lock); 299 spin_unlock(&inode_lock);
300 300
301 ret = do_writepages(mapping, wbc); 301 ret = do_writepages(mapping, wbc);
302 302
303 /* Don't write the inode if only I_DIRTY_PAGES was set */ 303 /* Don't write the inode if only I_DIRTY_PAGES was set */
304 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 304 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
305 int err = write_inode(inode, wait); 305 int err = write_inode(inode, wait);
306 if (ret == 0) 306 if (ret == 0)
307 ret = err; 307 ret = err;
308 } 308 }
309 309
310 if (wait) { 310 if (wait) {
311 int err = filemap_fdatawait(mapping); 311 int err = filemap_fdatawait(mapping);
312 if (ret == 0) 312 if (ret == 0)
313 ret = err; 313 ret = err;
314 } 314 }
315 315
316 spin_lock(&inode_lock); 316 spin_lock(&inode_lock);
317 WARN_ON(inode->i_state & I_NEW); 317 WARN_ON(inode->i_state & I_NEW);
318 inode->i_state &= ~I_SYNC; 318 inode->i_state &= ~I_SYNC;
319 if (!(inode->i_state & I_FREEING)) { 319 if (!(inode->i_state & I_FREEING)) {
320 if (!(inode->i_state & I_DIRTY) && 320 if (!(inode->i_state & I_DIRTY) &&
321 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 321 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
322 /* 322 /*
323 * We didn't write back all the pages. nfs_writepages() 323 * We didn't write back all the pages. nfs_writepages()
324 * sometimes bales out without doing anything. Redirty 324 * sometimes bales out without doing anything. Redirty
325 * the inode; Move it from s_io onto s_more_io/s_dirty. 325 * the inode; Move it from s_io onto s_more_io/s_dirty.
326 */ 326 */
327 /* 327 /*
328 * akpm: if the caller was the kupdate function we put 328 * akpm: if the caller was the kupdate function we put
329 * this inode at the head of s_dirty so it gets first 329 * this inode at the head of s_dirty so it gets first
330 * consideration. Otherwise, move it to the tail, for 330 * consideration. Otherwise, move it to the tail, for
331 * the reasons described there. I'm not really sure 331 * the reasons described there. I'm not really sure
 332 * how much sense this makes. Presumably I had good 332 * how much sense this makes. Presumably I had good
333 * reasons for doing it this way, and I'd rather not 333 * reasons for doing it this way, and I'd rather not
334 * muck with it at present. 334 * muck with it at present.
335 */ 335 */
336 if (wbc->for_kupdate) { 336 if (wbc->for_kupdate) {
337 /* 337 /*
338 * For the kupdate function we move the inode 338 * For the kupdate function we move the inode
339 * to s_more_io so it will get more writeout as 339 * to s_more_io so it will get more writeout as
340 * soon as the queue becomes uncongested. 340 * soon as the queue becomes uncongested.
341 */ 341 */
342 inode->i_state |= I_DIRTY_PAGES; 342 inode->i_state |= I_DIRTY_PAGES;
343 if (wbc->nr_to_write <= 0) { 343 if (wbc->nr_to_write <= 0) {
344 /* 344 /*
345 * slice used up: queue for next turn 345 * slice used up: queue for next turn
346 */ 346 */
347 requeue_io(inode); 347 requeue_io(inode);
348 } else { 348 } else {
349 /* 349 /*
350 * somehow blocked: retry later 350 * somehow blocked: retry later
351 */ 351 */
352 redirty_tail(inode); 352 redirty_tail(inode);
353 } 353 }
354 } else { 354 } else {
355 /* 355 /*
356 * Otherwise fully redirty the inode so that 356 * Otherwise fully redirty the inode so that
357 * other inodes on this superblock will get some 357 * other inodes on this superblock will get some
358 * writeout. Otherwise heavy writing to one 358 * writeout. Otherwise heavy writing to one
359 * file would indefinitely suspend writeout of 359 * file would indefinitely suspend writeout of
360 * all the other files. 360 * all the other files.
361 */ 361 */
362 inode->i_state |= I_DIRTY_PAGES; 362 inode->i_state |= I_DIRTY_PAGES;
363 redirty_tail(inode); 363 redirty_tail(inode);
364 } 364 }
365 } else if (inode->i_state & I_DIRTY) { 365 } else if (inode->i_state & I_DIRTY) {
366 /* 366 /*
 367 * Someone redirtied the inode while we were writing back 367 * Someone redirtied the inode while we were writing back
368 * the pages. 368 * the pages.
369 */ 369 */
370 redirty_tail(inode); 370 redirty_tail(inode);
371 } else if (atomic_read(&inode->i_count)) { 371 } else if (atomic_read(&inode->i_count)) {
372 /* 372 /*
373 * The inode is clean, inuse 373 * The inode is clean, inuse
374 */ 374 */
375 list_move(&inode->i_list, &inode_in_use); 375 list_move(&inode->i_list, &inode_in_use);
376 } else { 376 } else {
377 /* 377 /*
378 * The inode is clean, unused 378 * The inode is clean, unused
379 */ 379 */
380 list_move(&inode->i_list, &inode_unused); 380 list_move(&inode->i_list, &inode_unused);
381 } 381 }
382 } 382 }
383 inode_sync_complete(inode); 383 inode_sync_complete(inode);
384 return ret; 384 return ret;
385 } 385 }
386 386
387 /* 387 /*
388 * Write out an inode's dirty pages. Called under inode_lock. Either the 388 * Write out an inode's dirty pages. Called under inode_lock. Either the
389 * caller has ref on the inode (either via __iget or via syscall against an fd) 389 * caller has ref on the inode (either via __iget or via syscall against an fd)
390 * or the inode has I_WILL_FREE set (via generic_forget_inode) 390 * or the inode has I_WILL_FREE set (via generic_forget_inode)
391 */ 391 */
392 static int 392 static int
393 __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) 393 __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
394 { 394 {
395 wait_queue_head_t *wqh; 395 wait_queue_head_t *wqh;
396 396
397 if (!atomic_read(&inode->i_count)) 397 if (!atomic_read(&inode->i_count))
398 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 398 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
399 else 399 else
400 WARN_ON(inode->i_state & I_WILL_FREE); 400 WARN_ON(inode->i_state & I_WILL_FREE);
401 401
402 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { 402 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
403 /* 403 /*
404 * We're skipping this inode because it's locked, and we're not 404 * We're skipping this inode because it's locked, and we're not
405 * doing writeback-for-data-integrity. Move it to s_more_io so 405 * doing writeback-for-data-integrity. Move it to s_more_io so
406 * that writeback can proceed with the other inodes on s_io. 406 * that writeback can proceed with the other inodes on s_io.
407 * We'll have another go at writing back this inode when we 407 * We'll have another go at writing back this inode when we
 408 * have completed a full scan of s_io. 408 * have completed a full scan of s_io.
409 */ 409 */
410 requeue_io(inode); 410 requeue_io(inode);
411 return 0; 411 return 0;
412 } 412 }
413 413
414 /* 414 /*
415 * It's a data-integrity sync. We must wait. 415 * It's a data-integrity sync. We must wait.
416 */ 416 */
417 if (inode->i_state & I_SYNC) { 417 if (inode->i_state & I_SYNC) {
418 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); 418 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
419 419
420 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 420 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
421 do { 421 do {
422 spin_unlock(&inode_lock); 422 spin_unlock(&inode_lock);
423 __wait_on_bit(wqh, &wq, inode_wait, 423 __wait_on_bit(wqh, &wq, inode_wait,
424 TASK_UNINTERRUPTIBLE); 424 TASK_UNINTERRUPTIBLE);
425 spin_lock(&inode_lock); 425 spin_lock(&inode_lock);
426 } while (inode->i_state & I_SYNC); 426 } while (inode->i_state & I_SYNC);
427 } 427 }
428 return __sync_single_inode(inode, wbc); 428 return __sync_single_inode(inode, wbc);
429 } 429 }
430 430
431 /* 431 /*
432 * Write out a superblock's list of dirty inodes. A wait will be performed 432 * Write out a superblock's list of dirty inodes. A wait will be performed
433 * upon no inodes, all inodes or the final one, depending upon sync_mode. 433 * upon no inodes, all inodes or the final one, depending upon sync_mode.
434 * 434 *
435 * If older_than_this is non-NULL, then only write out inodes which 435 * If older_than_this is non-NULL, then only write out inodes which
436 * had their first dirtying at a time earlier than *older_than_this. 436 * had their first dirtying at a time earlier than *older_than_this.
437 * 437 *
438 * If we're a pdflush thread, then implement pdflush collision avoidance 438 * If we're a pdflush thread, then implement pdflush collision avoidance
439 * against the entire list. 439 * against the entire list.
440 * 440 *
 441 * If `bdi' is non-zero then we're being asked to write back a specific queue. 441 * If `bdi' is non-zero then we're being asked to write back a specific queue.
442 * This function assumes that the blockdev superblock's inodes are backed by 442 * This function assumes that the blockdev superblock's inodes are backed by
443 * a variety of queues, so all inodes are searched. For other superblocks, 443 * a variety of queues, so all inodes are searched. For other superblocks,
444 * assume that all inodes are backed by the same queue. 444 * assume that all inodes are backed by the same queue.
445 * 445 *
 446 * FIXME: this linear search could get expensive with many filesystems. But 446 * FIXME: this linear search could get expensive with many filesystems. But
447 * how to fix? We need to go from an address_space to all inodes which share 447 * how to fix? We need to go from an address_space to all inodes which share
448 * a queue with that address_space. (Easy: have a global "dirty superblocks" 448 * a queue with that address_space. (Easy: have a global "dirty superblocks"
449 * list). 449 * list).
450 * 450 *
451 * The inodes to be written are parked on sb->s_io. They are moved back onto 451 * The inodes to be written are parked on sb->s_io. They are moved back onto
452 * sb->s_dirty as they are selected for writing. This way, none can be missed 452 * sb->s_dirty as they are selected for writing. This way, none can be missed
453 * on the writer throttling path, and we get decent balancing between many 453 * on the writer throttling path, and we get decent balancing between many
454 * throttled threads: we don't want them all piling up on inode_sync_wait. 454 * throttled threads: we don't want them all piling up on inode_sync_wait.
455 */ 455 */
456 void generic_sync_sb_inodes(struct super_block *sb, 456 void generic_sync_sb_inodes(struct super_block *sb,
457 struct writeback_control *wbc) 457 struct writeback_control *wbc)
458 { 458 {
459 const unsigned long start = jiffies; /* livelock avoidance */ 459 const unsigned long start = jiffies; /* livelock avoidance */
460 int sync = wbc->sync_mode == WB_SYNC_ALL; 460 int sync = wbc->sync_mode == WB_SYNC_ALL;
461 461
462 spin_lock(&inode_lock); 462 spin_lock(&inode_lock);
463 if (!wbc->for_kupdate || list_empty(&sb->s_io)) 463 if (!wbc->for_kupdate || list_empty(&sb->s_io))
464 queue_io(sb, wbc->older_than_this); 464 queue_io(sb, wbc->older_than_this);
465 465
466 while (!list_empty(&sb->s_io)) { 466 while (!list_empty(&sb->s_io)) {
467 struct inode *inode = list_entry(sb->s_io.prev, 467 struct inode *inode = list_entry(sb->s_io.prev,
468 struct inode, i_list); 468 struct inode, i_list);
469 struct address_space *mapping = inode->i_mapping; 469 struct address_space *mapping = inode->i_mapping;
470 struct backing_dev_info *bdi = mapping->backing_dev_info; 470 struct backing_dev_info *bdi = mapping->backing_dev_info;
471 long pages_skipped; 471 long pages_skipped;
472 472
473 if (!bdi_cap_writeback_dirty(bdi)) { 473 if (!bdi_cap_writeback_dirty(bdi)) {
474 redirty_tail(inode); 474 redirty_tail(inode);
475 if (sb_is_blkdev_sb(sb)) { 475 if (sb_is_blkdev_sb(sb)) {
476 /* 476 /*
477 * Dirty memory-backed blockdev: the ramdisk 477 * Dirty memory-backed blockdev: the ramdisk
478 * driver does this. Skip just this inode 478 * driver does this. Skip just this inode
479 */ 479 */
480 continue; 480 continue;
481 } 481 }
482 /* 482 /*
483 * Dirty memory-backed inode against a filesystem other 483 * Dirty memory-backed inode against a filesystem other
484 * than the kernel-internal bdev filesystem. Skip the 484 * than the kernel-internal bdev filesystem. Skip the
485 * entire superblock. 485 * entire superblock.
486 */ 486 */
487 break; 487 break;
488 } 488 }
489 489
490 if (inode->i_state & I_NEW) { 490 if (inode->i_state & I_NEW) {
491 requeue_io(inode); 491 requeue_io(inode);
492 continue; 492 continue;
493 } 493 }
494 494
495 if (wbc->nonblocking && bdi_write_congested(bdi)) { 495 if (wbc->nonblocking && bdi_write_congested(bdi)) {
496 wbc->encountered_congestion = 1; 496 wbc->encountered_congestion = 1;
497 if (!sb_is_blkdev_sb(sb)) 497 if (!sb_is_blkdev_sb(sb))
498 break; /* Skip a congested fs */ 498 break; /* Skip a congested fs */
499 requeue_io(inode); 499 requeue_io(inode);
500 continue; /* Skip a congested blockdev */ 500 continue; /* Skip a congested blockdev */
501 } 501 }
502 502
503 if (wbc->bdi && bdi != wbc->bdi) { 503 if (wbc->bdi && bdi != wbc->bdi) {
504 if (!sb_is_blkdev_sb(sb)) 504 if (!sb_is_blkdev_sb(sb))
505 break; /* fs has the wrong queue */ 505 break; /* fs has the wrong queue */
506 requeue_io(inode); 506 requeue_io(inode);
507 continue; /* blockdev has wrong queue */ 507 continue; /* blockdev has wrong queue */
508 } 508 }
509 509
510 /* 510 /*
511 * Was this inode dirtied after sync_sb_inodes was called? 511 * Was this inode dirtied after sync_sb_inodes was called?
512 * This keeps sync from doing extra jobs and avoids livelock. 512 * This keeps sync from doing extra jobs and avoids livelock.
513 */ 513 */
514 if (inode_dirtied_after(inode, start)) 514 if (inode_dirtied_after(inode, start))
515 break; 515 break;
516 516
517 /* Is another pdflush already flushing this queue? */ 517 /* Is another pdflush already flushing this queue? */
518 if (current_is_pdflush() && !writeback_acquire(bdi)) 518 if (current_is_pdflush() && !writeback_acquire(bdi))
519 break; 519 break;
520 520
521 BUG_ON(inode->i_state & I_FREEING); 521 BUG_ON(inode->i_state & I_FREEING);
522 __iget(inode); 522 __iget(inode);
523 pages_skipped = wbc->pages_skipped; 523 pages_skipped = wbc->pages_skipped;
524 __writeback_single_inode(inode, wbc); 524 __writeback_single_inode(inode, wbc);
525 if (current_is_pdflush()) 525 if (current_is_pdflush())
526 writeback_release(bdi); 526 writeback_release(bdi);
527 if (wbc->pages_skipped != pages_skipped) { 527 if (wbc->pages_skipped != pages_skipped) {
528 /* 528 /*
529 * writeback is not making progress due to locked 529 * writeback is not making progress due to locked
530 * buffers. Skip this inode for now. 530 * buffers. Skip this inode for now.
531 */ 531 */
532 redirty_tail(inode); 532 redirty_tail(inode);
533 } 533 }
534 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
535 iput(inode); 535 iput(inode);
536 cond_resched(); 536 cond_resched();
537 spin_lock(&inode_lock); 537 spin_lock(&inode_lock);
538 if (wbc->nr_to_write <= 0) { 538 if (wbc->nr_to_write <= 0) {
539 wbc->more_io = 1; 539 wbc->more_io = 1;
540 break; 540 break;
541 } 541 }
542 if (!list_empty(&sb->s_more_io)) 542 if (!list_empty(&sb->s_more_io))
543 wbc->more_io = 1; 543 wbc->more_io = 1;
544 } 544 }
545 545
546 if (sync) { 546 if (sync) {
547 struct inode *inode, *old_inode = NULL; 547 struct inode *inode, *old_inode = NULL;
548 548
549 /* 549 /*
550 * Data integrity sync. Must wait for all pages under writeback, 550 * Data integrity sync. Must wait for all pages under writeback,
551 * because there may have been pages dirtied before our sync 551 * because there may have been pages dirtied before our sync
552 * call, but which had writeout started before we write it out. 552 * call, but which had writeout started before we write it out.
553 * In which case, the inode may not be on the dirty list, but 553 * In which case, the inode may not be on the dirty list, but
554 * we still have to wait for that writeout. 554 * we still have to wait for that writeout.
555 */ 555 */
556 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 556 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
557 struct address_space *mapping; 557 struct address_space *mapping;
558 558
559 if (inode->i_state & 559 if (inode->i_state &
560 (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 560 (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
561 continue; 561 continue;
562 mapping = inode->i_mapping; 562 mapping = inode->i_mapping;
563 if (mapping->nrpages == 0) 563 if (mapping->nrpages == 0)
564 continue; 564 continue;
565 __iget(inode); 565 __iget(inode);
566 spin_unlock(&inode_lock); 566 spin_unlock(&inode_lock);
567 /* 567 /*
568 * We hold a reference to 'inode' so it couldn't have 568 * We hold a reference to 'inode' so it couldn't have
569 * been removed from s_inodes list while we dropped the 569 * been removed from s_inodes list while we dropped the
570 * inode_lock. We cannot iput the inode now as we can 570 * inode_lock. We cannot iput the inode now as we can
571 * be holding the last reference and we cannot iput it 571 * be holding the last reference and we cannot iput it
572 * under inode_lock. So we keep the reference and iput 572 * under inode_lock. So we keep the reference and iput
573 * it later. 573 * it later.
574 */ 574 */
575 iput(old_inode); 575 iput(old_inode);
576 old_inode = inode; 576 old_inode = inode;
577 577
578 filemap_fdatawait(mapping); 578 filemap_fdatawait(mapping);
579 579
580 cond_resched(); 580 cond_resched();
581 581
582 spin_lock(&inode_lock); 582 spin_lock(&inode_lock);
583 } 583 }
584 spin_unlock(&inode_lock); 584 spin_unlock(&inode_lock);
585 iput(old_inode); 585 iput(old_inode);
586 } else 586 } else
587 spin_unlock(&inode_lock); 587 spin_unlock(&inode_lock);
588 588
589 return; /* Leave any unwritten inodes on s_io */ 589 return; /* Leave any unwritten inodes on s_io */
590 } 590 }
591 EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); 591 EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
592 592
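A caller drives generic_sync_sb_inodes() entirely through the writeback_control
it passes in. The sketch below is hypothetical (the field values mirror the
ones sync_inodes_sb() builds further down); it shows a data-integrity pass over
a single superblock:

static void example_sync_sb(struct super_block *sb)
{
        struct writeback_control wbc = {
                .sync_mode   = WB_SYNC_ALL,     /* wait on every page */
                .nr_to_write = LONG_MAX,        /* no page budget for sync */
                .range_start = 0,
                .range_end   = LLONG_MAX,
        };

        generic_sync_sb_inodes(sb, &wbc);
}
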
593 static void sync_sb_inodes(struct super_block *sb, 593 static void sync_sb_inodes(struct super_block *sb,
594 struct writeback_control *wbc) 594 struct writeback_control *wbc)
595 { 595 {
596 generic_sync_sb_inodes(sb, wbc); 596 generic_sync_sb_inodes(sb, wbc);
597 } 597 }
598 598
599 /* 599 /*
600 * Start writeback of dirty pagecache data against all unlocked inodes. 600 * Start writeback of dirty pagecache data against all unlocked inodes.
601 * 601 *
602 * Note: 602 * Note:
603 * We don't need to grab a reference to the superblock here. If it has a 603 * We don't need to grab a reference to the superblock here. If it has a
604 * non-empty ->s_dirty it hasn't been killed yet and kill_super() won't proceed 604 * non-empty ->s_dirty it hasn't been killed yet and kill_super() won't proceed
605 * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all 605 * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
606 * empty. Since __sync_single_inode() regains inode_lock before it finally moves 606 * empty. Since __sync_single_inode() regains inode_lock before it finally moves
607 * the inode off the superblock lists, we are OK. 607 * the inode off the superblock lists, we are OK.
608 * 608 *
609 * If `older_than_this' is non-zero then only flush inodes which have a 609 * If `older_than_this' is non-zero then only flush inodes which have a
610 * flushtime older than *older_than_this. 610 * flushtime older than *older_than_this.
611 * 611 *
612 * If `bdi' is non-zero then we will scan the first inode against each 612 * If `bdi' is non-zero then we will scan the first inode against each
613 * superblock until we find the matching ones. One group will be the dirty 613 * superblock until we find the matching ones. One group will be the dirty
614 * inodes against a filesystem. Then when we hit the dummy blockdev superblock, 614 * inodes against a filesystem. Then when we hit the dummy blockdev superblock,
615 * sync_sb_inodes will seek out the blockdev which matches `bdi'. Maybe not 615 * sync_sb_inodes will seek out the blockdev which matches `bdi'. Maybe not
616 * super-efficient but we're about to do a ton of I/O... 616 * super-efficient but we're about to do a ton of I/O...
617 */ 617 */
618 void 618 void
619 writeback_inodes(struct writeback_control *wbc) 619 writeback_inodes(struct writeback_control *wbc)
620 { 620 {
621 struct super_block *sb; 621 struct super_block *sb;
622 622
623 might_sleep(); 623 might_sleep();
624 spin_lock(&sb_lock); 624 spin_lock(&sb_lock);
625 restart: 625 restart:
626 list_for_each_entry_reverse(sb, &super_blocks, s_list) { 626 list_for_each_entry_reverse(sb, &super_blocks, s_list) {
627 if (sb_has_dirty_inodes(sb)) { 627 if (sb_has_dirty_inodes(sb)) {
628 /* we're making our own get_super here */ 628 /* we're making our own get_super here */
629 sb->s_count++; 629 sb->s_count++;
630 spin_unlock(&sb_lock); 630 spin_unlock(&sb_lock);
631 /* 631 /*
632 * If we can't get the readlock, there's no sense in 632 * If we can't get the readlock, there's no sense in
633 * waiting around, most of the time the FS is going to 633 * waiting around, most of the time the FS is going to
634 * be unmounted by the time it is released. 634 * be unmounted by the time it is released.
635 */ 635 */
636 if (down_read_trylock(&sb->s_umount)) { 636 if (down_read_trylock(&sb->s_umount)) {
637 if (sb->s_root) 637 if (sb->s_root)
638 sync_sb_inodes(sb, wbc); 638 sync_sb_inodes(sb, wbc);
639 up_read(&sb->s_umount); 639 up_read(&sb->s_umount);
640 } 640 }
641 spin_lock(&sb_lock); 641 spin_lock(&sb_lock);
642 if (__put_super_and_need_restart(sb)) 642 if (__put_super_and_need_restart(sb))
643 goto restart; 643 goto restart;
644 } 644 }
645 if (wbc->nr_to_write <= 0) 645 if (wbc->nr_to_write <= 0)
646 break; 646 break;
647 } 647 }
648 spin_unlock(&sb_lock); 648 spin_unlock(&sb_lock);
649 } 649 }
650 650
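The best-effort counterpart is a background flusher in the style of pdflush's
background_writeout(): a non-blocking writeback_control with a bounded page
budget. A hedged sketch (example_background_flush() and the 1024-page budget
are illustrative, not from this patch):

static void example_background_flush(void)
{
        struct writeback_control wbc = {
                .bdi             = NULL,        /* all queues */
                .sync_mode       = WB_SYNC_NONE,
                .older_than_this = NULL,
                .nr_to_write     = 1024,        /* bounded page budget */
                .nonblocking     = 1,           /* skip congested queues */
        };

        writeback_inodes(&wbc);
}
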
651 /* 651 /*
652 * writeback and wait upon the filesystem's dirty inodes. The caller will 652 * writeback and wait upon the filesystem's dirty inodes. The caller will
653 * do this in two passes - one to write, and one to wait. 653 * do this in two passes - one to write, and one to wait.
654 * 654 *
655 * A finite limit is set on the number of pages which will be written. 655 * A finite limit is set on the number of pages which will be written.
656 * To prevent infinite livelock of sys_sync(). 656 * To prevent infinite livelock of sys_sync().
657 * 657 *
658 * We add in the number of potentially dirty inodes, because each inode write 658 * We add in the number of potentially dirty inodes, because each inode write
659 * can dirty pagecache in the underlying blockdev. 659 * can dirty pagecache in the underlying blockdev.
660 */ 660 */
661 void sync_inodes_sb(struct super_block *sb, int wait) 661 void sync_inodes_sb(struct super_block *sb, int wait)
662 { 662 {
663 struct writeback_control wbc = { 663 struct writeback_control wbc = {
664 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, 664 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
665 .range_start = 0, 665 .range_start = 0,
666 .range_end = LLONG_MAX, 666 .range_end = LLONG_MAX,
667 }; 667 };
668 668
669 if (!wait) { 669 if (!wait) {
670 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 670 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
671 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); 671 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
672 672
673 wbc.nr_to_write = nr_dirty + nr_unstable + 673 wbc.nr_to_write = nr_dirty + nr_unstable +
674 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 674 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
675 } else 675 } else
676 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ 676 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
677 677
678 sync_sb_inodes(sb, &wbc); 678 sync_sb_inodes(sb, &wbc);
679 } 679 }
680 680
681 /** 681 /**
682 * sync_inodes - writes all inodes to disk
683 * @wait: wait for completion
684 *
685 * sync_inodes() goes through each super block's dirty inode list, writes the
686 * inodes out, waits on the writeout and puts the inodes back on the normal
687 * list.
688 *
689 * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle
690 * part of the sync functions is that the blockdev "superblock" is processed
691 * last. This is because the write_inode() function of a typical fs will
692 * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
693 * What we want to do is to perform all that dirtying first, and then write
694 * back all those inode blocks via the blockdev mapping in one sweep. So the
695 * additional (somewhat redundant) sync_blockdev() calls here are to make
696 * sure that really happens. Because if we call sync_inodes_sb(wait=1) with
697 * outstanding dirty inodes, the writeback goes block-at-a-time within the
698 * filesystem's write_inode(). This is extremely slow.
699 */
700 static void __sync_inodes(int wait)
701 {
702 struct super_block *sb;
703
704 spin_lock(&sb_lock);
705 restart:
706 list_for_each_entry(sb, &super_blocks, s_list) {
707 sb->s_count++;
708 spin_unlock(&sb_lock);
709 down_read(&sb->s_umount);
710 if (sb->s_root) {
711 sync_inodes_sb(sb, wait);
712 sync_blockdev(sb->s_bdev);
713 }
714 up_read(&sb->s_umount);
715 spin_lock(&sb_lock);
716 if (__put_super_and_need_restart(sb))
717 goto restart;
718 }
719 spin_unlock(&sb_lock);
720 }
721
722 void sync_inodes(int wait)
723 {
724 __sync_inodes(0);
725
726 if (wait)
727 __sync_inodes(1);
728 }
729
730 /**
731 * write_inode_now - write an inode to disk 682 * write_inode_now - write an inode to disk
732 * @inode: inode to write to disk 683 * @inode: inode to write to disk
733 * @sync: whether the write should be synchronous or not 684 * @sync: whether the write should be synchronous or not
734 * 685 *
735 * This function commits an inode to disk immediately if it is dirty. This is 686 * This function commits an inode to disk immediately if it is dirty. This is
736 * primarily needed by knfsd. 687 * primarily needed by knfsd.
737 * 688 *
738 * The caller must either have a ref on the inode or must have set I_WILL_FREE. 689 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
739 */ 690 */
740 int write_inode_now(struct inode *inode, int sync) 691 int write_inode_now(struct inode *inode, int sync)
741 { 692 {
742 int ret; 693 int ret;
743 struct writeback_control wbc = { 694 struct writeback_control wbc = {
744 .nr_to_write = LONG_MAX, 695 .nr_to_write = LONG_MAX,
745 .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, 696 .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
746 .range_start = 0, 697 .range_start = 0,
747 .range_end = LLONG_MAX, 698 .range_end = LLONG_MAX,
748 }; 699 };
749 700
750 if (!mapping_cap_writeback_dirty(inode->i_mapping)) 701 if (!mapping_cap_writeback_dirty(inode->i_mapping))
751 wbc.nr_to_write = 0; 702 wbc.nr_to_write = 0;
752 703
753 might_sleep(); 704 might_sleep();
754 spin_lock(&inode_lock); 705 spin_lock(&inode_lock);
755 ret = __writeback_single_inode(inode, &wbc); 706 ret = __writeback_single_inode(inode, &wbc);
756 spin_unlock(&inode_lock); 707 spin_unlock(&inode_lock);
757 if (sync) 708 if (sync)
758 inode_sync_wait(inode); 709 inode_sync_wait(inode);
759 return ret; 710 return ret;
760 } 711 }
761 EXPORT_SYMBOL(write_inode_now); 712 EXPORT_SYMBOL(write_inode_now);
762 713
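As the comment notes, write_inode_now() exists mainly so knfsd can push a
single inode out synchronously. A hypothetical caller, holding a reference on
the inode as required (example_commit_inode() is an illustrative name):

static int example_commit_inode(struct inode *inode)
{
        /* sync == 1: WB_SYNC_ALL, and wait for the writeback to finish */
        return write_inode_now(inode, 1);
}
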
763 /** 714 /**
764 * sync_inode - write an inode and its pages to disk. 715 * sync_inode - write an inode and its pages to disk.
765 * @inode: the inode to sync 716 * @inode: the inode to sync
766 * @wbc: controls the writeback mode 717 * @wbc: controls the writeback mode
767 * 718 *
768 * sync_inode() will write an inode and its pages to disk. It will also 719 * sync_inode() will write an inode and its pages to disk. It will also
769 * correctly update the inode on its superblock's dirty inode lists and will 720 * correctly update the inode on its superblock's dirty inode lists and will
770 * update inode->i_state. 721 * update inode->i_state.
771 * 722 *
772 * The caller must have a ref on the inode. 723 * The caller must have a ref on the inode.
773 */ 724 */
774 int sync_inode(struct inode *inode, struct writeback_control *wbc) 725 int sync_inode(struct inode *inode, struct writeback_control *wbc)
775 { 726 {
776 int ret; 727 int ret;
777 728
778 spin_lock(&inode_lock); 729 spin_lock(&inode_lock);
779 ret = __writeback_single_inode(inode, wbc); 730 ret = __writeback_single_inode(inode, wbc);
780 spin_unlock(&inode_lock); 731 spin_unlock(&inode_lock);
781 return ret; 732 return ret;
782 } 733 }
783 EXPORT_SYMBOL(sync_inode); 734 EXPORT_SYMBOL(sync_inode);
784 735
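sync_inode() leaves the writeback_control entirely to the caller, so it also
covers bounded, non-waiting flushes. A hypothetical sketch
(example_flush_some() and the 64-page budget are illustrative):

static int example_flush_some(struct inode *inode)
{
        struct writeback_control wbc = {
                .sync_mode   = WB_SYNC_NONE,    /* don't wait on pages */
                .nr_to_write = 64,              /* bounded page budget */
                .range_start = 0,
                .range_end   = LLONG_MAX,
        };

        return sync_inode(inode, &wbc);
}
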
785 /** 736 /**
786 * generic_osync_inode - flush all dirty data for a given inode to disk 737 * generic_osync_inode - flush all dirty data for a given inode to disk
787 * @inode: inode to write 738 * @inode: inode to write
788 * @mapping: the address_space that should be flushed 739 * @mapping: the address_space that should be flushed
789 * @what: what to write and wait upon 740 * @what: what to write and wait upon
790 * 741 *
791 * This can be called by file_write functions for files which have the 742 * This can be called by file_write functions for files which have the
792 * O_SYNC flag set, to flush dirty writes to disk. 743 * O_SYNC flag set, to flush dirty writes to disk.
793 * 744 *
794 * @what is a bitmask, specifying which part of the inode's data should be 745 * @what is a bitmask, specifying which part of the inode's data should be
795 * written and waited upon. 746 * written and waited upon.
796 * 747 *
797 * OSYNC_DATA: i_mapping's dirty data 748 * OSYNC_DATA: i_mapping's dirty data
798 * OSYNC_METADATA: the buffers at i_mapping->private_list 749 * OSYNC_METADATA: the buffers at i_mapping->private_list
799 * OSYNC_INODE: the inode itself 750 * OSYNC_INODE: the inode itself
800 */ 751 */
801 752
802 int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what) 753 int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
803 { 754 {
804 int err = 0; 755 int err = 0;
805 int need_write_inode_now = 0; 756 int need_write_inode_now = 0;
806 int err2; 757 int err2;
807 758
808 if (what & OSYNC_DATA) 759 if (what & OSYNC_DATA)
809 err = filemap_fdatawrite(mapping); 760 err = filemap_fdatawrite(mapping);
810 if (what & (OSYNC_METADATA|OSYNC_DATA)) { 761 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
811 err2 = sync_mapping_buffers(mapping); 762 err2 = sync_mapping_buffers(mapping);
812 if (!err) 763 if (!err)
813 err = err2; 764 err = err2;
814 } 765 }
815 if (what & OSYNC_DATA) { 766 if (what & OSYNC_DATA) {
816 err2 = filemap_fdatawait(mapping); 767 err2 = filemap_fdatawait(mapping);
817 if (!err) 768 if (!err)
818 err = err2; 769 err = err2;
819 } 770 }
820 771
821 spin_lock(&inode_lock); 772 spin_lock(&inode_lock);
822 if ((inode->i_state & I_DIRTY) && 773 if ((inode->i_state & I_DIRTY) &&
823 ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) 774 ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
824 need_write_inode_now = 1; 775 need_write_inode_now = 1;
825 spin_unlock(&inode_lock); 776 spin_unlock(&inode_lock);
826 777
827 if (need_write_inode_now) { 778 if (need_write_inode_now) {
828 err2 = write_inode_now(inode, 1); 779 err2 = write_inode_now(inode, 1);
829 if (!err) 780 if (!err)
830 err = err2; 781 err = err2;
831 } 782 }
832 else 783 else
833 inode_sync_wait(inode); 784 inode_sync_wait(inode);
834 785
835 return err; 786 return err;
836 } 787 }
837 EXPORT_SYMBOL(generic_osync_inode); 788 EXPORT_SYMBOL(generic_osync_inode);
838 789
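A typical user is an O_SYNC write path, which must flush the data plus any
metadata needed to retrieve it. A hedged sketch of that tail
(example_osync_after_write() is an illustrative name; the bitmask matches the
flags documented above):

static int example_osync_after_write(struct file *file)
{
        struct inode *inode = file->f_mapping->host;

        return generic_osync_inode(inode, file->f_mapping,
                                   OSYNC_METADATA | OSYNC_DATA);
}
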
1 /* fs/ internal definitions 1 /* fs/ internal definitions
2 * 2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12 struct super_block; 12 struct super_block;
13 struct linux_binprm; 13 struct linux_binprm;
14 struct path; 14 struct path;
15 15
16 /* 16 /*
17 * block_dev.c 17 * block_dev.c
18 */ 18 */
19 #ifdef CONFIG_BLOCK 19 #ifdef CONFIG_BLOCK
20 extern struct super_block *blockdev_superblock; 20 extern struct super_block *blockdev_superblock;
21 extern void __init bdev_cache_init(void); 21 extern void __init bdev_cache_init(void);
22 22
23 static inline int sb_is_blkdev_sb(struct super_block *sb) 23 static inline int sb_is_blkdev_sb(struct super_block *sb)
24 { 24 {
25 return sb == blockdev_superblock; 25 return sb == blockdev_superblock;
26 } 26 }
27 27
28 extern int __sync_blockdev(struct block_device *bdev, int wait);
29
28 #else 30 #else
29 static inline void bdev_cache_init(void) 31 static inline void bdev_cache_init(void)
30 { 32 {
31 } 33 }
32 34
33 static inline int sb_is_blkdev_sb(struct super_block *sb) 35 static inline int sb_is_blkdev_sb(struct super_block *sb)
34 { 36 {
35 return 0; 37 return 0;
36 } 38 }
39
40 static inline int __sync_blockdev(struct block_device *bdev, int wait)
41 {
42 return 0;
43 }
37 #endif 44 #endif
38 45
39 /* 46 /*
40 * char_dev.c 47 * char_dev.c
41 */ 48 */
42 extern void __init chrdev_init(void); 49 extern void __init chrdev_init(void);
43 50
44 /* 51 /*
45 * exec.c 52 * exec.c
46 */ 53 */
47 extern int check_unsafe_exec(struct linux_binprm *); 54 extern int check_unsafe_exec(struct linux_binprm *);
48 55
49 /* 56 /*
50 * namespace.c 57 * namespace.c
51 */ 58 */
52 extern int copy_mount_options(const void __user *, unsigned long *); 59 extern int copy_mount_options(const void __user *, unsigned long *);
53 60
54 extern void free_vfsmnt(struct vfsmount *); 61 extern void free_vfsmnt(struct vfsmount *);
55 extern struct vfsmount *alloc_vfsmnt(const char *); 62 extern struct vfsmount *alloc_vfsmnt(const char *);
56 extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 63 extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
57 extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 64 extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
58 struct vfsmount *); 65 struct vfsmount *);
59 extern void release_mounts(struct list_head *); 66 extern void release_mounts(struct list_head *);
60 extern void umount_tree(struct vfsmount *, int, struct list_head *); 67 extern void umount_tree(struct vfsmount *, int, struct list_head *);
61 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); 68 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
62 69
63 extern void __init mnt_init(void); 70 extern void __init mnt_init(void);
64 71
65 /* 72 /*
66 * fs_struct.c 73 * fs_struct.c
67 */ 74 */
68 extern void chroot_fs_refs(struct path *, struct path *); 75 extern void chroot_fs_refs(struct path *, struct path *);
69 76
70 /* 77 /*
71 * file_table.c 78 * file_table.c
72 */ 79 */
73 extern void mark_files_ro(struct super_block *); 80 extern void mark_files_ro(struct super_block *);
74
75 /*
76 * super.c
1 /* 1 /*
2 * linux/fs/super.c 2 * linux/fs/super.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * super.c contains code to handle: - mount structures 6 * super.c contains code to handle: - mount structures
7 * - super-block tables 7 * - super-block tables
8 * - filesystem drivers list 8 * - filesystem drivers list
9 * - mount system call 9 * - mount system call
10 * - umount system call 10 * - umount system call
11 * - ustat system call 11 * - ustat system call
12 * 12 *
13 * GK 2/5/95 - Changed to support mounting the root fs via NFS 13 * GK 2/5/95 - Changed to support mounting the root fs via NFS
14 * 14 *
15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall 15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
17 * Added options to /proc/mounts: 17 * Added options to /proc/mounts:
18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. 18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
21 */ 21 */
22 22
23 #include <linux/module.h> 23 #include <linux/module.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/init.h> 25 #include <linux/init.h>
26 #include <linux/smp_lock.h> 26 #include <linux/smp_lock.h>
27 #include <linux/acct.h> 27 #include <linux/acct.h>
28 #include <linux/blkdev.h> 28 #include <linux/blkdev.h>
29 #include <linux/quotaops.h> 29 #include <linux/quotaops.h>
30 #include <linux/namei.h> 30 #include <linux/namei.h>
31 #include <linux/buffer_head.h> /* for fsync_super() */ 31 #include <linux/buffer_head.h> /* for fsync_super() */
32 #include <linux/mount.h> 32 #include <linux/mount.h>
33 #include <linux/security.h> 33 #include <linux/security.h>
34 #include <linux/syscalls.h> 34 #include <linux/syscalls.h>
35 #include <linux/vfs.h> 35 #include <linux/vfs.h>
36 #include <linux/writeback.h> /* for the emergency remount stuff */ 36 #include <linux/writeback.h> /* for the emergency remount stuff */
37 #include <linux/idr.h> 37 #include <linux/idr.h>
38 #include <linux/kobject.h> 38 #include <linux/kobject.h>
39 #include <linux/mutex.h> 39 #include <linux/mutex.h>
40 #include <linux/file.h> 40 #include <linux/file.h>
41 #include <asm/uaccess.h> 41 #include <asm/uaccess.h>
42 #include "internal.h" 42 #include "internal.h"
43 43
44 44
45 LIST_HEAD(super_blocks); 45 LIST_HEAD(super_blocks);
46 DEFINE_SPINLOCK(sb_lock); 46 DEFINE_SPINLOCK(sb_lock);
47 47
48 /** 48 /**
49 * alloc_super - create new superblock 49 * alloc_super - create new superblock
50 * @type: filesystem type superblock should belong to 50 * @type: filesystem type superblock should belong to
51 * 51 *
52 * Allocates and initializes a new &struct super_block. alloc_super() 52 * Allocates and initializes a new &struct super_block. alloc_super()
53 * returns a pointer to a new superblock or %NULL if allocation failed. 53 * returns a pointer to a new superblock or %NULL if allocation failed.
54 */ 54 */
55 static struct super_block *alloc_super(struct file_system_type *type) 55 static struct super_block *alloc_super(struct file_system_type *type)
56 { 56 {
57 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 57 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
58 static struct super_operations default_op; 58 static struct super_operations default_op;
59 59
60 if (s) { 60 if (s) {
61 if (security_sb_alloc(s)) { 61 if (security_sb_alloc(s)) {
62 kfree(s); 62 kfree(s);
63 s = NULL; 63 s = NULL;
64 goto out; 64 goto out;
65 } 65 }
66 INIT_LIST_HEAD(&s->s_dirty); 66 INIT_LIST_HEAD(&s->s_dirty);
67 INIT_LIST_HEAD(&s->s_io); 67 INIT_LIST_HEAD(&s->s_io);
68 INIT_LIST_HEAD(&s->s_more_io); 68 INIT_LIST_HEAD(&s->s_more_io);
69 INIT_LIST_HEAD(&s->s_files); 69 INIT_LIST_HEAD(&s->s_files);
70 INIT_LIST_HEAD(&s->s_instances); 70 INIT_LIST_HEAD(&s->s_instances);
71 INIT_HLIST_HEAD(&s->s_anon); 71 INIT_HLIST_HEAD(&s->s_anon);
72 INIT_LIST_HEAD(&s->s_inodes); 72 INIT_LIST_HEAD(&s->s_inodes);
73 INIT_LIST_HEAD(&s->s_dentry_lru); 73 INIT_LIST_HEAD(&s->s_dentry_lru);
74 init_rwsem(&s->s_umount); 74 init_rwsem(&s->s_umount);
75 mutex_init(&s->s_lock); 75 mutex_init(&s->s_lock);
76 lockdep_set_class(&s->s_umount, &type->s_umount_key); 76 lockdep_set_class(&s->s_umount, &type->s_umount_key);
77 /* 77 /*
78 * The locking rules for s_lock are up to the 78 * The locking rules for s_lock are up to the
79 * filesystem. For example ext3fs has different 79 * filesystem. For example ext3fs has different
80 * lock ordering than usbfs: 80 * lock ordering than usbfs:
81 */ 81 */
82 lockdep_set_class(&s->s_lock, &type->s_lock_key); 82 lockdep_set_class(&s->s_lock, &type->s_lock_key);
83 /* 83 /*
84 * sget() can have s_umount recursion. 84 * sget() can have s_umount recursion.
85 * 85 *
86 * When it cannot find a suitable sb, it allocates a new 86 * When it cannot find a suitable sb, it allocates a new
87 * one (this one), and tries again to find a suitable old 87 * one (this one), and tries again to find a suitable old
88 * one. 88 * one.
89 * 89 *
90 * In case that succeeds, it will acquire the s_umount 90 * In case that succeeds, it will acquire the s_umount
91 * lock of the old one. Since these are clearly distinct 91 * lock of the old one. Since these are clearly distinct
92 * locks, and this object isn't exposed yet, there's no 92 * locks, and this object isn't exposed yet, there's no
93 * risk of deadlocks. 93 * risk of deadlocks.
94 * 94 *
95 * Annotate this by putting this lock in a different 95 * Annotate this by putting this lock in a different
96 * subclass. 96 * subclass.
97 */ 97 */
98 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); 98 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
99 s->s_count = S_BIAS; 99 s->s_count = S_BIAS;
100 atomic_set(&s->s_active, 1); 100 atomic_set(&s->s_active, 1);
101 mutex_init(&s->s_vfs_rename_mutex); 101 mutex_init(&s->s_vfs_rename_mutex);
102 mutex_init(&s->s_dquot.dqio_mutex); 102 mutex_init(&s->s_dquot.dqio_mutex);
103 mutex_init(&s->s_dquot.dqonoff_mutex); 103 mutex_init(&s->s_dquot.dqonoff_mutex);
104 init_rwsem(&s->s_dquot.dqptr_sem); 104 init_rwsem(&s->s_dquot.dqptr_sem);
105 init_waitqueue_head(&s->s_wait_unfrozen); 105 init_waitqueue_head(&s->s_wait_unfrozen);
106 s->s_maxbytes = MAX_NON_LFS; 106 s->s_maxbytes = MAX_NON_LFS;
107 s->dq_op = sb_dquot_ops; 107 s->dq_op = sb_dquot_ops;
108 s->s_qcop = sb_quotactl_ops; 108 s->s_qcop = sb_quotactl_ops;
109 s->s_op = &default_op; 109 s->s_op = &default_op;
110 s->s_time_gran = 1000000000; 110 s->s_time_gran = 1000000000;
111 } 111 }
112 out: 112 out:
113 return s; 113 return s;
114 } 114 }
115 115
116 /** 116 /**
117 * destroy_super - frees a superblock 117 * destroy_super - frees a superblock
118 * @s: superblock to free 118 * @s: superblock to free
119 * 119 *
120 * Frees a superblock. 120 * Frees a superblock.
121 */ 121 */
122 static inline void destroy_super(struct super_block *s) 122 static inline void destroy_super(struct super_block *s)
123 { 123 {
124 security_sb_free(s); 124 security_sb_free(s);
125 kfree(s->s_subtype); 125 kfree(s->s_subtype);
126 kfree(s->s_options); 126 kfree(s->s_options);
127 kfree(s); 127 kfree(s);
128 } 128 }
129 129
130 /* Superblock refcounting */ 130 /* Superblock refcounting */
131 131
132 /* 132 /*
133 * Drop a superblock's refcount. Returns non-zero if the superblock was 133 * Drop a superblock's refcount. Returns non-zero if the superblock was
134 * destroyed. The caller must hold sb_lock. 134 * destroyed. The caller must hold sb_lock.
135 */ 135 */
136 static int __put_super(struct super_block *sb) 136 static int __put_super(struct super_block *sb)
137 { 137 {
138 int ret = 0; 138 int ret = 0;
139 139
140 if (!--sb->s_count) { 140 if (!--sb->s_count) {
141 destroy_super(sb); 141 destroy_super(sb);
142 ret = 1; 142 ret = 1;
143 } 143 }
144 return ret; 144 return ret;
145 } 145 }
146 146
147 /* 147 /*
148 * Drop a superblock's refcount. 148 * Drop a superblock's refcount.
149 * Returns non-zero if the superblock is about to be destroyed and 149 * Returns non-zero if the superblock is about to be destroyed and
150 * at least is already removed from super_blocks list, so if we are 150 * at least is already removed from super_blocks list, so if we are
151 * making a loop through super blocks then we need to restart. 151 * making a loop through super blocks then we need to restart.
152 * The caller must hold sb_lock. 152 * The caller must hold sb_lock.
153 */ 153 */
154 int __put_super_and_need_restart(struct super_block *sb) 154 int __put_super_and_need_restart(struct super_block *sb)
155 { 155 {
156 /* check for race with generic_shutdown_super() */ 156 /* check for race with generic_shutdown_super() */
157 if (list_empty(&sb->s_list)) { 157 if (list_empty(&sb->s_list)) {
158 /* super block is removed, need to restart... */ 158 /* super block is removed, need to restart... */
159 __put_super(sb); 159 __put_super(sb);
160 return 1; 160 return 1;
161 } 161 }
162 /* can't be the last, since s_list is still in use */ 162 /* can't be the last, since s_list is still in use */
163 sb->s_count--; 163 sb->s_count--;
164 BUG_ON(sb->s_count == 0); 164 BUG_ON(sb->s_count == 0);
165 return 0; 165 return 0;
166 } 166 }
167 167
168 /** 168 /**
169 * put_super - drop a temporary reference to superblock 169 * put_super - drop a temporary reference to superblock
170 * @sb: superblock in question 170 * @sb: superblock in question
171 * 171 *
172 * Drops a temporary reference, frees the superblock if there are no 172 * Drops a temporary reference, frees the superblock if there are no
173 * references left. 173 * references left.
174 */ 174 */
175 static void put_super(struct super_block *sb) 175 static void put_super(struct super_block *sb)
176 { 176 {
177 spin_lock(&sb_lock); 177 spin_lock(&sb_lock);
178 __put_super(sb); 178 __put_super(sb);
179 spin_unlock(&sb_lock); 179 spin_unlock(&sb_lock);
180 } 180 }
181 181
182 182
183 /** 183 /**
184 * deactivate_super - drop an active reference to superblock 184 * deactivate_super - drop an active reference to superblock
185 * @s: superblock to deactivate 185 * @s: superblock to deactivate
186 * 186 *
187 * Drops an active reference to the superblock, acquiring a temporary one if 187 * Drops an active reference to the superblock, acquiring a temporary one if
188 * there are no active references left. In that case we lock the superblock, 188 * there are no active references left. In that case we lock the superblock,
189 * tell fs driver to shut it down and drop the temporary reference we 189 * tell fs driver to shut it down and drop the temporary reference we
190 * had just acquired. 190 * had just acquired.
191 */ 191 */
192 void deactivate_super(struct super_block *s) 192 void deactivate_super(struct super_block *s)
193 { 193 {
194 struct file_system_type *fs = s->s_type; 194 struct file_system_type *fs = s->s_type;
195 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 195 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
196 s->s_count -= S_BIAS-1; 196 s->s_count -= S_BIAS-1;
197 spin_unlock(&sb_lock); 197 spin_unlock(&sb_lock);
198 vfs_dq_off(s, 0); 198 vfs_dq_off(s, 0);
199 down_write(&s->s_umount); 199 down_write(&s->s_umount);
200 fs->kill_sb(s); 200 fs->kill_sb(s);
201 put_filesystem(fs); 201 put_filesystem(fs);
202 put_super(s); 202 put_super(s);
203 } 203 }
204 } 204 }
205 205
206 EXPORT_SYMBOL(deactivate_super); 206 EXPORT_SYMBOL(deactivate_super);
207 207
208 /** 208 /**
209 * deactivate_locked_super - drop an active reference to superblock 209 * deactivate_locked_super - drop an active reference to superblock
210 * @s: superblock to deactivate 210 * @s: superblock to deactivate
211 * 211 *
212 * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that 212 * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that
213 * it does not unlock it until it's all over. As a result, it's safe to 213 * it does not unlock it until it's all over. As a result, it's safe to
214 * use to dispose of a new superblock on ->get_sb() failure exits - nobody 214 * use to dispose of a new superblock on ->get_sb() failure exits - nobody
215 * will see the sucker until it's all over. The equivalent up_write + 215 * will see the sucker until it's all over. The equivalent up_write +
216 * deactivate_super sequence is safe for that purpose only if the superblock 216 * deactivate_super sequence is safe for that purpose only if the superblock
217 * is either safe to use or has a NULL ->s_root when we unlock. 217 * is either safe to use or has a NULL ->s_root when we unlock.
218 */ 218 */
219 void deactivate_locked_super(struct super_block *s) 219 void deactivate_locked_super(struct super_block *s)
220 { 220 {
221 struct file_system_type *fs = s->s_type; 221 struct file_system_type *fs = s->s_type;
222 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 222 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
223 s->s_count -= S_BIAS-1; 223 s->s_count -= S_BIAS-1;
224 spin_unlock(&sb_lock); 224 spin_unlock(&sb_lock);
225 vfs_dq_off(s, 0); 225 vfs_dq_off(s, 0);
226 fs->kill_sb(s); 226 fs->kill_sb(s);
227 put_filesystem(fs); 227 put_filesystem(fs);
228 put_super(s); 228 put_super(s);
229 } else { 229 } else {
230 up_write(&s->s_umount); 230 up_write(&s->s_umount);
231 } 231 }
232 } 232 }
233 233
234 EXPORT_SYMBOL(deactivate_locked_super); 234 EXPORT_SYMBOL(deactivate_locked_super);
235 235
236 /** 236 /**
237 * grab_super - acquire an active reference 237 * grab_super - acquire an active reference
238 * @s: reference we are trying to make active 238 * @s: reference we are trying to make active
239 * 239 *
240 * Tries to acquire an active reference. grab_super() is used when we 240 * Tries to acquire an active reference. grab_super() is used when we
241 * had just found a superblock in super_blocks or fs_type->fs_supers 241 * had just found a superblock in super_blocks or fs_type->fs_supers
242 * and want to turn it into a full-blown active reference. grab_super() 242 * and want to turn it into a full-blown active reference. grab_super()
243 * is called with sb_lock held and drops it. Returns 1 in case of 243 * is called with sb_lock held and drops it. Returns 1 in case of
244 * success, 0 on failure (the superblock contents were already dead or 244 * success, 0 on failure (the superblock contents were already dead or
245 * dying when grab_super() was called). 245 * dying when grab_super() was called).
246 */ 246 */
247 static int grab_super(struct super_block *s) __releases(sb_lock) 247 static int grab_super(struct super_block *s) __releases(sb_lock)
248 { 248 {
249 s->s_count++; 249 s->s_count++;
250 spin_unlock(&sb_lock); 250 spin_unlock(&sb_lock);
251 down_write(&s->s_umount); 251 down_write(&s->s_umount);
252 if (s->s_root) { 252 if (s->s_root) {
253 spin_lock(&sb_lock); 253 spin_lock(&sb_lock);
254 if (s->s_count > S_BIAS) { 254 if (s->s_count > S_BIAS) {
255 atomic_inc(&s->s_active); 255 atomic_inc(&s->s_active);
256 s->s_count--; 256 s->s_count--;
257 spin_unlock(&sb_lock); 257 spin_unlock(&sb_lock);
258 return 1; 258 return 1;
259 } 259 }
260 spin_unlock(&sb_lock); 260 spin_unlock(&sb_lock);
261 } 261 }
262 up_write(&s->s_umount); 262 up_write(&s->s_umount);
263 put_super(s); 263 put_super(s);
264 yield(); 264 yield();
265 return 0; 265 return 0;
266 } 266 }
267 267
268 /* 268 /*
269 * Superblock locking. We really ought to get rid of these two. 269 * Superblock locking. We really ought to get rid of these two.
270 */ 270 */
271 void lock_super(struct super_block * sb) 271 void lock_super(struct super_block * sb)
272 { 272 {
273 get_fs_excl(); 273 get_fs_excl();
274 mutex_lock(&sb->s_lock); 274 mutex_lock(&sb->s_lock);
275 } 275 }
276 276
277 void unlock_super(struct super_block * sb) 277 void unlock_super(struct super_block * sb)
278 { 278 {
279 put_fs_excl(); 279 put_fs_excl();
280 mutex_unlock(&sb->s_lock); 280 mutex_unlock(&sb->s_lock);
281 } 281 }
282 282
283 EXPORT_SYMBOL(lock_super); 283 EXPORT_SYMBOL(lock_super);
284 EXPORT_SYMBOL(unlock_super); 284 EXPORT_SYMBOL(unlock_super);
285 285
286 /* 286 /*
287 * Write out and wait upon all dirty data associated with this 287 * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
288 * superblock. Filesystem data as well as the underlying block 288 * just dirties buffers with inodes so we have to submit IO for these buffers
289 * device. Takes the superblock lock. Requires a second blkdev 289 * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
290 * flush by the caller to complete the operation. 290 * case write_inode() functions do sync_dirty_buffer() and thus effectively
291 * write one block at a time.
291 */ 292 */
292 static int __fsync_super(struct super_block *sb) 293 static int __fsync_super(struct super_block *sb, int wait)
293 { 294 {
294 sync_inodes_sb(sb, 0);
295 vfs_dq_sync(sb); 295 vfs_dq_sync(sb);
296 sync_inodes_sb(sb, 1); 296 sync_inodes_sb(sb, wait);
297 lock_super(sb); 297 lock_super(sb);
298 if (sb->s_dirt && sb->s_op->write_super) 298 if (sb->s_dirt && sb->s_op->write_super)
299 sb->s_op->write_super(sb); 299 sb->s_op->write_super(sb);
300 unlock_super(sb); 300 unlock_super(sb);
301 if (sb->s_op->sync_fs) 301 if (sb->s_op->sync_fs)
302 sb->s_op->sync_fs(sb, 1); 302 sb->s_op->sync_fs(sb, wait);
303 return sync_blockdev(sb->s_bdev); 303 return __sync_blockdev(sb->s_bdev, wait);
304 } 304 }
305 305
306 /* 306 /*
307 * Write out and wait upon all dirty data associated with this 307 * Write out and wait upon all dirty data associated with this
308 * superblock. Filesystem data as well as the underlying block 308 * superblock. Filesystem data as well as the underlying block
309 * device. Takes the superblock lock. 309 * device. Takes the superblock lock.
310 */ 310 */
311 int fsync_super(struct super_block *sb) 311 int fsync_super(struct super_block *sb)
312 { 312 {
313 return __fsync_super(sb); 313 int ret;
314
315 ret = __fsync_super(sb, 0);
316 if (ret < 0)
317 return ret;
318 return __fsync_super(sb, 1);
314 } 319 }
315 EXPORT_SYMBOL_GPL(fsync_super); 320 EXPORT_SYMBOL_GPL(fsync_super);
316 321
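__fsync_super() forwards its wait argument to ->sync_fs(), so filesystems see
the usual convention: start I/O when wait == 0, block until it completes when
wait == 1. A hypothetical ->sync_fs() shape illustrating that contract
(example_sync_fs() is not from this patch):

static int example_sync_fs(struct super_block *sb, int wait)
{
        /* wait == 0: kick off writeout of fs-private metadata */
        if (wait) {
                /* wait == 1: block until that writeout has reached disk */
        }
        return 0;
}
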
317 /** 322 /**
318 * generic_shutdown_super - common helper for ->kill_sb() 323 * generic_shutdown_super - common helper for ->kill_sb()
319 * @sb: superblock to kill 324 * @sb: superblock to kill
320 * 325 *
321 * generic_shutdown_super() does all fs-independent work on superblock 326 * generic_shutdown_super() does all fs-independent work on superblock
322 * shutdown. Typical ->kill_sb() should pick all fs-specific objects 327 * shutdown. Typical ->kill_sb() should pick all fs-specific objects
323 * that need destruction out of superblock, call generic_shutdown_super() 328 * that need destruction out of superblock, call generic_shutdown_super()
324 * and release aforementioned objects. Note: dentries and inodes _are_ 329 * and release aforementioned objects. Note: dentries and inodes _are_
325 * taken care of and do not need specific handling. 330 * taken care of and do not need specific handling.
326 * 331 *
327 * Upon calling this function, the filesystem may no longer alter or 332 * Upon calling this function, the filesystem may no longer alter or
328 * rearrange the set of dentries belonging to this super_block, nor may it 333 * rearrange the set of dentries belonging to this super_block, nor may it
329 * change the attachments of dentries to inodes. 334 * change the attachments of dentries to inodes.
330 */ 335 */
331 void generic_shutdown_super(struct super_block *sb) 336 void generic_shutdown_super(struct super_block *sb)
332 { 337 {
333 const struct super_operations *sop = sb->s_op; 338 const struct super_operations *sop = sb->s_op;
334 339
335 340
336 if (sb->s_root) { 341 if (sb->s_root) {
337 shrink_dcache_for_umount(sb); 342 shrink_dcache_for_umount(sb);
338 fsync_super(sb); 343 fsync_super(sb);
339 lock_super(sb); 344 lock_super(sb);
340 sb->s_flags &= ~MS_ACTIVE; 345 sb->s_flags &= ~MS_ACTIVE;
341 346
342 /* bad name - it should be evict_inodes() */ 347 /* bad name - it should be evict_inodes() */
343 invalidate_inodes(sb); 348 invalidate_inodes(sb);
344 lock_kernel(); 349 lock_kernel();
345 350
346 if (sop->write_super && sb->s_dirt) 351 if (sop->write_super && sb->s_dirt)
347 sop->write_super(sb); 352 sop->write_super(sb);
348 if (sop->put_super) 353 if (sop->put_super)
349 sop->put_super(sb); 354 sop->put_super(sb);
350 355
351 /* Forget any remaining inodes */ 356 /* Forget any remaining inodes */
352 if (invalidate_inodes(sb)) { 357 if (invalidate_inodes(sb)) {
353 printk("VFS: Busy inodes after unmount of %s. " 358 printk("VFS: Busy inodes after unmount of %s. "
354 "Self-destruct in 5 seconds. Have a nice day...\n", 359 "Self-destruct in 5 seconds. Have a nice day...\n",
355 sb->s_id); 360 sb->s_id);
356 } 361 }
357 362
358 unlock_kernel(); 363 unlock_kernel();
359 unlock_super(sb); 364 unlock_super(sb);
360 } 365 }
361 spin_lock(&sb_lock); 366 spin_lock(&sb_lock);
362 /* should be initialized for __put_super_and_need_restart() */ 367 /* should be initialized for __put_super_and_need_restart() */
363 list_del_init(&sb->s_list); 368 list_del_init(&sb->s_list);
364 list_del(&sb->s_instances); 369 list_del(&sb->s_instances);
365 spin_unlock(&sb_lock); 370 spin_unlock(&sb_lock);
366 up_write(&sb->s_umount); 371 up_write(&sb->s_umount);
367 } 372 }
368 373
369 EXPORT_SYMBOL(generic_shutdown_super); 374 EXPORT_SYMBOL(generic_shutdown_super);
370 375
371 /** 376 /**
372 * sget - find or create a superblock 377 * sget - find or create a superblock
373 * @type: filesystem type superblock should belong to 378 * @type: filesystem type superblock should belong to
374 * @test: comparison callback 379 * @test: comparison callback
375 * @set: setup callback 380 * @set: setup callback
376 * @data: argument to each of them 381 * @data: argument to each of them
377 */ 382 */
378 struct super_block *sget(struct file_system_type *type, 383 struct super_block *sget(struct file_system_type *type,
379 int (*test)(struct super_block *,void *), 384 int (*test)(struct super_block *,void *),
380 int (*set)(struct super_block *,void *), 385 int (*set)(struct super_block *,void *),
381 void *data) 386 void *data)
382 { 387 {
383 struct super_block *s = NULL; 388 struct super_block *s = NULL;
384 struct super_block *old; 389 struct super_block *old;
385 int err; 390 int err;
386 391
387 retry: 392 retry:
388 spin_lock(&sb_lock); 393 spin_lock(&sb_lock);
389 if (test) { 394 if (test) {
390 list_for_each_entry(old, &type->fs_supers, s_instances) { 395 list_for_each_entry(old, &type->fs_supers, s_instances) {
391 if (!test(old, data)) 396 if (!test(old, data))
392 continue; 397 continue;
393 if (!grab_super(old)) 398 if (!grab_super(old))
394 goto retry; 399 goto retry;
395 if (s) { 400 if (s) {
396 up_write(&s->s_umount); 401 up_write(&s->s_umount);
397 destroy_super(s); 402 destroy_super(s);
398 } 403 }
399 return old; 404 return old;
400 } 405 }
401 } 406 }
402 if (!s) { 407 if (!s) {
403 spin_unlock(&sb_lock); 408 spin_unlock(&sb_lock);
404 s = alloc_super(type); 409 s = alloc_super(type);
405 if (!s) 410 if (!s)
406 return ERR_PTR(-ENOMEM); 411 return ERR_PTR(-ENOMEM);
407 goto retry; 412 goto retry;
408 } 413 }
409 414
410 err = set(s, data); 415 err = set(s, data);
411 if (err) { 416 if (err) {
412 spin_unlock(&sb_lock); 417 spin_unlock(&sb_lock);
413 up_write(&s->s_umount); 418 up_write(&s->s_umount);
414 destroy_super(s); 419 destroy_super(s);
415 return ERR_PTR(err); 420 return ERR_PTR(err);
416 } 421 }
417 s->s_type = type; 422 s->s_type = type;
418 strlcpy(s->s_id, type->name, sizeof(s->s_id)); 423 strlcpy(s->s_id, type->name, sizeof(s->s_id));
419 list_add_tail(&s->s_list, &super_blocks); 424 list_add_tail(&s->s_list, &super_blocks);
420 list_add(&s->s_instances, &type->fs_supers); 425 list_add(&s->s_instances, &type->fs_supers);
421 spin_unlock(&sb_lock); 426 spin_unlock(&sb_lock);
422 get_filesystem(type); 427 get_filesystem(type);
423 return s; 428 return s;
424 } 429 }
425 430
426 EXPORT_SYMBOL(sget); 431 EXPORT_SYMBOL(sget);
427 432
428 void drop_super(struct super_block *sb) 433 void drop_super(struct super_block *sb)
429 { 434 {
430 up_read(&sb->s_umount); 435 up_read(&sb->s_umount);
431 put_super(sb); 436 put_super(sb);
432 } 437 }
433 438
434 EXPORT_SYMBOL(drop_super); 439 EXPORT_SYMBOL(drop_super);
435 440
436 static inline void write_super(struct super_block *sb) 441 static inline void write_super(struct super_block *sb)
437 { 442 {
438 lock_super(sb); 443 lock_super(sb);
439 if (sb->s_root && sb->s_dirt) 444 if (sb->s_root && sb->s_dirt)
440 if (sb->s_op->write_super) 445 if (sb->s_op->write_super)
441 sb->s_op->write_super(sb); 446 sb->s_op->write_super(sb);
442 unlock_super(sb); 447 unlock_super(sb);
443 } 448 }
444 449
445 /* 450 /*
446 * Note: check the dirty flag before waiting, so we don't 451 * Note: check the dirty flag before waiting, so we don't
447 * hold up the sync while mounting a device. (The newly 452 * hold up the sync while mounting a device. (The newly
448 * mounted device won't need syncing.) 453 * mounted device won't need syncing.)
449 */ 454 */
450 void sync_supers(void) 455 void sync_supers(void)
451 { 456 {
452 struct super_block *sb; 457 struct super_block *sb;
453 458
454 spin_lock(&sb_lock); 459 spin_lock(&sb_lock);
455 restart: 460 restart:
456 list_for_each_entry(sb, &super_blocks, s_list) { 461 list_for_each_entry(sb, &super_blocks, s_list) {
457 if (sb->s_dirt) { 462 if (sb->s_dirt) {
458 sb->s_count++; 463 sb->s_count++;
459 spin_unlock(&sb_lock); 464 spin_unlock(&sb_lock);
460 down_read(&sb->s_umount); 465 down_read(&sb->s_umount);
461 write_super(sb); 466 write_super(sb);
462 up_read(&sb->s_umount); 467 up_read(&sb->s_umount);
463 spin_lock(&sb_lock); 468 spin_lock(&sb_lock);
464 if (__put_super_and_need_restart(sb)) 469 if (__put_super_and_need_restart(sb))
465 goto restart; 470 goto restart;
466 } 471 }
467 } 472 }
468 spin_unlock(&sb_lock); 473 spin_unlock(&sb_lock);
469 } 474 }
470 475
471 /* 476 /*
472 * Call the ->sync_fs super_op against all filesystems which are r/w and 477 * Sync all the data for all the filesystems (called by sys_sync() and
473 * which implement it. 478 * emergency sync)
474 * 479 *
475 * This operation is careful to avoid the livelock which could easily happen 480 * This operation is careful to avoid the livelock which could easily happen
476 * if two or more filesystems are being continuously dirtied. s_need_sync_fs 481 * if two or more filesystems are being continuously dirtied. s_need_sync
477 * is used only here. We set it against all filesystems and then clear it as 482 * is used only here. We set it against all filesystems and then clear it as
478 * we sync them. So redirtied filesystems are skipped. 483 * we sync them. So redirtied filesystems are skipped.
479 * 484 *
480 * But if process A is currently running sync_filesystems and then process B 485 * But if process A is currently running sync_filesystems and then process B
481 * calls sync_filesystems as well, process B will set all the s_need_sync_fs 486 * calls sync_filesystems as well, process B will set all the s_need_sync
482 * flags again, which will cause process A to resync everything. Fix that with 487 * flags again, which will cause process A to resync everything. Fix that with
483 * a local mutex. 488 * a local mutex.
484 *
485 * (Fabian) Avoid sync_fs with clean fs & wait mode 0
486 */ 489 */
487 void sync_filesystems(int wait) 490 void sync_filesystems(int wait)
488 { 491 {
489 struct super_block *sb; 492 struct super_block *sb;
490 static DEFINE_MUTEX(mutex); 493 static DEFINE_MUTEX(mutex);
491 494
492 mutex_lock(&mutex); /* Could be down_interruptible */ 495 mutex_lock(&mutex); /* Could be down_interruptible */
493 spin_lock(&sb_lock); 496 spin_lock(&sb_lock);
494 list_for_each_entry(sb, &super_blocks, s_list) { 497 list_for_each_entry(sb, &super_blocks, s_list) {
495 if (!sb->s_op->sync_fs)
496 continue;
497 if (sb->s_flags & MS_RDONLY) 498 if (sb->s_flags & MS_RDONLY)
498 continue; 499 continue;
499 sb->s_need_sync_fs = 1; 500 sb->s_need_sync = 1;
500 } 501 }
501 502
502 restart: 503 restart:
503 list_for_each_entry(sb, &super_blocks, s_list) { 504 list_for_each_entry(sb, &super_blocks, s_list) {
504 if (!sb->s_need_sync_fs) 505 if (!sb->s_need_sync)
505 continue; 506 continue;
506 sb->s_need_sync_fs = 0; 507 sb->s_need_sync = 0;
507 if (sb->s_flags & MS_RDONLY) 508 if (sb->s_flags & MS_RDONLY)
508 continue; /* hm. Was remounted r/o meanwhile */ 509 continue; /* hm. Was remounted r/o meanwhile */
509 sb->s_count++; 510 sb->s_count++;
510 spin_unlock(&sb_lock); 511 spin_unlock(&sb_lock);
511 down_read(&sb->s_umount); 512 down_read(&sb->s_umount);
512 if (sb->s_root) 513 if (sb->s_root)
513 sb->s_op->sync_fs(sb, wait); 514 __fsync_super(sb, wait);
514 up_read(&sb->s_umount); 515 up_read(&sb->s_umount);
515 /* restart only when sb is no longer on the list */ 516 /* restart only when sb is no longer on the list */
516 spin_lock(&sb_lock); 517 spin_lock(&sb_lock);
517 if (__put_super_and_need_restart(sb)) 518 if (__put_super_and_need_restart(sb))
518 goto restart; 519 goto restart;
519 } 520 }
520 spin_unlock(&sb_lock); 521 spin_unlock(&sb_lock);
521 mutex_unlock(&mutex); 522 mutex_unlock(&mutex);
522 } 523 }
523
524 #ifdef CONFIG_BLOCK
525 /*
526 * Sync all block devices underlying some superblock
527 */
528 void sync_blockdevs(void)
529 {
530 struct super_block *sb;
531
532 spin_lock(&sb_lock);
533 restart:
534 list_for_each_entry(sb, &super_blocks, s_list) {
535 if (!sb->s_bdev)
536 continue;
537 sb->s_count++;
538 spin_unlock(&sb_lock);
539 down_read(&sb->s_umount);
540 if (sb->s_root)
541 sync_blockdev(sb->s_bdev);
542 up_read(&sb->s_umount);
543 spin_lock(&sb_lock);
544 if (__put_super_and_need_restart(sb))
545 goto restart;
546 }
547 spin_unlock(&sb_lock);
548 }
549 #endif
550 524
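With sync_filesystems() now calling __fsync_super() directly, sys_sync()
reduces to one non-waiting pass followed by one waiting pass. A hedged sketch
of that caller shape (the real do_sync() lives in fs/sync.c and is not part of
this hunk):

static void example_do_sync(void)
{
        sync_filesystems(0);    /* start writeout everywhere */
        sync_filesystems(1);    /* then wait for it to complete */
}
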
551 /** 525 /**
552 * get_super - get the superblock of a device 526 * get_super - get the superblock of a device
553 * @bdev: device to get the superblock for 527 * @bdev: device to get the superblock for
554 * 528 *
555 * Scans the superblock list and finds the superblock of the file system 529 * Scans the superblock list and finds the superblock of the file system
556 * mounted on the device given. %NULL is returned if no match is found. 530 * mounted on the device given. %NULL is returned if no match is found.
557 */ 531 */
558 532
559 struct super_block * get_super(struct block_device *bdev) 533 struct super_block * get_super(struct block_device *bdev)
560 { 534 {
561 struct super_block *sb; 535 struct super_block *sb;
562 536
563 if (!bdev) 537 if (!bdev)
564 return NULL; 538 return NULL;
565 539
566 spin_lock(&sb_lock); 540 spin_lock(&sb_lock);
567 rescan: 541 rescan:
568 list_for_each_entry(sb, &super_blocks, s_list) { 542 list_for_each_entry(sb, &super_blocks, s_list) {
569 if (sb->s_bdev == bdev) { 543 if (sb->s_bdev == bdev) {
570 sb->s_count++; 544 sb->s_count++;
571 spin_unlock(&sb_lock); 545 spin_unlock(&sb_lock);
572 down_read(&sb->s_umount); 546 down_read(&sb->s_umount);
573 if (sb->s_root) 547 if (sb->s_root)
574 return sb; 548 return sb;
575 up_read(&sb->s_umount); 549 up_read(&sb->s_umount);
576 /* restart only when sb is no longer on the list */ 550 /* restart only when sb is no longer on the list */
577 spin_lock(&sb_lock); 551 spin_lock(&sb_lock);
578 if (__put_super_and_need_restart(sb)) 552 if (__put_super_and_need_restart(sb))
579 goto rescan; 553 goto rescan;
580 } 554 }
581 } 555 }
582 spin_unlock(&sb_lock); 556 spin_unlock(&sb_lock);
583 return NULL; 557 return NULL;
584 } 558 }
585 559
586 EXPORT_SYMBOL(get_super); 560 EXPORT_SYMBOL(get_super);
587 561
588 struct super_block * user_get_super(dev_t dev) 562 struct super_block * user_get_super(dev_t dev)
589 { 563 {
590 struct super_block *sb; 564 struct super_block *sb;
591 565
592 spin_lock(&sb_lock); 566 spin_lock(&sb_lock);
593 rescan: 567 rescan:
594 list_for_each_entry(sb, &super_blocks, s_list) { 568 list_for_each_entry(sb, &super_blocks, s_list) {
595 if (sb->s_dev == dev) { 569 if (sb->s_dev == dev) {
596 sb->s_count++; 570 sb->s_count++;
597 spin_unlock(&sb_lock); 571 spin_unlock(&sb_lock);
598 down_read(&sb->s_umount); 572 down_read(&sb->s_umount);
599 if (sb->s_root) 573 if (sb->s_root)
600 return sb; 574 return sb;
601 up_read(&sb->s_umount); 575 up_read(&sb->s_umount);
602 /* restart only when sb is no longer on the list */ 576 /* restart only when sb is no longer on the list */
603 spin_lock(&sb_lock); 577 spin_lock(&sb_lock);
604 if (__put_super_and_need_restart(sb)) 578 if (__put_super_and_need_restart(sb))
605 goto rescan; 579 goto rescan;
606 } 580 }
607 } 581 }
608 spin_unlock(&sb_lock); 582 spin_unlock(&sb_lock);
609 return NULL; 583 return NULL;
610 } 584 }
611 585
612 SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) 586 SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
613 { 587 {
614 struct super_block *s; 588 struct super_block *s;
615 struct ustat tmp; 589 struct ustat tmp;
616 struct kstatfs sbuf; 590 struct kstatfs sbuf;
617 int err = -EINVAL; 591 int err = -EINVAL;
618 592
619 s = user_get_super(new_decode_dev(dev)); 593 s = user_get_super(new_decode_dev(dev));
620 if (s == NULL) 594 if (s == NULL)
621 goto out; 595 goto out;
622 err = vfs_statfs(s->s_root, &sbuf); 596 err = vfs_statfs(s->s_root, &sbuf);
623 drop_super(s); 597 drop_super(s);
624 if (err) 598 if (err)
625 goto out; 599 goto out;
626 600
627 memset(&tmp,0,sizeof(struct ustat)); 601 memset(&tmp,0,sizeof(struct ustat));
628 tmp.f_tfree = sbuf.f_bfree; 602 tmp.f_tfree = sbuf.f_bfree;
629 tmp.f_tinode = sbuf.f_ffree; 603 tmp.f_tinode = sbuf.f_ffree;
630 604
631 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0; 605 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
632 out: 606 out:
633 return err; 607 return err;
634 } 608 }
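
As the body above shows, sys_ustat() is a thin wrapper: it resolves the superblock, calls vfs_statfs(), and copies f_bfree and f_ffree into the ustat result. The same numbers are available from userspace through statfs(2), which is also the non-deprecated interface; a small runnable sketch:

    #include <stdio.h>
    #include <sys/statfs.h>

    int main(int argc, char **argv)
    {
            struct statfs sbuf;
            const char *path = argc > 1 ? argv[1] : "/";

            if (statfs(path, &sbuf) != 0) {
                    perror("statfs");
                    return 1;
            }
            /* These two fields are exactly what sys_ustat() reports
             * as f_tfree and f_tinode. */
            printf("%s: %llu free blocks, %llu free inodes\n", path,
                   (unsigned long long)sbuf.f_bfree,
                   (unsigned long long)sbuf.f_ffree);
            return 0;
    }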
635 609
636 /** 610 /**
637 * do_remount_sb - asks filesystem to change mount options. 611 * do_remount_sb - asks filesystem to change mount options.
638 * @sb: superblock in question 612 * @sb: superblock in question
639 * @flags: numeric part of options 613 * @flags: numeric part of options
640 * @data: the rest of options 614 * @data: the rest of options
641 * @force: whether or not to force the change 615 * @force: whether or not to force the change
642 * 616 *
643 * Alters the mount options of a mounted file system. 617 * Alters the mount options of a mounted file system.
644 */ 618 */
645 int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 619 int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
646 { 620 {
647 int retval; 621 int retval;
648 int remount_rw; 622 int remount_rw;
649 623
650 #ifdef CONFIG_BLOCK 624 #ifdef CONFIG_BLOCK
651 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 625 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
652 return -EACCES; 626 return -EACCES;
653 #endif 627 #endif
654 if (flags & MS_RDONLY) 628 if (flags & MS_RDONLY)
655 acct_auto_close(sb); 629 acct_auto_close(sb);
656 shrink_dcache_sb(sb); 630 shrink_dcache_sb(sb);
657 fsync_super(sb); 631 fsync_super(sb);
658 632
659 /* If we are remounting RDONLY and current sb is read/write, 633 /* If we are remounting RDONLY and current sb is read/write,
660 make sure there are no rw files opened */ 634 make sure there are no rw files opened */
661 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { 635 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
662 if (force) 636 if (force)
663 mark_files_ro(sb); 637 mark_files_ro(sb);
664 else if (!fs_may_remount_ro(sb)) 638 else if (!fs_may_remount_ro(sb))
665 return -EBUSY; 639 return -EBUSY;
666 retval = vfs_dq_off(sb, 1); 640 retval = vfs_dq_off(sb, 1);
667 if (retval < 0 && retval != -ENOSYS) 641 if (retval < 0 && retval != -ENOSYS)
668 return -EBUSY; 642 return -EBUSY;
669 } 643 }
670 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); 644 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
671 645
672 if (sb->s_op->remount_fs) { 646 if (sb->s_op->remount_fs) {
673 lock_super(sb); 647 lock_super(sb);
674 retval = sb->s_op->remount_fs(sb, &flags, data); 648 retval = sb->s_op->remount_fs(sb, &flags, data);
675 unlock_super(sb); 649 unlock_super(sb);
676 if (retval) 650 if (retval)
677 return retval; 651 return retval;
678 } 652 }
679 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 653 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
680 if (remount_rw) 654 if (remount_rw)
681 vfs_dq_quota_on_remount(sb); 655 vfs_dq_quota_on_remount(sb);
682 return 0; 656 return 0;
683 } 657 }
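
do_remount_sb() is where a userspace remount lands. Note the ordering: the filesystem is flushed with fsync_super() before the flags change, and a transition to read-only fails with -EBUSY (unless forced) while files are still open for writing. A minimal caller, assuming /mnt is already mounted and the process has CAP_SYS_ADMIN:

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* Remount /mnt read-only; the source and fstype arguments
             * are ignored for MS_REMOUNT, only flags and target matter. */
            if (mount("none", "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL) != 0) {
                    perror("mount");   /* EBUSY if rw files are still open */
                    return 1;
            }
            return 0;
    }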
684 658
685 static void do_emergency_remount(struct work_struct *work) 659 static void do_emergency_remount(struct work_struct *work)
686 { 660 {
687 struct super_block *sb; 661 struct super_block *sb;
688 662
689 spin_lock(&sb_lock); 663 spin_lock(&sb_lock);
690 list_for_each_entry(sb, &super_blocks, s_list) { 664 list_for_each_entry(sb, &super_blocks, s_list) {
691 sb->s_count++; 665 sb->s_count++;
692 spin_unlock(&sb_lock); 666 spin_unlock(&sb_lock);
693 down_read(&sb->s_umount); 667 down_read(&sb->s_umount);
694 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { 668 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) {
695 /* 669 /*
696 * ->remount_fs needs lock_kernel(). 670 * ->remount_fs needs lock_kernel().
697 * 671 *
698 * What lock protects sb->s_flags?? 672 * What lock protects sb->s_flags??
699 */ 673 */
700 lock_kernel(); 674 lock_kernel();
701 do_remount_sb(sb, MS_RDONLY, NULL, 1); 675 do_remount_sb(sb, MS_RDONLY, NULL, 1);
702 unlock_kernel(); 676 unlock_kernel();
703 } 677 }
704 drop_super(sb); 678 drop_super(sb);
705 spin_lock(&sb_lock); 679 spin_lock(&sb_lock);
706 } 680 }
707 spin_unlock(&sb_lock); 681 spin_unlock(&sb_lock);
708 kfree(work); 682 kfree(work);
709 printk("Emergency Remount complete\n"); 683 printk("Emergency Remount complete\n");
710 } 684 }
711 685
712 void emergency_remount(void) 686 void emergency_remount(void)
713 { 687 {
714 struct work_struct *work; 688 struct work_struct *work;
715 689
716 work = kmalloc(sizeof(*work), GFP_ATOMIC); 690 work = kmalloc(sizeof(*work), GFP_ATOMIC);
717 if (work) { 691 if (work) {
718 INIT_WORK(work, do_emergency_remount); 692 INIT_WORK(work, do_emergency_remount);
719 schedule_work(work); 693 schedule_work(work);
720 } 694 }
721 } 695 }
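
emergency_remount() punts the actual remount to a workqueue because its caller runs in atomic context; the usual trigger is the magic SysRq 'u' key (SysRq 's' schedules the analogous emergency_sync() shown later in this diff). With CONFIG_MAGIC_SYSRQ enabled it can be driven from a program as well:

    #include <stdio.h>

    int main(void)
    {
            /* Equivalent to pressing Alt-SysRq-u: remount everything
             * read-only via emergency_remount(). Requires root. */
            FILE *f = fopen("/proc/sysrq-trigger", "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fputs("u", f);
            fclose(f);
            return 0;
    }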
722 696
723 /* 697 /*
724 * Unnamed block devices are dummy devices used by virtual 698 * Unnamed block devices are dummy devices used by virtual
725 * filesystems which don't use real block-devices. -- jrs 699 * filesystems which don't use real block-devices. -- jrs
726 */ 700 */
727 701
728 static DEFINE_IDA(unnamed_dev_ida); 702 static DEFINE_IDA(unnamed_dev_ida);
729 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 703 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
730 704
731 int set_anon_super(struct super_block *s, void *data) 705 int set_anon_super(struct super_block *s, void *data)
732 { 706 {
733 int dev; 707 int dev;
734 int error; 708 int error;
735 709
736 retry: 710 retry:
737 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) 711 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
738 return -ENOMEM; 712 return -ENOMEM;
739 spin_lock(&unnamed_dev_lock); 713 spin_lock(&unnamed_dev_lock);
740 error = ida_get_new(&unnamed_dev_ida, &dev); 714 error = ida_get_new(&unnamed_dev_ida, &dev);
741 spin_unlock(&unnamed_dev_lock); 715 spin_unlock(&unnamed_dev_lock);
742 if (error == -EAGAIN) 716 if (error == -EAGAIN)
743 /* We raced and lost with another CPU. */ 717 /* We raced and lost with another CPU. */
744 goto retry; 718 goto retry;
745 else if (error) 719 else if (error)
746 return -EAGAIN; 720 return -EAGAIN;
747 721
748 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 722 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
749 spin_lock(&unnamed_dev_lock); 723 spin_lock(&unnamed_dev_lock);
750 ida_remove(&unnamed_dev_ida, dev); 724 ida_remove(&unnamed_dev_ida, dev);
751 spin_unlock(&unnamed_dev_lock); 725 spin_unlock(&unnamed_dev_lock);
752 return -EMFILE; 726 return -EMFILE;
753 } 727 }
754 s->s_dev = MKDEV(0, dev & MINORMASK); 728 s->s_dev = MKDEV(0, dev & MINORMASK);
755 return 0; 729 return 0;
756 } 730 }
757 731
758 EXPORT_SYMBOL(set_anon_super); 732 EXPORT_SYMBOL(set_anon_super);
759 733
760 void kill_anon_super(struct super_block *sb) 734 void kill_anon_super(struct super_block *sb)
761 { 735 {
762 int slot = MINOR(sb->s_dev); 736 int slot = MINOR(sb->s_dev);
763 737
764 generic_shutdown_super(sb); 738 generic_shutdown_super(sb);
765 spin_lock(&unnamed_dev_lock); 739 spin_lock(&unnamed_dev_lock);
766 ida_remove(&unnamed_dev_ida, slot); 740 ida_remove(&unnamed_dev_ida, slot);
767 spin_unlock(&unnamed_dev_lock); 741 spin_unlock(&unnamed_dev_lock);
768 } 742 }
769 743
770 EXPORT_SYMBOL(kill_anon_super); 744 EXPORT_SYMBOL(kill_anon_super);
771 745
772 void kill_litter_super(struct super_block *sb) 746 void kill_litter_super(struct super_block *sb)
773 { 747 {
774 if (sb->s_root) 748 if (sb->s_root)
775 d_genocide(sb->s_root); 749 d_genocide(sb->s_root);
776 kill_anon_super(sb); 750 kill_anon_super(sb);
777 } 751 }
778 752
779 EXPORT_SYMBOL(kill_litter_super); 753 EXPORT_SYMBOL(kill_litter_super);
780 754
781 static int ns_test_super(struct super_block *sb, void *data) 755 static int ns_test_super(struct super_block *sb, void *data)
782 { 756 {
783 return sb->s_fs_info == data; 757 return sb->s_fs_info == data;
784 } 758 }
785 759
786 static int ns_set_super(struct super_block *sb, void *data) 760 static int ns_set_super(struct super_block *sb, void *data)
787 { 761 {
788 sb->s_fs_info = data; 762 sb->s_fs_info = data;
789 return set_anon_super(sb, NULL); 763 return set_anon_super(sb, NULL);
790 } 764 }
791 765
792 int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, 766 int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
793 int (*fill_super)(struct super_block *, void *, int), 767 int (*fill_super)(struct super_block *, void *, int),
794 struct vfsmount *mnt) 768 struct vfsmount *mnt)
795 { 769 {
796 struct super_block *sb; 770 struct super_block *sb;
797 771
798 sb = sget(fs_type, ns_test_super, ns_set_super, data); 772 sb = sget(fs_type, ns_test_super, ns_set_super, data);
799 if (IS_ERR(sb)) 773 if (IS_ERR(sb))
800 return PTR_ERR(sb); 774 return PTR_ERR(sb);
801 775
802 if (!sb->s_root) { 776 if (!sb->s_root) {
803 int err; 777 int err;
804 sb->s_flags = flags; 778 sb->s_flags = flags;
805 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 779 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
806 if (err) { 780 if (err) {
807 deactivate_locked_super(sb); 781 deactivate_locked_super(sb);
808 return err; 782 return err;
809 } 783 }
810 784
811 sb->s_flags |= MS_ACTIVE; 785 sb->s_flags |= MS_ACTIVE;
812 } 786 }
813 787
814 simple_set_mnt(mnt, sb); 788 simple_set_mnt(mnt, sb);
815 return 0; 789 return 0;
816 } 790 }
817 791
818 EXPORT_SYMBOL(get_sb_ns); 792 EXPORT_SYMBOL(get_sb_ns);
819 793
820 #ifdef CONFIG_BLOCK 794 #ifdef CONFIG_BLOCK
821 static int set_bdev_super(struct super_block *s, void *data) 795 static int set_bdev_super(struct super_block *s, void *data)
822 { 796 {
823 s->s_bdev = data; 797 s->s_bdev = data;
824 s->s_dev = s->s_bdev->bd_dev; 798 s->s_dev = s->s_bdev->bd_dev;
825 return 0; 799 return 0;
826 } 800 }
827 801
828 static int test_bdev_super(struct super_block *s, void *data) 802 static int test_bdev_super(struct super_block *s, void *data)
829 { 803 {
830 return (void *)s->s_bdev == data; 804 return (void *)s->s_bdev == data;
831 } 805 }
832 806
833 int get_sb_bdev(struct file_system_type *fs_type, 807 int get_sb_bdev(struct file_system_type *fs_type,
834 int flags, const char *dev_name, void *data, 808 int flags, const char *dev_name, void *data,
835 int (*fill_super)(struct super_block *, void *, int), 809 int (*fill_super)(struct super_block *, void *, int),
836 struct vfsmount *mnt) 810 struct vfsmount *mnt)
837 { 811 {
838 struct block_device *bdev; 812 struct block_device *bdev;
839 struct super_block *s; 813 struct super_block *s;
840 fmode_t mode = FMODE_READ; 814 fmode_t mode = FMODE_READ;
841 int error = 0; 815 int error = 0;
842 816
843 if (!(flags & MS_RDONLY)) 817 if (!(flags & MS_RDONLY))
844 mode |= FMODE_WRITE; 818 mode |= FMODE_WRITE;
845 819
846 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 820 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
847 if (IS_ERR(bdev)) 821 if (IS_ERR(bdev))
848 return PTR_ERR(bdev); 822 return PTR_ERR(bdev);
849 823
850 /* 824 /*
851 * once the super is inserted into the list by sget, s_umount 825 * once the super is inserted into the list by sget, s_umount
852 * will protect the lockfs code from trying to start a snapshot 826 * will protect the lockfs code from trying to start a snapshot
853 * while we are mounting 827 * while we are mounting
854 */ 828 */
855 down(&bdev->bd_mount_sem); 829 down(&bdev->bd_mount_sem);
856 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); 830 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
857 up(&bdev->bd_mount_sem); 831 up(&bdev->bd_mount_sem);
858 if (IS_ERR(s)) 832 if (IS_ERR(s))
859 goto error_s; 833 goto error_s;
860 834
861 if (s->s_root) { 835 if (s->s_root) {
862 if ((flags ^ s->s_flags) & MS_RDONLY) { 836 if ((flags ^ s->s_flags) & MS_RDONLY) {
863 deactivate_locked_super(s); 837 deactivate_locked_super(s);
864 error = -EBUSY; 838 error = -EBUSY;
865 goto error_bdev; 839 goto error_bdev;
866 } 840 }
867 841
868 close_bdev_exclusive(bdev, mode); 842 close_bdev_exclusive(bdev, mode);
869 } else { 843 } else {
870 char b[BDEVNAME_SIZE]; 844 char b[BDEVNAME_SIZE];
871 845
872 s->s_flags = flags; 846 s->s_flags = flags;
873 s->s_mode = mode; 847 s->s_mode = mode;
874 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 848 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
875 sb_set_blocksize(s, block_size(bdev)); 849 sb_set_blocksize(s, block_size(bdev));
876 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 850 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
877 if (error) { 851 if (error) {
878 deactivate_locked_super(s); 852 deactivate_locked_super(s);
879 goto error; 853 goto error;
880 } 854 }
881 855
882 s->s_flags |= MS_ACTIVE; 856 s->s_flags |= MS_ACTIVE;
883 bdev->bd_super = s; 857 bdev->bd_super = s;
884 } 858 }
885 859
886 simple_set_mnt(mnt, s); 860 simple_set_mnt(mnt, s);
887 return 0; 861 return 0;
888 862
889 error_s: 863 error_s:
890 error = PTR_ERR(s); 864 error = PTR_ERR(s);
891 error_bdev: 865 error_bdev:
892 close_bdev_exclusive(bdev, mode); 866 close_bdev_exclusive(bdev, mode);
893 error: 867 error:
894 return error; 868 return error;
895 } 869 }
896 870
897 EXPORT_SYMBOL(get_sb_bdev); 871 EXPORT_SYMBOL(get_sb_bdev);
898 872
899 void kill_block_super(struct super_block *sb) 873 void kill_block_super(struct super_block *sb)
900 { 874 {
901 struct block_device *bdev = sb->s_bdev; 875 struct block_device *bdev = sb->s_bdev;
902 fmode_t mode = sb->s_mode; 876 fmode_t mode = sb->s_mode;
903 877
904 bdev->bd_super = NULL; 878 bdev->bd_super = NULL;
905 generic_shutdown_super(sb); 879 generic_shutdown_super(sb);
906 sync_blockdev(bdev); 880 sync_blockdev(bdev);
907 close_bdev_exclusive(bdev, mode); 881 close_bdev_exclusive(bdev, mode);
908 } 882 }
909 883
910 EXPORT_SYMBOL(kill_block_super); 884 EXPORT_SYMBOL(kill_block_super);
911 #endif 885 #endif
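
get_sb_bdev() and kill_block_super() are the standard pair for an on-disk filesystem in this era's get_sb API: the former opens the device exclusively, finds or creates the superblock under bd_mount_sem so the lockfs/snapshot path cannot race the mount, and runs the filesystem's fill_super; the latter tears everything down, including the final sync_blockdev() flush. Typical wiring, sketched with hypothetical names (kernel code, not a standalone program):

    /* Hypothetical on-disk filesystem registration; names are made up. */
    static int myfs_fill_super(struct super_block *sb, void *data, int silent);

    static int myfs_get_sb(struct file_system_type *fs_type, int flags,
                           const char *dev_name, void *data,
                           struct vfsmount *mnt)
    {
            return get_sb_bdev(fs_type, flags, dev_name, data,
                               myfs_fill_super, mnt);
    }

    static struct file_system_type myfs_fs_type = {
            .owner    = THIS_MODULE,
            .name     = "myfs",
            .get_sb   = myfs_get_sb,
            .kill_sb  = kill_block_super,
            .fs_flags = FS_REQUIRES_DEV,
    };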
912 886
913 int get_sb_nodev(struct file_system_type *fs_type, 887 int get_sb_nodev(struct file_system_type *fs_type,
914 int flags, void *data, 888 int flags, void *data,
915 int (*fill_super)(struct super_block *, void *, int), 889 int (*fill_super)(struct super_block *, void *, int),
916 struct vfsmount *mnt) 890 struct vfsmount *mnt)
917 { 891 {
918 int error; 892 int error;
919 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 893 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
920 894
921 if (IS_ERR(s)) 895 if (IS_ERR(s))
922 return PTR_ERR(s); 896 return PTR_ERR(s);
923 897
924 s->s_flags = flags; 898 s->s_flags = flags;
925 899
926 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 900 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
927 if (error) { 901 if (error) {
928 deactivate_locked_super(s); 902 deactivate_locked_super(s);
929 return error; 903 return error;
930 } 904 }
931 s->s_flags |= MS_ACTIVE; 905 s->s_flags |= MS_ACTIVE;
932 simple_set_mnt(mnt, s); 906 simple_set_mnt(mnt, s);
933 return 0; 907 return 0;
934 } 908 }
935 909
936 EXPORT_SYMBOL(get_sb_nodev); 910 EXPORT_SYMBOL(get_sb_nodev);
937 911
938 static int compare_single(struct super_block *s, void *p) 912 static int compare_single(struct super_block *s, void *p)
939 { 913 {
940 return 1; 914 return 1;
941 } 915 }
942 916
943 int get_sb_single(struct file_system_type *fs_type, 917 int get_sb_single(struct file_system_type *fs_type,
944 int flags, void *data, 918 int flags, void *data,
945 int (*fill_super)(struct super_block *, void *, int), 919 int (*fill_super)(struct super_block *, void *, int),
946 struct vfsmount *mnt) 920 struct vfsmount *mnt)
947 { 921 {
948 struct super_block *s; 922 struct super_block *s;
949 int error; 923 int error;
950 924
951 s = sget(fs_type, compare_single, set_anon_super, NULL); 925 s = sget(fs_type, compare_single, set_anon_super, NULL);
952 if (IS_ERR(s)) 926 if (IS_ERR(s))
953 return PTR_ERR(s); 927 return PTR_ERR(s);
954 if (!s->s_root) { 928 if (!s->s_root) {
955 s->s_flags = flags; 929 s->s_flags = flags;
956 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 930 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
957 if (error) { 931 if (error) {
958 deactivate_locked_super(s); 932 deactivate_locked_super(s);
959 return error; 933 return error;
960 } 934 }
961 s->s_flags |= MS_ACTIVE; 935 s->s_flags |= MS_ACTIVE;
962 } 936 }
963 do_remount_sb(s, flags, data, 0); 937 do_remount_sb(s, flags, data, 0);
964 simple_set_mnt(mnt, s); 938 simple_set_mnt(mnt, s);
965 return 0; 939 return 0;
966 } 940 }
967 941
968 EXPORT_SYMBOL(get_sb_single); 942 EXPORT_SYMBOL(get_sb_single);
969 943
970 struct vfsmount * 944 struct vfsmount *
971 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 945 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
972 { 946 {
973 struct vfsmount *mnt; 947 struct vfsmount *mnt;
974 char *secdata = NULL; 948 char *secdata = NULL;
975 int error; 949 int error;
976 950
977 if (!type) 951 if (!type)
978 return ERR_PTR(-ENODEV); 952 return ERR_PTR(-ENODEV);
979 953
980 error = -ENOMEM; 954 error = -ENOMEM;
981 mnt = alloc_vfsmnt(name); 955 mnt = alloc_vfsmnt(name);
982 if (!mnt) 956 if (!mnt)
983 goto out; 957 goto out;
984 958
985 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { 959 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
986 secdata = alloc_secdata(); 960 secdata = alloc_secdata();
987 if (!secdata) 961 if (!secdata)
988 goto out_mnt; 962 goto out_mnt;
989 963
990 error = security_sb_copy_data(data, secdata); 964 error = security_sb_copy_data(data, secdata);
991 if (error) 965 if (error)
992 goto out_free_secdata; 966 goto out_free_secdata;
993 } 967 }
994 968
995 error = type->get_sb(type, flags, name, data, mnt); 969 error = type->get_sb(type, flags, name, data, mnt);
996 if (error < 0) 970 if (error < 0)
997 goto out_free_secdata; 971 goto out_free_secdata;
998 BUG_ON(!mnt->mnt_sb); 972 BUG_ON(!mnt->mnt_sb);
999 973
1000 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 974 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
1001 if (error) 975 if (error)
1002 goto out_sb; 976 goto out_sb;
1003 977
1004 mnt->mnt_mountpoint = mnt->mnt_root; 978 mnt->mnt_mountpoint = mnt->mnt_root;
1005 mnt->mnt_parent = mnt; 979 mnt->mnt_parent = mnt;
1006 up_write(&mnt->mnt_sb->s_umount); 980 up_write(&mnt->mnt_sb->s_umount);
1007 free_secdata(secdata); 981 free_secdata(secdata);
1008 return mnt; 982 return mnt;
1009 out_sb: 983 out_sb:
1010 dput(mnt->mnt_root); 984 dput(mnt->mnt_root);
1011 deactivate_locked_super(mnt->mnt_sb); 985 deactivate_locked_super(mnt->mnt_sb);
1012 out_free_secdata: 986 out_free_secdata:
1013 free_secdata(secdata); 987 free_secdata(secdata);
1014 out_mnt: 988 out_mnt:
1015 free_vfsmnt(mnt); 989 free_vfsmnt(mnt);
1016 out: 990 out:
1017 return ERR_PTR(error); 991 return ERR_PTR(error);
1018 } 992 }
1019 993
1020 EXPORT_SYMBOL_GPL(vfs_kern_mount); 994 EXPORT_SYMBOL_GPL(vfs_kern_mount);
1021 995
1022 static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) 996 static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1023 { 997 {
1024 int err; 998 int err;
1025 const char *subtype = strchr(fstype, '.'); 999 const char *subtype = strchr(fstype, '.');
1026 if (subtype) { 1000 if (subtype) {
1027 subtype++; 1001 subtype++;
1028 err = -EINVAL; 1002 err = -EINVAL;
1029 if (!subtype[0]) 1003 if (!subtype[0])
1030 goto err; 1004 goto err;
1031 } else 1005 } else
1032 subtype = ""; 1006 subtype = "";
1033 1007
1034 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); 1008 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
1035 err = -ENOMEM; 1009 err = -ENOMEM;
1036 if (!mnt->mnt_sb->s_subtype) 1010 if (!mnt->mnt_sb->s_subtype)
1037 goto err; 1011 goto err;
1038 return mnt; 1012 return mnt;
1039 1013
1040 err: 1014 err:
1041 mntput(mnt); 1015 mntput(mnt);
1042 return ERR_PTR(err); 1016 return ERR_PTR(err);
1043 } 1017 }
1044 1018
1045 struct vfsmount * 1019 struct vfsmount *
1046 do_kern_mount(const char *fstype, int flags, const char *name, void *data) 1020 do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1047 { 1021 {
1048 struct file_system_type *type = get_fs_type(fstype); 1022 struct file_system_type *type = get_fs_type(fstype);
1049 struct vfsmount *mnt; 1023 struct vfsmount *mnt;
1050 if (!type) 1024 if (!type)
1051 return ERR_PTR(-ENODEV); 1025 return ERR_PTR(-ENODEV);
1052 mnt = vfs_kern_mount(type, flags, name, data); 1026 mnt = vfs_kern_mount(type, flags, name, data);
1053 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && 1027 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1054 !mnt->mnt_sb->s_subtype) 1028 !mnt->mnt_sb->s_subtype)
1055 mnt = fs_set_subtype(mnt, fstype); 1029 mnt = fs_set_subtype(mnt, fstype);
1056 put_filesystem(type); 1030 put_filesystem(type);
1057 return mnt; 1031 return mnt;
1058 } 1032 }
1059 EXPORT_SYMBOL_GPL(do_kern_mount); 1033 EXPORT_SYMBOL_GPL(do_kern_mount);
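
fs_set_subtype() is why mounting with type "fuse.sshfs" ends up with "sshfs" in sb->s_subtype (FUSE being the classic FS_HAS_SUBTYPE user): everything after the first '.' in the requested type is the subtype. The parse is plain strchr arithmetic, shown here standalone:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *fstype = "fuse.sshfs";
            const char *subtype = strchr(fstype, '.');

            /* Mirrors fs_set_subtype(): text after the first '.' is the
             * subtype, empty if there is no dot. */
            subtype = subtype ? subtype + 1 : "";
            printf("subtype: \"%s\"\n", subtype);   /* -> "sshfs" */
            return 0;
    }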
1060 1034
1061 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 1035 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
1 /* 1 /*
2 * High-level sync()-related operations 2 * High-level sync()-related operations
3 */ 3 */
4 4
5 #include <linux/kernel.h> 5 #include <linux/kernel.h>
6 #include <linux/file.h> 6 #include <linux/file.h>
7 #include <linux/fs.h> 7 #include <linux/fs.h>
8 #include <linux/module.h> 8 #include <linux/module.h>
9 #include <linux/sched.h> 9 #include <linux/sched.h>
10 #include <linux/writeback.h> 10 #include <linux/writeback.h>
11 #include <linux/syscalls.h> 11 #include <linux/syscalls.h>
12 #include <linux/linkage.h> 12 #include <linux/linkage.h>
13 #include <linux/pagemap.h> 13 #include <linux/pagemap.h>
14 #include <linux/quotaops.h> 14 #include <linux/quotaops.h>
15 #include <linux/buffer_head.h> 15 #include <linux/buffer_head.h>
16 #include "internal.h" 16 #include "internal.h"
17 17
18 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ 18 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
19 SYNC_FILE_RANGE_WAIT_AFTER) 19 SYNC_FILE_RANGE_WAIT_AFTER)
20 20
21 /* 21 SYSCALL_DEFINE0(sync)
22 * sync everything. Start out by waking pdflush, because that writes back
23 * all queues in parallel.
24 */
25 static void do_sync(unsigned long wait)
26 { 22 {
27 wakeup_pdflush(0); 23 sync_filesystems(0);
28 sync_inodes(0); /* All mappings, inodes and their blockdevs */ 24 sync_filesystems(1);
29 vfs_dq_sync(NULL);
30 sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */
31 sync_supers(); /* Write the superblocks */
32 sync_filesystems(0); /* Start syncing the filesystems */
33 sync_filesystems(wait); /* Waitingly sync the filesystems */
34 sync_blockdevs();
35 if (!wait)
36 printk("Emergency Sync complete\n");
37 if (unlikely(laptop_mode)) 25 if (unlikely(laptop_mode))
38 laptop_sync_completion(); 26 laptop_sync_completion();
39 }
40
41 SYSCALL_DEFINE0(sync)
42 {
43 do_sync(1);
44 return 0; 27 return 0;
45 } 28 }
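
This is the heart of the change: sync(2) reduces to two passes over sync_filesystems(), wait == 0 to get writeback going everywhere, then wait == 1 so the call does not return until the data is out. Userspace is unaffected; the usual pattern still applies:

    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>

    int main(void)
    {
            int fd = open("data.log", O_WRONLY | O_CREAT | O_APPEND, 0644);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, "checkpoint\n", 11) != 11)
                    perror("write");
            close(fd);
            sync();   /* with this patch: async pass, then waiting pass */
            return 0;
    }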
46 29
47 static void do_sync_work(struct work_struct *work) 30 static void do_sync_work(struct work_struct *work)
48 { 31 {
49 do_sync(0); 32 /*
33 * Sync twice to reduce the possibility we skipped some inodes / pages
34 * because they were temporarily locked
35 */
36 sync_filesystems(0);
37 sync_filesystems(0);
38 printk("Emergency Sync complete\n");
50 kfree(work); 39 kfree(work);
51 } 40 }
52 41
53 void emergency_sync(void) 42 void emergency_sync(void)
54 { 43 {
55 struct work_struct *work; 44 struct work_struct *work;
56 45
57 work = kmalloc(sizeof(*work), GFP_ATOMIC); 46 work = kmalloc(sizeof(*work), GFP_ATOMIC);
58 if (work) { 47 if (work) {
59 INIT_WORK(work, do_sync_work); 48 INIT_WORK(work, do_sync_work);
60 schedule_work(work); 49 schedule_work(work);
61 } 50 }
62 } 51 }
63 52
64 /* 53 /*
65 * Generic function to fsync a file. 54 * Generic function to fsync a file.
66 * 55 *
67 * filp may be NULL if called via the msync of a vma. 56 * filp may be NULL if called via the msync of a vma.
68 */ 57 */
69 int file_fsync(struct file *filp, struct dentry *dentry, int datasync) 58 int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
70 { 59 {
71 struct inode * inode = dentry->d_inode; 60 struct inode * inode = dentry->d_inode;
72 struct super_block * sb; 61 struct super_block * sb;
73 int ret, err; 62 int ret, err;
74 63
75 /* sync the inode to buffers */ 64 /* sync the inode to buffers */
76 ret = write_inode_now(inode, 0); 65 ret = write_inode_now(inode, 0);
77 66
78 /* sync the superblock to buffers */ 67 /* sync the superblock to buffers */
79 sb = inode->i_sb; 68 sb = inode->i_sb;
80 lock_super(sb); 69 lock_super(sb);
81 if (sb->s_dirt && sb->s_op->write_super) 70 if (sb->s_dirt && sb->s_op->write_super)
82 sb->s_op->write_super(sb); 71 sb->s_op->write_super(sb);
83 unlock_super(sb); 72 unlock_super(sb);
84 73
85 /* .. finally sync the buffers to disk */ 74 /* .. finally sync the buffers to disk */
86 err = sync_blockdev(sb->s_bdev); 75 err = sync_blockdev(sb->s_bdev);
87 if (!ret) 76 if (!ret)
88 ret = err; 77 ret = err;
89 return ret; 78 return ret;
90 } 79 }
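
file_fsync() is the stock ->fsync for simple buffer-cache filesystems: write the inode, write the superblock if dirty, flush the block device. A filesystem opts in by pointing its file_operations at it; a sketch with a hypothetical filesystem (kernel code, not standalone):

    static const struct file_operations myfs_file_operations = {
            .read  = do_sync_read,
            .write = do_sync_write,
            .fsync = file_fsync,    /* the generic helper above */
    };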
91 80
92 /** 81 /**
93 * vfs_fsync - perform a fsync or fdatasync on a file 82 * vfs_fsync - perform a fsync or fdatasync on a file
94 * @file: file to sync 83 * @file: file to sync
95 * @dentry: dentry of @file 84 * @dentry: dentry of @file
96 * @datasync: only perform an fdatasync operation 85 * @datasync: only perform an fdatasync operation
97 * 86 *
98 * Write back data and metadata for @file to disk. If @datasync is 87 * Write back data and metadata for @file to disk. If @datasync is
99 * set, only metadata needed to access modified file data is written. 88 * set, only metadata needed to access modified file data is written.
100 * 89 *
101 * In case this function is called from nfsd @file may be %NULL and 90 * In case this function is called from nfsd @file may be %NULL and
102 * only @dentry is set. This can only happen when the filesystem 91 * only @dentry is set. This can only happen when the filesystem
103 * implements the export_operations API. 92 * implements the export_operations API.
104 */ 93 */
105 int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) 94 int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
106 { 95 {
107 const struct file_operations *fop; 96 const struct file_operations *fop;
108 struct address_space *mapping; 97 struct address_space *mapping;
109 int err, ret; 98 int err, ret;
110 99
111 /* 100 /*
112 * Get mapping and operations from the file in case we have 101 * Get mapping and operations from the file in case we have
113 * a file, or get the default values for them in case we 102 * a file, or get the default values for them in case we
114 * don't have a struct file available. Damn nfsd.. 103 * don't have a struct file available. Damn nfsd..
115 */ 104 */
116 if (file) { 105 if (file) {
117 mapping = file->f_mapping; 106 mapping = file->f_mapping;
118 fop = file->f_op; 107 fop = file->f_op;
119 } else { 108 } else {
120 mapping = dentry->d_inode->i_mapping; 109 mapping = dentry->d_inode->i_mapping;
121 fop = dentry->d_inode->i_fop; 110 fop = dentry->d_inode->i_fop;
122 } 111 }
123 112
124 if (!fop || !fop->fsync) { 113 if (!fop || !fop->fsync) {
125 ret = -EINVAL; 114 ret = -EINVAL;
126 goto out; 115 goto out;
127 } 116 }
128 117
129 ret = filemap_fdatawrite(mapping); 118 ret = filemap_fdatawrite(mapping);
130 119
131 /* 120 /*
132 * We need to protect against concurrent writers, which could cause 121 * We need to protect against concurrent writers, which could cause
133 * livelocks in fsync_buffers_list(). 122 * livelocks in fsync_buffers_list().
134 */ 123 */
135 mutex_lock(&mapping->host->i_mutex); 124 mutex_lock(&mapping->host->i_mutex);
136 err = fop->fsync(file, dentry, datasync); 125 err = fop->fsync(file, dentry, datasync);
137 if (!ret) 126 if (!ret)
138 ret = err; 127 ret = err;
139 mutex_unlock(&mapping->host->i_mutex); 128 mutex_unlock(&mapping->host->i_mutex);
140 err = filemap_fdatawait(mapping); 129 err = filemap_fdatawait(mapping);
141 if (!ret) 130 if (!ret)
142 ret = err; 131 ret = err;
143 out: 132 out:
144 return ret; 133 return ret;
145 } 134 }
146 EXPORT_SYMBOL(vfs_fsync); 135 EXPORT_SYMBOL(vfs_fsync);
147 136
148 static int do_fsync(unsigned int fd, int datasync) 137 static int do_fsync(unsigned int fd, int datasync)
149 { 138 {
150 struct file *file; 139 struct file *file;
151 int ret = -EBADF; 140 int ret = -EBADF;
152 141
153 file = fget(fd); 142 file = fget(fd);
154 if (file) { 143 if (file) {
155 ret = vfs_fsync(file, file->f_path.dentry, datasync); 144 ret = vfs_fsync(file, file->f_path.dentry, datasync);
156 fput(file); 145 fput(file);
157 } 146 }
158 return ret; 147 return ret;
159 } 148 }
160 149
161 SYSCALL_DEFINE1(fsync, unsigned int, fd) 150 SYSCALL_DEFINE1(fsync, unsigned int, fd)
162 { 151 {
163 return do_fsync(fd, 0); 152 return do_fsync(fd, 0);
164 } 153 }
165 154
166 SYSCALL_DEFINE1(fdatasync, unsigned int, fd) 155 SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
167 { 156 {
168 return do_fsync(fd, 1); 157 return do_fsync(fd, 1);
169 } 158 }
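
do_fsync() makes the difference between the two syscalls explicit: it is only the datasync flag passed down to vfs_fsync(). fdatasync(2) may skip metadata not needed to read the data back (typically timestamps), which can save a journal commit. The usual durability pattern:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <fcntl.h>

    int main(void)
    {
            const char msg[] = "committed record\n";
            int fd = open("journal.dat", O_WRONLY | O_CREAT | O_APPEND, 0644);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, msg, strlen(msg)) != (ssize_t)strlen(msg) ||
                fdatasync(fd) != 0) {   /* fsync(fd) would also flush mtime */
                    perror("write/fdatasync");
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }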
170 159
171 /* 160 /*
172 * sys_sync_file_range() permits finely controlled syncing over a segment of 161 * sys_sync_file_range() permits finely controlled syncing over a segment of
173 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is 162 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
174 * zero then sys_sync_file_range() will operate from offset out to EOF. 163 * zero then sys_sync_file_range() will operate from offset out to EOF.
175 * 164 *
176 * The flag bits are: 165 * The flag bits are:
177 * 166 *
178 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range 167 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
179 * before performing the write. 168 * before performing the write.
180 * 169 *
181 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the 170 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
182 * range which are not presently under writeback. Note that this may block for 171 * range which are not presently under writeback. Note that this may block for
183 * significant periods due to exhaustion of disk request structures. 172 * significant periods due to exhaustion of disk request structures.
184 * 173 *
185 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range 174 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
186 * after performing the write. 175 * after performing the write.
187 * 176 *
188 * Useful combinations of the flag bits are: 177 * Useful combinations of the flag bits are:
189 * 178 *
190 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages 179 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
191 * in the range which were dirty on entry to sys_sync_file_range() are placed 180 * in the range which were dirty on entry to sys_sync_file_range() are placed
192 * under writeout. This is a start-write-for-data-integrity operation. 181 * under writeout. This is a start-write-for-data-integrity operation.
193 * 182 *
194 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which 183 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
195 * are not presently under writeout. This is an asynchronous flush-to-disk 184 * are not presently under writeout. This is an asynchronous flush-to-disk
196 * operation. Not suitable for data integrity operations. 185 * operation. Not suitable for data integrity operations.
197 * 186 *
198 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for 187 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
199 * completion of writeout of all pages in the range. This will be used after an 188 * completion of writeout of all pages in the range. This will be used after an
200 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait 189 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
201 * for that operation to complete and to return the result. 190 * for that operation to complete and to return the result.
202 * 191 *
203 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER: 192 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
204 * a traditional sync() operation. This is a write-for-data-integrity operation 193 * a traditional sync() operation. This is a write-for-data-integrity operation
205 * which will ensure that all pages in the range which were dirty on entry to 194 * which will ensure that all pages in the range which were dirty on entry to
206 * sys_sync_file_range() are committed to disk. 195 * sys_sync_file_range() are committed to disk.
207 * 196 *
208 * 197 *
209 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any 198 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
210 * I/O errors or ENOSPC conditions and will return those to the caller, after 199 * I/O errors or ENOSPC conditions and will return those to the caller, after
211 * clearing the EIO and ENOSPC flags in the address_space. 200 * clearing the EIO and ENOSPC flags in the address_space.
212 * 201 *
213 * It should be noted that none of these operations write out the file's 202 * It should be noted that none of these operations write out the file's
214 * metadata. So unless the application is strictly performing overwrites of 203 * metadata. So unless the application is strictly performing overwrites of
215 * already-instantiated disk blocks, there are no guarantees here that the data 204 * already-instantiated disk blocks, there are no guarantees here that the data
216 * will be available after a crash. 205 * will be available after a crash.
217 */ 206 */
218 SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, 207 SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
219 unsigned int flags) 208 unsigned int flags)
220 { 209 {
221 int ret; 210 int ret;
222 struct file *file; 211 struct file *file;
223 loff_t endbyte; /* inclusive */ 212 loff_t endbyte; /* inclusive */
224 int fput_needed; 213 int fput_needed;
225 umode_t i_mode; 214 umode_t i_mode;
226 215
227 ret = -EINVAL; 216 ret = -EINVAL;
228 if (flags & ~VALID_FLAGS) 217 if (flags & ~VALID_FLAGS)
229 goto out; 218 goto out;
230 219
231 endbyte = offset + nbytes; 220 endbyte = offset + nbytes;
232 221
233 if ((s64)offset < 0) 222 if ((s64)offset < 0)
234 goto out; 223 goto out;
235 if ((s64)endbyte < 0) 224 if ((s64)endbyte < 0)
236 goto out; 225 goto out;
237 if (endbyte < offset) 226 if (endbyte < offset)
238 goto out; 227 goto out;
239 228
240 if (sizeof(pgoff_t) == 4) { 229 if (sizeof(pgoff_t) == 4) {
241 if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { 230 if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
242 /* 231 /*
243 * The range starts outside a 32 bit machine's 232 * The range starts outside a 32 bit machine's
244 * pagecache addressing capabilities. Let it "succeed" 233 * pagecache addressing capabilities. Let it "succeed"
245 */ 234 */
246 ret = 0; 235 ret = 0;
247 goto out; 236 goto out;
248 } 237 }
249 if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { 238 if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
250 /* 239 /*
251 * Out to EOF 240 * Out to EOF
252 */ 241 */
253 nbytes = 0; 242 nbytes = 0;
254 } 243 }
255 } 244 }
256 245
257 if (nbytes == 0) 246 if (nbytes == 0)
258 endbyte = LLONG_MAX; 247 endbyte = LLONG_MAX;
259 else 248 else
260 endbyte--; /* inclusive */ 249 endbyte--; /* inclusive */
261 250
262 ret = -EBADF; 251 ret = -EBADF;
263 file = fget_light(fd, &fput_needed); 252 file = fget_light(fd, &fput_needed);
264 if (!file) 253 if (!file)
265 goto out; 254 goto out;
266 255
267 i_mode = file->f_path.dentry->d_inode->i_mode; 256 i_mode = file->f_path.dentry->d_inode->i_mode;
268 ret = -ESPIPE; 257 ret = -ESPIPE;
269 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 258 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
270 !S_ISLNK(i_mode)) 259 !S_ISLNK(i_mode))
271 goto out_put; 260 goto out_put;
272 261
273 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); 262 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
274 out_put: 263 out_put:
275 fput_light(file, fput_needed); 264 fput_light(file, fput_needed);
276 out: 265 out:
277 return ret; 266 return ret;
278 } 267 }
279 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 268 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
280 asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, 269 asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes,
281 long flags) 270 long flags)
282 { 271 {
283 return SYSC_sync_file_range((int) fd, offset, nbytes, 272 return SYSC_sync_file_range((int) fd, offset, nbytes,
284 (unsigned int) flags); 273 (unsigned int) flags);
285 } 274 }
286 SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); 275 SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range);
287 #endif 276 #endif
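
The flag documentation above maps one-to-one onto userspace: sync_file_range(2) is declared in <fcntl.h> under _GNU_SOURCE, and the three-bit combination is the write-for-data-integrity form. As the comment warns, no metadata is written, so this is only safe when overwriting already-allocated blocks; a sketch under that assumption:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>

    int main(void)
    {
            /* Assumes preallocated.db exists with blocks already
             * instantiated past offset 4096. */
            int fd = open("preallocated.db", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Overwrite existing blocks, then flush just that range. */
            if (pwrite(fd, "X", 1, 4096) != 1 ||
                sync_file_range(fd, 4096, 1,
                                SYNC_FILE_RANGE_WAIT_BEFORE |
                                SYNC_FILE_RANGE_WRITE |
                                SYNC_FILE_RANGE_WAIT_AFTER) != 0) {
                    perror("pwrite/sync_file_range");
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }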
288 277
289 /* It would be nice if people remembered that not all the world's an i386 278 /* It would be nice if people remembered that not all the world's an i386
290 when they introduce new system calls */ 279 when they introduce new system calls */
291 SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, 280 SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags,
292 loff_t offset, loff_t nbytes) 281 loff_t offset, loff_t nbytes)
293 { 282 {
294 return sys_sync_file_range(fd, offset, nbytes, flags); 283 return sys_sync_file_range(fd, offset, nbytes, flags);
295 } 284 }
296 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 285 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
297 asmlinkage long SyS_sync_file_range2(long fd, long flags, 286 asmlinkage long SyS_sync_file_range2(long fd, long flags,
298 loff_t offset, loff_t nbytes) 287 loff_t offset, loff_t nbytes)
299 { 288 {
300 return SYSC_sync_file_range2((int) fd, (unsigned int) flags, 289 return SYSC_sync_file_range2((int) fd, (unsigned int) flags,
301 offset, nbytes); 290 offset, nbytes);
302 } 291 }
303 SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); 292 SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
304 #endif 293 #endif
305 294
306 /* 295 /*
307 * `endbyte' is inclusive 296 * `endbyte' is inclusive
308 */ 297 */
309 int do_sync_mapping_range(struct address_space *mapping, loff_t offset, 298 int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
310 loff_t endbyte, unsigned int flags) 299 loff_t endbyte, unsigned int flags)
311 { 300 {
312 int ret; 301 int ret;
313 302
314 if (!mapping) { 303 if (!mapping) {
315 ret = -EINVAL; 304 ret = -EINVAL;
316 goto out; 305 goto out;
317 } 306 }
318 307
319 ret = 0; 308 ret = 0;
320 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { 309 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
321 ret = wait_on_page_writeback_range(mapping, 310 ret = wait_on_page_writeback_range(mapping,
322 offset >> PAGE_CACHE_SHIFT, 311 offset >> PAGE_CACHE_SHIFT,
323 endbyte >> PAGE_CACHE_SHIFT); 312 endbyte >> PAGE_CACHE_SHIFT);
324 if (ret < 0) 313 if (ret < 0)
325 goto out; 314 goto out;
326 } 315 }
327 316
328 if (flags & SYNC_FILE_RANGE_WRITE) { 317 if (flags & SYNC_FILE_RANGE_WRITE) {
329 ret = __filemap_fdatawrite_range(mapping, offset, endbyte, 318 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
330 WB_SYNC_ALL); 319 WB_SYNC_ALL);
331 if (ret < 0) 320 if (ret < 0)
332 goto out; 321 goto out;
333 } 322 }
334 323
335 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) { 324 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
336 ret = wait_on_page_writeback_range(mapping, 325 ret = wait_on_page_writeback_range(mapping,
337 offset >> PAGE_CACHE_SHIFT, 326 offset >> PAGE_CACHE_SHIFT,
338 endbyte >> PAGE_CACHE_SHIFT); 327 endbyte >> PAGE_CACHE_SHIFT);
1 #ifndef _LINUX_FS_H 1 #ifndef _LINUX_FS_H
2 #define _LINUX_FS_H 2 #define _LINUX_FS_H
3 3
4 /* 4 /*
5 * This file has definitions for some important file table 5 * This file has definitions for some important file table
6 * structures etc. 6 * structures etc.
7 */ 7 */
8 8
9 #include <linux/limits.h> 9 #include <linux/limits.h>
10 #include <linux/ioctl.h> 10 #include <linux/ioctl.h>
11 11
12 /* 12 /*
13 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change 13 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
14 * the file limit at runtime and only root can increase the per-process 14 * the file limit at runtime and only root can increase the per-process
15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
16 * upper limit on files-per-process. 16 * upper limit on files-per-process.
17 * 17 *
18 * Some programs (notably those using select()) may have to be 18 * Some programs (notably those using select()) may have to be
19 * recompiled to take full advantage of the new limits.. 19 * recompiled to take full advantage of the new limits..
20 */ 20 */
21 21
22 /* Fixed constants first: */ 22 /* Fixed constants first: */
23 #undef NR_OPEN 23 #undef NR_OPEN
24 #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ 24 #define INR_OPEN 1024 /* Initial setting for nfile rlimits */
25 25
26 #define BLOCK_SIZE_BITS 10 26 #define BLOCK_SIZE_BITS 10
27 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) 27 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
28 28
29 #define SEEK_SET 0 /* seek relative to beginning of file */ 29 #define SEEK_SET 0 /* seek relative to beginning of file */
30 #define SEEK_CUR 1 /* seek relative to current file position */ 30 #define SEEK_CUR 1 /* seek relative to current file position */
31 #define SEEK_END 2 /* seek relative to end of file */ 31 #define SEEK_END 2 /* seek relative to end of file */
32 #define SEEK_MAX SEEK_END 32 #define SEEK_MAX SEEK_END
33 33
34 /* And dynamically-tunable limits and defaults: */ 34 /* And dynamically-tunable limits and defaults: */
35 struct files_stat_struct { 35 struct files_stat_struct {
36 int nr_files; /* read only */ 36 int nr_files; /* read only */
37 int nr_free_files; /* read only */ 37 int nr_free_files; /* read only */
38 int max_files; /* tunable */ 38 int max_files; /* tunable */
39 }; 39 };
40 40
41 struct inodes_stat_t { 41 struct inodes_stat_t {
42 int nr_inodes; 42 int nr_inodes;
43 int nr_unused; 43 int nr_unused;
44 int dummy[5]; /* padding for sysctl ABI compatibility */ 44 int dummy[5]; /* padding for sysctl ABI compatibility */
45 }; 45 };
46 46
47 47
48 #define NR_FILE 8192 /* this can well be larger on a larger system */ 48 #define NR_FILE 8192 /* this can well be larger on a larger system */
49 49
50 #define MAY_EXEC 1 50 #define MAY_EXEC 1
51 #define MAY_WRITE 2 51 #define MAY_WRITE 2
52 #define MAY_READ 4 52 #define MAY_READ 4
53 #define MAY_APPEND 8 53 #define MAY_APPEND 8
54 #define MAY_ACCESS 16 54 #define MAY_ACCESS 16
55 #define MAY_OPEN 32 55 #define MAY_OPEN 32
56 56
57 /* 57 /*
58 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond 58 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
59 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() 59 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
60 */ 60 */
61 61
62 /* file is open for reading */ 62 /* file is open for reading */
63 #define FMODE_READ ((__force fmode_t)1) 63 #define FMODE_READ ((__force fmode_t)1)
64 /* file is open for writing */ 64 /* file is open for writing */
65 #define FMODE_WRITE ((__force fmode_t)2) 65 #define FMODE_WRITE ((__force fmode_t)2)
66 /* file is seekable */ 66 /* file is seekable */
67 #define FMODE_LSEEK ((__force fmode_t)4) 67 #define FMODE_LSEEK ((__force fmode_t)4)
68 /* file can be accessed using pread */ 68 /* file can be accessed using pread */
69 #define FMODE_PREAD ((__force fmode_t)8) 69 #define FMODE_PREAD ((__force fmode_t)8)
70 /* file can be accessed using pwrite */ 70 /* file can be accessed using pwrite */
71 #define FMODE_PWRITE ((__force fmode_t)16) 71 #define FMODE_PWRITE ((__force fmode_t)16)
72 /* File is opened for execution with sys_execve / sys_uselib */ 72 /* File is opened for execution with sys_execve / sys_uselib */
73 #define FMODE_EXEC ((__force fmode_t)32) 73 #define FMODE_EXEC ((__force fmode_t)32)
74 /* File is opened with O_NDELAY (only set for block devices) */ 74 /* File is opened with O_NDELAY (only set for block devices) */
75 #define FMODE_NDELAY ((__force fmode_t)64) 75 #define FMODE_NDELAY ((__force fmode_t)64)
76 /* File is opened with O_EXCL (only set for block devices) */ 76 /* File is opened with O_EXCL (only set for block devices) */
77 #define FMODE_EXCL ((__force fmode_t)128) 77 #define FMODE_EXCL ((__force fmode_t)128)
78 /* File is opened using open(.., 3, ..) and is writeable only for ioctls 78 /* File is opened using open(.., 3, ..) and is writeable only for ioctls
79 (special hack for floppy.c) */ 79 (special hack for floppy.c) */
80 #define FMODE_WRITE_IOCTL ((__force fmode_t)256) 80 #define FMODE_WRITE_IOCTL ((__force fmode_t)256)
81 81
82 /* 82 /*
83 * Don't update ctime and mtime. 83 * Don't update ctime and mtime.
84 * 84 *
85 * Currently a special hack for the XFS open_by_handle ioctl, but we'll 85 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
86 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 86 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
87 */ 87 */
88 #define FMODE_NOCMTIME ((__force fmode_t)2048) 88 #define FMODE_NOCMTIME ((__force fmode_t)2048)
89 89
90 /* 90 /*
91 * The below are the various read and write types that we support. Some of 91 * The below are the various read and write types that we support. Some of
92 * them include behavioral modifiers that send information down to the 92 * them include behavioral modifiers that send information down to the
93 * block layer and IO scheduler. Terminology: 93 * block layer and IO scheduler. Terminology:
94 * 94 *
95 * The block layer uses device plugging to defer IO a little bit, in 95 * The block layer uses device plugging to defer IO a little bit, in
96 * the hope that we will see more IO very shortly. This increases 96 * the hope that we will see more IO very shortly. This increases
97 * coalescing of adjacent IO and thus reduces the number of IOs we 97 * coalescing of adjacent IO and thus reduces the number of IOs we
98 * have to send to the device. It also allows for better queuing, 98 * have to send to the device. It also allows for better queuing,
99 * if the IO isn't mergeable. If the caller is going to be waiting 99 * if the IO isn't mergeable. If the caller is going to be waiting
100 * for the IO, then he must ensure that the device is unplugged so 100 * for the IO, then he must ensure that the device is unplugged so
101 * that the IO is dispatched to the driver. 101 * that the IO is dispatched to the driver.
102 * 102 *
103 * All IO is handled async in Linux. This is fine for background 103 * All IO is handled async in Linux. This is fine for background
104 * writes, but for reads or writes that someone waits for completion 104 * writes, but for reads or writes that someone waits for completion
105 * on, we want to notify the block layer and IO scheduler so that they 105 * on, we want to notify the block layer and IO scheduler so that they
106 * know about it. That allows them to make better scheduling 106 * know about it. That allows them to make better scheduling
107 * decisions. So when the below references 'sync' and 'async', it 107 * decisions. So when the below references 'sync' and 'async', it
108 * is referencing this priority hint. 108 * is referencing this priority hint.
109 * 109 *
110 * With that in mind, the available types are: 110 * With that in mind, the available types are:
111 * 111 *
112 * READ A normal read operation. Device will be plugged. 112 * READ A normal read operation. Device will be plugged.
113 * READ_SYNC A synchronous read. Device is not plugged, caller can 113 * READ_SYNC A synchronous read. Device is not plugged, caller can
114 * immediately wait on this read without caring about 114 * immediately wait on this read without caring about
115 * unplugging. 115 * unplugging.
116 * READA Used for read-ahead operations. Lower priority, and the 116 * READA Used for read-ahead operations. Lower priority, and the
117 * block layer could (in theory) choose to ignore this 117 * block layer could (in theory) choose to ignore this
118 * request if it runs into resource problems. 118 * request if it runs into resource problems.
119 * WRITE A normal async write. Device will be plugged. 119 * WRITE A normal async write. Device will be plugged.
120 * SWRITE Like WRITE, but a special case for ll_rw_block() that 120 * SWRITE Like WRITE, but a special case for ll_rw_block() that
121 * tells it to lock the buffer first. Normally a buffer 121 * tells it to lock the buffer first. Normally a buffer
122 * must be locked before doing IO. 122 * must be locked before doing IO.
123 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down 123 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
124 * the hint that someone will be waiting on this IO 124 * the hint that someone will be waiting on this IO
125 * shortly. The device must still be unplugged explicitly, 125 * shortly. The device must still be unplugged explicitly,
126 * WRITE_SYNC_PLUG does not do this as we could be 126 * WRITE_SYNC_PLUG does not do this as we could be
127 * submitting more writes before we actually wait on any 127 * submitting more writes before we actually wait on any
128 * of them. 128 * of them.
129 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device 129 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
130 * immediately after submission. The write equivalent 130 * immediately after submission. The write equivalent
131 * of READ_SYNC. 131 * of READ_SYNC.
132 * WRITE_ODIRECT Special case write for O_DIRECT only. 132 * WRITE_ODIRECT Special case write for O_DIRECT only.
133 * SWRITE_SYNC 133 * SWRITE_SYNC
134 * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. 134 * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
135 * See SWRITE. 135 * See SWRITE.
136 * WRITE_BARRIER Like WRITE, but tells the block layer that all 136 * WRITE_BARRIER Like WRITE, but tells the block layer that all
137 * previously submitted writes must be safely on storage 137 * previously submitted writes must be safely on storage
138 * before this one is started. Also guarantees that when 138 * before this one is started. Also guarantees that when
139 * this write is complete, it itself is also safely on 139 * this write is complete, it itself is also safely on
140 * storage. Prevents reordering of writes on both sides 140 * storage. Prevents reordering of writes on both sides
141 * of this IO. 141 * of this IO.
142 * 142 *
143 */ 143 */
144 #define RW_MASK 1 144 #define RW_MASK 1
145 #define RWA_MASK 2 145 #define RWA_MASK 2
146 #define READ 0 146 #define READ 0
147 #define WRITE 1 147 #define WRITE 1
148 #define READA 2 /* read-ahead - don't block if no resources */ 148 #define READA 2 /* read-ahead - don't block if no resources */
149 #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ 149 #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
150 #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) 150 #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
151 #define READ_META (READ | (1 << BIO_RW_META)) 151 #define READ_META (READ | (1 << BIO_RW_META))
152 #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) 152 #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
153 #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) 153 #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
154 #define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) 154 #define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
155 #define SWRITE_SYNC_PLUG \ 155 #define SWRITE_SYNC_PLUG \
156 (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) 156 (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
157 #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) 157 #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
158 #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) 158 #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
159 159
160 /* 160 /*
161 * These aren't really reads or writes, they pass down information about 161 * These aren't really reads or writes, they pass down information about
162 * parts of device that are now unused by the file system. 162 * parts of device that are now unused by the file system.
163 */ 163 */
164 #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) 164 #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
165 #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) 165 #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
166 166
167 #define SEL_IN 1 167 #define SEL_IN 1
168 #define SEL_OUT 2 168 #define SEL_OUT 2
169 #define SEL_EX 4 169 #define SEL_EX 4
170 170
171 /* public flags for file_system_type */ 171 /* public flags for file_system_type */
172 #define FS_REQUIRES_DEV 1 172 #define FS_REQUIRES_DEV 1
173 #define FS_BINARY_MOUNTDATA 2 173 #define FS_BINARY_MOUNTDATA 2
174 #define FS_HAS_SUBTYPE 4 174 #define FS_HAS_SUBTYPE 4
175 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 175 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
176 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() 176 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
177 * during rename() internally. 177 * during rename() internally.
178 */ 178 */
179 179
180 /* 180 /*
181 * These are the fs-independent mount-flags: up to 32 flags are supported 181 * These are the fs-independent mount-flags: up to 32 flags are supported
182 */ 182 */
183 #define MS_RDONLY 1 /* Mount read-only */ 183 #define MS_RDONLY 1 /* Mount read-only */
184 #define MS_NOSUID 2 /* Ignore suid and sgid bits */ 184 #define MS_NOSUID 2 /* Ignore suid and sgid bits */
185 #define MS_NODEV 4 /* Disallow access to device special files */ 185 #define MS_NODEV 4 /* Disallow access to device special files */
186 #define MS_NOEXEC 8 /* Disallow program execution */ 186 #define MS_NOEXEC 8 /* Disallow program execution */
187 #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ 187 #define MS_SYNCHRONOUS 16 /* Writes are synced at once */
188 #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ 188 #define MS_REMOUNT 32 /* Alter flags of a mounted FS */
189 #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ 189 #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
190 #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ 190 #define MS_DIRSYNC 128 /* Directory modifications are synchronous */
191 #define MS_NOATIME 1024 /* Do not update access times. */ 191 #define MS_NOATIME 1024 /* Do not update access times. */
192 #define MS_NODIRATIME 2048 /* Do not update directory access times */ 192 #define MS_NODIRATIME 2048 /* Do not update directory access times */
193 #define MS_BIND 4096 193 #define MS_BIND 4096
194 #define MS_MOVE 8192 194 #define MS_MOVE 8192
195 #define MS_REC 16384 195 #define MS_REC 16384
196 #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. 196 #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
197 MS_VERBOSE is deprecated. */ 197 MS_VERBOSE is deprecated. */
198 #define MS_SILENT 32768 198 #define MS_SILENT 32768
199 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ 199 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
200 #define MS_UNBINDABLE (1<<17) /* change to unbindable */ 200 #define MS_UNBINDABLE (1<<17) /* change to unbindable */
201 #define MS_PRIVATE (1<<18) /* change to private */ 201 #define MS_PRIVATE (1<<18) /* change to private */
202 #define MS_SLAVE (1<<19) /* change to slave */ 202 #define MS_SLAVE (1<<19) /* change to slave */
203 #define MS_SHARED (1<<20) /* change to shared */ 203 #define MS_SHARED (1<<20) /* change to shared */
204 #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ 204 #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
205 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ 205 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
206 #define MS_I_VERSION (1<<23) /* Update inode I_version field */ 206 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
207 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ 207 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
208 #define MS_ACTIVE (1<<30) 208 #define MS_ACTIVE (1<<30)
209 #define MS_NOUSER (1<<31) 209 #define MS_NOUSER (1<<31)
210 210
211 /* 211 /*
212 * Superblock flags that can be altered by MS_REMOUNT 212 * Superblock flags that can be altered by MS_REMOUNT
213 */ 213 */
214 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) 214 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
215 215
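From userspace the same MS_* values go straight into mount(2), and on a remount only the bits in MS_RMT_MASK may actually change. A hedged example remounting read-only (the /mnt path is illustrative):

    #include <stdio.h>
    #include <sys/mount.h>

    /* Remount /mnt read-only; MS_REMOUNT must accompany the new flags. */
    int main(void)
    {
            if (mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL) < 0) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }
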
216 /* 216 /*
217 * Old magic mount flag and mask 217 * Old magic mount flag and mask
218 */ 218 */
219 #define MS_MGC_VAL 0xC0ED0000 219 #define MS_MGC_VAL 0xC0ED0000
220 #define MS_MGC_MSK 0xffff0000 220 #define MS_MGC_MSK 0xffff0000
221 221
222 /* Inode flags - they have nothing to do with superblock flags now */ 222 /* Inode flags - they have nothing to do with superblock flags now */
223 223
224 #define S_SYNC 1 /* Writes are synced at once */ 224 #define S_SYNC 1 /* Writes are synced at once */
225 #define S_NOATIME 2 /* Do not update access times */ 225 #define S_NOATIME 2 /* Do not update access times */
226 #define S_APPEND 4 /* Append-only file */ 226 #define S_APPEND 4 /* Append-only file */
227 #define S_IMMUTABLE 8 /* Immutable file */ 227 #define S_IMMUTABLE 8 /* Immutable file */
228 #define S_DEAD 16 /* removed, but still open directory */ 228 #define S_DEAD 16 /* removed, but still open directory */
229 #define S_NOQUOTA 32 /* Inode is not counted to quota */ 229 #define S_NOQUOTA 32 /* Inode is not counted to quota */
230 #define S_DIRSYNC 64 /* Directory modifications are synchronous */ 230 #define S_DIRSYNC 64 /* Directory modifications are synchronous */
231 #define S_NOCMTIME 128 /* Do not update file c/mtime */ 231 #define S_NOCMTIME 128 /* Do not update file c/mtime */
232 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ 232 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
233 #define S_PRIVATE 512 /* Inode is fs-internal */ 233 #define S_PRIVATE 512 /* Inode is fs-internal */
234 234
235 /* 235 /*
236 * Note that nosuid etc flags are inode-specific: setting some file-system 236 * Note that nosuid etc flags are inode-specific: setting some file-system
237 * flags just means all the inodes inherit those flags by default. It might be 237 * flags just means all the inodes inherit those flags by default. It might be
238 * possible to override it selectively if you really wanted to with some 238 * possible to override it selectively if you really wanted to with some
239 * ioctl() that is not currently implemented. 239 * ioctl() that is not currently implemented.
240 * 240 *
241 * Exception: MS_RDONLY is always applied to the entire file system. 241 * Exception: MS_RDONLY is always applied to the entire file system.
242 * 242 *
243 * Unfortunately, it is possible to change a filesystem's flags while it is 243 * Unfortunately, it is possible to change a filesystem's flags while it is
244 * mounted and has files in use. This means that not all of the inodes will 244 * mounted and has files in use. This means that not all of the inodes will
245 * have their i_flags updated. Hence, i_flags no longer inherits the superblock 245 * have their i_flags updated. Hence, i_flags no longer inherits the superblock
246 * mount flags, so these have to be checked separately. -- rmk@arm.uk.linux.org 246 * mount flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
247 */ 247 */
248 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) 248 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))
249 249
250 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) 250 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
251 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ 251 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \
252 ((inode)->i_flags & S_SYNC)) 252 ((inode)->i_flags & S_SYNC))
253 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ 253 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
254 ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) 254 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
255 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) 255 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
256 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) 256 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
257 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) 257 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
258 258
259 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) 259 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
260 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) 260 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
261 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) 261 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
262 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) 262 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
263 263
264 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) 264 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
265 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) 265 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
266 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) 266 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
267 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 267 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
268 268
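A filesystem typically consults these predicates after modifying an inode to decide whether the change must reach disk immediately. A minimal, hypothetical sketch (maybe_sync_inode() is an invented name; write_inode_now() is the real helper from fs/fs-writeback.c):

    /* Hypothetical: push the inode out now if the mount or inode demands it. */
    static int maybe_sync_inode(struct inode *inode)
    {
            if (IS_SYNC(inode) || IS_DIRSYNC(inode))
                    return write_inode_now(inode, 1);       /* 1 = wait for I/O */
            return 0;
    }
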
269 /* the read-only stuff doesn't really belong here, but any other place is 269 /* the read-only stuff doesn't really belong here, but any other place is
270 probably as bad and I don't want to create yet another include file. */ 270 probably as bad and I don't want to create yet another include file. */
271 271
272 #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ 272 #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
273 #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ 273 #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */
274 #define BLKRRPART _IO(0x12,95) /* re-read partition table */ 274 #define BLKRRPART _IO(0x12,95) /* re-read partition table */
275 #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ 275 #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
276 #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ 276 #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
277 #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ 277 #define BLKRASET _IO(0x12,98) /* set read ahead for block device */
278 #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ 278 #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */
279 #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ 279 #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
280 #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ 280 #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
281 #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ 281 #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
282 #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ 282 #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
283 #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ 283 #define BLKSSZGET _IO(0x12,104)/* get block device sector size */
284 #if 0 284 #if 0
285 #define BLKPG _IO(0x12,105)/* See blkpg.h */ 285 #define BLKPG _IO(0x12,105)/* See blkpg.h */
286 286
287 /* Some people are morons. Do not use sizeof! */ 287 /* Some people are morons. Do not use sizeof! */
288 288
289 #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ 289 #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */
290 #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ 290 #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */
291 /* This was here just to show that the number is taken - 291 /* This was here just to show that the number is taken -
292 probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ 292 probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
293 #endif 293 #endif
294 /* A jump here: 108-111 have been used for various private purposes. */ 294 /* A jump here: 108-111 have been used for various private purposes. */
295 #define BLKBSZGET _IOR(0x12,112,size_t) 295 #define BLKBSZGET _IOR(0x12,112,size_t)
296 #define BLKBSZSET _IOW(0x12,113,size_t) 296 #define BLKBSZSET _IOW(0x12,113,size_t)
297 #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ 297 #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
298 #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) 298 #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
299 #define BLKTRACESTART _IO(0x12,116) 299 #define BLKTRACESTART _IO(0x12,116)
300 #define BLKTRACESTOP _IO(0x12,117) 300 #define BLKTRACESTOP _IO(0x12,117)
301 #define BLKTRACETEARDOWN _IO(0x12,118) 301 #define BLKTRACETEARDOWN _IO(0x12,118)
302 #define BLKDISCARD _IO(0x12,119) 302 #define BLKDISCARD _IO(0x12,119)
303 303
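From userspace these are ordinary ioctl(2) requests against an open block device. A hedged example querying the device size in bytes (/dev/sda is only illustrative):

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(void)
    {
            unsigned long long size;
            int fd = open("/dev/sda", O_RDONLY);    /* illustrative device */

            if (fd < 0 || ioctl(fd, BLKGETSIZE64, &size) < 0) {
                    perror("BLKGETSIZE64");
                    return 1;
            }
            printf("%llu bytes\n", size);
            return 0;
    }
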
304 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 304 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
305 #define FIBMAP _IO(0x00,1) /* bmap access */ 305 #define FIBMAP _IO(0x00,1) /* bmap access */
306 #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ 306 #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
307 #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ 307 #define FIFREEZE _IOWR('X', 119, int) /* Freeze */
308 #define FITHAW _IOWR('X', 120, int) /* Thaw */ 308 #define FITHAW _IOWR('X', 120, int) /* Thaw */
309 309
310 #define FS_IOC_GETFLAGS _IOR('f', 1, long) 310 #define FS_IOC_GETFLAGS _IOR('f', 1, long)
311 #define FS_IOC_SETFLAGS _IOW('f', 2, long) 311 #define FS_IOC_SETFLAGS _IOW('f', 2, long)
312 #define FS_IOC_GETVERSION _IOR('v', 1, long) 312 #define FS_IOC_GETVERSION _IOR('v', 1, long)
313 #define FS_IOC_SETVERSION _IOW('v', 2, long) 313 #define FS_IOC_SETVERSION _IOW('v', 2, long)
314 #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) 314 #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
315 #define FS_IOC32_GETFLAGS _IOR('f', 1, int) 315 #define FS_IOC32_GETFLAGS _IOR('f', 1, int)
316 #define FS_IOC32_SETFLAGS _IOW('f', 2, int) 316 #define FS_IOC32_SETFLAGS _IOW('f', 2, int)
317 #define FS_IOC32_GETVERSION _IOR('v', 1, int) 317 #define FS_IOC32_GETVERSION _IOR('v', 1, int)
318 #define FS_IOC32_SETVERSION _IOW('v', 2, int) 318 #define FS_IOC32_SETVERSION _IOW('v', 2, int)
319 319
320 /* 320 /*
321 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) 321 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
322 */ 322 */
323 #define FS_SECRM_FL 0x00000001 /* Secure deletion */ 323 #define FS_SECRM_FL 0x00000001 /* Secure deletion */
324 #define FS_UNRM_FL 0x00000002 /* Undelete */ 324 #define FS_UNRM_FL 0x00000002 /* Undelete */
325 #define FS_COMPR_FL 0x00000004 /* Compress file */ 325 #define FS_COMPR_FL 0x00000004 /* Compress file */
326 #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ 326 #define FS_SYNC_FL 0x00000008 /* Synchronous updates */
327 #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ 327 #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
328 #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ 328 #define FS_APPEND_FL 0x00000020 /* writes to file may only append */
329 #define FS_NODUMP_FL 0x00000040 /* do not dump file */ 329 #define FS_NODUMP_FL 0x00000040 /* do not dump file */
330 #define FS_NOATIME_FL 0x00000080 /* do not update atime */ 330 #define FS_NOATIME_FL 0x00000080 /* do not update atime */
331 /* Reserved for compression usage... */ 331 /* Reserved for compression usage... */
332 #define FS_DIRTY_FL 0x00000100 332 #define FS_DIRTY_FL 0x00000100
333 #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ 333 #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
334 #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ 334 #define FS_NOCOMP_FL 0x00000400 /* Don't compress */
335 #define FS_ECOMPR_FL 0x00000800 /* Compression error */ 335 #define FS_ECOMPR_FL 0x00000800 /* Compression error */
336 /* End compression flags --- maybe not all used */ 336 /* End compression flags --- maybe not all used */
337 #define FS_BTREE_FL 0x00001000 /* btree format dir */ 337 #define FS_BTREE_FL 0x00001000 /* btree format dir */
338 #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ 338 #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
339 #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ 339 #define FS_IMAGIC_FL 0x00002000 /* AFS directory */
340 #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ 340 #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
341 #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ 341 #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
342 #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ 342 #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
343 #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 343 #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
344 #define FS_EXTENT_FL 0x00080000 /* Extents */ 344 #define FS_EXTENT_FL 0x00080000 /* Extents */
345 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ 345 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
346 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ 346 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
347 347
348 #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ 348 #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
349 #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ 349 #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
350 350
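These are the flags that chattr(1) and lsattr(1) manipulate through FS_IOC_GETFLAGS/FS_IOC_SETFLAGS above. A hedged sketch setting the append-only bit (requires CAP_LINUX_IMMUTABLE; make_append_only() is an invented name):

    #include <sys/ioctl.h>
    #include <linux/fs.h>

    /* Set FS_APPEND_FL on fd, preserving the other flags. */
    static int make_append_only(int fd)
    {
            long flags;

            if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
                    return -1;
            flags |= FS_APPEND_FL;
            return ioctl(fd, FS_IOC_SETFLAGS, &flags);
    }
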
351 351
352 #define SYNC_FILE_RANGE_WAIT_BEFORE 1 352 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
353 #define SYNC_FILE_RANGE_WRITE 2 353 #define SYNC_FILE_RANGE_WRITE 2
354 #define SYNC_FILE_RANGE_WAIT_AFTER 4 354 #define SYNC_FILE_RANGE_WAIT_AFTER 4
355 355
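These three bits combine into the flags argument of sync_file_range(2). A hedged userspace sketch that writes out a 1 MiB window and waits for completion (flush_window() is an invented name):

    #define _GNU_SOURCE
    #include <fcntl.h>

    /* Write-out the window, waiting both before and after. */
    static int flush_window(int fd, off_t off)
    {
            return sync_file_range(fd, off, 1 << 20,
                                   SYNC_FILE_RANGE_WAIT_BEFORE |
                                   SYNC_FILE_RANGE_WRITE |
                                   SYNC_FILE_RANGE_WAIT_AFTER);
    }
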
356 #ifdef __KERNEL__ 356 #ifdef __KERNEL__
357 357
358 #include <linux/linkage.h> 358 #include <linux/linkage.h>
359 #include <linux/wait.h> 359 #include <linux/wait.h>
360 #include <linux/types.h> 360 #include <linux/types.h>
361 #include <linux/kdev_t.h> 361 #include <linux/kdev_t.h>
362 #include <linux/dcache.h> 362 #include <linux/dcache.h>
363 #include <linux/path.h> 363 #include <linux/path.h>
364 #include <linux/stat.h> 364 #include <linux/stat.h>
365 #include <linux/cache.h> 365 #include <linux/cache.h>
366 #include <linux/kobject.h> 366 #include <linux/kobject.h>
367 #include <linux/list.h> 367 #include <linux/list.h>
368 #include <linux/radix-tree.h> 368 #include <linux/radix-tree.h>
369 #include <linux/prio_tree.h> 369 #include <linux/prio_tree.h>
370 #include <linux/init.h> 370 #include <linux/init.h>
371 #include <linux/pid.h> 371 #include <linux/pid.h>
372 #include <linux/mutex.h> 372 #include <linux/mutex.h>
373 #include <linux/capability.h> 373 #include <linux/capability.h>
374 #include <linux/semaphore.h> 374 #include <linux/semaphore.h>
375 #include <linux/fiemap.h> 375 #include <linux/fiemap.h>
376 376
377 #include <asm/atomic.h> 377 #include <asm/atomic.h>
378 #include <asm/byteorder.h> 378 #include <asm/byteorder.h>
379 379
380 struct export_operations; 380 struct export_operations;
381 struct hd_geometry; 381 struct hd_geometry;
382 struct iovec; 382 struct iovec;
383 struct nameidata; 383 struct nameidata;
384 struct kiocb; 384 struct kiocb;
385 struct pipe_inode_info; 385 struct pipe_inode_info;
386 struct poll_table_struct; 386 struct poll_table_struct;
387 struct kstatfs; 387 struct kstatfs;
388 struct vm_area_struct; 388 struct vm_area_struct;
389 struct vfsmount; 389 struct vfsmount;
390 struct cred; 390 struct cred;
391 391
392 extern void __init inode_init(void); 392 extern void __init inode_init(void);
393 extern void __init inode_init_early(void); 393 extern void __init inode_init_early(void);
394 extern void __init files_init(unsigned long); 394 extern void __init files_init(unsigned long);
395 395
396 extern struct files_stat_struct files_stat; 396 extern struct files_stat_struct files_stat;
397 extern int get_max_files(void); 397 extern int get_max_files(void);
398 extern int sysctl_nr_open; 398 extern int sysctl_nr_open;
399 extern struct inodes_stat_t inodes_stat; 399 extern struct inodes_stat_t inodes_stat;
400 extern int leases_enable, lease_break_time; 400 extern int leases_enable, lease_break_time;
401 #ifdef CONFIG_DNOTIFY 401 #ifdef CONFIG_DNOTIFY
402 extern int dir_notify_enable; 402 extern int dir_notify_enable;
403 #endif 403 #endif
404 404
405 struct buffer_head; 405 struct buffer_head;
406 typedef int (get_block_t)(struct inode *inode, sector_t iblock, 406 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
407 struct buffer_head *bh_result, int create); 407 struct buffer_head *bh_result, int create);
408 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 408 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
409 ssize_t bytes, void *private); 409 ssize_t bytes, void *private);
410 410
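A get_block_t is the filesystem's logical-to-physical block mapper; most of the generic buffer-layer helpers take one. A hypothetical sketch (my_lookup() stands in for the filesystem's real mapping code; map_bh() is from <linux/buffer_head.h>):

    /* Map logical block iblock of inode into bh_result; create may allocate. */
    static int my_get_block(struct inode *inode, sector_t iblock,
                            struct buffer_head *bh_result, int create)
    {
            sector_t phys = my_lookup(inode, iblock, create);       /* hypothetical */

            if (!phys)
                    return create ? -ENOSPC : 0;    /* a hole reads as zeroes */
            map_bh(bh_result, inode->i_sb, phys);
            return 0;
    }
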
411 /* 411 /*
412 * Attribute flags. These should be or-ed together to figure out what 412 * Attribute flags. These should be or-ed together to figure out what
413 * has been changed! 413 * has been changed!
414 */ 414 */
415 #define ATTR_MODE (1 << 0) 415 #define ATTR_MODE (1 << 0)
416 #define ATTR_UID (1 << 1) 416 #define ATTR_UID (1 << 1)
417 #define ATTR_GID (1 << 2) 417 #define ATTR_GID (1 << 2)
418 #define ATTR_SIZE (1 << 3) 418 #define ATTR_SIZE (1 << 3)
419 #define ATTR_ATIME (1 << 4) 419 #define ATTR_ATIME (1 << 4)
420 #define ATTR_MTIME (1 << 5) 420 #define ATTR_MTIME (1 << 5)
421 #define ATTR_CTIME (1 << 6) 421 #define ATTR_CTIME (1 << 6)
422 #define ATTR_ATIME_SET (1 << 7) 422 #define ATTR_ATIME_SET (1 << 7)
423 #define ATTR_MTIME_SET (1 << 8) 423 #define ATTR_MTIME_SET (1 << 8)
424 #define ATTR_FORCE (1 << 9) /* Not a change, but force the change */ 424 #define ATTR_FORCE (1 << 9) /* Not a change, but force the change */
425 #define ATTR_ATTR_FLAG (1 << 10) 425 #define ATTR_ATTR_FLAG (1 << 10)
426 #define ATTR_KILL_SUID (1 << 11) 426 #define ATTR_KILL_SUID (1 << 11)
427 #define ATTR_KILL_SGID (1 << 12) 427 #define ATTR_KILL_SGID (1 << 12)
428 #define ATTR_FILE (1 << 13) 428 #define ATTR_FILE (1 << 13)
429 #define ATTR_KILL_PRIV (1 << 14) 429 #define ATTR_KILL_PRIV (1 << 14)
430 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ 430 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
431 #define ATTR_TIMES_SET (1 << 16) 431 #define ATTR_TIMES_SET (1 << 16)
432 432
433 /* 433 /*
434 * This is the Inode Attributes structure, used for notify_change(). It 434 * This is the Inode Attributes structure, used for notify_change(). It
435 * uses the above definitions as flags, to know which values have changed. 435 * uses the above definitions as flags, to know which values have changed.
436 * Also, in this manner, a Filesystem can look at only the values it cares 436 * Also, in this manner, a Filesystem can look at only the values it cares
437 * about. Basically, these are the attributes that the VFS layer can 437 * about. Basically, these are the attributes that the VFS layer can
438 * request to change from the FS layer. 438 * request to change from the FS layer.
439 * 439 *
440 * Derek Atkins <warlord@MIT.EDU> 94-10-20 440 * Derek Atkins <warlord@MIT.EDU> 94-10-20
441 */ 441 */
442 struct iattr { 442 struct iattr {
443 unsigned int ia_valid; 443 unsigned int ia_valid;
444 umode_t ia_mode; 444 umode_t ia_mode;
445 uid_t ia_uid; 445 uid_t ia_uid;
446 gid_t ia_gid; 446 gid_t ia_gid;
447 loff_t ia_size; 447 loff_t ia_size;
448 struct timespec ia_atime; 448 struct timespec ia_atime;
449 struct timespec ia_mtime; 449 struct timespec ia_mtime;
450 struct timespec ia_ctime; 450 struct timespec ia_ctime;
451 451
452 /* 452 /*
453 * Not an attribute, but auxiliary info for filesystems wanting to 453 * Not an attribute, but auxiliary info for filesystems wanting to
454 * implement an ftruncate()-like method. NOTE: the filesystem should 454 * implement an ftruncate()-like method. NOTE: the filesystem should
455 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). 455 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
456 */ 456 */
457 struct file *ia_file; 457 struct file *ia_file;
458 }; 458 };
459 459
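A hedged sketch, loosely modeled on do_truncate(), of how a caller fills an iattr and hands it to notify_change() under i_mutex (length and dentry come from the caller):

    struct iattr newattrs;
    int err;

    newattrs.ia_size = length;
    newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;

    mutex_lock(&dentry->d_inode->i_mutex);
    err = notify_change(dentry, &newattrs);
    mutex_unlock(&dentry->d_inode->i_mutex);
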
460 /* 460 /*
461 * Includes for diskquotas. 461 * Includes for diskquotas.
462 */ 462 */
463 #include <linux/quota.h> 463 #include <linux/quota.h>
464 464
465 /** 465 /**
466 * enum positive_aop_returns - aop return codes with specific semantics 466 * enum positive_aop_returns - aop return codes with specific semantics
467 * 467 *
468 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 468 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
469 * completed, that the page is still locked, and 469 * completed, that the page is still locked, and
470 * should be considered active. The VM uses this hint 470 * should be considered active. The VM uses this hint
471 * to return the page to the active list -- it won't 471 * to return the page to the active list -- it won't
472 * be a candidate for writeback again in the near 472 * be a candidate for writeback again in the near
473 * future. Other callers must be careful to unlock 473 * future. Other callers must be careful to unlock
474 * the page if they get this return. Returned by 474 * the page if they get this return. Returned by
475 * writepage(); 475 * writepage();
476 * 476 *
477 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 477 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
478 * unlocked it and the page might have been truncated. 478 * unlocked it and the page might have been truncated.
479 * The caller should back up to acquiring a new page and 479 * The caller should back up to acquiring a new page and
480 * trying again. The aop will be taking reasonable 480 * trying again. The aop will be taking reasonable
481 * precautions not to livelock. If the caller held a page 481 * precautions not to livelock. If the caller held a page
482 * reference, it should drop it before retrying. Returned 482 * reference, it should drop it before retrying. Returned
483 * by readpage(). 483 * by readpage().
484 * 484 *
485 * address_space_operation functions return these large constants to indicate 485 * address_space_operation functions return these large constants to indicate
486 * special semantics to the caller. These are much larger than the bytes in a 486 * special semantics to the caller. These are much larger than the bytes in a
487 * page to allow for functions that return the number of bytes operated on in a 487 * page to allow for functions that return the number of bytes operated on in a
488 * given page. 488 * given page.
489 */ 489 */
490 490
491 enum positive_aop_returns { 491 enum positive_aop_returns {
492 AOP_WRITEPAGE_ACTIVATE = 0x80000, 492 AOP_WRITEPAGE_ACTIVATE = 0x80000,
493 AOP_TRUNCATED_PAGE = 0x80001, 493 AOP_TRUNCATED_PAGE = 0x80001,
494 }; 494 };
495 495
496 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ 496 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
497 #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ 497 #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
498 #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct 498 #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct
499 * helper code (eg buffer layer) 499 * helper code (eg buffer layer)
500 * to clear GFP_FS from alloc */ 500 * to clear GFP_FS from alloc */
501 501
502 /* 502 /*
503 * oh the beauties of C type declarations. 503 * oh the beauties of C type declarations.
504 */ 504 */
505 struct page; 505 struct page;
506 struct address_space; 506 struct address_space;
507 struct writeback_control; 507 struct writeback_control;
508 508
509 struct iov_iter { 509 struct iov_iter {
510 const struct iovec *iov; 510 const struct iovec *iov;
511 unsigned long nr_segs; 511 unsigned long nr_segs;
512 size_t iov_offset; 512 size_t iov_offset;
513 size_t count; 513 size_t count;
514 }; 514 };
515 515
516 size_t iov_iter_copy_from_user_atomic(struct page *page, 516 size_t iov_iter_copy_from_user_atomic(struct page *page,
517 struct iov_iter *i, unsigned long offset, size_t bytes); 517 struct iov_iter *i, unsigned long offset, size_t bytes);
518 size_t iov_iter_copy_from_user(struct page *page, 518 size_t iov_iter_copy_from_user(struct page *page,
519 struct iov_iter *i, unsigned long offset, size_t bytes); 519 struct iov_iter *i, unsigned long offset, size_t bytes);
520 void iov_iter_advance(struct iov_iter *i, size_t bytes); 520 void iov_iter_advance(struct iov_iter *i, size_t bytes);
521 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); 521 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
522 size_t iov_iter_single_seg_count(struct iov_iter *i); 522 size_t iov_iter_single_seg_count(struct iov_iter *i);
523 523
524 static inline void iov_iter_init(struct iov_iter *i, 524 static inline void iov_iter_init(struct iov_iter *i,
525 const struct iovec *iov, unsigned long nr_segs, 525 const struct iovec *iov, unsigned long nr_segs,
526 size_t count, size_t written) 526 size_t count, size_t written)
527 { 527 {
528 i->iov = iov; 528 i->iov = iov;
529 i->nr_segs = nr_segs; 529 i->nr_segs = nr_segs;
530 i->iov_offset = 0; 530 i->iov_offset = 0;
531 i->count = count + written; 531 i->count = count + written;
532 532
533 iov_iter_advance(i, written); 533 iov_iter_advance(i, written);
534 } 534 }
535 535
536 static inline size_t iov_iter_count(struct iov_iter *i) 536 static inline size_t iov_iter_count(struct iov_iter *i)
537 { 537 {
538 return i->count; 538 return i->count;
539 } 539 }
540 540
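A write path seeds the iterator from the caller's iovec array, then walks it segment by segment. A hedged sketch of the pattern (iov, nr_segs, count and written come from the caller; the actual copy is elided):

    struct iov_iter i;

    /* Start `written` bytes into the user buffers; i.count is what remains. */
    iov_iter_init(&i, iov, nr_segs, count, written);
    while (iov_iter_count(&i)) {
            size_t n = iov_iter_single_seg_count(&i);

            /* ... copy up to n bytes of this segment into the page cache ... */
            iov_iter_advance(&i, n);
    }
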
541 /* 541 /*
542 * "descriptor" for what we're up to with a read. 542 * "descriptor" for what we're up to with a read.
543 * This allows us to use the same read code yet 543 * This allows us to use the same read code yet
544 * have multiple different users of the data that 544 * have multiple different users of the data that
545 * we read from a file. 545 * we read from a file.
546 * 546 *
547 * The simplest case just copies the data to user 547 * The simplest case just copies the data to user
548 * mode. 548 * mode.
549 */ 549 */
550 typedef struct { 550 typedef struct {
551 size_t written; 551 size_t written;
552 size_t count; 552 size_t count;
553 union { 553 union {
554 char __user *buf; 554 char __user *buf;
555 void *data; 555 void *data;
556 } arg; 556 } arg;
557 int error; 557 int error;
558 } read_descriptor_t; 558 } read_descriptor_t;
559 559
560 typedef int (*read_actor_t)(read_descriptor_t *, struct page *, 560 typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
561 unsigned long, unsigned long); 561 unsigned long, unsigned long);
562 562
563 struct address_space_operations { 563 struct address_space_operations {
564 int (*writepage)(struct page *page, struct writeback_control *wbc); 564 int (*writepage)(struct page *page, struct writeback_control *wbc);
565 int (*readpage)(struct file *, struct page *); 565 int (*readpage)(struct file *, struct page *);
566 void (*sync_page)(struct page *); 566 void (*sync_page)(struct page *);
567 567
568 /* Write back some dirty pages from this mapping. */ 568 /* Write back some dirty pages from this mapping. */
569 int (*writepages)(struct address_space *, struct writeback_control *); 569 int (*writepages)(struct address_space *, struct writeback_control *);
570 570
571 /* Set a page dirty. Return true if this dirtied it */ 571 /* Set a page dirty. Return true if this dirtied it */
572 int (*set_page_dirty)(struct page *page); 572 int (*set_page_dirty)(struct page *page);
573 573
574 int (*readpages)(struct file *filp, struct address_space *mapping, 574 int (*readpages)(struct file *filp, struct address_space *mapping,
575 struct list_head *pages, unsigned nr_pages); 575 struct list_head *pages, unsigned nr_pages);
576 576
577 int (*write_begin)(struct file *, struct address_space *mapping, 577 int (*write_begin)(struct file *, struct address_space *mapping,
578 loff_t pos, unsigned len, unsigned flags, 578 loff_t pos, unsigned len, unsigned flags,
579 struct page **pagep, void **fsdata); 579 struct page **pagep, void **fsdata);
580 int (*write_end)(struct file *, struct address_space *mapping, 580 int (*write_end)(struct file *, struct address_space *mapping,
581 loff_t pos, unsigned len, unsigned copied, 581 loff_t pos, unsigned len, unsigned copied,
582 struct page *page, void *fsdata); 582 struct page *page, void *fsdata);
583 583
584 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ 584 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
585 sector_t (*bmap)(struct address_space *, sector_t); 585 sector_t (*bmap)(struct address_space *, sector_t);
586 void (*invalidatepage) (struct page *, unsigned long); 586 void (*invalidatepage) (struct page *, unsigned long);
587 int (*releasepage) (struct page *, gfp_t); 587 int (*releasepage) (struct page *, gfp_t);
588 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 588 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
589 loff_t offset, unsigned long nr_segs); 589 loff_t offset, unsigned long nr_segs);
590 int (*get_xip_mem)(struct address_space *, pgoff_t, int, 590 int (*get_xip_mem)(struct address_space *, pgoff_t, int,
591 void **, unsigned long *); 591 void **, unsigned long *);
592 /* migrate the contents of a page to the specified target */ 592 /* migrate the contents of a page to the specified target */
593 int (*migratepage) (struct address_space *, 593 int (*migratepage) (struct address_space *,
594 struct page *, struct page *); 594 struct page *, struct page *);
595 int (*launder_page) (struct page *); 595 int (*launder_page) (struct page *);
596 int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 596 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
597 unsigned long); 597 unsigned long);
598 }; 598 };
599 599
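A simple block-based filesystem can implement several of these with the generic buffer-layer helpers, in the style of fs/ext2 (my_get_block is the hypothetical mapper sketched earlier; block_write_full_page(), block_read_full_page() and block_sync_page() are from <linux/buffer_head.h>):

    static int my_writepage(struct page *page, struct writeback_control *wbc)
    {
            return block_write_full_page(page, my_get_block, wbc);
    }

    static int my_readpage(struct file *file, struct page *page)
    {
            return block_read_full_page(page, my_get_block);
    }

    static const struct address_space_operations my_aops = {
            .readpage       = my_readpage,
            .writepage      = my_writepage,
            .sync_page      = block_sync_page,
    };
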
600 /* 600 /*
601 * pagecache_write_begin/pagecache_write_end must be used by general code 601 * pagecache_write_begin/pagecache_write_end must be used by general code
602 * to write into the pagecache. 602 * to write into the pagecache.
603 */ 603 */
604 int pagecache_write_begin(struct file *, struct address_space *mapping, 604 int pagecache_write_begin(struct file *, struct address_space *mapping,
605 loff_t pos, unsigned len, unsigned flags, 605 loff_t pos, unsigned len, unsigned flags,
606 struct page **pagep, void **fsdata); 606 struct page **pagep, void **fsdata);
607 607
608 int pagecache_write_end(struct file *, struct address_space *mapping, 608 int pagecache_write_end(struct file *, struct address_space *mapping,
609 loff_t pos, unsigned len, unsigned copied, 609 loff_t pos, unsigned len, unsigned copied,
610 struct page *page, void *fsdata); 610 struct page *page, void *fsdata);
611 611
612 struct backing_dev_info; 612 struct backing_dev_info;
613 struct address_space { 613 struct address_space {
614 struct inode *host; /* owner: inode, block_device */ 614 struct inode *host; /* owner: inode, block_device */
615 struct radix_tree_root page_tree; /* radix tree of all pages */ 615 struct radix_tree_root page_tree; /* radix tree of all pages */
616 spinlock_t tree_lock; /* and lock protecting it */ 616 spinlock_t tree_lock; /* and lock protecting it */
617 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 617 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
618 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 618 struct prio_tree_root i_mmap; /* tree of private and shared mappings */
619 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 619 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
620 spinlock_t i_mmap_lock; /* protect tree, count, list */ 620 spinlock_t i_mmap_lock; /* protect tree, count, list */
621 unsigned int truncate_count; /* Cover race condition with truncate */ 621 unsigned int truncate_count; /* Cover race condition with truncate */
622 unsigned long nrpages; /* number of total pages */ 622 unsigned long nrpages; /* number of total pages */
623 pgoff_t writeback_index;/* writeback starts here */ 623 pgoff_t writeback_index;/* writeback starts here */
624 const struct address_space_operations *a_ops; /* methods */ 624 const struct address_space_operations *a_ops; /* methods */
625 unsigned long flags; /* error bits/gfp mask */ 625 unsigned long flags; /* error bits/gfp mask */
626 struct backing_dev_info *backing_dev_info; /* device readahead, etc */ 626 struct backing_dev_info *backing_dev_info; /* device readahead, etc */
627 spinlock_t private_lock; /* for use by the address_space */ 627 spinlock_t private_lock; /* for use by the address_space */
628 struct list_head private_list; /* ditto */ 628 struct list_head private_list; /* ditto */
629 struct address_space *assoc_mapping; /* ditto */ 629 struct address_space *assoc_mapping; /* ditto */
630 } __attribute__((aligned(sizeof(long)))); 630 } __attribute__((aligned(sizeof(long))));
631 /* 631 /*
632 * On most architectures that alignment is already the case; but 632 * On most architectures that alignment is already the case; but
633 * must be enforced here for CRIS, to let the least significant bit 633 * must be enforced here for CRIS, to let the least significant bit
634 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. 634 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
635 */ 635 */
636 636
637 struct block_device { 637 struct block_device {
638 dev_t bd_dev; /* not a kdev_t - it's a search key */ 638 dev_t bd_dev; /* not a kdev_t - it's a search key */
639 struct inode * bd_inode; /* will die */ 639 struct inode * bd_inode; /* will die */
640 struct super_block * bd_super; 640 struct super_block * bd_super;
641 int bd_openers; 641 int bd_openers;
642 struct mutex bd_mutex; /* open/close mutex */ 642 struct mutex bd_mutex; /* open/close mutex */
643 struct semaphore bd_mount_sem; 643 struct semaphore bd_mount_sem;
644 struct list_head bd_inodes; 644 struct list_head bd_inodes;
645 void * bd_holder; 645 void * bd_holder;
646 int bd_holders; 646 int bd_holders;
647 #ifdef CONFIG_SYSFS 647 #ifdef CONFIG_SYSFS
648 struct list_head bd_holder_list; 648 struct list_head bd_holder_list;
649 #endif 649 #endif
650 struct block_device * bd_contains; 650 struct block_device * bd_contains;
651 unsigned bd_block_size; 651 unsigned bd_block_size;
652 struct hd_struct * bd_part; 652 struct hd_struct * bd_part;
653 /* number of times partitions within this device have been opened. */ 653 /* number of times partitions within this device have been opened. */
654 unsigned bd_part_count; 654 unsigned bd_part_count;
655 int bd_invalidated; 655 int bd_invalidated;
656 struct gendisk * bd_disk; 656 struct gendisk * bd_disk;
657 struct list_head bd_list; 657 struct list_head bd_list;
658 struct backing_dev_info *bd_inode_backing_dev_info; 658 struct backing_dev_info *bd_inode_backing_dev_info;
659 /* 659 /*
660 * Private data. You must have bd_claim'ed the block_device 660 * Private data. You must have bd_claim'ed the block_device
661 * to use this. NOTE: bd_claim allows an owner to claim 661 * to use this. NOTE: bd_claim allows an owner to claim
662 * the same device multiple times; the owner must take special 662 * the same device multiple times; the owner must take special
663 * care to not mess up bd_private for that case. 663 * care to not mess up bd_private for that case.
664 */ 664 */
665 unsigned long bd_private; 665 unsigned long bd_private;
666 666
667 /* The counter of freeze processes */ 667 /* The counter of freeze processes */
668 int bd_fsfreeze_count; 668 int bd_fsfreeze_count;
669 /* Mutex for freeze */ 669 /* Mutex for freeze */
670 struct mutex bd_fsfreeze_mutex; 670 struct mutex bd_fsfreeze_mutex;
671 }; 671 };
672 672
673 /* 673 /*
674 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache 674 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
675 * radix trees 675 * radix trees
676 */ 676 */
677 #define PAGECACHE_TAG_DIRTY 0 677 #define PAGECACHE_TAG_DIRTY 0
678 #define PAGECACHE_TAG_WRITEBACK 1 678 #define PAGECACHE_TAG_WRITEBACK 1
679 679
680 int mapping_tagged(struct address_space *mapping, int tag); 680 int mapping_tagged(struct address_space *mapping, int tag);
681 681
682 /* 682 /*
683 * Might pages of this file be mapped into userspace? 683 * Might pages of this file be mapped into userspace?
684 */ 684 */
685 static inline int mapping_mapped(struct address_space *mapping) 685 static inline int mapping_mapped(struct address_space *mapping)
686 { 686 {
687 return !prio_tree_empty(&mapping->i_mmap) || 687 return !prio_tree_empty(&mapping->i_mmap) ||
688 !list_empty(&mapping->i_mmap_nonlinear); 688 !list_empty(&mapping->i_mmap_nonlinear);
689 } 689 }
690 690
691 /* 691 /*
692 * Might pages of this file have been modified in userspace? 692 * Might pages of this file have been modified in userspace?
693 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff 693 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
694 * marks a vma as VM_SHARED if it is shared and the file was opened for 694 * marks a vma as VM_SHARED if it is shared and the file was opened for
695 * writing; i.e. the vma may be mprotect()ed writable even if it is now readonly. 695 * writing; i.e. the vma may be mprotect()ed writable even if it is now readonly.
696 */ 696 */
697 static inline int mapping_writably_mapped(struct address_space *mapping) 697 static inline int mapping_writably_mapped(struct address_space *mapping)
698 { 698 {
699 return mapping->i_mmap_writable != 0; 699 return mapping->i_mmap_writable != 0;
700 } 700 }
701 701
702 /* 702 /*
703 * Use sequence counter to get consistent i_size on 32-bit processors. 703 * Use sequence counter to get consistent i_size on 32-bit processors.
704 */ 704 */
705 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 705 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
706 #include <linux/seqlock.h> 706 #include <linux/seqlock.h>
707 #define __NEED_I_SIZE_ORDERED 707 #define __NEED_I_SIZE_ORDERED
708 #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) 708 #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
709 #else 709 #else
710 #define i_size_ordered_init(inode) do { } while (0) 710 #define i_size_ordered_init(inode) do { } while (0)
711 #endif 711 #endif
712 712
713 struct inode { 713 struct inode {
714 struct hlist_node i_hash; 714 struct hlist_node i_hash;
715 struct list_head i_list; 715 struct list_head i_list;
716 struct list_head i_sb_list; 716 struct list_head i_sb_list;
717 struct list_head i_dentry; 717 struct list_head i_dentry;
718 unsigned long i_ino; 718 unsigned long i_ino;
719 atomic_t i_count; 719 atomic_t i_count;
720 unsigned int i_nlink; 720 unsigned int i_nlink;
721 uid_t i_uid; 721 uid_t i_uid;
722 gid_t i_gid; 722 gid_t i_gid;
723 dev_t i_rdev; 723 dev_t i_rdev;
724 u64 i_version; 724 u64 i_version;
725 loff_t i_size; 725 loff_t i_size;
726 #ifdef __NEED_I_SIZE_ORDERED 726 #ifdef __NEED_I_SIZE_ORDERED
727 seqcount_t i_size_seqcount; 727 seqcount_t i_size_seqcount;
728 #endif 728 #endif
729 struct timespec i_atime; 729 struct timespec i_atime;
730 struct timespec i_mtime; 730 struct timespec i_mtime;
731 struct timespec i_ctime; 731 struct timespec i_ctime;
732 unsigned int i_blkbits; 732 unsigned int i_blkbits;
733 blkcnt_t i_blocks; 733 blkcnt_t i_blocks;
734 unsigned short i_bytes; 734 unsigned short i_bytes;
735 umode_t i_mode; 735 umode_t i_mode;
736 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 736 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
737 struct mutex i_mutex; 737 struct mutex i_mutex;
738 struct rw_semaphore i_alloc_sem; 738 struct rw_semaphore i_alloc_sem;
739 const struct inode_operations *i_op; 739 const struct inode_operations *i_op;
740 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 740 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
741 struct super_block *i_sb; 741 struct super_block *i_sb;
742 struct file_lock *i_flock; 742 struct file_lock *i_flock;
743 struct address_space *i_mapping; 743 struct address_space *i_mapping;
744 struct address_space i_data; 744 struct address_space i_data;
745 #ifdef CONFIG_QUOTA 745 #ifdef CONFIG_QUOTA
746 struct dquot *i_dquot[MAXQUOTAS]; 746 struct dquot *i_dquot[MAXQUOTAS];
747 #endif 747 #endif
748 struct list_head i_devices; 748 struct list_head i_devices;
749 union { 749 union {
750 struct pipe_inode_info *i_pipe; 750 struct pipe_inode_info *i_pipe;
751 struct block_device *i_bdev; 751 struct block_device *i_bdev;
752 struct cdev *i_cdev; 752 struct cdev *i_cdev;
753 }; 753 };
754 int i_cindex; 754 int i_cindex;
755 755
756 __u32 i_generation; 756 __u32 i_generation;
757 757
758 #ifdef CONFIG_FSNOTIFY 758 #ifdef CONFIG_FSNOTIFY
759 __u32 i_fsnotify_mask; /* all events this inode cares about */ 759 __u32 i_fsnotify_mask; /* all events this inode cares about */
760 struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ 760 struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */
761 #endif 761 #endif
762 762
763 #ifdef CONFIG_INOTIFY 763 #ifdef CONFIG_INOTIFY
764 struct list_head inotify_watches; /* watches on this inode */ 764 struct list_head inotify_watches; /* watches on this inode */
765 struct mutex inotify_mutex; /* protects the watches list */ 765 struct mutex inotify_mutex; /* protects the watches list */
766 #endif 766 #endif
767 767
768 unsigned long i_state; 768 unsigned long i_state;
769 unsigned long dirtied_when; /* jiffies of first dirtying */ 769 unsigned long dirtied_when; /* jiffies of first dirtying */
770 770
771 unsigned int i_flags; 771 unsigned int i_flags;
772 772
773 atomic_t i_writecount; 773 atomic_t i_writecount;
774 #ifdef CONFIG_SECURITY 774 #ifdef CONFIG_SECURITY
775 void *i_security; 775 void *i_security;
776 #endif 776 #endif
777 void *i_private; /* fs or device private pointer */ 777 void *i_private; /* fs or device private pointer */
778 }; 778 };
779 779
780 /* 780 /*
781 * inode->i_mutex nesting subclasses for the lock validator: 781 * inode->i_mutex nesting subclasses for the lock validator:
782 * 782 *
783 * 0: the object of the current VFS operation 783 * 0: the object of the current VFS operation
784 * 1: parent 784 * 1: parent
785 * 2: child/target 785 * 2: child/target
786 * 3: quota file 786 * 3: quota file
787 * 787 *
788 * The locking order between these classes is 788 * The locking order between these classes is
789 * parent -> child -> normal -> xattr -> quota 789 * parent -> child -> normal -> xattr -> quota
790 */ 790 */
791 enum inode_i_mutex_lock_class 791 enum inode_i_mutex_lock_class
792 { 792 {
793 I_MUTEX_NORMAL, 793 I_MUTEX_NORMAL,
794 I_MUTEX_PARENT, 794 I_MUTEX_PARENT,
795 I_MUTEX_CHILD, 795 I_MUTEX_CHILD,
796 I_MUTEX_XATTR, 796 I_MUTEX_XATTR,
797 I_MUTEX_QUOTA 797 I_MUTEX_QUOTA
798 }; 798 };
799 799
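Code that must hold several i_mutex locks at once passes these subclasses to mutex_lock_nested() so lockdep can tell the nesting apart. A hedged sketch from a rename-style path (dir and dentry come from the caller):

    mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
    mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
    /* ... operate on both inodes ... */
    mutex_unlock(&dentry->d_inode->i_mutex);
    mutex_unlock(&dir->d_inode->i_mutex);
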
800 /* 800 /*
801 * NOTE: on a 32bit arch with a preemptible kernel and 801 * NOTE: on a 32bit arch with a preemptible kernel and
802 * a UP compile, the i_size_read/write must be atomic 802 * a UP compile, the i_size_read/write must be atomic
803 * with respect to the local cpu (unlike with preempt disabled), 803 * with respect to the local cpu (unlike with preempt disabled),
804 * but they don't need to be atomic with respect to other cpus as in 804 * but they don't need to be atomic with respect to other cpus as in
805 * true SMP (so they either need to locally disable irq around 805 * true SMP (so they either need to locally disable irq around
806 * the read, or, on x86 for example, be implemented as a 806 * the read, or, on x86 for example, be implemented as a
807 * cmpxchg8b without the need of the lock prefix). For SMP compiles 807 * cmpxchg8b without the need of the lock prefix). For SMP compiles
808 * and 64bit archs it makes no difference whether preempt is enabled or not. 808 * and 64bit archs it makes no difference whether preempt is enabled or not.
809 */ 809 */
810 static inline loff_t i_size_read(const struct inode *inode) 810 static inline loff_t i_size_read(const struct inode *inode)
811 { 811 {
812 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 812 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
813 loff_t i_size; 813 loff_t i_size;
814 unsigned int seq; 814 unsigned int seq;
815 815
816 do { 816 do {
817 seq = read_seqcount_begin(&inode->i_size_seqcount); 817 seq = read_seqcount_begin(&inode->i_size_seqcount);
818 i_size = inode->i_size; 818 i_size = inode->i_size;
819 } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); 819 } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
820 return i_size; 820 return i_size;
821 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 821 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
822 loff_t i_size; 822 loff_t i_size;
823 823
824 preempt_disable(); 824 preempt_disable();
825 i_size = inode->i_size; 825 i_size = inode->i_size;
826 preempt_enable(); 826 preempt_enable();
827 return i_size; 827 return i_size;
828 #else 828 #else
829 return inode->i_size; 829 return inode->i_size;
830 #endif 830 #endif
831 } 831 }
832 832
833 /* 833 /*
834 * NOTE: unlike i_size_read(), i_size_write() does need locking around it 834 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
835 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount 835 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
836 * can be lost, resulting in subsequent i_size_read() calls spinning forever. 836 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
837 */ 837 */
838 static inline void i_size_write(struct inode *inode, loff_t i_size) 838 static inline void i_size_write(struct inode *inode, loff_t i_size)
839 { 839 {
840 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 840 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
841 write_seqcount_begin(&inode->i_size_seqcount); 841 write_seqcount_begin(&inode->i_size_seqcount);
842 inode->i_size = i_size; 842 inode->i_size = i_size;
843 write_seqcount_end(&inode->i_size_seqcount); 843 write_seqcount_end(&inode->i_size_seqcount);
844 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 844 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
845 preempt_disable(); 845 preempt_disable();
846 inode->i_size = i_size; 846 inode->i_size = i_size;
847 preempt_enable(); 847 preempt_enable();
848 #else 848 #else
849 inode->i_size = i_size; 849 inode->i_size = i_size;
850 #endif 850 #endif
851 } 851 }
852 852
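In practice that means an extending write updates i_size only while serialized, typically under i_mutex. A hedged sketch (pos and len come from the caller):

    mutex_lock(&inode->i_mutex);
    if (pos + len > i_size_read(inode))
            i_size_write(inode, pos + len); /* seqcount-protected store */
    mutex_unlock(&inode->i_mutex);
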
853 static inline unsigned iminor(const struct inode *inode) 853 static inline unsigned iminor(const struct inode *inode)
854 { 854 {
855 return MINOR(inode->i_rdev); 855 return MINOR(inode->i_rdev);
856 } 856 }
857 857
858 static inline unsigned imajor(const struct inode *inode) 858 static inline unsigned imajor(const struct inode *inode)
859 { 859 {
860 return MAJOR(inode->i_rdev); 860 return MAJOR(inode->i_rdev);
861 } 861 }
862 862
863 extern struct block_device *I_BDEV(struct inode *inode); 863 extern struct block_device *I_BDEV(struct inode *inode);
864 864
865 struct fown_struct { 865 struct fown_struct {
866 rwlock_t lock; /* protects pid, uid, euid fields */ 866 rwlock_t lock; /* protects pid, uid, euid fields */
867 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ 867 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
868 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ 868 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
869 uid_t uid, euid; /* uid/euid of process setting the owner */ 869 uid_t uid, euid; /* uid/euid of process setting the owner */
870 int signum; /* posix.1b rt signal to be delivered on IO */ 870 int signum; /* posix.1b rt signal to be delivered on IO */
871 }; 871 };
872 872
873 /* 873 /*
874 * Track a single file's readahead state 874 * Track a single file's readahead state
875 */ 875 */
876 struct file_ra_state { 876 struct file_ra_state {
877 pgoff_t start; /* where readahead started */ 877 pgoff_t start; /* where readahead started */
878 unsigned int size; /* # of readahead pages */ 878 unsigned int size; /* # of readahead pages */
879 unsigned int async_size; /* do asynchronous readahead when 879 unsigned int async_size; /* do asynchronous readahead when
880 there are only # of pages ahead */ 880 there are only # of pages ahead */
881 881
882 unsigned int ra_pages; /* Maximum readahead window */ 882 unsigned int ra_pages; /* Maximum readahead window */
883 int mmap_miss; /* Cache miss stat for mmap accesses */ 883 int mmap_miss; /* Cache miss stat for mmap accesses */
884 loff_t prev_pos; /* Cache last read() position */ 884 loff_t prev_pos; /* Cache last read() position */
885 }; 885 };
886 886
887 /* 887 /*
888 * Check if @index falls in the readahead window. 888 * Check if @index falls in the readahead window.
889 */ 889 */
890 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) 890 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
891 { 891 {
892 return (index >= ra->start && 892 return (index >= ra->start &&
893 index < ra->start + ra->size); 893 index < ra->start + ra->size);
894 } 894 }
895 895
896 #define FILE_MNT_WRITE_TAKEN 1 896 #define FILE_MNT_WRITE_TAKEN 1
897 #define FILE_MNT_WRITE_RELEASED 2 897 #define FILE_MNT_WRITE_RELEASED 2
898 898
899 struct file { 899 struct file {
900 /* 900 /*
901 * fu_list becomes invalid after file_free is called and queued via 901 * fu_list becomes invalid after file_free is called and queued via
902 * fu_rcuhead for RCU freeing 902 * fu_rcuhead for RCU freeing
903 */ 903 */
904 union { 904 union {
905 struct list_head fu_list; 905 struct list_head fu_list;
906 struct rcu_head fu_rcuhead; 906 struct rcu_head fu_rcuhead;
907 } f_u; 907 } f_u;
908 struct path f_path; 908 struct path f_path;
909 #define f_dentry f_path.dentry 909 #define f_dentry f_path.dentry
910 #define f_vfsmnt f_path.mnt 910 #define f_vfsmnt f_path.mnt
911 const struct file_operations *f_op; 911 const struct file_operations *f_op;
912 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ 912 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
913 atomic_long_t f_count; 913 atomic_long_t f_count;
914 unsigned int f_flags; 914 unsigned int f_flags;
915 fmode_t f_mode; 915 fmode_t f_mode;
916 loff_t f_pos; 916 loff_t f_pos;
917 struct fown_struct f_owner; 917 struct fown_struct f_owner;
918 const struct cred *f_cred; 918 const struct cred *f_cred;
919 struct file_ra_state f_ra; 919 struct file_ra_state f_ra;
920 920
921 u64 f_version; 921 u64 f_version;
922 #ifdef CONFIG_SECURITY 922 #ifdef CONFIG_SECURITY
923 void *f_security; 923 void *f_security;
924 #endif 924 #endif
925 /* needed for tty driver, and maybe others */ 925 /* needed for tty driver, and maybe others */
926 void *private_data; 926 void *private_data;
927 927
928 #ifdef CONFIG_EPOLL 928 #ifdef CONFIG_EPOLL
929 /* Used by fs/eventpoll.c to link all the hooks to this file */ 929 /* Used by fs/eventpoll.c to link all the hooks to this file */
930 struct list_head f_ep_links; 930 struct list_head f_ep_links;
931 #endif /* #ifdef CONFIG_EPOLL */ 931 #endif /* #ifdef CONFIG_EPOLL */
932 struct address_space *f_mapping; 932 struct address_space *f_mapping;
933 #ifdef CONFIG_DEBUG_WRITECOUNT 933 #ifdef CONFIG_DEBUG_WRITECOUNT
934 unsigned long f_mnt_write_state; 934 unsigned long f_mnt_write_state;
935 #endif 935 #endif
936 }; 936 };
937 extern spinlock_t files_lock; 937 extern spinlock_t files_lock;
938 #define file_list_lock() spin_lock(&files_lock); 938 #define file_list_lock() spin_lock(&files_lock);
939 #define file_list_unlock() spin_unlock(&files_lock); 939 #define file_list_unlock() spin_unlock(&files_lock);
940 940
941 #define get_file(x) atomic_long_inc(&(x)->f_count) 941 #define get_file(x) atomic_long_inc(&(x)->f_count)
942 #define file_count(x) atomic_long_read(&(x)->f_count) 942 #define file_count(x) atomic_long_read(&(x)->f_count)
943 943
944 #ifdef CONFIG_DEBUG_WRITECOUNT 944 #ifdef CONFIG_DEBUG_WRITECOUNT
945 static inline void file_take_write(struct file *f) 945 static inline void file_take_write(struct file *f)
946 { 946 {
947 WARN_ON(f->f_mnt_write_state != 0); 947 WARN_ON(f->f_mnt_write_state != 0);
948 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; 948 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
949 } 949 }
950 static inline void file_release_write(struct file *f) 950 static inline void file_release_write(struct file *f)
951 { 951 {
952 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; 952 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
953 } 953 }
954 static inline void file_reset_write(struct file *f) 954 static inline void file_reset_write(struct file *f)
955 { 955 {
956 f->f_mnt_write_state = 0; 956 f->f_mnt_write_state = 0;
957 } 957 }
958 static inline void file_check_state(struct file *f) 958 static inline void file_check_state(struct file *f)
959 { 959 {
960 /* 960 /*
961 * At this point, either both or neither of these bits 961 * At this point, either both or neither of these bits
962 * should be set. 962 * should be set.
963 */ 963 */
964 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); 964 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
965 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); 965 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
966 } 966 }
967 static inline int file_check_writeable(struct file *f) 967 static inline int file_check_writeable(struct file *f)
968 { 968 {
969 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) 969 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
970 return 0; 970 return 0;
971 printk(KERN_WARNING "writeable file with no " 971 printk(KERN_WARNING "writeable file with no "
972 "mnt_want_write()\n"); 972 "mnt_want_write()\n");
973 WARN_ON(1); 973 WARN_ON(1);
974 return -EINVAL; 974 return -EINVAL;
975 } 975 }
976 #else /* !CONFIG_DEBUG_WRITECOUNT */ 976 #else /* !CONFIG_DEBUG_WRITECOUNT */
977 static inline void file_take_write(struct file *filp) {} 977 static inline void file_take_write(struct file *filp) {}
978 static inline void file_release_write(struct file *filp) {} 978 static inline void file_release_write(struct file *filp) {}
979 static inline void file_reset_write(struct file *filp) {} 979 static inline void file_reset_write(struct file *filp) {}
980 static inline void file_check_state(struct file *filp) {} 980 static inline void file_check_state(struct file *filp) {}
981 static inline int file_check_writeable(struct file *filp) 981 static inline int file_check_writeable(struct file *filp)
982 { 982 {
983 return 0; 983 return 0;
984 } 984 }
985 #endif /* CONFIG_DEBUG_WRITECOUNT */ 985 #endif /* CONFIG_DEBUG_WRITECOUNT */
986 986
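For context, a minimal sketch of how these debug helpers pair with the mount write count, assuming a hypothetical open/release path (the real consumers live in fs/open.c and fs/file_table.c):

/* Hypothetical pairing: under CONFIG_DEBUG_WRITECOUNT the helpers
 * record that this file took (and later released) mnt_want_write(). */
static int example_open_for_write(struct file *filp, struct vfsmount *mnt)
{
	int err = mnt_want_write(mnt);	/* take write access on the mount */
	if (err)
		return err;
	file_take_write(filp);		/* remember we hold it */
	return 0;
}

static void example_close_for_write(struct file *filp, struct vfsmount *mnt)
{
	mnt_drop_write(mnt);		/* give the write access back */
	file_release_write(filp);	/* remember we released it */
}

An unbalanced pair trips the WARN_ON() in file_check_state() or file_check_writeable() above.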
987 #define MAX_NON_LFS ((1UL<<31) - 1) 987 #define MAX_NON_LFS ((1UL<<31) - 1)
988 988
989 /* Page cache limit. Filesystems should set this as an upper bound on 989 /* Page cache limit. Filesystems should set this as an upper bound on
990 their s_maxbytes, otherwise bad things can happen in the VM. */ 990 their s_maxbytes, otherwise bad things can happen in the VM. */
991 #if BITS_PER_LONG==32 991 #if BITS_PER_LONG==32
992 #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 992 #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
993 #elif BITS_PER_LONG==64 993 #elif BITS_PER_LONG==64
994 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 994 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
995 #endif 995 #endif
996 996
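Worked out for the 32-bit case with 4 KiB pages: MAX_LFS_FILESIZE = ((u64)4096 << 31) - 1 = 2^43 - 1 bytes, just under 8 TiB. The shift is BITS_PER_LONG - 1 rather than BITS_PER_LONG, which keeps the maximum page-cache index within the positive range of a signed 32-bit value.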
997 #define FL_POSIX 1 997 #define FL_POSIX 1
998 #define FL_FLOCK 2 998 #define FL_FLOCK 2
999 #define FL_ACCESS 8 /* not trying to lock, just looking */ 999 #define FL_ACCESS 8 /* not trying to lock, just looking */
1000 #define FL_EXISTS 16 /* when unlocking, test for existence */ 1000 #define FL_EXISTS 16 /* when unlocking, test for existence */
1001 #define FL_LEASE 32 /* lease held on this file */ 1001 #define FL_LEASE 32 /* lease held on this file */
1002 #define FL_CLOSE 64 /* unlock on close */ 1002 #define FL_CLOSE 64 /* unlock on close */
1003 #define FL_SLEEP 128 /* A blocking lock */ 1003 #define FL_SLEEP 128 /* A blocking lock */
1004 1004
1005 /* 1005 /*
1006 * Special return value from posix_lock_file() and vfs_lock_file() for 1006 * Special return value from posix_lock_file() and vfs_lock_file() for
1007 * asynchronous locking. 1007 * asynchronous locking.
1008 */ 1008 */
1009 #define FILE_LOCK_DEFERRED 1 1009 #define FILE_LOCK_DEFERRED 1
1010 1010
1011 /* 1011 /*
1012 * The POSIX file lock owner is determined by 1012 * The POSIX file lock owner is determined by
1013 * the "struct files_struct" in the thread group 1013 * the "struct files_struct" in the thread group
1014 * (or NULL for no owner - BSD locks). 1014 * (or NULL for no owner - BSD locks).
1015 * 1015 *
1016 * Lockd stuffs a "host" pointer into this. 1016 * Lockd stuffs a "host" pointer into this.
1017 */ 1017 */
1018 typedef struct files_struct *fl_owner_t; 1018 typedef struct files_struct *fl_owner_t;
1019 1019
1020 struct file_lock_operations { 1020 struct file_lock_operations {
1021 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 1021 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
1022 void (*fl_release_private)(struct file_lock *); 1022 void (*fl_release_private)(struct file_lock *);
1023 }; 1023 };
1024 1024
1025 struct lock_manager_operations { 1025 struct lock_manager_operations {
1026 int (*fl_compare_owner)(struct file_lock *, struct file_lock *); 1026 int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
1027 void (*fl_notify)(struct file_lock *); /* unblock callback */ 1027 void (*fl_notify)(struct file_lock *); /* unblock callback */
1028 int (*fl_grant)(struct file_lock *, struct file_lock *, int); 1028 int (*fl_grant)(struct file_lock *, struct file_lock *, int);
1029 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 1029 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
1030 void (*fl_release_private)(struct file_lock *); 1030 void (*fl_release_private)(struct file_lock *);
1031 void (*fl_break)(struct file_lock *); 1031 void (*fl_break)(struct file_lock *);
1032 int (*fl_mylease)(struct file_lock *, struct file_lock *); 1032 int (*fl_mylease)(struct file_lock *, struct file_lock *);
1033 int (*fl_change)(struct file_lock **, int); 1033 int (*fl_change)(struct file_lock **, int);
1034 }; 1034 };
1035 1035
1036 struct lock_manager { 1036 struct lock_manager {
1037 struct list_head list; 1037 struct list_head list;
1038 }; 1038 };
1039 1039
1040 void locks_start_grace(struct lock_manager *); 1040 void locks_start_grace(struct lock_manager *);
1041 void locks_end_grace(struct lock_manager *); 1041 void locks_end_grace(struct lock_manager *);
1042 int locks_in_grace(void); 1042 int locks_in_grace(void);
1043 1043
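A sketch of the intended use, assuming a hypothetical lock manager along the lines of lockd/nfsd: enter a grace period at startup so clients can reclaim their locks, and refuse ordinary requests while locks_in_grace() is true.

static struct lock_manager example_manager;

static void example_startup(void)
{
	locks_start_grace(&example_manager);	/* reclaim window opens */
}

static void example_grace_done(void)
{
	locks_end_grace(&example_manager);	/* normal locking resumes */
}

static int example_new_lock_allowed(void)
{
	return !locks_in_grace();	/* non-reclaim requests wait it out */
}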
1044 /* that will die - we need it for nfs_lock_info */ 1044 /* that will die - we need it for nfs_lock_info */
1045 #include <linux/nfs_fs_i.h> 1045 #include <linux/nfs_fs_i.h>
1046 1046
1047 struct file_lock { 1047 struct file_lock {
1048 struct file_lock *fl_next; /* singly linked list for this inode */ 1048 struct file_lock *fl_next; /* singly linked list for this inode */
1049 struct list_head fl_link; /* doubly linked list of all locks */ 1049 struct list_head fl_link; /* doubly linked list of all locks */
1050 struct list_head fl_block; /* circular list of blocked processes */ 1050 struct list_head fl_block; /* circular list of blocked processes */
1051 fl_owner_t fl_owner; 1051 fl_owner_t fl_owner;
1052 unsigned char fl_flags; 1052 unsigned char fl_flags;
1053 unsigned char fl_type; 1053 unsigned char fl_type;
1054 unsigned int fl_pid; 1054 unsigned int fl_pid;
1055 struct pid *fl_nspid; 1055 struct pid *fl_nspid;
1056 wait_queue_head_t fl_wait; 1056 wait_queue_head_t fl_wait;
1057 struct file *fl_file; 1057 struct file *fl_file;
1058 loff_t fl_start; 1058 loff_t fl_start;
1059 loff_t fl_end; 1059 loff_t fl_end;
1060 1060
1061 struct fasync_struct * fl_fasync; /* for lease break notifications */ 1061 struct fasync_struct * fl_fasync; /* for lease break notifications */
1062 unsigned long fl_break_time; /* for nonblocking lease breaks */ 1062 unsigned long fl_break_time; /* for nonblocking lease breaks */
1063 1063
1064 struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ 1064 struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
1065 struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ 1065 struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
1066 union { 1066 union {
1067 struct nfs_lock_info nfs_fl; 1067 struct nfs_lock_info nfs_fl;
1068 struct nfs4_lock_info nfs4_fl; 1068 struct nfs4_lock_info nfs4_fl;
1069 struct { 1069 struct {
1070 struct list_head link; /* link in AFS vnode's pending_locks list */ 1070 struct list_head link; /* link in AFS vnode's pending_locks list */
1071 int state; /* state of grant or error if -ve */ 1071 int state; /* state of grant or error if -ve */
1072 } afs; 1072 } afs;
1073 } fl_u; 1073 } fl_u;
1074 }; 1074 };
1075 1075
1076 /* The following constant reflects the upper bound of the file/locking space */ 1076 /* The following constant reflects the upper bound of the file/locking space */
1077 #ifndef OFFSET_MAX 1077 #ifndef OFFSET_MAX
1078 #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) 1078 #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
1079 #define OFFSET_MAX INT_LIMIT(loff_t) 1079 #define OFFSET_MAX INT_LIMIT(loff_t)
1080 #define OFFT_OFFSET_MAX INT_LIMIT(off_t) 1080 #define OFFT_OFFSET_MAX INT_LIMIT(off_t)
1081 #endif 1081 #endif
1082 1082
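With a 64-bit loff_t, INT_LIMIT(loff_t) expands to ~((loff_t)1 << 63) = 0x7fffffffffffffff, i.e. LLONG_MAX; lock requests use fl_end = OFFSET_MAX to mean "to end of file".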
1083 #include <linux/fcntl.h> 1083 #include <linux/fcntl.h>
1084 1084
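As an illustration, a sketch of a kernel-internal blocking request for a whole-file POSIX write lock (filp is assumed to be an open struct file; in-kernel callers normally go through posix_lock_file_wait(), as here):

static int example_whole_file_wrlock(struct file *filp)
{
	struct file_lock fl;

	locks_init_lock(&fl);
	fl.fl_owner = current->files;		/* POSIX owner: files_struct */
	fl.fl_pid   = current->tgid;
	fl.fl_file  = filp;
	fl.fl_flags = FL_POSIX | FL_SLEEP;	/* a blocking request */
	fl.fl_type  = F_WRLCK;
	fl.fl_start = 0;
	fl.fl_end   = OFFSET_MAX;		/* covers the whole file */

	/* Sleeps until granted; the lock is copied off the stack on grant. */
	return posix_lock_file_wait(filp, &fl);
}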
1085 extern void send_sigio(struct fown_struct *fown, int fd, int band); 1085 extern void send_sigio(struct fown_struct *fown, int fd, int band);
1086 1086
1087 /* fs/sync.c */ 1087 /* fs/sync.c */
1088 extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, 1088 extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
1089 loff_t endbyte, unsigned int flags); 1089 loff_t endbyte, unsigned int flags);
1090 1090
1091 #ifdef CONFIG_FILE_LOCKING 1091 #ifdef CONFIG_FILE_LOCKING
1092 extern int fcntl_getlk(struct file *, struct flock __user *); 1092 extern int fcntl_getlk(struct file *, struct flock __user *);
1093 extern int fcntl_setlk(unsigned int, struct file *, unsigned int, 1093 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
1094 struct flock __user *); 1094 struct flock __user *);
1095 1095
1096 #if BITS_PER_LONG == 32 1096 #if BITS_PER_LONG == 32
1097 extern int fcntl_getlk64(struct file *, struct flock64 __user *); 1097 extern int fcntl_getlk64(struct file *, struct flock64 __user *);
1098 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, 1098 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
1099 struct flock64 __user *); 1099 struct flock64 __user *);
1100 #endif 1100 #endif
1101 1101
1102 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); 1102 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
1103 extern int fcntl_getlease(struct file *filp); 1103 extern int fcntl_getlease(struct file *filp);
1104 1104
1105 /* fs/locks.c */ 1105 /* fs/locks.c */
1106 extern void locks_init_lock(struct file_lock *); 1106 extern void locks_init_lock(struct file_lock *);
1107 extern void locks_copy_lock(struct file_lock *, struct file_lock *); 1107 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
1108 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); 1108 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
1109 extern void locks_remove_posix(struct file *, fl_owner_t); 1109 extern void locks_remove_posix(struct file *, fl_owner_t);
1110 extern void locks_remove_flock(struct file *); 1110 extern void locks_remove_flock(struct file *);
1111 extern void posix_test_lock(struct file *, struct file_lock *); 1111 extern void posix_test_lock(struct file *, struct file_lock *);
1112 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); 1112 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
1113 extern int posix_lock_file_wait(struct file *, struct file_lock *); 1113 extern int posix_lock_file_wait(struct file *, struct file_lock *);
1114 extern int posix_unblock_lock(struct file *, struct file_lock *); 1114 extern int posix_unblock_lock(struct file *, struct file_lock *);
1115 extern int vfs_test_lock(struct file *, struct file_lock *); 1115 extern int vfs_test_lock(struct file *, struct file_lock *);
1116 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); 1116 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
1117 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); 1117 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
1118 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); 1118 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
1119 extern int __break_lease(struct inode *inode, unsigned int flags); 1119 extern int __break_lease(struct inode *inode, unsigned int flags);
1120 extern void lease_get_mtime(struct inode *, struct timespec *time); 1120 extern void lease_get_mtime(struct inode *, struct timespec *time);
1121 extern int generic_setlease(struct file *, long, struct file_lock **); 1121 extern int generic_setlease(struct file *, long, struct file_lock **);
1122 extern int vfs_setlease(struct file *, long, struct file_lock **); 1122 extern int vfs_setlease(struct file *, long, struct file_lock **);
1123 extern int lease_modify(struct file_lock **, int); 1123 extern int lease_modify(struct file_lock **, int);
1124 extern int lock_may_read(struct inode *, loff_t start, unsigned long count); 1124 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
1125 extern int lock_may_write(struct inode *, loff_t start, unsigned long count); 1125 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
1126 #else /* !CONFIG_FILE_LOCKING */ 1126 #else /* !CONFIG_FILE_LOCKING */
1127 static inline int fcntl_getlk(struct file *file, struct flock __user *user) 1127 static inline int fcntl_getlk(struct file *file, struct flock __user *user)
1128 { 1128 {
1129 return -EINVAL; 1129 return -EINVAL;
1130 } 1130 }
1131 1131
1132 static inline int fcntl_setlk(unsigned int fd, struct file *file, 1132 static inline int fcntl_setlk(unsigned int fd, struct file *file,
1133 unsigned int cmd, struct flock __user *user) 1133 unsigned int cmd, struct flock __user *user)
1134 { 1134 {
1135 return -EACCES; 1135 return -EACCES;
1136 } 1136 }
1137 1137
1138 #if BITS_PER_LONG == 32 1138 #if BITS_PER_LONG == 32
1139 static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) 1139 static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
1140 { 1140 {
1141 return -EINVAL; 1141 return -EINVAL;
1142 } 1142 }
1143 1143
1144 static inline int fcntl_setlk64(unsigned int fd, struct file *file, 1144 static inline int fcntl_setlk64(unsigned int fd, struct file *file,
1145 unsigned int cmd, struct flock64 __user *user) 1145 unsigned int cmd, struct flock64 __user *user)
1146 { 1146 {
1147 return -EACCES; 1147 return -EACCES;
1148 } 1148 }
1149 #endif 1149 #endif
1150 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1150 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1151 { 1151 {
1152 return 0; 1152 return 0;
1153 } 1153 }
1154 1154
1155 static inline int fcntl_getlease(struct file *filp) 1155 static inline int fcntl_getlease(struct file *filp)
1156 { 1156 {
1157 return 0; 1157 return 0;
1158 } 1158 }
1159 1159
1160 static inline void locks_init_lock(struct file_lock *fl) 1160 static inline void locks_init_lock(struct file_lock *fl)
1161 { 1161 {
1162 return; 1162 return;
1163 } 1163 }
1164 1164
1165 static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1165 static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1166 { 1166 {
1167 return; 1167 return;
1168 } 1168 }
1169 1169
1170 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1170 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1171 { 1171 {
1172 return; 1172 return;
1173 } 1173 }
1174 1174
1175 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) 1175 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
1176 { 1176 {
1177 return; 1177 return;
1178 } 1178 }
1179 1179
1180 static inline void locks_remove_flock(struct file *filp) 1180 static inline void locks_remove_flock(struct file *filp)
1181 { 1181 {
1182 return; 1182 return;
1183 } 1183 }
1184 1184
1185 static inline void posix_test_lock(struct file *filp, struct file_lock *fl) 1185 static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
1186 { 1186 {
1187 return; 1187 return;
1188 } 1188 }
1189 1189
1190 static inline int posix_lock_file(struct file *filp, struct file_lock *fl, 1190 static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
1191 struct file_lock *conflock) 1191 struct file_lock *conflock)
1192 { 1192 {
1193 return -ENOLCK; 1193 return -ENOLCK;
1194 } 1194 }
1195 1195
1196 static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) 1196 static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1197 { 1197 {
1198 return -ENOLCK; 1198 return -ENOLCK;
1199 } 1199 }
1200 1200
1201 static inline int posix_unblock_lock(struct file *filp, 1201 static inline int posix_unblock_lock(struct file *filp,
1202 struct file_lock *waiter) 1202 struct file_lock *waiter)
1203 { 1203 {
1204 return -ENOENT; 1204 return -ENOENT;
1205 } 1205 }
1206 1206
1207 static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) 1207 static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
1208 { 1208 {
1209 return 0; 1209 return 0;
1210 } 1210 }
1211 1211
1212 static inline int vfs_lock_file(struct file *filp, unsigned int cmd, 1212 static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
1213 struct file_lock *fl, struct file_lock *conf) 1213 struct file_lock *fl, struct file_lock *conf)
1214 { 1214 {
1215 return -ENOLCK; 1215 return -ENOLCK;
1216 } 1216 }
1217 1217
1218 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 1218 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
1219 { 1219 {
1220 return 0; 1220 return 0;
1221 } 1221 }
1222 1222
1223 static inline int flock_lock_file_wait(struct file *filp, 1223 static inline int flock_lock_file_wait(struct file *filp,
1224 struct file_lock *request) 1224 struct file_lock *request)
1225 { 1225 {
1226 return -ENOLCK; 1226 return -ENOLCK;
1227 } 1227 }
1228 1228
1229 static inline int __break_lease(struct inode *inode, unsigned int mode) 1229 static inline int __break_lease(struct inode *inode, unsigned int mode)
1230 { 1230 {
1231 return 0; 1231 return 0;
1232 } 1232 }
1233 1233
1234 static inline void lease_get_mtime(struct inode *inode, struct timespec *time) 1234 static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
1235 { 1235 {
1236 return; 1236 return;
1237 } 1237 }
1238 1238
1239 static inline int generic_setlease(struct file *filp, long arg, 1239 static inline int generic_setlease(struct file *filp, long arg,
1240 struct file_lock **flp) 1240 struct file_lock **flp)
1241 { 1241 {
1242 return -EINVAL; 1242 return -EINVAL;
1243 } 1243 }
1244 1244
1245 static inline int vfs_setlease(struct file *filp, long arg, 1245 static inline int vfs_setlease(struct file *filp, long arg,
1246 struct file_lock **lease) 1246 struct file_lock **lease)
1247 { 1247 {
1248 return -EINVAL; 1248 return -EINVAL;
1249 } 1249 }
1250 1250
1251 static inline int lease_modify(struct file_lock **before, int arg) 1251 static inline int lease_modify(struct file_lock **before, int arg)
1252 { 1252 {
1253 return -EINVAL; 1253 return -EINVAL;
1254 } 1254 }
1255 1255
1256 static inline int lock_may_read(struct inode *inode, loff_t start, 1256 static inline int lock_may_read(struct inode *inode, loff_t start,
1257 unsigned long len) 1257 unsigned long len)
1258 { 1258 {
1259 return 1; 1259 return 1;
1260 } 1260 }
1261 1261
1262 static inline int lock_may_write(struct inode *inode, loff_t start, 1262 static inline int lock_may_write(struct inode *inode, loff_t start,
1263 unsigned long len) 1263 unsigned long len)
1264 { 1264 {
1265 return 1; 1265 return 1;
1266 } 1266 }
1267 1267
1268 #endif /* !CONFIG_FILE_LOCKING */ 1268 #endif /* !CONFIG_FILE_LOCKING */
1269 1269
1270 1270
1271 struct fasync_struct { 1271 struct fasync_struct {
1272 int magic; 1272 int magic;
1273 int fa_fd; 1273 int fa_fd;
1274 struct fasync_struct *fa_next; /* singly linked list */ 1274 struct fasync_struct *fa_next; /* singly linked list */
1275 struct file *fa_file; 1275 struct file *fa_file;
1276 }; 1276 };
1277 1277
1278 #define FASYNC_MAGIC 0x4601 1278 #define FASYNC_MAGIC 0x4601
1279 1279
1280 /* SMP safe fasync helpers: */ 1280 /* SMP safe fasync helpers: */
1281 extern int fasync_helper(int, struct file *, int, struct fasync_struct **); 1281 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1282 /* can be called from interrupts */ 1282 /* can be called from interrupts */
1283 extern void kill_fasync(struct fasync_struct **, int, int); 1283 extern void kill_fasync(struct fasync_struct **, int, int);
1284 /* only for net: no internal synchronization */ 1284 /* only for net: no internal synchronization */
1285 extern void __kill_fasync(struct fasync_struct *, int, int); 1285 extern void __kill_fasync(struct fasync_struct *, int, int);
1286 1286
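The usual driver pattern for these helpers, sketched with a hypothetical device: wire ->fasync to fasync_helper(), then raise SIGIO from the data path.

static struct fasync_struct *example_async_queue;

static int example_fasync(int fd, struct file *filp, int on)
{
	/* adds or removes filp from the notification list */
	return fasync_helper(fd, filp, on, &example_async_queue);
}

static void example_data_arrived(void)
{
	/* safe from interrupt context, per the comment above */
	kill_fasync(&example_async_queue, SIGIO, POLL_IN);
}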
1287 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); 1287 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1288 extern int f_setown(struct file *filp, unsigned long arg, int force); 1288 extern int f_setown(struct file *filp, unsigned long arg, int force);
1289 extern void f_delown(struct file *filp); 1289 extern void f_delown(struct file *filp);
1290 extern pid_t f_getown(struct file *filp); 1290 extern pid_t f_getown(struct file *filp);
1291 extern int send_sigurg(struct fown_struct *fown); 1291 extern int send_sigurg(struct fown_struct *fown);
1292 1292
1293 /* 1293 /*
1294 * Umount options 1294 * Umount options
1295 */ 1295 */
1296 1296
1297 #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ 1297 #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */
1298 #define MNT_DETACH 0x00000002 /* Just detach from the tree */ 1298 #define MNT_DETACH 0x00000002 /* Just detach from the tree */
1299 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ 1299 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */
1300 1300
1301 extern struct list_head super_blocks; 1301 extern struct list_head super_blocks;
1302 extern spinlock_t sb_lock; 1302 extern spinlock_t sb_lock;
1303 1303
1304 #define sb_entry(list) list_entry((list), struct super_block, s_list) 1304 #define sb_entry(list) list_entry((list), struct super_block, s_list)
1305 #define S_BIAS (1<<30) 1305 #define S_BIAS (1<<30)
1306 struct super_block { 1306 struct super_block {
1307 struct list_head s_list; /* Keep this first */ 1307 struct list_head s_list; /* Keep this first */
1308 dev_t s_dev; /* search index; _not_ kdev_t */ 1308 dev_t s_dev; /* search index; _not_ kdev_t */
1309 unsigned long s_blocksize; 1309 unsigned long s_blocksize;
1310 unsigned char s_blocksize_bits; 1310 unsigned char s_blocksize_bits;
1311 unsigned char s_dirt; 1311 unsigned char s_dirt;
1312 unsigned long long s_maxbytes; /* Max file size */ 1312 unsigned long long s_maxbytes; /* Max file size */
1313 struct file_system_type *s_type; 1313 struct file_system_type *s_type;
1314 const struct super_operations *s_op; 1314 const struct super_operations *s_op;
1315 struct dquot_operations *dq_op; 1315 struct dquot_operations *dq_op;
1316 struct quotactl_ops *s_qcop; 1316 struct quotactl_ops *s_qcop;
1317 const struct export_operations *s_export_op; 1317 const struct export_operations *s_export_op;
1318 unsigned long s_flags; 1318 unsigned long s_flags;
1319 unsigned long s_magic; 1319 unsigned long s_magic;
1320 struct dentry *s_root; 1320 struct dentry *s_root;
1321 struct rw_semaphore s_umount; 1321 struct rw_semaphore s_umount;
1322 struct mutex s_lock; 1322 struct mutex s_lock;
1323 int s_count; 1323 int s_count;
1324 int s_need_sync_fs; 1324 int s_need_sync;
1325 atomic_t s_active; 1325 atomic_t s_active;
1326 #ifdef CONFIG_SECURITY 1326 #ifdef CONFIG_SECURITY
1327 void *s_security; 1327 void *s_security;
1328 #endif 1328 #endif
1329 struct xattr_handler **s_xattr; 1329 struct xattr_handler **s_xattr;
1330 1330
1331 struct list_head s_inodes; /* all inodes */ 1331 struct list_head s_inodes; /* all inodes */
1332 struct list_head s_dirty; /* dirty inodes */ 1332 struct list_head s_dirty; /* dirty inodes */
1333 struct list_head s_io; /* parked for writeback */ 1333 struct list_head s_io; /* parked for writeback */
1334 struct list_head s_more_io; /* parked for more writeback */ 1334 struct list_head s_more_io; /* parked for more writeback */
1335 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ 1335 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
1336 struct list_head s_files; 1336 struct list_head s_files;
1337 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ 1337 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
1338 struct list_head s_dentry_lru; /* unused dentry lru */ 1338 struct list_head s_dentry_lru; /* unused dentry lru */
1339 int s_nr_dentry_unused; /* # of dentry on lru */ 1339 int s_nr_dentry_unused; /* # of dentry on lru */
1340 1340
1341 struct block_device *s_bdev; 1341 struct block_device *s_bdev;
1342 struct mtd_info *s_mtd; 1342 struct mtd_info *s_mtd;
1343 struct list_head s_instances; 1343 struct list_head s_instances;
1344 struct quota_info s_dquot; /* Diskquota specific options */ 1344 struct quota_info s_dquot; /* Diskquota specific options */
1345 1345
1346 int s_frozen; 1346 int s_frozen;
1347 wait_queue_head_t s_wait_unfrozen; 1347 wait_queue_head_t s_wait_unfrozen;
1348 1348
1349 char s_id[32]; /* Informational name */ 1349 char s_id[32]; /* Informational name */
1350 1350
1351 void *s_fs_info; /* Filesystem private info */ 1351 void *s_fs_info; /* Filesystem private info */
1352 fmode_t s_mode; 1352 fmode_t s_mode;
1353 1353
1354 /* 1354 /*
1355 * The next field is for VFS *only*. No filesystems have any business 1355 * The next field is for VFS *only*. No filesystems have any business
1356 * even looking at it. You have been warned. 1356 * even looking at it. You have been warned.
1357 */ 1357 */
1358 struct mutex s_vfs_rename_mutex; /* Kludge */ 1358 struct mutex s_vfs_rename_mutex; /* Kludge */
1359 1359
1360 /* Granularity of c/m/atime in ns. 1360 /* Granularity of c/m/atime in ns.
1361 Cannot be worse than a second */ 1361 Cannot be worse than a second */
1362 u32 s_time_gran; 1362 u32 s_time_gran;
1363 1363
1364 /* 1364 /*
1365 * Filesystem subtype. If non-empty the filesystem type field 1365 * Filesystem subtype. If non-empty the filesystem type field
1366 * in /proc/mounts will be "type.subtype" 1366 * in /proc/mounts will be "type.subtype"
1367 */ 1367 */
1368 char *s_subtype; 1368 char *s_subtype;
1369 1369
1370 /* 1370 /*
1371 * Saved mount options for lazy filesystems using 1371 * Saved mount options for lazy filesystems using
1372 * generic_show_options() 1372 * generic_show_options()
1373 */ 1373 */
1374 char *s_options; 1374 char *s_options;
1375 }; 1375 };
1376 1376
1377 extern struct timespec current_fs_time(struct super_block *sb); 1377 extern struct timespec current_fs_time(struct super_block *sb);
1378 1378
1379 /* 1379 /*
1380 * Snapshotting support. 1380 * Snapshotting support.
1381 */ 1381 */
1382 enum { 1382 enum {
1383 SB_UNFROZEN = 0, 1383 SB_UNFROZEN = 0,
1384 SB_FREEZE_WRITE = 1, 1384 SB_FREEZE_WRITE = 1,
1385 SB_FREEZE_TRANS = 2, 1385 SB_FREEZE_TRANS = 2,
1386 }; 1386 };
1387 1387
1388 #define vfs_check_frozen(sb, level) \ 1388 #define vfs_check_frozen(sb, level) \
1389 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1389 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
1390 1390
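A sketch of how a write path honors the freeze levels, assuming a hypothetical filesystem: page-cache writes block at SB_FREEZE_WRITE, while a transaction start would check SB_FREEZE_TRANS instead.

static void example_wait_unfrozen(struct super_block *sb)
{
	/* sleeps on s_wait_unfrozen until s_frozen drops below the level */
	vfs_check_frozen(sb, SB_FREEZE_WRITE);
}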
1391 #define get_fs_excl() atomic_inc(&current->fs_excl) 1391 #define get_fs_excl() atomic_inc(&current->fs_excl)
1392 #define put_fs_excl() atomic_dec(&current->fs_excl) 1392 #define put_fs_excl() atomic_dec(&current->fs_excl)
1393 #define has_fs_excl() atomic_read(&current->fs_excl) 1393 #define has_fs_excl() atomic_read(&current->fs_excl)
1394 1394
1395 #define is_owner_or_cap(inode) \ 1395 #define is_owner_or_cap(inode) \
1396 ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) 1396 ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER))
1397 1397
1398 /* not quite ready to be deprecated, but... */ 1398 /* not quite ready to be deprecated, but... */
1399 extern void lock_super(struct super_block *); 1399 extern void lock_super(struct super_block *);
1400 extern void unlock_super(struct super_block *); 1400 extern void unlock_super(struct super_block *);
1401 1401
1402 /* 1402 /*
1403 * VFS helper functions. 1403 * VFS helper functions.
1404 */ 1404 */
1405 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); 1405 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
1406 extern int vfs_mkdir(struct inode *, struct dentry *, int); 1406 extern int vfs_mkdir(struct inode *, struct dentry *, int);
1407 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); 1407 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
1408 extern int vfs_symlink(struct inode *, struct dentry *, const char *); 1408 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
1409 extern int vfs_link(struct dentry *, struct inode *, struct dentry *); 1409 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
1410 extern int vfs_rmdir(struct inode *, struct dentry *); 1410 extern int vfs_rmdir(struct inode *, struct dentry *);
1411 extern int vfs_unlink(struct inode *, struct dentry *); 1411 extern int vfs_unlink(struct inode *, struct dentry *);
1412 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 1412 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
1413 1413
1414 /* 1414 /*
1415 * VFS dentry helper functions. 1415 * VFS dentry helper functions.
1416 */ 1416 */
1417 extern void dentry_unhash(struct dentry *dentry); 1417 extern void dentry_unhash(struct dentry *dentry);
1418 1418
1419 /* 1419 /*
1420 * VFS file helper functions. 1420 * VFS file helper functions.
1421 */ 1421 */
1422 extern int file_permission(struct file *, int); 1422 extern int file_permission(struct file *, int);
1423 1423
1424 /* 1424 /*
1425 * VFS FS_IOC_FIEMAP helper definitions. 1425 * VFS FS_IOC_FIEMAP helper definitions.
1426 */ 1426 */
1427 struct fiemap_extent_info { 1427 struct fiemap_extent_info {
1428 unsigned int fi_flags; /* Flags as passed from user */ 1428 unsigned int fi_flags; /* Flags as passed from user */
1429 unsigned int fi_extents_mapped; /* Number of mapped extents */ 1429 unsigned int fi_extents_mapped; /* Number of mapped extents */
1430 unsigned int fi_extents_max; /* Size of fiemap_extent array */ 1430 unsigned int fi_extents_max; /* Size of fiemap_extent array */
1431 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent 1431 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
1432 * array */ 1432 * array */
1433 }; 1433 };
1434 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, 1434 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
1435 u64 phys, u64 len, u32 flags); 1435 u64 phys, u64 len, u32 flags);
1436 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); 1436 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1437 1437
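A sketch of a ->fiemap method built on these helpers (hypothetical filesystem; the FIEMAP_* constants come from <linux/fiemap.h>), mapping the whole file as a single extent:

static int example_fiemap(struct inode *inode,
			  struct fiemap_extent_info *fieinfo,
			  u64 start, u64 len)
{
	int ret;

	/* reject user flags beyond what this filesystem supports */
	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	/* pretend the file is one contiguous extent at disk offset 0;
	 * fiemap_fill_next_extent() returns 1 once fi_extents_max is hit */
	ret = fiemap_fill_next_extent(fieinfo, 0, 0, i_size_read(inode),
				      FIEMAP_EXTENT_LAST);
	return ret < 0 ? ret : 0;
}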
1438 /* 1438 /*
1439 * File types 1439 * File types
1440 * 1440 *
1441 * NOTE! These match bits 12..15 of stat.st_mode 1441 * NOTE! These match bits 12..15 of stat.st_mode
1442 * (ie "(i_mode >> 12) & 15"). 1442 * (ie "(i_mode >> 12) & 15").
1443 */ 1443 */
1444 #define DT_UNKNOWN 0 1444 #define DT_UNKNOWN 0
1445 #define DT_FIFO 1 1445 #define DT_FIFO 1
1446 #define DT_CHR 2 1446 #define DT_CHR 2
1447 #define DT_DIR 4 1447 #define DT_DIR 4
1448 #define DT_BLK 6 1448 #define DT_BLK 6
1449 #define DT_REG 8 1449 #define DT_REG 8
1450 #define DT_LNK 10 1450 #define DT_LNK 10
1451 #define DT_SOCK 12 1451 #define DT_SOCK 12
1452 #define DT_WHT 14 1452 #define DT_WHT 14
1453 1453
1454 #define OSYNC_METADATA (1<<0) 1454 #define OSYNC_METADATA (1<<0)
1455 #define OSYNC_DATA (1<<1) 1455 #define OSYNC_DATA (1<<1)
1456 #define OSYNC_INODE (1<<2) 1456 #define OSYNC_INODE (1<<2)
1457 int generic_osync_inode(struct inode *, struct address_space *, int); 1457 int generic_osync_inode(struct inode *, struct address_space *, int);
1458 1458
1459 /* 1459 /*
1460 * This is the "filldir" function type, used by readdir() to let 1460 * This is the "filldir" function type, used by readdir() to let
1461 * the kernel specify what kind of dirent layout it wants to have. 1461 * the kernel specify what kind of dirent layout it wants to have.
1462 * This allows the kernel to read directories into kernel space or 1462 * This allows the kernel to read directories into kernel space or
1463 * to have different dirent layouts depending on the binary type. 1463 * to have different dirent layouts depending on the binary type.
1464 */ 1464 */
1465 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); 1465 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
1466 struct block_device_operations; 1466 struct block_device_operations;
1467 1467
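A sketch of a ->readdir method driving a filldir_t callback (hypothetical filesystem; the inode number 2 for the lone file is made up). filldir() returns non-zero when the user buffer is full, at which point readdir should stop and resume from f_pos next time:

static int example_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_path.dentry->d_inode;

	if (filp->f_pos == 0) {
		if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR))
			return 0;
		filp->f_pos++;
	}
	if (filp->f_pos == 1) {
		if (filldir(dirent, "hello", 5, filp->f_pos, 2, DT_REG))
			return 0;
		filp->f_pos++;
	}
	return 0;
}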
1468 /* These macros are for out-of-kernel modules to test that 1468 /* These macros are for out-of-kernel modules to test that
1469 * the kernel supports the unlocked_ioctl and compat_ioctl 1469 * the kernel supports the unlocked_ioctl and compat_ioctl
1470 * fields in struct file_operations. */ 1470 * fields in struct file_operations. */
1471 #define HAVE_COMPAT_IOCTL 1 1471 #define HAVE_COMPAT_IOCTL 1
1472 #define HAVE_UNLOCKED_IOCTL 1 1472 #define HAVE_UNLOCKED_IOCTL 1
1473 1473
1474 /* 1474 /*
1475 * NOTE: 1475 * NOTE:
1476 * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl 1476 * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl
1477 * can be called without the big kernel lock held in all filesystems. 1477 * can be called without the big kernel lock held in all filesystems.
1478 */ 1478 */
1479 struct file_operations { 1479 struct file_operations {
1480 struct module *owner; 1480 struct module *owner;
1481 loff_t (*llseek) (struct file *, loff_t, int); 1481 loff_t (*llseek) (struct file *, loff_t, int);
1482 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 1482 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
1483 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 1483 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
1484 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1484 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1485 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1485 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1486 int (*readdir) (struct file *, void *, filldir_t); 1486 int (*readdir) (struct file *, void *, filldir_t);
1487 unsigned int (*poll) (struct file *, struct poll_table_struct *); 1487 unsigned int (*poll) (struct file *, struct poll_table_struct *);
1488 int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); 1488 int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
1489 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 1489 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
1490 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 1490 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
1491 int (*mmap) (struct file *, struct vm_area_struct *); 1491 int (*mmap) (struct file *, struct vm_area_struct *);
1492 int (*open) (struct inode *, struct file *); 1492 int (*open) (struct inode *, struct file *);
1493 int (*flush) (struct file *, fl_owner_t id); 1493 int (*flush) (struct file *, fl_owner_t id);
1494 int (*release) (struct inode *, struct file *); 1494 int (*release) (struct inode *, struct file *);
1495 int (*fsync) (struct file *, struct dentry *, int datasync); 1495 int (*fsync) (struct file *, struct dentry *, int datasync);
1496 int (*aio_fsync) (struct kiocb *, int datasync); 1496 int (*aio_fsync) (struct kiocb *, int datasync);
1497 int (*fasync) (int, struct file *, int); 1497 int (*fasync) (int, struct file *, int);
1498 int (*lock) (struct file *, int, struct file_lock *); 1498 int (*lock) (struct file *, int, struct file_lock *);
1499 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); 1499 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
1500 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 1500 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
1501 int (*check_flags)(int); 1501 int (*check_flags)(int);
1502 int (*flock) (struct file *, int, struct file_lock *); 1502 int (*flock) (struct file *, int, struct file_lock *);
1503 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); 1503 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
1504 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); 1504 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
1505 int (*setlease)(struct file *, long, struct file_lock **); 1505 int (*setlease)(struct file *, long, struct file_lock **);
1506 }; 1506 };
1507 1507
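For orientation, a minimal hypothetical table wired to the generic helpers of this header's era; methods left NULL fall back to VFS defaults or fail with an error:

static const struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.write		= do_sync_write,
	.aio_write	= generic_file_aio_write,
	.mmap		= generic_file_mmap,
	.open		= generic_file_open,
	.fsync		= file_fsync,
};

Note that .fsync matches the (file, dentry, datasync) prototype above, and do_sync_read/do_sync_write require the corresponding aio methods to be set.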
1508 struct inode_operations { 1508 struct inode_operations {
1509 int (*create) (struct inode *,struct dentry *,int, struct nameidata *); 1509 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
1510 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); 1510 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
1511 int (*link) (struct dentry *,struct inode *,struct dentry *); 1511 int (*link) (struct dentry *,struct inode *,struct dentry *);
1512 int (*unlink) (struct inode *,struct dentry *); 1512 int (*unlink) (struct inode *,struct dentry *);
1513 int (*symlink) (struct inode *,struct dentry *,const char *); 1513 int (*symlink) (struct inode *,struct dentry *,const char *);
1514 int (*mkdir) (struct inode *,struct dentry *,int); 1514 int (*mkdir) (struct inode *,struct dentry *,int);
1515 int (*rmdir) (struct inode *,struct dentry *); 1515 int (*rmdir) (struct inode *,struct dentry *);
1516 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 1516 int (*mknod) (struct inode *,struct dentry *,int,dev_t);
1517 int (*rename) (struct inode *, struct dentry *, 1517 int (*rename) (struct inode *, struct dentry *,
1518 struct inode *, struct dentry *); 1518 struct inode *, struct dentry *);
1519 int (*readlink) (struct dentry *, char __user *,int); 1519 int (*readlink) (struct dentry *, char __user *,int);
1520 void * (*follow_link) (struct dentry *, struct nameidata *); 1520 void * (*follow_link) (struct dentry *, struct nameidata *);
1521 void (*put_link) (struct dentry *, struct nameidata *, void *); 1521 void (*put_link) (struct dentry *, struct nameidata *, void *);
1522 void (*truncate) (struct inode *); 1522 void (*truncate) (struct inode *);
1523 int (*permission) (struct inode *, int); 1523 int (*permission) (struct inode *, int);
1524 int (*setattr) (struct dentry *, struct iattr *); 1524 int (*setattr) (struct dentry *, struct iattr *);
1525 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 1525 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1526 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 1526 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
1527 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 1527 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1528 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1528 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1529 int (*removexattr) (struct dentry *, const char *); 1529 int (*removexattr) (struct dentry *, const char *);
1530 void (*truncate_range)(struct inode *, loff_t, loff_t); 1530 void (*truncate_range)(struct inode *, loff_t, loff_t);
1531 long (*fallocate)(struct inode *inode, int mode, loff_t offset, 1531 long (*fallocate)(struct inode *inode, int mode, loff_t offset,
1532 loff_t len); 1532 loff_t len);
1533 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1533 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1534 u64 len); 1534 u64 len);
1535 }; 1535 };
1536 1536
1537 struct seq_file; 1537 struct seq_file;
1538 1538
1539 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 1539 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
1540 unsigned long nr_segs, unsigned long fast_segs, 1540 unsigned long nr_segs, unsigned long fast_segs,
1541 struct iovec *fast_pointer, 1541 struct iovec *fast_pointer,
1542 struct iovec **ret_pointer); 1542 struct iovec **ret_pointer);
1543 1543
1544 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); 1544 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1545 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); 1545 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
1546 extern ssize_t vfs_readv(struct file *, const struct iovec __user *, 1546 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1547 unsigned long, loff_t *); 1547 unsigned long, loff_t *);
1548 extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1548 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1549 unsigned long, loff_t *); 1549 unsigned long, loff_t *);
1550 1550
1551 struct super_operations { 1551 struct super_operations {
1552 struct inode *(*alloc_inode)(struct super_block *sb); 1552 struct inode *(*alloc_inode)(struct super_block *sb);
1553 void (*destroy_inode)(struct inode *); 1553 void (*destroy_inode)(struct inode *);
1554 1554
1555 void (*dirty_inode) (struct inode *); 1555 void (*dirty_inode) (struct inode *);
1556 int (*write_inode) (struct inode *, int); 1556 int (*write_inode) (struct inode *, int);
1557 void (*drop_inode) (struct inode *); 1557 void (*drop_inode) (struct inode *);
1558 void (*delete_inode) (struct inode *); 1558 void (*delete_inode) (struct inode *);
1559 void (*put_super) (struct super_block *); 1559 void (*put_super) (struct super_block *);
1560 void (*write_super) (struct super_block *); 1560 void (*write_super) (struct super_block *);
1561 int (*sync_fs)(struct super_block *sb, int wait); 1561 int (*sync_fs)(struct super_block *sb, int wait);
1562 int (*freeze_fs) (struct super_block *); 1562 int (*freeze_fs) (struct super_block *);
1563 int (*unfreeze_fs) (struct super_block *); 1563 int (*unfreeze_fs) (struct super_block *);
1564 int (*statfs) (struct dentry *, struct kstatfs *); 1564 int (*statfs) (struct dentry *, struct kstatfs *);
1565 int (*remount_fs) (struct super_block *, int *, char *); 1565 int (*remount_fs) (struct super_block *, int *, char *);
1566 void (*clear_inode) (struct inode *); 1566 void (*clear_inode) (struct inode *);
1567 void (*umount_begin) (struct super_block *); 1567 void (*umount_begin) (struct super_block *);
1568 1568
1569 int (*show_options)(struct seq_file *, struct vfsmount *); 1569 int (*show_options)(struct seq_file *, struct vfsmount *);
1570 int (*show_stats)(struct seq_file *, struct vfsmount *); 1570 int (*show_stats)(struct seq_file *, struct vfsmount *);
1571 #ifdef CONFIG_QUOTA 1571 #ifdef CONFIG_QUOTA
1572 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 1572 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1573 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1573 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1574 #endif 1574 #endif
1575 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1575 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1576 }; 1576 };
1577 1577
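Relevant to this commit: ->write_super is the periodic-writeback hook (driven by s_dirt), while ->sync_fs is the data-integrity path; the VFS may call sync_fs first with wait == 0 to start I/O and again with wait == 1 to wait for completion. A hypothetical sketch:

static void example_write_super(struct super_block *sb)
{
	lock_super(sb);
	/* ... queue the superblock for writeback ... */
	sb->s_dirt = 0;			/* clear the dirty flag */
	unlock_super(sb);
}

static int example_sync_fs(struct super_block *sb, int wait)
{
	/* start filesystem-private I/O; if wait, block until complete */
	return 0;
}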
1578 /* 1578 /*
1579 * Inode state bits. Protected by inode_lock. 1579 * Inode state bits. Protected by inode_lock.
1580 * 1580 *
1581 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1581 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
1582 * I_DIRTY_DATASYNC and I_DIRTY_PAGES. 1582 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
1583 * 1583 *
1584 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, 1584 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
1585 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 1585 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
1586 * various stages of removing an inode. 1586 * various stages of removing an inode.
1587 * 1587 *
1588 * Two bits are used for locking and completion notification, I_LOCK and I_SYNC. 1588 * Two bits are used for locking and completion notification, I_LOCK and I_SYNC.
1589 * 1589 *
1590 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 1590 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
1591 * fdatasync(). i_atime is the usual cause. 1591 * fdatasync(). i_atime is the usual cause.
1592 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of 1592 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
1593 * these changes separately from I_DIRTY_SYNC so that we 1593 * these changes separately from I_DIRTY_SYNC so that we
1594 * don't have to write inode on fdatasync() when only 1594 * don't have to write inode on fdatasync() when only
1595 * mtime has changed in it. 1595 * mtime has changed in it.
1596 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 1596 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
1597 * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both 1597 * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both
1598 * are cleared by unlock_new_inode(), called from iget(). 1598 * are cleared by unlock_new_inode(), called from iget().
1599 * I_WILL_FREE Must be set when calling write_inode_now() if i_count 1599 * I_WILL_FREE Must be set when calling write_inode_now() if i_count
1600 * is zero. I_FREEING must be set when I_WILL_FREE is 1600 * is zero. I_FREEING must be set when I_WILL_FREE is
1601 * cleared. 1601 * cleared.
1602 * I_FREEING Set when inode is about to be freed but still has dirty 1602 * I_FREEING Set when inode is about to be freed but still has dirty
1603 * pages or buffers attached or the inode itself is still 1603 * pages or buffers attached or the inode itself is still
1604 * dirty. 1604 * dirty.
1605 * I_CLEAR Set by clear_inode(). In this state the inode is clean 1605 * I_CLEAR Set by clear_inode(). In this state the inode is clean
1606 * and can be destroyed. 1606 * and can be destroyed.
1607 * 1607 *
1608 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are 1608 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
1609 * prohibited for many purposes. iget() must wait for 1609 * prohibited for many purposes. iget() must wait for
1610 * the inode to be completely released, then create it 1610 * the inode to be completely released, then create it
1611 * anew. Other functions will just ignore such inodes, 1611 * anew. Other functions will just ignore such inodes,
1612 * if appropriate. I_LOCK is used for waiting. 1612 * if appropriate. I_LOCK is used for waiting.
1613 * 1613 *
1614 * I_LOCK Serves as both a mutex and completion notification. 1614 * I_LOCK Serves as both a mutex and completion notification.
1615 * New inodes set I_LOCK. If two processes both create 1615 * New inodes set I_LOCK. If two processes both create
1616 * the same inode, one of them will release its inode and 1616 * the same inode, one of them will release its inode and
1617 * wait for I_LOCK to be released before returning. 1617 * wait for I_LOCK to be released before returning.
1618 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can 1618 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
1619 * also cause waiting on I_LOCK, without I_LOCK actually 1619 * also cause waiting on I_LOCK, without I_LOCK actually
1620 * being set. find_inode() uses this to prevent returning 1620 * being set. find_inode() uses this to prevent returning
1621 * nearly-dead inodes. 1621 * nearly-dead inodes.
1622 * I_SYNC Similar to I_LOCK, but limited in scope to writeback 1622 * I_SYNC Similar to I_LOCK, but limited in scope to writeback
1623 * of inode dirty data. Having a separate lock for this 1623 * of inode dirty data. Having a separate lock for this
1624 * purpose reduces latency and prevents some filesystem- 1624 * purpose reduces latency and prevents some filesystem-
1625 * specific deadlocks. 1625 * specific deadlocks.
1626 * 1626 *
1627 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1627 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1628 * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on 1628 * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on
1629 * I_CLEAR? If not, why? 1629 * I_CLEAR? If not, why?
1630 */ 1630 */
1631 #define I_DIRTY_SYNC 1 1631 #define I_DIRTY_SYNC 1
1632 #define I_DIRTY_DATASYNC 2 1632 #define I_DIRTY_DATASYNC 2
1633 #define I_DIRTY_PAGES 4 1633 #define I_DIRTY_PAGES 4
1634 #define I_NEW 8 1634 #define I_NEW 8
1635 #define I_WILL_FREE 16 1635 #define I_WILL_FREE 16
1636 #define I_FREEING 32 1636 #define I_FREEING 32
1637 #define I_CLEAR 64 1637 #define I_CLEAR 64
1638 #define __I_LOCK 7 1638 #define __I_LOCK 7
1639 #define I_LOCK (1 << __I_LOCK) 1639 #define I_LOCK (1 << __I_LOCK)
1640 #define __I_SYNC 8 1640 #define __I_SYNC 8
1641 #define I_SYNC (1 << __I_SYNC) 1641 #define I_SYNC (1 << __I_SYNC)
1642 1642
1643 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1643 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1644 1644
1645 extern void __mark_inode_dirty(struct inode *, int); 1645 extern void __mark_inode_dirty(struct inode *, int);
1646 static inline void mark_inode_dirty(struct inode *inode) 1646 static inline void mark_inode_dirty(struct inode *inode)
1647 { 1647 {
1648 __mark_inode_dirty(inode, I_DIRTY); 1648 __mark_inode_dirty(inode, I_DIRTY);
1649 } 1649 }
1650 1650
1651 static inline void mark_inode_dirty_sync(struct inode *inode) 1651 static inline void mark_inode_dirty_sync(struct inode *inode)
1652 { 1652 {
1653 __mark_inode_dirty(inode, I_DIRTY_SYNC); 1653 __mark_inode_dirty(inode, I_DIRTY_SYNC);
1654 } 1654 }
1655 1655
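A sketch of choosing between the two helpers, following the state-bit comments above: timestamp-only updates are I_DIRTY_SYNC, so fdatasync() can skip writing the inode for them.

static void example_touch_atime(struct inode *inode)
{
	inode->i_atime = current_fs_time(inode->i_sb);
	mark_inode_dirty_sync(inode);	/* timestamp only: I_DIRTY_SYNC */
}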
1656 /** 1656 /**
1657 * inc_nlink - directly increment an inode's link count 1657 * inc_nlink - directly increment an inode's link count
1658 * @inode: inode 1658 * @inode: inode
1659 * 1659 *
1660 * This is a low-level filesystem helper to replace any 1660 * This is a low-level filesystem helper to replace any
1661 * direct filesystem manipulation of i_nlink. Currently, 1661 * direct filesystem manipulation of i_nlink. Currently,
1662 * it is only here for parity with dec_nlink(). 1662 * it is only here for parity with dec_nlink().
1663 */ 1663 */
1664 static inline void inc_nlink(struct inode *inode) 1664 static inline void inc_nlink(struct inode *inode)
1665 { 1665 {
1666 inode->i_nlink++; 1666 inode->i_nlink++;
1667 } 1667 }
1668 1668
1669 static inline void inode_inc_link_count(struct inode *inode) 1669 static inline void inode_inc_link_count(struct inode *inode)
1670 { 1670 {
1671 inc_nlink(inode); 1671 inc_nlink(inode);
1672 mark_inode_dirty(inode); 1672 mark_inode_dirty(inode);
1673 } 1673 }
1674 1674
1675 /** 1675 /**
1676 * drop_nlink - directly drop an inode's link count 1676 * drop_nlink - directly drop an inode's link count
1677 * @inode: inode 1677 * @inode: inode
1678 * 1678 *
1679 * This is a low-level filesystem helper to replace any 1679 * This is a low-level filesystem helper to replace any
1680 * direct filesystem manipulation of i_nlink. In cases 1680 * direct filesystem manipulation of i_nlink. In cases
1681 * where we are attempting to track writes to the 1681 * where we are attempting to track writes to the
1682 * filesystem, a decrement to zero means an imminent 1682 * filesystem, a decrement to zero means an imminent
1683 * write when the file is truncated and actually unlinked 1683 * write when the file is truncated and actually unlinked
1684 * on the filesystem. 1684 * on the filesystem.
1685 */ 1685 */
1686 static inline void drop_nlink(struct inode *inode) 1686 static inline void drop_nlink(struct inode *inode)
1687 { 1687 {
1688 inode->i_nlink--; 1688 inode->i_nlink--;
1689 } 1689 }
1690 1690
1691 /** 1691 /**
1692 * clear_nlink - directly zero an inode's link count 1692 * clear_nlink - directly zero an inode's link count
1693 * @inode: inode 1693 * @inode: inode
1694 * 1694 *
1695 * This is a low-level filesystem helper to replace any 1695 * This is a low-level filesystem helper to replace any
1696 * direct filesystem manipulation of i_nlink. See 1696 * direct filesystem manipulation of i_nlink. See
1697 * drop_nlink() for why we care about i_nlink hitting zero. 1697 * drop_nlink() for why we care about i_nlink hitting zero.
1698 */ 1698 */
1699 static inline void clear_nlink(struct inode *inode) 1699 static inline void clear_nlink(struct inode *inode)
1700 { 1700 {
1701 inode->i_nlink = 0; 1701 inode->i_nlink = 0;
1702 } 1702 }
1703 1703
1704 static inline void inode_dec_link_count(struct inode *inode) 1704 static inline void inode_dec_link_count(struct inode *inode)
1705 { 1705 {
1706 drop_nlink(inode); 1706 drop_nlink(inode);
1707 mark_inode_dirty(inode); 1707 mark_inode_dirty(inode);
1708 } 1708 }
1709 1709
1710 /** 1710 /**
1711 * inode_inc_iversion - increments i_version 1711 * inode_inc_iversion - increments i_version
1712 * @inode: inode that needs to be updated 1712 * @inode: inode that needs to be updated
1713 * 1713 *
1714 * Every time the inode is modified, the i_version field will be incremented. 1714 * Every time the inode is modified, the i_version field will be incremented.
1715 * The filesystem has to be mounted with the i_version flag 1715 * The filesystem has to be mounted with the i_version flag
1716 */ 1716 */
1717 1717
1718 static inline void inode_inc_iversion(struct inode *inode) 1718 static inline void inode_inc_iversion(struct inode *inode)
1719 { 1719 {
1720 spin_lock(&inode->i_lock); 1720 spin_lock(&inode->i_lock);
1721 inode->i_version++; 1721 inode->i_version++;
1722 spin_unlock(&inode->i_lock); 1722 spin_unlock(&inode->i_lock);
1723 } 1723 }
1724 1724
1725 extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); 1725 extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry);
1726 static inline void file_accessed(struct file *file) 1726 static inline void file_accessed(struct file *file)
1727 { 1727 {
1728 if (!(file->f_flags & O_NOATIME)) 1728 if (!(file->f_flags & O_NOATIME))
1729 touch_atime(file->f_path.mnt, file->f_path.dentry); 1729 touch_atime(file->f_path.mnt, file->f_path.dentry);
1730 } 1730 }
1731 1731
1732 int sync_inode(struct inode *inode, struct writeback_control *wbc); 1732 int sync_inode(struct inode *inode, struct writeback_control *wbc);
1733 1733
1734 struct file_system_type { 1734 struct file_system_type {
1735 const char *name; 1735 const char *name;
1736 int fs_flags; 1736 int fs_flags;
1737 int (*get_sb) (struct file_system_type *, int, 1737 int (*get_sb) (struct file_system_type *, int,
1738 const char *, void *, struct vfsmount *); 1738 const char *, void *, struct vfsmount *);
1739 void (*kill_sb) (struct super_block *); 1739 void (*kill_sb) (struct super_block *);
1740 struct module *owner; 1740 struct module *owner;
1741 struct file_system_type * next; 1741 struct file_system_type * next;
1742 struct list_head fs_supers; 1742 struct list_head fs_supers;
1743 1743
1744 struct lock_class_key s_lock_key; 1744 struct lock_class_key s_lock_key;
1745 struct lock_class_key s_umount_key; 1745 struct lock_class_key s_umount_key;
1746 1746
1747 struct lock_class_key i_lock_key; 1747 struct lock_class_key i_lock_key;
1748 struct lock_class_key i_mutex_key; 1748 struct lock_class_key i_mutex_key;
1749 struct lock_class_key i_mutex_dir_key; 1749 struct lock_class_key i_mutex_dir_key;
1750 struct lock_class_key i_alloc_sem_key; 1750 struct lock_class_key i_alloc_sem_key;
1751 }; 1751 };
1752 1752
1753 extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, 1753 extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
1754 int (*fill_super)(struct super_block *, void *, int), 1754 int (*fill_super)(struct super_block *, void *, int),
1755 struct vfsmount *mnt); 1755 struct vfsmount *mnt);
1756 extern int get_sb_bdev(struct file_system_type *fs_type, 1756 extern int get_sb_bdev(struct file_system_type *fs_type,
1757 int flags, const char *dev_name, void *data, 1757 int flags, const char *dev_name, void *data,
1758 int (*fill_super)(struct super_block *, void *, int), 1758 int (*fill_super)(struct super_block *, void *, int),
1759 struct vfsmount *mnt); 1759 struct vfsmount *mnt);
1760 extern int get_sb_single(struct file_system_type *fs_type, 1760 extern int get_sb_single(struct file_system_type *fs_type,
1761 int flags, void *data, 1761 int flags, void *data,
1762 int (*fill_super)(struct super_block *, void *, int), 1762 int (*fill_super)(struct super_block *, void *, int),
1763 struct vfsmount *mnt); 1763 struct vfsmount *mnt);
1764 extern int get_sb_nodev(struct file_system_type *fs_type, 1764 extern int get_sb_nodev(struct file_system_type *fs_type,
1765 int flags, void *data, 1765 int flags, void *data,
1766 int (*fill_super)(struct super_block *, void *, int), 1766 int (*fill_super)(struct super_block *, void *, int),
1767 struct vfsmount *mnt); 1767 struct vfsmount *mnt);
1768 void generic_shutdown_super(struct super_block *sb); 1768 void generic_shutdown_super(struct super_block *sb);
1769 void kill_block_super(struct super_block *sb); 1769 void kill_block_super(struct super_block *sb);
1770 void kill_anon_super(struct super_block *sb); 1770 void kill_anon_super(struct super_block *sb);
1771 void kill_litter_super(struct super_block *sb); 1771 void kill_litter_super(struct super_block *sb);
1772 void deactivate_super(struct super_block *sb); 1772 void deactivate_super(struct super_block *sb);
1773 void deactivate_locked_super(struct super_block *sb); 1773 void deactivate_locked_super(struct super_block *sb);
1774 int set_anon_super(struct super_block *s, void *data); 1774 int set_anon_super(struct super_block *s, void *data);
1775 struct super_block *sget(struct file_system_type *type, 1775 struct super_block *sget(struct file_system_type *type,
1776 int (*test)(struct super_block *,void *), 1776 int (*test)(struct super_block *,void *),
1777 int (*set)(struct super_block *,void *), 1777 int (*set)(struct super_block *,void *),
1778 void *data); 1778 void *data);
1779 extern int get_sb_pseudo(struct file_system_type *, char *, 1779 extern int get_sb_pseudo(struct file_system_type *, char *,
1780 const struct super_operations *ops, unsigned long, 1780 const struct super_operations *ops, unsigned long,
1781 struct vfsmount *mnt); 1781 struct vfsmount *mnt);
1782 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); 1782 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
1783 int __put_super_and_need_restart(struct super_block *sb); 1783 int __put_super_and_need_restart(struct super_block *sb);
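
Putting the declarations above together, a block-device based filesystem of this era wires up get_sb/kill_sb roughly as follows; "myfs" and myfs_fill_super are hypothetical, and the type would be handed to register_filesystem() (declared below) at module init:

    /* sketch: hypothetical filesystem boilerplate */
    static int myfs_get_sb(struct file_system_type *fs_type, int flags,
                           const char *dev_name, void *data, struct vfsmount *mnt)
    {
            return get_sb_bdev(fs_type, flags, dev_name, data,
                               myfs_fill_super, mnt);
    }

    static struct file_system_type myfs_fs_type = {
            .owner          = THIS_MODULE,
            .name           = "myfs",
            .get_sb         = myfs_get_sb,
            .kill_sb        = kill_block_super,
            .fs_flags       = FS_REQUIRES_DEV,
    };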
1784 1784
1785 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ 1785 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
1786 #define fops_get(fops) \ 1786 #define fops_get(fops) \
1787 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) 1787 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
1788 #define fops_put(fops) \ 1788 #define fops_put(fops) \
1789 do { if (fops) module_put((fops)->owner); } while(0) 1789 do { if (fops) module_put((fops)->owner); } while(0)
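
The classic consumer of these macros is an open() routine that installs a new fops table and pins its owning module, in the style of the chrdev open path; my_dev_fops is hypothetical, and the matching fops_put() is issued by the VFS when the file is finally released:

    static int my_open(struct inode *inode, struct file *filp)
    {
            /* my_dev_fops: hypothetical driver fops with .owner set */
            filp->f_op = fops_get(&my_dev_fops);
            if (!filp->f_op)
                    return -ENODEV;         /* owning module is going away */
            if (filp->f_op->open)
                    return filp->f_op->open(inode, filp);
            return 0;
    }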
1790 1790
1791 extern int register_filesystem(struct file_system_type *); 1791 extern int register_filesystem(struct file_system_type *);
1792 extern int unregister_filesystem(struct file_system_type *); 1792 extern int unregister_filesystem(struct file_system_type *);
1793 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); 1793 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
1794 #define kern_mount(type) kern_mount_data(type, NULL) 1794 #define kern_mount(type) kern_mount_data(type, NULL)
1795 extern int may_umount_tree(struct vfsmount *); 1795 extern int may_umount_tree(struct vfsmount *);
1796 extern int may_umount(struct vfsmount *); 1796 extern int may_umount(struct vfsmount *);
1797 extern long do_mount(char *, char *, char *, unsigned long, void *); 1797 extern long do_mount(char *, char *, char *, unsigned long, void *);
1798 extern struct vfsmount *collect_mounts(struct path *); 1798 extern struct vfsmount *collect_mounts(struct path *);
1799 extern void drop_collected_mounts(struct vfsmount *); 1799 extern void drop_collected_mounts(struct vfsmount *);
1800 1800
1801 extern int vfs_statfs(struct dentry *, struct kstatfs *); 1801 extern int vfs_statfs(struct dentry *, struct kstatfs *);
1802 1802
1803 extern int current_umask(void); 1803 extern int current_umask(void);
1804 1804
1805 /* /sys/fs */ 1805 /* /sys/fs */
1806 extern struct kobject *fs_kobj; 1806 extern struct kobject *fs_kobj;
1807 1807
1808 extern int rw_verify_area(int, struct file *, loff_t *, size_t); 1808 extern int rw_verify_area(int, struct file *, loff_t *, size_t);
1809 1809
1810 #define FLOCK_VERIFY_READ 1 1810 #define FLOCK_VERIFY_READ 1
1811 #define FLOCK_VERIFY_WRITE 2 1811 #define FLOCK_VERIFY_WRITE 2
1812 1812
1813 #ifdef CONFIG_FILE_LOCKING 1813 #ifdef CONFIG_FILE_LOCKING
1814 extern int locks_mandatory_locked(struct inode *); 1814 extern int locks_mandatory_locked(struct inode *);
1815 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 1815 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
1816 1816
1817 /* 1817 /*
1818 * Candidates for mandatory locking have the setgid bit set 1818 * Candidates for mandatory locking have the setgid bit set
1819 * but no group execute bit - an otherwise meaningless combination. 1819 * but no group execute bit - an otherwise meaningless combination.
1820 */ 1820 */
1821 1821
1822 static inline int __mandatory_lock(struct inode *ino) 1822 static inline int __mandatory_lock(struct inode *ino)
1823 { 1823 {
1824 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; 1824 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
1825 } 1825 }
1826 1826
1827 /* 1827 /*
1828 * ... and these candidates should be on a fs mounted with MS_MANDLOCK, 1828 * ... and these candidates should be on a fs mounted with MS_MANDLOCK,
1829 * otherwise they will only be advisory locks 1829 * otherwise they will only be advisory locks
1830 */ 1830 */
1831 1831
1832 static inline int mandatory_lock(struct inode *ino) 1832 static inline int mandatory_lock(struct inode *ino)
1833 { 1833 {
1834 return IS_MANDLOCK(ino) && __mandatory_lock(ino); 1834 return IS_MANDLOCK(ino) && __mandatory_lock(ino);
1835 } 1835 }
1836 1836
1837 static inline int locks_verify_locked(struct inode *inode) 1837 static inline int locks_verify_locked(struct inode *inode)
1838 { 1838 {
1839 if (mandatory_lock(inode)) 1839 if (mandatory_lock(inode))
1840 return locks_mandatory_locked(inode); 1840 return locks_mandatory_locked(inode);
1841 return 0; 1841 return 0;
1842 } 1842 }
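
A sketch of the kind of check these predicates enable: the generic mmap path refuses shared writable mappings of files that are mandatorily locked, i.e. on an MS_MANDLOCK mount with the setgid-without-group-execute mode:

    /* illustrative fragment, in the style of the generic mmap path */
    struct inode *inode = file->f_path.dentry->d_inode;

    if (locks_verify_locked(inode))
            return -EAGAIN;         /* someone holds a mandatory lock */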
1843 1843
1844 static inline int locks_verify_truncate(struct inode *inode, 1844 static inline int locks_verify_truncate(struct inode *inode,
1845 struct file *filp, 1845 struct file *filp,
1846 loff_t size) 1846 loff_t size)
1847 { 1847 {
1848 if (inode->i_flock && mandatory_lock(inode)) 1848 if (inode->i_flock && mandatory_lock(inode))
1849 return locks_mandatory_area( 1849 return locks_mandatory_area(
1850 FLOCK_VERIFY_WRITE, inode, filp, 1850 FLOCK_VERIFY_WRITE, inode, filp,
1851 size < inode->i_size ? size : inode->i_size, 1851 size < inode->i_size ? size : inode->i_size,
1852 (size < inode->i_size ? inode->i_size - size 1852 (size < inode->i_size ? inode->i_size - size
1853 : size - inode->i_size) 1853 : size - inode->i_size)
1854 ); 1854 );
1855 return 0; 1855 return 0;
1856 } 1856 }
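
A sketch of a setsize helper running the check above before actually changing the file size; my_setsize is hypothetical and vmtruncate() stands in for whatever the filesystem does to adjust i_size:

    /* sketch only: honour mandatory locks over the affected range */
    static int my_setsize(struct inode *inode, struct file *filp, loff_t length)
    {
            int error = locks_verify_truncate(inode, filp, length);

            if (error)
                    return error;
            return vmtruncate(inode, length);
    }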
1857 1857
1858 static inline int break_lease(struct inode *inode, unsigned int mode) 1858 static inline int break_lease(struct inode *inode, unsigned int mode)
1859 { 1859 {
1860 if (inode->i_flock) 1860 if (inode->i_flock)
1861 return __break_lease(inode, mode); 1861 return __break_lease(inode, mode);
1862 return 0; 1862 return 0;
1863 } 1863 }
1864 #else /* !CONFIG_FILE_LOCKING */ 1864 #else /* !CONFIG_FILE_LOCKING */
1865 static inline int locks_mandatory_locked(struct inode *inode) 1865 static inline int locks_mandatory_locked(struct inode *inode)
1866 { 1866 {
1867 return 0; 1867 return 0;
1868 } 1868 }
1869 1869
1870 static inline int locks_mandatory_area(int rw, struct inode *inode, 1870 static inline int locks_mandatory_area(int rw, struct inode *inode,
1871 struct file *filp, loff_t offset, 1871 struct file *filp, loff_t offset,
1872 size_t count) 1872 size_t count)
1873 { 1873 {
1874 return 0; 1874 return 0;
1875 } 1875 }
1876 1876
1877 static inline int __mandatory_lock(struct inode *inode) 1877 static inline int __mandatory_lock(struct inode *inode)
1878 { 1878 {
1879 return 0; 1879 return 0;
1880 } 1880 }
1881 1881
1882 static inline int mandatory_lock(struct inode *inode) 1882 static inline int mandatory_lock(struct inode *inode)
1883 { 1883 {
1884 return 0; 1884 return 0;
1885 } 1885 }
1886 1886
1887 static inline int locks_verify_locked(struct inode *inode) 1887 static inline int locks_verify_locked(struct inode *inode)
1888 { 1888 {
1889 return 0; 1889 return 0;
1890 } 1890 }
1891 1891
1892 static inline int locks_verify_truncate(struct inode *inode, struct file *filp, 1892 static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
1893 size_t size) 1893 size_t size)
1894 { 1894 {
1895 return 0; 1895 return 0;
1896 } 1896 }
1897 1897
1898 static inline int break_lease(struct inode *inode, unsigned int mode) 1898 static inline int break_lease(struct inode *inode, unsigned int mode)
1899 { 1899 {
1900 return 0; 1900 return 0;
1901 } 1901 }
1902 1902
1903 #endif /* CONFIG_FILE_LOCKING */ 1903 #endif /* CONFIG_FILE_LOCKING */
1904 1904
1905 /* fs/open.c */ 1905 /* fs/open.c */
1906 1906
1907 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, 1907 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
1908 struct file *filp); 1908 struct file *filp);
1909 extern long do_sys_open(int dfd, const char __user *filename, int flags, 1909 extern long do_sys_open(int dfd, const char __user *filename, int flags,
1910 int mode); 1910 int mode);
1911 extern struct file *filp_open(const char *, int, int); 1911 extern struct file *filp_open(const char *, int, int);
1912 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 1912 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1913 const struct cred *); 1913 const struct cred *);
1914 extern int filp_close(struct file *, fl_owner_t id); 1914 extern int filp_close(struct file *, fl_owner_t id);
1915 extern char * getname(const char __user *); 1915 extern char * getname(const char __user *);
1916 1916
1917 /* fs/dcache.c */ 1917 /* fs/dcache.c */
1918 extern void __init vfs_caches_init_early(void); 1918 extern void __init vfs_caches_init_early(void);
1919 extern void __init vfs_caches_init(unsigned long); 1919 extern void __init vfs_caches_init(unsigned long);
1920 1920
1921 extern struct kmem_cache *names_cachep; 1921 extern struct kmem_cache *names_cachep;
1922 1922
1923 #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) 1923 #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
1924 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) 1924 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
1925 #ifndef CONFIG_AUDITSYSCALL 1925 #ifndef CONFIG_AUDITSYSCALL
1926 #define putname(name) __putname(name) 1926 #define putname(name) __putname(name)
1927 #else 1927 #else
1928 extern void putname(const char *name); 1928 extern void putname(const char *name);
1929 #endif 1929 #endif
1930 1930
1931 #ifdef CONFIG_BLOCK 1931 #ifdef CONFIG_BLOCK
1932 extern int register_blkdev(unsigned int, const char *); 1932 extern int register_blkdev(unsigned int, const char *);
1933 extern void unregister_blkdev(unsigned int, const char *); 1933 extern void unregister_blkdev(unsigned int, const char *);
1934 extern struct block_device *bdget(dev_t); 1934 extern struct block_device *bdget(dev_t);
1935 extern void bd_set_size(struct block_device *, loff_t size); 1935 extern void bd_set_size(struct block_device *, loff_t size);
1936 extern void bd_forget(struct inode *inode); 1936 extern void bd_forget(struct inode *inode);
1937 extern void bdput(struct block_device *); 1937 extern void bdput(struct block_device *);
1938 extern struct block_device *open_by_devnum(dev_t, fmode_t); 1938 extern struct block_device *open_by_devnum(dev_t, fmode_t);
1939 extern void invalidate_bdev(struct block_device *); 1939 extern void invalidate_bdev(struct block_device *);
1940 extern int sync_blockdev(struct block_device *bdev); 1940 extern int sync_blockdev(struct block_device *bdev);
1941 extern struct super_block *freeze_bdev(struct block_device *); 1941 extern struct super_block *freeze_bdev(struct block_device *);
1942 extern void emergency_thaw_all(void); 1942 extern void emergency_thaw_all(void);
1943 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); 1943 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
1944 extern int fsync_bdev(struct block_device *); 1944 extern int fsync_bdev(struct block_device *);
1945 extern int fsync_super(struct super_block *); 1945 extern int fsync_super(struct super_block *);
1946 extern int fsync_no_super(struct block_device *); 1946 extern int fsync_no_super(struct block_device *);
1947 #else 1947 #else
1948 static inline void bd_forget(struct inode *inode) {} 1948 static inline void bd_forget(struct inode *inode) {}
1949 static inline int sync_blockdev(struct block_device *bdev) { return 0; } 1949 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
1950 static inline void invalidate_bdev(struct block_device *bdev) {} 1950 static inline void invalidate_bdev(struct block_device *bdev) {}
1951 1951
1952 static inline struct super_block *freeze_bdev(struct block_device *sb) 1952 static inline struct super_block *freeze_bdev(struct block_device *sb)
1953 { 1953 {
1954 return NULL; 1954 return NULL;
1955 } 1955 }
1956 1956
1957 static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) 1957 static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
1958 { 1958 {
1959 return 0; 1959 return 0;
1960 } 1960 }
1961 #endif 1961 #endif
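
freeze_bdev()/thaw_bdev() pair up around snapshot-style operations; a minimal sketch (my_snapshot and the snapshot step itself are hypothetical):

    static int my_snapshot(struct block_device *bdev)
    {
            struct super_block *sb = freeze_bdev(bdev);

            if (IS_ERR(sb))
                    return PTR_ERR(sb);

            /* filesystem is quiesced here; take the snapshot ... */

            return thaw_bdev(bdev, sb);
    }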
1962 extern const struct file_operations def_blk_fops; 1962 extern const struct file_operations def_blk_fops;
1963 extern const struct file_operations def_chr_fops; 1963 extern const struct file_operations def_chr_fops;
1964 extern const struct file_operations bad_sock_fops; 1964 extern const struct file_operations bad_sock_fops;
1965 extern const struct file_operations def_fifo_fops; 1965 extern const struct file_operations def_fifo_fops;
1966 #ifdef CONFIG_BLOCK 1966 #ifdef CONFIG_BLOCK
1967 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 1967 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
1968 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 1968 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
1969 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 1969 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
1970 extern int blkdev_get(struct block_device *, fmode_t); 1970 extern int blkdev_get(struct block_device *, fmode_t);
1971 extern int blkdev_put(struct block_device *, fmode_t); 1971 extern int blkdev_put(struct block_device *, fmode_t);
1972 extern int bd_claim(struct block_device *, void *); 1972 extern int bd_claim(struct block_device *, void *);
1973 extern void bd_release(struct block_device *); 1973 extern void bd_release(struct block_device *);
1974 #ifdef CONFIG_SYSFS 1974 #ifdef CONFIG_SYSFS
1975 extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); 1975 extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
1976 extern void bd_release_from_disk(struct block_device *, struct gendisk *); 1976 extern void bd_release_from_disk(struct block_device *, struct gendisk *);
1977 #else 1977 #else
1978 #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) 1978 #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
1979 #define bd_release_from_disk(bdev, disk) bd_release(bdev) 1979 #define bd_release_from_disk(bdev, disk) bd_release(bdev)
1980 #endif 1980 #endif
1981 #endif 1981 #endif
1982 1982
1983 /* fs/char_dev.c */ 1983 /* fs/char_dev.c */
1984 #define CHRDEV_MAJOR_HASH_SIZE 255 1984 #define CHRDEV_MAJOR_HASH_SIZE 255
1985 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); 1985 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
1986 extern int register_chrdev_region(dev_t, unsigned, const char *); 1986 extern int register_chrdev_region(dev_t, unsigned, const char *);
1987 extern int register_chrdev(unsigned int, const char *, 1987 extern int register_chrdev(unsigned int, const char *,
1988 const struct file_operations *); 1988 const struct file_operations *);
1989 extern void unregister_chrdev(unsigned int, const char *); 1989 extern void unregister_chrdev(unsigned int, const char *);
1990 extern void unregister_chrdev_region(dev_t, unsigned); 1990 extern void unregister_chrdev_region(dev_t, unsigned);
1991 extern void chrdev_show(struct seq_file *,off_t); 1991 extern void chrdev_show(struct seq_file *,off_t);
1992 1992
1993 /* fs/block_dev.c */ 1993 /* fs/block_dev.c */
1994 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 1994 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
1995 #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ 1995 #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
1996 1996
1997 #ifdef CONFIG_BLOCK 1997 #ifdef CONFIG_BLOCK
1998 #define BLKDEV_MAJOR_HASH_SIZE 255 1998 #define BLKDEV_MAJOR_HASH_SIZE 255
1999 extern const char *__bdevname(dev_t, char *buffer); 1999 extern const char *__bdevname(dev_t, char *buffer);
2000 extern const char *bdevname(struct block_device *bdev, char *buffer); 2000 extern const char *bdevname(struct block_device *bdev, char *buffer);
2001 extern struct block_device *lookup_bdev(const char *); 2001 extern struct block_device *lookup_bdev(const char *);
2002 extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); 2002 extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
2003 extern void close_bdev_exclusive(struct block_device *, fmode_t); 2003 extern void close_bdev_exclusive(struct block_device *, fmode_t);
2004 extern void blkdev_show(struct seq_file *,off_t); 2004 extern void blkdev_show(struct seq_file *,off_t);
2005 2005
2006 #else 2006 #else
2007 #define BLKDEV_MAJOR_HASH_SIZE 0 2007 #define BLKDEV_MAJOR_HASH_SIZE 0
2008 #endif 2008 #endif
2009 2009
2010 extern void init_special_inode(struct inode *, umode_t, dev_t); 2010 extern void init_special_inode(struct inode *, umode_t, dev_t);
2011 2011
2012 /* Invalid inode operations -- fs/bad_inode.c */ 2012 /* Invalid inode operations -- fs/bad_inode.c */
2013 extern void make_bad_inode(struct inode *); 2013 extern void make_bad_inode(struct inode *);
2014 extern int is_bad_inode(struct inode *); 2014 extern int is_bad_inode(struct inode *);
2015 2015
2016 extern const struct file_operations read_pipefifo_fops; 2016 extern const struct file_operations read_pipefifo_fops;
2017 extern const struct file_operations write_pipefifo_fops; 2017 extern const struct file_operations write_pipefifo_fops;
2018 extern const struct file_operations rdwr_pipefifo_fops; 2018 extern const struct file_operations rdwr_pipefifo_fops;
2019 2019
2020 extern int fs_may_remount_ro(struct super_block *); 2020 extern int fs_may_remount_ro(struct super_block *);
2021 2021
2022 #ifdef CONFIG_BLOCK 2022 #ifdef CONFIG_BLOCK
2023 /* 2023 /*
2024 * return READ, READA, or WRITE 2024 * return READ, READA, or WRITE
2025 */ 2025 */
2026 #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) 2026 #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK))
2027 2027
2028 /* 2028 /*
2029 * return data direction, READ or WRITE 2029 * return data direction, READ or WRITE
2030 */ 2030 */
2031 #define bio_data_dir(bio) ((bio)->bi_rw & 1) 2031 #define bio_data_dir(bio) ((bio)->bi_rw & 1)
2032 2032
2033 extern void check_disk_size_change(struct gendisk *disk, 2033 extern void check_disk_size_change(struct gendisk *disk,
2034 struct block_device *bdev); 2034 struct block_device *bdev);
2035 extern int revalidate_disk(struct gendisk *); 2035 extern int revalidate_disk(struct gendisk *);
2036 extern int check_disk_change(struct block_device *); 2036 extern int check_disk_change(struct block_device *);
2037 extern int __invalidate_device(struct block_device *); 2037 extern int __invalidate_device(struct block_device *);
2038 extern int invalidate_partition(struct gendisk *, int); 2038 extern int invalidate_partition(struct gendisk *, int);
2039 #endif 2039 #endif
2040 extern int invalidate_inodes(struct super_block *); 2040 extern int invalidate_inodes(struct super_block *);
2041 unsigned long __invalidate_mapping_pages(struct address_space *mapping, 2041 unsigned long __invalidate_mapping_pages(struct address_space *mapping,
2042 pgoff_t start, pgoff_t end, 2042 pgoff_t start, pgoff_t end,
2043 bool be_atomic); 2043 bool be_atomic);
2044 unsigned long invalidate_mapping_pages(struct address_space *mapping, 2044 unsigned long invalidate_mapping_pages(struct address_space *mapping,
2045 pgoff_t start, pgoff_t end); 2045 pgoff_t start, pgoff_t end);
2046 2046
2047 static inline unsigned long __deprecated 2047 static inline unsigned long __deprecated
2048 invalidate_inode_pages(struct address_space *mapping) 2048 invalidate_inode_pages(struct address_space *mapping)
2049 { 2049 {
2050 return invalidate_mapping_pages(mapping, 0, ~0UL); 2050 return invalidate_mapping_pages(mapping, 0, ~0UL);
2051 } 2051 }
2052 2052
2053 static inline void invalidate_remote_inode(struct inode *inode) 2053 static inline void invalidate_remote_inode(struct inode *inode)
2054 { 2054 {
2055 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2055 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2056 S_ISLNK(inode->i_mode)) 2056 S_ISLNK(inode->i_mode))
2057 invalidate_mapping_pages(inode->i_mapping, 0, -1); 2057 invalidate_mapping_pages(inode->i_mapping, 0, -1);
2058 } 2058 }
2059 extern int invalidate_inode_pages2(struct address_space *mapping); 2059 extern int invalidate_inode_pages2(struct address_space *mapping);
2060 extern int invalidate_inode_pages2_range(struct address_space *mapping, 2060 extern int invalidate_inode_pages2_range(struct address_space *mapping,
2061 pgoff_t start, pgoff_t end); 2061 pgoff_t start, pgoff_t end);
2062 extern void generic_sync_sb_inodes(struct super_block *sb, 2062 extern void generic_sync_sb_inodes(struct super_block *sb,
2063 struct writeback_control *wbc); 2063 struct writeback_control *wbc);
2064 extern int write_inode_now(struct inode *, int); 2064 extern int write_inode_now(struct inode *, int);
2065 extern int filemap_fdatawrite(struct address_space *); 2065 extern int filemap_fdatawrite(struct address_space *);
2066 extern int filemap_flush(struct address_space *); 2066 extern int filemap_flush(struct address_space *);
2067 extern int filemap_fdatawait(struct address_space *); 2067 extern int filemap_fdatawait(struct address_space *);
2068 extern int filemap_write_and_wait(struct address_space *mapping); 2068 extern int filemap_write_and_wait(struct address_space *mapping);
2069 extern int filemap_write_and_wait_range(struct address_space *mapping, 2069 extern int filemap_write_and_wait_range(struct address_space *mapping,
2070 loff_t lstart, loff_t lend); 2070 loff_t lstart, loff_t lend);
2071 extern int wait_on_page_writeback_range(struct address_space *mapping, 2071 extern int wait_on_page_writeback_range(struct address_space *mapping,
2072 pgoff_t start, pgoff_t end); 2072 pgoff_t start, pgoff_t end);
2073 extern int __filemap_fdatawrite_range(struct address_space *mapping, 2073 extern int __filemap_fdatawrite_range(struct address_space *mapping,
2074 loff_t start, loff_t end, int sync_mode); 2074 loff_t start, loff_t end, int sync_mode);
2075 extern int filemap_fdatawrite_range(struct address_space *mapping, 2075 extern int filemap_fdatawrite_range(struct address_space *mapping,
2076 loff_t start, loff_t end); 2076 loff_t start, loff_t end);
2077 2077
2078 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); 2078 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
2079 extern void sync_supers(void); 2079 extern void sync_supers(void);
2080 extern void sync_filesystems(int wait); 2080 extern void sync_filesystems(int wait);
2081 extern void emergency_sync(void); 2081 extern void emergency_sync(void);
2082 extern void emergency_remount(void); 2082 extern void emergency_remount(void);
2083 extern int do_remount_sb(struct super_block *sb, int flags, 2083 extern int do_remount_sb(struct super_block *sb, int flags,
2084 void *data, int force); 2084 void *data, int force);
2085 #ifdef CONFIG_BLOCK 2085 #ifdef CONFIG_BLOCK
2086 extern sector_t bmap(struct inode *, sector_t); 2086 extern sector_t bmap(struct inode *, sector_t);
2087 #endif 2087 #endif
2088 extern int notify_change(struct dentry *, struct iattr *); 2088 extern int notify_change(struct dentry *, struct iattr *);
2089 extern int inode_permission(struct inode *, int); 2089 extern int inode_permission(struct inode *, int);
2090 extern int generic_permission(struct inode *, int, 2090 extern int generic_permission(struct inode *, int,
2091 int (*check_acl)(struct inode *, int)); 2091 int (*check_acl)(struct inode *, int));
2092 2092
2093 static inline bool execute_ok(struct inode *inode) 2093 static inline bool execute_ok(struct inode *inode)
2094 { 2094 {
2095 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); 2095 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2096 } 2096 }
2097 2097
2098 extern int get_write_access(struct inode *); 2098 extern int get_write_access(struct inode *);
2099 extern int deny_write_access(struct file *); 2099 extern int deny_write_access(struct file *);
2100 static inline void put_write_access(struct inode * inode) 2100 static inline void put_write_access(struct inode * inode)
2101 { 2101 {
2102 atomic_dec(&inode->i_writecount); 2102 atomic_dec(&inode->i_writecount);
2103 } 2103 }
2104 static inline void allow_write_access(struct file *file) 2104 static inline void allow_write_access(struct file *file)
2105 { 2105 {
2106 if (file) 2106 if (file)
2107 atomic_inc(&file->f_path.dentry->d_inode->i_writecount); 2107 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
2108 } 2108 }
2109 extern int do_pipe_flags(int *, int); 2109 extern int do_pipe_flags(int *, int);
2110 extern struct file *create_read_pipe(struct file *f, int flags); 2110 extern struct file *create_read_pipe(struct file *f, int flags);
2111 extern struct file *create_write_pipe(int flags); 2111 extern struct file *create_write_pipe(int flags);
2112 extern void free_write_pipe(struct file *); 2112 extern void free_write_pipe(struct file *);
2113 2113
2114 extern struct file *do_filp_open(int dfd, const char *pathname, 2114 extern struct file *do_filp_open(int dfd, const char *pathname,
2115 int open_flag, int mode, int acc_mode); 2115 int open_flag, int mode, int acc_mode);
2116 extern int may_open(struct path *, int, int); 2116 extern int may_open(struct path *, int, int);
2117 2117
2118 extern int kernel_read(struct file *, unsigned long, char *, unsigned long); 2118 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
2119 extern struct file * open_exec(const char *); 2119 extern struct file * open_exec(const char *);
2120 2120
2121 /* fs/dcache.c -- generic fs support functions */ 2121 /* fs/dcache.c -- generic fs support functions */
2122 extern int is_subdir(struct dentry *, struct dentry *); 2122 extern int is_subdir(struct dentry *, struct dentry *);
2123 extern ino_t find_inode_number(struct dentry *, struct qstr *); 2123 extern ino_t find_inode_number(struct dentry *, struct qstr *);
2124 2124
2125 #include <linux/err.h> 2125 #include <linux/err.h>
2126 2126
2127 /* needed for stackable file system support */ 2127 /* needed for stackable file system support */
2128 extern loff_t default_llseek(struct file *file, loff_t offset, int origin); 2128 extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
2129 2129
2130 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); 2130 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
2131 2131
2132 extern struct inode * inode_init_always(struct super_block *, struct inode *); 2132 extern struct inode * inode_init_always(struct super_block *, struct inode *);
2133 extern void inode_init_once(struct inode *); 2133 extern void inode_init_once(struct inode *);
2134 extern void inode_add_to_lists(struct super_block *, struct inode *); 2134 extern void inode_add_to_lists(struct super_block *, struct inode *);
2135 extern void iput(struct inode *); 2135 extern void iput(struct inode *);
2136 extern struct inode * igrab(struct inode *); 2136 extern struct inode * igrab(struct inode *);
2137 extern ino_t iunique(struct super_block *, ino_t); 2137 extern ino_t iunique(struct super_block *, ino_t);
2138 extern int inode_needs_sync(struct inode *inode); 2138 extern int inode_needs_sync(struct inode *inode);
2139 extern void generic_delete_inode(struct inode *inode); 2139 extern void generic_delete_inode(struct inode *inode);
2140 extern void generic_drop_inode(struct inode *inode); 2140 extern void generic_drop_inode(struct inode *inode);
2141 2141
2142 extern struct inode *ilookup5_nowait(struct super_block *sb, 2142 extern struct inode *ilookup5_nowait(struct super_block *sb,
2143 unsigned long hashval, int (*test)(struct inode *, void *), 2143 unsigned long hashval, int (*test)(struct inode *, void *),
2144 void *data); 2144 void *data);
2145 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 2145 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
2146 int (*test)(struct inode *, void *), void *data); 2146 int (*test)(struct inode *, void *), void *data);
2147 extern struct inode *ilookup(struct super_block *sb, unsigned long ino); 2147 extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
2148 2148
2149 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); 2149 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
2150 extern struct inode * iget_locked(struct super_block *, unsigned long); 2150 extern struct inode * iget_locked(struct super_block *, unsigned long);
2151 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); 2151 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
2152 extern int insert_inode_locked(struct inode *); 2152 extern int insert_inode_locked(struct inode *);
2153 extern void unlock_new_inode(struct inode *); 2153 extern void unlock_new_inode(struct inode *);
2154 2154
2155 extern void __iget(struct inode * inode); 2155 extern void __iget(struct inode * inode);
2156 extern void iget_failed(struct inode *); 2156 extern void iget_failed(struct inode *);
2157 extern void clear_inode(struct inode *); 2157 extern void clear_inode(struct inode *);
2158 extern void destroy_inode(struct inode *); 2158 extern void destroy_inode(struct inode *);
2159 extern struct inode *new_inode(struct super_block *); 2159 extern struct inode *new_inode(struct super_block *);
2160 extern int should_remove_suid(struct dentry *); 2160 extern int should_remove_suid(struct dentry *);
2161 extern int file_remove_suid(struct file *); 2161 extern int file_remove_suid(struct file *);
2162 2162
2163 extern void __insert_inode_hash(struct inode *, unsigned long hashval); 2163 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
2164 extern void remove_inode_hash(struct inode *); 2164 extern void remove_inode_hash(struct inode *);
2165 static inline void insert_inode_hash(struct inode *inode) { 2165 static inline void insert_inode_hash(struct inode *inode) {
2166 __insert_inode_hash(inode, inode->i_ino); 2166 __insert_inode_hash(inode, inode->i_ino);
2167 } 2167 }
2168 2168
2169 extern struct file * get_empty_filp(void); 2169 extern struct file * get_empty_filp(void);
2170 extern void file_move(struct file *f, struct list_head *list); 2170 extern void file_move(struct file *f, struct list_head *list);
2171 extern void file_kill(struct file *f); 2171 extern void file_kill(struct file *f);
2172 #ifdef CONFIG_BLOCK 2172 #ifdef CONFIG_BLOCK
2173 struct bio; 2173 struct bio;
2174 extern void submit_bio(int, struct bio *); 2174 extern void submit_bio(int, struct bio *);
2175 extern int bdev_read_only(struct block_device *); 2175 extern int bdev_read_only(struct block_device *);
2176 #endif 2176 #endif
2177 extern int set_blocksize(struct block_device *, int); 2177 extern int set_blocksize(struct block_device *, int);
2178 extern int sb_set_blocksize(struct super_block *, int); 2178 extern int sb_set_blocksize(struct super_block *, int);
2179 extern int sb_min_blocksize(struct super_block *, int); 2179 extern int sb_min_blocksize(struct super_block *, int);
2180 extern int sb_has_dirty_inodes(struct super_block *); 2180 extern int sb_has_dirty_inodes(struct super_block *);
2181 2181
2182 extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2182 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2183 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2183 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2184 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 2184 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
2185 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2185 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2186 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2186 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2187 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2187 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2188 extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, 2188 extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
2189 unsigned long, loff_t); 2189 unsigned long, loff_t);
2190 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2190 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
2191 unsigned long *, loff_t, loff_t *, size_t, size_t); 2191 unsigned long *, loff_t, loff_t *, size_t, size_t);
2192 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, 2192 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
2193 unsigned long, loff_t, loff_t *, size_t, ssize_t); 2193 unsigned long, loff_t, loff_t *, size_t, ssize_t);
2194 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2194 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
2195 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2195 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
2196 extern int generic_segment_checks(const struct iovec *iov, 2196 extern int generic_segment_checks(const struct iovec *iov,
2197 unsigned long *nr_segs, size_t *count, int access_flags); 2197 unsigned long *nr_segs, size_t *count, int access_flags);
2198 2198
2199 /* fs/splice.c */ 2199 /* fs/splice.c */
2200 extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2200 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
2201 struct pipe_inode_info *, size_t, unsigned int); 2201 struct pipe_inode_info *, size_t, unsigned int);
2202 extern ssize_t default_file_splice_read(struct file *, loff_t *, 2202 extern ssize_t default_file_splice_read(struct file *, loff_t *,
2203 struct pipe_inode_info *, size_t, unsigned int); 2203 struct pipe_inode_info *, size_t, unsigned int);
2204 extern ssize_t generic_file_splice_write(struct pipe_inode_info *, 2204 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
2205 struct file *, loff_t *, size_t, unsigned int); 2205 struct file *, loff_t *, size_t, unsigned int);
2206 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, 2206 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
2207 struct file *out, loff_t *, size_t len, unsigned int flags); 2207 struct file *out, loff_t *, size_t len, unsigned int flags);
2208 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 2208 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
2209 size_t len, unsigned int flags); 2209 size_t len, unsigned int flags);
2210 2210
2211 extern void 2211 extern void
2212 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 2212 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
2213 extern loff_t no_llseek(struct file *file, loff_t offset, int origin); 2213 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
2214 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); 2214 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
2215 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, 2215 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
2216 int origin); 2216 int origin);
2217 extern int generic_file_open(struct inode * inode, struct file * filp); 2217 extern int generic_file_open(struct inode * inode, struct file * filp);
2218 extern int nonseekable_open(struct inode * inode, struct file * filp); 2218 extern int nonseekable_open(struct inode * inode, struct file * filp);
2219 2219
2220 #ifdef CONFIG_FS_XIP 2220 #ifdef CONFIG_FS_XIP
2221 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, 2221 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
2222 loff_t *ppos); 2222 loff_t *ppos);
2223 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); 2223 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
2224 extern ssize_t xip_file_write(struct file *filp, const char __user *buf, 2224 extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
2225 size_t len, loff_t *ppos); 2225 size_t len, loff_t *ppos);
2226 extern int xip_truncate_page(struct address_space *mapping, loff_t from); 2226 extern int xip_truncate_page(struct address_space *mapping, loff_t from);
2227 #else 2227 #else
2228 static inline int xip_truncate_page(struct address_space *mapping, loff_t from) 2228 static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
2229 { 2229 {
2230 return 0; 2230 return 0;
2231 } 2231 }
2232 #endif 2232 #endif
2233 2233
2234 #ifdef CONFIG_BLOCK 2234 #ifdef CONFIG_BLOCK
2235 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2235 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2236 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2236 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2237 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 2237 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
2238 int lock_type); 2238 int lock_type);
2239 2239
2240 enum { 2240 enum {
2241 DIO_LOCKING = 1, /* need locking between buffered and direct access */ 2241 DIO_LOCKING = 1, /* need locking between buffered and direct access */
2242 DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ 2242 DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */
2243 DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ 2243 DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
2244 }; 2244 };
2245 2245
2246 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, 2246 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2247 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2247 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2248 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2248 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2249 dio_iodone_t end_io) 2249 dio_iodone_t end_io)
2250 { 2250 {
2251 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2251 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2252 nr_segs, get_block, end_io, DIO_LOCKING); 2252 nr_segs, get_block, end_io, DIO_LOCKING);
2253 } 2253 }
2254 2254
2255 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, 2255 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
2256 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2256 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2257 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2257 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2258 dio_iodone_t end_io) 2258 dio_iodone_t end_io)
2259 { 2259 {
2260 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2260 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2261 nr_segs, get_block, end_io, DIO_NO_LOCKING); 2261 nr_segs, get_block, end_io, DIO_NO_LOCKING);
2262 } 2262 }
2263 2263
2264 static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, 2264 static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
2265 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2265 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2266 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2266 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2267 dio_iodone_t end_io) 2267 dio_iodone_t end_io)
2268 { 2268 {
2269 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2269 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2270 nr_segs, get_block, end_io, DIO_OWN_LOCKING); 2270 nr_segs, get_block, end_io, DIO_OWN_LOCKING);
2271 } 2271 }
2272 #endif 2272 #endif
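
A typical ->direct_IO() implementation delegates to the DIO_LOCKING wrapper above, in the style of the simple disk filesystems; myfs_get_block is a hypothetical get_block_t:

    static ssize_t myfs_direct_IO(int rw, struct kiocb *iocb,
                                  const struct iovec *iov, loff_t offset,
                                  unsigned long nr_segs)
    {
            struct file *file = iocb->ki_filp;
            struct inode *inode = file->f_mapping->host;

            /* myfs_get_block(): hypothetical block-mapping callback */
            return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
                                      iov, offset, nr_segs, myfs_get_block,
                                      NULL);
    }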
2273 2273
2274 extern const struct file_operations generic_ro_fops; 2274 extern const struct file_operations generic_ro_fops;
2275 2275
2276 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2276 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
2277 2277
2278 extern int vfs_readlink(struct dentry *, char __user *, int, const char *); 2278 extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
2279 extern int vfs_follow_link(struct nameidata *, const char *); 2279 extern int vfs_follow_link(struct nameidata *, const char *);
2280 extern int page_readlink(struct dentry *, char __user *, int); 2280 extern int page_readlink(struct dentry *, char __user *, int);
2281 extern void *page_follow_link_light(struct dentry *, struct nameidata *); 2281 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
2282 extern void page_put_link(struct dentry *, struct nameidata *, void *); 2282 extern void page_put_link(struct dentry *, struct nameidata *, void *);
2283 extern int __page_symlink(struct inode *inode, const char *symname, int len, 2283 extern int __page_symlink(struct inode *inode, const char *symname, int len,
2284 int nofs); 2284 int nofs);
2285 extern int page_symlink(struct inode *inode, const char *symname, int len); 2285 extern int page_symlink(struct inode *inode, const char *symname, int len);
2286 extern const struct inode_operations page_symlink_inode_operations; 2286 extern const struct inode_operations page_symlink_inode_operations;
2287 extern int generic_readlink(struct dentry *, char __user *, int); 2287 extern int generic_readlink(struct dentry *, char __user *, int);
2288 extern void generic_fillattr(struct inode *, struct kstat *); 2288 extern void generic_fillattr(struct inode *, struct kstat *);
2289 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2289 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2290 void inode_add_bytes(struct inode *inode, loff_t bytes); 2290 void inode_add_bytes(struct inode *inode, loff_t bytes);
2291 void inode_sub_bytes(struct inode *inode, loff_t bytes); 2291 void inode_sub_bytes(struct inode *inode, loff_t bytes);
2292 loff_t inode_get_bytes(struct inode *inode); 2292 loff_t inode_get_bytes(struct inode *inode);
2293 void inode_set_bytes(struct inode *inode, loff_t bytes); 2293 void inode_set_bytes(struct inode *inode, loff_t bytes);
2294 2294
2295 extern int vfs_readdir(struct file *, filldir_t, void *); 2295 extern int vfs_readdir(struct file *, filldir_t, void *);
2296 2296
2297 extern int vfs_stat(char __user *, struct kstat *); 2297 extern int vfs_stat(char __user *, struct kstat *);
2298 extern int vfs_lstat(char __user *, struct kstat *); 2298 extern int vfs_lstat(char __user *, struct kstat *);
2299 extern int vfs_fstat(unsigned int, struct kstat *); 2299 extern int vfs_fstat(unsigned int, struct kstat *);
2300 extern int vfs_fstatat(int , char __user *, struct kstat *, int); 2300 extern int vfs_fstatat(int , char __user *, struct kstat *, int);
2301 2301
2302 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, 2302 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
2303 unsigned long arg); 2303 unsigned long arg);
2304 extern int __generic_block_fiemap(struct inode *inode, 2304 extern int __generic_block_fiemap(struct inode *inode,
2305 struct fiemap_extent_info *fieinfo, u64 start, 2305 struct fiemap_extent_info *fieinfo, u64 start,
2306 u64 len, get_block_t *get_block); 2306 u64 len, get_block_t *get_block);
2307 extern int generic_block_fiemap(struct inode *inode, 2307 extern int generic_block_fiemap(struct inode *inode,
2308 struct fiemap_extent_info *fieinfo, u64 start, 2308 struct fiemap_extent_info *fieinfo, u64 start,
2309 u64 len, get_block_t *get_block); 2309 u64 len, get_block_t *get_block);
2310 2310
2311 extern void get_filesystem(struct file_system_type *fs); 2311 extern void get_filesystem(struct file_system_type *fs);
2312 extern void put_filesystem(struct file_system_type *fs); 2312 extern void put_filesystem(struct file_system_type *fs);
2313 extern struct file_system_type *get_fs_type(const char *name); 2313 extern struct file_system_type *get_fs_type(const char *name);
2314 extern struct super_block *get_super(struct block_device *); 2314 extern struct super_block *get_super(struct block_device *);
2315 extern struct super_block *user_get_super(dev_t); 2315 extern struct super_block *user_get_super(dev_t);
2316 extern void drop_super(struct super_block *sb); 2316 extern void drop_super(struct super_block *sb);
2317 2317
2318 extern int dcache_dir_open(struct inode *, struct file *); 2318 extern int dcache_dir_open(struct inode *, struct file *);
2319 extern int dcache_dir_close(struct inode *, struct file *); 2319 extern int dcache_dir_close(struct inode *, struct file *);
2320 extern loff_t dcache_dir_lseek(struct file *, loff_t, int); 2320 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
2321 extern int dcache_readdir(struct file *, void *, filldir_t); 2321 extern int dcache_readdir(struct file *, void *, filldir_t);
2322 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2322 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2323 extern int simple_statfs(struct dentry *, struct kstatfs *); 2323 extern int simple_statfs(struct dentry *, struct kstatfs *);
2324 extern int simple_link(struct dentry *, struct inode *, struct dentry *); 2324 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
2325 extern int simple_unlink(struct inode *, struct dentry *); 2325 extern int simple_unlink(struct inode *, struct dentry *);
2326 extern int simple_rmdir(struct inode *, struct dentry *); 2326 extern int simple_rmdir(struct inode *, struct dentry *);
2327 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 2327 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2328 extern int simple_sync_file(struct file *, struct dentry *, int); 2328 extern int simple_sync_file(struct file *, struct dentry *, int);
2329 extern int simple_empty(struct dentry *); 2329 extern int simple_empty(struct dentry *);
2330 extern int simple_readpage(struct file *file, struct page *page); 2330 extern int simple_readpage(struct file *file, struct page *page);
2331 extern int simple_prepare_write(struct file *file, struct page *page, 2331 extern int simple_prepare_write(struct file *file, struct page *page,
2332 unsigned offset, unsigned to); 2332 unsigned offset, unsigned to);
2333 extern int simple_write_begin(struct file *file, struct address_space *mapping, 2333 extern int simple_write_begin(struct file *file, struct address_space *mapping,
2334 loff_t pos, unsigned len, unsigned flags, 2334 loff_t pos, unsigned len, unsigned flags,
2335 struct page **pagep, void **fsdata); 2335 struct page **pagep, void **fsdata);
2336 extern int simple_write_end(struct file *file, struct address_space *mapping, 2336 extern int simple_write_end(struct file *file, struct address_space *mapping,
2337 loff_t pos, unsigned len, unsigned copied, 2337 loff_t pos, unsigned len, unsigned copied,
2338 struct page *page, void *fsdata); 2338 struct page *page, void *fsdata);
2339 2339
2340 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); 2340 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
2341 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); 2341 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
2342 extern const struct file_operations simple_dir_operations; 2342 extern const struct file_operations simple_dir_operations;
2343 extern const struct inode_operations simple_dir_inode_operations; 2343 extern const struct inode_operations simple_dir_inode_operations;
2344 struct tree_descr { char *name; const struct file_operations *ops; int mode; }; 2344 struct tree_descr { char *name; const struct file_operations *ops; int mode; };
2345 struct dentry *d_alloc_name(struct dentry *, const char *); 2345 struct dentry *d_alloc_name(struct dentry *, const char *);
2346 extern int simple_fill_super(struct super_block *, int, struct tree_descr *); 2346 extern int simple_fill_super(struct super_block *, int, struct tree_descr *);
2347 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); 2347 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
2348 extern void simple_release_fs(struct vfsmount **mount, int *count); 2348 extern void simple_release_fs(struct vfsmount **mount, int *count);
2349 2349
2350 extern ssize_t simple_read_from_buffer(void __user *to, size_t count, 2350 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
2351 loff_t *ppos, const void *from, size_t available); 2351 loff_t *ppos, const void *from, size_t available);
2352 2352
2353 #ifdef CONFIG_MIGRATION 2353 #ifdef CONFIG_MIGRATION
2354 extern int buffer_migrate_page(struct address_space *, 2354 extern int buffer_migrate_page(struct address_space *,
2355 struct page *, struct page *); 2355 struct page *, struct page *);
2356 #else 2356 #else
2357 #define buffer_migrate_page NULL 2357 #define buffer_migrate_page NULL
2358 #endif 2358 #endif
2359 2359
2360 extern int inode_change_ok(struct inode *, struct iattr *); 2360 extern int inode_change_ok(struct inode *, struct iattr *);
2361 extern int __must_check inode_setattr(struct inode *, struct iattr *); 2361 extern int __must_check inode_setattr(struct inode *, struct iattr *);
2362 2362
2363 extern void file_update_time(struct file *file); 2363 extern void file_update_time(struct file *file);
2364 2364
2365 extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); 2365 extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
2366 extern void save_mount_options(struct super_block *sb, char *options); 2366 extern void save_mount_options(struct super_block *sb, char *options);
2367 extern void replace_mount_options(struct super_block *sb, char *options); 2367 extern void replace_mount_options(struct super_block *sb, char *options);
2368 2368
2369 static inline ino_t parent_ino(struct dentry *dentry) 2369 static inline ino_t parent_ino(struct dentry *dentry)
2370 { 2370 {
2371 ino_t res; 2371 ino_t res;
2372 2372
2373 spin_lock(&dentry->d_lock); 2373 spin_lock(&dentry->d_lock);
2374 res = dentry->d_parent->d_inode->i_ino; 2374 res = dentry->d_parent->d_inode->i_ino;
2375 spin_unlock(&dentry->d_lock); 2375 spin_unlock(&dentry->d_lock);
2376 return res; 2376 return res;
2377 } 2377 }
2378 2378
2379 /* Transaction based IO helpers */ 2379 /* Transaction based IO helpers */
2380 2380
2381 /* 2381 /*
2382 * An argresp is stored in an allocated page and holds the 2382 * An argresp is stored in an allocated page and holds the
2383 * size of the argument or response, along with its content 2383 * size of the argument or response, along with its content
2384 */ 2384 */
2385 struct simple_transaction_argresp { 2385 struct simple_transaction_argresp {
2386 ssize_t size; 2386 ssize_t size;
2387 char data[0]; 2387 char data[0];
2388 }; 2388 };
2389 2389
2390 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) 2390 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
2391 2391
2392 char *simple_transaction_get(struct file *file, const char __user *buf, 2392 char *simple_transaction_get(struct file *file, const char __user *buf,
2393 size_t size); 2393 size_t size);
2394 ssize_t simple_transaction_read(struct file *file, char __user *buf, 2394 ssize_t simple_transaction_read(struct file *file, char __user *buf,
2395 size_t size, loff_t *pos); 2395 size_t size, loff_t *pos);
2396 int simple_transaction_release(struct inode *inode, struct file *file); 2396 int simple_transaction_release(struct inode *inode, struct file *file);
2397 2397
2398 void simple_transaction_set(struct file *file, size_t n); 2398 void simple_transaction_set(struct file *file, size_t n);
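
A sketch of the transaction pattern these helpers implement: write() parses a command into the per-file page and leaves the reply in the same buffer for a subsequent read(); my_trans_write is hypothetical and the parsing step is elided:

    static ssize_t my_trans_write(struct file *file, const char __user *buf,
                                  size_t size, loff_t *pos)
    {
            char *data = simple_transaction_get(file, buf, size);

            if (IS_ERR(data))
                    return PTR_ERR(data);

            /* ... parse 'data' and overwrite it with the response ... */

            simple_transaction_set(file, strlen(data));
            return size;
    }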
2399 2399
2400 /* 2400 /*
2401 * simple attribute files 2401 * simple attribute files
2402 * 2402 *
2403 * These attributes behave similarly to those in sysfs: 2403 * These attributes behave similarly to those in sysfs:
2404 * 2404 *
2405 * Writing to an attribute immediately sets a value; an open file can be 2405 * Writing to an attribute immediately sets a value; an open file can be
2406 * written to multiple times. 2406 * written to multiple times.
2407 * 2407 *
2408 * Reading from an attribute creates a buffer from the value that might get 2408 * Reading from an attribute creates a buffer from the value that might get
2409 * read with multiple read calls. When the attribute has been read 2409 * read with multiple read calls. When the attribute has been read
2410 * completely, no further read calls are possible until the file is opened 2410 * completely, no further read calls are possible until the file is opened
2411 * again. 2411 * again.
2412 * 2412 *
2413 * All attributes contain a text representation of a numeric value 2413 * All attributes contain a text representation of a numeric value
2414 * that is accessed with the get() and set() functions. 2414 * that is accessed with the get() and set() functions.
2415 */ 2415 */
2416 #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ 2416 #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
2417 static int __fops ## _open(struct inode *inode, struct file *file) \ 2417 static int __fops ## _open(struct inode *inode, struct file *file) \
2418 { \ 2418 { \
2419 __simple_attr_check_format(__fmt, 0ull); \ 2419 __simple_attr_check_format(__fmt, 0ull); \
2420 return simple_attr_open(inode, file, __get, __set, __fmt); \ 2420 return simple_attr_open(inode, file, __get, __set, __fmt); \
2421 } \ 2421 } \
2422 static struct file_operations __fops = { \ 2422 static struct file_operations __fops = { \
2423 .owner = THIS_MODULE, \ 2423 .owner = THIS_MODULE, \
2424 .open = __fops ## _open, \ 2424 .open = __fops ## _open, \
2425 .release = simple_attr_release, \ 2425 .release = simple_attr_release, \
2426 .read = simple_attr_read, \ 2426 .read = simple_attr_read, \
2427 .write = simple_attr_write, \ 2427 .write = simple_attr_write, \
2428 }; 2428 };
2429 2429
2430 static inline void __attribute__((format(printf, 1, 2))) 2430 static inline void __attribute__((format(printf, 1, 2)))
2431 __simple_attr_check_format(const char *fmt, ...) 2431 __simple_attr_check_format(const char *fmt, ...)
2432 { 2432 {
2433 /* don't do anything, just let the compiler check the arguments; */ 2433 /* don't do anything, just let the compiler check the arguments; */
2434 } 2434 }
2435 2435
2436 int simple_attr_open(struct inode *inode, struct file *file, 2436 int simple_attr_open(struct inode *inode, struct file *file,
2437 int (*get)(void *, u64 *), int (*set)(void *, u64), 2437 int (*get)(void *, u64 *), int (*set)(void *, u64),
2438 const char *fmt); 2438 const char *fmt);
2439 int simple_attr_release(struct inode *inode, struct file *file); 2439 int simple_attr_release(struct inode *inode, struct file *file);
2440 ssize_t simple_attr_read(struct file *file, char __user *buf, 2440 ssize_t simple_attr_read(struct file *file, char __user *buf,
2441 size_t len, loff_t *ppos); 2441 size_t len, loff_t *ppos);
2442 ssize_t simple_attr_write(struct file *file, const char __user *buf, 2442 ssize_t simple_attr_write(struct file *file, const char __user *buf,
2443 size_t len, loff_t *ppos); 2443 size_t len, loff_t *ppos);
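
Putting the pieces together: DEFINE_SIMPLE_ATTRIBUTE() expands to an open method that binds a get()/set() pair and a printf format to the generic simple_attr_{open,release,read,write} routines above. A sketch of the usual debugfs usage follows; the exposed variable and the file name are invented for illustration:

static u64 example_threshold;

static int example_get(void *data, u64 *val)
{
	*val = example_threshold;
	return 0;
}

static int example_set(void *data, u64 val)
{
	example_threshold = val;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");

/* registered from some init path, e.g.:
 *	debugfs_create_file("example_threshold", 0644, NULL, NULL,
 *			    &example_fops);
 */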
2444 2444
2445 struct ctl_table; 2445 struct ctl_table;
2446 int proc_nr_files(struct ctl_table *table, int write, struct file *filp, 2446 int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
2447 void __user *buffer, size_t *lenp, loff_t *ppos); 2447 void __user *buffer, size_t *lenp, loff_t *ppos);
2448 2448
2449 int __init get_filesystem_list(char *buf); 2449 int __init get_filesystem_list(char *buf);
2450 2450
2451 #endif /* __KERNEL__ */ 2451 #endif /* __KERNEL__ */
2452 #endif /* _LINUX_FS_H */ 2452 #endif /* _LINUX_FS_H */
2453 2453
include/linux/writeback.h
1 /* 1 /*
2 * include/linux/writeback.h 2 * include/linux/writeback.h
3 */ 3 */
4 #ifndef WRITEBACK_H 4 #ifndef WRITEBACK_H
5 #define WRITEBACK_H 5 #define WRITEBACK_H
6 6
7 #include <linux/sched.h> 7 #include <linux/sched.h>
8 #include <linux/fs.h> 8 #include <linux/fs.h>
9 9
10 struct backing_dev_info; 10 struct backing_dev_info;
11 11
12 extern spinlock_t inode_lock; 12 extern spinlock_t inode_lock;
13 extern struct list_head inode_in_use; 13 extern struct list_head inode_in_use;
14 extern struct list_head inode_unused; 14 extern struct list_head inode_unused;
15 15
16 /* 16 /*
17 * Yes, writeback.h requires sched.h 17 * Yes, writeback.h requires sched.h
18 * No, sched.h is not included from here. 18 * No, sched.h is not included from here.
19 */ 19 */
20 static inline int task_is_pdflush(struct task_struct *task) 20 static inline int task_is_pdflush(struct task_struct *task)
21 { 21 {
22 return task->flags & PF_FLUSHER; 22 return task->flags & PF_FLUSHER;
23 } 23 }
24 24
25 #define current_is_pdflush() task_is_pdflush(current) 25 #define current_is_pdflush() task_is_pdflush(current)
26 26
27 /* 27 /*
28 * fs/fs-writeback.c 28 * fs/fs-writeback.c
29 */ 29 */
30 enum writeback_sync_modes { 30 enum writeback_sync_modes {
31 WB_SYNC_NONE, /* Don't wait on anything */ 31 WB_SYNC_NONE, /* Don't wait on anything */
32 WB_SYNC_ALL, /* Wait on every mapping */ 32 WB_SYNC_ALL, /* Wait on every mapping */
33 }; 33 };
34 34
35 /* 35 /*
36 * A control structure which tells the writeback code what to do. These are 36 * A control structure which tells the writeback code what to do. These are
37 * always on the stack, and hence need no locking. They are always initialised 37 * always on the stack, and hence need no locking. They are always initialised
38 * in a manner such that unspecified fields are set to zero. 38 * in a manner such that unspecified fields are set to zero.
39 */ 39 */
40 struct writeback_control { 40 struct writeback_control {
41 struct backing_dev_info *bdi; /* If !NULL, only write back this 41 struct backing_dev_info *bdi; /* If !NULL, only write back this
42 queue */ 42 queue */
43 enum writeback_sync_modes sync_mode; 43 enum writeback_sync_modes sync_mode;
44 unsigned long *older_than_this; /* If !NULL, only write back inodes 44 unsigned long *older_than_this; /* If !NULL, only write back inodes
45 older than this */ 45 older than this */
46 long nr_to_write; /* Write this many pages, and decrement 46 long nr_to_write; /* Write this many pages, and decrement
47 this for each page written */ 47 this for each page written */
48 long pages_skipped; /* Pages which were not written */ 48 long pages_skipped; /* Pages which were not written */
49 49
50 /* 50 /*
51 * For a_ops->writepages(): if start or end is non-zero then this is 51 * For a_ops->writepages(): if start or end is non-zero then this is
52 * a hint that the filesystem need only write out the pages inside that 52 * a hint that the filesystem need only write out the pages inside that
53 * byterange. The byte at `end' is included in the writeout request. 53 * byterange. The byte at `end' is included in the writeout request.
54 */ 54 */
55 loff_t range_start; 55 loff_t range_start;
56 loff_t range_end; 56 loff_t range_end;
57 57
58 unsigned nonblocking:1; /* Don't get stuck on request queues */ 58 unsigned nonblocking:1; /* Don't get stuck on request queues */
59 unsigned encountered_congestion:1; /* An output: a queue is full */ 59 unsigned encountered_congestion:1; /* An output: a queue is full */
60 unsigned for_kupdate:1; /* A kupdate writeback */ 60 unsigned for_kupdate:1; /* A kupdate writeback */
61 unsigned for_reclaim:1; /* Invoked from the page allocator */ 61 unsigned for_reclaim:1; /* Invoked from the page allocator */
62 unsigned for_writepages:1; /* This is a writepages() call */ 62 unsigned for_writepages:1; /* This is a writepages() call */
63 unsigned range_cyclic:1; /* range_start is cyclic */ 63 unsigned range_cyclic:1; /* range_start is cyclic */
64 unsigned more_io:1; /* more io to be dispatched */ 64 unsigned more_io:1; /* more io to be dispatched */
65 /* 65 /*
66 * write_cache_pages() won't update wbc->nr_to_write and 66 * write_cache_pages() won't update wbc->nr_to_write and
67 * mapping->writeback_index if no_nrwrite_index_update 67 * mapping->writeback_index if no_nrwrite_index_update
68 * is set. write_cache_pages() may write more than we 68 * is set. write_cache_pages() may write more than we
69 * requested, and we want to make sure nr_to_write and 69 * requested, and we want to make sure nr_to_write and
70 * writeback_index are updated in a consistent manner 70 * writeback_index are updated in a consistent manner
71 * so we use a single control to update them 71 * so we use a single control to update them
72 */ 72 */
73 unsigned no_nrwrite_index_update:1; 73 unsigned no_nrwrite_index_update:1;
74 }; 74 };
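
Because a writeback_control is always a stack object and zero-initialised fields mean "no constraint", callers use a designated initialiser naming only what they care about. A minimal sketch in the style of the data-integrity paths this commit consolidates (LONG_MAX and LLONG_MAX come from kernel.h):

static void example_sync_mapping(struct address_space *mapping)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,	/* wait on every page */
		.nr_to_write	= LONG_MAX,	/* no page-count limit */
		.range_start	= 0,
		.range_end	= LLONG_MAX,	/* the whole byte range */
	};

	do_writepages(mapping, &wbc);
}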
75 75
76 /* 76 /*
77 * fs/fs-writeback.c 77 * fs/fs-writeback.c
78 */ 78 */
79 void writeback_inodes(struct writeback_control *wbc); 79 void writeback_inodes(struct writeback_control *wbc);
80 int inode_wait(void *); 80 int inode_wait(void *);
81 void sync_inodes_sb(struct super_block *, int wait); 81 void sync_inodes_sb(struct super_block *, int wait);
82 void sync_inodes(int wait);
83 82
84 /* writeback.h requires fs.h; it, too, is not included from here. */ 83 /* writeback.h requires fs.h; it, too, is not included from here. */
85 static inline void wait_on_inode(struct inode *inode) 84 static inline void wait_on_inode(struct inode *inode)
86 { 85 {
87 might_sleep(); 86 might_sleep();
88 wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, 87 wait_on_bit(&inode->i_state, __I_LOCK, inode_wait,
89 TASK_UNINTERRUPTIBLE); 88 TASK_UNINTERRUPTIBLE);
90 } 89 }
91 static inline void inode_sync_wait(struct inode *inode) 90 static inline void inode_sync_wait(struct inode *inode)
92 { 91 {
93 might_sleep(); 92 might_sleep();
94 wait_on_bit(&inode->i_state, __I_SYNC, inode_wait, 93 wait_on_bit(&inode->i_state, __I_SYNC, inode_wait,
95 TASK_UNINTERRUPTIBLE); 94 TASK_UNINTERRUPTIBLE);
96 } 95 }
97 96
98 97
99 /* 98 /*
100 * mm/page-writeback.c 99 * mm/page-writeback.c
101 */ 100 */
102 int wakeup_pdflush(long nr_pages); 101 int wakeup_pdflush(long nr_pages);
103 void laptop_io_completion(void); 102 void laptop_io_completion(void);
104 void laptop_sync_completion(void); 103 void laptop_sync_completion(void);
105 void throttle_vm_writeout(gfp_t gfp_mask); 104 void throttle_vm_writeout(gfp_t gfp_mask);
106 105
107 /* These are exported to sysctl. */ 106 /* These are exported to sysctl. */
108 extern int dirty_background_ratio; 107 extern int dirty_background_ratio;
109 extern unsigned long dirty_background_bytes; 108 extern unsigned long dirty_background_bytes;
110 extern int vm_dirty_ratio; 109 extern int vm_dirty_ratio;
111 extern unsigned long vm_dirty_bytes; 110 extern unsigned long vm_dirty_bytes;
112 extern unsigned int dirty_writeback_interval; 111 extern unsigned int dirty_writeback_interval;
113 extern unsigned int dirty_expire_interval; 112 extern unsigned int dirty_expire_interval;
114 extern int vm_highmem_is_dirtyable; 113 extern int vm_highmem_is_dirtyable;
115 extern int block_dump; 114 extern int block_dump;
116 extern int laptop_mode; 115 extern int laptop_mode;
117 116
118 extern unsigned long determine_dirtyable_memory(void); 117 extern unsigned long determine_dirtyable_memory(void);
119 118
120 extern int dirty_background_ratio_handler(struct ctl_table *table, int write, 119 extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
121 struct file *filp, void __user *buffer, size_t *lenp, 120 struct file *filp, void __user *buffer, size_t *lenp,
122 loff_t *ppos); 121 loff_t *ppos);
123 extern int dirty_background_bytes_handler(struct ctl_table *table, int write, 122 extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
124 struct file *filp, void __user *buffer, size_t *lenp, 123 struct file *filp, void __user *buffer, size_t *lenp,
125 loff_t *ppos); 124 loff_t *ppos);
126 extern int dirty_ratio_handler(struct ctl_table *table, int write, 125 extern int dirty_ratio_handler(struct ctl_table *table, int write,
127 struct file *filp, void __user *buffer, size_t *lenp, 126 struct file *filp, void __user *buffer, size_t *lenp,
128 loff_t *ppos); 127 loff_t *ppos);
129 extern int dirty_bytes_handler(struct ctl_table *table, int write, 128 extern int dirty_bytes_handler(struct ctl_table *table, int write,
130 struct file *filp, void __user *buffer, size_t *lenp, 129 struct file *filp, void __user *buffer, size_t *lenp,
131 loff_t *ppos); 130 loff_t *ppos);
132 131
133 struct ctl_table; 132 struct ctl_table;
134 struct file; 133 struct file;
135 int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, 134 int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
136 void __user *, size_t *, loff_t *); 135 void __user *, size_t *, loff_t *);
137 136
138 void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, 137 void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
139 unsigned long *pbdi_dirty, struct backing_dev_info *bdi); 138 unsigned long *pbdi_dirty, struct backing_dev_info *bdi);
140 139
141 void page_writeback_init(void); 140 void page_writeback_init(void);
142 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, 141 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
143 unsigned long nr_pages_dirtied); 142 unsigned long nr_pages_dirtied);
144 143
145 static inline void 144 static inline void
146 balance_dirty_pages_ratelimited(struct address_space *mapping) 145 balance_dirty_pages_ratelimited(struct address_space *mapping)
147 { 146 {
148 balance_dirty_pages_ratelimited_nr(mapping, 1); 147 balance_dirty_pages_ratelimited_nr(mapping, 1);
149 } 148 }
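
balance_dirty_pages_ratelimited() is what a write path calls after dirtying pages, so that one task cannot push the system past the vm_dirty_ratio/vm_dirty_bytes limits below; the _nr variant lets a caller that dirtied several pages at once amortise the rate-limited check. A sketch of the calling pattern, with the actual page-dirtying elided:

static void example_dirty_pages(struct address_space *mapping,
				pgoff_t first, pgoff_t last)
{
	pgoff_t index;

	for (index = first; index <= last; index++) {
		/* ... find the page at 'index' and dirty it ... */

		/* throttle: may sleep writing back dirty pages if this
		 * task has exceeded its dirty-memory allowance */
		balance_dirty_pages_ratelimited(mapping);
	}
}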
150 149
151 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, 150 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
152 void *data); 151 void *data);
153 152
154 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); 153 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
155 int generic_writepages(struct address_space *mapping, 154 int generic_writepages(struct address_space *mapping,
156 struct writeback_control *wbc); 155 struct writeback_control *wbc);
157 int write_cache_pages(struct address_space *mapping, 156 int write_cache_pages(struct address_space *mapping,
158 struct writeback_control *wbc, writepage_t writepage, 157 struct writeback_control *wbc, writepage_t writepage,
159 void *data); 158 void *data);
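
write_cache_pages() walks the dirty pages of a mapping, locks each one, and hands it to the writepage_t callback together with the opaque data cookie; the callback owns the page from there and must unlock it, exactly as ->writepage would. generic_writepages() is this loop with the mapping's own ->writepage as the callback. A hypothetical ->writepages built on top of it might look like the following (the context structure and stub are invented):

struct example_wb_ctx {
	int flags;			/* invented per-call state */
};

static int example_writepage_cb(struct page *page,
		struct writeback_control *wbc, void *data)
{
	struct example_wb_ctx *ctx = data;

	/* a real callback would map buffers and submit the I/O here,
	 * then let the completion handler end writeback; this stub
	 * only honours the locking contract: the page arrives locked
	 * and the callback must unlock it */
	(void)ctx;
	unlock_page(page);
	return 0;
}

static int example_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	struct example_wb_ctx ctx = { .flags = 0 };

	return write_cache_pages(mapping, wbc, example_writepage_cb, &ctx);
}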
160 int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 159 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
161 int sync_page_range(struct inode *inode, struct address_space *mapping, 160 int sync_page_range(struct inode *inode, struct address_space *mapping,
162 loff_t pos, loff_t count); 161 loff_t pos, loff_t count);
163 int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, 162 int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
164 loff_t pos, loff_t count); 163 loff_t pos, loff_t count);
165 void set_page_dirty_balance(struct page *page, int page_mkwrite); 164 void set_page_dirty_balance(struct page *page, int page_mkwrite);
166 void writeback_set_ratelimit(void); 165 void writeback_set_ratelimit(void);
167 166
168 /* pdflush.c */ 167 /* pdflush.c */
169 extern int nr_pdflush_threads; /* Global so it can be exported to sysctl 168 extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
170 read-only. */ 169 read-only. */
171 170
172 171
173 #endif /* WRITEBACK_H */ 172 #endif /* WRITEBACK_H */
174 173