Commit 5cee5815d1564bbbd505fea86f4550f1efdb5cd0
Committed by: Al Viro
Parent: 429479f031
Exists in: master and 7 other branches
vfs: Make sys_sync() use fsync_super() (version 4)
It is unnecessarily fragile to have two places (fsync_super() and do_sync())
doing a data integrity sync of the filesystem. Alter __fsync_super() to
accommodate the needs of both callers and use it. After this patch,
__fsync_super() is the only place where we gather all the calls needed to
properly send all data on a filesystem to disk. A nice bonus is that we get
complete livelock avoidance, and write_supers() is now used only for periodic
writeback of superblocks. sync_blockdevs(), introduced a couple of patches
ago, is gone now.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
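To make the consolidation concrete, here is a rough sketch of the resulting call structure. This is an assumption-laden paraphrase, not the literal fs/super.c hunk (fs/super.c is among the seven changed files but is not shown on this page); the essential point from the commit message is that one helper, parameterized by a wait flag, now does the whole data-integrity job, and livelock avoidance falls out of calling it twice.

	/* Hedged sketch only -- sync_inodes_sb()/->sync_fs() are the usual
	 * suspects, but the exact body lives in fs/super.c, not this hunk. */
	static int __fsync_super(struct super_block *sb, int wait)
	{
		sync_inodes_sb(sb, wait);		/* write (and maybe wait on) inodes */
		if (sb->s_op->sync_fs)
			sb->s_op->sync_fs(sb, wait);	/* filesystem-private sync */
		return __sync_blockdev(sb->s_bdev, wait); /* new helper in this file */
	}

	int fsync_super(struct super_block *sb)
	{
		__fsync_super(sb, 0);		/* pass 1: kick off writeback, no waiting */
		return __fsync_super(sb, 1);	/* pass 2: wait -- bounded work, no livelock */
	}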
Showing 7 changed files with 51 additions and 135 deletions (inline diff)
fs/block_dev.c
1 | /* | 1 | /* |
2 | * linux/fs/block_dev.c | 2 | * linux/fs/block_dev.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE | 5 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/fcntl.h> | 10 | #include <linux/fcntl.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/kmod.h> | 12 | #include <linux/kmod.h> |
13 | #include <linux/major.h> | 13 | #include <linux/major.h> |
14 | #include <linux/smp_lock.h> | 14 | #include <linux/smp_lock.h> |
15 | #include <linux/device_cgroup.h> | 15 | #include <linux/device_cgroup.h> |
16 | #include <linux/highmem.h> | 16 | #include <linux/highmem.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/blkpg.h> | 19 | #include <linux/blkpg.h> |
20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
21 | #include <linux/pagevec.h> | 21 | #include <linux/pagevec.h> |
22 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
23 | #include <linux/mpage.h> | 23 | #include <linux/mpage.h> |
24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
25 | #include <linux/uio.h> | 25 | #include <linux/uio.h> |
26 | #include <linux/namei.h> | 26 | #include <linux/namei.h> |
27 | #include <linux/log2.h> | 27 | #include <linux/log2.h> |
28 | #include <linux/kmemleak.h> | 28 | #include <linux/kmemleak.h> |
29 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
30 | #include "internal.h" | 30 | #include "internal.h" |
31 | 31 | ||
32 | struct bdev_inode { | 32 | struct bdev_inode { |
33 | struct block_device bdev; | 33 | struct block_device bdev; |
34 | struct inode vfs_inode; | 34 | struct inode vfs_inode; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | static const struct address_space_operations def_blk_aops; | 37 | static const struct address_space_operations def_blk_aops; |
38 | 38 | ||
39 | static inline struct bdev_inode *BDEV_I(struct inode *inode) | 39 | static inline struct bdev_inode *BDEV_I(struct inode *inode) |
40 | { | 40 | { |
41 | return container_of(inode, struct bdev_inode, vfs_inode); | 41 | return container_of(inode, struct bdev_inode, vfs_inode); |
42 | } | 42 | } |
43 | 43 | ||
44 | inline struct block_device *I_BDEV(struct inode *inode) | 44 | inline struct block_device *I_BDEV(struct inode *inode) |
45 | { | 45 | { |
46 | return &BDEV_I(inode)->bdev; | 46 | return &BDEV_I(inode)->bdev; |
47 | } | 47 | } |
48 | 48 | ||
49 | EXPORT_SYMBOL(I_BDEV); | 49 | EXPORT_SYMBOL(I_BDEV); |
50 | 50 | ||
51 | static sector_t max_block(struct block_device *bdev) | 51 | static sector_t max_block(struct block_device *bdev) |
52 | { | 52 | { |
53 | sector_t retval = ~((sector_t)0); | 53 | sector_t retval = ~((sector_t)0); |
54 | loff_t sz = i_size_read(bdev->bd_inode); | 54 | loff_t sz = i_size_read(bdev->bd_inode); |
55 | 55 | ||
56 | if (sz) { | 56 | if (sz) { |
57 | unsigned int size = block_size(bdev); | 57 | unsigned int size = block_size(bdev); |
58 | unsigned int sizebits = blksize_bits(size); | 58 | unsigned int sizebits = blksize_bits(size); |
59 | retval = (sz >> sizebits); | 59 | retval = (sz >> sizebits); |
60 | } | 60 | } |
61 | return retval; | 61 | return retval; |
62 | } | 62 | } |
63 | 63 | ||
64 | /* Kill _all_ buffers and pagecache, dirty or not. */ | 64 | /* Kill _all_ buffers and pagecache, dirty or not. */ |
65 | static void kill_bdev(struct block_device *bdev) | 65 | static void kill_bdev(struct block_device *bdev) |
66 | { | 66 | { |
67 | if (bdev->bd_inode->i_mapping->nrpages == 0) | 67 | if (bdev->bd_inode->i_mapping->nrpages == 0) |
68 | return; | 68 | return; |
69 | invalidate_bh_lrus(); | 69 | invalidate_bh_lrus(); |
70 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | 70 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); |
71 | } | 71 | } |
72 | 72 | ||
73 | int set_blocksize(struct block_device *bdev, int size) | 73 | int set_blocksize(struct block_device *bdev, int size) |
74 | { | 74 | { |
75 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ | 75 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ |
76 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) | 76 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) |
77 | return -EINVAL; | 77 | return -EINVAL; |
78 | 78 | ||
79 | /* Size cannot be smaller than the size supported by the device */ | 79 | /* Size cannot be smaller than the size supported by the device */ |
80 | if (size < bdev_logical_block_size(bdev)) | 80 | if (size < bdev_logical_block_size(bdev)) |
81 | return -EINVAL; | 81 | return -EINVAL; |
82 | 82 | ||
83 | /* Don't change the size if it is same as current */ | 83 | /* Don't change the size if it is same as current */ |
84 | if (bdev->bd_block_size != size) { | 84 | if (bdev->bd_block_size != size) { |
85 | sync_blockdev(bdev); | 85 | sync_blockdev(bdev); |
86 | bdev->bd_block_size = size; | 86 | bdev->bd_block_size = size; |
87 | bdev->bd_inode->i_blkbits = blksize_bits(size); | 87 | bdev->bd_inode->i_blkbits = blksize_bits(size); |
88 | kill_bdev(bdev); | 88 | kill_bdev(bdev); |
89 | } | 89 | } |
90 | return 0; | 90 | return 0; |
91 | } | 91 | } |
92 | 92 | ||
93 | EXPORT_SYMBOL(set_blocksize); | 93 | EXPORT_SYMBOL(set_blocksize); |
94 | 94 | ||
95 | int sb_set_blocksize(struct super_block *sb, int size) | 95 | int sb_set_blocksize(struct super_block *sb, int size) |
96 | { | 96 | { |
97 | if (set_blocksize(sb->s_bdev, size)) | 97 | if (set_blocksize(sb->s_bdev, size)) |
98 | return 0; | 98 | return 0; |
99 | /* If we get here, we know the size is a power of two | 99 | /* If we get here, we know the size is a power of two |
100 | * and its value is between 512 and PAGE_SIZE */ | 100 | * and its value is between 512 and PAGE_SIZE */ |
101 | sb->s_blocksize = size; | 101 | sb->s_blocksize = size; |
102 | sb->s_blocksize_bits = blksize_bits(size); | 102 | sb->s_blocksize_bits = blksize_bits(size); |
103 | return sb->s_blocksize; | 103 | return sb->s_blocksize; |
104 | } | 104 | } |
105 | 105 | ||
106 | EXPORT_SYMBOL(sb_set_blocksize); | 106 | EXPORT_SYMBOL(sb_set_blocksize); |
107 | 107 | ||
108 | int sb_min_blocksize(struct super_block *sb, int size) | 108 | int sb_min_blocksize(struct super_block *sb, int size) |
109 | { | 109 | { |
110 | int minsize = bdev_logical_block_size(sb->s_bdev); | 110 | int minsize = bdev_logical_block_size(sb->s_bdev); |
111 | if (size < minsize) | 111 | if (size < minsize) |
112 | size = minsize; | 112 | size = minsize; |
113 | return sb_set_blocksize(sb, size); | 113 | return sb_set_blocksize(sb, size); |
114 | } | 114 | } |
115 | 115 | ||
116 | EXPORT_SYMBOL(sb_min_blocksize); | 116 | EXPORT_SYMBOL(sb_min_blocksize); |
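For orientation, here is a hypothetical mount-time caller of the blocksize helpers above (not from this patch; note the quirk that sb_set_blocksize() returns 0 on failure and the accepted blocksize on success):

	/* Hypothetical fill_super() fragment (illustration only). */
	int blocksize = sb_min_blocksize(sb, 1024); /* >= 1024 and >= device sector size */
	if (!blocksize)
		return -EINVAL;	/* set_blocksize() rejected the size */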
117 | 117 | ||
118 | static int | 118 | static int |
119 | blkdev_get_block(struct inode *inode, sector_t iblock, | 119 | blkdev_get_block(struct inode *inode, sector_t iblock, |
120 | struct buffer_head *bh, int create) | 120 | struct buffer_head *bh, int create) |
121 | { | 121 | { |
122 | if (iblock >= max_block(I_BDEV(inode))) { | 122 | if (iblock >= max_block(I_BDEV(inode))) { |
123 | if (create) | 123 | if (create) |
124 | return -EIO; | 124 | return -EIO; |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * for reads, we're just trying to fill a partial page. | 127 | * for reads, we're just trying to fill a partial page. |
128 | * return a hole, they will have to call get_block again | 128 | * return a hole, they will have to call get_block again |
129 | * before they can fill it, and they will get -EIO at that | 129 | * before they can fill it, and they will get -EIO at that |
130 | * time | 130 | * time |
131 | */ | 131 | */ |
132 | return 0; | 132 | return 0; |
133 | } | 133 | } |
134 | bh->b_bdev = I_BDEV(inode); | 134 | bh->b_bdev = I_BDEV(inode); |
135 | bh->b_blocknr = iblock; | 135 | bh->b_blocknr = iblock; |
136 | set_buffer_mapped(bh); | 136 | set_buffer_mapped(bh); |
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | 139 | ||
140 | static int | 140 | static int |
141 | blkdev_get_blocks(struct inode *inode, sector_t iblock, | 141 | blkdev_get_blocks(struct inode *inode, sector_t iblock, |
142 | struct buffer_head *bh, int create) | 142 | struct buffer_head *bh, int create) |
143 | { | 143 | { |
144 | sector_t end_block = max_block(I_BDEV(inode)); | 144 | sector_t end_block = max_block(I_BDEV(inode)); |
145 | unsigned long max_blocks = bh->b_size >> inode->i_blkbits; | 145 | unsigned long max_blocks = bh->b_size >> inode->i_blkbits; |
146 | 146 | ||
147 | if ((iblock + max_blocks) > end_block) { | 147 | if ((iblock + max_blocks) > end_block) { |
148 | max_blocks = end_block - iblock; | 148 | max_blocks = end_block - iblock; |
149 | if ((long)max_blocks <= 0) { | 149 | if ((long)max_blocks <= 0) { |
150 | if (create) | 150 | if (create) |
151 | return -EIO; /* write fully beyond EOF */ | 151 | return -EIO; /* write fully beyond EOF */ |
152 | /* | 152 | /* |
153 | * It is a read which is fully beyond EOF. We return | 153 | * It is a read which is fully beyond EOF. We return |
154 | * a !buffer_mapped buffer | 154 | * a !buffer_mapped buffer |
155 | */ | 155 | */ |
156 | max_blocks = 0; | 156 | max_blocks = 0; |
157 | } | 157 | } |
158 | } | 158 | } |
159 | 159 | ||
160 | bh->b_bdev = I_BDEV(inode); | 160 | bh->b_bdev = I_BDEV(inode); |
161 | bh->b_blocknr = iblock; | 161 | bh->b_blocknr = iblock; |
162 | bh->b_size = max_blocks << inode->i_blkbits; | 162 | bh->b_size = max_blocks << inode->i_blkbits; |
163 | if (max_blocks) | 163 | if (max_blocks) |
164 | set_buffer_mapped(bh); | 164 | set_buffer_mapped(bh); |
165 | return 0; | 165 | return 0; |
166 | } | 166 | } |
167 | 167 | ||
168 | static ssize_t | 168 | static ssize_t |
169 | blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 169 | blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, |
170 | loff_t offset, unsigned long nr_segs) | 170 | loff_t offset, unsigned long nr_segs) |
171 | { | 171 | { |
172 | struct file *file = iocb->ki_filp; | 172 | struct file *file = iocb->ki_filp; |
173 | struct inode *inode = file->f_mapping->host; | 173 | struct inode *inode = file->f_mapping->host; |
174 | 174 | ||
175 | return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode), | 175 | return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode), |
176 | iov, offset, nr_segs, blkdev_get_blocks, NULL); | 176 | iov, offset, nr_segs, blkdev_get_blocks, NULL); |
177 | } | 177 | } |
178 | 178 | ||
179 | int __sync_blockdev(struct block_device *bdev, int wait) | ||
180 | { | ||
181 | if (!bdev) | ||
182 | return 0; | ||
183 | if (!wait) | ||
184 | return filemap_flush(bdev->bd_inode->i_mapping); | ||
185 | return filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
186 | } | ||
187 | |||
179 | /* | 188 | /* |
180 | * Write out and wait upon all the dirty data associated with a block | 189 | * Write out and wait upon all the dirty data associated with a block |
181 | * device via its mapping. Does not take the superblock lock. | 190 | * device via its mapping. Does not take the superblock lock. |
182 | */ | 191 | */ |
183 | int sync_blockdev(struct block_device *bdev) | 192 | int sync_blockdev(struct block_device *bdev) |
184 | { | 193 | { |
185 | int ret = 0; | 194 | return __sync_blockdev(bdev, 1); |
186 | |||
187 | if (bdev) | ||
188 | ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
189 | return ret; | ||
190 | } | 195 | } |
191 | EXPORT_SYMBOL(sync_blockdev); | 196 | EXPORT_SYMBOL(sync_blockdev); |
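The new wait flag is the crux of this hunk: it selects between merely starting writeback and the full data-integrity write-and-wait. A hypothetical caller (illustration only, not from this patch) of the two-pass pattern the flag enables:

	/* Illustrative two-pass sync over one block device (hypothetical caller). */
	static void example_two_pass(struct block_device *bdev)
	{
		__sync_blockdev(bdev, 0);	/* filemap_flush(): queue the I/O, return */
		__sync_blockdev(bdev, 1);	/* filemap_write_and_wait(): data integrity */
	}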
192 | 197 | ||
193 | /* | 198 | /* |
194 | * Write out and wait upon all dirty data associated with this | 199 | * Write out and wait upon all dirty data associated with this |
195 | * device. Filesystem data as well as the underlying block | 200 | * device. Filesystem data as well as the underlying block |
196 | * device. Takes the superblock lock. | 201 | * device. Takes the superblock lock. |
197 | */ | 202 | */ |
198 | int fsync_bdev(struct block_device *bdev) | 203 | int fsync_bdev(struct block_device *bdev) |
199 | { | 204 | { |
200 | struct super_block *sb = get_super(bdev); | 205 | struct super_block *sb = get_super(bdev); |
201 | if (sb) { | 206 | if (sb) { |
202 | int res = fsync_super(sb); | 207 | int res = fsync_super(sb); |
203 | drop_super(sb); | 208 | drop_super(sb); |
204 | return res; | 209 | return res; |
205 | } | 210 | } |
206 | return sync_blockdev(bdev); | 211 | return sync_blockdev(bdev); |
207 | } | 212 | } |
208 | EXPORT_SYMBOL(fsync_bdev); | 213 | EXPORT_SYMBOL(fsync_bdev); |
209 | 214 | ||
210 | /** | 215 | /** |
211 | * freeze_bdev -- lock a filesystem and force it into a consistent state | 216 | * freeze_bdev -- lock a filesystem and force it into a consistent state |
212 | * @bdev: blockdevice to lock | 217 | * @bdev: blockdevice to lock |
213 | * | 218 | * |
214 | * This takes the block device bd_mount_sem to make sure no new mounts | 219 | * This takes the block device bd_mount_sem to make sure no new mounts |
215 | * happen on bdev until thaw_bdev() is called. | 220 | * happen on bdev until thaw_bdev() is called. |
216 | * If a superblock is found on this device, we take the s_umount semaphore | 221 | * If a superblock is found on this device, we take the s_umount semaphore |
217 | * on it to make sure nobody unmounts until the snapshot creation is done. | 222 | * on it to make sure nobody unmounts until the snapshot creation is done. |
218 | * The reference counter (bd_fsfreeze_count) guarantees that only the last | 223 | * The reference counter (bd_fsfreeze_count) guarantees that only the last |
219 | * unfreeze process can actually unfreeze the frozen filesystem when multiple | 224 | * unfreeze process can actually unfreeze the frozen filesystem when multiple |
220 | * freeze requests arrive simultaneously. It counts up in freeze_bdev() and | 225 | * freeze requests arrive simultaneously. It counts up in freeze_bdev() and |
221 | * counts down in thaw_bdev(). When it becomes 0, thaw_bdev() will actually | 226 | * counts down in thaw_bdev(). When it becomes 0, thaw_bdev() will actually |
222 | * unfreeze. | 227 | * unfreeze. |
223 | */ | 228 | */ |
224 | struct super_block *freeze_bdev(struct block_device *bdev) | 229 | struct super_block *freeze_bdev(struct block_device *bdev) |
225 | { | 230 | { |
226 | struct super_block *sb; | 231 | struct super_block *sb; |
227 | int error = 0; | 232 | int error = 0; |
228 | 233 | ||
229 | mutex_lock(&bdev->bd_fsfreeze_mutex); | 234 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
230 | if (bdev->bd_fsfreeze_count > 0) { | 235 | if (bdev->bd_fsfreeze_count > 0) { |
231 | bdev->bd_fsfreeze_count++; | 236 | bdev->bd_fsfreeze_count++; |
232 | sb = get_super(bdev); | 237 | sb = get_super(bdev); |
233 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 238 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
234 | return sb; | 239 | return sb; |
235 | } | 240 | } |
236 | bdev->bd_fsfreeze_count++; | 241 | bdev->bd_fsfreeze_count++; |
237 | 242 | ||
238 | down(&bdev->bd_mount_sem); | 243 | down(&bdev->bd_mount_sem); |
239 | sb = get_super(bdev); | 244 | sb = get_super(bdev); |
240 | if (sb && !(sb->s_flags & MS_RDONLY)) { | 245 | if (sb && !(sb->s_flags & MS_RDONLY)) { |
241 | sb->s_frozen = SB_FREEZE_WRITE; | 246 | sb->s_frozen = SB_FREEZE_WRITE; |
242 | smp_wmb(); | 247 | smp_wmb(); |
243 | 248 | ||
244 | fsync_super(sb); | 249 | fsync_super(sb); |
245 | 250 | ||
246 | sb->s_frozen = SB_FREEZE_TRANS; | 251 | sb->s_frozen = SB_FREEZE_TRANS; |
247 | smp_wmb(); | 252 | smp_wmb(); |
248 | 253 | ||
249 | sync_blockdev(sb->s_bdev); | 254 | sync_blockdev(sb->s_bdev); |
250 | 255 | ||
251 | if (sb->s_op->freeze_fs) { | 256 | if (sb->s_op->freeze_fs) { |
252 | error = sb->s_op->freeze_fs(sb); | 257 | error = sb->s_op->freeze_fs(sb); |
253 | if (error) { | 258 | if (error) { |
254 | printk(KERN_ERR | 259 | printk(KERN_ERR |
255 | "VFS:Filesystem freeze failed\n"); | 260 | "VFS:Filesystem freeze failed\n"); |
256 | sb->s_frozen = SB_UNFROZEN; | 261 | sb->s_frozen = SB_UNFROZEN; |
257 | drop_super(sb); | 262 | drop_super(sb); |
258 | up(&bdev->bd_mount_sem); | 263 | up(&bdev->bd_mount_sem); |
259 | bdev->bd_fsfreeze_count--; | 264 | bdev->bd_fsfreeze_count--; |
260 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 265 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
261 | return ERR_PTR(error); | 266 | return ERR_PTR(error); |
262 | } | 267 | } |
263 | } | 268 | } |
264 | } | 269 | } |
265 | 270 | ||
266 | sync_blockdev(bdev); | 271 | sync_blockdev(bdev); |
267 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 272 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
268 | 273 | ||
269 | return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ | 274 | return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ |
270 | } | 275 | } |
271 | EXPORT_SYMBOL(freeze_bdev); | 276 | EXPORT_SYMBOL(freeze_bdev); |
272 | 277 | ||
273 | /** | 278 | /** |
274 | * thaw_bdev -- unlock filesystem | 279 | * thaw_bdev -- unlock filesystem |
275 | * @bdev: blockdevice to unlock | 280 | * @bdev: blockdevice to unlock |
276 | * @sb: associated superblock | 281 | * @sb: associated superblock |
277 | * | 282 | * |
278 | * Unlocks the filesystem and marks it writeable again after freeze_bdev(). | 283 | * Unlocks the filesystem and marks it writeable again after freeze_bdev(). |
279 | */ | 284 | */ |
280 | int thaw_bdev(struct block_device *bdev, struct super_block *sb) | 285 | int thaw_bdev(struct block_device *bdev, struct super_block *sb) |
281 | { | 286 | { |
282 | int error = 0; | 287 | int error = 0; |
283 | 288 | ||
284 | mutex_lock(&bdev->bd_fsfreeze_mutex); | 289 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
285 | if (!bdev->bd_fsfreeze_count) { | 290 | if (!bdev->bd_fsfreeze_count) { |
286 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 291 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
287 | return -EINVAL; | 292 | return -EINVAL; |
288 | } | 293 | } |
289 | 294 | ||
290 | bdev->bd_fsfreeze_count--; | 295 | bdev->bd_fsfreeze_count--; |
291 | if (bdev->bd_fsfreeze_count > 0) { | 296 | if (bdev->bd_fsfreeze_count > 0) { |
292 | if (sb) | 297 | if (sb) |
293 | drop_super(sb); | 298 | drop_super(sb); |
294 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 299 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
295 | return 0; | 300 | return 0; |
296 | } | 301 | } |
297 | 302 | ||
298 | if (sb) { | 303 | if (sb) { |
299 | BUG_ON(sb->s_bdev != bdev); | 304 | BUG_ON(sb->s_bdev != bdev); |
300 | if (!(sb->s_flags & MS_RDONLY)) { | 305 | if (!(sb->s_flags & MS_RDONLY)) { |
301 | if (sb->s_op->unfreeze_fs) { | 306 | if (sb->s_op->unfreeze_fs) { |
302 | error = sb->s_op->unfreeze_fs(sb); | 307 | error = sb->s_op->unfreeze_fs(sb); |
303 | if (error) { | 308 | if (error) { |
304 | printk(KERN_ERR | 309 | printk(KERN_ERR |
305 | "VFS:Filesystem thaw failed\n"); | 310 | "VFS:Filesystem thaw failed\n"); |
306 | sb->s_frozen = SB_FREEZE_TRANS; | 311 | sb->s_frozen = SB_FREEZE_TRANS; |
307 | bdev->bd_fsfreeze_count++; | 312 | bdev->bd_fsfreeze_count++; |
308 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 313 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
309 | return error; | 314 | return error; |
310 | } | 315 | } |
311 | } | 316 | } |
312 | sb->s_frozen = SB_UNFROZEN; | 317 | sb->s_frozen = SB_UNFROZEN; |
313 | smp_wmb(); | 318 | smp_wmb(); |
314 | wake_up(&sb->s_wait_unfrozen); | 319 | wake_up(&sb->s_wait_unfrozen); |
315 | } | 320 | } |
316 | drop_super(sb); | 321 | drop_super(sb); |
317 | } | 322 | } |
318 | 323 | ||
319 | up(&bdev->bd_mount_sem); | 324 | up(&bdev->bd_mount_sem); |
320 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 325 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
321 | return 0; | 326 | return 0; |
322 | } | 327 | } |
323 | EXPORT_SYMBOL(thaw_bdev); | 328 | EXPORT_SYMBOL(thaw_bdev); |
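A typical pairing of the freeze API documented above, as used by snapshot code (hypothetical caller; error handling trimmed):

	/* Hypothetical snapshot sequence built on freeze_bdev()/thaw_bdev(). */
	static int snapshot_example(struct block_device *bdev)
	{
		struct super_block *sb = freeze_bdev(bdev); /* blocks new writes/mounts */
		if (IS_ERR(sb))
			return PTR_ERR(sb);
		/* ... device is consistent here; copy it out ... */
		return thaw_bdev(bdev, sb);	/* releases bd_mount_sem, wakes writers */
	}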
324 | 329 | ||
325 | static int blkdev_writepage(struct page *page, struct writeback_control *wbc) | 330 | static int blkdev_writepage(struct page *page, struct writeback_control *wbc) |
326 | { | 331 | { |
327 | return block_write_full_page(page, blkdev_get_block, wbc); | 332 | return block_write_full_page(page, blkdev_get_block, wbc); |
328 | } | 333 | } |
329 | 334 | ||
330 | static int blkdev_readpage(struct file * file, struct page * page) | 335 | static int blkdev_readpage(struct file * file, struct page * page) |
331 | { | 336 | { |
332 | return block_read_full_page(page, blkdev_get_block); | 337 | return block_read_full_page(page, blkdev_get_block); |
333 | } | 338 | } |
334 | 339 | ||
335 | static int blkdev_write_begin(struct file *file, struct address_space *mapping, | 340 | static int blkdev_write_begin(struct file *file, struct address_space *mapping, |
336 | loff_t pos, unsigned len, unsigned flags, | 341 | loff_t pos, unsigned len, unsigned flags, |
337 | struct page **pagep, void **fsdata) | 342 | struct page **pagep, void **fsdata) |
338 | { | 343 | { |
339 | *pagep = NULL; | 344 | *pagep = NULL; |
340 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 345 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
341 | blkdev_get_block); | 346 | blkdev_get_block); |
342 | } | 347 | } |
343 | 348 | ||
344 | static int blkdev_write_end(struct file *file, struct address_space *mapping, | 349 | static int blkdev_write_end(struct file *file, struct address_space *mapping, |
345 | loff_t pos, unsigned len, unsigned copied, | 350 | loff_t pos, unsigned len, unsigned copied, |
346 | struct page *page, void *fsdata) | 351 | struct page *page, void *fsdata) |
347 | { | 352 | { |
348 | int ret; | 353 | int ret; |
349 | ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 354 | ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
350 | 355 | ||
351 | unlock_page(page); | 356 | unlock_page(page); |
352 | page_cache_release(page); | 357 | page_cache_release(page); |
353 | 358 | ||
354 | return ret; | 359 | return ret; |
355 | } | 360 | } |
356 | 361 | ||
357 | /* | 362 | /* |
358 | * private llseek: | 363 | * private llseek: |
359 | * for a block special file file->f_path.dentry->d_inode->i_size is zero | 364 | * for a block special file file->f_path.dentry->d_inode->i_size is zero |
360 | * so we compute the size by hand (just as in block_read/write above) | 365 | * so we compute the size by hand (just as in block_read/write above) |
361 | */ | 366 | */ |
362 | static loff_t block_llseek(struct file *file, loff_t offset, int origin) | 367 | static loff_t block_llseek(struct file *file, loff_t offset, int origin) |
363 | { | 368 | { |
364 | struct inode *bd_inode = file->f_mapping->host; | 369 | struct inode *bd_inode = file->f_mapping->host; |
365 | loff_t size; | 370 | loff_t size; |
366 | loff_t retval; | 371 | loff_t retval; |
367 | 372 | ||
368 | mutex_lock(&bd_inode->i_mutex); | 373 | mutex_lock(&bd_inode->i_mutex); |
369 | size = i_size_read(bd_inode); | 374 | size = i_size_read(bd_inode); |
370 | 375 | ||
371 | switch (origin) { | 376 | switch (origin) { |
372 | case 2: | 377 | case 2: |
373 | offset += size; | 378 | offset += size; |
374 | break; | 379 | break; |
375 | case 1: | 380 | case 1: |
376 | offset += file->f_pos; | 381 | offset += file->f_pos; |
377 | } | 382 | } |
378 | retval = -EINVAL; | 383 | retval = -EINVAL; |
379 | if (offset >= 0 && offset <= size) { | 384 | if (offset >= 0 && offset <= size) { |
380 | if (offset != file->f_pos) { | 385 | if (offset != file->f_pos) { |
381 | file->f_pos = offset; | 386 | file->f_pos = offset; |
382 | } | 387 | } |
383 | retval = offset; | 388 | retval = offset; |
384 | } | 389 | } |
385 | mutex_unlock(&bd_inode->i_mutex); | 390 | mutex_unlock(&bd_inode->i_mutex); |
386 | return retval; | 391 | return retval; |
387 | } | 392 | } |
388 | 393 | ||
389 | /* | 394 | /* |
390 | * Filp is never NULL; the only case when ->fsync() is called with | 395 | * Filp is never NULL; the only case when ->fsync() is called with |
391 | * NULL first argument is nfsd_sync_dir() and that's not a directory. | 396 | * NULL first argument is nfsd_sync_dir() and that's not a directory. |
392 | */ | 397 | */ |
393 | 398 | ||
394 | static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) | 399 | static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) |
395 | { | 400 | { |
396 | return sync_blockdev(I_BDEV(filp->f_mapping->host)); | 401 | return sync_blockdev(I_BDEV(filp->f_mapping->host)); |
397 | } | 402 | } |
398 | 403 | ||
399 | /* | 404 | /* |
400 | * pseudo-fs | 405 | * pseudo-fs |
401 | */ | 406 | */ |
402 | 407 | ||
403 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); | 408 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); |
404 | static struct kmem_cache * bdev_cachep __read_mostly; | 409 | static struct kmem_cache * bdev_cachep __read_mostly; |
405 | 410 | ||
406 | static struct inode *bdev_alloc_inode(struct super_block *sb) | 411 | static struct inode *bdev_alloc_inode(struct super_block *sb) |
407 | { | 412 | { |
408 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); | 413 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); |
409 | if (!ei) | 414 | if (!ei) |
410 | return NULL; | 415 | return NULL; |
411 | return &ei->vfs_inode; | 416 | return &ei->vfs_inode; |
412 | } | 417 | } |
413 | 418 | ||
414 | static void bdev_destroy_inode(struct inode *inode) | 419 | static void bdev_destroy_inode(struct inode *inode) |
415 | { | 420 | { |
416 | struct bdev_inode *bdi = BDEV_I(inode); | 421 | struct bdev_inode *bdi = BDEV_I(inode); |
417 | 422 | ||
418 | bdi->bdev.bd_inode_backing_dev_info = NULL; | 423 | bdi->bdev.bd_inode_backing_dev_info = NULL; |
419 | kmem_cache_free(bdev_cachep, bdi); | 424 | kmem_cache_free(bdev_cachep, bdi); |
420 | } | 425 | } |
421 | 426 | ||
422 | static void init_once(void *foo) | 427 | static void init_once(void *foo) |
423 | { | 428 | { |
424 | struct bdev_inode *ei = (struct bdev_inode *) foo; | 429 | struct bdev_inode *ei = (struct bdev_inode *) foo; |
425 | struct block_device *bdev = &ei->bdev; | 430 | struct block_device *bdev = &ei->bdev; |
426 | 431 | ||
427 | memset(bdev, 0, sizeof(*bdev)); | 432 | memset(bdev, 0, sizeof(*bdev)); |
428 | mutex_init(&bdev->bd_mutex); | 433 | mutex_init(&bdev->bd_mutex); |
429 | sema_init(&bdev->bd_mount_sem, 1); | 434 | sema_init(&bdev->bd_mount_sem, 1); |
430 | INIT_LIST_HEAD(&bdev->bd_inodes); | 435 | INIT_LIST_HEAD(&bdev->bd_inodes); |
431 | INIT_LIST_HEAD(&bdev->bd_list); | 436 | INIT_LIST_HEAD(&bdev->bd_list); |
432 | #ifdef CONFIG_SYSFS | 437 | #ifdef CONFIG_SYSFS |
433 | INIT_LIST_HEAD(&bdev->bd_holder_list); | 438 | INIT_LIST_HEAD(&bdev->bd_holder_list); |
434 | #endif | 439 | #endif |
435 | inode_init_once(&ei->vfs_inode); | 440 | inode_init_once(&ei->vfs_inode); |
436 | /* Initialize mutex for freeze. */ | 441 | /* Initialize mutex for freeze. */ |
437 | mutex_init(&bdev->bd_fsfreeze_mutex); | 442 | mutex_init(&bdev->bd_fsfreeze_mutex); |
438 | } | 443 | } |
439 | 444 | ||
440 | static inline void __bd_forget(struct inode *inode) | 445 | static inline void __bd_forget(struct inode *inode) |
441 | { | 446 | { |
442 | list_del_init(&inode->i_devices); | 447 | list_del_init(&inode->i_devices); |
443 | inode->i_bdev = NULL; | 448 | inode->i_bdev = NULL; |
444 | inode->i_mapping = &inode->i_data; | 449 | inode->i_mapping = &inode->i_data; |
445 | } | 450 | } |
446 | 451 | ||
447 | static void bdev_clear_inode(struct inode *inode) | 452 | static void bdev_clear_inode(struct inode *inode) |
448 | { | 453 | { |
449 | struct block_device *bdev = &BDEV_I(inode)->bdev; | 454 | struct block_device *bdev = &BDEV_I(inode)->bdev; |
450 | struct list_head *p; | 455 | struct list_head *p; |
451 | spin_lock(&bdev_lock); | 456 | spin_lock(&bdev_lock); |
452 | while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { | 457 | while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { |
453 | __bd_forget(list_entry(p, struct inode, i_devices)); | 458 | __bd_forget(list_entry(p, struct inode, i_devices)); |
454 | } | 459 | } |
455 | list_del_init(&bdev->bd_list); | 460 | list_del_init(&bdev->bd_list); |
456 | spin_unlock(&bdev_lock); | 461 | spin_unlock(&bdev_lock); |
457 | } | 462 | } |
458 | 463 | ||
459 | static const struct super_operations bdev_sops = { | 464 | static const struct super_operations bdev_sops = { |
460 | .statfs = simple_statfs, | 465 | .statfs = simple_statfs, |
461 | .alloc_inode = bdev_alloc_inode, | 466 | .alloc_inode = bdev_alloc_inode, |
462 | .destroy_inode = bdev_destroy_inode, | 467 | .destroy_inode = bdev_destroy_inode, |
463 | .drop_inode = generic_delete_inode, | 468 | .drop_inode = generic_delete_inode, |
464 | .clear_inode = bdev_clear_inode, | 469 | .clear_inode = bdev_clear_inode, |
465 | }; | 470 | }; |
466 | 471 | ||
467 | static int bd_get_sb(struct file_system_type *fs_type, | 472 | static int bd_get_sb(struct file_system_type *fs_type, |
468 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 473 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
469 | { | 474 | { |
470 | return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); | 475 | return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); |
471 | } | 476 | } |
472 | 477 | ||
473 | static struct file_system_type bd_type = { | 478 | static struct file_system_type bd_type = { |
474 | .name = "bdev", | 479 | .name = "bdev", |
475 | .get_sb = bd_get_sb, | 480 | .get_sb = bd_get_sb, |
476 | .kill_sb = kill_anon_super, | 481 | .kill_sb = kill_anon_super, |
477 | }; | 482 | }; |
478 | 483 | ||
479 | struct super_block *blockdev_superblock __read_mostly; | 484 | struct super_block *blockdev_superblock __read_mostly; |
480 | 485 | ||
481 | void __init bdev_cache_init(void) | 486 | void __init bdev_cache_init(void) |
482 | { | 487 | { |
483 | int err; | 488 | int err; |
484 | struct vfsmount *bd_mnt; | 489 | struct vfsmount *bd_mnt; |
485 | 490 | ||
486 | bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), | 491 | bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), |
487 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 492 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
488 | SLAB_MEM_SPREAD|SLAB_PANIC), | 493 | SLAB_MEM_SPREAD|SLAB_PANIC), |
489 | init_once); | 494 | init_once); |
490 | err = register_filesystem(&bd_type); | 495 | err = register_filesystem(&bd_type); |
491 | if (err) | 496 | if (err) |
492 | panic("Cannot register bdev pseudo-fs"); | 497 | panic("Cannot register bdev pseudo-fs"); |
493 | bd_mnt = kern_mount(&bd_type); | 498 | bd_mnt = kern_mount(&bd_type); |
494 | if (IS_ERR(bd_mnt)) | 499 | if (IS_ERR(bd_mnt)) |
495 | panic("Cannot create bdev pseudo-fs"); | 500 | panic("Cannot create bdev pseudo-fs"); |
496 | /* | 501 | /* |
497 | * This vfsmount structure is only used to obtain the | 502 | * This vfsmount structure is only used to obtain the |
498 | * blockdev_superblock, so tell kmemleak not to report it. | 503 | * blockdev_superblock, so tell kmemleak not to report it. |
499 | */ | 504 | */ |
500 | kmemleak_not_leak(bd_mnt); | 505 | kmemleak_not_leak(bd_mnt); |
501 | blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ | 506 | blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ |
502 | } | 507 | } |
503 | 508 | ||
504 | /* | 509 | /* |
505 | * Most likely _very_ bad one - but then it's hardly critical for small | 510 | * Most likely _very_ bad one - but then it's hardly critical for small |
506 | * /dev and can be fixed when somebody will need really large one. | 511 | * /dev and can be fixed when somebody will need really large one. |
507 | * Keep in mind that it will be fed through icache hash function too. | 512 | * Keep in mind that it will be fed through icache hash function too. |
508 | */ | 513 | */ |
509 | static inline unsigned long hash(dev_t dev) | 514 | static inline unsigned long hash(dev_t dev) |
510 | { | 515 | { |
511 | return MAJOR(dev)+MINOR(dev); | 516 | return MAJOR(dev)+MINOR(dev); |
512 | } | 517 | } |
513 | 518 | ||
514 | static int bdev_test(struct inode *inode, void *data) | 519 | static int bdev_test(struct inode *inode, void *data) |
515 | { | 520 | { |
516 | return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; | 521 | return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; |
517 | } | 522 | } |
518 | 523 | ||
519 | static int bdev_set(struct inode *inode, void *data) | 524 | static int bdev_set(struct inode *inode, void *data) |
520 | { | 525 | { |
521 | BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; | 526 | BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; |
522 | return 0; | 527 | return 0; |
523 | } | 528 | } |
524 | 529 | ||
525 | static LIST_HEAD(all_bdevs); | 530 | static LIST_HEAD(all_bdevs); |
526 | 531 | ||
527 | struct block_device *bdget(dev_t dev) | 532 | struct block_device *bdget(dev_t dev) |
528 | { | 533 | { |
529 | struct block_device *bdev; | 534 | struct block_device *bdev; |
530 | struct inode *inode; | 535 | struct inode *inode; |
531 | 536 | ||
532 | inode = iget5_locked(blockdev_superblock, hash(dev), | 537 | inode = iget5_locked(blockdev_superblock, hash(dev), |
533 | bdev_test, bdev_set, &dev); | 538 | bdev_test, bdev_set, &dev); |
534 | 539 | ||
535 | if (!inode) | 540 | if (!inode) |
536 | return NULL; | 541 | return NULL; |
537 | 542 | ||
538 | bdev = &BDEV_I(inode)->bdev; | 543 | bdev = &BDEV_I(inode)->bdev; |
539 | 544 | ||
540 | if (inode->i_state & I_NEW) { | 545 | if (inode->i_state & I_NEW) { |
541 | bdev->bd_contains = NULL; | 546 | bdev->bd_contains = NULL; |
542 | bdev->bd_inode = inode; | 547 | bdev->bd_inode = inode; |
543 | bdev->bd_block_size = (1 << inode->i_blkbits); | 548 | bdev->bd_block_size = (1 << inode->i_blkbits); |
544 | bdev->bd_part_count = 0; | 549 | bdev->bd_part_count = 0; |
545 | bdev->bd_invalidated = 0; | 550 | bdev->bd_invalidated = 0; |
546 | inode->i_mode = S_IFBLK; | 551 | inode->i_mode = S_IFBLK; |
547 | inode->i_rdev = dev; | 552 | inode->i_rdev = dev; |
548 | inode->i_bdev = bdev; | 553 | inode->i_bdev = bdev; |
549 | inode->i_data.a_ops = &def_blk_aops; | 554 | inode->i_data.a_ops = &def_blk_aops; |
550 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); | 555 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); |
551 | inode->i_data.backing_dev_info = &default_backing_dev_info; | 556 | inode->i_data.backing_dev_info = &default_backing_dev_info; |
552 | spin_lock(&bdev_lock); | 557 | spin_lock(&bdev_lock); |
553 | list_add(&bdev->bd_list, &all_bdevs); | 558 | list_add(&bdev->bd_list, &all_bdevs); |
554 | spin_unlock(&bdev_lock); | 559 | spin_unlock(&bdev_lock); |
555 | unlock_new_inode(inode); | 560 | unlock_new_inode(inode); |
556 | } | 561 | } |
557 | return bdev; | 562 | return bdev; |
558 | } | 563 | } |
559 | 564 | ||
560 | EXPORT_SYMBOL(bdget); | 565 | EXPORT_SYMBOL(bdget); |
561 | 566 | ||
562 | long nr_blockdev_pages(void) | 567 | long nr_blockdev_pages(void) |
563 | { | 568 | { |
564 | struct block_device *bdev; | 569 | struct block_device *bdev; |
565 | long ret = 0; | 570 | long ret = 0; |
566 | spin_lock(&bdev_lock); | 571 | spin_lock(&bdev_lock); |
567 | list_for_each_entry(bdev, &all_bdevs, bd_list) { | 572 | list_for_each_entry(bdev, &all_bdevs, bd_list) { |
568 | ret += bdev->bd_inode->i_mapping->nrpages; | 573 | ret += bdev->bd_inode->i_mapping->nrpages; |
569 | } | 574 | } |
570 | spin_unlock(&bdev_lock); | 575 | spin_unlock(&bdev_lock); |
571 | return ret; | 576 | return ret; |
572 | } | 577 | } |
573 | 578 | ||
574 | void bdput(struct block_device *bdev) | 579 | void bdput(struct block_device *bdev) |
575 | { | 580 | { |
576 | iput(bdev->bd_inode); | 581 | iput(bdev->bd_inode); |
577 | } | 582 | } |
578 | 583 | ||
579 | EXPORT_SYMBOL(bdput); | 584 | EXPORT_SYMBOL(bdput); |
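bdget() and bdput() bracket a reference on the backing bdev inode held in the pseudo-fs above. A hypothetical lookup (the device numbers are for illustration only):

	/* Hypothetical: take and drop a reference on whole-disk device 8:0. */
	static void bdget_example(void)
	{
		struct block_device *bdev = bdget(MKDEV(8, 0)); /* 8:0 = sda, say */
		if (bdev) {
			/* ... inspect bdev->bd_inode->i_mapping, etc. ... */
			bdput(bdev);	/* iput() on the backing inode */
		}
	}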
580 | 585 | ||
581 | static struct block_device *bd_acquire(struct inode *inode) | 586 | static struct block_device *bd_acquire(struct inode *inode) |
582 | { | 587 | { |
583 | struct block_device *bdev; | 588 | struct block_device *bdev; |
584 | 589 | ||
585 | spin_lock(&bdev_lock); | 590 | spin_lock(&bdev_lock); |
586 | bdev = inode->i_bdev; | 591 | bdev = inode->i_bdev; |
587 | if (bdev) { | 592 | if (bdev) { |
588 | atomic_inc(&bdev->bd_inode->i_count); | 593 | atomic_inc(&bdev->bd_inode->i_count); |
589 | spin_unlock(&bdev_lock); | 594 | spin_unlock(&bdev_lock); |
590 | return bdev; | 595 | return bdev; |
591 | } | 596 | } |
592 | spin_unlock(&bdev_lock); | 597 | spin_unlock(&bdev_lock); |
593 | 598 | ||
594 | bdev = bdget(inode->i_rdev); | 599 | bdev = bdget(inode->i_rdev); |
595 | if (bdev) { | 600 | if (bdev) { |
596 | spin_lock(&bdev_lock); | 601 | spin_lock(&bdev_lock); |
597 | if (!inode->i_bdev) { | 602 | if (!inode->i_bdev) { |
598 | /* | 603 | /* |
599 | * We take an additional bd_inode->i_count for inode, | 604 | * We take an additional bd_inode->i_count for inode, |
600 | * and it's released in clear_inode() of inode. | 605 | * and it's released in clear_inode() of inode. |
601 | * So, we can access it via ->i_mapping always | 606 | * So, we can access it via ->i_mapping always |
602 | * without igrab(). | 607 | * without igrab(). |
603 | */ | 608 | */ |
604 | atomic_inc(&bdev->bd_inode->i_count); | 609 | atomic_inc(&bdev->bd_inode->i_count); |
605 | inode->i_bdev = bdev; | 610 | inode->i_bdev = bdev; |
606 | inode->i_mapping = bdev->bd_inode->i_mapping; | 611 | inode->i_mapping = bdev->bd_inode->i_mapping; |
607 | list_add(&inode->i_devices, &bdev->bd_inodes); | 612 | list_add(&inode->i_devices, &bdev->bd_inodes); |
608 | } | 613 | } |
609 | spin_unlock(&bdev_lock); | 614 | spin_unlock(&bdev_lock); |
610 | } | 615 | } |
611 | return bdev; | 616 | return bdev; |
612 | } | 617 | } |
613 | 618 | ||
614 | /* Call when you free inode */ | 619 | /* Call when you free inode */ |
615 | 620 | ||
616 | void bd_forget(struct inode *inode) | 621 | void bd_forget(struct inode *inode) |
617 | { | 622 | { |
618 | struct block_device *bdev = NULL; | 623 | struct block_device *bdev = NULL; |
619 | 624 | ||
620 | spin_lock(&bdev_lock); | 625 | spin_lock(&bdev_lock); |
621 | if (inode->i_bdev) { | 626 | if (inode->i_bdev) { |
622 | if (!sb_is_blkdev_sb(inode->i_sb)) | 627 | if (!sb_is_blkdev_sb(inode->i_sb)) |
623 | bdev = inode->i_bdev; | 628 | bdev = inode->i_bdev; |
624 | __bd_forget(inode); | 629 | __bd_forget(inode); |
625 | } | 630 | } |
626 | spin_unlock(&bdev_lock); | 631 | spin_unlock(&bdev_lock); |
627 | 632 | ||
628 | if (bdev) | 633 | if (bdev) |
629 | iput(bdev->bd_inode); | 634 | iput(bdev->bd_inode); |
630 | } | 635 | } |
631 | 636 | ||
632 | int bd_claim(struct block_device *bdev, void *holder) | 637 | int bd_claim(struct block_device *bdev, void *holder) |
633 | { | 638 | { |
634 | int res; | 639 | int res; |
635 | spin_lock(&bdev_lock); | 640 | spin_lock(&bdev_lock); |
636 | 641 | ||
637 | /* first decide result */ | 642 | /* first decide result */ |
638 | if (bdev->bd_holder == holder) | 643 | if (bdev->bd_holder == holder) |
639 | res = 0; /* already a holder */ | 644 | res = 0; /* already a holder */ |
640 | else if (bdev->bd_holder != NULL) | 645 | else if (bdev->bd_holder != NULL) |
641 | res = -EBUSY; /* held by someone else */ | 646 | res = -EBUSY; /* held by someone else */ |
642 | else if (bdev->bd_contains == bdev) | 647 | else if (bdev->bd_contains == bdev) |
643 | res = 0; /* is a whole device which isn't held */ | 648 | res = 0; /* is a whole device which isn't held */ |
644 | 649 | ||
645 | else if (bdev->bd_contains->bd_holder == bd_claim) | 650 | else if (bdev->bd_contains->bd_holder == bd_claim) |
646 | res = 0; /* is a partition of a device that is being partitioned */ | 651 | res = 0; /* is a partition of a device that is being partitioned */ |
647 | else if (bdev->bd_contains->bd_holder != NULL) | 652 | else if (bdev->bd_contains->bd_holder != NULL) |
648 | res = -EBUSY; /* is a partition of a held device */ | 653 | res = -EBUSY; /* is a partition of a held device */ |
649 | else | 654 | else |
650 | res = 0; /* is a partition of an un-held device */ | 655 | res = 0; /* is a partition of an un-held device */ |
651 | 656 | ||
652 | /* now impose change */ | 657 | /* now impose change */ |
653 | if (res==0) { | 658 | if (res==0) { |
654 | /* note that for a whole device bd_holders | 659 | /* note that for a whole device bd_holders |
655 | * will be incremented twice, and bd_holder will | 660 | * will be incremented twice, and bd_holder will |
656 | * be set to bd_claim before being set to holder | 661 | * be set to bd_claim before being set to holder |
657 | */ | 662 | */ |
658 | bdev->bd_contains->bd_holders ++; | 663 | bdev->bd_contains->bd_holders ++; |
659 | bdev->bd_contains->bd_holder = bd_claim; | 664 | bdev->bd_contains->bd_holder = bd_claim; |
660 | bdev->bd_holders++; | 665 | bdev->bd_holders++; |
661 | bdev->bd_holder = holder; | 666 | bdev->bd_holder = holder; |
662 | } | 667 | } |
663 | spin_unlock(&bdev_lock); | 668 | spin_unlock(&bdev_lock); |
664 | return res; | 669 | return res; |
665 | } | 670 | } |
666 | 671 | ||
667 | EXPORT_SYMBOL(bd_claim); | 672 | EXPORT_SYMBOL(bd_claim); |
668 | 673 | ||
669 | void bd_release(struct block_device *bdev) | 674 | void bd_release(struct block_device *bdev) |
670 | { | 675 | { |
671 | spin_lock(&bdev_lock); | 676 | spin_lock(&bdev_lock); |
672 | if (!--bdev->bd_contains->bd_holders) | 677 | if (!--bdev->bd_contains->bd_holders) |
673 | bdev->bd_contains->bd_holder = NULL; | 678 | bdev->bd_contains->bd_holder = NULL; |
674 | if (!--bdev->bd_holders) | 679 | if (!--bdev->bd_holders) |
675 | bdev->bd_holder = NULL; | 680 | bdev->bd_holder = NULL; |
676 | spin_unlock(&bdev_lock); | 681 | spin_unlock(&bdev_lock); |
677 | } | 682 | } |
678 | 683 | ||
679 | EXPORT_SYMBOL(bd_release); | 684 | EXPORT_SYMBOL(bd_release); |
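The claim API above gives cooperative exclusion keyed on an opaque holder cookie, with the partition/whole-device bookkeeping handled in bd_claim(). A hypothetical exclusive user (the cookie is commonly the claiming driver's own object):

	/* Hypothetical exclusive access built on bd_claim()/bd_release(). */
	static int with_exclusive_bdev(struct block_device *bdev, void *holder)
	{
		int err = bd_claim(bdev, holder); /* -EBUSY if held by someone else */
		if (err)
			return err;
		/* ... we are the sole holder here ... */
		bd_release(bdev);
		return 0;
	}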
680 | 685 | ||
681 | #ifdef CONFIG_SYSFS | 686 | #ifdef CONFIG_SYSFS |
682 | /* | 687 | /* |
683 | * Functions for bd_claim_by_kobject / bd_release_from_kobject | 688 | * Functions for bd_claim_by_kobject / bd_release_from_kobject |
684 | * | 689 | * |
685 | * If a kobject is passed to bd_claim_by_kobject() | 690 | * If a kobject is passed to bd_claim_by_kobject() |
686 | * and the kobject has a parent directory, | 691 | * and the kobject has a parent directory, |
687 | * following symlinks are created: | 692 | * following symlinks are created: |
688 | * o from the kobject to the claimed bdev | 693 | * o from the kobject to the claimed bdev |
689 | * o from "holders" directory of the bdev to the parent of the kobject | 694 | * o from "holders" directory of the bdev to the parent of the kobject |
690 | * bd_release_from_kobject() removes these symlinks. | 695 | * bd_release_from_kobject() removes these symlinks. |
691 | * | 696 | * |
692 | * Example: | 697 | * Example: |
693 | * If /dev/dm-0 maps to /dev/sda, kobject corresponding to | 698 | * If /dev/dm-0 maps to /dev/sda, kobject corresponding to |
694 | * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: | 699 | * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: |
695 | * /sys/block/dm-0/slaves/sda --> /sys/block/sda | 700 | * /sys/block/dm-0/slaves/sda --> /sys/block/sda |
696 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | 701 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 |
697 | */ | 702 | */ |
698 | 703 | ||
699 | static int add_symlink(struct kobject *from, struct kobject *to) | 704 | static int add_symlink(struct kobject *from, struct kobject *to) |
700 | { | 705 | { |
701 | if (!from || !to) | 706 | if (!from || !to) |
702 | return 0; | 707 | return 0; |
703 | return sysfs_create_link(from, to, kobject_name(to)); | 708 | return sysfs_create_link(from, to, kobject_name(to)); |
704 | } | 709 | } |
705 | 710 | ||
706 | static void del_symlink(struct kobject *from, struct kobject *to) | 711 | static void del_symlink(struct kobject *from, struct kobject *to) |
707 | { | 712 | { |
708 | if (!from || !to) | 713 | if (!from || !to) |
709 | return; | 714 | return; |
710 | sysfs_remove_link(from, kobject_name(to)); | 715 | sysfs_remove_link(from, kobject_name(to)); |
711 | } | 716 | } |
712 | 717 | ||
713 | /* | 718 | /* |
714 | * 'struct bd_holder' contains pointers to kobjects symlinked by | 719 | * 'struct bd_holder' contains pointers to kobjects symlinked by |
715 | * bd_claim_by_kobject. | 720 | * bd_claim_by_kobject. |
716 | * It's connected to bd_holder_list which is protected by bdev->bd_sem. | 721 | * It's connected to bd_holder_list which is protected by bdev->bd_sem. |
717 | */ | 722 | */ |
718 | struct bd_holder { | 723 | struct bd_holder { |
719 | struct list_head list; /* chain of holders of the bdev */ | 724 | struct list_head list; /* chain of holders of the bdev */ |
720 | int count; /* references from the holder */ | 725 | int count; /* references from the holder */ |
721 | struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ | 726 | struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ |
722 | struct kobject *hdev; /* e.g. "/block/dm-0" */ | 727 | struct kobject *hdev; /* e.g. "/block/dm-0" */ |
723 | struct kobject *hdir; /* e.g. "/block/sda/holders" */ | 728 | struct kobject *hdir; /* e.g. "/block/sda/holders" */ |
724 | struct kobject *sdev; /* e.g. "/block/sda" */ | 729 | struct kobject *sdev; /* e.g. "/block/sda" */ |
725 | }; | 730 | }; |
726 | 731 | ||
727 | /* | 732 | /* |
728 | * Get references of related kobjects at once. | 733 | * Get references of related kobjects at once. |
729 | * Returns 1 on success. 0 on failure. | 734 | * Returns 1 on success. 0 on failure. |
730 | * | 735 | * |
731 | * Should call bd_holder_release_dirs() after successful use. | 736 | * Should call bd_holder_release_dirs() after successful use. |
732 | */ | 737 | */ |
733 | static int bd_holder_grab_dirs(struct block_device *bdev, | 738 | static int bd_holder_grab_dirs(struct block_device *bdev, |
734 | struct bd_holder *bo) | 739 | struct bd_holder *bo) |
735 | { | 740 | { |
736 | if (!bdev || !bo) | 741 | if (!bdev || !bo) |
737 | return 0; | 742 | return 0; |
738 | 743 | ||
739 | bo->sdir = kobject_get(bo->sdir); | 744 | bo->sdir = kobject_get(bo->sdir); |
740 | if (!bo->sdir) | 745 | if (!bo->sdir) |
741 | return 0; | 746 | return 0; |
742 | 747 | ||
743 | bo->hdev = kobject_get(bo->sdir->parent); | 748 | bo->hdev = kobject_get(bo->sdir->parent); |
744 | if (!bo->hdev) | 749 | if (!bo->hdev) |
745 | goto fail_put_sdir; | 750 | goto fail_put_sdir; |
746 | 751 | ||
747 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); | 752 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); |
748 | if (!bo->sdev) | 753 | if (!bo->sdev) |
749 | goto fail_put_hdev; | 754 | goto fail_put_hdev; |
750 | 755 | ||
751 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); | 756 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); |
752 | if (!bo->hdir) | 757 | if (!bo->hdir) |
753 | goto fail_put_sdev; | 758 | goto fail_put_sdev; |
754 | 759 | ||
755 | return 1; | 760 | return 1; |
756 | 761 | ||
757 | fail_put_sdev: | 762 | fail_put_sdev: |
758 | kobject_put(bo->sdev); | 763 | kobject_put(bo->sdev); |
759 | fail_put_hdev: | 764 | fail_put_hdev: |
760 | kobject_put(bo->hdev); | 765 | kobject_put(bo->hdev); |
761 | fail_put_sdir: | 766 | fail_put_sdir: |
762 | kobject_put(bo->sdir); | 767 | kobject_put(bo->sdir); |
763 | 768 | ||
764 | return 0; | 769 | return 0; |
765 | } | 770 | } |
766 | 771 | ||
767 | /* Put references of related kobjects at once. */ | 772 | /* Put references of related kobjects at once. */ |
768 | static void bd_holder_release_dirs(struct bd_holder *bo) | 773 | static void bd_holder_release_dirs(struct bd_holder *bo) |
769 | { | 774 | { |
770 | kobject_put(bo->hdir); | 775 | kobject_put(bo->hdir); |
771 | kobject_put(bo->sdev); | 776 | kobject_put(bo->sdev); |
772 | kobject_put(bo->hdev); | 777 | kobject_put(bo->hdev); |
773 | kobject_put(bo->sdir); | 778 | kobject_put(bo->sdir); |
774 | } | 779 | } |
775 | 780 | ||
776 | static struct bd_holder *alloc_bd_holder(struct kobject *kobj) | 781 | static struct bd_holder *alloc_bd_holder(struct kobject *kobj) |
777 | { | 782 | { |
778 | struct bd_holder *bo; | 783 | struct bd_holder *bo; |
779 | 784 | ||
780 | bo = kzalloc(sizeof(*bo), GFP_KERNEL); | 785 | bo = kzalloc(sizeof(*bo), GFP_KERNEL); |
781 | if (!bo) | 786 | if (!bo) |
782 | return NULL; | 787 | return NULL; |
783 | 788 | ||
784 | bo->count = 1; | 789 | bo->count = 1; |
785 | bo->sdir = kobj; | 790 | bo->sdir = kobj; |
786 | 791 | ||
787 | return bo; | 792 | return bo; |
788 | } | 793 | } |
789 | 794 | ||
790 | static void free_bd_holder(struct bd_holder *bo) | 795 | static void free_bd_holder(struct bd_holder *bo) |
791 | { | 796 | { |
792 | kfree(bo); | 797 | kfree(bo); |
793 | } | 798 | } |
794 | 799 | ||
795 | /** | 800 | /** |
796 | * find_bd_holder - find matching struct bd_holder from the block device | 801 | * find_bd_holder - find matching struct bd_holder from the block device |
797 | * | 802 | * |
798 | * @bdev: struct block device to be searched | 803 | * @bdev: struct block device to be searched |
799 | * @bo: target struct bd_holder | 804 | * @bo: target struct bd_holder |
800 | * | 805 | * |
801 | * Returns matching entry with @bo in @bdev->bd_holder_list. | 806 | * Returns matching entry with @bo in @bdev->bd_holder_list. |
802 | * If found, increment the reference count and return the pointer. | 807 | * If found, increment the reference count and return the pointer. |
803 | * If not found, returns NULL. | 808 | * If not found, returns NULL. |
804 | */ | 809 | */ |
805 | static struct bd_holder *find_bd_holder(struct block_device *bdev, | 810 | static struct bd_holder *find_bd_holder(struct block_device *bdev, |
806 | struct bd_holder *bo) | 811 | struct bd_holder *bo) |
807 | { | 812 | { |
808 | struct bd_holder *tmp; | 813 | struct bd_holder *tmp; |
809 | 814 | ||
810 | list_for_each_entry(tmp, &bdev->bd_holder_list, list) | 815 | list_for_each_entry(tmp, &bdev->bd_holder_list, list) |
811 | if (tmp->sdir == bo->sdir) { | 816 | if (tmp->sdir == bo->sdir) { |
812 | tmp->count++; | 817 | tmp->count++; |
813 | return tmp; | 818 | return tmp; |
814 | } | 819 | } |
815 | 820 | ||
816 | return NULL; | 821 | return NULL; |
817 | } | 822 | } |
818 | 823 | ||
819 | /** | 824 | /** |
820 | * add_bd_holder - create sysfs symlinks for bd_claim() relationship | 825 | * add_bd_holder - create sysfs symlinks for bd_claim() relationship |
821 | * | 826 | * |
822 | * @bdev: block device to be bd_claimed | 827 | * @bdev: block device to be bd_claimed |
823 | * @bo: preallocated and initialized by alloc_bd_holder() | 828 | * @bo: preallocated and initialized by alloc_bd_holder() |
824 | * | 829 | * |
825 | * Add @bo to @bdev->bd_holder_list, create symlinks. | 830 | * Add @bo to @bdev->bd_holder_list, create symlinks. |
826 | * | 831 | * |
827 | * Returns 0 if symlinks are created. | 832 | * Returns 0 if symlinks are created. |
828 | * Returns -ve if something fails. | 833 | * Returns -ve if something fails. |
829 | */ | 834 | */ |
830 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | 835 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) |
831 | { | 836 | { |
832 | int err; | 837 | int err; |
833 | 838 | ||
834 | if (!bo) | 839 | if (!bo) |
835 | return -EINVAL; | 840 | return -EINVAL; |
836 | 841 | ||
837 | if (!bd_holder_grab_dirs(bdev, bo)) | 842 | if (!bd_holder_grab_dirs(bdev, bo)) |
838 | return -EBUSY; | 843 | return -EBUSY; |
839 | 844 | ||
840 | err = add_symlink(bo->sdir, bo->sdev); | 845 | err = add_symlink(bo->sdir, bo->sdev); |
841 | if (err) | 846 | if (err) |
842 | return err; | 847 | return err; |
843 | 848 | ||
844 | err = add_symlink(bo->hdir, bo->hdev); | 849 | err = add_symlink(bo->hdir, bo->hdev); |
845 | if (err) { | 850 | if (err) { |
846 | del_symlink(bo->sdir, bo->sdev); | 851 | del_symlink(bo->sdir, bo->sdev); |
847 | return err; | 852 | return err; |
848 | } | 853 | } |
849 | 854 | ||
850 | list_add_tail(&bo->list, &bdev->bd_holder_list); | 855 | list_add_tail(&bo->list, &bdev->bd_holder_list); |
851 | return 0; | 856 | return 0; |
852 | } | 857 | } |
853 | 858 | ||
854 | /** | 859 | /** |
855 | * del_bd_holder - delete sysfs symlinks for bd_claim() relationship | 860 | * del_bd_holder - delete sysfs symlinks for bd_claim() relationship |
856 | * | 861 | * |
857 | * @bdev: block device to be bd_claimed | 862 | * @bdev: block device to be bd_claimed |
858 | * @kobj: holder's kobject | 863 | * @kobj: holder's kobject |
859 | * | 864 | * |
860 | * If there is matching entry with @kobj in @bdev->bd_holder_list | 865 | * If there is matching entry with @kobj in @bdev->bd_holder_list |
861 | * and no other bd_claim() from the same kobject, | 866 | * and no other bd_claim() from the same kobject, |
862 | * remove the struct bd_holder from the list, delete symlinks for it. | 867 | * remove the struct bd_holder from the list, delete symlinks for it. |
863 | * | 868 | * |
864 | * Returns a pointer to the struct bd_holder when it's removed from the list | 869 | * Returns a pointer to the struct bd_holder when it's removed from the list |
865 | * and ready to be freed. | 870 | * and ready to be freed. |
866 | * Returns NULL if matching claim isn't found or there is other bd_claim() | 871 | * Returns NULL if matching claim isn't found or there is other bd_claim() |
867 | * by the same kobject. | 872 | * by the same kobject. |
868 | */ | 873 | */ |
869 | static struct bd_holder *del_bd_holder(struct block_device *bdev, | 874 | static struct bd_holder *del_bd_holder(struct block_device *bdev, |
870 | struct kobject *kobj) | 875 | struct kobject *kobj) |
871 | { | 876 | { |
872 | struct bd_holder *bo; | 877 | struct bd_holder *bo; |
873 | 878 | ||
874 | list_for_each_entry(bo, &bdev->bd_holder_list, list) { | 879 | list_for_each_entry(bo, &bdev->bd_holder_list, list) { |
875 | if (bo->sdir == kobj) { | 880 | if (bo->sdir == kobj) { |
876 | bo->count--; | 881 | bo->count--; |
877 | BUG_ON(bo->count < 0); | 882 | BUG_ON(bo->count < 0); |
878 | if (!bo->count) { | 883 | if (!bo->count) { |
879 | list_del(&bo->list); | 884 | list_del(&bo->list); |
880 | del_symlink(bo->sdir, bo->sdev); | 885 | del_symlink(bo->sdir, bo->sdev); |
881 | del_symlink(bo->hdir, bo->hdev); | 886 | del_symlink(bo->hdir, bo->hdev); |
882 | bd_holder_release_dirs(bo); | 887 | bd_holder_release_dirs(bo); |
883 | return bo; | 888 | return bo; |
884 | } | 889 | } |
885 | break; | 890 | break; |
886 | } | 891 | } |
887 | } | 892 | } |
888 | 893 | ||
889 | return NULL; | 894 | return NULL; |
890 | } | 895 | } |
891 | 896 | ||
892 | /** | 897 | /** |
893 | * bd_claim_by_kobject - bd_claim() with additional kobject signature | 898 | * bd_claim_by_kobject - bd_claim() with additional kobject signature |
894 | * | 899 | * |
895 | * @bdev: block device to be claimed | 900 | * @bdev: block device to be claimed |
896 | * @holder: holder's signature | 901 | * @holder: holder's signature |
897 | * @kobj: holder's kobject | 902 | * @kobj: holder's kobject |
898 | * | 903 | * |
899 | * Do bd_claim() and if it succeeds, create sysfs symlinks between | 904 | * Do bd_claim() and if it succeeds, create sysfs symlinks between |
900 | * the bdev and the holder's kobject. | 905 | * the bdev and the holder's kobject. |
901 | * Use bd_release_from_kobject() when releasing the claimed bdev. | 906 | * Use bd_release_from_kobject() when releasing the claimed bdev. |
902 | * | 907 | * |
903 | * Returns 0 on success. (same as bd_claim()) | 908 | * Returns 0 on success. (same as bd_claim()) |
904 | * Returns errno on failure. | 909 | * Returns errno on failure. |
905 | */ | 910 | */ |
906 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | 911 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, |
907 | struct kobject *kobj) | 912 | struct kobject *kobj) |
908 | { | 913 | { |
909 | int err; | 914 | int err; |
910 | struct bd_holder *bo, *found; | 915 | struct bd_holder *bo, *found; |
911 | 916 | ||
912 | if (!kobj) | 917 | if (!kobj) |
913 | return -EINVAL; | 918 | return -EINVAL; |
914 | 919 | ||
915 | bo = alloc_bd_holder(kobj); | 920 | bo = alloc_bd_holder(kobj); |
916 | if (!bo) | 921 | if (!bo) |
917 | return -ENOMEM; | 922 | return -ENOMEM; |
918 | 923 | ||
919 | mutex_lock(&bdev->bd_mutex); | 924 | mutex_lock(&bdev->bd_mutex); |
920 | 925 | ||
921 | err = bd_claim(bdev, holder); | 926 | err = bd_claim(bdev, holder); |
922 | if (err) | 927 | if (err) |
923 | goto fail; | 928 | goto fail; |
924 | 929 | ||
925 | found = find_bd_holder(bdev, bo); | 930 | found = find_bd_holder(bdev, bo); |
926 | if (found) | 931 | if (found) |
927 | goto fail; | 932 | goto fail; |
928 | 933 | ||
929 | err = add_bd_holder(bdev, bo); | 934 | err = add_bd_holder(bdev, bo); |
930 | if (err) | 935 | if (err) |
931 | bd_release(bdev); | 936 | bd_release(bdev); |
932 | else | 937 | else |
933 | bo = NULL; | 938 | bo = NULL; |
934 | fail: | 939 | fail: |
935 | mutex_unlock(&bdev->bd_mutex); | 940 | mutex_unlock(&bdev->bd_mutex); |
936 | free_bd_holder(bo); | 941 | free_bd_holder(bo); |
937 | return err; | 942 | return err; |
938 | } | 943 | } |
939 | 944 | ||
940 | /** | 945 | /** |
941 | * bd_release_from_kobject - bd_release() with additional kobject signature | 946 | * bd_release_from_kobject - bd_release() with additional kobject signature |
942 | * | 947 | * |
943 | * @bdev: block device to be released | 948 | * @bdev: block device to be released |
944 | * @kobj: holder's kobject | 949 | * @kobj: holder's kobject |
945 | * | 950 | * |
946 | * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). | 951 | * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). |
947 | */ | 952 | */ |
948 | static void bd_release_from_kobject(struct block_device *bdev, | 953 | static void bd_release_from_kobject(struct block_device *bdev, |
949 | struct kobject *kobj) | 954 | struct kobject *kobj) |
950 | { | 955 | { |
951 | if (!kobj) | 956 | if (!kobj) |
952 | return; | 957 | return; |
953 | 958 | ||
954 | mutex_lock(&bdev->bd_mutex); | 959 | mutex_lock(&bdev->bd_mutex); |
955 | bd_release(bdev); | 960 | bd_release(bdev); |
956 | free_bd_holder(del_bd_holder(bdev, kobj)); | 961 | free_bd_holder(del_bd_holder(bdev, kobj)); |
957 | mutex_unlock(&bdev->bd_mutex); | 962 | mutex_unlock(&bdev->bd_mutex); |
958 | } | 963 | } |
959 | 964 | ||
960 | /** | 965 | /** |
961 | * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() | 966 | * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() |
962 | * | 967 | * |
963 | * @bdev: block device to be claimed | 968 | * @bdev: block device to be claimed |
964 | * @holder: holder's signature | 969 | * @holder: holder's signature |
965 | * @disk: holder's gendisk | 970 | * @disk: holder's gendisk |
966 | * | 971 | * |
967 | * Call bd_claim_by_kobject() after taking a reference to @disk->slave_dir. | 972 | * Call bd_claim_by_kobject() after taking a reference to @disk->slave_dir. |
968 | */ | 973 | */ |
969 | int bd_claim_by_disk(struct block_device *bdev, void *holder, | 974 | int bd_claim_by_disk(struct block_device *bdev, void *holder, |
970 | struct gendisk *disk) | 975 | struct gendisk *disk) |
971 | { | 976 | { |
972 | return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); | 977 | return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); |
973 | } | 978 | } |
974 | EXPORT_SYMBOL_GPL(bd_claim_by_disk); | 979 | EXPORT_SYMBOL_GPL(bd_claim_by_disk); |
975 | 980 | ||
976 | /** | 981 | /** |
977 | * bd_release_from_disk - wrapper function for bd_release_from_kobject() | 982 | * bd_release_from_disk - wrapper function for bd_release_from_kobject() |
978 | * | 983 | * |
979 | * @bdev: block device to be released | 984 | * @bdev: block device to be released |
980 | * @disk: holder's gendisk | 985 | * @disk: holder's gendisk |
981 | * | 986 | * |
982 | * Call bd_release_from_kobject() and put @disk->slave_dir. | 987 | * Call bd_release_from_kobject() and put @disk->slave_dir. |
983 | */ | 988 | */ |
984 | void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) | 989 | void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) |
985 | { | 990 | { |
986 | bd_release_from_kobject(bdev, disk->slave_dir); | 991 | bd_release_from_kobject(bdev, disk->slave_dir); |
987 | kobject_put(disk->slave_dir); | 992 | kobject_put(disk->slave_dir); |
988 | } | 993 | } |
989 | EXPORT_SYMBOL_GPL(bd_release_from_disk); | 994 | EXPORT_SYMBOL_GPL(bd_release_from_disk); |
990 | #endif | 995 | #endif |
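A minimal sketch of how a stacking driver (in the md/dm mold) might pair the two wrappers above; this is not part of the commit, and the function names are invented:

/* Hedged sketch: claim a component device on behalf of an array disk
 * and publish the holders/slaves symlinks; undo both on teardown. */
static int example_bind_component(struct block_device *component,
				  struct gendisk *array_disk,
				  void *holder)
{
	/* claims @component and creates the sysfs links */
	return bd_claim_by_disk(component, holder, array_disk);
}

static void example_unbind_component(struct block_device *component,
				     struct gendisk *array_disk)
{
	/* drops the claim and removes the links created above */
	bd_release_from_disk(component, array_disk);
}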
991 | 996 | ||
992 | /* | 997 | /* |
993 | * Tries to open block device by device number. Use it ONLY if you | 998 | * Tries to open block device by device number. Use it ONLY if you |
994 | * really do not have anything better - i.e. when you are behind a | 999 | * really do not have anything better - i.e. when you are behind a |
995 | * truly sucky interface and all you are given is a device number. _Never_ | 1000 | * truly sucky interface and all you are given is a device number. _Never_ |
996 | * to be used for internal purposes. If you ever need it - reconsider | 1001 | * to be used for internal purposes. If you ever need it - reconsider |
997 | * your API. | 1002 | * your API. |
998 | */ | 1003 | */ |
999 | struct block_device *open_by_devnum(dev_t dev, fmode_t mode) | 1004 | struct block_device *open_by_devnum(dev_t dev, fmode_t mode) |
1000 | { | 1005 | { |
1001 | struct block_device *bdev = bdget(dev); | 1006 | struct block_device *bdev = bdget(dev); |
1002 | int err = -ENOMEM; | 1007 | int err = -ENOMEM; |
1003 | if (bdev) | 1008 | if (bdev) |
1004 | err = blkdev_get(bdev, mode); | 1009 | err = blkdev_get(bdev, mode); |
1005 | return err ? ERR_PTR(err) : bdev; | 1010 | return err ? ERR_PTR(err) : bdev; |
1006 | } | 1011 | } |
1007 | 1012 | ||
1008 | EXPORT_SYMBOL(open_by_devnum); | 1013 | EXPORT_SYMBOL(open_by_devnum); |
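As a usage sketch only (the device numbers below are made up), a caller stuck with a bare dev_t would pair open_by_devnum() with blkdev_put():

/* Hedged sketch: open by device number, check the ERR_PTR, put it back. */
static int example_probe_by_devnum(void)
{
	struct block_device *bdev;

	bdev = open_by_devnum(MKDEV(8, 0), FMODE_READ);	/* made-up numbers */
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	/* ... read-only access to bdev ... */
	blkdev_put(bdev, FMODE_READ);
	return 0;
}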
1009 | 1014 | ||
1010 | /** | 1015 | /** |
1011 | * flush_disk - invalidates all buffer-cache entries on a disk | 1016 | * flush_disk - invalidates all buffer-cache entries on a disk |
1012 | * | 1017 | * |
1013 | * @bdev: struct block device to be flushed | 1018 | * @bdev: struct block device to be flushed |
1014 | * | 1019 | * |
1015 | * Invalidates all buffer-cache entries on a disk. It should be called | 1020 | * Invalidates all buffer-cache entries on a disk. It should be called |
1016 | * when a disk has been changed -- either by a media change or online | 1021 | * when a disk has been changed -- either by a media change or online |
1017 | * resize. | 1022 | * resize. |
1018 | */ | 1023 | */ |
1019 | static void flush_disk(struct block_device *bdev) | 1024 | static void flush_disk(struct block_device *bdev) |
1020 | { | 1025 | { |
1021 | if (__invalidate_device(bdev)) { | 1026 | if (__invalidate_device(bdev)) { |
1022 | char name[BDEVNAME_SIZE] = ""; | 1027 | char name[BDEVNAME_SIZE] = ""; |
1023 | 1028 | ||
1024 | if (bdev->bd_disk) | 1029 | if (bdev->bd_disk) |
1025 | disk_name(bdev->bd_disk, 0, name); | 1030 | disk_name(bdev->bd_disk, 0, name); |
1026 | printk(KERN_WARNING "VFS: busy inodes on changed media or " | 1031 | printk(KERN_WARNING "VFS: busy inodes on changed media or " |
1027 | "resized disk %s\n", name); | 1032 | "resized disk %s\n", name); |
1028 | } | 1033 | } |
1029 | 1034 | ||
1030 | if (!bdev->bd_disk) | 1035 | if (!bdev->bd_disk) |
1031 | return; | 1036 | return; |
1032 | if (disk_partitionable(bdev->bd_disk)) | 1037 | if (disk_partitionable(bdev->bd_disk)) |
1033 | bdev->bd_invalidated = 1; | 1038 | bdev->bd_invalidated = 1; |
1034 | } | 1039 | } |
1035 | 1040 | ||
1036 | /** | 1041 | /** |
1037 | * check_disk_size_change - checks for disk size change and adjusts bdev size. | 1042 | * check_disk_size_change - checks for disk size change and adjusts bdev size. |
1038 | * @disk: struct gendisk to check | 1043 | * @disk: struct gendisk to check |
1039 | * @bdev: struct bdev to adjust. | 1044 | * @bdev: struct bdev to adjust. |
1040 | * | 1045 | * |
1041 | * This routine checks whether the bdev size matches the disk size | 1046 | * This routine checks whether the bdev size matches the disk size |
1042 | * and adjusts it if they differ. | 1047 | * and adjusts it if they differ. |
1043 | */ | 1048 | */ |
1044 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | 1049 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) |
1045 | { | 1050 | { |
1046 | loff_t disk_size, bdev_size; | 1051 | loff_t disk_size, bdev_size; |
1047 | 1052 | ||
1048 | disk_size = (loff_t)get_capacity(disk) << 9; | 1053 | disk_size = (loff_t)get_capacity(disk) << 9; |
1049 | bdev_size = i_size_read(bdev->bd_inode); | 1054 | bdev_size = i_size_read(bdev->bd_inode); |
1050 | if (disk_size != bdev_size) { | 1055 | if (disk_size != bdev_size) { |
1051 | char name[BDEVNAME_SIZE]; | 1056 | char name[BDEVNAME_SIZE]; |
1052 | 1057 | ||
1053 | disk_name(disk, 0, name); | 1058 | disk_name(disk, 0, name); |
1054 | printk(KERN_INFO | 1059 | printk(KERN_INFO |
1055 | "%s: detected capacity change from %lld to %lld\n", | 1060 | "%s: detected capacity change from %lld to %lld\n", |
1056 | name, bdev_size, disk_size); | 1061 | name, bdev_size, disk_size); |
1057 | i_size_write(bdev->bd_inode, disk_size); | 1062 | i_size_write(bdev->bd_inode, disk_size); |
1058 | flush_disk(bdev); | 1063 | flush_disk(bdev); |
1059 | } | 1064 | } |
1060 | } | 1065 | } |
1061 | EXPORT_SYMBOL(check_disk_size_change); | 1066 | EXPORT_SYMBOL(check_disk_size_change); |
1062 | 1067 | ||
1063 | /** | 1068 | /** |
1064 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back | 1069 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back |
1065 | * @disk: struct gendisk to be revalidated | 1070 | * @disk: struct gendisk to be revalidated |
1066 | * | 1071 | * |
1067 | * This routine is a wrapper for lower-level driver's revalidate_disk | 1072 | * This routine is a wrapper for lower-level driver's revalidate_disk |
1068 | * call-backs. It is used to do common pre and post operations needed | 1073 | * call-backs. It is used to do common pre and post operations needed |
1069 | * for all revalidate_disk operations. | 1074 | * for all revalidate_disk operations. |
1070 | */ | 1075 | */ |
1071 | int revalidate_disk(struct gendisk *disk) | 1076 | int revalidate_disk(struct gendisk *disk) |
1072 | { | 1077 | { |
1073 | struct block_device *bdev; | 1078 | struct block_device *bdev; |
1074 | int ret = 0; | 1079 | int ret = 0; |
1075 | 1080 | ||
1076 | if (disk->fops->revalidate_disk) | 1081 | if (disk->fops->revalidate_disk) |
1077 | ret = disk->fops->revalidate_disk(disk); | 1082 | ret = disk->fops->revalidate_disk(disk); |
1078 | 1083 | ||
1079 | bdev = bdget_disk(disk, 0); | 1084 | bdev = bdget_disk(disk, 0); |
1080 | if (!bdev) | 1085 | if (!bdev) |
1081 | return ret; | 1086 | return ret; |
1082 | 1087 | ||
1083 | mutex_lock(&bdev->bd_mutex); | 1088 | mutex_lock(&bdev->bd_mutex); |
1084 | check_disk_size_change(disk, bdev); | 1089 | check_disk_size_change(disk, bdev); |
1085 | mutex_unlock(&bdev->bd_mutex); | 1090 | mutex_unlock(&bdev->bd_mutex); |
1086 | bdput(bdev); | 1091 | bdput(bdev); |
1087 | return ret; | 1092 | return ret; |
1088 | } | 1093 | } |
1089 | EXPORT_SYMBOL(revalidate_disk); | 1094 | EXPORT_SYMBOL(revalidate_disk); |
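A driver that has just learned of a capacity change would typically refresh what get_capacity() reports first and then call the wrapper; a hedged sketch, names invented:

/* Hedged sketch: a hypothetical driver reacting to an online resize. */
static void example_capacity_changed(struct gendisk *disk, sector_t sectors)
{
	set_capacity(disk, sectors);	/* what get_capacity() will report */
	revalidate_disk(disk);		/* resyncs bdev size, flushes stale buffers */
}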
1090 | 1095 | ||
1091 | /* | 1096 | /* |
1092 | * This routine checks whether a removable media has been changed, | 1097 | * This routine checks whether a removable media has been changed, |
1093 | * and invalidates all buffer-cache-entries in that case. This | 1098 | * and invalidates all buffer-cache-entries in that case. This |
1094 | * is a relatively slow routine, so we have to try to minimize using | 1099 | * is a relatively slow routine, so we have to try to minimize using |
1095 | * it. Thus it is called only upon a 'mount' or 'open'. This | 1100 | * it. Thus it is called only upon a 'mount' or 'open'. This |
1096 | * is the best way of combining speed and utility, I think. | 1101 | * is the best way of combining speed and utility, I think. |
1097 | * People changing diskettes in the middle of an operation deserve | 1102 | * People changing diskettes in the middle of an operation deserve |
1098 | * to lose :-) | 1103 | * to lose :-) |
1099 | */ | 1104 | */ |
1100 | int check_disk_change(struct block_device *bdev) | 1105 | int check_disk_change(struct block_device *bdev) |
1101 | { | 1106 | { |
1102 | struct gendisk *disk = bdev->bd_disk; | 1107 | struct gendisk *disk = bdev->bd_disk; |
1103 | struct block_device_operations * bdops = disk->fops; | 1108 | struct block_device_operations * bdops = disk->fops; |
1104 | 1109 | ||
1105 | if (!bdops->media_changed) | 1110 | if (!bdops->media_changed) |
1106 | return 0; | 1111 | return 0; |
1107 | if (!bdops->media_changed(bdev->bd_disk)) | 1112 | if (!bdops->media_changed(bdev->bd_disk)) |
1108 | return 0; | 1113 | return 0; |
1109 | 1114 | ||
1110 | flush_disk(bdev); | 1115 | flush_disk(bdev); |
1111 | if (bdops->revalidate_disk) | 1116 | if (bdops->revalidate_disk) |
1112 | bdops->revalidate_disk(bdev->bd_disk); | 1117 | bdops->revalidate_disk(bdev->bd_disk); |
1113 | return 1; | 1118 | return 1; |
1114 | } | 1119 | } |
1115 | 1120 | ||
1116 | EXPORT_SYMBOL(check_disk_change); | 1121 | EXPORT_SYMBOL(check_disk_change); |
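Matching the "mount or open" policy described above, removable-media drivers normally call this from their open() method; a hedged sketch with an invented name:

/* Hedged sketch: where a removable-media driver would hook this in. */
static int example_open(struct block_device *bdev, fmode_t mode)
{
	check_disk_change(bdev);	/* flush + revalidate if media was swapped */
	return 0;
}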
1117 | 1122 | ||
1118 | void bd_set_size(struct block_device *bdev, loff_t size) | 1123 | void bd_set_size(struct block_device *bdev, loff_t size) |
1119 | { | 1124 | { |
1120 | unsigned bsize = bdev_logical_block_size(bdev); | 1125 | unsigned bsize = bdev_logical_block_size(bdev); |
1121 | 1126 | ||
1122 | bdev->bd_inode->i_size = size; | 1127 | bdev->bd_inode->i_size = size; |
1123 | while (bsize < PAGE_CACHE_SIZE) { | 1128 | while (bsize < PAGE_CACHE_SIZE) { |
1124 | if (size & bsize) | 1129 | if (size & bsize) |
1125 | break; | 1130 | break; |
1126 | bsize <<= 1; | 1131 | bsize <<= 1; |
1127 | } | 1132 | } |
1128 | bdev->bd_block_size = bsize; | 1133 | bdev->bd_block_size = bsize; |
1129 | bdev->bd_inode->i_blkbits = blksize_bits(bsize); | 1134 | bdev->bd_inode->i_blkbits = blksize_bits(bsize); |
1130 | } | 1135 | } |
1131 | EXPORT_SYMBOL(bd_set_size); | 1136 | EXPORT_SYMBOL(bd_set_size); |
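The loop above grows the soft block size to the largest power of two, capped at PAGE_CACHE_SIZE, that still divides the device size; for a 7168-byte device with 512-byte logical blocks it settles on 1024. A standalone sketch of the same computation, with invented names:

/* Hedged sketch: the block-size selection in isolation. */
static unsigned pick_soft_block_size(loff_t size, unsigned lbs,
				     unsigned page_size)
{
	unsigned bsize = lbs;		/* start at the logical block size */

	while (bsize < page_size) {
		if (size & bsize)	/* size is not a multiple of 2*bsize */
			break;
		bsize <<= 1;
	}
	return bsize;			/* e.g. (7168, 512, 4096) -> 1024 */
}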
1132 | 1137 | ||
1133 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); | 1138 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); |
1134 | 1139 | ||
1135 | /* | 1140 | /* |
1136 | * bd_mutex locking: | 1141 | * bd_mutex locking: |
1137 | * | 1142 | * |
1138 | * mutex_lock(part->bd_mutex) | 1143 | * mutex_lock(part->bd_mutex) |
1139 | * mutex_lock_nested(whole->bd_mutex, 1) | 1144 | * mutex_lock_nested(whole->bd_mutex, 1) |
1140 | */ | 1145 | */ |
1141 | 1146 | ||
1142 | static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | 1147 | static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) |
1143 | { | 1148 | { |
1144 | struct gendisk *disk; | 1149 | struct gendisk *disk; |
1145 | int ret; | 1150 | int ret; |
1146 | int partno; | 1151 | int partno; |
1147 | int perm = 0; | 1152 | int perm = 0; |
1148 | 1153 | ||
1149 | if (mode & FMODE_READ) | 1154 | if (mode & FMODE_READ) |
1150 | perm |= MAY_READ; | 1155 | perm |= MAY_READ; |
1151 | if (mode & FMODE_WRITE) | 1156 | if (mode & FMODE_WRITE) |
1152 | perm |= MAY_WRITE; | 1157 | perm |= MAY_WRITE; |
1153 | /* | 1158 | /* |
1154 | * hooks: /n/, see "layering violations". | 1159 | * hooks: /n/, see "layering violations". |
1155 | */ | 1160 | */ |
1156 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); | 1161 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); |
1157 | if (ret != 0) { | 1162 | if (ret != 0) { |
1158 | bdput(bdev); | 1163 | bdput(bdev); |
1159 | return ret; | 1164 | return ret; |
1160 | } | 1165 | } |
1161 | 1166 | ||
1162 | lock_kernel(); | 1167 | lock_kernel(); |
1163 | restart: | 1168 | restart: |
1164 | 1169 | ||
1165 | ret = -ENXIO; | 1170 | ret = -ENXIO; |
1166 | disk = get_gendisk(bdev->bd_dev, &partno); | 1171 | disk = get_gendisk(bdev->bd_dev, &partno); |
1167 | if (!disk) | 1172 | if (!disk) |
1168 | goto out_unlock_kernel; | 1173 | goto out_unlock_kernel; |
1169 | 1174 | ||
1170 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1175 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
1171 | if (!bdev->bd_openers) { | 1176 | if (!bdev->bd_openers) { |
1172 | bdev->bd_disk = disk; | 1177 | bdev->bd_disk = disk; |
1173 | bdev->bd_contains = bdev; | 1178 | bdev->bd_contains = bdev; |
1174 | if (!partno) { | 1179 | if (!partno) { |
1175 | struct backing_dev_info *bdi; | 1180 | struct backing_dev_info *bdi; |
1176 | 1181 | ||
1177 | ret = -ENXIO; | 1182 | ret = -ENXIO; |
1178 | bdev->bd_part = disk_get_part(disk, partno); | 1183 | bdev->bd_part = disk_get_part(disk, partno); |
1179 | if (!bdev->bd_part) | 1184 | if (!bdev->bd_part) |
1180 | goto out_clear; | 1185 | goto out_clear; |
1181 | 1186 | ||
1182 | if (disk->fops->open) { | 1187 | if (disk->fops->open) { |
1183 | ret = disk->fops->open(bdev, mode); | 1188 | ret = disk->fops->open(bdev, mode); |
1184 | if (ret == -ERESTARTSYS) { | 1189 | if (ret == -ERESTARTSYS) { |
1185 | /* Lost a race with 'disk' being | 1190 | /* Lost a race with 'disk' being |
1186 | * deleted, try again. | 1191 | * deleted, try again. |
1187 | * See md.c | 1192 | * See md.c |
1188 | */ | 1193 | */ |
1189 | disk_put_part(bdev->bd_part); | 1194 | disk_put_part(bdev->bd_part); |
1190 | bdev->bd_part = NULL; | 1195 | bdev->bd_part = NULL; |
1191 | module_put(disk->fops->owner); | 1196 | module_put(disk->fops->owner); |
1192 | put_disk(disk); | 1197 | put_disk(disk); |
1193 | bdev->bd_disk = NULL; | 1198 | bdev->bd_disk = NULL; |
1194 | mutex_unlock(&bdev->bd_mutex); | 1199 | mutex_unlock(&bdev->bd_mutex); |
1195 | goto restart; | 1200 | goto restart; |
1196 | } | 1201 | } |
1197 | if (ret) | 1202 | if (ret) |
1198 | goto out_clear; | 1203 | goto out_clear; |
1199 | } | 1204 | } |
1200 | if (!bdev->bd_openers) { | 1205 | if (!bdev->bd_openers) { |
1201 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1206 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
1202 | bdi = blk_get_backing_dev_info(bdev); | 1207 | bdi = blk_get_backing_dev_info(bdev); |
1203 | if (bdi == NULL) | 1208 | if (bdi == NULL) |
1204 | bdi = &default_backing_dev_info; | 1209 | bdi = &default_backing_dev_info; |
1205 | bdev->bd_inode->i_data.backing_dev_info = bdi; | 1210 | bdev->bd_inode->i_data.backing_dev_info = bdi; |
1206 | } | 1211 | } |
1207 | if (bdev->bd_invalidated) | 1212 | if (bdev->bd_invalidated) |
1208 | rescan_partitions(disk, bdev); | 1213 | rescan_partitions(disk, bdev); |
1209 | } else { | 1214 | } else { |
1210 | struct block_device *whole; | 1215 | struct block_device *whole; |
1211 | whole = bdget_disk(disk, 0); | 1216 | whole = bdget_disk(disk, 0); |
1212 | ret = -ENOMEM; | 1217 | ret = -ENOMEM; |
1213 | if (!whole) | 1218 | if (!whole) |
1214 | goto out_clear; | 1219 | goto out_clear; |
1215 | BUG_ON(for_part); | 1220 | BUG_ON(for_part); |
1216 | ret = __blkdev_get(whole, mode, 1); | 1221 | ret = __blkdev_get(whole, mode, 1); |
1217 | if (ret) | 1222 | if (ret) |
1218 | goto out_clear; | 1223 | goto out_clear; |
1219 | bdev->bd_contains = whole; | 1224 | bdev->bd_contains = whole; |
1220 | bdev->bd_inode->i_data.backing_dev_info = | 1225 | bdev->bd_inode->i_data.backing_dev_info = |
1221 | whole->bd_inode->i_data.backing_dev_info; | 1226 | whole->bd_inode->i_data.backing_dev_info; |
1222 | bdev->bd_part = disk_get_part(disk, partno); | 1227 | bdev->bd_part = disk_get_part(disk, partno); |
1223 | if (!(disk->flags & GENHD_FL_UP) || | 1228 | if (!(disk->flags & GENHD_FL_UP) || |
1224 | !bdev->bd_part || !bdev->bd_part->nr_sects) { | 1229 | !bdev->bd_part || !bdev->bd_part->nr_sects) { |
1225 | ret = -ENXIO; | 1230 | ret = -ENXIO; |
1226 | goto out_clear; | 1231 | goto out_clear; |
1227 | } | 1232 | } |
1228 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); | 1233 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); |
1229 | } | 1234 | } |
1230 | } else { | 1235 | } else { |
1231 | put_disk(disk); | 1236 | put_disk(disk); |
1232 | module_put(disk->fops->owner); | 1237 | module_put(disk->fops->owner); |
1233 | disk = NULL; | 1238 | disk = NULL; |
1234 | if (bdev->bd_contains == bdev) { | 1239 | if (bdev->bd_contains == bdev) { |
1235 | if (bdev->bd_disk->fops->open) { | 1240 | if (bdev->bd_disk->fops->open) { |
1236 | ret = bdev->bd_disk->fops->open(bdev, mode); | 1241 | ret = bdev->bd_disk->fops->open(bdev, mode); |
1237 | if (ret) | 1242 | if (ret) |
1238 | goto out_unlock_bdev; | 1243 | goto out_unlock_bdev; |
1239 | } | 1244 | } |
1240 | if (bdev->bd_invalidated) | 1245 | if (bdev->bd_invalidated) |
1241 | rescan_partitions(bdev->bd_disk, bdev); | 1246 | rescan_partitions(bdev->bd_disk, bdev); |
1242 | } | 1247 | } |
1243 | } | 1248 | } |
1244 | bdev->bd_openers++; | 1249 | bdev->bd_openers++; |
1245 | if (for_part) | 1250 | if (for_part) |
1246 | bdev->bd_part_count++; | 1251 | bdev->bd_part_count++; |
1247 | mutex_unlock(&bdev->bd_mutex); | 1252 | mutex_unlock(&bdev->bd_mutex); |
1248 | unlock_kernel(); | 1253 | unlock_kernel(); |
1249 | return 0; | 1254 | return 0; |
1250 | 1255 | ||
1251 | out_clear: | 1256 | out_clear: |
1252 | disk_put_part(bdev->bd_part); | 1257 | disk_put_part(bdev->bd_part); |
1253 | bdev->bd_disk = NULL; | 1258 | bdev->bd_disk = NULL; |
1254 | bdev->bd_part = NULL; | 1259 | bdev->bd_part = NULL; |
1255 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1260 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1256 | if (bdev != bdev->bd_contains) | 1261 | if (bdev != bdev->bd_contains) |
1257 | __blkdev_put(bdev->bd_contains, mode, 1); | 1262 | __blkdev_put(bdev->bd_contains, mode, 1); |
1258 | bdev->bd_contains = NULL; | 1263 | bdev->bd_contains = NULL; |
1259 | out_unlock_bdev: | 1264 | out_unlock_bdev: |
1260 | mutex_unlock(&bdev->bd_mutex); | 1265 | mutex_unlock(&bdev->bd_mutex); |
1261 | out_unlock_kernel: | 1266 | out_unlock_kernel: |
1262 | unlock_kernel(); | 1267 | unlock_kernel(); |
1263 | 1268 | ||
1264 | if (disk) | 1269 | if (disk) |
1265 | module_put(disk->fops->owner); | 1270 | module_put(disk->fops->owner); |
1266 | put_disk(disk); | 1271 | put_disk(disk); |
1267 | bdput(bdev); | 1272 | bdput(bdev); |
1268 | 1273 | ||
1269 | return ret; | 1274 | return ret; |
1270 | } | 1275 | } |
1271 | 1276 | ||
1272 | int blkdev_get(struct block_device *bdev, fmode_t mode) | 1277 | int blkdev_get(struct block_device *bdev, fmode_t mode) |
1273 | { | 1278 | { |
1274 | return __blkdev_get(bdev, mode, 0); | 1279 | return __blkdev_get(bdev, mode, 0); |
1275 | } | 1280 | } |
1276 | EXPORT_SYMBOL(blkdev_get); | 1281 | EXPORT_SYMBOL(blkdev_get); |
1277 | 1282 | ||
1278 | static int blkdev_open(struct inode * inode, struct file * filp) | 1283 | static int blkdev_open(struct inode * inode, struct file * filp) |
1279 | { | 1284 | { |
1280 | struct block_device *bdev; | 1285 | struct block_device *bdev; |
1281 | int res; | 1286 | int res; |
1282 | 1287 | ||
1283 | /* | 1288 | /* |
1284 | * Preserve backwards compatibility and allow large file access | 1289 | * Preserve backwards compatibility and allow large file access |
1285 | * even if userspace doesn't ask for it explicitly. Some mkfs | 1290 | * even if userspace doesn't ask for it explicitly. Some mkfs |
1286 | * binaries need it. We might want to drop this workaround | 1291 | * binaries need it. We might want to drop this workaround |
1287 | * during an unstable branch. | 1292 | * during an unstable branch. |
1288 | */ | 1293 | */ |
1289 | filp->f_flags |= O_LARGEFILE; | 1294 | filp->f_flags |= O_LARGEFILE; |
1290 | 1295 | ||
1291 | if (filp->f_flags & O_NDELAY) | 1296 | if (filp->f_flags & O_NDELAY) |
1292 | filp->f_mode |= FMODE_NDELAY; | 1297 | filp->f_mode |= FMODE_NDELAY; |
1293 | if (filp->f_flags & O_EXCL) | 1298 | if (filp->f_flags & O_EXCL) |
1294 | filp->f_mode |= FMODE_EXCL; | 1299 | filp->f_mode |= FMODE_EXCL; |
1295 | if ((filp->f_flags & O_ACCMODE) == 3) | 1300 | if ((filp->f_flags & O_ACCMODE) == 3) |
1296 | filp->f_mode |= FMODE_WRITE_IOCTL; | 1301 | filp->f_mode |= FMODE_WRITE_IOCTL; |
1297 | 1302 | ||
1298 | bdev = bd_acquire(inode); | 1303 | bdev = bd_acquire(inode); |
1299 | if (bdev == NULL) | 1304 | if (bdev == NULL) |
1300 | return -ENOMEM; | 1305 | return -ENOMEM; |
1301 | 1306 | ||
1302 | filp->f_mapping = bdev->bd_inode->i_mapping; | 1307 | filp->f_mapping = bdev->bd_inode->i_mapping; |
1303 | 1308 | ||
1304 | res = blkdev_get(bdev, filp->f_mode); | 1309 | res = blkdev_get(bdev, filp->f_mode); |
1305 | if (res) | 1310 | if (res) |
1306 | return res; | 1311 | return res; |
1307 | 1312 | ||
1308 | if (filp->f_mode & FMODE_EXCL) { | 1313 | if (filp->f_mode & FMODE_EXCL) { |
1309 | res = bd_claim(bdev, filp); | 1314 | res = bd_claim(bdev, filp); |
1310 | if (res) | 1315 | if (res) |
1311 | goto out_blkdev_put; | 1316 | goto out_blkdev_put; |
1312 | } | 1317 | } |
1313 | 1318 | ||
1314 | return 0; | 1319 | return 0; |
1315 | 1320 | ||
1316 | out_blkdev_put: | 1321 | out_blkdev_put: |
1317 | blkdev_put(bdev, filp->f_mode); | 1322 | blkdev_put(bdev, filp->f_mode); |
1318 | return res; | 1323 | return res; |
1319 | } | 1324 | } |
1320 | 1325 | ||
1321 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | 1326 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) |
1322 | { | 1327 | { |
1323 | int ret = 0; | 1328 | int ret = 0; |
1324 | struct gendisk *disk = bdev->bd_disk; | 1329 | struct gendisk *disk = bdev->bd_disk; |
1325 | struct block_device *victim = NULL; | 1330 | struct block_device *victim = NULL; |
1326 | 1331 | ||
1327 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1332 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
1328 | lock_kernel(); | 1333 | lock_kernel(); |
1329 | if (for_part) | 1334 | if (for_part) |
1330 | bdev->bd_part_count--; | 1335 | bdev->bd_part_count--; |
1331 | 1336 | ||
1332 | if (!--bdev->bd_openers) { | 1337 | if (!--bdev->bd_openers) { |
1333 | sync_blockdev(bdev); | 1338 | sync_blockdev(bdev); |
1334 | kill_bdev(bdev); | 1339 | kill_bdev(bdev); |
1335 | } | 1340 | } |
1336 | if (bdev->bd_contains == bdev) { | 1341 | if (bdev->bd_contains == bdev) { |
1337 | if (disk->fops->release) | 1342 | if (disk->fops->release) |
1338 | ret = disk->fops->release(disk, mode); | 1343 | ret = disk->fops->release(disk, mode); |
1339 | } | 1344 | } |
1340 | if (!bdev->bd_openers) { | 1345 | if (!bdev->bd_openers) { |
1341 | struct module *owner = disk->fops->owner; | 1346 | struct module *owner = disk->fops->owner; |
1342 | 1347 | ||
1343 | put_disk(disk); | 1348 | put_disk(disk); |
1344 | module_put(owner); | 1349 | module_put(owner); |
1345 | disk_put_part(bdev->bd_part); | 1350 | disk_put_part(bdev->bd_part); |
1346 | bdev->bd_part = NULL; | 1351 | bdev->bd_part = NULL; |
1347 | bdev->bd_disk = NULL; | 1352 | bdev->bd_disk = NULL; |
1348 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1353 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1349 | if (bdev != bdev->bd_contains) | 1354 | if (bdev != bdev->bd_contains) |
1350 | victim = bdev->bd_contains; | 1355 | victim = bdev->bd_contains; |
1351 | bdev->bd_contains = NULL; | 1356 | bdev->bd_contains = NULL; |
1352 | } | 1357 | } |
1353 | unlock_kernel(); | 1358 | unlock_kernel(); |
1354 | mutex_unlock(&bdev->bd_mutex); | 1359 | mutex_unlock(&bdev->bd_mutex); |
1355 | bdput(bdev); | 1360 | bdput(bdev); |
1356 | if (victim) | 1361 | if (victim) |
1357 | __blkdev_put(victim, mode, 1); | 1362 | __blkdev_put(victim, mode, 1); |
1358 | return ret; | 1363 | return ret; |
1359 | } | 1364 | } |
1360 | 1365 | ||
1361 | int blkdev_put(struct block_device *bdev, fmode_t mode) | 1366 | int blkdev_put(struct block_device *bdev, fmode_t mode) |
1362 | { | 1367 | { |
1363 | return __blkdev_put(bdev, mode, 0); | 1368 | return __blkdev_put(bdev, mode, 0); |
1364 | } | 1369 | } |
1365 | EXPORT_SYMBOL(blkdev_put); | 1370 | EXPORT_SYMBOL(blkdev_put); |
1366 | 1371 | ||
1367 | static int blkdev_close(struct inode * inode, struct file * filp) | 1372 | static int blkdev_close(struct inode * inode, struct file * filp) |
1368 | { | 1373 | { |
1369 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1374 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); |
1370 | if (bdev->bd_holder == filp) | 1375 | if (bdev->bd_holder == filp) |
1371 | bd_release(bdev); | 1376 | bd_release(bdev); |
1372 | return blkdev_put(bdev, filp->f_mode); | 1377 | return blkdev_put(bdev, filp->f_mode); |
1373 | } | 1378 | } |
1374 | 1379 | ||
1375 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 1380 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
1376 | { | 1381 | { |
1377 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | 1382 | struct block_device *bdev = I_BDEV(file->f_mapping->host); |
1378 | fmode_t mode = file->f_mode; | 1383 | fmode_t mode = file->f_mode; |
1379 | 1384 | ||
1380 | /* | 1385 | /* |
1381 | * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have | 1386 | * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have |
1382 | * to update it before every ioctl. | 1387 | * to update it before every ioctl. |
1383 | */ | 1388 | */ |
1384 | if (file->f_flags & O_NDELAY) | 1389 | if (file->f_flags & O_NDELAY) |
1385 | mode |= FMODE_NDELAY; | 1390 | mode |= FMODE_NDELAY; |
1386 | else | 1391 | else |
1387 | mode &= ~FMODE_NDELAY; | 1392 | mode &= ~FMODE_NDELAY; |
1388 | 1393 | ||
1389 | return blkdev_ioctl(bdev, mode, cmd, arg); | 1394 | return blkdev_ioctl(bdev, mode, cmd, arg); |
1390 | } | 1395 | } |
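The recomputation matters because userspace can flip the flag between ioctls on the same open file; a hedged userspace sketch (the device path is hypothetical, and on Linux O_NDELAY equals O_NONBLOCK):

/* Hedged sketch: same fd, different FMODE_NDELAY on consecutive ioctls. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

static int example_toggle_ndelay(const char *path)	/* e.g. "/dev/sdX" */
{
	int ro, fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	ioctl(fd, BLKROGET, &ro);	/* served without FMODE_NDELAY */
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
	ioctl(fd, BLKROGET, &ro);	/* same fd, now FMODE_NDELAY is set */
	close(fd);
	return 0;
}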
1391 | 1396 | ||
1392 | /* | 1397 | /* |
1393 | * Try to release a page associated with a block device when the system | 1398 | * Try to release a page associated with a block device when the system |
1394 | * is under memory pressure. | 1399 | * is under memory pressure. |
1395 | */ | 1400 | */ |
1396 | static int blkdev_releasepage(struct page *page, gfp_t wait) | 1401 | static int blkdev_releasepage(struct page *page, gfp_t wait) |
1397 | { | 1402 | { |
1398 | struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; | 1403 | struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; |
1399 | 1404 | ||
1400 | if (super && super->s_op->bdev_try_to_free_page) | 1405 | if (super && super->s_op->bdev_try_to_free_page) |
1401 | return super->s_op->bdev_try_to_free_page(super, page, wait); | 1406 | return super->s_op->bdev_try_to_free_page(super, page, wait); |
1402 | 1407 | ||
1403 | return try_to_free_buffers(page); | 1408 | return try_to_free_buffers(page); |
1404 | } | 1409 | } |
1405 | 1410 | ||
1406 | static const struct address_space_operations def_blk_aops = { | 1411 | static const struct address_space_operations def_blk_aops = { |
1407 | .readpage = blkdev_readpage, | 1412 | .readpage = blkdev_readpage, |
1408 | .writepage = blkdev_writepage, | 1413 | .writepage = blkdev_writepage, |
1409 | .sync_page = block_sync_page, | 1414 | .sync_page = block_sync_page, |
1410 | .write_begin = blkdev_write_begin, | 1415 | .write_begin = blkdev_write_begin, |
1411 | .write_end = blkdev_write_end, | 1416 | .write_end = blkdev_write_end, |
1412 | .writepages = generic_writepages, | 1417 | .writepages = generic_writepages, |
1413 | .releasepage = blkdev_releasepage, | 1418 | .releasepage = blkdev_releasepage, |
1414 | .direct_IO = blkdev_direct_IO, | 1419 | .direct_IO = blkdev_direct_IO, |
1415 | }; | 1420 | }; |
1416 | 1421 | ||
1417 | const struct file_operations def_blk_fops = { | 1422 | const struct file_operations def_blk_fops = { |
1418 | .open = blkdev_open, | 1423 | .open = blkdev_open, |
1419 | .release = blkdev_close, | 1424 | .release = blkdev_close, |
1420 | .llseek = block_llseek, | 1425 | .llseek = block_llseek, |
1421 | .read = do_sync_read, | 1426 | .read = do_sync_read, |
1422 | .write = do_sync_write, | 1427 | .write = do_sync_write, |
1423 | .aio_read = generic_file_aio_read, | 1428 | .aio_read = generic_file_aio_read, |
1424 | .aio_write = generic_file_aio_write_nolock, | 1429 | .aio_write = generic_file_aio_write_nolock, |
1425 | .mmap = generic_file_mmap, | 1430 | .mmap = generic_file_mmap, |
1426 | .fsync = block_fsync, | 1431 | .fsync = block_fsync, |
1427 | .unlocked_ioctl = block_ioctl, | 1432 | .unlocked_ioctl = block_ioctl, |
1428 | #ifdef CONFIG_COMPAT | 1433 | #ifdef CONFIG_COMPAT |
1429 | .compat_ioctl = compat_blkdev_ioctl, | 1434 | .compat_ioctl = compat_blkdev_ioctl, |
1430 | #endif | 1435 | #endif |
1431 | .splice_read = generic_file_splice_read, | 1436 | .splice_read = generic_file_splice_read, |
1432 | .splice_write = generic_file_splice_write, | 1437 | .splice_write = generic_file_splice_write, |
1433 | }; | 1438 | }; |
1434 | 1439 | ||
1435 | int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) | 1440 | int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) |
1436 | { | 1441 | { |
1437 | int res; | 1442 | int res; |
1438 | mm_segment_t old_fs = get_fs(); | 1443 | mm_segment_t old_fs = get_fs(); |
1439 | set_fs(KERNEL_DS); | 1444 | set_fs(KERNEL_DS); |
1440 | res = blkdev_ioctl(bdev, 0, cmd, arg); | 1445 | res = blkdev_ioctl(bdev, 0, cmd, arg); |
1441 | set_fs(old_fs); | 1446 | set_fs(old_fs); |
1442 | return res; | 1447 | return res; |
1443 | } | 1448 | } |
1444 | 1449 | ||
1445 | EXPORT_SYMBOL(ioctl_by_bdev); | 1450 | EXPORT_SYMBOL(ioctl_by_bdev); |
1446 | 1451 | ||
1447 | /** | 1452 | /** |
1448 | * lookup_bdev - lookup a struct block_device by name | 1453 | * lookup_bdev - lookup a struct block_device by name |
1449 | * @pathname: special file representing the block device | 1454 | * @pathname: special file representing the block device |
1450 | * | 1455 | * |
1451 | * Get a reference to the blockdevice at @pathname in the current | 1456 | * Get a reference to the blockdevice at @pathname in the current |
1452 | * namespace if possible and return it. Return ERR_PTR(error) | 1457 | * namespace if possible and return it. Return ERR_PTR(error) |
1453 | * otherwise. | 1458 | * otherwise. |
1454 | */ | 1459 | */ |
1455 | struct block_device *lookup_bdev(const char *pathname) | 1460 | struct block_device *lookup_bdev(const char *pathname) |
1456 | { | 1461 | { |
1457 | struct block_device *bdev; | 1462 | struct block_device *bdev; |
1458 | struct inode *inode; | 1463 | struct inode *inode; |
1459 | struct path path; | 1464 | struct path path; |
1460 | int error; | 1465 | int error; |
1461 | 1466 | ||
1462 | if (!pathname || !*pathname) | 1467 | if (!pathname || !*pathname) |
1463 | return ERR_PTR(-EINVAL); | 1468 | return ERR_PTR(-EINVAL); |
1464 | 1469 | ||
1465 | error = kern_path(pathname, LOOKUP_FOLLOW, &path); | 1470 | error = kern_path(pathname, LOOKUP_FOLLOW, &path); |
1466 | if (error) | 1471 | if (error) |
1467 | return ERR_PTR(error); | 1472 | return ERR_PTR(error); |
1468 | 1473 | ||
1469 | inode = path.dentry->d_inode; | 1474 | inode = path.dentry->d_inode; |
1470 | error = -ENOTBLK; | 1475 | error = -ENOTBLK; |
1471 | if (!S_ISBLK(inode->i_mode)) | 1476 | if (!S_ISBLK(inode->i_mode)) |
1472 | goto fail; | 1477 | goto fail; |
1473 | error = -EACCES; | 1478 | error = -EACCES; |
1474 | if (path.mnt->mnt_flags & MNT_NODEV) | 1479 | if (path.mnt->mnt_flags & MNT_NODEV) |
1475 | goto fail; | 1480 | goto fail; |
1476 | error = -ENOMEM; | 1481 | error = -ENOMEM; |
1477 | bdev = bd_acquire(inode); | 1482 | bdev = bd_acquire(inode); |
1478 | if (!bdev) | 1483 | if (!bdev) |
1479 | goto fail; | 1484 | goto fail; |
1480 | out: | 1485 | out: |
1481 | path_put(&path); | 1486 | path_put(&path); |
1482 | return bdev; | 1487 | return bdev; |
1483 | fail: | 1488 | fail: |
1484 | bdev = ERR_PTR(error); | 1489 | bdev = ERR_PTR(error); |
1485 | goto out; | 1490 | goto out; |
1486 | } | 1491 | } |
1487 | EXPORT_SYMBOL(lookup_bdev); | 1492 | EXPORT_SYMBOL(lookup_bdev); |
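A hedged sketch of the lookup/put pairing; the wrapper name is invented:

/* Hedged sketch: resolve a path to a bdev reference, then drop it. */
static int example_inspect(const char *path)	/* e.g. "/dev/sda1" */
{
	struct block_device *bdev = lookup_bdev(path);

	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	/* ... inspect bdev ... */
	bdput(bdev);	/* lookup_bdev() returned a reference; drop it */
	return 0;
}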
1488 | 1493 | ||
1489 | /** | 1494 | /** |
1490 | * open_bdev_exclusive - open a block device by name and set it up for use | 1495 | * open_bdev_exclusive - open a block device by name and set it up for use |
1491 | * | 1496 | * |
1492 | * @path: special file representing the block device | 1497 | * @path: special file representing the block device |
1493 | * @mode: FMODE_... combination to be used | 1498 | * @mode: FMODE_... combination to be used |
1494 | * @holder: owner for exclusion | 1499 | * @holder: owner for exclusion |
1495 | * | 1500 | * |
1496 | * Open the blockdevice described by the special file at @path, claim it | 1501 | * Open the blockdevice described by the special file at @path, claim it |
1497 | * for the @holder. | 1502 | * for the @holder. |
1498 | */ | 1503 | */ |
1499 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) | 1504 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) |
1500 | { | 1505 | { |
1501 | struct block_device *bdev; | 1506 | struct block_device *bdev; |
1502 | int error = 0; | 1507 | int error = 0; |
1503 | 1508 | ||
1504 | bdev = lookup_bdev(path); | 1509 | bdev = lookup_bdev(path); |
1505 | if (IS_ERR(bdev)) | 1510 | if (IS_ERR(bdev)) |
1506 | return bdev; | 1511 | return bdev; |
1507 | 1512 | ||
1508 | error = blkdev_get(bdev, mode); | 1513 | error = blkdev_get(bdev, mode); |
1509 | if (error) | 1514 | if (error) |
1510 | return ERR_PTR(error); | 1515 | return ERR_PTR(error); |
1511 | error = -EACCES; | 1516 | error = -EACCES; |
1512 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) | 1517 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) |
1513 | goto blkdev_put; | 1518 | goto blkdev_put; |
1514 | error = bd_claim(bdev, holder); | 1519 | error = bd_claim(bdev, holder); |
1515 | if (error) | 1520 | if (error) |
1516 | goto blkdev_put; | 1521 | goto blkdev_put; |
1517 | 1522 | ||
1518 | return bdev; | 1523 | return bdev; |
1519 | 1524 | ||
1520 | blkdev_put: | 1525 | blkdev_put: |
1521 | blkdev_put(bdev, mode); | 1526 | blkdev_put(bdev, mode); |
1522 | return ERR_PTR(error); | 1527 | return ERR_PTR(error); |
1523 | } | 1528 | } |
1524 | 1529 | ||
1525 | EXPORT_SYMBOL(open_bdev_exclusive); | 1530 | EXPORT_SYMBOL(open_bdev_exclusive); |
1526 | 1531 | ||
1527 | /** | 1532 | /** |
1528 | * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() | 1533 | * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() |
1529 | * | 1534 | * |
1530 | * @bdev: blockdevice to close | 1535 | * @bdev: blockdevice to close |
1531 | * @mode: mode, must match that used to open. | 1536 | * @mode: mode, must match that used to open. |
1532 | * | 1537 | * |
1533 | * This is the counterpart to open_bdev_exclusive(). | 1538 | * This is the counterpart to open_bdev_exclusive(). |
1534 | */ | 1539 | */ |
1535 | void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) | 1540 | void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) |
1536 | { | 1541 | { |
1537 | bd_release(bdev); | 1542 | bd_release(bdev); |
1538 | blkdev_put(bdev, mode); | 1543 | blkdev_put(bdev, mode); |
1539 | } | 1544 | } |
1540 | 1545 | ||
1541 | EXPORT_SYMBOL(close_bdev_exclusive); | 1546 | EXPORT_SYMBOL(close_bdev_exclusive); |
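This pair is what a mount path would use to hold a device for the lifetime of a filesystem; a hedged sketch with an invented name, noting that the mode passed to the close must match the open:

/* Hedged sketch: exclusive open/close pairing around some use of the device. */
static int example_with_device(const char *path, void *holder)
{
	struct block_device *bdev;

	bdev = open_bdev_exclusive(path, FMODE_READ | FMODE_WRITE, holder);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	/* ... read the superblock, do I/O ... */
	close_bdev_exclusive(bdev, FMODE_READ | FMODE_WRITE);	/* same mode */
	return 0;
}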
1542 | 1547 | ||
1543 | int __invalidate_device(struct block_device *bdev) | 1548 | int __invalidate_device(struct block_device *bdev) |
1544 | { | 1549 | { |
1545 | struct super_block *sb = get_super(bdev); | 1550 | struct super_block *sb = get_super(bdev); |
1546 | int res = 0; | 1551 | int res = 0; |
1547 | 1552 | ||
1548 | if (sb) { | 1553 | if (sb) { |
1549 | /* | 1554 | /* |
1550 | * no need to lock the super, get_super holds the | 1555 | * no need to lock the super, get_super holds the |
1551 | * read mutex so the filesystem cannot go away | 1556 | * read mutex so the filesystem cannot go away |
1552 | * under us (->put_super runs with the write lock | 1557 | * under us (->put_super runs with the write lock |
1553 | * held). | 1558 | * held). |
1554 | */ | 1559 | */ |
1555 | shrink_dcache_sb(sb); | 1560 | shrink_dcache_sb(sb); |
1556 | res = invalidate_inodes(sb); | 1561 | res = invalidate_inodes(sb); |
1557 | drop_super(sb); | 1562 | drop_super(sb); |
1558 | } | 1563 | } |
1559 | invalidate_bdev(bdev); | 1564 | invalidate_bdev(bdev); |
fs/fs-writeback.c
1 | /* | 1 | /* |
2 | * fs/fs-writeback.c | 2 | * fs/fs-writeback.c |
3 | * | 3 | * |
4 | * Copyright (C) 2002, Linus Torvalds. | 4 | * Copyright (C) 2002, Linus Torvalds. |
5 | * | 5 | * |
6 | * Contains all the functions related to writing back and waiting | 6 | * Contains all the functions related to writing back and waiting |
7 | * upon dirty inodes against superblocks, and writing back dirty | 7 | * upon dirty inodes against superblocks, and writing back dirty |
8 | * pages against inodes. ie: data writeback. Writeout of the | 8 | * pages against inodes. ie: data writeback. Writeout of the |
9 | * inode itself is not handled here. | 9 | * inode itself is not handled here. |
10 | * | 10 | * |
11 | * 10Apr2002 Andrew Morton | 11 | * 10Apr2002 Andrew Morton |
12 | * Split out of fs/inode.c | 12 | * Split out of fs/inode.c |
13 | * Additions for address_space-based writeback | 13 | * Additions for address_space-based writeback |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <linux/backing-dev.h> | 24 | #include <linux/backing-dev.h> |
25 | #include <linux/buffer_head.h> | 25 | #include <linux/buffer_head.h> |
26 | #include "internal.h" | 26 | #include "internal.h" |
27 | 27 | ||
28 | 28 | ||
29 | /** | 29 | /** |
30 | * writeback_acquire - attempt to get exclusive writeback access to a device | 30 | * writeback_acquire - attempt to get exclusive writeback access to a device |
31 | * @bdi: the device's backing_dev_info structure | 31 | * @bdi: the device's backing_dev_info structure |
32 | * | 32 | * |
33 | * It is a waste of resources to have more than one pdflush thread blocked on | 33 | * It is a waste of resources to have more than one pdflush thread blocked on |
34 | * a single request queue. Exclusion at the request_queue level is obtained | 34 | * a single request queue. Exclusion at the request_queue level is obtained |
35 | * via a flag in the request_queue's backing_dev_info.state. | 35 | * via a flag in the request_queue's backing_dev_info.state. |
36 | * | 36 | * |
37 | * Non-request_queue-backed address_spaces will share default_backing_dev_info, | 37 | * Non-request_queue-backed address_spaces will share default_backing_dev_info, |
38 | * unless they implement their own, which is somewhat inefficient, as this | 38 | * unless they implement their own, which is somewhat inefficient, as this |
39 | * may prevent concurrent writeback against multiple devices. | 39 | * may prevent concurrent writeback against multiple devices. |
40 | */ | 40 | */ |
41 | static int writeback_acquire(struct backing_dev_info *bdi) | 41 | static int writeback_acquire(struct backing_dev_info *bdi) |
42 | { | 42 | { |
43 | return !test_and_set_bit(BDI_pdflush, &bdi->state); | 43 | return !test_and_set_bit(BDI_pdflush, &bdi->state); |
44 | } | 44 | } |
45 | 45 | ||
46 | /** | 46 | /** |
47 | * writeback_in_progress - determine whether there is writeback in progress | 47 | * writeback_in_progress - determine whether there is writeback in progress |
48 | * @bdi: the device's backing_dev_info structure. | 48 | * @bdi: the device's backing_dev_info structure. |
49 | * | 49 | * |
50 | * Determine whether there is writeback in progress against a backing device. | 50 | * Determine whether there is writeback in progress against a backing device. |
51 | */ | 51 | */ |
52 | int writeback_in_progress(struct backing_dev_info *bdi) | 52 | int writeback_in_progress(struct backing_dev_info *bdi) |
53 | { | 53 | { |
54 | return test_bit(BDI_pdflush, &bdi->state); | 54 | return test_bit(BDI_pdflush, &bdi->state); |
55 | } | 55 | } |
56 | 56 | ||
57 | /** | 57 | /** |
58 | * writeback_release - relinquish exclusive writeback access against a device. | 58 | * writeback_release - relinquish exclusive writeback access against a device. |
59 | * @bdi: the device's backing_dev_info structure | 59 | * @bdi: the device's backing_dev_info structure |
60 | */ | 60 | */ |
61 | static void writeback_release(struct backing_dev_info *bdi) | 61 | static void writeback_release(struct backing_dev_info *bdi) |
62 | { | 62 | { |
63 | BUG_ON(!writeback_in_progress(bdi)); | 63 | BUG_ON(!writeback_in_progress(bdi)); |
64 | clear_bit(BDI_pdflush, &bdi->state); | 64 | clear_bit(BDI_pdflush, &bdi->state); |
65 | } | 65 | } |
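Taken together, these three helpers give pdflush a per-device try-lock; a hedged sketch of the intended pattern, with an invented name:

/* Hedged sketch: the exclusion pattern these helpers support. */
static void example_flush_one_bdi(struct backing_dev_info *bdi)
{
	if (!writeback_acquire(bdi))	/* another pdflush thread owns it */
		return;
	/* ... write back dirty pages against this device ... */
	writeback_release(bdi);
}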
66 | 66 | ||
67 | /** | 67 | /** |
68 | * __mark_inode_dirty - internal function | 68 | * __mark_inode_dirty - internal function |
69 | * @inode: inode to mark | 69 | * @inode: inode to mark |
70 | * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) | 70 | * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) |
71 | * Mark an inode as dirty. Callers should use mark_inode_dirty or | 71 | * Mark an inode as dirty. Callers should use mark_inode_dirty or |
72 | * mark_inode_dirty_sync. | 72 | * mark_inode_dirty_sync. |
73 | * | 73 | * |
74 | * Put the inode on the super block's dirty list. | 74 | * Put the inode on the super block's dirty list. |
75 | * | 75 | * |
76 | * CAREFUL! We mark it dirty unconditionally, but move it onto the | 76 | * CAREFUL! We mark it dirty unconditionally, but move it onto the |
77 | * dirty list only if it is hashed or if it refers to a blockdev. | 77 | * dirty list only if it is hashed or if it refers to a blockdev. |
78 | * If it was not hashed, it will never be added to the dirty list | 78 | * If it was not hashed, it will never be added to the dirty list |
79 | * even if it is later hashed, as it will have been marked dirty already. | 79 | * even if it is later hashed, as it will have been marked dirty already. |
80 | * | 80 | * |
81 | * In short, make sure you hash any inodes _before_ you start marking | 81 | * In short, make sure you hash any inodes _before_ you start marking |
82 | * them dirty. | 82 | * them dirty. |
83 | * | 83 | * |
84 | * This function *must* be atomic for the I_DIRTY_PAGES case - | 84 | * This function *must* be atomic for the I_DIRTY_PAGES case - |
85 | * set_page_dirty() is called under spinlock in several places. | 85 | * set_page_dirty() is called under spinlock in several places. |
86 | * | 86 | * |
87 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of | 87 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of |
88 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of | 88 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of |
89 | * the kernel-internal blockdev inode represents the dirtying time of the | 89 | * the kernel-internal blockdev inode represents the dirtying time of the |
90 | * blockdev's pages. This is why for I_DIRTY_PAGES we always use | 90 | * blockdev's pages. This is why for I_DIRTY_PAGES we always use |
91 | * page->mapping->host, so the page-dirtying time is recorded in the internal | 91 | * page->mapping->host, so the page-dirtying time is recorded in the internal |
92 | * blockdev inode. | 92 | * blockdev inode. |
93 | */ | 93 | */ |
94 | void __mark_inode_dirty(struct inode *inode, int flags) | 94 | void __mark_inode_dirty(struct inode *inode, int flags) |
95 | { | 95 | { |
96 | struct super_block *sb = inode->i_sb; | 96 | struct super_block *sb = inode->i_sb; |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 99 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
100 | * dirty the inode itself | 100 | * dirty the inode itself |
101 | */ | 101 | */ |
102 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 102 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
103 | if (sb->s_op->dirty_inode) | 103 | if (sb->s_op->dirty_inode) |
104 | sb->s_op->dirty_inode(inode); | 104 | sb->s_op->dirty_inode(inode); |
105 | } | 105 | } |
106 | 106 | ||
107 | /* | 107 | /* |
108 | * make sure that changes are seen by all cpus before we test i_state | 108 | * make sure that changes are seen by all cpus before we test i_state |
109 | * -- mikulas | 109 | * -- mikulas |
110 | */ | 110 | */ |
111 | smp_mb(); | 111 | smp_mb(); |
112 | 112 | ||
113 | /* avoid the locking if we can */ | 113 | /* avoid the locking if we can */ |
114 | if ((inode->i_state & flags) == flags) | 114 | if ((inode->i_state & flags) == flags) |
115 | return; | 115 | return; |
116 | 116 | ||
117 | if (unlikely(block_dump)) { | 117 | if (unlikely(block_dump)) { |
118 | struct dentry *dentry = NULL; | 118 | struct dentry *dentry = NULL; |
119 | const char *name = "?"; | 119 | const char *name = "?"; |
120 | 120 | ||
121 | if (!list_empty(&inode->i_dentry)) { | 121 | if (!list_empty(&inode->i_dentry)) { |
122 | dentry = list_entry(inode->i_dentry.next, | 122 | dentry = list_entry(inode->i_dentry.next, |
123 | struct dentry, d_alias); | 123 | struct dentry, d_alias); |
124 | if (dentry && dentry->d_name.name) | 124 | if (dentry && dentry->d_name.name) |
125 | name = (const char *) dentry->d_name.name; | 125 | name = (const char *) dentry->d_name.name; |
126 | } | 126 | } |
127 | 127 | ||
128 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) | 128 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) |
129 | printk(KERN_DEBUG | 129 | printk(KERN_DEBUG |
130 | "%s(%d): dirtied inode %lu (%s) on %s\n", | 130 | "%s(%d): dirtied inode %lu (%s) on %s\n", |
131 | current->comm, task_pid_nr(current), inode->i_ino, | 131 | current->comm, task_pid_nr(current), inode->i_ino, |
132 | name, inode->i_sb->s_id); | 132 | name, inode->i_sb->s_id); |
133 | } | 133 | } |
134 | 134 | ||
135 | spin_lock(&inode_lock); | 135 | spin_lock(&inode_lock); |
136 | if ((inode->i_state & flags) != flags) { | 136 | if ((inode->i_state & flags) != flags) { |
137 | const int was_dirty = inode->i_state & I_DIRTY; | 137 | const int was_dirty = inode->i_state & I_DIRTY; |
138 | 138 | ||
139 | inode->i_state |= flags; | 139 | inode->i_state |= flags; |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * If the inode is being synced, just update its dirty state. | 142 | * If the inode is being synced, just update its dirty state. |
143 | * The unlocker will place the inode on the appropriate | 143 | * The unlocker will place the inode on the appropriate |
144 | * superblock list, based upon its state. | 144 | * superblock list, based upon its state. |
145 | */ | 145 | */ |
146 | if (inode->i_state & I_SYNC) | 146 | if (inode->i_state & I_SYNC) |
147 | goto out; | 147 | goto out; |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * Only add valid (hashed) inodes to the superblock's | 150 | * Only add valid (hashed) inodes to the superblock's |
151 | * dirty list. Add blockdev inodes as well. | 151 | * dirty list. Add blockdev inodes as well. |
152 | */ | 152 | */ |
153 | if (!S_ISBLK(inode->i_mode)) { | 153 | if (!S_ISBLK(inode->i_mode)) { |
154 | if (hlist_unhashed(&inode->i_hash)) | 154 | if (hlist_unhashed(&inode->i_hash)) |
155 | goto out; | 155 | goto out; |
156 | } | 156 | } |
157 | if (inode->i_state & (I_FREEING|I_CLEAR)) | 157 | if (inode->i_state & (I_FREEING|I_CLEAR)) |
158 | goto out; | 158 | goto out; |
159 | 159 | ||
160 | /* | 160 | /* |
161 | * If the inode was already on s_dirty/s_io/s_more_io, don't | 161 | * If the inode was already on s_dirty/s_io/s_more_io, don't |
162 | * reposition it (that would break s_dirty time-ordering). | 162 | * reposition it (that would break s_dirty time-ordering). |
163 | */ | 163 | */ |
164 | if (!was_dirty) { | 164 | if (!was_dirty) { |
165 | inode->dirtied_when = jiffies; | 165 | inode->dirtied_when = jiffies; |
166 | list_move(&inode->i_list, &sb->s_dirty); | 166 | list_move(&inode->i_list, &sb->s_dirty); |
167 | } | 167 | } |
168 | } | 168 | } |
169 | out: | 169 | out: |
170 | spin_unlock(&inode_lock); | 170 | spin_unlock(&inode_lock); |
171 | } | 171 | } |
172 | 172 | ||
173 | EXPORT_SYMBOL(__mark_inode_dirty); | 173 | EXPORT_SYMBOL(__mark_inode_dirty); |
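Filesystems rarely call this directly; they reach it through the mark_inode_dirty()/mark_inode_dirty_sync() wrappers after mutating an inode. A hedged sketch, function name invented:

/* Hedged sketch: a typical caller after updating inode metadata. */
static void example_touch(struct inode *inode)
{
	inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
	mark_inode_dirty(inode);	/* expands to __mark_inode_dirty(inode, I_DIRTY) */
}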
174 | 174 | ||
175 | static int write_inode(struct inode *inode, int sync) | 175 | static int write_inode(struct inode *inode, int sync) |
176 | { | 176 | { |
177 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) | 177 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) |
178 | return inode->i_sb->s_op->write_inode(inode, sync); | 178 | return inode->i_sb->s_op->write_inode(inode, sync); |
179 | return 0; | 179 | return 0; |
180 | } | 180 | } |
181 | 181 | ||
182 | /* | 182 | /* |
183 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the | 183 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the |
184 | * furthest end of its superblock's dirty-inode list. | 184 | * furthest end of its superblock's dirty-inode list. |
185 | * | 185 | * |
186 | * Before stamping the inode's ->dirtied_when, we check to see whether it is | 186 | * Before stamping the inode's ->dirtied_when, we check to see whether it is |
187 | * already the most-recently-dirtied inode on the s_dirty list. If that is | 187 | * already the most-recently-dirtied inode on the s_dirty list. If that is |
188 | * the case then the inode must have been redirtied while it was being written | 188 | * the case then the inode must have been redirtied while it was being written |
189 | * out and we don't reset its dirtied_when. | 189 | * out and we don't reset its dirtied_when. |
190 | */ | 190 | */ |
191 | static void redirty_tail(struct inode *inode) | 191 | static void redirty_tail(struct inode *inode) |
192 | { | 192 | { |
193 | struct super_block *sb = inode->i_sb; | 193 | struct super_block *sb = inode->i_sb; |
194 | 194 | ||
195 | if (!list_empty(&sb->s_dirty)) { | 195 | if (!list_empty(&sb->s_dirty)) { |
196 | struct inode *tail_inode; | 196 | struct inode *tail_inode; |
197 | 197 | ||
198 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); | 198 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); |
199 | if (time_before(inode->dirtied_when, | 199 | if (time_before(inode->dirtied_when, |
200 | tail_inode->dirtied_when)) | 200 | tail_inode->dirtied_when)) |
201 | inode->dirtied_when = jiffies; | 201 | inode->dirtied_when = jiffies; |
202 | } | 202 | } |
203 | list_move(&inode->i_list, &sb->s_dirty); | 203 | list_move(&inode->i_list, &sb->s_dirty); |
204 | } | 204 | } |
205 | 205 | ||
206 | /* | 206 | /* |
207 | * Requeue the inode for re-scanning after the sb->s_io list is exhausted. | 207 | * Requeue the inode for re-scanning after the sb->s_io list is exhausted. |
208 | */ | 208 | */ |
209 | static void requeue_io(struct inode *inode) | 209 | static void requeue_io(struct inode *inode) |
210 | { | 210 | { |
211 | list_move(&inode->i_list, &inode->i_sb->s_more_io); | 211 | list_move(&inode->i_list, &inode->i_sb->s_more_io); |
212 | } | 212 | } |
213 | 213 | ||
214 | static void inode_sync_complete(struct inode *inode) | 214 | static void inode_sync_complete(struct inode *inode) |
215 | { | 215 | { |
216 | /* | 216 | /* |
217 | * Prevent speculative execution through spin_unlock(&inode_lock); | 217 | * Prevent speculative execution through spin_unlock(&inode_lock); |
218 | */ | 218 | */ |
219 | smp_mb(); | 219 | smp_mb(); |
220 | wake_up_bit(&inode->i_state, __I_SYNC); | 220 | wake_up_bit(&inode->i_state, __I_SYNC); |
221 | } | 221 | } |
222 | 222 | ||
223 | static bool inode_dirtied_after(struct inode *inode, unsigned long t) | 223 | static bool inode_dirtied_after(struct inode *inode, unsigned long t) |
224 | { | 224 | { |
225 | bool ret = time_after(inode->dirtied_when, t); | 225 | bool ret = time_after(inode->dirtied_when, t); |
226 | #ifndef CONFIG_64BIT | 226 | #ifndef CONFIG_64BIT |
227 | /* | 227 | /* |
228 | * For inodes being constantly redirtied, dirtied_when can get stuck. | 228 | * For inodes being constantly redirtied, dirtied_when can get stuck. |
229 | * It _appears_ to be in the future, but is actually in distant past. | 229 | * It _appears_ to be in the future, but is actually in distant past. |
230 | * This test is necessary to prevent such wrapped-around relative times | 230 | * This test is necessary to prevent such wrapped-around relative times |
231 | * from permanently stopping the whole pdflush writeback. | 231 | * from permanently stopping the whole pdflush writeback. |
232 | */ | 232 | */ |
233 | ret = ret && time_before_eq(inode->dirtied_when, jiffies); | 233 | ret = ret && time_before_eq(inode->dirtied_when, jiffies); |
234 | #endif | 234 | #endif |
235 | return ret; | 235 | return ret; |
236 | } | 236 | } |
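The #ifndef CONFIG_64BIT guard above is easiest to see with concrete numbers. Below is a minimal userspace sketch, not kernel code: the two macros are re-derived from include/linux/jiffies.h semantics, and the int32_t cast is assumed to wrap two's-complement style, as it does on the platforms this code targets.

#include <stdio.h>
#include <stdint.h>

/* 32-bit analogues of the kernel's time_after()/time_before_eq(). */
#define time_after(a, b)      ((int32_t)((b) - (a)) < 0)
#define time_before_eq(a, b)  ((int32_t)((a) - (b)) <= 0)

int main(void)
{
        uint32_t jiffies = 1000;
        uint32_t recent = jiffies - 5;           /* freshly dirtied */
        uint32_t stale  = jiffies - 0x80000001u; /* ~25 days ago at HZ=1000 */

        /* Sane case: a recently dirtied inode is not "after" now. */
        printf("recent: time_after=%d\n", time_after(recent, jiffies));

        /* Wrapped case: the stale stamp _appears_ to be in the future,
         * but time_before_eq() exposes the lie, so the combined test in
         * inode_dirtied_after() still returns false and the inode is
         * not skipped forever. */
        printf("stale:  time_after=%d time_before_eq=%d\n",
               time_after(stale, jiffies),
               time_before_eq(stale, jiffies));
        return 0;
}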
237 | 237 | ||
238 | /* | 238 | /* |
239 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. | 239 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. |
240 | */ | 240 | */ |
241 | static void move_expired_inodes(struct list_head *delaying_queue, | 241 | static void move_expired_inodes(struct list_head *delaying_queue, |
242 | struct list_head *dispatch_queue, | 242 | struct list_head *dispatch_queue, |
243 | unsigned long *older_than_this) | 243 | unsigned long *older_than_this) |
244 | { | 244 | { |
245 | while (!list_empty(delaying_queue)) { | 245 | while (!list_empty(delaying_queue)) { |
246 | struct inode *inode = list_entry(delaying_queue->prev, | 246 | struct inode *inode = list_entry(delaying_queue->prev, |
247 | struct inode, i_list); | 247 | struct inode, i_list); |
248 | if (older_than_this && | 248 | if (older_than_this && |
249 | inode_dirtied_after(inode, *older_than_this)) | 249 | inode_dirtied_after(inode, *older_than_this)) |
250 | break; | 250 | break; |
251 | list_move(&inode->i_list, dispatch_queue); | 251 | list_move(&inode->i_list, dispatch_queue); |
252 | } | 252 | } |
253 | } | 253 | } |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * Queue all expired dirty inodes for io, eldest first. | 256 | * Queue all expired dirty inodes for io, eldest first. |
257 | */ | 257 | */ |
258 | static void queue_io(struct super_block *sb, | 258 | static void queue_io(struct super_block *sb, |
259 | unsigned long *older_than_this) | 259 | unsigned long *older_than_this) |
260 | { | 260 | { |
261 | list_splice_init(&sb->s_more_io, sb->s_io.prev); | 261 | list_splice_init(&sb->s_more_io, sb->s_io.prev); |
262 | move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); | 262 | move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); |
263 | } | 263 | } |
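For context, a hedged sketch of how a kupdate-style caller feeds older_than_this down to queue_io()/move_expired_inodes(); it is loosely modeled on the wb_kupdate() path of this era, and dirty_expire_interval (assumed to be in centisecs) and MAX_WRITEBACK_PAGES are assumptions, not part of this patch:

        /* Only inodes dirtied before the cutoff get queued for I/O. */
        unsigned long oldest_jif =
                jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
        struct writeback_control wbc = {
                .bdi             = NULL,
                .sync_mode       = WB_SYNC_NONE,
                .older_than_this = &oldest_jif,   /* the cutoff */
                .for_kupdate     = 1,
                .nr_to_write     = MAX_WRITEBACK_PAGES,
        };
        writeback_inodes(&wbc);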
264 | 264 | ||
265 | int sb_has_dirty_inodes(struct super_block *sb) | 265 | int sb_has_dirty_inodes(struct super_block *sb) |
266 | { | 266 | { |
267 | return !list_empty(&sb->s_dirty) || | 267 | return !list_empty(&sb->s_dirty) || |
268 | !list_empty(&sb->s_io) || | 268 | !list_empty(&sb->s_io) || |
269 | !list_empty(&sb->s_more_io); | 269 | !list_empty(&sb->s_more_io); |
270 | } | 270 | } |
271 | EXPORT_SYMBOL(sb_has_dirty_inodes); | 271 | EXPORT_SYMBOL(sb_has_dirty_inodes); |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * Write a single inode's dirty pages and inode data out to disk. | 274 | * Write a single inode's dirty pages and inode data out to disk. |
275 | * If `wait' is set, wait on the writeout. | 275 | * If `wait' is set, wait on the writeout. |
276 | * | 276 | * |
277 | * The whole writeout design is quite complex and fragile. We want to avoid | 277 | * The whole writeout design is quite complex and fragile. We want to avoid |
278 | * starvation of particular inodes when others are being redirtied, prevent | 278 | * starvation of particular inodes when others are being redirtied, prevent |
279 | * livelocks, etc. | 279 | * livelocks, etc. |
280 | * | 280 | * |
281 | * Called under inode_lock. | 281 | * Called under inode_lock. |
282 | */ | 282 | */ |
283 | static int | 283 | static int |
284 | __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | 284 | __sync_single_inode(struct inode *inode, struct writeback_control *wbc) |
285 | { | 285 | { |
286 | unsigned dirty; | 286 | unsigned dirty; |
287 | struct address_space *mapping = inode->i_mapping; | 287 | struct address_space *mapping = inode->i_mapping; |
288 | int wait = wbc->sync_mode == WB_SYNC_ALL; | 288 | int wait = wbc->sync_mode == WB_SYNC_ALL; |
289 | int ret; | 289 | int ret; |
290 | 290 | ||
291 | BUG_ON(inode->i_state & I_SYNC); | 291 | BUG_ON(inode->i_state & I_SYNC); |
292 | WARN_ON(inode->i_state & I_NEW); | 292 | WARN_ON(inode->i_state & I_NEW); |
293 | 293 | ||
294 | /* Set I_SYNC, reset I_DIRTY */ | 294 | /* Set I_SYNC, reset I_DIRTY */ |
295 | dirty = inode->i_state & I_DIRTY; | 295 | dirty = inode->i_state & I_DIRTY; |
296 | inode->i_state |= I_SYNC; | 296 | inode->i_state |= I_SYNC; |
297 | inode->i_state &= ~I_DIRTY; | 297 | inode->i_state &= ~I_DIRTY; |
298 | 298 | ||
299 | spin_unlock(&inode_lock); | 299 | spin_unlock(&inode_lock); |
300 | 300 | ||
301 | ret = do_writepages(mapping, wbc); | 301 | ret = do_writepages(mapping, wbc); |
302 | 302 | ||
303 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 303 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
304 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 304 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
305 | int err = write_inode(inode, wait); | 305 | int err = write_inode(inode, wait); |
306 | if (ret == 0) | 306 | if (ret == 0) |
307 | ret = err; | 307 | ret = err; |
308 | } | 308 | } |
309 | 309 | ||
310 | if (wait) { | 310 | if (wait) { |
311 | int err = filemap_fdatawait(mapping); | 311 | int err = filemap_fdatawait(mapping); |
312 | if (ret == 0) | 312 | if (ret == 0) |
313 | ret = err; | 313 | ret = err; |
314 | } | 314 | } |
315 | 315 | ||
316 | spin_lock(&inode_lock); | 316 | spin_lock(&inode_lock); |
317 | WARN_ON(inode->i_state & I_NEW); | 317 | WARN_ON(inode->i_state & I_NEW); |
318 | inode->i_state &= ~I_SYNC; | 318 | inode->i_state &= ~I_SYNC; |
319 | if (!(inode->i_state & I_FREEING)) { | 319 | if (!(inode->i_state & I_FREEING)) { |
320 | if (!(inode->i_state & I_DIRTY) && | 320 | if (!(inode->i_state & I_DIRTY) && |
321 | mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 321 | mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
322 | /* | 322 | /* |
323 | * We didn't write back all the pages. nfs_writepages() | 323 | * We didn't write back all the pages. nfs_writepages() |
324 | * sometimes bails out without doing anything. Redirty | 324 | * sometimes bails out without doing anything. Redirty |
325 | * the inode; Move it from s_io onto s_more_io/s_dirty. | 325 | * the inode; Move it from s_io onto s_more_io/s_dirty. |
326 | */ | 326 | */ |
327 | /* | 327 | /* |
328 | * akpm: if the caller was the kupdate function we put | 328 | * akpm: if the caller was the kupdate function we put |
329 | * this inode at the head of s_dirty so it gets first | 329 | * this inode at the head of s_dirty so it gets first |
330 | * consideration. Otherwise, move it to the tail, for | 330 | * consideration. Otherwise, move it to the tail, for |
331 | * the reasons described there. I'm not really sure | 331 | * the reasons described there. I'm not really sure |
332 | * how much sense this makes. Presumably I had good | 332 | * how much sense this makes. Presumably I had good |
333 | * reasons for doing it this way, and I'd rather not | 333 | * reasons for doing it this way, and I'd rather not |
334 | * muck with it at present. | 334 | * muck with it at present. |
335 | */ | 335 | */ |
336 | if (wbc->for_kupdate) { | 336 | if (wbc->for_kupdate) { |
337 | /* | 337 | /* |
338 | * For the kupdate function we move the inode | 338 | * For the kupdate function we move the inode |
339 | * to s_more_io so it will get more writeout as | 339 | * to s_more_io so it will get more writeout as |
340 | * soon as the queue becomes uncongested. | 340 | * soon as the queue becomes uncongested. |
341 | */ | 341 | */ |
342 | inode->i_state |= I_DIRTY_PAGES; | 342 | inode->i_state |= I_DIRTY_PAGES; |
343 | if (wbc->nr_to_write <= 0) { | 343 | if (wbc->nr_to_write <= 0) { |
344 | /* | 344 | /* |
345 | * slice used up: queue for next turn | 345 | * slice used up: queue for next turn |
346 | */ | 346 | */ |
347 | requeue_io(inode); | 347 | requeue_io(inode); |
348 | } else { | 348 | } else { |
349 | /* | 349 | /* |
350 | * somehow blocked: retry later | 350 | * somehow blocked: retry later |
351 | */ | 351 | */ |
352 | redirty_tail(inode); | 352 | redirty_tail(inode); |
353 | } | 353 | } |
354 | } else { | 354 | } else { |
355 | /* | 355 | /* |
356 | * Otherwise fully redirty the inode so that | 356 | * Otherwise fully redirty the inode so that |
357 | * other inodes on this superblock will get some | 357 | * other inodes on this superblock will get some |
358 | * writeout. Otherwise heavy writing to one | 358 | * writeout. Otherwise heavy writing to one |
359 | * file would indefinitely suspend writeout of | 359 | * file would indefinitely suspend writeout of |
360 | * all the other files. | 360 | * all the other files. |
361 | */ | 361 | */ |
362 | inode->i_state |= I_DIRTY_PAGES; | 362 | inode->i_state |= I_DIRTY_PAGES; |
363 | redirty_tail(inode); | 363 | redirty_tail(inode); |
364 | } | 364 | } |
365 | } else if (inode->i_state & I_DIRTY) { | 365 | } else if (inode->i_state & I_DIRTY) { |
366 | /* | 366 | /* |
367 | * Someone redirtied the inode while we were writing back | 367 | * Someone redirtied the inode while we were writing back |
368 | * the pages. | 368 | * the pages. |
369 | */ | 369 | */ |
370 | redirty_tail(inode); | 370 | redirty_tail(inode); |
371 | } else if (atomic_read(&inode->i_count)) { | 371 | } else if (atomic_read(&inode->i_count)) { |
372 | /* | 372 | /* |
373 | * The inode is clean, in use | 373 | * The inode is clean, in use |
374 | */ | 374 | */ |
375 | list_move(&inode->i_list, &inode_in_use); | 375 | list_move(&inode->i_list, &inode_in_use); |
376 | } else { | 376 | } else { |
377 | /* | 377 | /* |
378 | * The inode is clean, unused | 378 | * The inode is clean, unused |
379 | */ | 379 | */ |
380 | list_move(&inode->i_list, &inode_unused); | 380 | list_move(&inode->i_list, &inode_unused); |
381 | } | 381 | } |
382 | } | 382 | } |
383 | inode_sync_complete(inode); | 383 | inode_sync_complete(inode); |
384 | return ret; | 384 | return ret; |
385 | } | 385 | } |
386 | 386 | ||
387 | /* | 387 | /* |
388 | * Write out an inode's dirty pages. Called under inode_lock. Either the | 388 | * Write out an inode's dirty pages. Called under inode_lock. Either the |
389 | * caller has ref on the inode (either via __iget or via syscall against an fd) | 389 | * caller has ref on the inode (either via __iget or via syscall against an fd) |
390 | * or the inode has I_WILL_FREE set (via generic_forget_inode) | 390 | * or the inode has I_WILL_FREE set (via generic_forget_inode) |
391 | */ | 391 | */ |
392 | static int | 392 | static int |
393 | __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 393 | __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
394 | { | 394 | { |
395 | wait_queue_head_t *wqh; | 395 | wait_queue_head_t *wqh; |
396 | 396 | ||
397 | if (!atomic_read(&inode->i_count)) | 397 | if (!atomic_read(&inode->i_count)) |
398 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); | 398 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); |
399 | else | 399 | else |
400 | WARN_ON(inode->i_state & I_WILL_FREE); | 400 | WARN_ON(inode->i_state & I_WILL_FREE); |
401 | 401 | ||
402 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { | 402 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { |
403 | /* | 403 | /* |
404 | * We're skipping this inode because it's locked, and we're not | 404 | * We're skipping this inode because it's locked, and we're not |
405 | * doing writeback-for-data-integrity. Move it to s_more_io so | 405 | * doing writeback-for-data-integrity. Move it to s_more_io so |
406 | * that writeback can proceed with the other inodes on s_io. | 406 | * that writeback can proceed with the other inodes on s_io. |
407 | * We'll have another go at writing back this inode when we | 407 | * We'll have another go at writing back this inode when we |
408 | * completed a full scan of s_io. | 408 | * completed a full scan of s_io. |
409 | */ | 409 | */ |
410 | requeue_io(inode); | 410 | requeue_io(inode); |
411 | return 0; | 411 | return 0; |
412 | } | 412 | } |
413 | 413 | ||
414 | /* | 414 | /* |
415 | * It's a data-integrity sync. We must wait. | 415 | * It's a data-integrity sync. We must wait. |
416 | */ | 416 | */ |
417 | if (inode->i_state & I_SYNC) { | 417 | if (inode->i_state & I_SYNC) { |
418 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); | 418 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); |
419 | 419 | ||
420 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 420 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
421 | do { | 421 | do { |
422 | spin_unlock(&inode_lock); | 422 | spin_unlock(&inode_lock); |
423 | __wait_on_bit(wqh, &wq, inode_wait, | 423 | __wait_on_bit(wqh, &wq, inode_wait, |
424 | TASK_UNINTERRUPTIBLE); | 424 | TASK_UNINTERRUPTIBLE); |
425 | spin_lock(&inode_lock); | 425 | spin_lock(&inode_lock); |
426 | } while (inode->i_state & I_SYNC); | 426 | } while (inode->i_state & I_SYNC); |
427 | } | 427 | } |
428 | return __sync_single_inode(inode, wbc); | 428 | return __sync_single_inode(inode, wbc); |
429 | } | 429 | } |
430 | 430 | ||
431 | /* | 431 | /* |
432 | * Write out a superblock's list of dirty inodes. A wait will be performed | 432 | * Write out a superblock's list of dirty inodes. A wait will be performed |
433 | * upon no inodes, all inodes or the final one, depending upon sync_mode. | 433 | * upon no inodes, all inodes or the final one, depending upon sync_mode. |
434 | * | 434 | * |
435 | * If older_than_this is non-NULL, then only write out inodes which | 435 | * If older_than_this is non-NULL, then only write out inodes which |
436 | * had their first dirtying at a time earlier than *older_than_this. | 436 | * had their first dirtying at a time earlier than *older_than_this. |
437 | * | 437 | * |
438 | * If we're a pdflush thread, then implement pdflush collision avoidance | 438 | * If we're a pdflush thread, then implement pdflush collision avoidance |
439 | * against the entire list. | 439 | * against the entire list. |
440 | * | 440 | * |
441 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | 441 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. |
442 | * This function assumes that the blockdev superblock's inodes are backed by | 442 | * This function assumes that the blockdev superblock's inodes are backed by |
443 | * a variety of queues, so all inodes are searched. For other superblocks, | 443 | * a variety of queues, so all inodes are searched. For other superblocks, |
444 | * assume that all inodes are backed by the same queue. | 444 | * assume that all inodes are backed by the same queue. |
445 | * | 445 | * |
446 | * FIXME: this linear search could get expensive with many filesystems. But | 446 | * FIXME: this linear search could get expensive with many filesystems. But |
447 | * how to fix? We need to go from an address_space to all inodes which share | 447 | * how to fix? We need to go from an address_space to all inodes which share |
448 | * a queue with that address_space. (Easy: have a global "dirty superblocks" | 448 | * a queue with that address_space. (Easy: have a global "dirty superblocks" |
449 | * list). | 449 | * list). |
450 | * | 450 | * |
451 | * The inodes to be written are parked on sb->s_io. They are moved back onto | 451 | * The inodes to be written are parked on sb->s_io. They are moved back onto |
452 | * sb->s_dirty as they are selected for writing. This way, none can be missed | 452 | * sb->s_dirty as they are selected for writing. This way, none can be missed |
453 | * on the writer throttling path, and we get decent balancing between many | 453 | * on the writer throttling path, and we get decent balancing between many |
454 | * throttled threads: we don't want them all piling up on inode_sync_wait. | 454 | * throttled threads: we don't want them all piling up on inode_sync_wait. |
455 | */ | 455 | */ |
456 | void generic_sync_sb_inodes(struct super_block *sb, | 456 | void generic_sync_sb_inodes(struct super_block *sb, |
457 | struct writeback_control *wbc) | 457 | struct writeback_control *wbc) |
458 | { | 458 | { |
459 | const unsigned long start = jiffies; /* livelock avoidance */ | 459 | const unsigned long start = jiffies; /* livelock avoidance */ |
460 | int sync = wbc->sync_mode == WB_SYNC_ALL; | 460 | int sync = wbc->sync_mode == WB_SYNC_ALL; |
461 | 461 | ||
462 | spin_lock(&inode_lock); | 462 | spin_lock(&inode_lock); |
463 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) | 463 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) |
464 | queue_io(sb, wbc->older_than_this); | 464 | queue_io(sb, wbc->older_than_this); |
465 | 465 | ||
466 | while (!list_empty(&sb->s_io)) { | 466 | while (!list_empty(&sb->s_io)) { |
467 | struct inode *inode = list_entry(sb->s_io.prev, | 467 | struct inode *inode = list_entry(sb->s_io.prev, |
468 | struct inode, i_list); | 468 | struct inode, i_list); |
469 | struct address_space *mapping = inode->i_mapping; | 469 | struct address_space *mapping = inode->i_mapping; |
470 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 470 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
471 | long pages_skipped; | 471 | long pages_skipped; |
472 | 472 | ||
473 | if (!bdi_cap_writeback_dirty(bdi)) { | 473 | if (!bdi_cap_writeback_dirty(bdi)) { |
474 | redirty_tail(inode); | 474 | redirty_tail(inode); |
475 | if (sb_is_blkdev_sb(sb)) { | 475 | if (sb_is_blkdev_sb(sb)) { |
476 | /* | 476 | /* |
477 | * Dirty memory-backed blockdev: the ramdisk | 477 | * Dirty memory-backed blockdev: the ramdisk |
478 | * driver does this. Skip just this inode | 478 | * driver does this. Skip just this inode |
479 | */ | 479 | */ |
480 | continue; | 480 | continue; |
481 | } | 481 | } |
482 | /* | 482 | /* |
483 | * Dirty memory-backed inode against a filesystem other | 483 | * Dirty memory-backed inode against a filesystem other |
484 | * than the kernel-internal bdev filesystem. Skip the | 484 | * than the kernel-internal bdev filesystem. Skip the |
485 | * entire superblock. | 485 | * entire superblock. |
486 | */ | 486 | */ |
487 | break; | 487 | break; |
488 | } | 488 | } |
489 | 489 | ||
490 | if (inode->i_state & I_NEW) { | 490 | if (inode->i_state & I_NEW) { |
491 | requeue_io(inode); | 491 | requeue_io(inode); |
492 | continue; | 492 | continue; |
493 | } | 493 | } |
494 | 494 | ||
495 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 495 | if (wbc->nonblocking && bdi_write_congested(bdi)) { |
496 | wbc->encountered_congestion = 1; | 496 | wbc->encountered_congestion = 1; |
497 | if (!sb_is_blkdev_sb(sb)) | 497 | if (!sb_is_blkdev_sb(sb)) |
498 | break; /* Skip a congested fs */ | 498 | break; /* Skip a congested fs */ |
499 | requeue_io(inode); | 499 | requeue_io(inode); |
500 | continue; /* Skip a congested blockdev */ | 500 | continue; /* Skip a congested blockdev */ |
501 | } | 501 | } |
502 | 502 | ||
503 | if (wbc->bdi && bdi != wbc->bdi) { | 503 | if (wbc->bdi && bdi != wbc->bdi) { |
504 | if (!sb_is_blkdev_sb(sb)) | 504 | if (!sb_is_blkdev_sb(sb)) |
505 | break; /* fs has the wrong queue */ | 505 | break; /* fs has the wrong queue */ |
506 | requeue_io(inode); | 506 | requeue_io(inode); |
507 | continue; /* blockdev has wrong queue */ | 507 | continue; /* blockdev has wrong queue */ |
508 | } | 508 | } |
509 | 509 | ||
510 | /* | 510 | /* |
511 | * Was this inode dirtied after sync_sb_inodes was called? | 511 | * Was this inode dirtied after sync_sb_inodes was called? |
512 | * This keeps sync from doing extra jobs and from livelocking. | 512 | * This keeps sync from doing extra jobs and from livelocking. |
513 | */ | 513 | */ |
514 | if (inode_dirtied_after(inode, start)) | 514 | if (inode_dirtied_after(inode, start)) |
515 | break; | 515 | break; |
516 | 516 | ||
517 | /* Is another pdflush already flushing this queue? */ | 517 | /* Is another pdflush already flushing this queue? */ |
518 | if (current_is_pdflush() && !writeback_acquire(bdi)) | 518 | if (current_is_pdflush() && !writeback_acquire(bdi)) |
519 | break; | 519 | break; |
520 | 520 | ||
521 | BUG_ON(inode->i_state & I_FREEING); | 521 | BUG_ON(inode->i_state & I_FREEING); |
522 | __iget(inode); | 522 | __iget(inode); |
523 | pages_skipped = wbc->pages_skipped; | 523 | pages_skipped = wbc->pages_skipped; |
524 | __writeback_single_inode(inode, wbc); | 524 | __writeback_single_inode(inode, wbc); |
525 | if (current_is_pdflush()) | 525 | if (current_is_pdflush()) |
526 | writeback_release(bdi); | 526 | writeback_release(bdi); |
527 | if (wbc->pages_skipped != pages_skipped) { | 527 | if (wbc->pages_skipped != pages_skipped) { |
528 | /* | 528 | /* |
529 | * writeback is not making progress due to locked | 529 | * writeback is not making progress due to locked |
530 | * buffers. Skip this inode for now. | 530 | * buffers. Skip this inode for now. |
531 | */ | 531 | */ |
532 | redirty_tail(inode); | 532 | redirty_tail(inode); |
533 | } | 533 | } |
534 | spin_unlock(&inode_lock); | 534 | spin_unlock(&inode_lock); |
535 | iput(inode); | 535 | iput(inode); |
536 | cond_resched(); | 536 | cond_resched(); |
537 | spin_lock(&inode_lock); | 537 | spin_lock(&inode_lock); |
538 | if (wbc->nr_to_write <= 0) { | 538 | if (wbc->nr_to_write <= 0) { |
539 | wbc->more_io = 1; | 539 | wbc->more_io = 1; |
540 | break; | 540 | break; |
541 | } | 541 | } |
542 | if (!list_empty(&sb->s_more_io)) | 542 | if (!list_empty(&sb->s_more_io)) |
543 | wbc->more_io = 1; | 543 | wbc->more_io = 1; |
544 | } | 544 | } |
545 | 545 | ||
546 | if (sync) { | 546 | if (sync) { |
547 | struct inode *inode, *old_inode = NULL; | 547 | struct inode *inode, *old_inode = NULL; |
548 | 548 | ||
549 | /* | 549 | /* |
550 | * Data integrity sync. Must wait for all pages under writeback, | 550 | * Data integrity sync. Must wait for all pages under writeback, |
551 | * because there may have been pages dirtied before our sync | 551 | * because there may have been pages dirtied before our sync |
552 | * call, but which had writeout started before we write it out. | 552 | * call, but which had writeout started before we write it out. |
553 | * In which case, the inode may not be on the dirty list, but | 553 | * In which case, the inode may not be on the dirty list, but |
554 | * we still have to wait for that writeout. | 554 | * we still have to wait for that writeout. |
555 | */ | 555 | */ |
556 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 556 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
557 | struct address_space *mapping; | 557 | struct address_space *mapping; |
558 | 558 | ||
559 | if (inode->i_state & | 559 | if (inode->i_state & |
560 | (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 560 | (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) |
561 | continue; | 561 | continue; |
562 | mapping = inode->i_mapping; | 562 | mapping = inode->i_mapping; |
563 | if (mapping->nrpages == 0) | 563 | if (mapping->nrpages == 0) |
564 | continue; | 564 | continue; |
565 | __iget(inode); | 565 | __iget(inode); |
566 | spin_unlock(&inode_lock); | 566 | spin_unlock(&inode_lock); |
567 | /* | 567 | /* |
568 | * We hold a reference to 'inode' so it couldn't have | 568 | * We hold a reference to 'inode' so it couldn't have |
569 | * been removed from s_inodes list while we dropped the | 569 | * been removed from s_inodes list while we dropped the |
570 | * inode_lock. We cannot iput the inode now as we can | 570 | * inode_lock. We cannot iput the inode now as we can |
571 | * be holding the last reference and we cannot iput it | 571 | * be holding the last reference and we cannot iput it |
572 | * under inode_lock. So we keep the reference and iput | 572 | * under inode_lock. So we keep the reference and iput |
573 | * it later. | 573 | * it later. |
574 | */ | 574 | */ |
575 | iput(old_inode); | 575 | iput(old_inode); |
576 | old_inode = inode; | 576 | old_inode = inode; |
577 | 577 | ||
578 | filemap_fdatawait(mapping); | 578 | filemap_fdatawait(mapping); |
579 | 579 | ||
580 | cond_resched(); | 580 | cond_resched(); |
581 | 581 | ||
582 | spin_lock(&inode_lock); | 582 | spin_lock(&inode_lock); |
583 | } | 583 | } |
584 | spin_unlock(&inode_lock); | 584 | spin_unlock(&inode_lock); |
585 | iput(old_inode); | 585 | iput(old_inode); |
586 | } else | 586 | } else |
587 | spin_unlock(&inode_lock); | 587 | spin_unlock(&inode_lock); |
588 | 588 | ||
589 | return; /* Leave any unwritten inodes on s_io */ | 589 | return; /* Leave any unwritten inodes on s_io */ |
590 | } | 590 | } |
591 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); | 591 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); |
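A hedged usage sketch: a data-integrity sync of one superblock through generic_sync_sb_inodes(), using the same writeback_control that sync_inodes_sb(sb, 1) below ends up building:

        struct writeback_control wbc = {
                .sync_mode   = WB_SYNC_ALL,   /* write and wait */
                .nr_to_write = LONG_MAX,
                .range_start = 0,
                .range_end   = LLONG_MAX,
        };
        generic_sync_sb_inodes(sb, &wbc);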
592 | 592 | ||
593 | static void sync_sb_inodes(struct super_block *sb, | 593 | static void sync_sb_inodes(struct super_block *sb, |
594 | struct writeback_control *wbc) | 594 | struct writeback_control *wbc) |
595 | { | 595 | { |
596 | generic_sync_sb_inodes(sb, wbc); | 596 | generic_sync_sb_inodes(sb, wbc); |
597 | } | 597 | } |
598 | 598 | ||
599 | /* | 599 | /* |
600 | * Start writeback of dirty pagecache data against all unlocked inodes. | 600 | * Start writeback of dirty pagecache data against all unlocked inodes. |
601 | * | 601 | * |
602 | * Note: | 602 | * Note: |
603 | * We don't need to grab a reference to superblock here. If it has non-empty | 603 | * We don't need to grab a reference to superblock here. If it has non-empty |
604 | * ->s_dirty it hasn't been killed yet and kill_super() won't proceed | 604 | * ->s_dirty it hasn't been killed yet and kill_super() won't proceed |
605 | * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all | 605 | * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all |
606 | * empty. Since __sync_single_inode() regains inode_lock before it finally moves | 606 | * empty. Since __sync_single_inode() regains inode_lock before it finally moves |
607 | * inode from superblock lists we are OK. | 607 | * inode from superblock lists we are OK. |
608 | * | 608 | * |
609 | * If `older_than_this' is non-zero then only flush inodes which have a | 609 | * If `older_than_this' is non-zero then only flush inodes which have a |
610 | * flushtime older than *older_than_this. | 610 | * flushtime older than *older_than_this. |
611 | * | 611 | * |
612 | * If `bdi' is non-zero then we will scan the first inode against each | 612 | * If `bdi' is non-zero then we will scan the first inode against each |
613 | * superblock until we find the matching ones. One group will be the dirty | 613 | * superblock until we find the matching ones. One group will be the dirty |
614 | * inodes against a filesystem. Then when we hit the dummy blockdev superblock, | 614 | * inodes against a filesystem. Then when we hit the dummy blockdev superblock, |
615 | * sync_sb_inodes will seek out the blockdev which matches `bdi'. Maybe not | 615 | * sync_sb_inodes will seek out the blockdev which matches `bdi'. Maybe not |
616 | * super-efficient but we're about to do a ton of I/O... | 616 | * super-efficient but we're about to do a ton of I/O... |
617 | */ | 617 | */ |
618 | void | 618 | void |
619 | writeback_inodes(struct writeback_control *wbc) | 619 | writeback_inodes(struct writeback_control *wbc) |
620 | { | 620 | { |
621 | struct super_block *sb; | 621 | struct super_block *sb; |
622 | 622 | ||
623 | might_sleep(); | 623 | might_sleep(); |
624 | spin_lock(&sb_lock); | 624 | spin_lock(&sb_lock); |
625 | restart: | 625 | restart: |
626 | list_for_each_entry_reverse(sb, &super_blocks, s_list) { | 626 | list_for_each_entry_reverse(sb, &super_blocks, s_list) { |
627 | if (sb_has_dirty_inodes(sb)) { | 627 | if (sb_has_dirty_inodes(sb)) { |
628 | /* we're making our own get_super here */ | 628 | /* we're making our own get_super here */ |
629 | sb->s_count++; | 629 | sb->s_count++; |
630 | spin_unlock(&sb_lock); | 630 | spin_unlock(&sb_lock); |
631 | /* | 631 | /* |
632 | * If we can't get the readlock, there's no sense in | 632 | * If we can't get the readlock, there's no sense in |
633 | * waiting around, most of the time the FS is going to | 633 | * waiting around, most of the time the FS is going to |
634 | * be unmounted by the time it is released. | 634 | * be unmounted by the time it is released. |
635 | */ | 635 | */ |
636 | if (down_read_trylock(&sb->s_umount)) { | 636 | if (down_read_trylock(&sb->s_umount)) { |
637 | if (sb->s_root) | 637 | if (sb->s_root) |
638 | sync_sb_inodes(sb, wbc); | 638 | sync_sb_inodes(sb, wbc); |
639 | up_read(&sb->s_umount); | 639 | up_read(&sb->s_umount); |
640 | } | 640 | } |
641 | spin_lock(&sb_lock); | 641 | spin_lock(&sb_lock); |
642 | if (__put_super_and_need_restart(sb)) | 642 | if (__put_super_and_need_restart(sb)) |
643 | goto restart; | 643 | goto restart; |
644 | } | 644 | } |
645 | if (wbc->nr_to_write <= 0) | 645 | if (wbc->nr_to_write <= 0) |
646 | break; | 646 | break; |
647 | } | 647 | } |
648 | spin_unlock(&sb_lock); | 648 | spin_unlock(&sb_lock); |
649 | } | 649 | } |
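A hedged sketch of a background_writeout()-style caller driving the function above until enough pages are cleaned; the loop shape is loosely modeled on mm/page-writeback.c of this era, and MAX_WRITEBACK_PAGES plus the 1024-page target are illustrative assumptions:

        long min_pages = 1024;                    /* pages to clean */
        struct writeback_control wbc = {
                .bdi             = NULL,          /* all queues */
                .sync_mode       = WB_SYNC_NONE,
                .older_than_this = NULL,
                .nonblocking     = 1,
        };
        while (min_pages > 0) {
                wbc.more_io = 0;
                wbc.encountered_congestion = 0;
                wbc.nr_to_write = MAX_WRITEBACK_PAGES;
                writeback_inodes(&wbc);
                min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                /* Wrote less than a full slice and s_more_io is empty:
                 * nothing left to do. */
                if (wbc.nr_to_write > 0 && !wbc.more_io)
                        break;
        }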
650 | 650 | ||
651 | /* | 651 | /* |
652 | * writeback and wait upon the filesystem's dirty inodes. The caller will | 652 | * writeback and wait upon the filesystem's dirty inodes. The caller will |
653 | * do this in two passes - one to write, and one to wait. | 653 | * do this in two passes - one to write, and one to wait. |
654 | * | 654 | * |
655 | * A finite limit is set on the number of pages which will be written, | 655 | * A finite limit is set on the number of pages which will be written, |
656 | * to prevent infinite livelock of sys_sync(). | 656 | * to prevent infinite livelock of sys_sync(). |
657 | * | 657 | * |
658 | * We add in the number of potentially dirty inodes, because each inode write | 658 | * We add in the number of potentially dirty inodes, because each inode write |
659 | * can dirty pagecache in the underlying blockdev. | 659 | * can dirty pagecache in the underlying blockdev. |
660 | */ | 660 | */ |
661 | void sync_inodes_sb(struct super_block *sb, int wait) | 661 | void sync_inodes_sb(struct super_block *sb, int wait) |
662 | { | 662 | { |
663 | struct writeback_control wbc = { | 663 | struct writeback_control wbc = { |
664 | .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, | 664 | .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, |
665 | .range_start = 0, | 665 | .range_start = 0, |
666 | .range_end = LLONG_MAX, | 666 | .range_end = LLONG_MAX, |
667 | }; | 667 | }; |
668 | 668 | ||
669 | if (!wait) { | 669 | if (!wait) { |
670 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 670 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
671 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 671 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
672 | 672 | ||
673 | wbc.nr_to_write = nr_dirty + nr_unstable + | 673 | wbc.nr_to_write = nr_dirty + nr_unstable + |
674 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 674 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
675 | } else | 675 | } else |
676 | wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ | 676 | wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ |
677 | 677 | ||
678 | sync_sb_inodes(sb, &wbc); | 678 | sync_sb_inodes(sb, &wbc); |
679 | } | 679 | } |
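The two-pass usage the comment above describes, as a short sketch (this is the shape of the fsync_super() path this commit consolidates):

        sync_inodes_sb(sb, 0);   /* pass 1: start writeout, don't wait */
        sync_inodes_sb(sb, 1);   /* pass 2: wait on the writeout */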
680 | 680 | ||
681 | /** | 681 | /** |
682 | * sync_inodes - writes all inodes to disk | ||
683 | * @wait: wait for completion | ||
684 | * | ||
685 | * sync_inodes() goes through each super block's dirty inode list, writes the | ||
686 | * inodes out, waits on the writeout and puts the inodes back on the normal | ||
687 | * list. | ||
688 | * | ||
689 | * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle | ||
690 | * part of the sync functions is that the blockdev "superblock" is processed | ||
691 | * last. This is because the write_inode() function of a typical fs will | ||
692 | * perform no I/O, but will mark buffers in the blockdev mapping as dirty. | ||
693 | * What we want to do is to perform all that dirtying first, and then write | ||
694 | * back all those inode blocks via the blockdev mapping in one sweep. So the | ||
695 | * additional (somewhat redundant) sync_blockdev() calls here are to make | ||
696 | * sure that really happens. Because if we call sync_inodes_sb(wait=1) with | ||
697 | * outstanding dirty inodes, the writeback goes block-at-a-time within the | ||
698 | * filesystem's write_inode(). This is extremely slow. | ||
699 | */ | ||
700 | static void __sync_inodes(int wait) | ||
701 | { | ||
702 | struct super_block *sb; | ||
703 | |||
704 | spin_lock(&sb_lock); | ||
705 | restart: | ||
706 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
707 | sb->s_count++; | ||
708 | spin_unlock(&sb_lock); | ||
709 | down_read(&sb->s_umount); | ||
710 | if (sb->s_root) { | ||
711 | sync_inodes_sb(sb, wait); | ||
712 | sync_blockdev(sb->s_bdev); | ||
713 | } | ||
714 | up_read(&sb->s_umount); | ||
715 | spin_lock(&sb_lock); | ||
716 | if (__put_super_and_need_restart(sb)) | ||
717 | goto restart; | ||
718 | } | ||
719 | spin_unlock(&sb_lock); | ||
720 | } | ||
721 | |||
722 | void sync_inodes(int wait) | ||
723 | { | ||
724 | __sync_inodes(0); | ||
725 | |||
726 | if (wait) | ||
727 | __sync_inodes(1); | ||
728 | } | ||
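A hedged sketch of a sys_sync()-era caller, loosely following fs/sync.c's do_sync() of this period; sync_inodes() already performs the async-then-sync dance internally, and the quota/superblock/filesystem-level steps in between are elided here:

        sync_inodes(0);   /* push all mappings and inodes, don't wait */
        /* ... vfs_dq_sync(), sync_supers(), sync_filesystems() ... */
        sync_inodes(1);   /* once more, waiting for completion */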
729 | |||
730 | /** | ||
731 | * write_inode_now - write an inode to disk | 682 | * write_inode_now - write an inode to disk |
732 | * @inode: inode to write to disk | 683 | * @inode: inode to write to disk |
733 | * @sync: whether the write should be synchronous or not | 684 | * @sync: whether the write should be synchronous or not |
734 | * | 685 | * |
735 | * This function commits an inode to disk immediately if it is dirty. This is | 686 | * This function commits an inode to disk immediately if it is dirty. This is |
736 | * primarily needed by knfsd. | 687 | * primarily needed by knfsd. |
737 | * | 688 | * |
738 | * The caller must either have a ref on the inode or must have set I_WILL_FREE. | 689 | * The caller must either have a ref on the inode or must have set I_WILL_FREE. |
739 | */ | 690 | */ |
740 | int write_inode_now(struct inode *inode, int sync) | 691 | int write_inode_now(struct inode *inode, int sync) |
741 | { | 692 | { |
742 | int ret; | 693 | int ret; |
743 | struct writeback_control wbc = { | 694 | struct writeback_control wbc = { |
744 | .nr_to_write = LONG_MAX, | 695 | .nr_to_write = LONG_MAX, |
745 | .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, | 696 | .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, |
746 | .range_start = 0, | 697 | .range_start = 0, |
747 | .range_end = LLONG_MAX, | 698 | .range_end = LLONG_MAX, |
748 | }; | 699 | }; |
749 | 700 | ||
750 | if (!mapping_cap_writeback_dirty(inode->i_mapping)) | 701 | if (!mapping_cap_writeback_dirty(inode->i_mapping)) |
751 | wbc.nr_to_write = 0; | 702 | wbc.nr_to_write = 0; |
752 | 703 | ||
753 | might_sleep(); | 704 | might_sleep(); |
754 | spin_lock(&inode_lock); | 705 | spin_lock(&inode_lock); |
755 | ret = __writeback_single_inode(inode, &wbc); | 706 | ret = __writeback_single_inode(inode, &wbc); |
756 | spin_unlock(&inode_lock); | 707 | spin_unlock(&inode_lock); |
757 | if (sync) | 708 | if (sync) |
758 | inode_sync_wait(inode); | 709 | inode_sync_wait(inode); |
759 | return ret; | 710 | return ret; |
760 | } | 711 | } |
761 | EXPORT_SYMBOL(write_inode_now); | 712 | EXPORT_SYMBOL(write_inode_now); |
762 | 713 | ||
763 | /** | 714 | /** |
764 | * sync_inode - write an inode and its pages to disk. | 715 | * sync_inode - write an inode and its pages to disk. |
765 | * @inode: the inode to sync | 716 | * @inode: the inode to sync |
766 | * @wbc: controls the writeback mode | 717 | * @wbc: controls the writeback mode |
767 | * | 718 | * |
768 | * sync_inode() will write an inode and its pages to disk. It will also | 719 | * sync_inode() will write an inode and its pages to disk. It will also |
769 | * correctly update the inode on its superblock's dirty inode lists and will | 720 | * correctly update the inode on its superblock's dirty inode lists and will |
770 | * update inode->i_state. | 721 | * update inode->i_state. |
771 | * | 722 | * |
772 | * The caller must have a ref on the inode. | 723 | * The caller must have a ref on the inode. |
773 | */ | 724 | */ |
774 | int sync_inode(struct inode *inode, struct writeback_control *wbc) | 725 | int sync_inode(struct inode *inode, struct writeback_control *wbc) |
775 | { | 726 | { |
776 | int ret; | 727 | int ret; |
777 | 728 | ||
778 | spin_lock(&inode_lock); | 729 | spin_lock(&inode_lock); |
779 | ret = __writeback_single_inode(inode, wbc); | 730 | ret = __writeback_single_inode(inode, wbc); |
780 | spin_unlock(&inode_lock); | 731 | spin_unlock(&inode_lock); |
781 | return ret; | 732 | return ret; |
782 | } | 733 | } |
783 | EXPORT_SYMBOL(sync_inode); | 734 | EXPORT_SYMBOL(sync_inode); |
784 | 735 | ||
785 | /** | 736 | /** |
786 | * generic_osync_inode - flush all dirty data for a given inode to disk | 737 | * generic_osync_inode - flush all dirty data for a given inode to disk |
787 | * @inode: inode to write | 738 | * @inode: inode to write |
788 | * @mapping: the address_space that should be flushed | 739 | * @mapping: the address_space that should be flushed |
789 | * @what: what to write and wait upon | 740 | * @what: what to write and wait upon |
790 | * | 741 | * |
791 | * This can be called by file_write functions for files which have the | 742 | * This can be called by file_write functions for files which have the |
792 | * O_SYNC flag set, to flush dirty writes to disk. | 743 | * O_SYNC flag set, to flush dirty writes to disk. |
793 | * | 744 | * |
794 | * @what is a bitmask, specifying which part of the inode's data should be | 745 | * @what is a bitmask, specifying which part of the inode's data should be |
795 | * written and waited upon. | 746 | * written and waited upon. |
796 | * | 747 | * |
797 | * OSYNC_DATA: i_mapping's dirty data | 748 | * OSYNC_DATA: i_mapping's dirty data |
798 | * OSYNC_METADATA: the buffers at i_mapping->private_list | 749 | * OSYNC_METADATA: the buffers at i_mapping->private_list |
799 | * OSYNC_INODE: the inode itself | 750 | * OSYNC_INODE: the inode itself |
800 | */ | 751 | */ |
801 | 752 | ||
802 | int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what) | 753 | int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what) |
803 | { | 754 | { |
804 | int err = 0; | 755 | int err = 0; |
805 | int need_write_inode_now = 0; | 756 | int need_write_inode_now = 0; |
806 | int err2; | 757 | int err2; |
807 | 758 | ||
808 | if (what & OSYNC_DATA) | 759 | if (what & OSYNC_DATA) |
809 | err = filemap_fdatawrite(mapping); | 760 | err = filemap_fdatawrite(mapping); |
810 | if (what & (OSYNC_METADATA|OSYNC_DATA)) { | 761 | if (what & (OSYNC_METADATA|OSYNC_DATA)) { |
811 | err2 = sync_mapping_buffers(mapping); | 762 | err2 = sync_mapping_buffers(mapping); |
812 | if (!err) | 763 | if (!err) |
813 | err = err2; | 764 | err = err2; |
814 | } | 765 | } |
815 | if (what & OSYNC_DATA) { | 766 | if (what & OSYNC_DATA) { |
816 | err2 = filemap_fdatawait(mapping); | 767 | err2 = filemap_fdatawait(mapping); |
817 | if (!err) | 768 | if (!err) |
818 | err = err2; | 769 | err = err2; |
819 | } | 770 | } |
820 | 771 | ||
821 | spin_lock(&inode_lock); | 772 | spin_lock(&inode_lock); |
822 | if ((inode->i_state & I_DIRTY) && | 773 | if ((inode->i_state & I_DIRTY) && |
823 | ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) | 774 | ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) |
824 | need_write_inode_now = 1; | 775 | need_write_inode_now = 1; |
825 | spin_unlock(&inode_lock); | 776 | spin_unlock(&inode_lock); |
826 | 777 | ||
827 | if (need_write_inode_now) { | 778 | if (need_write_inode_now) { |
828 | err2 = write_inode_now(inode, 1); | 779 | err2 = write_inode_now(inode, 1); |
829 | if (!err) | 780 | if (!err) |
830 | err = err2; | 781 | err = err2; |
831 | } | 782 | } |
832 | else | 783 | else |
833 | inode_sync_wait(inode); | 784 | inode_sync_wait(inode); |
834 | 785 | ||
835 | return err; | 786 | return err; |
836 | } | 787 | } |
837 | EXPORT_SYMBOL(generic_osync_inode); | 788 | EXPORT_SYMBOL(generic_osync_inode); |
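A hedged usage sketch for an O_SYNC buffered-write path: after copying the data, flush both the pages and the associated metadata, per the @what bitmask documented above (the OSYNC_METADATA|OSYNC_DATA combination mirrors what the generic write paths of this era pass):

        if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
                int err2 = generic_osync_inode(inode, file->f_mapping,
                                               OSYNC_METADATA | OSYNC_DATA);
                if (!err)
                        err = err2;
        }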
838 | 789 |
fs/internal.h
1 | /* fs/ internal definitions | 1 | /* fs/ internal definitions |
2 | * | 2 | * |
3 | * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License | 7 | * modify it under the terms of the GNU General Public License |
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | struct super_block; | 12 | struct super_block; |
13 | struct linux_binprm; | 13 | struct linux_binprm; |
14 | struct path; | 14 | struct path; |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * block_dev.c | 17 | * block_dev.c |
18 | */ | 18 | */ |
19 | #ifdef CONFIG_BLOCK | 19 | #ifdef CONFIG_BLOCK |
20 | extern struct super_block *blockdev_superblock; | 20 | extern struct super_block *blockdev_superblock; |
21 | extern void __init bdev_cache_init(void); | 21 | extern void __init bdev_cache_init(void); |
22 | 22 | ||
23 | static inline int sb_is_blkdev_sb(struct super_block *sb) | 23 | static inline int sb_is_blkdev_sb(struct super_block *sb) |
24 | { | 24 | { |
25 | return sb == blockdev_superblock; | 25 | return sb == blockdev_superblock; |
26 | } | 26 | } |
27 | 27 | ||
28 | extern int __sync_blockdev(struct block_device *bdev, int wait); | ||
29 | |||
28 | #else | 30 | #else |
29 | static inline void bdev_cache_init(void) | 31 | static inline void bdev_cache_init(void) |
30 | { | 32 | { |
31 | } | 33 | } |
32 | 34 | ||
33 | static inline int sb_is_blkdev_sb(struct super_block *sb) | 35 | static inline int sb_is_blkdev_sb(struct super_block *sb) |
34 | { | 36 | { |
35 | return 0; | 37 | return 0; |
36 | } | 38 | } |
39 | |||
40 | static inline int __sync_blockdev(struct block_device *bdev, int wait) | ||
41 | { | ||
42 | return 0; | ||
43 | } | ||
37 | #endif | 44 | #endif |
38 | 45 | ||
39 | /* | 46 | /* |
40 | * char_dev.c | 47 | * char_dev.c |
41 | */ | 48 | */ |
42 | extern void __init chrdev_init(void); | 49 | extern void __init chrdev_init(void); |
43 | 50 | ||
44 | /* | 51 | /* |
45 | * exec.c | 52 | * exec.c |
46 | */ | 53 | */ |
47 | extern int check_unsafe_exec(struct linux_binprm *); | 54 | extern int check_unsafe_exec(struct linux_binprm *); |
48 | 55 | ||
49 | /* | 56 | /* |
50 | * namespace.c | 57 | * namespace.c |
51 | */ | 58 | */ |
52 | extern int copy_mount_options(const void __user *, unsigned long *); | 59 | extern int copy_mount_options(const void __user *, unsigned long *); |
53 | 60 | ||
54 | extern void free_vfsmnt(struct vfsmount *); | 61 | extern void free_vfsmnt(struct vfsmount *); |
55 | extern struct vfsmount *alloc_vfsmnt(const char *); | 62 | extern struct vfsmount *alloc_vfsmnt(const char *); |
56 | extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); | 63 | extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); |
57 | extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, | 64 | extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, |
58 | struct vfsmount *); | 65 | struct vfsmount *); |
59 | extern void release_mounts(struct list_head *); | 66 | extern void release_mounts(struct list_head *); |
60 | extern void umount_tree(struct vfsmount *, int, struct list_head *); | 67 | extern void umount_tree(struct vfsmount *, int, struct list_head *); |
61 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | 68 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); |
62 | 69 | ||
63 | extern void __init mnt_init(void); | 70 | extern void __init mnt_init(void); |
64 | 71 | ||
65 | /* | 72 | /* |
66 | * fs_struct.c | 73 | * fs_struct.c |
67 | */ | 74 | */ |
68 | extern void chroot_fs_refs(struct path *, struct path *); | 75 | extern void chroot_fs_refs(struct path *, struct path *); |
69 | 76 | ||
70 | /* | 77 | /* |
71 | * file_table.c | 78 | * file_table.c |
72 | */ | 79 | */ |
73 | extern void mark_files_ro(struct super_block *); | 80 | extern void mark_files_ro(struct super_block *); |
74 | |||
75 | /* | ||
76 | * super.c |
fs/super.c
1 | /* | 1 | /* |
2 | * linux/fs/super.c | 2 | * linux/fs/super.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * | 5 | * |
6 | * super.c contains code to handle: - mount structures | 6 | * super.c contains code to handle: - mount structures |
7 | * - super-block tables | 7 | * - super-block tables |
8 | * - filesystem drivers list | 8 | * - filesystem drivers list |
9 | * - mount system call | 9 | * - mount system call |
10 | * - umount system call | 10 | * - umount system call |
11 | * - ustat system call | 11 | * - ustat system call |
12 | * | 12 | * |
13 | * GK 2/5/95 - Changed to support mounting the root fs via NFS | 13 | * GK 2/5/95 - Changed to support mounting the root fs via NFS |
14 | * | 14 | * |
15 | * Added kerneld support: Jacques Gelinas and Bjorn Ekwall | 15 | * Added kerneld support: Jacques Gelinas and Bjorn Ekwall |
16 | * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 | 16 | * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 |
17 | * Added options to /proc/mounts: | 17 | * Added options to /proc/mounts: |
18 | * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. | 18 | * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. |
19 | * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 | 19 | * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 |
20 | * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 | 20 | * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/init.h> | 25 | #include <linux/init.h> |
26 | #include <linux/smp_lock.h> | 26 | #include <linux/smp_lock.h> |
27 | #include <linux/acct.h> | 27 | #include <linux/acct.h> |
28 | #include <linux/blkdev.h> | 28 | #include <linux/blkdev.h> |
29 | #include <linux/quotaops.h> | 29 | #include <linux/quotaops.h> |
30 | #include <linux/namei.h> | 30 | #include <linux/namei.h> |
31 | #include <linux/buffer_head.h> /* for fsync_super() */ | 31 | #include <linux/buffer_head.h> /* for fsync_super() */ |
32 | #include <linux/mount.h> | 32 | #include <linux/mount.h> |
33 | #include <linux/security.h> | 33 | #include <linux/security.h> |
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/vfs.h> | 35 | #include <linux/vfs.h> |
36 | #include <linux/writeback.h> /* for the emergency remount stuff */ | 36 | #include <linux/writeback.h> /* for the emergency remount stuff */ |
37 | #include <linux/idr.h> | 37 | #include <linux/idr.h> |
38 | #include <linux/kobject.h> | 38 | #include <linux/kobject.h> |
39 | #include <linux/mutex.h> | 39 | #include <linux/mutex.h> |
40 | #include <linux/file.h> | 40 | #include <linux/file.h> |
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | #include "internal.h" | 42 | #include "internal.h" |
43 | 43 | ||
44 | 44 | ||
45 | LIST_HEAD(super_blocks); | 45 | LIST_HEAD(super_blocks); |
46 | DEFINE_SPINLOCK(sb_lock); | 46 | DEFINE_SPINLOCK(sb_lock); |
47 | 47 | ||
48 | /** | 48 | /** |
49 | * alloc_super - create new superblock | 49 | * alloc_super - create new superblock |
50 | * @type: filesystem type superblock should belong to | 50 | * @type: filesystem type superblock should belong to |
51 | * | 51 | * |
52 | * Allocates and initializes a new &struct super_block. alloc_super() | 52 | * Allocates and initializes a new &struct super_block. alloc_super() |
53 | * returns a pointer to a new superblock or %NULL if allocation fails. | 53 | * returns a pointer to a new superblock or %NULL if allocation fails. |
54 | */ | 54 | */ |
55 | static struct super_block *alloc_super(struct file_system_type *type) | 55 | static struct super_block *alloc_super(struct file_system_type *type) |
56 | { | 56 | { |
57 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); | 57 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); |
58 | static struct super_operations default_op; | 58 | static struct super_operations default_op; |
59 | 59 | ||
60 | if (s) { | 60 | if (s) { |
61 | if (security_sb_alloc(s)) { | 61 | if (security_sb_alloc(s)) { |
62 | kfree(s); | 62 | kfree(s); |
63 | s = NULL; | 63 | s = NULL; |
64 | goto out; | 64 | goto out; |
65 | } | 65 | } |
66 | INIT_LIST_HEAD(&s->s_dirty); | 66 | INIT_LIST_HEAD(&s->s_dirty); |
67 | INIT_LIST_HEAD(&s->s_io); | 67 | INIT_LIST_HEAD(&s->s_io); |
68 | INIT_LIST_HEAD(&s->s_more_io); | 68 | INIT_LIST_HEAD(&s->s_more_io); |
69 | INIT_LIST_HEAD(&s->s_files); | 69 | INIT_LIST_HEAD(&s->s_files); |
70 | INIT_LIST_HEAD(&s->s_instances); | 70 | INIT_LIST_HEAD(&s->s_instances); |
71 | INIT_HLIST_HEAD(&s->s_anon); | 71 | INIT_HLIST_HEAD(&s->s_anon); |
72 | INIT_LIST_HEAD(&s->s_inodes); | 72 | INIT_LIST_HEAD(&s->s_inodes); |
73 | INIT_LIST_HEAD(&s->s_dentry_lru); | 73 | INIT_LIST_HEAD(&s->s_dentry_lru); |
74 | init_rwsem(&s->s_umount); | 74 | init_rwsem(&s->s_umount); |
75 | mutex_init(&s->s_lock); | 75 | mutex_init(&s->s_lock); |
76 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 76 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
77 | /* | 77 | /* |
78 | * The locking rules for s_lock are up to the | 78 | * The locking rules for s_lock are up to the |
79 | * filesystem. For example ext3fs has different | 79 | * filesystem. For example ext3fs has different |
80 | * lock ordering than usbfs: | 80 | * lock ordering than usbfs: |
81 | */ | 81 | */ |
82 | lockdep_set_class(&s->s_lock, &type->s_lock_key); | 82 | lockdep_set_class(&s->s_lock, &type->s_lock_key); |
83 | /* | 83 | /* |
84 | * sget() can have s_umount recursion. | 84 | * sget() can have s_umount recursion. |
85 | * | 85 | * |
86 | * When it cannot find a suitable sb, it allocates a new | 86 | * When it cannot find a suitable sb, it allocates a new |
87 | * one (this one), and tries again to find a suitable old | 87 | * one (this one), and tries again to find a suitable old |
88 | * one. | 88 | * one. |
89 | * | 89 | * |
90 | * In case that succeeds, it will acquire the s_umount | 90 | * In case that succeeds, it will acquire the s_umount |
91 | * lock of the old one. Since these are clearly distinct | 91 | * lock of the old one. Since these are clearly distinct |
92 | * locks, and this object isn't exposed yet, there's no | 92 | * locks, and this object isn't exposed yet, there's no |
93 | * risk of deadlocks. | 93 | * risk of deadlocks. |
94 | * | 94 | * |
95 | * Annotate this by putting this lock in a different | 95 | * Annotate this by putting this lock in a different |
96 | * subclass. | 96 | * subclass. |
97 | */ | 97 | */ |
98 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); | 98 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); |
99 | s->s_count = S_BIAS; | 99 | s->s_count = S_BIAS; |
100 | atomic_set(&s->s_active, 1); | 100 | atomic_set(&s->s_active, 1); |
101 | mutex_init(&s->s_vfs_rename_mutex); | 101 | mutex_init(&s->s_vfs_rename_mutex); |
102 | mutex_init(&s->s_dquot.dqio_mutex); | 102 | mutex_init(&s->s_dquot.dqio_mutex); |
103 | mutex_init(&s->s_dquot.dqonoff_mutex); | 103 | mutex_init(&s->s_dquot.dqonoff_mutex); |
104 | init_rwsem(&s->s_dquot.dqptr_sem); | 104 | init_rwsem(&s->s_dquot.dqptr_sem); |
105 | init_waitqueue_head(&s->s_wait_unfrozen); | 105 | init_waitqueue_head(&s->s_wait_unfrozen); |
106 | s->s_maxbytes = MAX_NON_LFS; | 106 | s->s_maxbytes = MAX_NON_LFS; |
107 | s->dq_op = sb_dquot_ops; | 107 | s->dq_op = sb_dquot_ops; |
108 | s->s_qcop = sb_quotactl_ops; | 108 | s->s_qcop = sb_quotactl_ops; |
109 | s->s_op = &default_op; | 109 | s->s_op = &default_op; |
110 | s->s_time_gran = 1000000000; | 110 | s->s_time_gran = 1000000000; |
111 | } | 111 | } |
112 | out: | 112 | out: |
113 | return s; | 113 | return s; |
114 | } | 114 | } |
115 | 115 | ||
116 | /** | 116 | /** |
117 | * destroy_super - frees a superblock | 117 | * destroy_super - frees a superblock |
118 | * @s: superblock to free | 118 | * @s: superblock to free |
119 | * | 119 | * |
120 | * Frees a superblock. | 120 | * Frees a superblock. |
121 | */ | 121 | */ |
122 | static inline void destroy_super(struct super_block *s) | 122 | static inline void destroy_super(struct super_block *s) |
123 | { | 123 | { |
124 | security_sb_free(s); | 124 | security_sb_free(s); |
125 | kfree(s->s_subtype); | 125 | kfree(s->s_subtype); |
126 | kfree(s->s_options); | 126 | kfree(s->s_options); |
127 | kfree(s); | 127 | kfree(s); |
128 | } | 128 | } |
129 | 129 | ||
130 | /* Superblock refcounting */ | 130 | /* Superblock refcounting */ |
131 | 131 | ||
132 | /* | 132 | /* |
133 | * Drop a superblock's refcount. Returns non-zero if the superblock was | 133 | * Drop a superblock's refcount. Returns non-zero if the superblock was |
134 | * destroyed. The caller must hold sb_lock. | 134 | * destroyed. The caller must hold sb_lock. |
135 | */ | 135 | */ |
136 | static int __put_super(struct super_block *sb) | 136 | static int __put_super(struct super_block *sb) |
137 | { | 137 | { |
138 | int ret = 0; | 138 | int ret = 0; |
139 | 139 | ||
140 | if (!--sb->s_count) { | 140 | if (!--sb->s_count) { |
141 | destroy_super(sb); | 141 | destroy_super(sb); |
142 | ret = 1; | 142 | ret = 1; |
143 | } | 143 | } |
144 | return ret; | 144 | return ret; |
145 | } | 145 | } |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * Drop a superblock's refcount. | 148 | * Drop a superblock's refcount. |
149 | * Returns non-zero if the superblock is about to be destroyed and | 149 | * Returns non-zero if the superblock is about to be destroyed and |
150 | * at least is already removed from super_blocks list, so if we are | 150 | * at least is already removed from super_blocks list, so if we are |
151 | * making a loop through super blocks then we need to restart. | 151 | * making a loop through super blocks then we need to restart. |
152 | * The caller must hold sb_lock. | 152 | * The caller must hold sb_lock. |
153 | */ | 153 | */ |
154 | int __put_super_and_need_restart(struct super_block *sb) | 154 | int __put_super_and_need_restart(struct super_block *sb) |
155 | { | 155 | { |
156 | /* check for race with generic_shutdown_super() */ | 156 | /* check for race with generic_shutdown_super() */ |
157 | if (list_empty(&sb->s_list)) { | 157 | if (list_empty(&sb->s_list)) { |
158 | /* super block is removed, need to restart... */ | 158 | /* super block is removed, need to restart... */ |
159 | __put_super(sb); | 159 | __put_super(sb); |
160 | return 1; | 160 | return 1; |
161 | } | 161 | } |
162 | /* can't be the last, since s_list is still in use */ | 162 | /* can't be the last, since s_list is still in use */ |
163 | sb->s_count--; | 163 | sb->s_count--; |
164 | BUG_ON(sb->s_count == 0); | 164 | BUG_ON(sb->s_count == 0); |
165 | return 0; | 165 | return 0; |
166 | } | 166 | } |
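The canonical traversal this helper supports, sketched below (it is the same shape writeback_inodes() and __sync_inodes() above use): bump s_count as a private get_super, drop sb_lock to do the work, then restart the walk if the superblock was removed underneath us:

        spin_lock(&sb_lock);
restart:
        list_for_each_entry(sb, &super_blocks, s_list) {
                sb->s_count++;
                spin_unlock(&sb_lock);
                /* ... operate on sb with sb_lock dropped ... */
                spin_lock(&sb_lock);
                if (__put_super_and_need_restart(sb))
                        goto restart;
        }
        spin_unlock(&sb_lock);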
167 | 167 | ||
168 | /** | 168 | /** |
169 | * put_super - drop a temporary reference to superblock | 169 | * put_super - drop a temporary reference to superblock |
170 | * @sb: superblock in question | 170 | * @sb: superblock in question |
171 | * | 171 | * |
172 | * Drops a temporary reference, frees superblock if there's no | 172 | * Drops a temporary reference, frees superblock if there's no |
173 | * references left. | 173 | * references left. |
174 | */ | 174 | */ |
175 | static void put_super(struct super_block *sb) | 175 | static void put_super(struct super_block *sb) |
176 | { | 176 | { |
177 | spin_lock(&sb_lock); | 177 | spin_lock(&sb_lock); |
178 | __put_super(sb); | 178 | __put_super(sb); |
179 | spin_unlock(&sb_lock); | 179 | spin_unlock(&sb_lock); |
180 | } | 180 | } |
181 | 181 | ||
182 | 182 | ||
183 | /** | 183 | /** |
184 | * deactivate_super - drop an active reference to superblock | 184 | * deactivate_super - drop an active reference to superblock |
185 | * @s: superblock to deactivate | 185 | * @s: superblock to deactivate |
186 | * | 186 | * |
187 | * Drops an active reference to the superblock, acquiring a temporary one if | 187 | * Drops an active reference to the superblock, acquiring a temporary one if |
188 | * there are no active references left. In that case we lock the superblock, | 188 | * there are no active references left. In that case we lock the superblock, |
189 | * tell fs driver to shut it down and drop the temporary reference we | 189 | * tell fs driver to shut it down and drop the temporary reference we |
190 | * had just acquired. | 190 | * had just acquired. |
191 | */ | 191 | */ |
192 | void deactivate_super(struct super_block *s) | 192 | void deactivate_super(struct super_block *s) |
193 | { | 193 | { |
194 | struct file_system_type *fs = s->s_type; | 194 | struct file_system_type *fs = s->s_type; |
195 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 195 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { |
196 | s->s_count -= S_BIAS-1; | 196 | s->s_count -= S_BIAS-1; |
197 | spin_unlock(&sb_lock); | 197 | spin_unlock(&sb_lock); |
198 | vfs_dq_off(s, 0); | 198 | vfs_dq_off(s, 0); |
199 | down_write(&s->s_umount); | 199 | down_write(&s->s_umount); |
200 | fs->kill_sb(s); | 200 | fs->kill_sb(s); |
201 | put_filesystem(fs); | 201 | put_filesystem(fs); |
202 | put_super(s); | 202 | put_super(s); |
203 | } | 203 | } |
204 | } | 204 | } |
205 | 205 | ||
206 | EXPORT_SYMBOL(deactivate_super); | 206 | EXPORT_SYMBOL(deactivate_super); |
207 | 207 | ||
208 | /** | 208 | /** |
209 | * deactivate_locked_super - drop an active reference to superblock | 209 | * deactivate_locked_super - drop an active reference to superblock |
210 | * @s: superblock to deactivate | 210 | * @s: superblock to deactivate |
211 | * | 211 | * |
212 | * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that | 212 | * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that |
213 | * it does not unlock it until it's all over. As a result, it's safe to | 213 | * it does not unlock it until it's all over. As a result, it's safe to |
214 | * use to dispose of a new superblock on ->get_sb() failure exits - nobody | 214 | * use to dispose of a new superblock on ->get_sb() failure exits - nobody |
215 | * will see the sucker until it's all over. The equivalent using up_write + | 215 | * will see the sucker until it's all over. The equivalent using up_write + |
216 | * deactivate_super is safe for that purpose only if the superblock is either | 216 | * deactivate_super is safe for that purpose only if the superblock is either |
217 | * safe to use or has NULL ->s_root when we unlock. | 217 | * safe to use or has NULL ->s_root when we unlock. |
218 | */ | 218 | */ |
219 | void deactivate_locked_super(struct super_block *s) | 219 | void deactivate_locked_super(struct super_block *s) |
220 | { | 220 | { |
221 | struct file_system_type *fs = s->s_type; | 221 | struct file_system_type *fs = s->s_type; |
222 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 222 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { |
223 | s->s_count -= S_BIAS-1; | 223 | s->s_count -= S_BIAS-1; |
224 | spin_unlock(&sb_lock); | 224 | spin_unlock(&sb_lock); |
225 | vfs_dq_off(s, 0); | 225 | vfs_dq_off(s, 0); |
226 | fs->kill_sb(s); | 226 | fs->kill_sb(s); |
227 | put_filesystem(fs); | 227 | put_filesystem(fs); |
228 | put_super(s); | 228 | put_super(s); |
229 | } else { | 229 | } else { |
230 | up_write(&s->s_umount); | 230 | up_write(&s->s_umount); |
231 | } | 231 | } |
232 | } | 232 | } |
233 | 233 | ||
234 | EXPORT_SYMBOL(deactivate_locked_super); | 234 | EXPORT_SYMBOL(deactivate_locked_super); |
235 | 235 | ||
236 | /** | 236 | /** |
237 | * grab_super - acquire an active reference | 237 | * grab_super - acquire an active reference |
238 | * @s: reference we are trying to make active | 238 | * @s: reference we are trying to make active |
239 | * | 239 | * |
240 | * Tries to acquire an active reference. grab_super() is used when we | 240 | * Tries to acquire an active reference. grab_super() is used when we |
241 | * have just found a superblock in super_blocks or fs_type->fs_supers | 241 | * have just found a superblock in super_blocks or fs_type->fs_supers |
242 | * and want to turn it into a full-blown active reference. grab_super() | 242 | * and want to turn it into a full-blown active reference. grab_super() |
243 | * is called with sb_lock held and drops it. Returns 1 in case of | 243 | * is called with sb_lock held and drops it. Returns 1 in case of |
244 | * success, 0 if we failed (the superblock contents were already dead or | 244 | * success, 0 if we failed (the superblock contents were already dead or |
245 | * dying when grab_super() was called). | 245 | * dying when grab_super() was called). |
246 | */ | 246 | */ |
247 | static int grab_super(struct super_block *s) __releases(sb_lock) | 247 | static int grab_super(struct super_block *s) __releases(sb_lock) |
248 | { | 248 | { |
249 | s->s_count++; | 249 | s->s_count++; |
250 | spin_unlock(&sb_lock); | 250 | spin_unlock(&sb_lock); |
251 | down_write(&s->s_umount); | 251 | down_write(&s->s_umount); |
252 | if (s->s_root) { | 252 | if (s->s_root) { |
253 | spin_lock(&sb_lock); | 253 | spin_lock(&sb_lock); |
254 | if (s->s_count > S_BIAS) { | 254 | if (s->s_count > S_BIAS) { |
255 | atomic_inc(&s->s_active); | 255 | atomic_inc(&s->s_active); |
256 | s->s_count--; | 256 | s->s_count--; |
257 | spin_unlock(&sb_lock); | 257 | spin_unlock(&sb_lock); |
258 | return 1; | 258 | return 1; |
259 | } | 259 | } |
260 | spin_unlock(&sb_lock); | 260 | spin_unlock(&sb_lock); |
261 | } | 261 | } |
262 | up_write(&s->s_umount); | 262 | up_write(&s->s_umount); |
263 | put_super(s); | 263 | put_super(s); |
264 | yield(); | 264 | yield(); |
265 | return 0; | 265 | return 0; |
266 | } | 266 | } |
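
The s_count arithmetic in grab_super() and deactivate_super() follows a biased-refcount convention, which is worth making explicit (a sketch inferred from the checks above; S_BIAS itself is defined elsewhere):

    /*
     * Assumed convention: an active reference is worth S_BIAS units of
     * s_count, a temporary reference is worth 1.
     *
     *   s_count == S_BIAS       one active ref, no temporary refs
     *   s_count == S_BIAS + n   one active ref plus n temporary refs
     *
     * So grab_super()'s "s_count > S_BIAS" asks whether anyone besides
     * our own temporary reference still uses the superblock, and
     * deactivate_super()'s "s_count -= S_BIAS - 1" converts the dying
     * active reference into a single temporary one, which the following
     * put_super() drops.
     */
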
267 | 267 | ||
268 | /* | 268 | /* |
269 | * Superblock locking. We really ought to get rid of these two. | 269 | * Superblock locking. We really ought to get rid of these two. |
270 | */ | 270 | */ |
271 | void lock_super(struct super_block * sb) | 271 | void lock_super(struct super_block * sb) |
272 | { | 272 | { |
273 | get_fs_excl(); | 273 | get_fs_excl(); |
274 | mutex_lock(&sb->s_lock); | 274 | mutex_lock(&sb->s_lock); |
275 | } | 275 | } |
276 | 276 | ||
277 | void unlock_super(struct super_block * sb) | 277 | void unlock_super(struct super_block * sb) |
278 | { | 278 | { |
279 | put_fs_excl(); | 279 | put_fs_excl(); |
280 | mutex_unlock(&sb->s_lock); | 280 | mutex_unlock(&sb->s_lock); |
281 | } | 281 | } |
282 | 282 | ||
283 | EXPORT_SYMBOL(lock_super); | 283 | EXPORT_SYMBOL(lock_super); |
284 | EXPORT_SYMBOL(unlock_super); | 284 | EXPORT_SYMBOL(unlock_super); |
285 | 285 | ||
286 | /* | 286 | /* |
287 | * Write out and wait upon all dirty data associated with this | 287 | * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) |
288 | * superblock. Filesystem data as well as the underlying block | 288 | * just dirties buffers with inodes so we have to submit IO for these buffers |
289 | * device. Takes the superblock lock. Requires a second blkdev | 289 | * via __sync_blockdev(). This also speeds up the wait == 1 case since in that |
290 | * flush by the caller to complete the operation. | 290 | * case write_inode() functions do sync_dirty_buffer() and thus effectively |
291 | * write one block at a time. | ||
291 | */ | 292 | */ |
292 | static int __fsync_super(struct super_block *sb) | 293 | static int __fsync_super(struct super_block *sb, int wait) |
293 | { | 294 | { |
294 | sync_inodes_sb(sb, 0); | ||
295 | vfs_dq_sync(sb); | 295 | vfs_dq_sync(sb); |
296 | sync_inodes_sb(sb, 1); | 296 | sync_inodes_sb(sb, wait); |
297 | lock_super(sb); | 297 | lock_super(sb); |
298 | if (sb->s_dirt && sb->s_op->write_super) | 298 | if (sb->s_dirt && sb->s_op->write_super) |
299 | sb->s_op->write_super(sb); | 299 | sb->s_op->write_super(sb); |
300 | unlock_super(sb); | 300 | unlock_super(sb); |
301 | if (sb->s_op->sync_fs) | 301 | if (sb->s_op->sync_fs) |
302 | sb->s_op->sync_fs(sb, 1); | 302 | sb->s_op->sync_fs(sb, wait); |
303 | return sync_blockdev(sb->s_bdev); | 303 | return __sync_blockdev(sb->s_bdev, wait); |
304 | } | 304 | } |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * Write out and wait upon all dirty data associated with this | 307 | * Write out and wait upon all dirty data associated with this |
308 | * superblock. Filesystem data as well as the underlying block | 308 | * superblock. Filesystem data as well as the underlying block |
309 | * device. Takes the superblock lock. | 309 | * device. Takes the superblock lock. |
310 | */ | 310 | */ |
311 | int fsync_super(struct super_block *sb) | 311 | int fsync_super(struct super_block *sb) |
312 | { | 312 | { |
313 | return __fsync_super(sb); | 313 | int ret; |
314 | |||
315 | ret = __fsync_super(sb, 0); | ||
316 | if (ret < 0) | ||
317 | return ret; | ||
318 | return __fsync_super(sb, 1); | ||
314 | } | 319 | } |
315 | EXPORT_SYMBOL_GPL(fsync_super); | 320 | EXPORT_SYMBOL_GPL(fsync_super); |
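
Given the wait semantics described above, a filesystem's ->sync_fs() is expected to distinguish the two passes fsync_super() now makes. A hedged sketch of such a callback (the examplefs_* names and helpers are hypothetical, not from any real filesystem):

    static int examplefs_sync_fs(struct super_block *sb, int wait)
    {
            /* pass 1 (wait == 0): start writeback of fs-private metadata */
            examplefs_kick_metadata_io(sb);
            if (!wait)
                    return 0;
            /* pass 2 (wait == 1): data integrity pass, block until done */
            return examplefs_wait_metadata_io(sb);
    }

Making the first pass asynchronous lets IO to several devices proceed in parallel; only the second pass provides the integrity guarantee.
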
316 | 321 | ||
317 | /** | 322 | /** |
318 | * generic_shutdown_super - common helper for ->kill_sb() | 323 | * generic_shutdown_super - common helper for ->kill_sb() |
319 | * @sb: superblock to kill | 324 | * @sb: superblock to kill |
320 | * | 325 | * |
321 | * generic_shutdown_super() does all fs-independent work on superblock | 326 | * generic_shutdown_super() does all fs-independent work on superblock |
322 | * shutdown. Typical ->kill_sb() should pick all fs-specific objects | 327 | * shutdown. Typical ->kill_sb() should pick all fs-specific objects |
323 | * that need destruction out of superblock, call generic_shutdown_super() | 328 | * that need destruction out of superblock, call generic_shutdown_super() |
324 | * and release aforementioned objects. Note: dentries and inodes _are_ | 329 | * and release aforementioned objects. Note: dentries and inodes _are_ |
325 | * taken care of and do not need specific handling. | 330 | * taken care of and do not need specific handling. |
326 | * | 331 | * |
327 | * Upon calling this function, the filesystem may no longer alter or | 332 | * Upon calling this function, the filesystem may no longer alter or |
328 | * rearrange the set of dentries belonging to this super_block, nor may it | 333 | * rearrange the set of dentries belonging to this super_block, nor may it |
329 | * change the attachments of dentries to inodes. | 334 | * change the attachments of dentries to inodes. |
330 | */ | 335 | */ |
331 | void generic_shutdown_super(struct super_block *sb) | 336 | void generic_shutdown_super(struct super_block *sb) |
332 | { | 337 | { |
333 | const struct super_operations *sop = sb->s_op; | 338 | const struct super_operations *sop = sb->s_op; |
334 | 339 | ||
335 | 340 | ||
336 | if (sb->s_root) { | 341 | if (sb->s_root) { |
337 | shrink_dcache_for_umount(sb); | 342 | shrink_dcache_for_umount(sb); |
338 | fsync_super(sb); | 343 | fsync_super(sb); |
339 | lock_super(sb); | 344 | lock_super(sb); |
340 | sb->s_flags &= ~MS_ACTIVE; | 345 | sb->s_flags &= ~MS_ACTIVE; |
341 | 346 | ||
342 | /* bad name - it should be evict_inodes() */ | 347 | /* bad name - it should be evict_inodes() */ |
343 | invalidate_inodes(sb); | 348 | invalidate_inodes(sb); |
344 | lock_kernel(); | 349 | lock_kernel(); |
345 | 350 | ||
346 | if (sop->write_super && sb->s_dirt) | 351 | if (sop->write_super && sb->s_dirt) |
347 | sop->write_super(sb); | 352 | sop->write_super(sb); |
348 | if (sop->put_super) | 353 | if (sop->put_super) |
349 | sop->put_super(sb); | 354 | sop->put_super(sb); |
350 | 355 | ||
351 | /* Forget any remaining inodes */ | 356 | /* Forget any remaining inodes */ |
352 | if (invalidate_inodes(sb)) { | 357 | if (invalidate_inodes(sb)) { |
353 | printk("VFS: Busy inodes after unmount of %s. " | 358 | printk("VFS: Busy inodes after unmount of %s. " |
354 | "Self-destruct in 5 seconds. Have a nice day...\n", | 359 | "Self-destruct in 5 seconds. Have a nice day...\n", |
355 | sb->s_id); | 360 | sb->s_id); |
356 | } | 361 | } |
357 | 362 | ||
358 | unlock_kernel(); | 363 | unlock_kernel(); |
359 | unlock_super(sb); | 364 | unlock_super(sb); |
360 | } | 365 | } |
361 | spin_lock(&sb_lock); | 366 | spin_lock(&sb_lock); |
362 | /* should be initialized for __put_super_and_need_restart() */ | 367 | /* should be initialized for __put_super_and_need_restart() */ |
363 | list_del_init(&sb->s_list); | 368 | list_del_init(&sb->s_list); |
364 | list_del(&sb->s_instances); | 369 | list_del(&sb->s_instances); |
365 | spin_unlock(&sb_lock); | 370 | spin_unlock(&sb_lock); |
366 | up_write(&sb->s_umount); | 371 | up_write(&sb->s_umount); |
367 | } | 372 | } |
368 | 373 | ||
369 | EXPORT_SYMBOL(generic_shutdown_super); | 374 | EXPORT_SYMBOL(generic_shutdown_super); |
370 | 375 | ||
371 | /** | 376 | /** |
372 | * sget - find or create a superblock | 377 | * sget - find or create a superblock |
373 | * @type: filesystem type superblock should belong to | 378 | * @type: filesystem type superblock should belong to |
374 | * @test: comparison callback | 379 | * @test: comparison callback |
375 | * @set: setup callback | 380 | * @set: setup callback |
376 | * @data: argument to each of them | 381 | * @data: argument to each of them |
377 | */ | 382 | */ |
378 | struct super_block *sget(struct file_system_type *type, | 383 | struct super_block *sget(struct file_system_type *type, |
379 | int (*test)(struct super_block *,void *), | 384 | int (*test)(struct super_block *,void *), |
380 | int (*set)(struct super_block *,void *), | 385 | int (*set)(struct super_block *,void *), |
381 | void *data) | 386 | void *data) |
382 | { | 387 | { |
383 | struct super_block *s = NULL; | 388 | struct super_block *s = NULL; |
384 | struct super_block *old; | 389 | struct super_block *old; |
385 | int err; | 390 | int err; |
386 | 391 | ||
387 | retry: | 392 | retry: |
388 | spin_lock(&sb_lock); | 393 | spin_lock(&sb_lock); |
389 | if (test) { | 394 | if (test) { |
390 | list_for_each_entry(old, &type->fs_supers, s_instances) { | 395 | list_for_each_entry(old, &type->fs_supers, s_instances) { |
391 | if (!test(old, data)) | 396 | if (!test(old, data)) |
392 | continue; | 397 | continue; |
393 | if (!grab_super(old)) | 398 | if (!grab_super(old)) |
394 | goto retry; | 399 | goto retry; |
395 | if (s) { | 400 | if (s) { |
396 | up_write(&s->s_umount); | 401 | up_write(&s->s_umount); |
397 | destroy_super(s); | 402 | destroy_super(s); |
398 | } | 403 | } |
399 | return old; | 404 | return old; |
400 | } | 405 | } |
401 | } | 406 | } |
402 | if (!s) { | 407 | if (!s) { |
403 | spin_unlock(&sb_lock); | 408 | spin_unlock(&sb_lock); |
404 | s = alloc_super(type); | 409 | s = alloc_super(type); |
405 | if (!s) | 410 | if (!s) |
406 | return ERR_PTR(-ENOMEM); | 411 | return ERR_PTR(-ENOMEM); |
407 | goto retry; | 412 | goto retry; |
408 | } | 413 | } |
409 | 414 | ||
410 | err = set(s, data); | 415 | err = set(s, data); |
411 | if (err) { | 416 | if (err) { |
412 | spin_unlock(&sb_lock); | 417 | spin_unlock(&sb_lock); |
413 | up_write(&s->s_umount); | 418 | up_write(&s->s_umount); |
414 | destroy_super(s); | 419 | destroy_super(s); |
415 | return ERR_PTR(err); | 420 | return ERR_PTR(err); |
416 | } | 421 | } |
417 | s->s_type = type; | 422 | s->s_type = type; |
418 | strlcpy(s->s_id, type->name, sizeof(s->s_id)); | 423 | strlcpy(s->s_id, type->name, sizeof(s->s_id)); |
419 | list_add_tail(&s->s_list, &super_blocks); | 424 | list_add_tail(&s->s_list, &super_blocks); |
420 | list_add(&s->s_instances, &type->fs_supers); | 425 | list_add(&s->s_instances, &type->fs_supers); |
421 | spin_unlock(&sb_lock); | 426 | spin_unlock(&sb_lock); |
422 | get_filesystem(type); | 427 | get_filesystem(type); |
423 | return s; | 428 | return s; |
424 | } | 429 | } |
425 | 430 | ||
426 | EXPORT_SYMBOL(sget); | 431 | EXPORT_SYMBOL(sget); |
427 | 432 | ||
428 | void drop_super(struct super_block *sb) | 433 | void drop_super(struct super_block *sb) |
429 | { | 434 | { |
430 | up_read(&sb->s_umount); | 435 | up_read(&sb->s_umount); |
431 | put_super(sb); | 436 | put_super(sb); |
432 | } | 437 | } |
433 | 438 | ||
434 | EXPORT_SYMBOL(drop_super); | 439 | EXPORT_SYMBOL(drop_super); |
435 | 440 | ||
436 | static inline void write_super(struct super_block *sb) | 441 | static inline void write_super(struct super_block *sb) |
437 | { | 442 | { |
438 | lock_super(sb); | 443 | lock_super(sb); |
439 | if (sb->s_root && sb->s_dirt) | 444 | if (sb->s_root && sb->s_dirt) |
440 | if (sb->s_op->write_super) | 445 | if (sb->s_op->write_super) |
441 | sb->s_op->write_super(sb); | 446 | sb->s_op->write_super(sb); |
442 | unlock_super(sb); | 447 | unlock_super(sb); |
443 | } | 448 | } |
444 | 449 | ||
445 | /* | 450 | /* |
446 | * Note: check the dirty flag before waiting, so we don't | 451 | * Note: check the dirty flag before waiting, so we don't |
447 | * hold up the sync while mounting a device. (The newly | 452 | * hold up the sync while mounting a device. (The newly |
448 | * mounted device won't need syncing.) | 453 | * mounted device won't need syncing.) |
449 | */ | 454 | */ |
450 | void sync_supers(void) | 455 | void sync_supers(void) |
451 | { | 456 | { |
452 | struct super_block *sb; | 457 | struct super_block *sb; |
453 | 458 | ||
454 | spin_lock(&sb_lock); | 459 | spin_lock(&sb_lock); |
455 | restart: | 460 | restart: |
456 | list_for_each_entry(sb, &super_blocks, s_list) { | 461 | list_for_each_entry(sb, &super_blocks, s_list) { |
457 | if (sb->s_dirt) { | 462 | if (sb->s_dirt) { |
458 | sb->s_count++; | 463 | sb->s_count++; |
459 | spin_unlock(&sb_lock); | 464 | spin_unlock(&sb_lock); |
460 | down_read(&sb->s_umount); | 465 | down_read(&sb->s_umount); |
461 | write_super(sb); | 466 | write_super(sb); |
462 | up_read(&sb->s_umount); | 467 | up_read(&sb->s_umount); |
463 | spin_lock(&sb_lock); | 468 | spin_lock(&sb_lock); |
464 | if (__put_super_and_need_restart(sb)) | 469 | if (__put_super_and_need_restart(sb)) |
465 | goto restart; | 470 | goto restart; |
466 | } | 471 | } |
467 | } | 472 | } |
468 | spin_unlock(&sb_lock); | 473 | spin_unlock(&sb_lock); |
469 | } | 474 | } |
470 | 475 | ||
471 | /* | 476 | /* |
472 | * Call the ->sync_fs super_op against all filesystems which are r/w and | 477 | * Sync all the data for all the filesystems (called by sys_sync() and |
473 | * which implement it. | 478 | * emergency sync) |
474 | * | 479 | * |
475 | * This operation is careful to avoid the livelock which could easily happen | 480 | * This operation is careful to avoid the livelock which could easily happen |
476 | * if two or more filesystems are being continuously dirtied. s_need_sync_fs | 481 | * if two or more filesystems are being continuously dirtied. s_need_sync |
477 | * is used only here. We set it against all filesystems and then clear it as | 482 | * is used only here. We set it against all filesystems and then clear it as |
478 | * we sync them. So redirtied filesystems are skipped. | 483 | * we sync them. So redirtied filesystems are skipped. |
479 | * | 484 | * |
480 | * But if process A is currently running sync_filesystems and then process B | 485 | * But if process A is currently running sync_filesystems and then process B |
481 | * calls sync_filesystems as well, process B will set all the s_need_sync_fs | 486 | * calls sync_filesystems as well, process B will set all the s_need_sync |
482 | * flags again, which will cause process A to resync everything. Fix that with | 487 | * flags again, which will cause process A to resync everything. Fix that with |
483 | * a local mutex. | 488 | * a local mutex. |
484 | * | ||
485 | * (Fabian) Avoid sync_fs with clean fs & wait mode 0 | ||
486 | */ | 489 | */ |
487 | void sync_filesystems(int wait) | 490 | void sync_filesystems(int wait) |
488 | { | 491 | { |
489 | struct super_block *sb; | 492 | struct super_block *sb; |
490 | static DEFINE_MUTEX(mutex); | 493 | static DEFINE_MUTEX(mutex); |
491 | 494 | ||
492 | mutex_lock(&mutex); /* Could be down_interruptible */ | 495 | mutex_lock(&mutex); /* Could be down_interruptible */ |
493 | spin_lock(&sb_lock); | 496 | spin_lock(&sb_lock); |
494 | list_for_each_entry(sb, &super_blocks, s_list) { | 497 | list_for_each_entry(sb, &super_blocks, s_list) { |
495 | if (!sb->s_op->sync_fs) | ||
496 | continue; | ||
497 | if (sb->s_flags & MS_RDONLY) | 498 | if (sb->s_flags & MS_RDONLY) |
498 | continue; | 499 | continue; |
499 | sb->s_need_sync_fs = 1; | 500 | sb->s_need_sync = 1; |
500 | } | 501 | } |
501 | 502 | ||
502 | restart: | 503 | restart: |
503 | list_for_each_entry(sb, &super_blocks, s_list) { | 504 | list_for_each_entry(sb, &super_blocks, s_list) { |
504 | if (!sb->s_need_sync_fs) | 505 | if (!sb->s_need_sync) |
505 | continue; | 506 | continue; |
506 | sb->s_need_sync_fs = 0; | 507 | sb->s_need_sync = 0; |
507 | if (sb->s_flags & MS_RDONLY) | 508 | if (sb->s_flags & MS_RDONLY) |
508 | continue; /* hm. Was remounted r/o meanwhile */ | 509 | continue; /* hm. Was remounted r/o meanwhile */ |
509 | sb->s_count++; | 510 | sb->s_count++; |
510 | spin_unlock(&sb_lock); | 511 | spin_unlock(&sb_lock); |
511 | down_read(&sb->s_umount); | 512 | down_read(&sb->s_umount); |
512 | if (sb->s_root) | 513 | if (sb->s_root) |
513 | sb->s_op->sync_fs(sb, wait); | 514 | __fsync_super(sb, wait); |
514 | up_read(&sb->s_umount); | 515 | up_read(&sb->s_umount); |
515 | /* restart only when sb is no longer on the list */ | 516 | /* restart only when sb is no longer on the list */ |
516 | spin_lock(&sb_lock); | 517 | spin_lock(&sb_lock); |
517 | if (__put_super_and_need_restart(sb)) | 518 | if (__put_super_and_need_restart(sb)) |
518 | goto restart; | 519 | goto restart; |
519 | } | 520 | } |
520 | spin_unlock(&sb_lock); | 521 | spin_unlock(&sb_lock); |
521 | mutex_unlock(&mutex); | 522 | mutex_unlock(&mutex); |
522 | } | 523 | } |
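
The livelock avoidance described above boils down to a mark-then-clear scheme. Reduced to its essentials (illustrative structure and helper, not kernel API; locking omitted):

    struct victim {
            struct list_head list;
            int need_work;
    };

    static void service_all(struct list_head *victims)
    {
            struct victim *v;

            list_for_each_entry(v, victims, list)
                    v->need_work = 1;       /* mark each item exactly once */

            list_for_each_entry(v, victims, list) {
                    if (!v->need_work)
                            continue;
                    /*
                     * Clear before working: an item redirtied while we
                     * work stays cleared, so the walk terminates.
                     */
                    v->need_work = 0;
                    do_work_on(v);
            }
    }

The local mutex guards against the second hazard named in the comment: process B re-marking everything while process A is mid-walk.
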
523 | |||
524 | #ifdef CONFIG_BLOCK | ||
525 | /* | ||
526 | * Sync all block devices underlying some superblock | ||
527 | */ | ||
528 | void sync_blockdevs(void) | ||
529 | { | ||
530 | struct super_block *sb; | ||
531 | |||
532 | spin_lock(&sb_lock); | ||
533 | restart: | ||
534 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
535 | if (!sb->s_bdev) | ||
536 | continue; | ||
537 | sb->s_count++; | ||
538 | spin_unlock(&sb_lock); | ||
539 | down_read(&sb->s_umount); | ||
540 | if (sb->s_root) | ||
541 | sync_blockdev(sb->s_bdev); | ||
542 | up_read(&sb->s_umount); | ||
543 | spin_lock(&sb_lock); | ||
544 | if (__put_super_and_need_restart(sb)) | ||
545 | goto restart; | ||
546 | } | ||
547 | spin_unlock(&sb_lock); | ||
548 | } | ||
549 | #endif | ||
550 | 524 | ||
551 | /** | 525 | /** |
552 | * get_super - get the superblock of a device | 526 | * get_super - get the superblock of a device |
553 | * @bdev: device to get the superblock for | 527 | * @bdev: device to get the superblock for |
554 | * | 528 | * |
555 | * Scans the superblock list and finds the superblock of the file system | 529 | * Scans the superblock list and finds the superblock of the file system |
556 | * mounted on the device given. %NULL is returned if no match is found. | 530 | * mounted on the device given. %NULL is returned if no match is found. |
557 | */ | 531 | */ |
558 | 532 | ||
559 | struct super_block * get_super(struct block_device *bdev) | 533 | struct super_block * get_super(struct block_device *bdev) |
560 | { | 534 | { |
561 | struct super_block *sb; | 535 | struct super_block *sb; |
562 | 536 | ||
563 | if (!bdev) | 537 | if (!bdev) |
564 | return NULL; | 538 | return NULL; |
565 | 539 | ||
566 | spin_lock(&sb_lock); | 540 | spin_lock(&sb_lock); |
567 | rescan: | 541 | rescan: |
568 | list_for_each_entry(sb, &super_blocks, s_list) { | 542 | list_for_each_entry(sb, &super_blocks, s_list) { |
569 | if (sb->s_bdev == bdev) { | 543 | if (sb->s_bdev == bdev) { |
570 | sb->s_count++; | 544 | sb->s_count++; |
571 | spin_unlock(&sb_lock); | 545 | spin_unlock(&sb_lock); |
572 | down_read(&sb->s_umount); | 546 | down_read(&sb->s_umount); |
573 | if (sb->s_root) | 547 | if (sb->s_root) |
574 | return sb; | 548 | return sb; |
575 | up_read(&sb->s_umount); | 549 | up_read(&sb->s_umount); |
576 | /* restart only when sb is no longer on the list */ | 550 | /* restart only when sb is no longer on the list */ |
577 | spin_lock(&sb_lock); | 551 | spin_lock(&sb_lock); |
578 | if (__put_super_and_need_restart(sb)) | 552 | if (__put_super_and_need_restart(sb)) |
579 | goto rescan; | 553 | goto rescan; |
580 | } | 554 | } |
581 | } | 555 | } |
582 | spin_unlock(&sb_lock); | 556 | spin_unlock(&sb_lock); |
583 | return NULL; | 557 | return NULL; |
584 | } | 558 | } |
585 | 559 | ||
586 | EXPORT_SYMBOL(get_super); | 560 | EXPORT_SYMBOL(get_super); |
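
On success get_super() returns with s_count elevated and s_umount held for read, which is exactly what drop_super() above undoes. A typical call site therefore looks like this (sketch; syncing the device's filesystem is an illustrative use):

    struct super_block *sb = get_super(bdev);

    if (sb) {
            /* sb is pinned and cannot be unmounted under us */
            fsync_super(sb);
            drop_super(sb);     /* up_read(&sb->s_umount) + put_super() */
    }
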
587 | 561 | ||
588 | struct super_block * user_get_super(dev_t dev) | 562 | struct super_block * user_get_super(dev_t dev) |
589 | { | 563 | { |
590 | struct super_block *sb; | 564 | struct super_block *sb; |
591 | 565 | ||
592 | spin_lock(&sb_lock); | 566 | spin_lock(&sb_lock); |
593 | rescan: | 567 | rescan: |
594 | list_for_each_entry(sb, &super_blocks, s_list) { | 568 | list_for_each_entry(sb, &super_blocks, s_list) { |
595 | if (sb->s_dev == dev) { | 569 | if (sb->s_dev == dev) { |
596 | sb->s_count++; | 570 | sb->s_count++; |
597 | spin_unlock(&sb_lock); | 571 | spin_unlock(&sb_lock); |
598 | down_read(&sb->s_umount); | 572 | down_read(&sb->s_umount); |
599 | if (sb->s_root) | 573 | if (sb->s_root) |
600 | return sb; | 574 | return sb; |
601 | up_read(&sb->s_umount); | 575 | up_read(&sb->s_umount); |
602 | /* restart only when sb is no longer on the list */ | 576 | /* restart only when sb is no longer on the list */ |
603 | spin_lock(&sb_lock); | 577 | spin_lock(&sb_lock); |
604 | if (__put_super_and_need_restart(sb)) | 578 | if (__put_super_and_need_restart(sb)) |
605 | goto rescan; | 579 | goto rescan; |
606 | } | 580 | } |
607 | } | 581 | } |
608 | spin_unlock(&sb_lock); | 582 | spin_unlock(&sb_lock); |
609 | return NULL; | 583 | return NULL; |
610 | } | 584 | } |
611 | 585 | ||
612 | SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) | 586 | SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) |
613 | { | 587 | { |
614 | struct super_block *s; | 588 | struct super_block *s; |
615 | struct ustat tmp; | 589 | struct ustat tmp; |
616 | struct kstatfs sbuf; | 590 | struct kstatfs sbuf; |
617 | int err = -EINVAL; | 591 | int err = -EINVAL; |
618 | 592 | ||
619 | s = user_get_super(new_decode_dev(dev)); | 593 | s = user_get_super(new_decode_dev(dev)); |
620 | if (s == NULL) | 594 | if (s == NULL) |
621 | goto out; | 595 | goto out; |
622 | err = vfs_statfs(s->s_root, &sbuf); | 596 | err = vfs_statfs(s->s_root, &sbuf); |
623 | drop_super(s); | 597 | drop_super(s); |
624 | if (err) | 598 | if (err) |
625 | goto out; | 599 | goto out; |
626 | 600 | ||
627 | memset(&tmp,0,sizeof(struct ustat)); | 601 | memset(&tmp,0,sizeof(struct ustat)); |
628 | tmp.f_tfree = sbuf.f_bfree; | 602 | tmp.f_tfree = sbuf.f_bfree; |
629 | tmp.f_tinode = sbuf.f_ffree; | 603 | tmp.f_tinode = sbuf.f_ffree; |
630 | 604 | ||
631 | err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0; | 605 | err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0; |
632 | out: | 606 | out: |
633 | return err; | 607 | return err; |
634 | } | 608 | } |
635 | 609 | ||
636 | /** | 610 | /** |
637 | * do_remount_sb - asks filesystem to change mount options. | 611 | * do_remount_sb - asks filesystem to change mount options. |
638 | * @sb: superblock in question | 612 | * @sb: superblock in question |
639 | * @flags: numeric part of options | 613 | * @flags: numeric part of options |
640 | * @data: the rest of options | 614 | * @data: the rest of options |
641 | * @force: whether or not to force the change | 615 | * @force: whether or not to force the change |
642 | * | 616 | * |
643 | * Alters the mount options of a mounted file system. | 617 | * Alters the mount options of a mounted file system. |
644 | */ | 618 | */ |
645 | int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | 619 | int do_remount_sb(struct super_block *sb, int flags, void *data, int force) |
646 | { | 620 | { |
647 | int retval; | 621 | int retval; |
648 | int remount_rw; | 622 | int remount_rw; |
649 | 623 | ||
650 | #ifdef CONFIG_BLOCK | 624 | #ifdef CONFIG_BLOCK |
651 | if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) | 625 | if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) |
652 | return -EACCES; | 626 | return -EACCES; |
653 | #endif | 627 | #endif |
654 | if (flags & MS_RDONLY) | 628 | if (flags & MS_RDONLY) |
655 | acct_auto_close(sb); | 629 | acct_auto_close(sb); |
656 | shrink_dcache_sb(sb); | 630 | shrink_dcache_sb(sb); |
657 | fsync_super(sb); | 631 | fsync_super(sb); |
658 | 632 | ||
659 | /* If we are remounting RDONLY and current sb is read/write, | 633 | /* If we are remounting RDONLY and current sb is read/write, |
660 | make sure there are no rw files opened */ | 634 | make sure there are no rw files opened */ |
661 | if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { | 635 | if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { |
662 | if (force) | 636 | if (force) |
663 | mark_files_ro(sb); | 637 | mark_files_ro(sb); |
664 | else if (!fs_may_remount_ro(sb)) | 638 | else if (!fs_may_remount_ro(sb)) |
665 | return -EBUSY; | 639 | return -EBUSY; |
666 | retval = vfs_dq_off(sb, 1); | 640 | retval = vfs_dq_off(sb, 1); |
667 | if (retval < 0 && retval != -ENOSYS) | 641 | if (retval < 0 && retval != -ENOSYS) |
668 | return -EBUSY; | 642 | return -EBUSY; |
669 | } | 643 | } |
670 | remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); | 644 | remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); |
671 | 645 | ||
672 | if (sb->s_op->remount_fs) { | 646 | if (sb->s_op->remount_fs) { |
673 | lock_super(sb); | 647 | lock_super(sb); |
674 | retval = sb->s_op->remount_fs(sb, &flags, data); | 648 | retval = sb->s_op->remount_fs(sb, &flags, data); |
675 | unlock_super(sb); | 649 | unlock_super(sb); |
676 | if (retval) | 650 | if (retval) |
677 | return retval; | 651 | return retval; |
678 | } | 652 | } |
679 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); | 653 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); |
680 | if (remount_rw) | 654 | if (remount_rw) |
681 | vfs_dq_quota_on_remount(sb); | 655 | vfs_dq_quota_on_remount(sb); |
682 | return 0; | 656 | return 0; |
683 | } | 657 | } |
684 | 658 | ||
685 | static void do_emergency_remount(struct work_struct *work) | 659 | static void do_emergency_remount(struct work_struct *work) |
686 | { | 660 | { |
687 | struct super_block *sb; | 661 | struct super_block *sb; |
688 | 662 | ||
689 | spin_lock(&sb_lock); | 663 | spin_lock(&sb_lock); |
690 | list_for_each_entry(sb, &super_blocks, s_list) { | 664 | list_for_each_entry(sb, &super_blocks, s_list) { |
691 | sb->s_count++; | 665 | sb->s_count++; |
692 | spin_unlock(&sb_lock); | 666 | spin_unlock(&sb_lock); |
693 | down_read(&sb->s_umount); | 667 | down_read(&sb->s_umount); |
694 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { | 668 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { |
695 | /* | 669 | /* |
696 | * ->remount_fs needs lock_kernel(). | 670 | * ->remount_fs needs lock_kernel(). |
697 | * | 671 | * |
698 | * What lock protects sb->s_flags?? | 672 | * What lock protects sb->s_flags?? |
699 | */ | 673 | */ |
700 | lock_kernel(); | 674 | lock_kernel(); |
701 | do_remount_sb(sb, MS_RDONLY, NULL, 1); | 675 | do_remount_sb(sb, MS_RDONLY, NULL, 1); |
702 | unlock_kernel(); | 676 | unlock_kernel(); |
703 | } | 677 | } |
704 | drop_super(sb); | 678 | drop_super(sb); |
705 | spin_lock(&sb_lock); | 679 | spin_lock(&sb_lock); |
706 | } | 680 | } |
707 | spin_unlock(&sb_lock); | 681 | spin_unlock(&sb_lock); |
708 | kfree(work); | 682 | kfree(work); |
709 | printk("Emergency Remount complete\n"); | 683 | printk("Emergency Remount complete\n"); |
710 | } | 684 | } |
711 | 685 | ||
712 | void emergency_remount(void) | 686 | void emergency_remount(void) |
713 | { | 687 | { |
714 | struct work_struct *work; | 688 | struct work_struct *work; |
715 | 689 | ||
716 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 690 | work = kmalloc(sizeof(*work), GFP_ATOMIC); |
717 | if (work) { | 691 | if (work) { |
718 | INIT_WORK(work, do_emergency_remount); | 692 | INIT_WORK(work, do_emergency_remount); |
719 | schedule_work(work); | 693 | schedule_work(work); |
720 | } | 694 | } |
721 | } | 695 | } |
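
Both emergency_remount() here and emergency_sync() in fs/sync.c below use the same fire-and-forget deferral idiom: allocate the work item with GFP_ATOMIC (these can be triggered from awkward contexts such as SysRq), let the handler free it, and silently skip on allocation failure. The idiom in isolation (do_thing_work/kick_thing are illustrative names):

    static void do_thing_work(struct work_struct *work)
    {
            /* the deferred action runs here, in process context */
            kfree(work);                    /* work item frees itself */
    }

    static void kick_thing(void)
    {
            struct work_struct *work;

            work = kmalloc(sizeof(*work), GFP_ATOMIC);
            if (work) {                     /* on failure: silently skip */
                    INIT_WORK(work, do_thing_work);
                    schedule_work(work);
            }
    }
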
722 | 696 | ||
723 | /* | 697 | /* |
724 | * Unnamed block devices are dummy devices used by virtual | 698 | * Unnamed block devices are dummy devices used by virtual |
725 | * filesystems which don't use real block-devices. -- jrs | 699 | * filesystems which don't use real block-devices. -- jrs |
726 | */ | 700 | */ |
727 | 701 | ||
728 | static DEFINE_IDA(unnamed_dev_ida); | 702 | static DEFINE_IDA(unnamed_dev_ida); |
729 | static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ | 703 | static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ |
730 | 704 | ||
731 | int set_anon_super(struct super_block *s, void *data) | 705 | int set_anon_super(struct super_block *s, void *data) |
732 | { | 706 | { |
733 | int dev; | 707 | int dev; |
734 | int error; | 708 | int error; |
735 | 709 | ||
736 | retry: | 710 | retry: |
737 | if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) | 711 | if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) |
738 | return -ENOMEM; | 712 | return -ENOMEM; |
739 | spin_lock(&unnamed_dev_lock); | 713 | spin_lock(&unnamed_dev_lock); |
740 | error = ida_get_new(&unnamed_dev_ida, &dev); | 714 | error = ida_get_new(&unnamed_dev_ida, &dev); |
741 | spin_unlock(&unnamed_dev_lock); | 715 | spin_unlock(&unnamed_dev_lock); |
742 | if (error == -EAGAIN) | 716 | if (error == -EAGAIN) |
743 | /* We raced and lost with another CPU. */ | 717 | /* We raced and lost with another CPU. */ |
744 | goto retry; | 718 | goto retry; |
745 | else if (error) | 719 | else if (error) |
746 | return -EAGAIN; | 720 | return -EAGAIN; |
747 | 721 | ||
748 | if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { | 722 | if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { |
749 | spin_lock(&unnamed_dev_lock); | 723 | spin_lock(&unnamed_dev_lock); |
750 | ida_remove(&unnamed_dev_ida, dev); | 724 | ida_remove(&unnamed_dev_ida, dev); |
751 | spin_unlock(&unnamed_dev_lock); | 725 | spin_unlock(&unnamed_dev_lock); |
752 | return -EMFILE; | 726 | return -EMFILE; |
753 | } | 727 | } |
754 | s->s_dev = MKDEV(0, dev & MINORMASK); | 728 | s->s_dev = MKDEV(0, dev & MINORMASK); |
755 | return 0; | 729 | return 0; |
756 | } | 730 | } |
757 | 731 | ||
758 | EXPORT_SYMBOL(set_anon_super); | 732 | EXPORT_SYMBOL(set_anon_super); |
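
set_anon_super() uses the classic two-step IDA allocation: preload memory outside the lock, allocate under it, and retry on -EAGAIN when another CPU consumed the preloaded node. The pattern in isolation (my_ida, my_lock and alloc_id() are illustrative):

    static DEFINE_IDA(my_ida);
    static DEFINE_SPINLOCK(my_lock);

    static int alloc_id(int *id)
    {
            int error;
    retry:
            if (ida_pre_get(&my_ida, GFP_KERNEL) == 0)
                    return -ENOMEM;         /* could not preload a node */
            spin_lock(&my_lock);
            error = ida_get_new(&my_ida, id);
            spin_unlock(&my_lock);
            if (error == -EAGAIN)           /* lost the race, preload again */
                    goto retry;
            return error;
    }
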
759 | 733 | ||
760 | void kill_anon_super(struct super_block *sb) | 734 | void kill_anon_super(struct super_block *sb) |
761 | { | 735 | { |
762 | int slot = MINOR(sb->s_dev); | 736 | int slot = MINOR(sb->s_dev); |
763 | 737 | ||
764 | generic_shutdown_super(sb); | 738 | generic_shutdown_super(sb); |
765 | spin_lock(&unnamed_dev_lock); | 739 | spin_lock(&unnamed_dev_lock); |
766 | ida_remove(&unnamed_dev_ida, slot); | 740 | ida_remove(&unnamed_dev_ida, slot); |
767 | spin_unlock(&unnamed_dev_lock); | 741 | spin_unlock(&unnamed_dev_lock); |
768 | } | 742 | } |
769 | 743 | ||
770 | EXPORT_SYMBOL(kill_anon_super); | 744 | EXPORT_SYMBOL(kill_anon_super); |
771 | 745 | ||
772 | void kill_litter_super(struct super_block *sb) | 746 | void kill_litter_super(struct super_block *sb) |
773 | { | 747 | { |
774 | if (sb->s_root) | 748 | if (sb->s_root) |
775 | d_genocide(sb->s_root); | 749 | d_genocide(sb->s_root); |
776 | kill_anon_super(sb); | 750 | kill_anon_super(sb); |
777 | } | 751 | } |
778 | 752 | ||
779 | EXPORT_SYMBOL(kill_litter_super); | 753 | EXPORT_SYMBOL(kill_litter_super); |
780 | 754 | ||
781 | static int ns_test_super(struct super_block *sb, void *data) | 755 | static int ns_test_super(struct super_block *sb, void *data) |
782 | { | 756 | { |
783 | return sb->s_fs_info == data; | 757 | return sb->s_fs_info == data; |
784 | } | 758 | } |
785 | 759 | ||
786 | static int ns_set_super(struct super_block *sb, void *data) | 760 | static int ns_set_super(struct super_block *sb, void *data) |
787 | { | 761 | { |
788 | sb->s_fs_info = data; | 762 | sb->s_fs_info = data; |
789 | return set_anon_super(sb, NULL); | 763 | return set_anon_super(sb, NULL); |
790 | } | 764 | } |
791 | 765 | ||
792 | int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, | 766 | int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, |
793 | int (*fill_super)(struct super_block *, void *, int), | 767 | int (*fill_super)(struct super_block *, void *, int), |
794 | struct vfsmount *mnt) | 768 | struct vfsmount *mnt) |
795 | { | 769 | { |
796 | struct super_block *sb; | 770 | struct super_block *sb; |
797 | 771 | ||
798 | sb = sget(fs_type, ns_test_super, ns_set_super, data); | 772 | sb = sget(fs_type, ns_test_super, ns_set_super, data); |
799 | if (IS_ERR(sb)) | 773 | if (IS_ERR(sb)) |
800 | return PTR_ERR(sb); | 774 | return PTR_ERR(sb); |
801 | 775 | ||
802 | if (!sb->s_root) { | 776 | if (!sb->s_root) { |
803 | int err; | 777 | int err; |
804 | sb->s_flags = flags; | 778 | sb->s_flags = flags; |
805 | err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); | 779 | err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); |
806 | if (err) { | 780 | if (err) { |
807 | deactivate_locked_super(sb); | 781 | deactivate_locked_super(sb); |
808 | return err; | 782 | return err; |
809 | } | 783 | } |
810 | 784 | ||
811 | sb->s_flags |= MS_ACTIVE; | 785 | sb->s_flags |= MS_ACTIVE; |
812 | } | 786 | } |
813 | 787 | ||
814 | simple_set_mnt(mnt, sb); | 788 | simple_set_mnt(mnt, sb); |
815 | return 0; | 789 | return 0; |
816 | } | 790 | } |
817 | 791 | ||
818 | EXPORT_SYMBOL(get_sb_ns); | 792 | EXPORT_SYMBOL(get_sb_ns); |
819 | 793 | ||
820 | #ifdef CONFIG_BLOCK | 794 | #ifdef CONFIG_BLOCK |
821 | static int set_bdev_super(struct super_block *s, void *data) | 795 | static int set_bdev_super(struct super_block *s, void *data) |
822 | { | 796 | { |
823 | s->s_bdev = data; | 797 | s->s_bdev = data; |
824 | s->s_dev = s->s_bdev->bd_dev; | 798 | s->s_dev = s->s_bdev->bd_dev; |
825 | return 0; | 799 | return 0; |
826 | } | 800 | } |
827 | 801 | ||
828 | static int test_bdev_super(struct super_block *s, void *data) | 802 | static int test_bdev_super(struct super_block *s, void *data) |
829 | { | 803 | { |
830 | return (void *)s->s_bdev == data; | 804 | return (void *)s->s_bdev == data; |
831 | } | 805 | } |
832 | 806 | ||
833 | int get_sb_bdev(struct file_system_type *fs_type, | 807 | int get_sb_bdev(struct file_system_type *fs_type, |
834 | int flags, const char *dev_name, void *data, | 808 | int flags, const char *dev_name, void *data, |
835 | int (*fill_super)(struct super_block *, void *, int), | 809 | int (*fill_super)(struct super_block *, void *, int), |
836 | struct vfsmount *mnt) | 810 | struct vfsmount *mnt) |
837 | { | 811 | { |
838 | struct block_device *bdev; | 812 | struct block_device *bdev; |
839 | struct super_block *s; | 813 | struct super_block *s; |
840 | fmode_t mode = FMODE_READ; | 814 | fmode_t mode = FMODE_READ; |
841 | int error = 0; | 815 | int error = 0; |
842 | 816 | ||
843 | if (!(flags & MS_RDONLY)) | 817 | if (!(flags & MS_RDONLY)) |
844 | mode |= FMODE_WRITE; | 818 | mode |= FMODE_WRITE; |
845 | 819 | ||
846 | bdev = open_bdev_exclusive(dev_name, mode, fs_type); | 820 | bdev = open_bdev_exclusive(dev_name, mode, fs_type); |
847 | if (IS_ERR(bdev)) | 821 | if (IS_ERR(bdev)) |
848 | return PTR_ERR(bdev); | 822 | return PTR_ERR(bdev); |
849 | 823 | ||
850 | /* | 824 | /* |
851 | * once the super is inserted into the list by sget, s_umount | 825 | * once the super is inserted into the list by sget, s_umount |
852 | * will protect the lockfs code from trying to start a snapshot | 826 | * will protect the lockfs code from trying to start a snapshot |
853 | * while we are mounting | 827 | * while we are mounting |
854 | */ | 828 | */ |
855 | down(&bdev->bd_mount_sem); | 829 | down(&bdev->bd_mount_sem); |
856 | s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); | 830 | s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); |
857 | up(&bdev->bd_mount_sem); | 831 | up(&bdev->bd_mount_sem); |
858 | if (IS_ERR(s)) | 832 | if (IS_ERR(s)) |
859 | goto error_s; | 833 | goto error_s; |
860 | 834 | ||
861 | if (s->s_root) { | 835 | if (s->s_root) { |
862 | if ((flags ^ s->s_flags) & MS_RDONLY) { | 836 | if ((flags ^ s->s_flags) & MS_RDONLY) { |
863 | deactivate_locked_super(s); | 837 | deactivate_locked_super(s); |
864 | error = -EBUSY; | 838 | error = -EBUSY; |
865 | goto error_bdev; | 839 | goto error_bdev; |
866 | } | 840 | } |
867 | 841 | ||
868 | close_bdev_exclusive(bdev, mode); | 842 | close_bdev_exclusive(bdev, mode); |
869 | } else { | 843 | } else { |
870 | char b[BDEVNAME_SIZE]; | 844 | char b[BDEVNAME_SIZE]; |
871 | 845 | ||
872 | s->s_flags = flags; | 846 | s->s_flags = flags; |
873 | s->s_mode = mode; | 847 | s->s_mode = mode; |
874 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 848 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
875 | sb_set_blocksize(s, block_size(bdev)); | 849 | sb_set_blocksize(s, block_size(bdev)); |
876 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); | 850 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); |
877 | if (error) { | 851 | if (error) { |
878 | deactivate_locked_super(s); | 852 | deactivate_locked_super(s); |
879 | goto error; | 853 | goto error; |
880 | } | 854 | } |
881 | 855 | ||
882 | s->s_flags |= MS_ACTIVE; | 856 | s->s_flags |= MS_ACTIVE; |
883 | bdev->bd_super = s; | 857 | bdev->bd_super = s; |
884 | } | 858 | } |
885 | 859 | ||
886 | simple_set_mnt(mnt, s); | 860 | simple_set_mnt(mnt, s); |
887 | return 0; | 861 | return 0; |
888 | 862 | ||
889 | error_s: | 863 | error_s: |
890 | error = PTR_ERR(s); | 864 | error = PTR_ERR(s); |
891 | error_bdev: | 865 | error_bdev: |
892 | close_bdev_exclusive(bdev, mode); | 866 | close_bdev_exclusive(bdev, mode); |
893 | error: | 867 | error: |
894 | return error; | 868 | return error; |
895 | } | 869 | } |
896 | 870 | ||
897 | EXPORT_SYMBOL(get_sb_bdev); | 871 | EXPORT_SYMBOL(get_sb_bdev); |
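
For orientation, this is how a disk filesystem of this era plugs into get_sb_bdev(); the examplefs_* names are hypothetical, but the shape matches the type->get_sb() call made by vfs_kern_mount() below. Virtual filesystems wire up get_sb_nodev()/get_sb_single() the same way, pairing them with kill_anon_super() instead:

    static int examplefs_get_sb(struct file_system_type *fs_type,
            int flags, const char *dev_name, void *data,
            struct vfsmount *mnt)
    {
            return get_sb_bdev(fs_type, flags, dev_name, data,
                               examplefs_fill_super, mnt);
    }

    static struct file_system_type examplefs_fs_type = {
            .owner          = THIS_MODULE,
            .name           = "examplefs",
            .get_sb         = examplefs_get_sb,
            .kill_sb        = kill_block_super,
            .fs_flags       = FS_REQUIRES_DEV,
    };
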
898 | 872 | ||
899 | void kill_block_super(struct super_block *sb) | 873 | void kill_block_super(struct super_block *sb) |
900 | { | 874 | { |
901 | struct block_device *bdev = sb->s_bdev; | 875 | struct block_device *bdev = sb->s_bdev; |
902 | fmode_t mode = sb->s_mode; | 876 | fmode_t mode = sb->s_mode; |
903 | 877 | ||
904 | bdev->bd_super = NULL; | 878 | bdev->bd_super = NULL; |
905 | generic_shutdown_super(sb); | 879 | generic_shutdown_super(sb); |
906 | sync_blockdev(bdev); | 880 | sync_blockdev(bdev); |
907 | close_bdev_exclusive(bdev, mode); | 881 | close_bdev_exclusive(bdev, mode); |
908 | } | 882 | } |
909 | 883 | ||
910 | EXPORT_SYMBOL(kill_block_super); | 884 | EXPORT_SYMBOL(kill_block_super); |
911 | #endif | 885 | #endif |
912 | 886 | ||
913 | int get_sb_nodev(struct file_system_type *fs_type, | 887 | int get_sb_nodev(struct file_system_type *fs_type, |
914 | int flags, void *data, | 888 | int flags, void *data, |
915 | int (*fill_super)(struct super_block *, void *, int), | 889 | int (*fill_super)(struct super_block *, void *, int), |
916 | struct vfsmount *mnt) | 890 | struct vfsmount *mnt) |
917 | { | 891 | { |
918 | int error; | 892 | int error; |
919 | struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); | 893 | struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); |
920 | 894 | ||
921 | if (IS_ERR(s)) | 895 | if (IS_ERR(s)) |
922 | return PTR_ERR(s); | 896 | return PTR_ERR(s); |
923 | 897 | ||
924 | s->s_flags = flags; | 898 | s->s_flags = flags; |
925 | 899 | ||
926 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); | 900 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); |
927 | if (error) { | 901 | if (error) { |
928 | deactivate_locked_super(s); | 902 | deactivate_locked_super(s); |
929 | return error; | 903 | return error; |
930 | } | 904 | } |
931 | s->s_flags |= MS_ACTIVE; | 905 | s->s_flags |= MS_ACTIVE; |
932 | simple_set_mnt(mnt, s); | 906 | simple_set_mnt(mnt, s); |
933 | return 0; | 907 | return 0; |
934 | } | 908 | } |
935 | 909 | ||
936 | EXPORT_SYMBOL(get_sb_nodev); | 910 | EXPORT_SYMBOL(get_sb_nodev); |
937 | 911 | ||
938 | static int compare_single(struct super_block *s, void *p) | 912 | static int compare_single(struct super_block *s, void *p) |
939 | { | 913 | { |
940 | return 1; | 914 | return 1; |
941 | } | 915 | } |
942 | 916 | ||
943 | int get_sb_single(struct file_system_type *fs_type, | 917 | int get_sb_single(struct file_system_type *fs_type, |
944 | int flags, void *data, | 918 | int flags, void *data, |
945 | int (*fill_super)(struct super_block *, void *, int), | 919 | int (*fill_super)(struct super_block *, void *, int), |
946 | struct vfsmount *mnt) | 920 | struct vfsmount *mnt) |
947 | { | 921 | { |
948 | struct super_block *s; | 922 | struct super_block *s; |
949 | int error; | 923 | int error; |
950 | 924 | ||
951 | s = sget(fs_type, compare_single, set_anon_super, NULL); | 925 | s = sget(fs_type, compare_single, set_anon_super, NULL); |
952 | if (IS_ERR(s)) | 926 | if (IS_ERR(s)) |
953 | return PTR_ERR(s); | 927 | return PTR_ERR(s); |
954 | if (!s->s_root) { | 928 | if (!s->s_root) { |
955 | s->s_flags = flags; | 929 | s->s_flags = flags; |
956 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); | 930 | error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); |
957 | if (error) { | 931 | if (error) { |
958 | deactivate_locked_super(s); | 932 | deactivate_locked_super(s); |
959 | return error; | 933 | return error; |
960 | } | 934 | } |
961 | s->s_flags |= MS_ACTIVE; | 935 | s->s_flags |= MS_ACTIVE; |
962 | } | 936 | } |
963 | do_remount_sb(s, flags, data, 0); | 937 | do_remount_sb(s, flags, data, 0); |
964 | simple_set_mnt(mnt, s); | 938 | simple_set_mnt(mnt, s); |
965 | return 0; | 939 | return 0; |
966 | } | 940 | } |
967 | 941 | ||
968 | EXPORT_SYMBOL(get_sb_single); | 942 | EXPORT_SYMBOL(get_sb_single); |
969 | 943 | ||
970 | struct vfsmount * | 944 | struct vfsmount * |
971 | vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) | 945 | vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) |
972 | { | 946 | { |
973 | struct vfsmount *mnt; | 947 | struct vfsmount *mnt; |
974 | char *secdata = NULL; | 948 | char *secdata = NULL; |
975 | int error; | 949 | int error; |
976 | 950 | ||
977 | if (!type) | 951 | if (!type) |
978 | return ERR_PTR(-ENODEV); | 952 | return ERR_PTR(-ENODEV); |
979 | 953 | ||
980 | error = -ENOMEM; | 954 | error = -ENOMEM; |
981 | mnt = alloc_vfsmnt(name); | 955 | mnt = alloc_vfsmnt(name); |
982 | if (!mnt) | 956 | if (!mnt) |
983 | goto out; | 957 | goto out; |
984 | 958 | ||
985 | if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { | 959 | if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { |
986 | secdata = alloc_secdata(); | 960 | secdata = alloc_secdata(); |
987 | if (!secdata) | 961 | if (!secdata) |
988 | goto out_mnt; | 962 | goto out_mnt; |
989 | 963 | ||
990 | error = security_sb_copy_data(data, secdata); | 964 | error = security_sb_copy_data(data, secdata); |
991 | if (error) | 965 | if (error) |
992 | goto out_free_secdata; | 966 | goto out_free_secdata; |
993 | } | 967 | } |
994 | 968 | ||
995 | error = type->get_sb(type, flags, name, data, mnt); | 969 | error = type->get_sb(type, flags, name, data, mnt); |
996 | if (error < 0) | 970 | if (error < 0) |
997 | goto out_free_secdata; | 971 | goto out_free_secdata; |
998 | BUG_ON(!mnt->mnt_sb); | 972 | BUG_ON(!mnt->mnt_sb); |
999 | 973 | ||
1000 | error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); | 974 | error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); |
1001 | if (error) | 975 | if (error) |
1002 | goto out_sb; | 976 | goto out_sb; |
1003 | 977 | ||
1004 | mnt->mnt_mountpoint = mnt->mnt_root; | 978 | mnt->mnt_mountpoint = mnt->mnt_root; |
1005 | mnt->mnt_parent = mnt; | 979 | mnt->mnt_parent = mnt; |
1006 | up_write(&mnt->mnt_sb->s_umount); | 980 | up_write(&mnt->mnt_sb->s_umount); |
1007 | free_secdata(secdata); | 981 | free_secdata(secdata); |
1008 | return mnt; | 982 | return mnt; |
1009 | out_sb: | 983 | out_sb: |
1010 | dput(mnt->mnt_root); | 984 | dput(mnt->mnt_root); |
1011 | deactivate_locked_super(mnt->mnt_sb); | 985 | deactivate_locked_super(mnt->mnt_sb); |
1012 | out_free_secdata: | 986 | out_free_secdata: |
1013 | free_secdata(secdata); | 987 | free_secdata(secdata); |
1014 | out_mnt: | 988 | out_mnt: |
1015 | free_vfsmnt(mnt); | 989 | free_vfsmnt(mnt); |
1016 | out: | 990 | out: |
1017 | return ERR_PTR(error); | 991 | return ERR_PTR(error); |
1018 | } | 992 | } |
1019 | 993 | ||
1020 | EXPORT_SYMBOL_GPL(vfs_kern_mount); | 994 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
1021 | 995 | ||
1022 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | 996 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) |
1023 | { | 997 | { |
1024 | int err; | 998 | int err; |
1025 | const char *subtype = strchr(fstype, '.'); | 999 | const char *subtype = strchr(fstype, '.'); |
1026 | if (subtype) { | 1000 | if (subtype) { |
1027 | subtype++; | 1001 | subtype++; |
1028 | err = -EINVAL; | 1002 | err = -EINVAL; |
1029 | if (!subtype[0]) | 1003 | if (!subtype[0]) |
1030 | goto err; | 1004 | goto err; |
1031 | } else | 1005 | } else |
1032 | subtype = ""; | 1006 | subtype = ""; |
1033 | 1007 | ||
1034 | mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); | 1008 | mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); |
1035 | err = -ENOMEM; | 1009 | err = -ENOMEM; |
1036 | if (!mnt->mnt_sb->s_subtype) | 1010 | if (!mnt->mnt_sb->s_subtype) |
1037 | goto err; | 1011 | goto err; |
1038 | return mnt; | 1012 | return mnt; |
1039 | 1013 | ||
1040 | err: | 1014 | err: |
1041 | mntput(mnt); | 1015 | mntput(mnt); |
1042 | return ERR_PTR(err); | 1016 | return ERR_PTR(err); |
1043 | } | 1017 | } |
1044 | 1018 | ||
1045 | struct vfsmount * | 1019 | struct vfsmount * |
1046 | do_kern_mount(const char *fstype, int flags, const char *name, void *data) | 1020 | do_kern_mount(const char *fstype, int flags, const char *name, void *data) |
1047 | { | 1021 | { |
1048 | struct file_system_type *type = get_fs_type(fstype); | 1022 | struct file_system_type *type = get_fs_type(fstype); |
1049 | struct vfsmount *mnt; | 1023 | struct vfsmount *mnt; |
1050 | if (!type) | 1024 | if (!type) |
1051 | return ERR_PTR(-ENODEV); | 1025 | return ERR_PTR(-ENODEV); |
1052 | mnt = vfs_kern_mount(type, flags, name, data); | 1026 | mnt = vfs_kern_mount(type, flags, name, data); |
1053 | if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && | 1027 | if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && |
1054 | !mnt->mnt_sb->s_subtype) | 1028 | !mnt->mnt_sb->s_subtype) |
1055 | mnt = fs_set_subtype(mnt, fstype); | 1029 | mnt = fs_set_subtype(mnt, fstype); |
1056 | put_filesystem(type); | 1030 | put_filesystem(type); |
1057 | return mnt; | 1031 | return mnt; |
1058 | } | 1032 | } |
1059 | EXPORT_SYMBOL_GPL(do_kern_mount); | 1033 | EXPORT_SYMBOL_GPL(do_kern_mount); |
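
A concrete illustration of the subtype handling (hedged: FUSE is the usual FS_HAS_SUBTYPE user, and the dev_name here is made up):

    /*
     * fstype "fuse.sshfs": get_fs_type() resolves the "fuse" type and
     * fs_set_subtype() stores everything after the dot.
     */
    mnt = do_kern_mount("fuse.sshfs", 0, "host:/", data);
    /* on success: mnt->mnt_sb->s_subtype == "sshfs" */
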
1060 | 1034 | ||
1061 | struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) | 1035 | struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) |
fs/sync.c
1 | /* | 1 | /* |
2 | * High-level sync()-related operations | 2 | * High-level sync()-related operations |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
6 | #include <linux/file.h> | 6 | #include <linux/file.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/writeback.h> | 10 | #include <linux/writeback.h> |
11 | #include <linux/syscalls.h> | 11 | #include <linux/syscalls.h> |
12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/quotaops.h> | 14 | #include <linux/quotaops.h> |
15 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
16 | #include "internal.h" | 16 | #include "internal.h" |
17 | 17 | ||
18 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ | 18 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ |
19 | SYNC_FILE_RANGE_WAIT_AFTER) | 19 | SYNC_FILE_RANGE_WAIT_AFTER) |
20 | 20 | ||
21 | /* | 21 | SYSCALL_DEFINE0(sync) |
22 | * sync everything. Start out by waking pdflush, because that writes back | ||
23 | * all queues in parallel. | ||
24 | */ | ||
25 | static void do_sync(unsigned long wait) | ||
26 | { | 22 | { |
27 | wakeup_pdflush(0); | 23 | sync_filesystems(0); |
28 | sync_inodes(0); /* All mappings, inodes and their blockdevs */ | 24 | sync_filesystems(1); |
29 | vfs_dq_sync(NULL); | ||
30 | sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ | ||
31 | sync_supers(); /* Write the superblocks */ | ||
32 | sync_filesystems(0); /* Start syncing the filesystems */ | ||
33 | sync_filesystems(wait); /* Waitingly sync the filesystems */ | ||
34 | sync_blockdevs(); | ||
35 | if (!wait) | ||
36 | printk("Emergency Sync complete\n"); | ||
37 | if (unlikely(laptop_mode)) | 25 | if (unlikely(laptop_mode)) |
38 | laptop_sync_completion(); | 26 | laptop_sync_completion(); |
39 | } | ||
40 | |||
41 | SYSCALL_DEFINE0(sync) | ||
42 | { | ||
43 | do_sync(1); | ||
44 | return 0; | 27 | return 0; |
45 | } | 28 | } |
46 | 29 | ||
47 | static void do_sync_work(struct work_struct *work) | 30 | static void do_sync_work(struct work_struct *work) |
48 | { | 31 | { |
49 | do_sync(0); | 32 | /* |
33 | * Sync twice to reduce the possibility that we skipped some inodes / pages | ||
34 | * because they were temporarily locked | ||
35 | */ | ||
36 | sync_filesystems(0); | ||
37 | sync_filesystems(0); | ||
38 | printk("Emergency Sync complete\n"); | ||
50 | kfree(work); | 39 | kfree(work); |
51 | } | 40 | } |
52 | 41 | ||
53 | void emergency_sync(void) | 42 | void emergency_sync(void) |
54 | { | 43 | { |
55 | struct work_struct *work; | 44 | struct work_struct *work; |
56 | 45 | ||
57 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 46 | work = kmalloc(sizeof(*work), GFP_ATOMIC); |
58 | if (work) { | 47 | if (work) { |
59 | INIT_WORK(work, do_sync_work); | 48 | INIT_WORK(work, do_sync_work); |
60 | schedule_work(work); | 49 | schedule_work(work); |
61 | } | 50 | } |
62 | } | 51 | } |
63 | 52 | ||
64 | /* | 53 | /* |
65 | * Generic function to fsync a file. | 54 | * Generic function to fsync a file. |
66 | * | 55 | * |
67 | * filp may be NULL if called via the msync of a vma. | 56 | * filp may be NULL if called via the msync of a vma. |
68 | */ | 57 | */ |
69 | int file_fsync(struct file *filp, struct dentry *dentry, int datasync) | 58 | int file_fsync(struct file *filp, struct dentry *dentry, int datasync) |
70 | { | 59 | { |
71 | struct inode * inode = dentry->d_inode; | 60 | struct inode * inode = dentry->d_inode; |
72 | struct super_block * sb; | 61 | struct super_block * sb; |
73 | int ret, err; | 62 | int ret, err; |
74 | 63 | ||
75 | /* sync the inode to buffers */ | 64 | /* sync the inode to buffers */ |
76 | ret = write_inode_now(inode, 0); | 65 | ret = write_inode_now(inode, 0); |
77 | 66 | ||
78 | /* sync the superblock to buffers */ | 67 | /* sync the superblock to buffers */ |
79 | sb = inode->i_sb; | 68 | sb = inode->i_sb; |
80 | lock_super(sb); | 69 | lock_super(sb); |
81 | if (sb->s_dirt && sb->s_op->write_super) | 70 | if (sb->s_dirt && sb->s_op->write_super) |
82 | sb->s_op->write_super(sb); | 71 | sb->s_op->write_super(sb); |
83 | unlock_super(sb); | 72 | unlock_super(sb); |
84 | 73 | ||
85 | /* .. finally sync the buffers to disk */ | 74 | /* .. finally sync the buffers to disk */ |
86 | err = sync_blockdev(sb->s_bdev); | 75 | err = sync_blockdev(sb->s_bdev); |
87 | if (!ret) | 76 | if (!ret) |
88 | ret = err; | 77 | ret = err; |
89 | return ret; | 78 | return ret; |
90 | } | 79 | } |
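
file_fsync() is the stock implementation that simple filesystems drop straight into their file_operations (sketch; the neighboring ops are illustrative):

    const struct file_operations examplefs_file_ops = {
            .read   = do_sync_read,
            .write  = do_sync_write,
            .fsync  = file_fsync,   /* inode, then superblock, then blockdev */
    };
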
91 | 80 | ||
92 | /** | 81 | /** |
93 | * vfs_fsync - perform a fsync or fdatasync on a file | 82 | * vfs_fsync - perform a fsync or fdatasync on a file |
94 | * @file: file to sync | 83 | * @file: file to sync |
95 | * @dentry: dentry of @file | 84 | * @dentry: dentry of @file |
96 | * @datasync: only perform an fdatasync operation | 85 | * @datasync: only perform an fdatasync operation |
97 | * | 86 | * |
98 | * Write back data and metadata for @file to disk. If @datasync is | 87 | * Write back data and metadata for @file to disk. If @datasync is |
99 | * set only metadata needed to access modified file data is written. | 88 | * set only metadata needed to access modified file data is written. |
100 | * | 89 | * |
101 | * In case this function is called from nfsd @file may be %NULL and | 90 | * In case this function is called from nfsd @file may be %NULL and |
102 | * only @dentry is set. This can only happen when the filesystem | 91 | * only @dentry is set. This can only happen when the filesystem |
103 | * implements the export_operations API. | 92 | * implements the export_operations API. |
104 | */ | 93 | */ |
105 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 94 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) |
106 | { | 95 | { |
107 | const struct file_operations *fop; | 96 | const struct file_operations *fop; |
108 | struct address_space *mapping; | 97 | struct address_space *mapping; |
109 | int err, ret; | 98 | int err, ret; |
110 | 99 | ||
111 | /* | 100 | /* |
112 | * Get mapping and operations from the file in case we have | 101 | * Get mapping and operations from the file in case we have |
113 | * as file, or get the default values for them in case we | 102 | * as file, or get the default values for them in case we |
114 | * don't have a struct file available. Damn nfsd.. | 103 | * don't have a struct file available. Damn nfsd.. |
115 | */ | 104 | */ |
116 | if (file) { | 105 | if (file) { |
117 | mapping = file->f_mapping; | 106 | mapping = file->f_mapping; |
118 | fop = file->f_op; | 107 | fop = file->f_op; |
119 | } else { | 108 | } else { |
120 | mapping = dentry->d_inode->i_mapping; | 109 | mapping = dentry->d_inode->i_mapping; |
121 | fop = dentry->d_inode->i_fop; | 110 | fop = dentry->d_inode->i_fop; |
122 | } | 111 | } |
123 | 112 | ||
124 | if (!fop || !fop->fsync) { | 113 | if (!fop || !fop->fsync) { |
125 | ret = -EINVAL; | 114 | ret = -EINVAL; |
126 | goto out; | 115 | goto out; |
127 | } | 116 | } |
128 | 117 | ||
129 | ret = filemap_fdatawrite(mapping); | 118 | ret = filemap_fdatawrite(mapping); |
130 | 119 | ||
131 | /* | 120 | /* |
132 | * We need to protect against concurrent writers, which could cause | 121 | * We need to protect against concurrent writers, which could cause |
133 | * livelocks in fsync_buffers_list(). | 122 | * livelocks in fsync_buffers_list(). |
134 | */ | 123 | */ |
135 | mutex_lock(&mapping->host->i_mutex); | 124 | mutex_lock(&mapping->host->i_mutex); |
136 | err = fop->fsync(file, dentry, datasync); | 125 | err = fop->fsync(file, dentry, datasync); |
137 | if (!ret) | 126 | if (!ret) |
138 | ret = err; | 127 | ret = err; |
139 | mutex_unlock(&mapping->host->i_mutex); | 128 | mutex_unlock(&mapping->host->i_mutex); |
140 | err = filemap_fdatawait(mapping); | 129 | err = filemap_fdatawait(mapping); |
141 | if (!ret) | 130 | if (!ret) |
142 | ret = err; | 131 | ret = err; |
143 | out: | 132 | out: |
144 | return ret; | 133 | return ret; |
145 | } | 134 | } |
146 | EXPORT_SYMBOL(vfs_fsync); | 135 | EXPORT_SYMBOL(vfs_fsync); |
147 | 136 | ||
148 | static int do_fsync(unsigned int fd, int datasync) | 137 | static int do_fsync(unsigned int fd, int datasync) |
149 | { | 138 | { |
150 | struct file *file; | 139 | struct file *file; |
151 | int ret = -EBADF; | 140 | int ret = -EBADF; |
152 | 141 | ||
153 | file = fget(fd); | 142 | file = fget(fd); |
154 | if (file) { | 143 | if (file) { |
155 | ret = vfs_fsync(file, file->f_path.dentry, datasync); | 144 | ret = vfs_fsync(file, file->f_path.dentry, datasync); |
156 | fput(file); | 145 | fput(file); |
157 | } | 146 | } |
158 | return ret; | 147 | return ret; |
159 | } | 148 | } |
160 | 149 | ||
161 | SYSCALL_DEFINE1(fsync, unsigned int, fd) | 150 | SYSCALL_DEFINE1(fsync, unsigned int, fd) |
162 | { | 151 | { |
163 | return do_fsync(fd, 0); | 152 | return do_fsync(fd, 0); |
164 | } | 153 | } |
165 | 154 | ||
166 | SYSCALL_DEFINE1(fdatasync, unsigned int, fd) | 155 | SYSCALL_DEFINE1(fdatasync, unsigned int, fd) |
167 | { | 156 | { |
168 | return do_fsync(fd, 1); | 157 | return do_fsync(fd, 1); |
169 | } | 158 | } |
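
Both syscalls funnel into do_fsync(); the only difference is the datasync flag, which lets the filesystem skip metadata (e.g. timestamps) that is not needed to read the data back. A minimal userspace sketch of the distinction:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char buf[] = "committed record\n";
	int fd = open("journal.log", O_WRONLY | O_CREAT | O_APPEND, 0644);

	if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
		perror("write");
		return 1;
	}
	if (fsync(fd) < 0)	/* do_fsync(fd, 0): data + metadata */
		perror("fsync");
	if (fdatasync(fd) < 0)	/* do_fsync(fd, 1): data + minimal metadata */
		perror("fdatasync");
	close(fd);
	return 0;
}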
170 | 159 | ||
171 | /* | 160 | /* |
172 | * sys_sync_file_range() permits finely controlled syncing over a segment of | 161 | * sys_sync_file_range() permits finely controlled syncing over a segment of |
173 | * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is | 162 | * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is |
174 | * zero then sys_sync_file_range() will operate from offset out to EOF. | 163 | * zero then sys_sync_file_range() will operate from offset out to EOF. |
175 | * | 164 | * |
176 | * The flag bits are: | 165 | * The flag bits are: |
177 | * | 166 | * |
178 | * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range | 167 | * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range |
179 | * before performing the write. | 168 | * before performing the write. |
180 | * | 169 | * |
181 | * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the | 170 | * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the |
182 | * range which are not presently under writeback. Note that this may block for | 171 | * range which are not presently under writeback. Note that this may block for |
183 | * significant periods due to exhaustion of disk request structures. | 172 | * significant periods due to exhaustion of disk request structures. |
184 | * | 173 | * |
185 | * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range | 174 | * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range |
186 | * after performing the write. | 175 | * after performing the write. |
187 | * | 176 | * |
188 | * Useful combinations of the flag bits are: | 177 | * Useful combinations of the flag bits are: |
189 | * | 178 | * |
190 | * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages | 179 | * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages |
191 | * in the range which were dirty on entry to sys_sync_file_range() are placed | 180 | * in the range which were dirty on entry to sys_sync_file_range() are placed |
192 | * under writeout. This is a start-write-for-data-integrity operation. | 181 | * under writeout. This is a start-write-for-data-integrity operation. |
193 | * | 182 | * |
194 | * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which | 183 | * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which |
195 | * are not presently under writeout. This is an asynchronous flush-to-disk | 184 | * are not presently under writeout. This is an asynchronous flush-to-disk |
196 | * operation. Not suitable for data integrity operations. | 185 | * operation. Not suitable for data integrity operations. |
197 | * | 186 | * |
198 | * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for | 187 | * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for |
199 | * completion of writeout of all pages in the range. This will be used after an | 188 | * completion of writeout of all pages in the range. This will be used after an |
200 | * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait | 189 | * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait |
201 | * for that operation to complete and to return the result. | 190 | * for that operation to complete and to return the result. |
202 | * | 191 | * |
203 | * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER: | 192 | * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER: |
204 | * a traditional sync() operation. This is a write-for-data-integrity operation | 193 | * a traditional sync() operation. This is a write-for-data-integrity operation |
205 | * which will ensure that all pages in the range which were dirty on entry to | 194 | * which will ensure that all pages in the range which were dirty on entry to |
206 | * sys_sync_file_range() are committed to disk. | 195 | * sys_sync_file_range() are committed to disk. |
207 | * | 196 | * |
208 | * | 197 | * |
209 | * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any | 198 | * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any |
210 | * I/O errors or ENOSPC conditions and will return those to the caller, after | 199 | * I/O errors or ENOSPC conditions and will return those to the caller, after |
211 | * clearing the EIO and ENOSPC flags in the address_space. | 200 | * clearing the EIO and ENOSPC flags in the address_space. |
212 | * | 201 | * |
213 | * It should be noted that none of these operations write out the file's | 202 | * It should be noted that none of these operations write out the file's |
214 | * metadata. So unless the application is strictly performing overwrites of | 203 | * metadata. So unless the application is strictly performing overwrites of |
215 | * already-instantiated disk blocks, there are no guarantees here that the data | 204 | * already-instantiated disk blocks, there are no guarantees here that the data |
216 | * will be available after a crash. | 205 | * will be available after a crash. |
217 | */ | 206 | */ |
218 | SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | 207 | SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, |
219 | unsigned int flags) | 208 | unsigned int flags) |
220 | { | 209 | { |
221 | int ret; | 210 | int ret; |
222 | struct file *file; | 211 | struct file *file; |
223 | loff_t endbyte; /* inclusive */ | 212 | loff_t endbyte; /* inclusive */ |
224 | int fput_needed; | 213 | int fput_needed; |
225 | umode_t i_mode; | 214 | umode_t i_mode; |
226 | 215 | ||
227 | ret = -EINVAL; | 216 | ret = -EINVAL; |
228 | if (flags & ~VALID_FLAGS) | 217 | if (flags & ~VALID_FLAGS) |
229 | goto out; | 218 | goto out; |
230 | 219 | ||
231 | endbyte = offset + nbytes; | 220 | endbyte = offset + nbytes; |
232 | 221 | ||
233 | if ((s64)offset < 0) | 222 | if ((s64)offset < 0) |
234 | goto out; | 223 | goto out; |
235 | if ((s64)endbyte < 0) | 224 | if ((s64)endbyte < 0) |
236 | goto out; | 225 | goto out; |
237 | if (endbyte < offset) | 226 | if (endbyte < offset) |
238 | goto out; | 227 | goto out; |
239 | 228 | ||
240 | if (sizeof(pgoff_t) == 4) { | 229 | if (sizeof(pgoff_t) == 4) { |
241 | if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { | 230 | if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { |
242 | /* | 231 | /* |
243 | * The range starts outside a 32 bit machine's | 232 | * The range starts outside a 32 bit machine's |
244 | * pagecache addressing capabilities. Let it "succeed" | 233 | * pagecache addressing capabilities. Let it "succeed" |
245 | */ | 234 | */ |
246 | ret = 0; | 235 | ret = 0; |
247 | goto out; | 236 | goto out; |
248 | } | 237 | } |
249 | if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { | 238 | if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { |
250 | /* | 239 | /* |
251 | * Out to EOF | 240 | * Out to EOF |
252 | */ | 241 | */ |
253 | nbytes = 0; | 242 | nbytes = 0; |
254 | } | 243 | } |
255 | } | 244 | } |
256 | 245 | ||
257 | if (nbytes == 0) | 246 | if (nbytes == 0) |
258 | endbyte = LLONG_MAX; | 247 | endbyte = LLONG_MAX; |
259 | else | 248 | else |
260 | endbyte--; /* inclusive */ | 249 | endbyte--; /* inclusive */ |
261 | 250 | ||
262 | ret = -EBADF; | 251 | ret = -EBADF; |
263 | file = fget_light(fd, &fput_needed); | 252 | file = fget_light(fd, &fput_needed); |
264 | if (!file) | 253 | if (!file) |
265 | goto out; | 254 | goto out; |
266 | 255 | ||
267 | i_mode = file->f_path.dentry->d_inode->i_mode; | 256 | i_mode = file->f_path.dentry->d_inode->i_mode; |
268 | ret = -ESPIPE; | 257 | ret = -ESPIPE; |
269 | if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && | 258 | if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && |
270 | !S_ISLNK(i_mode)) | 259 | !S_ISLNK(i_mode)) |
271 | goto out_put; | 260 | goto out_put; |
272 | 261 | ||
273 | ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); | 262 | ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); |
274 | out_put: | 263 | out_put: |
275 | fput_light(file, fput_needed); | 264 | fput_light(file, fput_needed); |
276 | out: | 265 | out: |
277 | return ret; | 266 | return ret; |
278 | } | 267 | } |
279 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS | 268 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS |
280 | asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, | 269 | asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, |
281 | long flags) | 270 | long flags) |
282 | { | 271 | { |
283 | return SYSC_sync_file_range((int) fd, offset, nbytes, | 272 | return SYSC_sync_file_range((int) fd, offset, nbytes, |
284 | (unsigned int) flags); | 273 | (unsigned int) flags); |
285 | } | 274 | } |
286 | SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); | 275 | SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); |
287 | #endif | 276 | #endif |
288 | 277 | ||
289 | /* It would be nice if people remembered that not all the world's an i386 | 278 | /* It would be nice if people remembered that not all the world's an i386 |
290 | when they introduce new system calls */ | 279 | when they introduce new system calls */ |
291 | SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, | 280 | SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, |
292 | loff_t offset, loff_t nbytes) | 281 | loff_t offset, loff_t nbytes) |
293 | { | 282 | { |
294 | return sys_sync_file_range(fd, offset, nbytes, flags); | 283 | return sys_sync_file_range(fd, offset, nbytes, flags); |
295 | } | 284 | } |
296 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS | 285 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS |
297 | asmlinkage long SyS_sync_file_range2(long fd, long flags, | 286 | asmlinkage long SyS_sync_file_range2(long fd, long flags, |
298 | loff_t offset, loff_t nbytes) | 287 | loff_t offset, loff_t nbytes) |
299 | { | 288 | { |
300 | return SYSC_sync_file_range2((int) fd, (unsigned int) flags, | 289 | return SYSC_sync_file_range2((int) fd, (unsigned int) flags, |
301 | offset, nbytes); | 290 | offset, nbytes); |
302 | } | 291 | } |
303 | SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); | 292 | SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); |
304 | #endif | 293 | #endif |
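
The flag documentation above maps directly onto userspace usage. A sketch of the "traditional sync()" combination over the first megabyte of a file, assuming a glibc recent enough to expose sync_file_range() (and remember: as noted above, no file metadata is written):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Wait for in-flight writeback, start writeout of dirty pages
	 * in the range, then wait for that writeout to complete. */
	if (sync_file_range(fd, 0, 1 << 20,
			    SYNC_FILE_RANGE_WAIT_BEFORE |
			    SYNC_FILE_RANGE_WRITE |
			    SYNC_FILE_RANGE_WAIT_AFTER) < 0)
		perror("sync_file_range");
	close(fd);
	return 0;
}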
305 | 294 | ||
306 | /* | 295 | /* |
307 | * `endbyte' is inclusive | 296 | * `endbyte' is inclusive |
308 | */ | 297 | */ |
309 | int do_sync_mapping_range(struct address_space *mapping, loff_t offset, | 298 | int do_sync_mapping_range(struct address_space *mapping, loff_t offset, |
310 | loff_t endbyte, unsigned int flags) | 299 | loff_t endbyte, unsigned int flags) |
311 | { | 300 | { |
312 | int ret; | 301 | int ret; |
313 | 302 | ||
314 | if (!mapping) { | 303 | if (!mapping) { |
315 | ret = -EINVAL; | 304 | ret = -EINVAL; |
316 | goto out; | 305 | goto out; |
317 | } | 306 | } |
318 | 307 | ||
319 | ret = 0; | 308 | ret = 0; |
320 | if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { | 309 | if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { |
321 | ret = wait_on_page_writeback_range(mapping, | 310 | ret = wait_on_page_writeback_range(mapping, |
322 | offset >> PAGE_CACHE_SHIFT, | 311 | offset >> PAGE_CACHE_SHIFT, |
323 | endbyte >> PAGE_CACHE_SHIFT); | 312 | endbyte >> PAGE_CACHE_SHIFT); |
324 | if (ret < 0) | 313 | if (ret < 0) |
325 | goto out; | 314 | goto out; |
326 | } | 315 | } |
327 | 316 | ||
328 | if (flags & SYNC_FILE_RANGE_WRITE) { | 317 | if (flags & SYNC_FILE_RANGE_WRITE) { |
329 | ret = __filemap_fdatawrite_range(mapping, offset, endbyte, | 318 | ret = __filemap_fdatawrite_range(mapping, offset, endbyte, |
330 | WB_SYNC_ALL); | 319 | WB_SYNC_ALL); |
331 | if (ret < 0) | 320 | if (ret < 0) |
332 | goto out; | 321 | goto out; |
333 | } | 322 | } |
334 | 323 | ||
335 | if (flags & SYNC_FILE_RANGE_WAIT_AFTER) { | 324 | if (flags & SYNC_FILE_RANGE_WAIT_AFTER) { |
336 | ret = wait_on_page_writeback_range(mapping, | 325 | ret = wait_on_page_writeback_range(mapping, |
337 | offset >> PAGE_CACHE_SHIFT, | 326 | offset >> PAGE_CACHE_SHIFT, |
338 | endbyte >> PAGE_CACHE_SHIFT); | 327 | endbyte >> PAGE_CACHE_SHIFT); |
include/linux/fs.h
1 | #ifndef _LINUX_FS_H | 1 | #ifndef _LINUX_FS_H |
2 | #define _LINUX_FS_H | 2 | #define _LINUX_FS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This file has definitions for some important file table | 5 | * This file has definitions for some important file table |
6 | * structures etc. | 6 | * structures etc. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/limits.h> | 9 | #include <linux/limits.h> |
10 | #include <linux/ioctl.h> | 10 | #include <linux/ioctl.h> |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change | 13 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change |
14 | * the file limit at runtime and only root can increase the per-process | 14 | * the file limit at runtime and only root can increase the per-process |
15 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute | 15 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
16 | * upper limit on files-per-process. | 16 | * upper limit on files-per-process. |
17 | * | 17 | * |
18 | * Some programs (notably those using select()) may have to be | 18 | * Some programs (notably those using select()) may have to be |
19 | * recompiled to take full advantage of the new limits.. | 19 | * recompiled to take full advantage of the new limits.. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | /* Fixed constants first: */ | 22 | /* Fixed constants first: */ |
23 | #undef NR_OPEN | 23 | #undef NR_OPEN |
24 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ | 24 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ |
25 | 25 | ||
26 | #define BLOCK_SIZE_BITS 10 | 26 | #define BLOCK_SIZE_BITS 10 |
27 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) | 27 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) |
28 | 28 | ||
29 | #define SEEK_SET 0 /* seek relative to beginning of file */ | 29 | #define SEEK_SET 0 /* seek relative to beginning of file */ |
30 | #define SEEK_CUR 1 /* seek relative to current file position */ | 30 | #define SEEK_CUR 1 /* seek relative to current file position */ |
31 | #define SEEK_END 2 /* seek relative to end of file */ | 31 | #define SEEK_END 2 /* seek relative to end of file */ |
32 | #define SEEK_MAX SEEK_END | 32 | #define SEEK_MAX SEEK_END |
33 | 33 | ||
34 | /* And dynamically-tunable limits and defaults: */ | 34 | /* And dynamically-tunable limits and defaults: */ |
35 | struct files_stat_struct { | 35 | struct files_stat_struct { |
36 | int nr_files; /* read only */ | 36 | int nr_files; /* read only */ |
37 | int nr_free_files; /* read only */ | 37 | int nr_free_files; /* read only */ |
38 | int max_files; /* tunable */ | 38 | int max_files; /* tunable */ |
39 | }; | 39 | }; |
40 | 40 | ||
41 | struct inodes_stat_t { | 41 | struct inodes_stat_t { |
42 | int nr_inodes; | 42 | int nr_inodes; |
43 | int nr_unused; | 43 | int nr_unused; |
44 | int dummy[5]; /* padding for sysctl ABI compatibility */ | 44 | int dummy[5]; /* padding for sysctl ABI compatibility */ |
45 | }; | 45 | }; |
46 | 46 | ||
47 | 47 | ||
48 | #define NR_FILE 8192 /* this can well be larger on a larger system */ | 48 | #define NR_FILE 8192 /* this can well be larger on a larger system */ |
49 | 49 | ||
50 | #define MAY_EXEC 1 | 50 | #define MAY_EXEC 1 |
51 | #define MAY_WRITE 2 | 51 | #define MAY_WRITE 2 |
52 | #define MAY_READ 4 | 52 | #define MAY_READ 4 |
53 | #define MAY_APPEND 8 | 53 | #define MAY_APPEND 8 |
54 | #define MAY_ACCESS 16 | 54 | #define MAY_ACCESS 16 |
55 | #define MAY_OPEN 32 | 55 | #define MAY_OPEN 32 |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond | 58 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond |
59 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() | 59 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() |
60 | */ | 60 | */ |
61 | 61 | ||
62 | /* file is open for reading */ | 62 | /* file is open for reading */ |
63 | #define FMODE_READ ((__force fmode_t)1) | 63 | #define FMODE_READ ((__force fmode_t)1) |
64 | /* file is open for writing */ | 64 | /* file is open for writing */ |
65 | #define FMODE_WRITE ((__force fmode_t)2) | 65 | #define FMODE_WRITE ((__force fmode_t)2) |
66 | /* file is seekable */ | 66 | /* file is seekable */ |
67 | #define FMODE_LSEEK ((__force fmode_t)4) | 67 | #define FMODE_LSEEK ((__force fmode_t)4) |
68 | /* file can be accessed using pread */ | 68 | /* file can be accessed using pread */ |
69 | #define FMODE_PREAD ((__force fmode_t)8) | 69 | #define FMODE_PREAD ((__force fmode_t)8) |
70 | /* file can be accessed using pwrite */ | 70 | /* file can be accessed using pwrite */ |
71 | #define FMODE_PWRITE ((__force fmode_t)16) | 71 | #define FMODE_PWRITE ((__force fmode_t)16) |
72 | /* File is opened for execution with sys_execve / sys_uselib */ | 72 | /* File is opened for execution with sys_execve / sys_uselib */ |
73 | #define FMODE_EXEC ((__force fmode_t)32) | 73 | #define FMODE_EXEC ((__force fmode_t)32) |
74 | /* File is opened with O_NDELAY (only set for block devices) */ | 74 | /* File is opened with O_NDELAY (only set for block devices) */ |
75 | #define FMODE_NDELAY ((__force fmode_t)64) | 75 | #define FMODE_NDELAY ((__force fmode_t)64) |
76 | /* File is opened with O_EXCL (only set for block devices) */ | 76 | /* File is opened with O_EXCL (only set for block devices) */ |
77 | #define FMODE_EXCL ((__force fmode_t)128) | 77 | #define FMODE_EXCL ((__force fmode_t)128) |
78 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls | 78 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls |
79 | (special hack for floppy.c) */ | 79 | (special hack for floppy.c) */ |
80 | #define FMODE_WRITE_IOCTL ((__force fmode_t)256) | 80 | #define FMODE_WRITE_IOCTL ((__force fmode_t)256) |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Don't update ctime and mtime. | 83 | * Don't update ctime and mtime. |
84 | * | 84 | * |
85 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll | 85 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll |
86 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. | 86 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. |
87 | */ | 87 | */ |
88 | #define FMODE_NOCMTIME ((__force fmode_t)2048) | 88 | #define FMODE_NOCMTIME ((__force fmode_t)2048) |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * The below are the various read and write types that we support. Some of | 91 | * The below are the various read and write types that we support. Some of |
92 | * them include behavioral modifiers that send information down to the | 92 | * them include behavioral modifiers that send information down to the |
93 | * block layer and IO scheduler. Terminology: | 93 | * block layer and IO scheduler. Terminology: |
94 | * | 94 | * |
95 | * The block layer uses device plugging to defer IO a little bit, in | 95 | * The block layer uses device plugging to defer IO a little bit, in |
96 | * the hope that we will see more IO very shortly. This increases | 96 | * the hope that we will see more IO very shortly. This increases |
97 | * coalescing of adjacent IO and thus reduces the number of IOs we | 97 | * coalescing of adjacent IO and thus reduces the number of IOs we |
98 | * have to send to the device. It also allows for better queuing, | 98 | * have to send to the device. It also allows for better queuing, |
99 | * if the IO isn't mergeable. If the caller is going to be waiting | 99 | * if the IO isn't mergeable. If the caller is going to be waiting |
100 | * for the IO, then he must ensure that the device is unplugged so | 100 | * for the IO, then he must ensure that the device is unplugged so |
101 | * that the IO is dispatched to the driver. | 101 | * that the IO is dispatched to the driver. |
102 | * | 102 | * |
103 | * All IO is handled async in Linux. This is fine for background | 103 | * All IO is handled async in Linux. This is fine for background |
104 | * writes, but for reads or writes that someone waits for completion | 104 | * writes, but for reads or writes that someone waits for completion |
105 | * on, we want to notify the block layer and IO scheduler so that they | 105 | * on, we want to notify the block layer and IO scheduler so that they |
106 | * know about it. That allows them to make better scheduling | 106 | * know about it. That allows them to make better scheduling |
107 | * decisions. So when the below references 'sync' and 'async', it | 107 | * decisions. So when the below references 'sync' and 'async', it |
108 | * is referencing this priority hint. | 108 | * is referencing this priority hint. |
109 | * | 109 | * |
110 | * With that in mind, the available types are: | 110 | * With that in mind, the available types are: |
111 | * | 111 | * |
112 | * READ A normal read operation. Device will be plugged. | 112 | * READ A normal read operation. Device will be plugged. |
113 | * READ_SYNC A synchronous read. Device is not plugged, caller can | 113 | * READ_SYNC A synchronous read. Device is not plugged, caller can |
114 | * immediately wait on this read without caring about | 114 | * immediately wait on this read without caring about |
115 | * unplugging. | 115 | * unplugging. |
116 | * READA Used for read-ahead operations. Lower priority, and the | 116 | * READA Used for read-ahead operations. Lower priority, and the |
117 | * block layer could (in theory) choose to ignore this | 117 | * block layer could (in theory) choose to ignore this |
118 | * request if it runs into resource problems. | 118 | * request if it runs into resource problems. |
119 | * WRITE A normal async write. Device will be plugged. | 119 | * WRITE A normal async write. Device will be plugged. |
120 | * SWRITE Like WRITE, but a special case for ll_rw_block() that | 120 | * SWRITE Like WRITE, but a special case for ll_rw_block() that |
121 | * tells it to lock the buffer first. Normally a buffer | 121 | * tells it to lock the buffer first. Normally a buffer |
122 | * must be locked before doing IO. | 122 | * must be locked before doing IO. |
123 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down | 123 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down |
124 | * the hint that someone will be waiting on this IO | 124 | * the hint that someone will be waiting on this IO |
125 | * shortly. The device must still be unplugged explicitly, | 125 | * shortly. The device must still be unplugged explicitly, |
126 | * WRITE_SYNC_PLUG does not do this as we could be | 126 | * WRITE_SYNC_PLUG does not do this as we could be |
127 | * submitting more writes before we actually wait on any | 127 | * submitting more writes before we actually wait on any |
128 | * of them. | 128 | * of them. |
129 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device | 129 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device |
130 | * immediately after submission. The write equivalent | 130 | * immediately after submission. The write equivalent |
131 | * of READ_SYNC. | 131 | * of READ_SYNC. |
132 | * WRITE_ODIRECT Special case write for O_DIRECT only. | 132 | * WRITE_ODIRECT Special case write for O_DIRECT only. |
133 | * SWRITE_SYNC | 133 | * SWRITE_SYNC |
134 | * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. | 134 | * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. |
135 | * See SWRITE. | 135 | * See SWRITE. |
136 | * WRITE_BARRIER Like WRITE, but tells the block layer that all | 136 | * WRITE_BARRIER Like WRITE, but tells the block layer that all |
137 | * previously submitted writes must be safely on storage | 137 | * previously submitted writes must be safely on storage |
138 | * before this one is started. Also guarantees that when | 138 | * before this one is started. Also guarantees that when |
139 | * this write is complete, it itself is also safely on | 139 | * this write is complete, it itself is also safely on |
140 | * storage. Prevents reordering of writes on both sides | 140 | * storage. Prevents reordering of writes on both sides |
141 | * of this IO. | 141 | * of this IO. |
142 | * | 142 | * |
143 | */ | 143 | */ |
144 | #define RW_MASK 1 | 144 | #define RW_MASK 1 |
145 | #define RWA_MASK 2 | 145 | #define RWA_MASK 2 |
146 | #define READ 0 | 146 | #define READ 0 |
147 | #define WRITE 1 | 147 | #define WRITE 1 |
148 | #define READA 2 /* read-ahead - don't block if no resources */ | 148 | #define READA 2 /* read-ahead - don't block if no resources */ |
149 | #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ | 149 | #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ |
150 | #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) | 150 | #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) |
151 | #define READ_META (READ | (1 << BIO_RW_META)) | 151 | #define READ_META (READ | (1 << BIO_RW_META)) |
152 | #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) | 152 | #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) |
153 | #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) | 153 | #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) |
154 | #define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) | 154 | #define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) |
155 | #define SWRITE_SYNC_PLUG \ | 155 | #define SWRITE_SYNC_PLUG \ |
156 | (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) | 156 | (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) |
157 | #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) | 157 | #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) |
158 | #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) | 158 | #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) |
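
A caller that intends to wait on a single buffer wants both the sync priority hint and an immediate unplug, which is exactly what WRITE_SYNC encodes; this is roughly the pattern behind sync_dirty_buffer(). A hedged kernel-side sketch (the helper name is illustrative, and the buffer is assumed dirty):

#include <linux/buffer_head.h>

/* Write one dirty buffer with the sync hint and wait for it --
 * approximately what sync_dirty_buffer() does. end_buffer_write_sync()
 * drops the reference taken by get_bh(). */
static int example_write_buffer_sync(struct buffer_head *bh)
{
	int ret;

	lock_buffer(bh);
	get_bh(bh);
	bh->b_end_io = end_buffer_write_sync;
	ret = submit_bh(WRITE_SYNC, bh);	/* sync hint + unplug */
	wait_on_buffer(bh);
	if (!ret && !buffer_uptodate(bh))
		ret = -EIO;
	return ret;
}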
159 | 159 | ||
160 | /* | 160 | /* |
161 | * These aren't really reads or writes, they pass down information about | 161 | * These aren't really reads or writes, they pass down information about |
162 | * parts of device that are now unused by the file system. | 162 | * parts of device that are now unused by the file system. |
163 | */ | 163 | */ |
164 | #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) | 164 | #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) |
165 | #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) | 165 | #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) |
166 | 166 | ||
167 | #define SEL_IN 1 | 167 | #define SEL_IN 1 |
168 | #define SEL_OUT 2 | 168 | #define SEL_OUT 2 |
169 | #define SEL_EX 4 | 169 | #define SEL_EX 4 |
170 | 170 | ||
171 | /* public flags for file_system_type */ | 171 | /* public flags for file_system_type */ |
172 | #define FS_REQUIRES_DEV 1 | 172 | #define FS_REQUIRES_DEV 1 |
173 | #define FS_BINARY_MOUNTDATA 2 | 173 | #define FS_BINARY_MOUNTDATA 2 |
174 | #define FS_HAS_SUBTYPE 4 | 174 | #define FS_HAS_SUBTYPE 4 |
175 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 175 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
176 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() | 176 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() |
177 | * during rename() internally. | 177 | * during rename() internally. |
178 | */ | 178 | */ |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * These are the fs-independent mount-flags: up to 32 flags are supported | 181 | * These are the fs-independent mount-flags: up to 32 flags are supported |
182 | */ | 182 | */ |
183 | #define MS_RDONLY 1 /* Mount read-only */ | 183 | #define MS_RDONLY 1 /* Mount read-only */ |
184 | #define MS_NOSUID 2 /* Ignore suid and sgid bits */ | 184 | #define MS_NOSUID 2 /* Ignore suid and sgid bits */ |
185 | #define MS_NODEV 4 /* Disallow access to device special files */ | 185 | #define MS_NODEV 4 /* Disallow access to device special files */ |
186 | #define MS_NOEXEC 8 /* Disallow program execution */ | 186 | #define MS_NOEXEC 8 /* Disallow program execution */ |
187 | #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ | 187 | #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ |
188 | #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ | 188 | #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ |
189 | #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ | 189 | #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ |
190 | #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ | 190 | #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ |
191 | #define MS_NOATIME 1024 /* Do not update access times. */ | 191 | #define MS_NOATIME 1024 /* Do not update access times. */ |
192 | #define MS_NODIRATIME 2048 /* Do not update directory access times */ | 192 | #define MS_NODIRATIME 2048 /* Do not update directory access times */ |
193 | #define MS_BIND 4096 | 193 | #define MS_BIND 4096 |
194 | #define MS_MOVE 8192 | 194 | #define MS_MOVE 8192 |
195 | #define MS_REC 16384 | 195 | #define MS_REC 16384 |
196 | #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. | 196 | #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. |
197 | MS_VERBOSE is deprecated. */ | 197 | MS_VERBOSE is deprecated. */ |
198 | #define MS_SILENT 32768 | 198 | #define MS_SILENT 32768 |
199 | #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ | 199 | #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ |
200 | #define MS_UNBINDABLE (1<<17) /* change to unbindable */ | 200 | #define MS_UNBINDABLE (1<<17) /* change to unbindable */ |
201 | #define MS_PRIVATE (1<<18) /* change to private */ | 201 | #define MS_PRIVATE (1<<18) /* change to private */ |
202 | #define MS_SLAVE (1<<19) /* change to slave */ | 202 | #define MS_SLAVE (1<<19) /* change to slave */ |
203 | #define MS_SHARED (1<<20) /* change to shared */ | 203 | #define MS_SHARED (1<<20) /* change to shared */ |
204 | #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ | 204 | #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ |
205 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ | 205 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ |
206 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ | 206 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ |
207 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ | 207 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ |
208 | #define MS_ACTIVE (1<<30) | 208 | #define MS_ACTIVE (1<<30) |
209 | #define MS_NOUSER (1<<31) | 209 | #define MS_NOUSER (1<<31) |
210 | 210 | ||
211 | /* | 211 | /* |
212 | * Superblock flags that can be altered by MS_REMOUNT | 212 | * Superblock flags that can be altered by MS_REMOUNT |
213 | */ | 213 | */ |
214 | #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) | 214 | #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * Old magic mount flag and mask | 217 | * Old magic mount flag and mask |
218 | */ | 218 | */ |
219 | #define MS_MGC_VAL 0xC0ED0000 | 219 | #define MS_MGC_VAL 0xC0ED0000 |
220 | #define MS_MGC_MSK 0xffff0000 | 220 | #define MS_MGC_MSK 0xffff0000 |
221 | 221 | ||
222 | /* Inode flags - they have nothing to do with superblock flags now */ | 222 | /* Inode flags - they have nothing to do with superblock flags now */ |
223 | 223 | ||
224 | #define S_SYNC 1 /* Writes are synced at once */ | 224 | #define S_SYNC 1 /* Writes are synced at once */ |
225 | #define S_NOATIME 2 /* Do not update access times */ | 225 | #define S_NOATIME 2 /* Do not update access times */ |
226 | #define S_APPEND 4 /* Append-only file */ | 226 | #define S_APPEND 4 /* Append-only file */ |
227 | #define S_IMMUTABLE 8 /* Immutable file */ | 227 | #define S_IMMUTABLE 8 /* Immutable file */ |
228 | #define S_DEAD 16 /* removed, but still open directory */ | 228 | #define S_DEAD 16 /* removed, but still open directory */ |
229 | #define S_NOQUOTA 32 /* Inode is not counted to quota */ | 229 | #define S_NOQUOTA 32 /* Inode is not counted to quota */ |
230 | #define S_DIRSYNC 64 /* Directory modifications are synchronous */ | 230 | #define S_DIRSYNC 64 /* Directory modifications are synchronous */ |
231 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ | 231 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ |
232 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ | 232 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ |
233 | #define S_PRIVATE 512 /* Inode is fs-internal */ | 233 | #define S_PRIVATE 512 /* Inode is fs-internal */ |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Note that nosuid etc flags are inode-specific: setting some file-system | 236 | * Note that nosuid etc flags are inode-specific: setting some file-system |
237 | * flags just means all the inodes inherit those flags by default. It might be | 237 | * flags just means all the inodes inherit those flags by default. It might be |
238 | * possible to override it selectively if you really wanted to with some | 238 | * possible to override it selectively if you really wanted to with some |
239 | * ioctl() that is not currently implemented. | 239 | * ioctl() that is not currently implemented. |
240 | * | 240 | * |
241 | * Exception: MS_RDONLY is always applied to the entire file system. | 241 | * Exception: MS_RDONLY is always applied to the entire file system. |
242 | * | 242 | * |
243 | * Unfortunately, it is possible to change a filesystem's flags while it is mounted | 243 | * Unfortunately, it is possible to change a filesystem's flags while it is mounted |
244 | * with files in use. This means that not all of the inodes will have their | 244 | * with files in use. This means that not all of the inodes will have their |
245 | * i_flags updated. Hence, i_flags no longer inherit the superblock mount | 245 | * i_flags updated. Hence, i_flags no longer inherit the superblock mount |
246 | * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org | 246 | * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org |
247 | */ | 247 | */ |
248 | #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) | 248 | #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) |
249 | 249 | ||
250 | #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) | 250 | #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) |
251 | #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ | 251 | #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ |
252 | ((inode)->i_flags & S_SYNC)) | 252 | ((inode)->i_flags & S_SYNC)) |
253 | #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ | 253 | #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ |
254 | ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) | 254 | ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) |
255 | #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) | 255 | #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) |
256 | #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) | 256 | #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) |
257 | #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) | 257 | #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) |
258 | 258 | ||
259 | #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) | 259 | #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) |
260 | #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) | 260 | #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) |
261 | #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) | 261 | #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) |
262 | #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) | 262 | #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) |
263 | 263 | ||
264 | #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) | 264 | #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) |
265 | #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) | 265 | #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) |
266 | #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) | 266 | #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) |
267 | #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) | 267 | #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) |
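
These predicates are what write paths consult to honour O_SYNC and MS_SYNCHRONOUS at the right granularity. A hedged sketch of the common pattern (the function name is illustrative):

#include <linux/fs.h>

/* After dirtying an inode, push it out immediately when the file
 * or mount demands synchronous updates. */
static int example_commit_inode(struct inode *inode)
{
	mark_inode_dirty(inode);
	if (IS_SYNC(inode))
		return write_inode_now(inode, 1);	/* and wait */
	return 0;
}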
268 | 268 | ||
269 | /* the read-only stuff doesn't really belong here, but any other place is | 269 | /* the read-only stuff doesn't really belong here, but any other place is |
270 | probably as bad and I don't want to create yet another include file. */ | 270 | probably as bad and I don't want to create yet another include file. */ |
271 | 271 | ||
272 | #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ | 272 | #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ |
273 | #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ | 273 | #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ |
274 | #define BLKRRPART _IO(0x12,95) /* re-read partition table */ | 274 | #define BLKRRPART _IO(0x12,95) /* re-read partition table */ |
275 | #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ | 275 | #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ |
276 | #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ | 276 | #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ |
277 | #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ | 277 | #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ |
278 | #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ | 278 | #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ |
279 | #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ | 279 | #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ |
280 | #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ | 280 | #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ |
281 | #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ | 281 | #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ |
282 | #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ | 282 | #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ |
283 | #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ | 283 | #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ |
284 | #if 0 | 284 | #if 0 |
285 | #define BLKPG _IO(0x12,105)/* See blkpg.h */ | 285 | #define BLKPG _IO(0x12,105)/* See blkpg.h */ |
286 | 286 | ||
287 | /* Some people are morons. Do not use sizeof! */ | 287 | /* Some people are morons. Do not use sizeof! */ |
288 | 288 | ||
289 | #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ | 289 | #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ |
290 | #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ | 290 | #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ |
291 | /* This was here just to show that the number is taken - | 291 | /* This was here just to show that the number is taken - |
292 | probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ | 292 | probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ |
293 | #endif | 293 | #endif |
294 | /* A jump here: 108-111 have been used for various private purposes. */ | 294 | /* A jump here: 108-111 have been used for various private purposes. */ |
295 | #define BLKBSZGET _IOR(0x12,112,size_t) | 295 | #define BLKBSZGET _IOR(0x12,112,size_t) |
296 | #define BLKBSZSET _IOW(0x12,113,size_t) | 296 | #define BLKBSZSET _IOW(0x12,113,size_t) |
297 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ | 297 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ |
298 | #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) | 298 | #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) |
299 | #define BLKTRACESTART _IO(0x12,116) | 299 | #define BLKTRACESTART _IO(0x12,116) |
300 | #define BLKTRACESTOP _IO(0x12,117) | 300 | #define BLKTRACESTOP _IO(0x12,117) |
301 | #define BLKTRACETEARDOWN _IO(0x12,118) | 301 | #define BLKTRACETEARDOWN _IO(0x12,118) |
302 | #define BLKDISCARD _IO(0x12,119) | 302 | #define BLKDISCARD _IO(0x12,119) |
303 | 303 | ||
304 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ | 304 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
305 | #define FIBMAP _IO(0x00,1) /* bmap access */ | 305 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
306 | #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ | 306 | #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ |
307 | #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ | 307 | #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ |
308 | #define FITHAW _IOWR('X', 120, int) /* Thaw */ | 308 | #define FITHAW _IOWR('X', 120, int) /* Thaw */ |
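
The block-device ioctls above are reachable from userspace given read access to the device node. A sketch that queries a device's size and logical sector size:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* BLKGETSIZE64, BLKSSZGET */

int main(int argc, char **argv)
{
	uint64_t bytes;
	int ssz, fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <blockdev>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKGETSIZE64, &bytes) < 0 ||
	    ioctl(fd, BLKSSZGET, &ssz) < 0) {
		perror("ioctl");
		close(fd);
		return 1;
	}
	printf("%llu bytes, %d-byte logical sectors\n",
	       (unsigned long long)bytes, ssz);
	close(fd);
	return 0;
}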
309 | 309 | ||
310 | #define FS_IOC_GETFLAGS _IOR('f', 1, long) | 310 | #define FS_IOC_GETFLAGS _IOR('f', 1, long) |
311 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) | 311 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) |
312 | #define FS_IOC_GETVERSION _IOR('v', 1, long) | 312 | #define FS_IOC_GETVERSION _IOR('v', 1, long) |
313 | #define FS_IOC_SETVERSION _IOW('v', 2, long) | 313 | #define FS_IOC_SETVERSION _IOW('v', 2, long) |
314 | #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) | 314 | #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) |
315 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) | 315 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) |
316 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) | 316 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) |
317 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) | 317 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) |
318 | #define FS_IOC32_SETVERSION _IOW('v', 2, int) | 318 | #define FS_IOC32_SETVERSION _IOW('v', 2, int) |
319 | 319 | ||
320 | /* | 320 | /* |
321 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) | 321 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) |
322 | */ | 322 | */ |
323 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ | 323 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ |
324 | #define FS_UNRM_FL 0x00000002 /* Undelete */ | 324 | #define FS_UNRM_FL 0x00000002 /* Undelete */ |
325 | #define FS_COMPR_FL 0x00000004 /* Compress file */ | 325 | #define FS_COMPR_FL 0x00000004 /* Compress file */ |
326 | #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ | 326 | #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ |
327 | #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ | 327 | #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ |
328 | #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ | 328 | #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ |
329 | #define FS_NODUMP_FL 0x00000040 /* do not dump file */ | 329 | #define FS_NODUMP_FL 0x00000040 /* do not dump file */ |
330 | #define FS_NOATIME_FL 0x00000080 /* do not update atime */ | 330 | #define FS_NOATIME_FL 0x00000080 /* do not update atime */ |
331 | /* Reserved for compression usage... */ | 331 | /* Reserved for compression usage... */ |
332 | #define FS_DIRTY_FL 0x00000100 | 332 | #define FS_DIRTY_FL 0x00000100 |
333 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ | 333 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ |
334 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ | 334 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ |
335 | #define FS_ECOMPR_FL 0x00000800 /* Compression error */ | 335 | #define FS_ECOMPR_FL 0x00000800 /* Compression error */ |
336 | /* End compression flags --- maybe not all used */ | 336 | /* End compression flags --- maybe not all used */ |
337 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ | 337 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ |
338 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ | 338 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ |
339 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ | 339 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ |
340 | #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ | 340 | #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ |
341 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ | 341 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ |
342 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ | 342 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ |
343 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 343 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
344 | #define FS_EXTENT_FL 0x00080000 /* Extents */ | 344 | #define FS_EXTENT_FL 0x00080000 /* Extents */ |
345 | #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ | 345 | #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ |
346 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ | 346 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ |
347 | 347 | ||
348 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ | 348 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ |
349 | #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 349 | #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ |
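
FS_IOC_GETFLAGS/FS_IOC_SETFLAGS together with the FS_*_FL bits above are the interface behind lsattr(1) and chattr(1). A sketch of the usual read-modify-write pattern; note the argument is a long, matching the _IOR/_IOW definitions:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* FS_IOC_GETFLAGS, FS_NOATIME_FL, ... */

int main(int argc, char **argv)
{
	long flags;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
		flags |= FS_NOATIME_FL;	/* suppress atime updates */
		if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
			perror("FS_IOC_SETFLAGS");
	} else {
		perror("FS_IOC_GETFLAGS");
	}
	close(fd);
	return 0;
}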
350 | 350 | ||
351 | 351 | ||
352 | #define SYNC_FILE_RANGE_WAIT_BEFORE 1 | 352 | #define SYNC_FILE_RANGE_WAIT_BEFORE 1 |
353 | #define SYNC_FILE_RANGE_WRITE 2 | 353 | #define SYNC_FILE_RANGE_WRITE 2 |
354 | #define SYNC_FILE_RANGE_WAIT_AFTER 4 | 354 | #define SYNC_FILE_RANGE_WAIT_AFTER 4 |
355 | 355 | ||
356 | #ifdef __KERNEL__ | 356 | #ifdef __KERNEL__ |
357 | 357 | ||
358 | #include <linux/linkage.h> | 358 | #include <linux/linkage.h> |
359 | #include <linux/wait.h> | 359 | #include <linux/wait.h> |
360 | #include <linux/types.h> | 360 | #include <linux/types.h> |
361 | #include <linux/kdev_t.h> | 361 | #include <linux/kdev_t.h> |
362 | #include <linux/dcache.h> | 362 | #include <linux/dcache.h> |
363 | #include <linux/path.h> | 363 | #include <linux/path.h> |
364 | #include <linux/stat.h> | 364 | #include <linux/stat.h> |
365 | #include <linux/cache.h> | 365 | #include <linux/cache.h> |
366 | #include <linux/kobject.h> | 366 | #include <linux/kobject.h> |
367 | #include <linux/list.h> | 367 | #include <linux/list.h> |
368 | #include <linux/radix-tree.h> | 368 | #include <linux/radix-tree.h> |
369 | #include <linux/prio_tree.h> | 369 | #include <linux/prio_tree.h> |
370 | #include <linux/init.h> | 370 | #include <linux/init.h> |
371 | #include <linux/pid.h> | 371 | #include <linux/pid.h> |
372 | #include <linux/mutex.h> | 372 | #include <linux/mutex.h> |
373 | #include <linux/capability.h> | 373 | #include <linux/capability.h> |
374 | #include <linux/semaphore.h> | 374 | #include <linux/semaphore.h> |
375 | #include <linux/fiemap.h> | 375 | #include <linux/fiemap.h> |
376 | 376 | ||
377 | #include <asm/atomic.h> | 377 | #include <asm/atomic.h> |
378 | #include <asm/byteorder.h> | 378 | #include <asm/byteorder.h> |
379 | 379 | ||
380 | struct export_operations; | 380 | struct export_operations; |
381 | struct hd_geometry; | 381 | struct hd_geometry; |
382 | struct iovec; | 382 | struct iovec; |
383 | struct nameidata; | 383 | struct nameidata; |
384 | struct kiocb; | 384 | struct kiocb; |
385 | struct pipe_inode_info; | 385 | struct pipe_inode_info; |
386 | struct poll_table_struct; | 386 | struct poll_table_struct; |
387 | struct kstatfs; | 387 | struct kstatfs; |
388 | struct vm_area_struct; | 388 | struct vm_area_struct; |
389 | struct vfsmount; | 389 | struct vfsmount; |
390 | struct cred; | 390 | struct cred; |
391 | 391 | ||
392 | extern void __init inode_init(void); | 392 | extern void __init inode_init(void); |
393 | extern void __init inode_init_early(void); | 393 | extern void __init inode_init_early(void); |
394 | extern void __init files_init(unsigned long); | 394 | extern void __init files_init(unsigned long); |
395 | 395 | ||
396 | extern struct files_stat_struct files_stat; | 396 | extern struct files_stat_struct files_stat; |
397 | extern int get_max_files(void); | 397 | extern int get_max_files(void); |
398 | extern int sysctl_nr_open; | 398 | extern int sysctl_nr_open; |
399 | extern struct inodes_stat_t inodes_stat; | 399 | extern struct inodes_stat_t inodes_stat; |
400 | extern int leases_enable, lease_break_time; | 400 | extern int leases_enable, lease_break_time; |
401 | #ifdef CONFIG_DNOTIFY | 401 | #ifdef CONFIG_DNOTIFY |
402 | extern int dir_notify_enable; | 402 | extern int dir_notify_enable; |
403 | #endif | 403 | #endif |
404 | 404 | ||
405 | struct buffer_head; | 405 | struct buffer_head; |
406 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, | 406 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, |
407 | struct buffer_head *bh_result, int create); | 407 | struct buffer_head *bh_result, int create); |
408 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | 408 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, |
409 | ssize_t bytes, void *private); | 409 | ssize_t bytes, void *private); |
410 | 410 | ||
411 | /* | 411 | /* |
412 | * Attribute flags. These should be or-ed together to figure out what | 412 | * Attribute flags. These should be or-ed together to figure out what |
413 | * has been changed! | 413 | * has been changed! |
414 | */ | 414 | */ |
415 | #define ATTR_MODE (1 << 0) | 415 | #define ATTR_MODE (1 << 0) |
416 | #define ATTR_UID (1 << 1) | 416 | #define ATTR_UID (1 << 1) |
417 | #define ATTR_GID (1 << 2) | 417 | #define ATTR_GID (1 << 2) |
418 | #define ATTR_SIZE (1 << 3) | 418 | #define ATTR_SIZE (1 << 3) |
419 | #define ATTR_ATIME (1 << 4) | 419 | #define ATTR_ATIME (1 << 4) |
420 | #define ATTR_MTIME (1 << 5) | 420 | #define ATTR_MTIME (1 << 5) |
421 | #define ATTR_CTIME (1 << 6) | 421 | #define ATTR_CTIME (1 << 6) |
422 | #define ATTR_ATIME_SET (1 << 7) | 422 | #define ATTR_ATIME_SET (1 << 7) |
423 | #define ATTR_MTIME_SET (1 << 8) | 423 | #define ATTR_MTIME_SET (1 << 8) |
424 | #define ATTR_FORCE (1 << 9) /* Not a change, but force the change */ | 424 | #define ATTR_FORCE (1 << 9) /* Not a change, but force the change */ |
425 | #define ATTR_ATTR_FLAG (1 << 10) | 425 | #define ATTR_ATTR_FLAG (1 << 10) |
426 | #define ATTR_KILL_SUID (1 << 11) | 426 | #define ATTR_KILL_SUID (1 << 11) |
427 | #define ATTR_KILL_SGID (1 << 12) | 427 | #define ATTR_KILL_SGID (1 << 12) |
428 | #define ATTR_FILE (1 << 13) | 428 | #define ATTR_FILE (1 << 13) |
429 | #define ATTR_KILL_PRIV (1 << 14) | 429 | #define ATTR_KILL_PRIV (1 << 14) |
430 | #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ | 430 | #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ |
431 | #define ATTR_TIMES_SET (1 << 16) | 431 | #define ATTR_TIMES_SET (1 << 16) |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * This is the Inode Attributes structure, used for notify_change(). It | 434 | * This is the Inode Attributes structure, used for notify_change(). It |
435 | * uses the above definitions as flags, to know which values have changed. | 435 | * uses the above definitions as flags, to know which values have changed. |
436 | * Also, in this manner, a Filesystem can look at only the values it cares | 436 | * Also, in this manner, a Filesystem can look at only the values it cares |
437 | * about. Basically, these are the attributes that the VFS layer can | 437 | * about. Basically, these are the attributes that the VFS layer can |
438 | * request to change from the FS layer. | 438 | * request to change from the FS layer. |
439 | * | 439 | * |
440 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 | 440 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 |
441 | */ | 441 | */ |
442 | struct iattr { | 442 | struct iattr { |
443 | unsigned int ia_valid; | 443 | unsigned int ia_valid; |
444 | umode_t ia_mode; | 444 | umode_t ia_mode; |
445 | uid_t ia_uid; | 445 | uid_t ia_uid; |
446 | gid_t ia_gid; | 446 | gid_t ia_gid; |
447 | loff_t ia_size; | 447 | loff_t ia_size; |
448 | struct timespec ia_atime; | 448 | struct timespec ia_atime; |
449 | struct timespec ia_mtime; | 449 | struct timespec ia_mtime; |
450 | struct timespec ia_ctime; | 450 | struct timespec ia_ctime; |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Not an attribute, but auxiliary info for filesystems wanting to | 453 | * Not an attribute, but auxiliary info for filesystems wanting to |
454 | * implement an ftruncate() like method. NOTE: filesystem should | 454 | * implement an ftruncate() like method. NOTE: filesystem should |
455 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). | 455 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). |
456 | */ | 456 | */ |
457 | struct file *ia_file; | 457 | struct file *ia_file; |
458 | }; | 458 | }; |
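To make the flag convention above concrete, a hedged sketch of a filesystem ->setattr method (myfs_setattr is a hypothetical name; inode_change_ok() and inode_setattr() are the stock VFS helpers of this era):

static int myfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);	/* standard permission checks */
	if (error)
		return error;

	if (attr->ia_valid & ATTR_SIZE) {
		/* size change requested; as the comment above says, test
		 * ATTR_FILE in ia_valid, not ia_file != NULL, to see whether
		 * this arrived through an ftruncate()-like path */
	}
	return inode_setattr(inode, attr);	/* apply only the flagged fields */
}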
459 | 459 | ||
460 | /* | 460 | /* |
461 | * Includes for diskquotas. | 461 | * Includes for diskquotas. |
462 | */ | 462 | */ |
463 | #include <linux/quota.h> | 463 | #include <linux/quota.h> |
464 | 464 | ||
465 | /** | 465 | /** |
466 | * enum positive_aop_returns - aop return codes with specific semantics | 466 | * enum positive_aop_returns - aop return codes with specific semantics |
467 | * | 467 | * |
468 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | 468 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has |
469 | * completed, that the page is still locked, and | 469 | * completed, that the page is still locked, and |
470 | * should be considered active. The VM uses this hint | 470 | * should be considered active. The VM uses this hint |
471 | * to return the page to the active list -- it won't | 471 | * to return the page to the active list -- it won't |
472 | * be a candidate for writeback again in the near | 472 | * be a candidate for writeback again in the near |
473 | * future. Other callers must be careful to unlock | 473 | * future. Other callers must be careful to unlock |
474 | * the page if they get this return. Returned by | 474 | * the page if they get this return. Returned by |
475 | * writepage(); | 475 | * writepage(); |
476 | * | 476 | * |
477 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | 477 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has |
478 | * unlocked it and the page might have been truncated. | 478 | * unlocked it and the page might have been truncated. |
479 | * The caller should back up to acquiring a new page and | 479 | * The caller should back up to acquiring a new page and |
480 | * trying again. The aop will be taking reasonable | 480 | * trying again. The aop will be taking reasonable |
481 | * precautions not to livelock. If the caller held a page | 481 | * precautions not to livelock. If the caller held a page |
482 | * reference, it should drop it before retrying. Returned | 482 | * reference, it should drop it before retrying. Returned |
483 | * by readpage(). | 483 | * by readpage(). |
484 | * | 484 | * |
485 | * address_space_operation functions return these large constants to indicate | 485 | * address_space_operation functions return these large constants to indicate |
486 | * special semantics to the caller. These are much larger than the bytes in a | 486 | * special semantics to the caller. These are much larger than the bytes in a |
487 | * page to allow for functions that return the number of bytes operated on in a | 487 | * page to allow for functions that return the number of bytes operated on in a |
488 | * given page. | 488 | * given page. |
489 | */ | 489 | */ |
490 | 490 | ||
491 | enum positive_aop_returns { | 491 | enum positive_aop_returns { |
492 | AOP_WRITEPAGE_ACTIVATE = 0x80000, | 492 | AOP_WRITEPAGE_ACTIVATE = 0x80000, |
493 | AOP_TRUNCATED_PAGE = 0x80001, | 493 | AOP_TRUNCATED_PAGE = 0x80001, |
494 | }; | 494 | }; |
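A hedged sketch of the AOP_TRUNCATED_PAGE retry contract described above (example_read_one is illustrative; grab_cache_page() returns a locked, referenced page and ->readpage() unlocks it):

static int example_read_one(struct file *file, struct address_space *mapping,
			    pgoff_t index)
{
	struct page *page;
	int err;
retry:
	page = grab_cache_page(mapping, index);
	if (!page)
		return -ENOMEM;
	err = mapping->a_ops->readpage(file, page);
	if (err == AOP_TRUNCATED_PAGE) {
		/* the aop unlocked the page and it may be truncated: drop
		 * our reference first, then back up and try again */
		page_cache_release(page);
		goto retry;
	}
	page_cache_release(page);
	return err;
}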
495 | 495 | ||
496 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ | 496 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ |
497 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ | 497 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ |
498 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct | 498 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct |
499 | * helper code (eg buffer layer) | 499 | * helper code (eg buffer layer) |
500 | * to clear GFP_FS from alloc */ | 500 | * to clear GFP_FS from alloc */ |
501 | 501 | ||
502 | /* | 502 | /* |
503 | * oh the beauties of C type declarations. | 503 | * oh the beauties of C type declarations. |
504 | */ | 504 | */ |
505 | struct page; | 505 | struct page; |
506 | struct address_space; | 506 | struct address_space; |
507 | struct writeback_control; | 507 | struct writeback_control; |
508 | 508 | ||
509 | struct iov_iter { | 509 | struct iov_iter { |
510 | const struct iovec *iov; | 510 | const struct iovec *iov; |
511 | unsigned long nr_segs; | 511 | unsigned long nr_segs; |
512 | size_t iov_offset; | 512 | size_t iov_offset; |
513 | size_t count; | 513 | size_t count; |
514 | }; | 514 | }; |
515 | 515 | ||
516 | size_t iov_iter_copy_from_user_atomic(struct page *page, | 516 | size_t iov_iter_copy_from_user_atomic(struct page *page, |
517 | struct iov_iter *i, unsigned long offset, size_t bytes); | 517 | struct iov_iter *i, unsigned long offset, size_t bytes); |
518 | size_t iov_iter_copy_from_user(struct page *page, | 518 | size_t iov_iter_copy_from_user(struct page *page, |
519 | struct iov_iter *i, unsigned long offset, size_t bytes); | 519 | struct iov_iter *i, unsigned long offset, size_t bytes); |
520 | void iov_iter_advance(struct iov_iter *i, size_t bytes); | 520 | void iov_iter_advance(struct iov_iter *i, size_t bytes); |
521 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); | 521 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); |
522 | size_t iov_iter_single_seg_count(struct iov_iter *i); | 522 | size_t iov_iter_single_seg_count(struct iov_iter *i); |
523 | 523 | ||
524 | static inline void iov_iter_init(struct iov_iter *i, | 524 | static inline void iov_iter_init(struct iov_iter *i, |
525 | const struct iovec *iov, unsigned long nr_segs, | 525 | const struct iovec *iov, unsigned long nr_segs, |
526 | size_t count, size_t written) | 526 | size_t count, size_t written) |
527 | { | 527 | { |
528 | i->iov = iov; | 528 | i->iov = iov; |
529 | i->nr_segs = nr_segs; | 529 | i->nr_segs = nr_segs; |
530 | i->iov_offset = 0; | 530 | i->iov_offset = 0; |
531 | i->count = count + written; | 531 | i->count = count + written; |
532 | 532 | ||
533 | iov_iter_advance(i, written); | 533 | iov_iter_advance(i, written); |
534 | } | 534 | } |
535 | 535 | ||
536 | static inline size_t iov_iter_count(struct iov_iter *i) | 536 | static inline size_t iov_iter_count(struct iov_iter *i) |
537 | { | 537 | { |
538 | return i->count; | 538 | return i->count; |
539 | } | 539 | } |
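Usage note (editor's sketch, not part of the patch): an iov_iter is primed with the total count plus whatever was already written, then consumed segment by segment; example_consume is a hypothetical caller:

static void example_consume(struct iov_iter *i, const struct iovec *iov,
			    unsigned long nr_segs, size_t count)
{
	iov_iter_init(i, iov, nr_segs, count, 0);	/* written == 0 so far */
	while (iov_iter_count(i)) {
		size_t seg = iov_iter_single_seg_count(i);
		/* ... copy this segment, e.g. via iov_iter_copy_from_user();
		 * a real caller advances by the bytes actually copied ... */
		iov_iter_advance(i, seg);
	}
}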
540 | 540 | ||
541 | /* | 541 | /* |
542 | * "descriptor" for what we're up to with a read. | 542 | * "descriptor" for what we're up to with a read. |
543 | * This allows us to use the same read code yet | 543 | * This allows us to use the same read code yet |
544 | * have multiple different users of the data that | 544 | * have multiple different users of the data that |
545 | * we read from a file. | 545 | * we read from a file. |
546 | * | 546 | * |
547 | * The simplest case just copies the data to user | 547 | * The simplest case just copies the data to user |
548 | * mode. | 548 | * mode. |
549 | */ | 549 | */ |
550 | typedef struct { | 550 | typedef struct { |
551 | size_t written; | 551 | size_t written; |
552 | size_t count; | 552 | size_t count; |
553 | union { | 553 | union { |
554 | char __user *buf; | 554 | char __user *buf; |
555 | void *data; | 555 | void *data; |
556 | } arg; | 556 | } arg; |
557 | int error; | 557 | int error; |
558 | } read_descriptor_t; | 558 | } read_descriptor_t; |
559 | 559 | ||
560 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, | 560 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, |
561 | unsigned long, unsigned long); | 561 | unsigned long, unsigned long); |
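The "simplest case" mentioned in the comment above, copying to user mode, looks roughly like this hedged sketch modelled on the generic file_read_actor (the function name here is illustrative):

static int example_read_actor(read_descriptor_t *desc, struct page *page,
			      unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long left;

	if (size > desc->count)
		size = desc->count;

	kaddr = kmap(page);
	left = copy_to_user(desc->arg.buf, kaddr + offset, size);
	kunmap(page);

	size -= left;			/* bytes actually copied */
	desc->count -= size;
	desc->written += size;
	desc->arg.buf += size;
	if (left)
		desc->error = -EFAULT;
	return size;			/* actors return bytes processed */
}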
562 | 562 | ||
563 | struct address_space_operations { | 563 | struct address_space_operations { |
564 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 564 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
565 | int (*readpage)(struct file *, struct page *); | 565 | int (*readpage)(struct file *, struct page *); |
566 | void (*sync_page)(struct page *); | 566 | void (*sync_page)(struct page *); |
567 | 567 | ||
568 | /* Write back some dirty pages from this mapping. */ | 568 | /* Write back some dirty pages from this mapping. */ |
569 | int (*writepages)(struct address_space *, struct writeback_control *); | 569 | int (*writepages)(struct address_space *, struct writeback_control *); |
570 | 570 | ||
571 | /* Set a page dirty. Return true if this dirtied it */ | 571 | /* Set a page dirty. Return true if this dirtied it */ |
572 | int (*set_page_dirty)(struct page *page); | 572 | int (*set_page_dirty)(struct page *page); |
573 | 573 | ||
574 | int (*readpages)(struct file *filp, struct address_space *mapping, | 574 | int (*readpages)(struct file *filp, struct address_space *mapping, |
575 | struct list_head *pages, unsigned nr_pages); | 575 | struct list_head *pages, unsigned nr_pages); |
576 | 576 | ||
577 | int (*write_begin)(struct file *, struct address_space *mapping, | 577 | int (*write_begin)(struct file *, struct address_space *mapping, |
578 | loff_t pos, unsigned len, unsigned flags, | 578 | loff_t pos, unsigned len, unsigned flags, |
579 | struct page **pagep, void **fsdata); | 579 | struct page **pagep, void **fsdata); |
580 | int (*write_end)(struct file *, struct address_space *mapping, | 580 | int (*write_end)(struct file *, struct address_space *mapping, |
581 | loff_t pos, unsigned len, unsigned copied, | 581 | loff_t pos, unsigned len, unsigned copied, |
582 | struct page *page, void *fsdata); | 582 | struct page *page, void *fsdata); |
583 | 583 | ||
584 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ | 584 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ |
585 | sector_t (*bmap)(struct address_space *, sector_t); | 585 | sector_t (*bmap)(struct address_space *, sector_t); |
586 | void (*invalidatepage) (struct page *, unsigned long); | 586 | void (*invalidatepage) (struct page *, unsigned long); |
587 | int (*releasepage) (struct page *, gfp_t); | 587 | int (*releasepage) (struct page *, gfp_t); |
588 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 588 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
589 | loff_t offset, unsigned long nr_segs); | 589 | loff_t offset, unsigned long nr_segs); |
590 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, | 590 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, |
591 | void **, unsigned long *); | 591 | void **, unsigned long *); |
592 | /* migrate the contents of a page to the specified target */ | 592 | /* migrate the contents of a page to the specified target */ |
593 | int (*migratepage) (struct address_space *, | 593 | int (*migratepage) (struct address_space *, |
594 | struct page *, struct page *); | 594 | struct page *, struct page *); |
595 | int (*launder_page) (struct page *); | 595 | int (*launder_page) (struct page *); |
596 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, | 596 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, |
597 | unsigned long); | 597 | unsigned long); |
598 | }; | 598 | }; |
599 | 599 | ||
600 | /* | 600 | /* |
601 | * pagecache_write_begin/pagecache_write_end must be used by general code | 601 | * pagecache_write_begin/pagecache_write_end must be used by general code |
602 | * to write into the pagecache. | 602 | * to write into the pagecache. |
603 | */ | 603 | */ |
604 | int pagecache_write_begin(struct file *, struct address_space *mapping, | 604 | int pagecache_write_begin(struct file *, struct address_space *mapping, |
605 | loff_t pos, unsigned len, unsigned flags, | 605 | loff_t pos, unsigned len, unsigned flags, |
606 | struct page **pagep, void **fsdata); | 606 | struct page **pagep, void **fsdata); |
607 | 607 | ||
608 | int pagecache_write_end(struct file *, struct address_space *mapping, | 608 | int pagecache_write_end(struct file *, struct address_space *mapping, |
609 | loff_t pos, unsigned len, unsigned copied, | 609 | loff_t pos, unsigned len, unsigned copied, |
610 | struct page *page, void *fsdata); | 610 | struct page *page, void *fsdata); |
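A hedged sketch of the mandated pattern, one chunk per begin/end pair (example_write_chunk and the elided copy are illustrative):

static int example_write_chunk(struct file *file, struct address_space *mapping,
			       loff_t pos, const char *buf, unsigned len)
{
	struct page *page;
	void *fsdata;
	int err;

	err = pagecache_write_begin(file, mapping, pos, len, 0, &page, &fsdata);
	if (err)
		return err;
	/* ... copy 'len' bytes from 'buf' into 'page' at 'pos' ... */
	return pagecache_write_end(file, mapping, pos, len, len, page, fsdata);
}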
611 | 611 | ||
612 | struct backing_dev_info; | 612 | struct backing_dev_info; |
613 | struct address_space { | 613 | struct address_space { |
614 | struct inode *host; /* owner: inode, block_device */ | 614 | struct inode *host; /* owner: inode, block_device */ |
615 | struct radix_tree_root page_tree; /* radix tree of all pages */ | 615 | struct radix_tree_root page_tree; /* radix tree of all pages */ |
616 | spinlock_t tree_lock; /* and lock protecting it */ | 616 | spinlock_t tree_lock; /* and lock protecting it */ |
617 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ | 617 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ |
618 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ | 618 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ |
619 | struct list_head i_mmap_nonlinear; /* list of VM_NONLINEAR mappings */ | 619 | struct list_head i_mmap_nonlinear; /* list of VM_NONLINEAR mappings */ |
620 | spinlock_t i_mmap_lock; /* protect tree, count, list */ | 620 | spinlock_t i_mmap_lock; /* protect tree, count, list */ |
621 | unsigned int truncate_count; /* Cover race condition with truncate */ | 621 | unsigned int truncate_count; /* Cover race condition with truncate */ |
622 | unsigned long nrpages; /* number of total pages */ | 622 | unsigned long nrpages; /* number of total pages */ |
623 | pgoff_t writeback_index;/* writeback starts here */ | 623 | pgoff_t writeback_index;/* writeback starts here */ |
624 | const struct address_space_operations *a_ops; /* methods */ | 624 | const struct address_space_operations *a_ops; /* methods */ |
625 | unsigned long flags; /* error bits/gfp mask */ | 625 | unsigned long flags; /* error bits/gfp mask */ |
626 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ | 626 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ |
627 | spinlock_t private_lock; /* for use by the address_space */ | 627 | spinlock_t private_lock; /* for use by the address_space */ |
628 | struct list_head private_list; /* ditto */ | 628 | struct list_head private_list; /* ditto */ |
629 | struct address_space *assoc_mapping; /* ditto */ | 629 | struct address_space *assoc_mapping; /* ditto */ |
630 | } __attribute__((aligned(sizeof(long)))); | 630 | } __attribute__((aligned(sizeof(long)))); |
631 | /* | 631 | /* |
632 | * On most architectures that alignment is already the case; but | 632 | * On most architectures that alignment is already the case; but |
633 | * must be enforced here for CRIS, to let the least significant bit | 633 | * must be enforced here for CRIS, to let the least significant bit |
634 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. | 634 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. |
635 | */ | 635 | */ |
636 | 636 | ||
637 | struct block_device { | 637 | struct block_device { |
638 | dev_t bd_dev; /* not a kdev_t - it's a search key */ | 638 | dev_t bd_dev; /* not a kdev_t - it's a search key */ |
639 | struct inode * bd_inode; /* will die */ | 639 | struct inode * bd_inode; /* will die */ |
640 | struct super_block * bd_super; | 640 | struct super_block * bd_super; |
641 | int bd_openers; | 641 | int bd_openers; |
642 | struct mutex bd_mutex; /* open/close mutex */ | 642 | struct mutex bd_mutex; /* open/close mutex */ |
643 | struct semaphore bd_mount_sem; | 643 | struct semaphore bd_mount_sem; |
644 | struct list_head bd_inodes; | 644 | struct list_head bd_inodes; |
645 | void * bd_holder; | 645 | void * bd_holder; |
646 | int bd_holders; | 646 | int bd_holders; |
647 | #ifdef CONFIG_SYSFS | 647 | #ifdef CONFIG_SYSFS |
648 | struct list_head bd_holder_list; | 648 | struct list_head bd_holder_list; |
649 | #endif | 649 | #endif |
650 | struct block_device * bd_contains; | 650 | struct block_device * bd_contains; |
651 | unsigned bd_block_size; | 651 | unsigned bd_block_size; |
652 | struct hd_struct * bd_part; | 652 | struct hd_struct * bd_part; |
653 | /* number of times partitions within this device have been opened. */ | 653 | /* number of times partitions within this device have been opened. */ |
654 | unsigned bd_part_count; | 654 | unsigned bd_part_count; |
655 | int bd_invalidated; | 655 | int bd_invalidated; |
656 | struct gendisk * bd_disk; | 656 | struct gendisk * bd_disk; |
657 | struct list_head bd_list; | 657 | struct list_head bd_list; |
658 | struct backing_dev_info *bd_inode_backing_dev_info; | 658 | struct backing_dev_info *bd_inode_backing_dev_info; |
659 | /* | 659 | /* |
660 | * Private data. You must have bd_claim'ed the block_device | 660 | * Private data. You must have bd_claim'ed the block_device |
661 | * to use this. NOTE: bd_claim allows an owner to claim | 661 | * to use this. NOTE: bd_claim allows an owner to claim |
662 | * the same device multiple times, the owner must take special | 662 | * the same device multiple times, the owner must take special |
663 | * care to not mess up bd_private for that case. | 663 | * care to not mess up bd_private for that case. |
664 | */ | 664 | */ |
665 | unsigned long bd_private; | 665 | unsigned long bd_private; |
666 | 666 | ||
667 | /* The counter of freeze processes */ | 667 | /* The counter of freeze processes */ |
668 | int bd_fsfreeze_count; | 668 | int bd_fsfreeze_count; |
669 | /* Mutex for freeze */ | 669 | /* Mutex for freeze */ |
670 | struct mutex bd_fsfreeze_mutex; | 670 | struct mutex bd_fsfreeze_mutex; |
671 | }; | 671 | }; |
672 | 672 | ||
673 | /* | 673 | /* |
674 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache | 674 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache |
675 | * radix trees | 675 | * radix trees |
676 | */ | 676 | */ |
677 | #define PAGECACHE_TAG_DIRTY 0 | 677 | #define PAGECACHE_TAG_DIRTY 0 |
678 | #define PAGECACHE_TAG_WRITEBACK 1 | 678 | #define PAGECACHE_TAG_WRITEBACK 1 |
679 | 679 | ||
680 | int mapping_tagged(struct address_space *mapping, int tag); | 680 | int mapping_tagged(struct address_space *mapping, int tag); |
681 | 681 | ||
682 | /* | 682 | /* |
683 | * Might pages of this file be mapped into userspace? | 683 | * Might pages of this file be mapped into userspace? |
684 | */ | 684 | */ |
685 | static inline int mapping_mapped(struct address_space *mapping) | 685 | static inline int mapping_mapped(struct address_space *mapping) |
686 | { | 686 | { |
687 | return !prio_tree_empty(&mapping->i_mmap) || | 687 | return !prio_tree_empty(&mapping->i_mmap) || |
688 | !list_empty(&mapping->i_mmap_nonlinear); | 688 | !list_empty(&mapping->i_mmap_nonlinear); |
689 | } | 689 | } |
690 | 690 | ||
691 | /* | 691 | /* |
692 | * Might pages of this file have been modified in userspace? | 692 | * Might pages of this file have been modified in userspace? |
693 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff | 693 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff |
694 | * marks a vma as VM_SHARED if it is shared and the file was opened for | 694 | * marks a vma as VM_SHARED if it is shared and the file was opened for |
695 | * writing, i.e. the vma may be mprotected writable even if it is now readonly. | 695 | * writing, i.e. the vma may be mprotected writable even if it is now readonly. |
696 | */ | 696 | */ |
697 | static inline int mapping_writably_mapped(struct address_space *mapping) | 697 | static inline int mapping_writably_mapped(struct address_space *mapping) |
698 | { | 698 | { |
699 | return mapping->i_mmap_writable != 0; | 699 | return mapping->i_mmap_writable != 0; |
700 | } | 700 | } |
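Typical use of the predicate above (editor's illustration, mirroring the generic read path): make the data cache coherent before reading a page that userspace may have dirtied through a shared mapping:

static void example_before_copy(struct address_space *mapping, struct page *page)
{
	if (mapping_writably_mapped(mapping))
		flush_dcache_page(page);	/* arch-provided cache flush */
}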
701 | 701 | ||
702 | /* | 702 | /* |
703 | * Use sequence counter to get consistent i_size on 32-bit processors. | 703 | * Use sequence counter to get consistent i_size on 32-bit processors. |
704 | */ | 704 | */ |
705 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 705 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
706 | #include <linux/seqlock.h> | 706 | #include <linux/seqlock.h> |
707 | #define __NEED_I_SIZE_ORDERED | 707 | #define __NEED_I_SIZE_ORDERED |
708 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) | 708 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) |
709 | #else | 709 | #else |
710 | #define i_size_ordered_init(inode) do { } while (0) | 710 | #define i_size_ordered_init(inode) do { } while (0) |
711 | #endif | 711 | #endif |
712 | 712 | ||
713 | struct inode { | 713 | struct inode { |
714 | struct hlist_node i_hash; | 714 | struct hlist_node i_hash; |
715 | struct list_head i_list; | 715 | struct list_head i_list; |
716 | struct list_head i_sb_list; | 716 | struct list_head i_sb_list; |
717 | struct list_head i_dentry; | 717 | struct list_head i_dentry; |
718 | unsigned long i_ino; | 718 | unsigned long i_ino; |
719 | atomic_t i_count; | 719 | atomic_t i_count; |
720 | unsigned int i_nlink; | 720 | unsigned int i_nlink; |
721 | uid_t i_uid; | 721 | uid_t i_uid; |
722 | gid_t i_gid; | 722 | gid_t i_gid; |
723 | dev_t i_rdev; | 723 | dev_t i_rdev; |
724 | u64 i_version; | 724 | u64 i_version; |
725 | loff_t i_size; | 725 | loff_t i_size; |
726 | #ifdef __NEED_I_SIZE_ORDERED | 726 | #ifdef __NEED_I_SIZE_ORDERED |
727 | seqcount_t i_size_seqcount; | 727 | seqcount_t i_size_seqcount; |
728 | #endif | 728 | #endif |
729 | struct timespec i_atime; | 729 | struct timespec i_atime; |
730 | struct timespec i_mtime; | 730 | struct timespec i_mtime; |
731 | struct timespec i_ctime; | 731 | struct timespec i_ctime; |
732 | unsigned int i_blkbits; | 732 | unsigned int i_blkbits; |
733 | blkcnt_t i_blocks; | 733 | blkcnt_t i_blocks; |
734 | unsigned short i_bytes; | 734 | unsigned short i_bytes; |
735 | umode_t i_mode; | 735 | umode_t i_mode; |
736 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ | 736 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ |
737 | struct mutex i_mutex; | 737 | struct mutex i_mutex; |
738 | struct rw_semaphore i_alloc_sem; | 738 | struct rw_semaphore i_alloc_sem; |
739 | const struct inode_operations *i_op; | 739 | const struct inode_operations *i_op; |
740 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ | 740 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ |
741 | struct super_block *i_sb; | 741 | struct super_block *i_sb; |
742 | struct file_lock *i_flock; | 742 | struct file_lock *i_flock; |
743 | struct address_space *i_mapping; | 743 | struct address_space *i_mapping; |
744 | struct address_space i_data; | 744 | struct address_space i_data; |
745 | #ifdef CONFIG_QUOTA | 745 | #ifdef CONFIG_QUOTA |
746 | struct dquot *i_dquot[MAXQUOTAS]; | 746 | struct dquot *i_dquot[MAXQUOTAS]; |
747 | #endif | 747 | #endif |
748 | struct list_head i_devices; | 748 | struct list_head i_devices; |
749 | union { | 749 | union { |
750 | struct pipe_inode_info *i_pipe; | 750 | struct pipe_inode_info *i_pipe; |
751 | struct block_device *i_bdev; | 751 | struct block_device *i_bdev; |
752 | struct cdev *i_cdev; | 752 | struct cdev *i_cdev; |
753 | }; | 753 | }; |
754 | int i_cindex; | 754 | int i_cindex; |
755 | 755 | ||
756 | __u32 i_generation; | 756 | __u32 i_generation; |
757 | 757 | ||
758 | #ifdef CONFIG_FSNOTIFY | 758 | #ifdef CONFIG_FSNOTIFY |
759 | __u32 i_fsnotify_mask; /* all events this inode cares about */ | 759 | __u32 i_fsnotify_mask; /* all events this inode cares about */ |
760 | struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ | 760 | struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ |
761 | #endif | 761 | #endif |
762 | 762 | ||
763 | #ifdef CONFIG_INOTIFY | 763 | #ifdef CONFIG_INOTIFY |
764 | struct list_head inotify_watches; /* watches on this inode */ | 764 | struct list_head inotify_watches; /* watches on this inode */ |
765 | struct mutex inotify_mutex; /* protects the watches list */ | 765 | struct mutex inotify_mutex; /* protects the watches list */ |
766 | #endif | 766 | #endif |
767 | 767 | ||
768 | unsigned long i_state; | 768 | unsigned long i_state; |
769 | unsigned long dirtied_when; /* jiffies of first dirtying */ | 769 | unsigned long dirtied_when; /* jiffies of first dirtying */ |
770 | 770 | ||
771 | unsigned int i_flags; | 771 | unsigned int i_flags; |
772 | 772 | ||
773 | atomic_t i_writecount; | 773 | atomic_t i_writecount; |
774 | #ifdef CONFIG_SECURITY | 774 | #ifdef CONFIG_SECURITY |
775 | void *i_security; | 775 | void *i_security; |
776 | #endif | 776 | #endif |
777 | void *i_private; /* fs or device private pointer */ | 777 | void *i_private; /* fs or device private pointer */ |
778 | }; | 778 | }; |
779 | 779 | ||
780 | /* | 780 | /* |
781 | * inode->i_mutex nesting subclasses for the lock validator: | 781 | * inode->i_mutex nesting subclasses for the lock validator: |
782 | * | 782 | * |
783 | * 0: the object of the current VFS operation | 783 | * 0: the object of the current VFS operation |
784 | * 1: parent | 784 | * 1: parent |
785 | * 2: child/target | 785 | * 2: child/target |
786 | * 3: quota file | 786 | * 3: quota file |
787 | * | 787 | * |
788 | * The locking order between these classes is | 788 | * The locking order between these classes is |
789 | * parent -> child -> normal -> xattr -> quota | 789 | * parent -> child -> normal -> xattr -> quota |
790 | */ | 790 | */ |
791 | enum inode_i_mutex_lock_class | 791 | enum inode_i_mutex_lock_class |
792 | { | 792 | { |
793 | I_MUTEX_NORMAL, | 793 | I_MUTEX_NORMAL, |
794 | I_MUTEX_PARENT, | 794 | I_MUTEX_PARENT, |
795 | I_MUTEX_CHILD, | 795 | I_MUTEX_CHILD, |
796 | I_MUTEX_XATTR, | 796 | I_MUTEX_XATTR, |
797 | I_MUTEX_QUOTA | 797 | I_MUTEX_QUOTA |
798 | }; | 798 | }; |
799 | 799 | ||
800 | /* | 800 | /* |
801 | * NOTE: on a 32-bit arch with a preemptible kernel and | 801 | * NOTE: on a 32-bit arch with a preemptible kernel and |
802 | * a UP compile, the i_size_read/write must be atomic | 802 | * a UP compile, the i_size_read/write must be atomic |
803 | * with respect to the local cpu (unlike with preempt disabled), | 803 | * with respect to the local cpu (unlike with preempt disabled), |
804 | * but they don't need to be atomic with respect to other cpus like in | 804 | * but they don't need to be atomic with respect to other cpus like in |
805 | * true SMP (so they either need to locally disable irq around | 805 | * true SMP (so they either need to locally disable irq around |
806 | * the read or, for example on x86, they can still be implemented as a | 806 | * the read or, for example on x86, they can still be implemented as a |
807 | * cmpxchg8b without the need of the lock prefix). For SMP compiles | 807 | * cmpxchg8b without the need of the lock prefix). For SMP compiles |
808 | * and 64bit archs it makes no difference if preempt is enabled or not. | 808 | * and 64bit archs it makes no difference if preempt is enabled or not. |
809 | */ | 809 | */ |
810 | static inline loff_t i_size_read(const struct inode *inode) | 810 | static inline loff_t i_size_read(const struct inode *inode) |
811 | { | 811 | { |
812 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 812 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
813 | loff_t i_size; | 813 | loff_t i_size; |
814 | unsigned int seq; | 814 | unsigned int seq; |
815 | 815 | ||
816 | do { | 816 | do { |
817 | seq = read_seqcount_begin(&inode->i_size_seqcount); | 817 | seq = read_seqcount_begin(&inode->i_size_seqcount); |
818 | i_size = inode->i_size; | 818 | i_size = inode->i_size; |
819 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); | 819 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); |
820 | return i_size; | 820 | return i_size; |
821 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 821 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
822 | loff_t i_size; | 822 | loff_t i_size; |
823 | 823 | ||
824 | preempt_disable(); | 824 | preempt_disable(); |
825 | i_size = inode->i_size; | 825 | i_size = inode->i_size; |
826 | preempt_enable(); | 826 | preempt_enable(); |
827 | return i_size; | 827 | return i_size; |
828 | #else | 828 | #else |
829 | return inode->i_size; | 829 | return inode->i_size; |
830 | #endif | 830 | #endif |
831 | } | 831 | } |
832 | 832 | ||
833 | /* | 833 | /* |
834 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it | 834 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it |
835 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount | 835 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount |
836 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. | 836 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. |
837 | */ | 837 | */ |
838 | static inline void i_size_write(struct inode *inode, loff_t i_size) | 838 | static inline void i_size_write(struct inode *inode, loff_t i_size) |
839 | { | 839 | { |
840 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 840 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
841 | write_seqcount_begin(&inode->i_size_seqcount); | 841 | write_seqcount_begin(&inode->i_size_seqcount); |
842 | inode->i_size = i_size; | 842 | inode->i_size = i_size; |
843 | write_seqcount_end(&inode->i_size_seqcount); | 843 | write_seqcount_end(&inode->i_size_seqcount); |
844 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 844 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
845 | preempt_disable(); | 845 | preempt_disable(); |
846 | inode->i_size = i_size; | 846 | inode->i_size = i_size; |
847 | preempt_enable(); | 847 | preempt_enable(); |
848 | #else | 848 | #else |
849 | inode->i_size = i_size; | 849 | inode->i_size = i_size; |
850 | #endif | 850 | #endif |
851 | } | 851 | } |
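Putting the two NOTEs together, a hedged sketch of a correct size update: writers serialize on i_mutex, while lockless readers rely on the seqcount (example_extend is illustrative):

static void example_extend(struct inode *inode, loff_t newsize)
{
	mutex_lock(&inode->i_mutex);		/* serialize i_size_write() callers */
	if (newsize > i_size_read(inode))
		i_size_write(inode, newsize);
	mutex_unlock(&inode->i_mutex);
}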
852 | 852 | ||
853 | static inline unsigned iminor(const struct inode *inode) | 853 | static inline unsigned iminor(const struct inode *inode) |
854 | { | 854 | { |
855 | return MINOR(inode->i_rdev); | 855 | return MINOR(inode->i_rdev); |
856 | } | 856 | } |
857 | 857 | ||
858 | static inline unsigned imajor(const struct inode *inode) | 858 | static inline unsigned imajor(const struct inode *inode) |
859 | { | 859 | { |
860 | return MAJOR(inode->i_rdev); | 860 | return MAJOR(inode->i_rdev); |
861 | } | 861 | } |
862 | 862 | ||
863 | extern struct block_device *I_BDEV(struct inode *inode); | 863 | extern struct block_device *I_BDEV(struct inode *inode); |
864 | 864 | ||
865 | struct fown_struct { | 865 | struct fown_struct { |
866 | rwlock_t lock; /* protects pid, uid, euid fields */ | 866 | rwlock_t lock; /* protects pid, uid, euid fields */ |
867 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ | 867 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ |
868 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ | 868 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ |
869 | uid_t uid, euid; /* uid/euid of process setting the owner */ | 869 | uid_t uid, euid; /* uid/euid of process setting the owner */ |
870 | int signum; /* posix.1b rt signal to be delivered on IO */ | 870 | int signum; /* posix.1b rt signal to be delivered on IO */ |
871 | }; | 871 | }; |
872 | 872 | ||
873 | /* | 873 | /* |
874 | * Track a single file's readahead state | 874 | * Track a single file's readahead state |
875 | */ | 875 | */ |
876 | struct file_ra_state { | 876 | struct file_ra_state { |
877 | pgoff_t start; /* where readahead started */ | 877 | pgoff_t start; /* where readahead started */ |
878 | unsigned int size; /* # of readahead pages */ | 878 | unsigned int size; /* # of readahead pages */ |
879 | unsigned int async_size; /* do asynchronous readahead when | 879 | unsigned int async_size; /* do asynchronous readahead when |
880 | there are only # of pages ahead */ | 880 | there are only # of pages ahead */ |
881 | 881 | ||
882 | unsigned int ra_pages; /* Maximum readahead window */ | 882 | unsigned int ra_pages; /* Maximum readahead window */ |
883 | int mmap_miss; /* Cache miss stat for mmap accesses */ | 883 | int mmap_miss; /* Cache miss stat for mmap accesses */ |
884 | loff_t prev_pos; /* Cache last read() position */ | 884 | loff_t prev_pos; /* Cache last read() position */ |
885 | }; | 885 | }; |
886 | 886 | ||
887 | /* | 887 | /* |
888 | * Check if @index falls in the readahead window. | 888 | * Check if @index falls in the readahead window. |
889 | */ | 889 | */ |
890 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) | 890 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) |
891 | { | 891 | { |
892 | return (index >= ra->start && | 892 | return (index >= ra->start && |
893 | index < ra->start + ra->size); | 893 | index < ra->start + ra->size); |
894 | } | 894 | } |
895 | 895 | ||
896 | #define FILE_MNT_WRITE_TAKEN 1 | 896 | #define FILE_MNT_WRITE_TAKEN 1 |
897 | #define FILE_MNT_WRITE_RELEASED 2 | 897 | #define FILE_MNT_WRITE_RELEASED 2 |
898 | 898 | ||
899 | struct file { | 899 | struct file { |
900 | /* | 900 | /* |
901 | * fu_list becomes invalid after file_free is called and queued via | 901 | * fu_list becomes invalid after file_free is called and queued via |
902 | * fu_rcuhead for RCU freeing | 902 | * fu_rcuhead for RCU freeing |
903 | */ | 903 | */ |
904 | union { | 904 | union { |
905 | struct list_head fu_list; | 905 | struct list_head fu_list; |
906 | struct rcu_head fu_rcuhead; | 906 | struct rcu_head fu_rcuhead; |
907 | } f_u; | 907 | } f_u; |
908 | struct path f_path; | 908 | struct path f_path; |
909 | #define f_dentry f_path.dentry | 909 | #define f_dentry f_path.dentry |
910 | #define f_vfsmnt f_path.mnt | 910 | #define f_vfsmnt f_path.mnt |
911 | const struct file_operations *f_op; | 911 | const struct file_operations *f_op; |
912 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ | 912 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ |
913 | atomic_long_t f_count; | 913 | atomic_long_t f_count; |
914 | unsigned int f_flags; | 914 | unsigned int f_flags; |
915 | fmode_t f_mode; | 915 | fmode_t f_mode; |
916 | loff_t f_pos; | 916 | loff_t f_pos; |
917 | struct fown_struct f_owner; | 917 | struct fown_struct f_owner; |
918 | const struct cred *f_cred; | 918 | const struct cred *f_cred; |
919 | struct file_ra_state f_ra; | 919 | struct file_ra_state f_ra; |
920 | 920 | ||
921 | u64 f_version; | 921 | u64 f_version; |
922 | #ifdef CONFIG_SECURITY | 922 | #ifdef CONFIG_SECURITY |
923 | void *f_security; | 923 | void *f_security; |
924 | #endif | 924 | #endif |
925 | /* needed for tty driver, and maybe others */ | 925 | /* needed for tty driver, and maybe others */ |
926 | void *private_data; | 926 | void *private_data; |
927 | 927 | ||
928 | #ifdef CONFIG_EPOLL | 928 | #ifdef CONFIG_EPOLL |
929 | /* Used by fs/eventpoll.c to link all the hooks to this file */ | 929 | /* Used by fs/eventpoll.c to link all the hooks to this file */ |
930 | struct list_head f_ep_links; | 930 | struct list_head f_ep_links; |
931 | #endif /* #ifdef CONFIG_EPOLL */ | 931 | #endif /* #ifdef CONFIG_EPOLL */ |
932 | struct address_space *f_mapping; | 932 | struct address_space *f_mapping; |
933 | #ifdef CONFIG_DEBUG_WRITECOUNT | 933 | #ifdef CONFIG_DEBUG_WRITECOUNT |
934 | unsigned long f_mnt_write_state; | 934 | unsigned long f_mnt_write_state; |
935 | #endif | 935 | #endif |
936 | }; | 936 | }; |
937 | extern spinlock_t files_lock; | 937 | extern spinlock_t files_lock; |
938 | #define file_list_lock() spin_lock(&files_lock); | 938 | #define file_list_lock() spin_lock(&files_lock); |
939 | #define file_list_unlock() spin_unlock(&files_lock); | 939 | #define file_list_unlock() spin_unlock(&files_lock); |
940 | 940 | ||
941 | #define get_file(x) atomic_long_inc(&(x)->f_count) | 941 | #define get_file(x) atomic_long_inc(&(x)->f_count) |
942 | #define file_count(x) atomic_long_read(&(x)->f_count) | 942 | #define file_count(x) atomic_long_read(&(x)->f_count) |
943 | 943 | ||
944 | #ifdef CONFIG_DEBUG_WRITECOUNT | 944 | #ifdef CONFIG_DEBUG_WRITECOUNT |
945 | static inline void file_take_write(struct file *f) | 945 | static inline void file_take_write(struct file *f) |
946 | { | 946 | { |
947 | WARN_ON(f->f_mnt_write_state != 0); | 947 | WARN_ON(f->f_mnt_write_state != 0); |
948 | f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; | 948 | f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; |
949 | } | 949 | } |
950 | static inline void file_release_write(struct file *f) | 950 | static inline void file_release_write(struct file *f) |
951 | { | 951 | { |
952 | f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; | 952 | f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; |
953 | } | 953 | } |
954 | static inline void file_reset_write(struct file *f) | 954 | static inline void file_reset_write(struct file *f) |
955 | { | 955 | { |
956 | f->f_mnt_write_state = 0; | 956 | f->f_mnt_write_state = 0; |
957 | } | 957 | } |
958 | static inline void file_check_state(struct file *f) | 958 | static inline void file_check_state(struct file *f) |
959 | { | 959 | { |
960 | /* | 960 | /* |
961 | * At this point, either both or neither of these bits | 961 | * At this point, either both or neither of these bits |
962 | * should be set. | 962 | * should be set. |
963 | */ | 963 | */ |
964 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); | 964 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); |
965 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); | 965 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); |
966 | } | 966 | } |
967 | static inline int file_check_writeable(struct file *f) | 967 | static inline int file_check_writeable(struct file *f) |
968 | { | 968 | { |
969 | if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) | 969 | if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) |
970 | return 0; | 970 | return 0; |
971 | printk(KERN_WARNING "writeable file with no " | 971 | printk(KERN_WARNING "writeable file with no " |
972 | "mnt_want_write()\n"); | 972 | "mnt_want_write()\n"); |
973 | WARN_ON(1); | 973 | WARN_ON(1); |
974 | return -EINVAL; | 974 | return -EINVAL; |
975 | } | 975 | } |
976 | #else /* !CONFIG_DEBUG_WRITECOUNT */ | 976 | #else /* !CONFIG_DEBUG_WRITECOUNT */ |
977 | static inline void file_take_write(struct file *filp) {} | 977 | static inline void file_take_write(struct file *filp) {} |
978 | static inline void file_release_write(struct file *filp) {} | 978 | static inline void file_release_write(struct file *filp) {} |
979 | static inline void file_reset_write(struct file *filp) {} | 979 | static inline void file_reset_write(struct file *filp) {} |
980 | static inline void file_check_state(struct file *filp) {} | 980 | static inline void file_check_state(struct file *filp) {} |
981 | static inline int file_check_writeable(struct file *filp) | 981 | static inline int file_check_writeable(struct file *filp) |
982 | { | 982 | { |
983 | return 0; | 983 | return 0; |
984 | } | 984 | } |
985 | #endif /* CONFIG_DEBUG_WRITECOUNT */ | 985 | #endif /* CONFIG_DEBUG_WRITECOUNT */ |
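The debug hooks above are meant to bracket mnt write access; a hedged sketch of the expected pairing (mnt_want_write()/mnt_drop_write() are the real vfsmount helpers of this era; the wrapper names are illustrative):

static int example_start_write(struct file *file)
{
	int error = mnt_want_write(file->f_path.mnt);	/* take mnt write access */
	if (!error)
		file_take_write(file);	/* record it for the debug checks */
	return error;
}

static void example_finish_write(struct file *file)
{
	mnt_drop_write(file->f_path.mnt);
	file_release_write(file);	/* record the matching release */
}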
986 | 986 | ||
987 | #define MAX_NON_LFS ((1UL<<31) - 1) | 987 | #define MAX_NON_LFS ((1UL<<31) - 1) |
988 | 988 | ||
989 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 989 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
990 | limits, otherwise bad things can happen in VM. */ | 990 | limits, otherwise bad things can happen in VM. */ |
991 | #if BITS_PER_LONG==32 | 991 | #if BITS_PER_LONG==32 |
992 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 992 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
993 | #elif BITS_PER_LONG==64 | 993 | #elif BITS_PER_LONG==64 |
994 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL | 994 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL |
995 | #endif | 995 | #endif |
996 | 996 | ||
997 | #define FL_POSIX 1 | 997 | #define FL_POSIX 1 |
998 | #define FL_FLOCK 2 | 998 | #define FL_FLOCK 2 |
999 | #define FL_ACCESS 8 /* not trying to lock, just looking */ | 999 | #define FL_ACCESS 8 /* not trying to lock, just looking */ |
1000 | #define FL_EXISTS 16 /* when unlocking, test for existence */ | 1000 | #define FL_EXISTS 16 /* when unlocking, test for existence */ |
1001 | #define FL_LEASE 32 /* lease held on this file */ | 1001 | #define FL_LEASE 32 /* lease held on this file */ |
1002 | #define FL_CLOSE 64 /* unlock on close */ | 1002 | #define FL_CLOSE 64 /* unlock on close */ |
1003 | #define FL_SLEEP 128 /* A blocking lock */ | 1003 | #define FL_SLEEP 128 /* A blocking lock */ |
1004 | 1004 | ||
1005 | /* | 1005 | /* |
1006 | * Special return value from posix_lock_file() and vfs_lock_file() for | 1006 | * Special return value from posix_lock_file() and vfs_lock_file() for |
1007 | * asynchronous locking. | 1007 | * asynchronous locking. |
1008 | */ | 1008 | */ |
1009 | #define FILE_LOCK_DEFERRED 1 | 1009 | #define FILE_LOCK_DEFERRED 1 |
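For a synchronous caller the deferred return turns into a wait-and-retry loop; a hedged sketch patterned on posix_lock_file_wait() of this era (error handling trimmed):

static int example_lock_wait(struct file *filp, struct file_lock *fl)
{
	int error;

	for (;;) {
		error = posix_lock_file(filp, fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
		/* sleep until granted or interrupted; fl_wait and fl_next
		 * are members of struct file_lock defined below */
		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
		if (error)
			break;
	}
	return error;
}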
1010 | 1010 | ||
1011 | /* | 1011 | /* |
1012 | * The POSIX file lock owner is determined by | 1012 | * The POSIX file lock owner is determined by |
1013 | * the "struct files_struct" in the thread group | 1013 | * the "struct files_struct" in the thread group |
1014 | * (or NULL for no owner - BSD locks). | 1014 | * (or NULL for no owner - BSD locks). |
1015 | * | 1015 | * |
1016 | * Lockd stuffs a "host" pointer into this. | 1016 | * Lockd stuffs a "host" pointer into this. |
1017 | */ | 1017 | */ |
1018 | typedef struct files_struct *fl_owner_t; | 1018 | typedef struct files_struct *fl_owner_t; |
1019 | 1019 | ||
1020 | struct file_lock_operations { | 1020 | struct file_lock_operations { |
1021 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 1021 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
1022 | void (*fl_release_private)(struct file_lock *); | 1022 | void (*fl_release_private)(struct file_lock *); |
1023 | }; | 1023 | }; |
1024 | 1024 | ||
1025 | struct lock_manager_operations { | 1025 | struct lock_manager_operations { |
1026 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | 1026 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); |
1027 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | 1027 | void (*fl_notify)(struct file_lock *); /* unblock callback */ |
1028 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); | 1028 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); |
1029 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 1029 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
1030 | void (*fl_release_private)(struct file_lock *); | 1030 | void (*fl_release_private)(struct file_lock *); |
1031 | void (*fl_break)(struct file_lock *); | 1031 | void (*fl_break)(struct file_lock *); |
1032 | int (*fl_mylease)(struct file_lock *, struct file_lock *); | 1032 | int (*fl_mylease)(struct file_lock *, struct file_lock *); |
1033 | int (*fl_change)(struct file_lock **, int); | 1033 | int (*fl_change)(struct file_lock **, int); |
1034 | }; | 1034 | }; |
1035 | 1035 | ||
1036 | struct lock_manager { | 1036 | struct lock_manager { |
1037 | struct list_head list; | 1037 | struct list_head list; |
1038 | }; | 1038 | }; |
1039 | 1039 | ||
1040 | void locks_start_grace(struct lock_manager *); | 1040 | void locks_start_grace(struct lock_manager *); |
1041 | void locks_end_grace(struct lock_manager *); | 1041 | void locks_end_grace(struct lock_manager *); |
1042 | int locks_in_grace(void); | 1042 | int locks_in_grace(void); |
1043 | 1043 | ||
1044 | /* that will die - we need it for nfs_lock_info */ | 1044 | /* that will die - we need it for nfs_lock_info */ |
1045 | #include <linux/nfs_fs_i.h> | 1045 | #include <linux/nfs_fs_i.h> |
1046 | 1046 | ||
1047 | struct file_lock { | 1047 | struct file_lock { |
1048 | struct file_lock *fl_next; /* singly linked list for this inode */ | 1048 | struct file_lock *fl_next; /* singly linked list for this inode */ |
1049 | struct list_head fl_link; /* doubly linked list of all locks */ | 1049 | struct list_head fl_link; /* doubly linked list of all locks */ |
1050 | struct list_head fl_block; /* circular list of blocked processes */ | 1050 | struct list_head fl_block; /* circular list of blocked processes */ |
1051 | fl_owner_t fl_owner; | 1051 | fl_owner_t fl_owner; |
1052 | unsigned char fl_flags; | 1052 | unsigned char fl_flags; |
1053 | unsigned char fl_type; | 1053 | unsigned char fl_type; |
1054 | unsigned int fl_pid; | 1054 | unsigned int fl_pid; |
1055 | struct pid *fl_nspid; | 1055 | struct pid *fl_nspid; |
1056 | wait_queue_head_t fl_wait; | 1056 | wait_queue_head_t fl_wait; |
1057 | struct file *fl_file; | 1057 | struct file *fl_file; |
1058 | loff_t fl_start; | 1058 | loff_t fl_start; |
1059 | loff_t fl_end; | 1059 | loff_t fl_end; |
1060 | 1060 | ||
1061 | struct fasync_struct * fl_fasync; /* for lease break notifications */ | 1061 | struct fasync_struct * fl_fasync; /* for lease break notifications */ |
1062 | unsigned long fl_break_time; /* for nonblocking lease breaks */ | 1062 | unsigned long fl_break_time; /* for nonblocking lease breaks */ |
1063 | 1063 | ||
1064 | struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ | 1064 | struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ |
1065 | struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ | 1065 | struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ |
1066 | union { | 1066 | union { |
1067 | struct nfs_lock_info nfs_fl; | 1067 | struct nfs_lock_info nfs_fl; |
1068 | struct nfs4_lock_info nfs4_fl; | 1068 | struct nfs4_lock_info nfs4_fl; |
1069 | struct { | 1069 | struct { |
1070 | struct list_head link; /* link in AFS vnode's pending_locks list */ | 1070 | struct list_head link; /* link in AFS vnode's pending_locks list */ |
1071 | int state; /* state of grant or error if -ve */ | 1071 | int state; /* state of grant or error if -ve */ |
1072 | } afs; | 1072 | } afs; |
1073 | } fl_u; | 1073 | } fl_u; |
1074 | }; | 1074 | }; |
1075 | 1075 | ||
1076 | /* The following constant reflects the upper bound of the file/locking space */ | 1076 | /* The following constant reflects the upper bound of the file/locking space */ |
1077 | #ifndef OFFSET_MAX | 1077 | #ifndef OFFSET_MAX |
1078 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) | 1078 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) |
1079 | #define OFFSET_MAX INT_LIMIT(loff_t) | 1079 | #define OFFSET_MAX INT_LIMIT(loff_t) |
1080 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) | 1080 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) |
1081 | #endif | 1081 | #endif |
1082 | 1082 | ||
1083 | #include <linux/fcntl.h> | 1083 | #include <linux/fcntl.h> |
1084 | 1084 | ||
1085 | extern void send_sigio(struct fown_struct *fown, int fd, int band); | 1085 | extern void send_sigio(struct fown_struct *fown, int fd, int band); |
1086 | 1086 | ||
1087 | /* fs/sync.c */ | 1087 | /* fs/sync.c */ |
1088 | extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, | 1088 | extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, |
1089 | loff_t endbyte, unsigned int flags); | 1089 | loff_t endbyte, unsigned int flags); |
1090 | 1090 | ||
1091 | #ifdef CONFIG_FILE_LOCKING | 1091 | #ifdef CONFIG_FILE_LOCKING |
1092 | extern int fcntl_getlk(struct file *, struct flock __user *); | 1092 | extern int fcntl_getlk(struct file *, struct flock __user *); |
1093 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, | 1093 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, |
1094 | struct flock __user *); | 1094 | struct flock __user *); |
1095 | 1095 | ||
1096 | #if BITS_PER_LONG == 32 | 1096 | #if BITS_PER_LONG == 32 |
1097 | extern int fcntl_getlk64(struct file *, struct flock64 __user *); | 1097 | extern int fcntl_getlk64(struct file *, struct flock64 __user *); |
1098 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, | 1098 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, |
1099 | struct flock64 __user *); | 1099 | struct flock64 __user *); |
1100 | #endif | 1100 | #endif |
1101 | 1101 | ||
1102 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); | 1102 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); |
1103 | extern int fcntl_getlease(struct file *filp); | 1103 | extern int fcntl_getlease(struct file *filp); |
1104 | 1104 | ||
1105 | /* fs/locks.c */ | 1105 | /* fs/locks.c */ |
1106 | extern void locks_init_lock(struct file_lock *); | 1106 | extern void locks_init_lock(struct file_lock *); |
1107 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); | 1107 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); |
1108 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); | 1108 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); |
1109 | extern void locks_remove_posix(struct file *, fl_owner_t); | 1109 | extern void locks_remove_posix(struct file *, fl_owner_t); |
1110 | extern void locks_remove_flock(struct file *); | 1110 | extern void locks_remove_flock(struct file *); |
1111 | extern void posix_test_lock(struct file *, struct file_lock *); | 1111 | extern void posix_test_lock(struct file *, struct file_lock *); |
1112 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); | 1112 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); |
1113 | extern int posix_lock_file_wait(struct file *, struct file_lock *); | 1113 | extern int posix_lock_file_wait(struct file *, struct file_lock *); |
1114 | extern int posix_unblock_lock(struct file *, struct file_lock *); | 1114 | extern int posix_unblock_lock(struct file *, struct file_lock *); |
1115 | extern int vfs_test_lock(struct file *, struct file_lock *); | 1115 | extern int vfs_test_lock(struct file *, struct file_lock *); |
1116 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); | 1116 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); |
1117 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); | 1117 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); |
1118 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); | 1118 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); |
1119 | extern int __break_lease(struct inode *inode, unsigned int flags); | 1119 | extern int __break_lease(struct inode *inode, unsigned int flags); |
1120 | extern void lease_get_mtime(struct inode *, struct timespec *time); | 1120 | extern void lease_get_mtime(struct inode *, struct timespec *time); |
1121 | extern int generic_setlease(struct file *, long, struct file_lock **); | 1121 | extern int generic_setlease(struct file *, long, struct file_lock **); |
1122 | extern int vfs_setlease(struct file *, long, struct file_lock **); | 1122 | extern int vfs_setlease(struct file *, long, struct file_lock **); |
1123 | extern int lease_modify(struct file_lock **, int); | 1123 | extern int lease_modify(struct file_lock **, int); |
1124 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); | 1124 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); |
1125 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); | 1125 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); |
1126 | #else /* !CONFIG_FILE_LOCKING */ | 1126 | #else /* !CONFIG_FILE_LOCKING */ |
1127 | static inline int fcntl_getlk(struct file *file, struct flock __user *user) | 1127 | static inline int fcntl_getlk(struct file *file, struct flock __user *user) |
1128 | { | 1128 | { |
1129 | return -EINVAL; | 1129 | return -EINVAL; |
1130 | } | 1130 | } |
1131 | 1131 | ||
1132 | static inline int fcntl_setlk(unsigned int fd, struct file *file, | 1132 | static inline int fcntl_setlk(unsigned int fd, struct file *file, |
1133 | unsigned int cmd, struct flock __user *user) | 1133 | unsigned int cmd, struct flock __user *user) |
1134 | { | 1134 | { |
1135 | return -EACCES; | 1135 | return -EACCES; |
1136 | } | 1136 | } |
1137 | 1137 | ||
1138 | #if BITS_PER_LONG == 32 | 1138 | #if BITS_PER_LONG == 32 |
1139 | static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) | 1139 | static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) |
1140 | { | 1140 | { |
1141 | return -EINVAL; | 1141 | return -EINVAL; |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, | 1144 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, |
1145 | unsigned int cmd, struct flock64 __user *user) | 1145 | unsigned int cmd, struct flock64 __user *user) |
1146 | { | 1146 | { |
1147 | return -EACCES; | 1147 | return -EACCES; |
1148 | } | 1148 | } |
1149 | #endif | 1149 | #endif |
1150 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1150 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1151 | { | 1151 | { |
1152 | return 0; | 1152 | return 0; |
1153 | } | 1153 | } |
1154 | 1154 | ||
1155 | static inline int fcntl_getlease(struct file *filp) | 1155 | static inline int fcntl_getlease(struct file *filp) |
1156 | { | 1156 | { |
1157 | return 0; | 1157 | return 0; |
1158 | } | 1158 | } |
1159 | 1159 | ||
1160 | static inline void locks_init_lock(struct file_lock *fl) | 1160 | static inline void locks_init_lock(struct file_lock *fl) |
1161 | { | 1161 | { |
1162 | return; | 1162 | return; |
1163 | } | 1163 | } |
1164 | 1164 | ||
1165 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1165 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1166 | { | 1166 | { |
1167 | return; | 1167 | return; |
1168 | } | 1168 | } |
1169 | 1169 | ||
1170 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1170 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1171 | { | 1171 | { |
1172 | return; | 1172 | return; |
1173 | } | 1173 | } |
1174 | 1174 | ||
1175 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) | 1175 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) |
1176 | { | 1176 | { |
1177 | return; | 1177 | return; |
1178 | } | 1178 | } |
1179 | 1179 | ||
1180 | static inline void locks_remove_flock(struct file *filp) | 1180 | static inline void locks_remove_flock(struct file *filp) |
1181 | { | 1181 | { |
1182 | return; | 1182 | return; |
1183 | } | 1183 | } |
1184 | 1184 | ||
1185 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) | 1185 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) |
1186 | { | 1186 | { |
1187 | return; | 1187 | return; |
1188 | } | 1188 | } |
1189 | 1189 | ||
1190 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, | 1190 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, |
1191 | struct file_lock *conflock) | 1191 | struct file_lock *conflock) |
1192 | { | 1192 | { |
1193 | return -ENOLCK; | 1193 | return -ENOLCK; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) | 1196 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) |
1197 | { | 1197 | { |
1198 | return -ENOLCK; | 1198 | return -ENOLCK; |
1199 | } | 1199 | } |
1200 | 1200 | ||
1201 | static inline int posix_unblock_lock(struct file *filp, | 1201 | static inline int posix_unblock_lock(struct file *filp, |
1202 | struct file_lock *waiter) | 1202 | struct file_lock *waiter) |
1203 | { | 1203 | { |
1204 | return -ENOENT; | 1204 | return -ENOENT; |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) | 1207 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) |
1208 | { | 1208 | { |
1209 | return 0; | 1209 | return 0; |
1210 | } | 1210 | } |
1211 | 1211 | ||
1212 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, | 1212 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, |
1213 | struct file_lock *fl, struct file_lock *conf) | 1213 | struct file_lock *fl, struct file_lock *conf) |
1214 | { | 1214 | { |
1215 | return -ENOLCK; | 1215 | return -ENOLCK; |
1216 | } | 1216 | } |
1217 | 1217 | ||
1218 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 1218 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
1219 | { | 1219 | { |
1220 | return 0; | 1220 | return 0; |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | static inline int flock_lock_file_wait(struct file *filp, | 1223 | static inline int flock_lock_file_wait(struct file *filp, |
1224 | struct file_lock *request) | 1224 | struct file_lock *request) |
1225 | { | 1225 | { |
1226 | return -ENOLCK; | 1226 | return -ENOLCK; |
1227 | } | 1227 | } |
1228 | 1228 | ||
1229 | static inline int __break_lease(struct inode *inode, unsigned int mode) | 1229 | static inline int __break_lease(struct inode *inode, unsigned int mode) |
1230 | { | 1230 | { |
1231 | return 0; | 1231 | return 0; |
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) | 1234 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) |
1235 | { | 1235 | { |
1236 | return; | 1236 | return; |
1237 | } | 1237 | } |
1238 | 1238 | ||
1239 | static inline int generic_setlease(struct file *filp, long arg, | 1239 | static inline int generic_setlease(struct file *filp, long arg, |
1240 | struct file_lock **flp) | 1240 | struct file_lock **flp) |
1241 | { | 1241 | { |
1242 | return -EINVAL; | 1242 | return -EINVAL; |
1243 | } | 1243 | } |
1244 | 1244 | ||
1245 | static inline int vfs_setlease(struct file *filp, long arg, | 1245 | static inline int vfs_setlease(struct file *filp, long arg, |
1246 | struct file_lock **lease) | 1246 | struct file_lock **lease) |
1247 | { | 1247 | { |
1248 | return -EINVAL; | 1248 | return -EINVAL; |
1249 | } | 1249 | } |
1250 | 1250 | ||
1251 | static inline int lease_modify(struct file_lock **before, int arg) | 1251 | static inline int lease_modify(struct file_lock **before, int arg) |
1252 | { | 1252 | { |
1253 | return -EINVAL; | 1253 | return -EINVAL; |
1254 | } | 1254 | } |
1255 | 1255 | ||
1256 | static inline int lock_may_read(struct inode *inode, loff_t start, | 1256 | static inline int lock_may_read(struct inode *inode, loff_t start, |
1257 | unsigned long len) | 1257 | unsigned long len) |
1258 | { | 1258 | { |
1259 | return 1; | 1259 | return 1; |
1260 | } | 1260 | } |
1261 | 1261 | ||
1262 | static inline int lock_may_write(struct inode *inode, loff_t start, | 1262 | static inline int lock_may_write(struct inode *inode, loff_t start, |
1263 | unsigned long len) | 1263 | unsigned long len) |
1264 | { | 1264 | { |
1265 | return 1; | 1265 | return 1; |
1266 | } | 1266 | } |
1267 | 1267 | ||
1268 | #endif /* !CONFIG_FILE_LOCKING */ | 1268 | #endif /* !CONFIG_FILE_LOCKING */ |
1269 | 1269 | ||
1270 | 1270 | ||
1271 | struct fasync_struct { | 1271 | struct fasync_struct { |
1272 | int magic; | 1272 | int magic; |
1273 | int fa_fd; | 1273 | int fa_fd; |
1274 | struct fasync_struct *fa_next; /* singly linked list */ | 1274 | struct fasync_struct *fa_next; /* singly linked list */ |
1275 | struct file *fa_file; | 1275 | struct file *fa_file; |
1276 | }; | 1276 | }; |
1277 | 1277 | ||
1278 | #define FASYNC_MAGIC 0x4601 | 1278 | #define FASYNC_MAGIC 0x4601 |
1279 | 1279 | ||
1280 | /* SMP safe fasync helpers: */ | 1280 | /* SMP safe fasync helpers: */ |
1281 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); | 1281 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); |
1282 | /* can be called from interrupts */ | 1282 | /* can be called from interrupts */ |
1283 | extern void kill_fasync(struct fasync_struct **, int, int); | 1283 | extern void kill_fasync(struct fasync_struct **, int, int); |
1284 | /* only for net: no internal synchronization */ | 1284 | /* only for net: no internal synchronization */ |
1285 | extern void __kill_fasync(struct fasync_struct *, int, int); | 1285 | extern void __kill_fasync(struct fasync_struct *, int, int); |
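The helpers above are typically wired up from a driver's fasync method; a minimal sketch of the classic pattern (not part of this diff, names hypothetical):

/* sketch: driver-side SIGIO support via fasync_helper()/kill_fasync() */
static struct fasync_struct *my_async_queue;

static int my_fasync(int fd, struct file *filp, int on)
{
	/* add or remove filp from the notification list */
	return fasync_helper(fd, filp, on, &my_async_queue);
}

/* when new data arrives (may run in interrupt context): */
static void my_data_ready(void)
{
	kill_fasync(&my_async_queue, SIGIO, POLL_IN);
}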
1286 | 1286 | ||
1287 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); | 1287 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); |
1288 | extern int f_setown(struct file *filp, unsigned long arg, int force); | 1288 | extern int f_setown(struct file *filp, unsigned long arg, int force); |
1289 | extern void f_delown(struct file *filp); | 1289 | extern void f_delown(struct file *filp); |
1290 | extern pid_t f_getown(struct file *filp); | 1290 | extern pid_t f_getown(struct file *filp); |
1291 | extern int send_sigurg(struct fown_struct *fown); | 1291 | extern int send_sigurg(struct fown_struct *fown); |
1292 | 1292 | ||
1293 | /* | 1293 | /* |
1294 | * Umount options | 1294 | * Umount options |
1295 | */ | 1295 | */ |
1296 | 1296 | ||
1297 | #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ | 1297 | #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */
1298 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ | 1298 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ |
1299 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ | 1299 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ |
1300 | 1300 | ||
1301 | extern struct list_head super_blocks; | 1301 | extern struct list_head super_blocks; |
1302 | extern spinlock_t sb_lock; | 1302 | extern spinlock_t sb_lock; |
1303 | 1303 | ||
1304 | #define sb_entry(list) list_entry((list), struct super_block, s_list) | 1304 | #define sb_entry(list) list_entry((list), struct super_block, s_list) |
1305 | #define S_BIAS (1<<30) | 1305 | #define S_BIAS (1<<30) |
1306 | struct super_block { | 1306 | struct super_block { |
1307 | struct list_head s_list; /* Keep this first */ | 1307 | struct list_head s_list; /* Keep this first */ |
1308 | dev_t s_dev; /* search index; _not_ kdev_t */ | 1308 | dev_t s_dev; /* search index; _not_ kdev_t */ |
1309 | unsigned long s_blocksize; | 1309 | unsigned long s_blocksize; |
1310 | unsigned char s_blocksize_bits; | 1310 | unsigned char s_blocksize_bits; |
1311 | unsigned char s_dirt; | 1311 | unsigned char s_dirt; |
1312 | unsigned long long s_maxbytes; /* Max file size */ | 1312 | unsigned long long s_maxbytes; /* Max file size */ |
1313 | struct file_system_type *s_type; | 1313 | struct file_system_type *s_type; |
1314 | const struct super_operations *s_op; | 1314 | const struct super_operations *s_op; |
1315 | struct dquot_operations *dq_op; | 1315 | struct dquot_operations *dq_op; |
1316 | struct quotactl_ops *s_qcop; | 1316 | struct quotactl_ops *s_qcop; |
1317 | const struct export_operations *s_export_op; | 1317 | const struct export_operations *s_export_op; |
1318 | unsigned long s_flags; | 1318 | unsigned long s_flags; |
1319 | unsigned long s_magic; | 1319 | unsigned long s_magic; |
1320 | struct dentry *s_root; | 1320 | struct dentry *s_root; |
1321 | struct rw_semaphore s_umount; | 1321 | struct rw_semaphore s_umount; |
1322 | struct mutex s_lock; | 1322 | struct mutex s_lock; |
1323 | int s_count; | 1323 | int s_count; |
1324 | int s_need_sync_fs; | 1324 | int s_need_sync; |
1325 | atomic_t s_active; | 1325 | atomic_t s_active; |
1326 | #ifdef CONFIG_SECURITY | 1326 | #ifdef CONFIG_SECURITY |
1327 | void *s_security; | 1327 | void *s_security; |
1328 | #endif | 1328 | #endif |
1329 | struct xattr_handler **s_xattr; | 1329 | struct xattr_handler **s_xattr; |
1330 | 1330 | ||
1331 | struct list_head s_inodes; /* all inodes */ | 1331 | struct list_head s_inodes; /* all inodes */ |
1332 | struct list_head s_dirty; /* dirty inodes */ | 1332 | struct list_head s_dirty; /* dirty inodes */ |
1333 | struct list_head s_io; /* parked for writeback */ | 1333 | struct list_head s_io; /* parked for writeback */ |
1334 | struct list_head s_more_io; /* parked for more writeback */ | 1334 | struct list_head s_more_io; /* parked for more writeback */ |
1335 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ | 1335 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ |
1336 | struct list_head s_files; | 1336 | struct list_head s_files; |
1337 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ | 1337 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ |
1338 | struct list_head s_dentry_lru; /* unused dentry lru */ | 1338 | struct list_head s_dentry_lru; /* unused dentry lru */ |
1339 | int s_nr_dentry_unused; /* # of dentry on lru */ | 1339 | int s_nr_dentry_unused; /* # of dentry on lru */ |
1340 | 1340 | ||
1341 | struct block_device *s_bdev; | 1341 | struct block_device *s_bdev; |
1342 | struct mtd_info *s_mtd; | 1342 | struct mtd_info *s_mtd; |
1343 | struct list_head s_instances; | 1343 | struct list_head s_instances; |
1344 | struct quota_info s_dquot; /* Diskquota specific options */ | 1344 | struct quota_info s_dquot; /* Diskquota specific options */ |
1345 | 1345 | ||
1346 | int s_frozen; | 1346 | int s_frozen; |
1347 | wait_queue_head_t s_wait_unfrozen; | 1347 | wait_queue_head_t s_wait_unfrozen; |
1348 | 1348 | ||
1349 | char s_id[32]; /* Informational name */ | 1349 | char s_id[32]; /* Informational name */ |
1350 | 1350 | ||
1351 | void *s_fs_info; /* Filesystem private info */ | 1351 | void *s_fs_info; /* Filesystem private info */ |
1352 | fmode_t s_mode; | 1352 | fmode_t s_mode; |
1353 | 1353 | ||
1354 | /* | 1354 | /* |
1355 | * The next field is for VFS *only*. No filesystems have any business | 1355 | * The next field is for VFS *only*. No filesystems have any business |
1356 | * even looking at it. You have been warned. | 1356 | * even looking at it. You have been warned.
1357 | */ | 1357 | */ |
1358 | struct mutex s_vfs_rename_mutex; /* Kludge */ | 1358 | struct mutex s_vfs_rename_mutex; /* Kludge */ |
1359 | 1359 | ||
1360 | /* Granularity of c/m/atime in ns. | 1360 | /* Granularity of c/m/atime in ns. |
1361 | Cannot be worse than a second */ | 1361 | Cannot be worse than a second */ |
1362 | u32 s_time_gran; | 1362 | u32 s_time_gran; |
1363 | 1363 | ||
1364 | /* | 1364 | /* |
1365 | * Filesystem subtype. If non-empty the filesystem type field | 1365 | * Filesystem subtype. If non-empty the filesystem type field |
1366 | * in /proc/mounts will be "type.subtype" | 1366 | * in /proc/mounts will be "type.subtype" |
1367 | */ | 1367 | */ |
1368 | char *s_subtype; | 1368 | char *s_subtype; |
1369 | 1369 | ||
1370 | /* | 1370 | /* |
1371 | * Saved mount options for lazy filesystems using | 1371 | * Saved mount options for lazy filesystems using |
1372 | * generic_show_options() | 1372 | * generic_show_options() |
1373 | */ | 1373 | */ |
1374 | char *s_options; | 1374 | char *s_options; |
1375 | }; | 1375 | }; |
1376 | 1376 | ||
1377 | extern struct timespec current_fs_time(struct super_block *sb); | 1377 | extern struct timespec current_fs_time(struct super_block *sb); |
1378 | 1378 | ||
1379 | /* | 1379 | /* |
1380 | * Snapshotting support. | 1380 | * Snapshotting support. |
1381 | */ | 1381 | */ |
1382 | enum { | 1382 | enum { |
1383 | SB_UNFROZEN = 0, | 1383 | SB_UNFROZEN = 0, |
1384 | SB_FREEZE_WRITE = 1, | 1384 | SB_FREEZE_WRITE = 1, |
1385 | SB_FREEZE_TRANS = 2, | 1385 | SB_FREEZE_TRANS = 2, |
1386 | }; | 1386 | }; |
1387 | 1387 | ||
1388 | #define vfs_check_frozen(sb, level) \ | 1388 | #define vfs_check_frozen(sb, level) \ |
1389 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | 1389 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) |
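As a usage sketch (hypothetical function, not from this commit), a filesystem's write path can block until the superblock thaws before dirtying anything:

/* sketch: wait out a freeze before starting a transaction */
static void myfs_start_write(struct super_block *sb)
{
	vfs_check_frozen(sb, SB_FREEZE_WRITE);
	/* ... now safe to dirty the filesystem ... */
}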
1390 | 1390 | ||
1391 | #define get_fs_excl() atomic_inc(¤t->fs_excl) | 1391 | #define get_fs_excl() atomic_inc(¤t->fs_excl) |
1392 | #define put_fs_excl() atomic_dec(¤t->fs_excl) | 1392 | #define put_fs_excl() atomic_dec(¤t->fs_excl) |
1393 | #define has_fs_excl() atomic_read(¤t->fs_excl) | 1393 | #define has_fs_excl() atomic_read(¤t->fs_excl) |
1394 | 1394 | ||
1395 | #define is_owner_or_cap(inode) \ | 1395 | #define is_owner_or_cap(inode) \ |
1396 | ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) | 1396 | ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) |
1397 | 1397 | ||
1398 | /* not quite ready to be deprecated, but... */ | 1398 | /* not quite ready to be deprecated, but... */ |
1399 | extern void lock_super(struct super_block *); | 1399 | extern void lock_super(struct super_block *); |
1400 | extern void unlock_super(struct super_block *); | 1400 | extern void unlock_super(struct super_block *); |
1401 | 1401 | ||
1402 | /* | 1402 | /* |
1403 | * VFS helper functions. | 1403 | * VFS helper functions.
1404 | */ | 1404 | */ |
1405 | extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); | 1405 | extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); |
1406 | extern int vfs_mkdir(struct inode *, struct dentry *, int); | 1406 | extern int vfs_mkdir(struct inode *, struct dentry *, int); |
1407 | extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); | 1407 | extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); |
1408 | extern int vfs_symlink(struct inode *, struct dentry *, const char *); | 1408 | extern int vfs_symlink(struct inode *, struct dentry *, const char *); |
1409 | extern int vfs_link(struct dentry *, struct inode *, struct dentry *); | 1409 | extern int vfs_link(struct dentry *, struct inode *, struct dentry *); |
1410 | extern int vfs_rmdir(struct inode *, struct dentry *); | 1410 | extern int vfs_rmdir(struct inode *, struct dentry *); |
1411 | extern int vfs_unlink(struct inode *, struct dentry *); | 1411 | extern int vfs_unlink(struct inode *, struct dentry *); |
1412 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 1412 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
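For illustration, an in-kernel caller that already holds the parent directory's i_mutex might drive these helpers like so (hedged sketch; the name "victim" is hypothetical):

/* sketch: unlink "victim" under parent dentry 'dir' (i_mutex held) */
struct dentry *victim = lookup_one_len("victim", dir, strlen("victim"));
int err = PTR_ERR(victim);

if (!IS_ERR(victim)) {
	err = vfs_unlink(dir->d_inode, victim);
	dput(victim);
}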
1413 | 1413 | ||
1414 | /* | 1414 | /* |
1415 | * VFS dentry helper functions. | 1415 | * VFS dentry helper functions. |
1416 | */ | 1416 | */ |
1417 | extern void dentry_unhash(struct dentry *dentry); | 1417 | extern void dentry_unhash(struct dentry *dentry); |
1418 | 1418 | ||
1419 | /* | 1419 | /* |
1420 | * VFS file helper functions. | 1420 | * VFS file helper functions. |
1421 | */ | 1421 | */ |
1422 | extern int file_permission(struct file *, int); | 1422 | extern int file_permission(struct file *, int); |
1423 | 1423 | ||
1424 | /* | 1424 | /* |
1425 | * VFS FS_IOC_FIEMAP helper definitions. | 1425 | * VFS FS_IOC_FIEMAP helper definitions. |
1426 | */ | 1426 | */ |
1427 | struct fiemap_extent_info { | 1427 | struct fiemap_extent_info { |
1428 | unsigned int fi_flags; /* Flags as passed from user */ | 1428 | unsigned int fi_flags; /* Flags as passed from user */ |
1429 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | 1429 | unsigned int fi_extents_mapped; /* Number of mapped extents */ |
1430 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | 1430 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ |
1431 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent | 1431 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent |
1432 | * array */ | 1432 | * array */ |
1433 | }; | 1433 | }; |
1434 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | 1434 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, |
1435 | u64 phys, u64 len, u32 flags); | 1435 | u64 phys, u64 len, u32 flags); |
1436 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | 1436 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); |
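A trivial ->fiemap shows how the two helpers combine: validate the user's flags, then emit extents until done or the array fills (sketch only, assuming a file that maps to a single extent):

static int myfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		       u64 start, u64 len)
{
	int ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;
	/* logical 0, physical unknown (0), length i_size, last extent */
	ret = fiemap_fill_next_extent(fieinfo, 0, 0, inode->i_size,
				      FIEMAP_EXTENT_LAST);
	return ret < 0 ? ret : 0;
}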
1437 | 1437 | ||
1438 | /* | 1438 | /* |
1439 | * File types | 1439 | * File types |
1440 | * | 1440 | * |
1441 | * NOTE! These match bits 12..15 of stat.st_mode | 1441 | * NOTE! These match bits 12..15 of stat.st_mode |
1442 | * (i.e. "(i_mode >> 12) & 15"). | 1442 | * (i.e. "(i_mode >> 12) & 15").
1443 | */ | 1443 | */ |
1444 | #define DT_UNKNOWN 0 | 1444 | #define DT_UNKNOWN 0 |
1445 | #define DT_FIFO 1 | 1445 | #define DT_FIFO 1 |
1446 | #define DT_CHR 2 | 1446 | #define DT_CHR 2 |
1447 | #define DT_DIR 4 | 1447 | #define DT_DIR 4 |
1448 | #define DT_BLK 6 | 1448 | #define DT_BLK 6 |
1449 | #define DT_REG 8 | 1449 | #define DT_REG 8 |
1450 | #define DT_LNK 10 | 1450 | #define DT_LNK 10 |
1451 | #define DT_SOCK 12 | 1451 | #define DT_SOCK 12 |
1452 | #define DT_WHT 14 | 1452 | #define DT_WHT 14 |
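Because these values are literally bits 12..15 of st_mode, the mapping can be computed directly; a one-line sketch (helper name hypothetical):

/* e.g. S_IFREG (0100000) >> 12 == DT_REG (8), S_IFDIR >> 12 == DT_DIR (4) */
static inline unsigned char mode_to_dt(umode_t mode)
{
	return (mode >> 12) & 15;
}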
1453 | 1453 | ||
1454 | #define OSYNC_METADATA (1<<0) | 1454 | #define OSYNC_METADATA (1<<0) |
1455 | #define OSYNC_DATA (1<<1) | 1455 | #define OSYNC_DATA (1<<1) |
1456 | #define OSYNC_INODE (1<<2) | 1456 | #define OSYNC_INODE (1<<2) |
1457 | int generic_osync_inode(struct inode *, struct address_space *, int); | 1457 | int generic_osync_inode(struct inode *, struct address_space *, int); |
1458 | 1458 | ||
1459 | /* | 1459 | /* |
1460 | * This is the "filldir" function type, used by readdir() to let | 1460 | * This is the "filldir" function type, used by readdir() to let |
1461 | * the kernel specify what kind of dirent layout it wants to have. | 1461 | * the kernel specify what kind of dirent layout it wants to have. |
1462 | * This allows the kernel to read directories into kernel space or | 1462 | * This allows the kernel to read directories into kernel space or |
1463 | * to have different dirent layouts depending on the binary type. | 1463 | * to have different dirent layouts depending on the binary type. |
1464 | */ | 1464 | */ |
1465 | typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); | 1465 | typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); |
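A minimal ->readdir shows the calling convention: the method feeds each entry to the filldir callback and stops early if it returns nonzero (the buffer is full). A hedged sketch, not from this commit:

/* sketch: emit "." and ".." then report end of directory */
static int myfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_path.dentry->d_inode;

	if (filp->f_pos == 0) {
		if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR))
			return 0;
		filp->f_pos = 1;
	}
	if (filp->f_pos == 1) {
		if (filldir(dirent, "..", 2, filp->f_pos,
			    parent_ino(filp->f_path.dentry), DT_DIR))
			return 0;
		filp->f_pos = 2;
	}
	return 0;
}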
1466 | struct block_device_operations; | 1466 | struct block_device_operations; |
1467 | 1467 | ||
1468 | /* These macros are for out-of-kernel modules to test that | 1468 | /* These macros are for out-of-kernel modules to test that
1469 | * the kernel supports the unlocked_ioctl and compat_ioctl | 1469 | * the kernel supports the unlocked_ioctl and compat_ioctl |
1470 | * fields in struct file_operations. */ | 1470 | * fields in struct file_operations. */ |
1471 | #define HAVE_COMPAT_IOCTL 1 | 1471 | #define HAVE_COMPAT_IOCTL 1 |
1472 | #define HAVE_UNLOCKED_IOCTL 1 | 1472 | #define HAVE_UNLOCKED_IOCTL 1 |
1473 | 1473 | ||
1474 | /* | 1474 | /* |
1475 | * NOTE: | 1475 | * NOTE: |
1476 | * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl | 1476 | * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl |
1477 | * can be called without the big kernel lock held in all filesystems. | 1477 | * can be called without the big kernel lock held in all filesystems. |
1478 | */ | 1478 | */ |
1479 | struct file_operations { | 1479 | struct file_operations { |
1480 | struct module *owner; | 1480 | struct module *owner; |
1481 | loff_t (*llseek) (struct file *, loff_t, int); | 1481 | loff_t (*llseek) (struct file *, loff_t, int); |
1482 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); | 1482 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); |
1483 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); | 1483 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
1484 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 1484 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
1485 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 1485 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
1486 | int (*readdir) (struct file *, void *, filldir_t); | 1486 | int (*readdir) (struct file *, void *, filldir_t); |
1487 | unsigned int (*poll) (struct file *, struct poll_table_struct *); | 1487 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
1488 | int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); | 1488 | int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); |
1489 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); | 1489 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
1490 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); | 1490 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); |
1491 | int (*mmap) (struct file *, struct vm_area_struct *); | 1491 | int (*mmap) (struct file *, struct vm_area_struct *); |
1492 | int (*open) (struct inode *, struct file *); | 1492 | int (*open) (struct inode *, struct file *); |
1493 | int (*flush) (struct file *, fl_owner_t id); | 1493 | int (*flush) (struct file *, fl_owner_t id); |
1494 | int (*release) (struct inode *, struct file *); | 1494 | int (*release) (struct inode *, struct file *); |
1495 | int (*fsync) (struct file *, struct dentry *, int datasync); | 1495 | int (*fsync) (struct file *, struct dentry *, int datasync); |
1496 | int (*aio_fsync) (struct kiocb *, int datasync); | 1496 | int (*aio_fsync) (struct kiocb *, int datasync); |
1497 | int (*fasync) (int, struct file *, int); | 1497 | int (*fasync) (int, struct file *, int); |
1498 | int (*lock) (struct file *, int, struct file_lock *); | 1498 | int (*lock) (struct file *, int, struct file_lock *); |
1499 | ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); | 1499 | ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); |
1500 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); | 1500 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); |
1501 | int (*check_flags)(int); | 1501 | int (*check_flags)(int); |
1502 | int (*flock) (struct file *, int, struct file_lock *); | 1502 | int (*flock) (struct file *, int, struct file_lock *); |
1503 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); | 1503 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); |
1504 | ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); | 1504 | ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); |
1505 | int (*setlease)(struct file *, long, struct file_lock **); | 1505 | int (*setlease)(struct file *, long, struct file_lock **); |
1506 | }; | 1506 | }; |
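Per the NOTE above, new code should prefer the BKL-free entry points; a typical designated-initializer sketch for a hypothetical driver:

static const struct file_operations my_fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= my_read,
	.unlocked_ioctl	= my_ioctl,	/* runs without the big kernel lock */
	.open		= my_open,
	.release	= my_release,
};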
1507 | 1507 | ||
1508 | struct inode_operations { | 1508 | struct inode_operations { |
1509 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); | 1509 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); |
1510 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); | 1510 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); |
1511 | int (*link) (struct dentry *,struct inode *,struct dentry *); | 1511 | int (*link) (struct dentry *,struct inode *,struct dentry *); |
1512 | int (*unlink) (struct inode *,struct dentry *); | 1512 | int (*unlink) (struct inode *,struct dentry *); |
1513 | int (*symlink) (struct inode *,struct dentry *,const char *); | 1513 | int (*symlink) (struct inode *,struct dentry *,const char *); |
1514 | int (*mkdir) (struct inode *,struct dentry *,int); | 1514 | int (*mkdir) (struct inode *,struct dentry *,int); |
1515 | int (*rmdir) (struct inode *,struct dentry *); | 1515 | int (*rmdir) (struct inode *,struct dentry *); |
1516 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); | 1516 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); |
1517 | int (*rename) (struct inode *, struct dentry *, | 1517 | int (*rename) (struct inode *, struct dentry *, |
1518 | struct inode *, struct dentry *); | 1518 | struct inode *, struct dentry *); |
1519 | int (*readlink) (struct dentry *, char __user *,int); | 1519 | int (*readlink) (struct dentry *, char __user *,int); |
1520 | void * (*follow_link) (struct dentry *, struct nameidata *); | 1520 | void * (*follow_link) (struct dentry *, struct nameidata *); |
1521 | void (*put_link) (struct dentry *, struct nameidata *, void *); | 1521 | void (*put_link) (struct dentry *, struct nameidata *, void *); |
1522 | void (*truncate) (struct inode *); | 1522 | void (*truncate) (struct inode *); |
1523 | int (*permission) (struct inode *, int); | 1523 | int (*permission) (struct inode *, int); |
1524 | int (*setattr) (struct dentry *, struct iattr *); | 1524 | int (*setattr) (struct dentry *, struct iattr *); |
1525 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); | 1525 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); |
1526 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 1526 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
1527 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); | 1527 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); |
1528 | ssize_t (*listxattr) (struct dentry *, char *, size_t); | 1528 | ssize_t (*listxattr) (struct dentry *, char *, size_t); |
1529 | int (*removexattr) (struct dentry *, const char *); | 1529 | int (*removexattr) (struct dentry *, const char *); |
1530 | void (*truncate_range)(struct inode *, loff_t, loff_t); | 1530 | void (*truncate_range)(struct inode *, loff_t, loff_t); |
1531 | long (*fallocate)(struct inode *inode, int mode, loff_t offset, | 1531 | long (*fallocate)(struct inode *inode, int mode, loff_t offset, |
1532 | loff_t len); | 1532 | loff_t len); |
1533 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | 1533 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, |
1534 | u64 len); | 1534 | u64 len); |
1535 | }; | 1535 | }; |
1536 | 1536 | ||
1537 | struct seq_file; | 1537 | struct seq_file; |
1538 | 1538 | ||
1539 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, | 1539 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, |
1540 | unsigned long nr_segs, unsigned long fast_segs, | 1540 | unsigned long nr_segs, unsigned long fast_segs, |
1541 | struct iovec *fast_pointer, | 1541 | struct iovec *fast_pointer, |
1542 | struct iovec **ret_pointer); | 1542 | struct iovec **ret_pointer); |
1543 | 1543 | ||
1544 | extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); | 1544 | extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); |
1545 | extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); | 1545 | extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); |
1546 | extern ssize_t vfs_readv(struct file *, const struct iovec __user *, | 1546 | extern ssize_t vfs_readv(struct file *, const struct iovec __user *, |
1547 | unsigned long, loff_t *); | 1547 | unsigned long, loff_t *); |
1548 | extern ssize_t vfs_writev(struct file *, const struct iovec __user *, | 1548 | extern ssize_t vfs_writev(struct file *, const struct iovec __user *, |
1549 | unsigned long, loff_t *); | 1549 | unsigned long, loff_t *); |
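These take __user buffers, so an in-kernel caller of this era temporarily widens the address limit around the call; a hedged sketch of the classic pattern:

/* sketch: read 'count' bytes from 'filp' into a kernel buffer 'buf' */
mm_segment_t old_fs = get_fs();
ssize_t ret;

set_fs(KERNEL_DS);
ret = vfs_read(filp, (char __user *)buf, count, &pos);
set_fs(old_fs);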
1550 | 1550 | ||
1551 | struct super_operations { | 1551 | struct super_operations { |
1552 | struct inode *(*alloc_inode)(struct super_block *sb); | 1552 | struct inode *(*alloc_inode)(struct super_block *sb); |
1553 | void (*destroy_inode)(struct inode *); | 1553 | void (*destroy_inode)(struct inode *); |
1554 | 1554 | ||
1555 | void (*dirty_inode) (struct inode *); | 1555 | void (*dirty_inode) (struct inode *); |
1556 | int (*write_inode) (struct inode *, int); | 1556 | int (*write_inode) (struct inode *, int); |
1557 | void (*drop_inode) (struct inode *); | 1557 | void (*drop_inode) (struct inode *); |
1558 | void (*delete_inode) (struct inode *); | 1558 | void (*delete_inode) (struct inode *); |
1559 | void (*put_super) (struct super_block *); | 1559 | void (*put_super) (struct super_block *); |
1560 | void (*write_super) (struct super_block *); | 1560 | void (*write_super) (struct super_block *); |
1561 | int (*sync_fs)(struct super_block *sb, int wait); | 1561 | int (*sync_fs)(struct super_block *sb, int wait); |
1562 | int (*freeze_fs) (struct super_block *); | 1562 | int (*freeze_fs) (struct super_block *); |
1563 | int (*unfreeze_fs) (struct super_block *); | 1563 | int (*unfreeze_fs) (struct super_block *); |
1564 | int (*statfs) (struct dentry *, struct kstatfs *); | 1564 | int (*statfs) (struct dentry *, struct kstatfs *); |
1565 | int (*remount_fs) (struct super_block *, int *, char *); | 1565 | int (*remount_fs) (struct super_block *, int *, char *); |
1566 | void (*clear_inode) (struct inode *); | 1566 | void (*clear_inode) (struct inode *); |
1567 | void (*umount_begin) (struct super_block *); | 1567 | void (*umount_begin) (struct super_block *); |
1568 | 1568 | ||
1569 | int (*show_options)(struct seq_file *, struct vfsmount *); | 1569 | int (*show_options)(struct seq_file *, struct vfsmount *); |
1570 | int (*show_stats)(struct seq_file *, struct vfsmount *); | 1570 | int (*show_stats)(struct seq_file *, struct vfsmount *); |
1571 | #ifdef CONFIG_QUOTA | 1571 | #ifdef CONFIG_QUOTA |
1572 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 1572 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
1573 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 1573 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
1574 | #endif | 1574 | #endif |
1575 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | 1575 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); |
1576 | }; | 1576 | }; |
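Relevant to this commit, ->sync_fs is expected to honor its wait argument: start writeout of fs-private metadata when wait is zero, and not return until it is on disk when nonzero. A skeletal sketch with hypothetical helpers:

static int myfs_sync_fs(struct super_block *sb, int wait)
{
	myfs_write_metadata(sb);	/* hypothetical: kick off writeback */
	if (wait)
		myfs_wait_metadata(sb);	/* hypothetical: block until done */
	sb->s_dirt = 0;
	return 0;
}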
1577 | 1577 | ||
1578 | /* | 1578 | /* |
1579 | * Inode state bits. Protected by inode_lock. | 1579 | * Inode state bits. Protected by inode_lock. |
1580 | * | 1580 | * |
1581 | * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, | 1581 | * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, |
1582 | * I_DIRTY_DATASYNC and I_DIRTY_PAGES. | 1582 | * I_DIRTY_DATASYNC and I_DIRTY_PAGES. |
1583 | * | 1583 | * |
1584 | * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, | 1584 | * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, |
1585 | * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at | 1585 | * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at |
1586 | * various stages of removing an inode. | 1586 | * various stages of removing an inode. |
1587 | * | 1587 | * |
1588 | * Two bits are used for locking and completion notification, I_LOCK and I_SYNC. | 1588 | * Two bits are used for locking and completion notification, I_LOCK and I_SYNC. |
1589 | * | 1589 | * |
1590 | * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on | 1590 | * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on |
1591 | * fdatasync(). i_atime is the usual cause. | 1591 | * fdatasync(). i_atime is the usual cause. |
1592 | * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of | 1592 | * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of |
1593 | * these changes separately from I_DIRTY_SYNC so that we | 1593 | * these changes separately from I_DIRTY_SYNC so that we |
1594 | * don't have to write inode on fdatasync() when only | 1594 | * don't have to write inode on fdatasync() when only |
1595 | * mtime has changed in it. | 1595 | * mtime has changed in it. |
1596 | * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. | 1596 | * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. |
1597 | * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both | 1597 | * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both |
1598 | * are cleared by unlock_new_inode(), called from iget(). | 1598 | * are cleared by unlock_new_inode(), called from iget(). |
1599 | * I_WILL_FREE Must be set when calling write_inode_now() if i_count | 1599 | * I_WILL_FREE Must be set when calling write_inode_now() if i_count |
1600 | * is zero. I_FREEING must be set when I_WILL_FREE is | 1600 | * is zero. I_FREEING must be set when I_WILL_FREE is |
1601 | * cleared. | 1601 | * cleared. |
1602 | * I_FREEING Set when inode is about to be freed but still has dirty | 1602 | * I_FREEING Set when inode is about to be freed but still has dirty |
1603 | * pages or buffers attached or the inode itself is still | 1603 | * pages or buffers attached or the inode itself is still |
1604 | * dirty. | 1604 | * dirty. |
1605 | * I_CLEAR Set by clear_inode(). In this state the inode is clean | 1605 | * I_CLEAR Set by clear_inode(). In this state the inode is clean |
1606 | * and can be destroyed. | 1606 | * and can be destroyed. |
1607 | * | 1607 | * |
1608 | * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are | 1608 | * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are |
1609 | * prohibited for many purposes. iget() must wait for | 1609 | * prohibited for many purposes. iget() must wait for |
1610 | * the inode to be completely released, then create it | 1610 | * the inode to be completely released, then create it |
1611 | * anew. Other functions will just ignore such inodes, | 1611 | * anew. Other functions will just ignore such inodes, |
1612 | * if appropriate. I_LOCK is used for waiting. | 1612 | * if appropriate. I_LOCK is used for waiting. |
1613 | * | 1613 | * |
1614 | * I_LOCK Serves as both a mutex and completion notification. | 1614 | * I_LOCK Serves as both a mutex and completion notification. |
1615 | * New inodes set I_LOCK. If two processes both create | 1615 | * New inodes set I_LOCK. If two processes both create |
1616 | * the same inode, one of them will release its inode and | 1616 | * the same inode, one of them will release its inode and |
1617 | * wait for I_LOCK to be released before returning. | 1617 | * wait for I_LOCK to be released before returning. |
1618 | * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can | 1618 | * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can |
1619 | * also cause waiting on I_LOCK, without I_LOCK actually | 1619 | * also cause waiting on I_LOCK, without I_LOCK actually |
1620 | * being set. find_inode() uses this to prevent returning | 1620 | * being set. find_inode() uses this to prevent returning |
1621 | * nearly-dead inodes. | 1621 | * nearly-dead inodes. |
1622 | * I_SYNC Similar to I_LOCK, but limited in scope to writeback | 1622 | * I_SYNC Similar to I_LOCK, but limited in scope to writeback |
1623 | * of inode dirty data. Having a separate lock for this | 1623 | * of inode dirty data. Having a separate lock for this |
1624 | * purpose reduces latency and prevents some filesystem- | 1624 | * purpose reduces latency and prevents some filesystem- |
1625 | * specific deadlocks. | 1625 | * specific deadlocks. |
1626 | * | 1626 | * |
1627 | * Q: What is the difference between I_WILL_FREE and I_FREEING? | 1627 | * Q: What is the difference between I_WILL_FREE and I_FREEING? |
1628 | * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on | 1628 | * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on |
1629 | * I_CLEAR? If not, why? | 1629 | * I_CLEAR? If not, why? |
1630 | */ | 1630 | */ |
1631 | #define I_DIRTY_SYNC 1 | 1631 | #define I_DIRTY_SYNC 1 |
1632 | #define I_DIRTY_DATASYNC 2 | 1632 | #define I_DIRTY_DATASYNC 2 |
1633 | #define I_DIRTY_PAGES 4 | 1633 | #define I_DIRTY_PAGES 4 |
1634 | #define I_NEW 8 | 1634 | #define I_NEW 8 |
1635 | #define I_WILL_FREE 16 | 1635 | #define I_WILL_FREE 16 |
1636 | #define I_FREEING 32 | 1636 | #define I_FREEING 32 |
1637 | #define I_CLEAR 64 | 1637 | #define I_CLEAR 64 |
1638 | #define __I_LOCK 7 | 1638 | #define __I_LOCK 7 |
1639 | #define I_LOCK (1 << __I_LOCK) | 1639 | #define I_LOCK (1 << __I_LOCK) |
1640 | #define __I_SYNC 8 | 1640 | #define __I_SYNC 8 |
1641 | #define I_SYNC (1 << __I_SYNC) | 1641 | #define I_SYNC (1 << __I_SYNC) |
1642 | 1642 | ||
1643 | #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) | 1643 | #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) |
1644 | 1644 | ||
1645 | extern void __mark_inode_dirty(struct inode *, int); | 1645 | extern void __mark_inode_dirty(struct inode *, int); |
1646 | static inline void mark_inode_dirty(struct inode *inode) | 1646 | static inline void mark_inode_dirty(struct inode *inode) |
1647 | { | 1647 | { |
1648 | __mark_inode_dirty(inode, I_DIRTY); | 1648 | __mark_inode_dirty(inode, I_DIRTY); |
1649 | } | 1649 | } |
1650 | 1650 | ||
1651 | static inline void mark_inode_dirty_sync(struct inode *inode) | 1651 | static inline void mark_inode_dirty_sync(struct inode *inode) |
1652 | { | 1652 | { |
1653 | __mark_inode_dirty(inode, I_DIRTY_SYNC); | 1653 | __mark_inode_dirty(inode, I_DIRTY_SYNC); |
1654 | } | 1654 | } |
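The split between the three dirty bits is what lets fdatasync() skip pure timestamp updates; a sketch of the check a sync path can make:

/* sketch: on fdatasync(), only data-relevant dirt forces an inode write */
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
	return 0;	/* I_DIRTY_SYNC alone (e.g. atime) can wait */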
1655 | 1655 | ||
1656 | /** | 1656 | /** |
1657 | * inc_nlink - directly increment an inode's link count | 1657 | * inc_nlink - directly increment an inode's link count |
1658 | * @inode: inode | 1658 | * @inode: inode |
1659 | * | 1659 | * |
1660 | * This is a low-level filesystem helper to replace any | 1660 | * This is a low-level filesystem helper to replace any |
1661 | * direct filesystem manipulation of i_nlink. Currently, | 1661 | * direct filesystem manipulation of i_nlink. Currently, |
1662 | * it is only here for parity with dec_nlink(). | 1662 | * it is only here for parity with dec_nlink(). |
1663 | */ | 1663 | */ |
1664 | static inline void inc_nlink(struct inode *inode) | 1664 | static inline void inc_nlink(struct inode *inode) |
1665 | { | 1665 | { |
1666 | inode->i_nlink++; | 1666 | inode->i_nlink++; |
1667 | } | 1667 | } |
1668 | 1668 | ||
1669 | static inline void inode_inc_link_count(struct inode *inode) | 1669 | static inline void inode_inc_link_count(struct inode *inode) |
1670 | { | 1670 | { |
1671 | inc_nlink(inode); | 1671 | inc_nlink(inode); |
1672 | mark_inode_dirty(inode); | 1672 | mark_inode_dirty(inode); |
1673 | } | 1673 | } |
1674 | 1674 | ||
1675 | /** | 1675 | /** |
1676 | * drop_nlink - directly drop an inode's link count | 1676 | * drop_nlink - directly drop an inode's link count |
1677 | * @inode: inode | 1677 | * @inode: inode |
1678 | * | 1678 | * |
1679 | * This is a low-level filesystem helper to replace any | 1679 | * This is a low-level filesystem helper to replace any |
1680 | * direct filesystem manipulation of i_nlink. In cases | 1680 | * direct filesystem manipulation of i_nlink. In cases |
1681 | * where we are attempting to track writes to the | 1681 | * where we are attempting to track writes to the |
1682 | * filesystem, a decrement to zero means an imminent | 1682 | * filesystem, a decrement to zero means an imminent |
1683 | * write when the file is truncated and actually unlinked | 1683 | * write when the file is truncated and actually unlinked |
1684 | * on the filesystem. | 1684 | * on the filesystem. |
1685 | */ | 1685 | */ |
1686 | static inline void drop_nlink(struct inode *inode) | 1686 | static inline void drop_nlink(struct inode *inode) |
1687 | { | 1687 | { |
1688 | inode->i_nlink--; | 1688 | inode->i_nlink--; |
1689 | } | 1689 | } |
1690 | 1690 | ||
1691 | /** | 1691 | /** |
1692 | * clear_nlink - directly zero an inode's link count | 1692 | * clear_nlink - directly zero an inode's link count |
1693 | * @inode: inode | 1693 | * @inode: inode |
1694 | * | 1694 | * |
1695 | * This is a low-level filesystem helper to replace any | 1695 | * This is a low-level filesystem helper to replace any |
1696 | * direct filesystem manipulation of i_nlink. See | 1696 | * direct filesystem manipulation of i_nlink. See |
1697 | * drop_nlink() for why we care about i_nlink hitting zero. | 1697 | * drop_nlink() for why we care about i_nlink hitting zero. |
1698 | */ | 1698 | */ |
1699 | static inline void clear_nlink(struct inode *inode) | 1699 | static inline void clear_nlink(struct inode *inode) |
1700 | { | 1700 | { |
1701 | inode->i_nlink = 0; | 1701 | inode->i_nlink = 0; |
1702 | } | 1702 | } |
1703 | 1703 | ||
1704 | static inline void inode_dec_link_count(struct inode *inode) | 1704 | static inline void inode_dec_link_count(struct inode *inode) |
1705 | { | 1705 | { |
1706 | drop_nlink(inode); | 1706 | drop_nlink(inode); |
1707 | mark_inode_dirty(inode); | 1707 | mark_inode_dirty(inode); |
1708 | } | 1708 | } |
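A typical ->unlink consumes these wrappers rather than touching i_nlink directly; a hedged sketch modeled on simple disk filesystems:

static int myfs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	inode->i_ctime = dir->i_ctime;
	inode_dec_link_count(inode);	/* drop_nlink() + mark_inode_dirty() */
	return 0;
}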
1709 | 1709 | ||
1710 | /** | 1710 | /** |
1711 | * inode_inc_iversion - increments i_version | 1711 | * inode_inc_iversion - increments i_version |
1712 | * @inode: inode that needs to be updated | 1712 | * @inode: inode that needs to be updated
1713 | * | 1713 | * |
1714 | * Every time the inode is modified, the i_version field will be incremented. | 1714 | * Every time the inode is modified, the i_version field will be incremented. |
1715 | * The filesystem has to be mounted with the i_version flag. | 1715 | * The filesystem has to be mounted with the i_version flag.
1716 | */ | 1716 | */ |
1717 | 1717 | ||
1718 | static inline void inode_inc_iversion(struct inode *inode) | 1718 | static inline void inode_inc_iversion(struct inode *inode) |
1719 | { | 1719 | { |
1720 | spin_lock(&inode->i_lock); | 1720 | spin_lock(&inode->i_lock); |
1721 | inode->i_version++; | 1721 | inode->i_version++; |
1722 | spin_unlock(&inode->i_lock); | 1722 | spin_unlock(&inode->i_lock); |
1723 | } | 1723 | } |
1724 | 1724 | ||
1725 | extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); | 1725 | extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); |
1726 | static inline void file_accessed(struct file *file) | 1726 | static inline void file_accessed(struct file *file) |
1727 | { | 1727 | { |
1728 | if (!(file->f_flags & O_NOATIME)) | 1728 | if (!(file->f_flags & O_NOATIME)) |
1729 | touch_atime(file->f_path.mnt, file->f_path.dentry); | 1729 | touch_atime(file->f_path.mnt, file->f_path.dentry); |
1730 | } | 1730 | } |
1731 | 1731 | ||
1732 | int sync_inode(struct inode *inode, struct writeback_control *wbc); | 1732 | int sync_inode(struct inode *inode, struct writeback_control *wbc); |
1733 | 1733 | ||
1734 | struct file_system_type { | 1734 | struct file_system_type { |
1735 | const char *name; | 1735 | const char *name; |
1736 | int fs_flags; | 1736 | int fs_flags; |
1737 | int (*get_sb) (struct file_system_type *, int, | 1737 | int (*get_sb) (struct file_system_type *, int, |
1738 | const char *, void *, struct vfsmount *); | 1738 | const char *, void *, struct vfsmount *); |
1739 | void (*kill_sb) (struct super_block *); | 1739 | void (*kill_sb) (struct super_block *); |
1740 | struct module *owner; | 1740 | struct module *owner; |
1741 | struct file_system_type * next; | 1741 | struct file_system_type * next; |
1742 | struct list_head fs_supers; | 1742 | struct list_head fs_supers; |
1743 | 1743 | ||
1744 | struct lock_class_key s_lock_key; | 1744 | struct lock_class_key s_lock_key; |
1745 | struct lock_class_key s_umount_key; | 1745 | struct lock_class_key s_umount_key; |
1746 | 1746 | ||
1747 | struct lock_class_key i_lock_key; | 1747 | struct lock_class_key i_lock_key; |
1748 | struct lock_class_key i_mutex_key; | 1748 | struct lock_class_key i_mutex_key; |
1749 | struct lock_class_key i_mutex_dir_key; | 1749 | struct lock_class_key i_mutex_dir_key; |
1750 | struct lock_class_key i_alloc_sem_key; | 1750 | struct lock_class_key i_alloc_sem_key; |
1751 | }; | 1751 | }; |
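Wiring a filesystem into the VFS means filling one of these and registering it; a minimal block-device sketch (all "myfs" names hypothetical) using get_sb_bdev() declared below:

static int myfs_get_sb(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_bdev(fs_type, flags, dev_name, data,
			   myfs_fill_super, mnt);	/* hypothetical fill_super */
}

static struct file_system_type myfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "myfs",
	.get_sb		= myfs_get_sb,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};

/* in module init: register_filesystem(&myfs_fs_type); */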
1752 | 1752 | ||
1753 | extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, | 1753 | extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, |
1754 | int (*fill_super)(struct super_block *, void *, int), | 1754 | int (*fill_super)(struct super_block *, void *, int), |
1755 | struct vfsmount *mnt); | 1755 | struct vfsmount *mnt); |
1756 | extern int get_sb_bdev(struct file_system_type *fs_type, | 1756 | extern int get_sb_bdev(struct file_system_type *fs_type, |
1757 | int flags, const char *dev_name, void *data, | 1757 | int flags, const char *dev_name, void *data, |
1758 | int (*fill_super)(struct super_block *, void *, int), | 1758 | int (*fill_super)(struct super_block *, void *, int), |
1759 | struct vfsmount *mnt); | 1759 | struct vfsmount *mnt); |
1760 | extern int get_sb_single(struct file_system_type *fs_type, | 1760 | extern int get_sb_single(struct file_system_type *fs_type, |
1761 | int flags, void *data, | 1761 | int flags, void *data, |
1762 | int (*fill_super)(struct super_block *, void *, int), | 1762 | int (*fill_super)(struct super_block *, void *, int), |
1763 | struct vfsmount *mnt); | 1763 | struct vfsmount *mnt); |
1764 | extern int get_sb_nodev(struct file_system_type *fs_type, | 1764 | extern int get_sb_nodev(struct file_system_type *fs_type, |
1765 | int flags, void *data, | 1765 | int flags, void *data, |
1766 | int (*fill_super)(struct super_block *, void *, int), | 1766 | int (*fill_super)(struct super_block *, void *, int), |
1767 | struct vfsmount *mnt); | 1767 | struct vfsmount *mnt); |
1768 | void generic_shutdown_super(struct super_block *sb); | 1768 | void generic_shutdown_super(struct super_block *sb); |
1769 | void kill_block_super(struct super_block *sb); | 1769 | void kill_block_super(struct super_block *sb); |
1770 | void kill_anon_super(struct super_block *sb); | 1770 | void kill_anon_super(struct super_block *sb); |
1771 | void kill_litter_super(struct super_block *sb); | 1771 | void kill_litter_super(struct super_block *sb); |
1772 | void deactivate_super(struct super_block *sb); | 1772 | void deactivate_super(struct super_block *sb); |
1773 | void deactivate_locked_super(struct super_block *sb); | 1773 | void deactivate_locked_super(struct super_block *sb); |
1774 | int set_anon_super(struct super_block *s, void *data); | 1774 | int set_anon_super(struct super_block *s, void *data); |
1775 | struct super_block *sget(struct file_system_type *type, | 1775 | struct super_block *sget(struct file_system_type *type, |
1776 | int (*test)(struct super_block *,void *), | 1776 | int (*test)(struct super_block *,void *), |
1777 | int (*set)(struct super_block *,void *), | 1777 | int (*set)(struct super_block *,void *), |
1778 | void *data); | 1778 | void *data); |
1779 | extern int get_sb_pseudo(struct file_system_type *, char *, | 1779 | extern int get_sb_pseudo(struct file_system_type *, char *, |
1780 | const struct super_operations *ops, unsigned long, | 1780 | const struct super_operations *ops, unsigned long, |
1781 | struct vfsmount *mnt); | 1781 | struct vfsmount *mnt); |
1782 | extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); | 1782 | extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); |
1783 | int __put_super_and_need_restart(struct super_block *sb); | 1783 | int __put_super_and_need_restart(struct super_block *sb); |
1784 | 1784 | ||
1785 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ | 1785 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ |
1786 | #define fops_get(fops) \ | 1786 | #define fops_get(fops) \ |
1787 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) | 1787 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) |
1788 | #define fops_put(fops) \ | 1788 | #define fops_put(fops) \ |
1789 | do { if (fops) module_put((fops)->owner); } while(0) | 1789 | do { if (fops) module_put((fops)->owner); } while(0) |
1790 | 1790 | ||
1791 | extern int register_filesystem(struct file_system_type *); | 1791 | extern int register_filesystem(struct file_system_type *); |
1792 | extern int unregister_filesystem(struct file_system_type *); | 1792 | extern int unregister_filesystem(struct file_system_type *); |
1793 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); | 1793 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); |
1794 | #define kern_mount(type) kern_mount_data(type, NULL) | 1794 | #define kern_mount(type) kern_mount_data(type, NULL) |
1795 | extern int may_umount_tree(struct vfsmount *); | 1795 | extern int may_umount_tree(struct vfsmount *); |
1796 | extern int may_umount(struct vfsmount *); | 1796 | extern int may_umount(struct vfsmount *); |
1797 | extern long do_mount(char *, char *, char *, unsigned long, void *); | 1797 | extern long do_mount(char *, char *, char *, unsigned long, void *); |
1798 | extern struct vfsmount *collect_mounts(struct path *); | 1798 | extern struct vfsmount *collect_mounts(struct path *); |
1799 | extern void drop_collected_mounts(struct vfsmount *); | 1799 | extern void drop_collected_mounts(struct vfsmount *); |
1800 | 1800 | ||
1801 | extern int vfs_statfs(struct dentry *, struct kstatfs *); | 1801 | extern int vfs_statfs(struct dentry *, struct kstatfs *); |
1802 | 1802 | ||
1803 | extern int current_umask(void); | 1803 | extern int current_umask(void); |
1804 | 1804 | ||
1805 | /* /sys/fs */ | 1805 | /* /sys/fs */ |
1806 | extern struct kobject *fs_kobj; | 1806 | extern struct kobject *fs_kobj; |
1807 | 1807 | ||
1808 | extern int rw_verify_area(int, struct file *, loff_t *, size_t); | 1808 | extern int rw_verify_area(int, struct file *, loff_t *, size_t); |
1809 | 1809 | ||
1810 | #define FLOCK_VERIFY_READ 1 | 1810 | #define FLOCK_VERIFY_READ 1 |
1811 | #define FLOCK_VERIFY_WRITE 2 | 1811 | #define FLOCK_VERIFY_WRITE 2 |
1812 | 1812 | ||
1813 | #ifdef CONFIG_FILE_LOCKING | 1813 | #ifdef CONFIG_FILE_LOCKING |
1814 | extern int locks_mandatory_locked(struct inode *); | 1814 | extern int locks_mandatory_locked(struct inode *); |
1815 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); | 1815 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); |
1816 | 1816 | ||
1817 | /* | 1817 | /* |
1818 | * Candidates for mandatory locking have the setgid bit set | 1818 | * Candidates for mandatory locking have the setgid bit set |
1819 | * but no group execute bit - an otherwise meaningless combination. | 1819 | * but no group execute bit - an otherwise meaningless combination. |
1820 | */ | 1820 | */ |
1821 | 1821 | ||
1822 | static inline int __mandatory_lock(struct inode *ino) | 1822 | static inline int __mandatory_lock(struct inode *ino) |
1823 | { | 1823 | { |
1824 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; | 1824 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; |
1825 | } | 1825 | } |
1826 | 1826 | ||
1827 | /* | 1827 | /* |
1828 | * ... and these candidates should be on an MS_MANDLOCK-mounted fs, | 1828 | * ... and these candidates should be on an MS_MANDLOCK-mounted fs,
1829 | * otherwise they will be treated as advisory locks | 1829 | * otherwise they will be treated as advisory locks
1830 | */ | 1830 | */ |
1831 | 1831 | ||
1832 | static inline int mandatory_lock(struct inode *ino) | 1832 | static inline int mandatory_lock(struct inode *ino) |
1833 | { | 1833 | { |
1834 | return IS_MANDLOCK(ino) && __mandatory_lock(ino); | 1834 | return IS_MANDLOCK(ino) && __mandatory_lock(ino); |
1835 | } | 1835 | } |
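Concretely, a candidate is a file with the setgid bit set but group execute clear (e.g. mode 02644) on an MS_MANDLOCK ("-o mand") mount; a small sketch of the predicate at work:

/* mode 02644: setgid set, group execute clear -> candidate */
umode_t mode = S_ISGID | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;

/* __mandatory_lock() reduces to this test: */
int candidate = (mode & (S_ISGID | S_IXGRP)) == S_ISGID;	/* == 1 */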
1836 | 1836 | ||
1837 | static inline int locks_verify_locked(struct inode *inode) | 1837 | static inline int locks_verify_locked(struct inode *inode) |
1838 | { | 1838 | { |
1839 | if (mandatory_lock(inode)) | 1839 | if (mandatory_lock(inode)) |
1840 | return locks_mandatory_locked(inode); | 1840 | return locks_mandatory_locked(inode); |
1841 | return 0; | 1841 | return 0; |
1842 | } | 1842 | } |
1843 | 1843 | ||
1844 | static inline int locks_verify_truncate(struct inode *inode, | 1844 | static inline int locks_verify_truncate(struct inode *inode, |
1845 | struct file *filp, | 1845 | struct file *filp, |
1846 | loff_t size) | 1846 | loff_t size) |
1847 | { | 1847 | { |
1848 | if (inode->i_flock && mandatory_lock(inode)) | 1848 | if (inode->i_flock && mandatory_lock(inode)) |
1849 | return locks_mandatory_area( | 1849 | return locks_mandatory_area( |
1850 | FLOCK_VERIFY_WRITE, inode, filp, | 1850 | FLOCK_VERIFY_WRITE, inode, filp, |
1851 | size < inode->i_size ? size : inode->i_size, | 1851 | size < inode->i_size ? size : inode->i_size, |
1852 | (size < inode->i_size ? inode->i_size - size | 1852 | (size < inode->i_size ? inode->i_size - size |
1853 | : size - inode->i_size) | 1853 | : size - inode->i_size) |
1854 | ); | 1854 | ); |
1855 | return 0; | 1855 | return 0; |
1856 | } | 1856 | } |
1857 | 1857 | ||
1858 | static inline int break_lease(struct inode *inode, unsigned int mode) | 1858 | static inline int break_lease(struct inode *inode, unsigned int mode) |
1859 | { | 1859 | { |
1860 | if (inode->i_flock) | 1860 | if (inode->i_flock) |
1861 | return __break_lease(inode, mode); | 1861 | return __break_lease(inode, mode); |
1862 | return 0; | 1862 | return 0; |
1863 | } | 1863 | } |
1864 | #else /* !CONFIG_FILE_LOCKING */ | 1864 | #else /* !CONFIG_FILE_LOCKING */ |
1865 | static inline int locks_mandatory_locked(struct inode *inode) | 1865 | static inline int locks_mandatory_locked(struct inode *inode) |
1866 | { | 1866 | { |
1867 | return 0; | 1867 | return 0; |
1868 | } | 1868 | } |
1869 | 1869 | ||
1870 | static inline int locks_mandatory_area(int rw, struct inode *inode, | 1870 | static inline int locks_mandatory_area(int rw, struct inode *inode, |
1871 | struct file *filp, loff_t offset, | 1871 | struct file *filp, loff_t offset, |
1872 | size_t count) | 1872 | size_t count) |
1873 | { | 1873 | { |
1874 | return 0; | 1874 | return 0; |
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | static inline int __mandatory_lock(struct inode *inode) | 1877 | static inline int __mandatory_lock(struct inode *inode) |
1878 | { | 1878 | { |
1879 | return 0; | 1879 | return 0; |
1880 | } | 1880 | } |
1881 | 1881 | ||
1882 | static inline int mandatory_lock(struct inode *inode) | 1882 | static inline int mandatory_lock(struct inode *inode) |
1883 | { | 1883 | { |
1884 | return 0; | 1884 | return 0; |
1885 | } | 1885 | } |
1886 | 1886 | ||
1887 | static inline int locks_verify_locked(struct inode *inode) | 1887 | static inline int locks_verify_locked(struct inode *inode) |
1888 | { | 1888 | { |
1889 | return 0; | 1889 | return 0; |
1890 | } | 1890 | } |
1891 | 1891 | ||
1892 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, | 1892 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, |
1893 | size_t size) | 1893 | size_t size) |
1894 | { | 1894 | { |
1895 | return 0; | 1895 | return 0; |
1896 | } | 1896 | } |
1897 | 1897 | ||
1898 | static inline int break_lease(struct inode *inode, unsigned int mode) | 1898 | static inline int break_lease(struct inode *inode, unsigned int mode) |
1899 | { | 1899 | { |
1900 | return 0; | 1900 | return 0; |
1901 | } | 1901 | } |
1902 | 1902 | ||
1903 | #endif /* CONFIG_FILE_LOCKING */ | 1903 | #endif /* CONFIG_FILE_LOCKING */ |
1904 | 1904 | ||
1905 | /* fs/open.c */ | 1905 | /* fs/open.c */ |
1906 | 1906 | ||
1907 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, | 1907 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, |
1908 | struct file *filp); | 1908 | struct file *filp); |
1909 | extern long do_sys_open(int dfd, const char __user *filename, int flags, | 1909 | extern long do_sys_open(int dfd, const char __user *filename, int flags, |
1910 | int mode); | 1910 | int mode); |
1911 | extern struct file *filp_open(const char *, int, int); | 1911 | extern struct file *filp_open(const char *, int, int); |
1912 | extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, | 1912 | extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, |
1913 | const struct cred *); | 1913 | const struct cred *); |
1914 | extern int filp_close(struct file *, fl_owner_t id); | 1914 | extern int filp_close(struct file *, fl_owner_t id); |
1915 | extern char * getname(const char __user *); | 1915 | extern char * getname(const char __user *); |
1916 | 1916 | ||
1917 | /* fs/dcache.c */ | 1917 | /* fs/dcache.c */ |
1918 | extern void __init vfs_caches_init_early(void); | 1918 | extern void __init vfs_caches_init_early(void); |
1919 | extern void __init vfs_caches_init(unsigned long); | 1919 | extern void __init vfs_caches_init(unsigned long); |
1920 | 1920 | ||
1921 | extern struct kmem_cache *names_cachep; | 1921 | extern struct kmem_cache *names_cachep; |
1922 | 1922 | ||
1923 | #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) | 1923 | #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) |
1924 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) | 1924 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) |
1925 | #ifndef CONFIG_AUDITSYSCALL | 1925 | #ifndef CONFIG_AUDITSYSCALL |
1926 | #define putname(name) __putname(name) | 1926 | #define putname(name) __putname(name) |
1927 | #else | 1927 | #else |
1928 | extern void putname(const char *name); | 1928 | extern void putname(const char *name); |
1929 | #endif | 1929 | #endif |
1930 | 1930 | ||
1931 | #ifdef CONFIG_BLOCK | 1931 | #ifdef CONFIG_BLOCK |
1932 | extern int register_blkdev(unsigned int, const char *); | 1932 | extern int register_blkdev(unsigned int, const char *); |
1933 | extern void unregister_blkdev(unsigned int, const char *); | 1933 | extern void unregister_blkdev(unsigned int, const char *); |
1934 | extern struct block_device *bdget(dev_t); | 1934 | extern struct block_device *bdget(dev_t); |
1935 | extern void bd_set_size(struct block_device *, loff_t size); | 1935 | extern void bd_set_size(struct block_device *, loff_t size); |
1936 | extern void bd_forget(struct inode *inode); | 1936 | extern void bd_forget(struct inode *inode); |
1937 | extern void bdput(struct block_device *); | 1937 | extern void bdput(struct block_device *); |
1938 | extern struct block_device *open_by_devnum(dev_t, fmode_t); | 1938 | extern struct block_device *open_by_devnum(dev_t, fmode_t); |
1939 | extern void invalidate_bdev(struct block_device *); | 1939 | extern void invalidate_bdev(struct block_device *); |
1940 | extern int sync_blockdev(struct block_device *bdev); | 1940 | extern int sync_blockdev(struct block_device *bdev); |
1941 | extern struct super_block *freeze_bdev(struct block_device *); | 1941 | extern struct super_block *freeze_bdev(struct block_device *); |
1942 | extern void emergency_thaw_all(void); | 1942 | extern void emergency_thaw_all(void); |
1943 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); | 1943 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); |
1944 | extern int fsync_bdev(struct block_device *); | 1944 | extern int fsync_bdev(struct block_device *); |
1945 | extern int fsync_super(struct super_block *); | 1945 | extern int fsync_super(struct super_block *); |
1946 | extern int fsync_no_super(struct block_device *); | 1946 | extern int fsync_no_super(struct block_device *); |
1947 | #else | 1947 | #else |
1948 | static inline void bd_forget(struct inode *inode) {} | 1948 | static inline void bd_forget(struct inode *inode) {} |
1949 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } | 1949 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } |
1950 | static inline void invalidate_bdev(struct block_device *bdev) {} | 1950 | static inline void invalidate_bdev(struct block_device *bdev) {} |
1951 | 1951 | ||
1952 | static inline struct super_block *freeze_bdev(struct block_device *sb) | 1952 | static inline struct super_block *freeze_bdev(struct block_device *sb) |
1953 | { | 1953 | { |
1954 | return NULL; | 1954 | return NULL; |
1955 | } | 1955 | } |
1956 | 1956 | ||
1957 | static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) | 1957 | static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) |
1958 | { | 1958 | { |
1959 | return 0; | 1959 | return 0; |
1960 | } | 1960 | } |
1961 | #endif | 1961 | #endif |
1962 | extern const struct file_operations def_blk_fops; | 1962 | extern const struct file_operations def_blk_fops; |
1963 | extern const struct file_operations def_chr_fops; | 1963 | extern const struct file_operations def_chr_fops; |
1964 | extern const struct file_operations bad_sock_fops; | 1964 | extern const struct file_operations bad_sock_fops; |
1965 | extern const struct file_operations def_fifo_fops; | 1965 | extern const struct file_operations def_fifo_fops; |
1966 | #ifdef CONFIG_BLOCK | 1966 | #ifdef CONFIG_BLOCK |
1967 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); | 1967 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); |
1968 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); | 1968 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); |
1969 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); | 1969 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); |
1970 | extern int blkdev_get(struct block_device *, fmode_t); | 1970 | extern int blkdev_get(struct block_device *, fmode_t); |
1971 | extern int blkdev_put(struct block_device *, fmode_t); | 1971 | extern int blkdev_put(struct block_device *, fmode_t); |
1972 | extern int bd_claim(struct block_device *, void *); | 1972 | extern int bd_claim(struct block_device *, void *); |
1973 | extern void bd_release(struct block_device *); | 1973 | extern void bd_release(struct block_device *); |
1974 | #ifdef CONFIG_SYSFS | 1974 | #ifdef CONFIG_SYSFS |
1975 | extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); | 1975 | extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); |
1976 | extern void bd_release_from_disk(struct block_device *, struct gendisk *); | 1976 | extern void bd_release_from_disk(struct block_device *, struct gendisk *); |
1977 | #else | 1977 | #else |
1978 | #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) | 1978 | #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) |
1979 | #define bd_release_from_disk(bdev, disk) bd_release(bdev) | 1979 | #define bd_release_from_disk(bdev, disk) bd_release(bdev) |
1980 | #endif | 1980 | #endif |
1981 | #endif | 1981 | #endif |
1982 | 1982 | ||
1983 | /* fs/char_dev.c */ | 1983 | /* fs/char_dev.c */ |
1984 | #define CHRDEV_MAJOR_HASH_SIZE 255 | 1984 | #define CHRDEV_MAJOR_HASH_SIZE 255 |
1985 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); | 1985 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); |
1986 | extern int register_chrdev_region(dev_t, unsigned, const char *); | 1986 | extern int register_chrdev_region(dev_t, unsigned, const char *); |
1987 | extern int register_chrdev(unsigned int, const char *, | 1987 | extern int register_chrdev(unsigned int, const char *, |
1988 | const struct file_operations *); | 1988 | const struct file_operations *); |
1989 | extern void unregister_chrdev(unsigned int, const char *); | 1989 | extern void unregister_chrdev(unsigned int, const char *); |
1990 | extern void unregister_chrdev_region(dev_t, unsigned); | 1990 | extern void unregister_chrdev_region(dev_t, unsigned); |
1991 | extern void chrdev_show(struct seq_file *,off_t); | 1991 | extern void chrdev_show(struct seq_file *,off_t); |
1992 | 1992 | ||
1993 | /* fs/block_dev.c */ | 1993 | /* fs/block_dev.c */ |
1994 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ | 1994 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ |
1995 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ | 1995 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ |
1996 | 1996 | ||
1997 | #ifdef CONFIG_BLOCK | 1997 | #ifdef CONFIG_BLOCK |
1998 | #define BLKDEV_MAJOR_HASH_SIZE 255 | 1998 | #define BLKDEV_MAJOR_HASH_SIZE 255 |
1999 | extern const char *__bdevname(dev_t, char *buffer); | 1999 | extern const char *__bdevname(dev_t, char *buffer); |
2000 | extern const char *bdevname(struct block_device *bdev, char *buffer); | 2000 | extern const char *bdevname(struct block_device *bdev, char *buffer); |
2001 | extern struct block_device *lookup_bdev(const char *); | 2001 | extern struct block_device *lookup_bdev(const char *); |
2002 | extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); | 2002 | extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); |
2003 | extern void close_bdev_exclusive(struct block_device *, fmode_t); | 2003 | extern void close_bdev_exclusive(struct block_device *, fmode_t); |
2004 | extern void blkdev_show(struct seq_file *,off_t); | 2004 | extern void blkdev_show(struct seq_file *,off_t); |
2005 | 2005 | ||
2006 | #else | 2006 | #else |
2007 | #define BLKDEV_MAJOR_HASH_SIZE 0 | 2007 | #define BLKDEV_MAJOR_HASH_SIZE 0 |
2008 | #endif | 2008 | #endif |
2009 | 2009 | ||
2010 | extern void init_special_inode(struct inode *, umode_t, dev_t); | 2010 | extern void init_special_inode(struct inode *, umode_t, dev_t); |
2011 | 2011 | ||
2012 | /* Invalid inode operations -- fs/bad_inode.c */ | 2012 | /* Invalid inode operations -- fs/bad_inode.c */ |
2013 | extern void make_bad_inode(struct inode *); | 2013 | extern void make_bad_inode(struct inode *); |
2014 | extern int is_bad_inode(struct inode *); | 2014 | extern int is_bad_inode(struct inode *); |
2015 | 2015 | ||
2016 | extern const struct file_operations read_pipefifo_fops; | 2016 | extern const struct file_operations read_pipefifo_fops; |
2017 | extern const struct file_operations write_pipefifo_fops; | 2017 | extern const struct file_operations write_pipefifo_fops; |
2018 | extern const struct file_operations rdwr_pipefifo_fops; | 2018 | extern const struct file_operations rdwr_pipefifo_fops; |
2019 | 2019 | ||
2020 | extern int fs_may_remount_ro(struct super_block *); | 2020 | extern int fs_may_remount_ro(struct super_block *); |
2021 | 2021 | ||
2022 | #ifdef CONFIG_BLOCK | 2022 | #ifdef CONFIG_BLOCK |
2023 | /* | 2023 | /* |
2024 | * return READ, READA, or WRITE | 2024 | * return READ, READA, or WRITE |
2025 | */ | 2025 | */ |
2026 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) | 2026 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) |
2027 | 2027 | ||
2028 | /* | 2028 | /* |
2029 | * return data direction, READ or WRITE | 2029 | * return data direction, READ or WRITE |
2030 | */ | 2030 | */ |
2031 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) | 2031 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) |
2032 | 2032 | ||
2033 | extern void check_disk_size_change(struct gendisk *disk, | 2033 | extern void check_disk_size_change(struct gendisk *disk, |
2034 | struct block_device *bdev); | 2034 | struct block_device *bdev); |
2035 | extern int revalidate_disk(struct gendisk *); | 2035 | extern int revalidate_disk(struct gendisk *); |
2036 | extern int check_disk_change(struct block_device *); | 2036 | extern int check_disk_change(struct block_device *); |
2037 | extern int __invalidate_device(struct block_device *); | 2037 | extern int __invalidate_device(struct block_device *); |
2038 | extern int invalidate_partition(struct gendisk *, int); | 2038 | extern int invalidate_partition(struct gendisk *, int); |
2039 | #endif | 2039 | #endif |
2040 | extern int invalidate_inodes(struct super_block *); | 2040 | extern int invalidate_inodes(struct super_block *); |
2041 | unsigned long __invalidate_mapping_pages(struct address_space *mapping, | 2041 | unsigned long __invalidate_mapping_pages(struct address_space *mapping, |
2042 | pgoff_t start, pgoff_t end, | 2042 | pgoff_t start, pgoff_t end, |
2043 | bool be_atomic); | 2043 | bool be_atomic); |
2044 | unsigned long invalidate_mapping_pages(struct address_space *mapping, | 2044 | unsigned long invalidate_mapping_pages(struct address_space *mapping, |
2045 | pgoff_t start, pgoff_t end); | 2045 | pgoff_t start, pgoff_t end); |
2046 | 2046 | ||
2047 | static inline unsigned long __deprecated | 2047 | static inline unsigned long __deprecated |
2048 | invalidate_inode_pages(struct address_space *mapping) | 2048 | invalidate_inode_pages(struct address_space *mapping) |
2049 | { | 2049 | { |
2050 | return invalidate_mapping_pages(mapping, 0, ~0UL); | 2050 | return invalidate_mapping_pages(mapping, 0, ~0UL); |
2051 | } | 2051 | } |
2052 | 2052 | ||
2053 | static inline void invalidate_remote_inode(struct inode *inode) | 2053 | static inline void invalidate_remote_inode(struct inode *inode) |
2054 | { | 2054 | { |
2055 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 2055 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2056 | S_ISLNK(inode->i_mode)) | 2056 | S_ISLNK(inode->i_mode)) |
2057 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 2057 | invalidate_mapping_pages(inode->i_mapping, 0, -1); |
2058 | } | 2058 | } |
2059 | extern int invalidate_inode_pages2(struct address_space *mapping); | 2059 | extern int invalidate_inode_pages2(struct address_space *mapping); |
2060 | extern int invalidate_inode_pages2_range(struct address_space *mapping, | 2060 | extern int invalidate_inode_pages2_range(struct address_space *mapping, |
2061 | pgoff_t start, pgoff_t end); | 2061 | pgoff_t start, pgoff_t end); |
2062 | extern void generic_sync_sb_inodes(struct super_block *sb, | 2062 | extern void generic_sync_sb_inodes(struct super_block *sb, |
2063 | struct writeback_control *wbc); | 2063 | struct writeback_control *wbc); |
2064 | extern int write_inode_now(struct inode *, int); | 2064 | extern int write_inode_now(struct inode *, int); |
2065 | extern int filemap_fdatawrite(struct address_space *); | 2065 | extern int filemap_fdatawrite(struct address_space *); |
2066 | extern int filemap_flush(struct address_space *); | 2066 | extern int filemap_flush(struct address_space *); |
2067 | extern int filemap_fdatawait(struct address_space *); | 2067 | extern int filemap_fdatawait(struct address_space *); |
2068 | extern int filemap_write_and_wait(struct address_space *mapping); | 2068 | extern int filemap_write_and_wait(struct address_space *mapping); |
2069 | extern int filemap_write_and_wait_range(struct address_space *mapping, | 2069 | extern int filemap_write_and_wait_range(struct address_space *mapping, |
2070 | loff_t lstart, loff_t lend); | 2070 | loff_t lstart, loff_t lend); |
2071 | extern int wait_on_page_writeback_range(struct address_space *mapping, | 2071 | extern int wait_on_page_writeback_range(struct address_space *mapping, |
2072 | pgoff_t start, pgoff_t end); | 2072 | pgoff_t start, pgoff_t end); |
2073 | extern int __filemap_fdatawrite_range(struct address_space *mapping, | 2073 | extern int __filemap_fdatawrite_range(struct address_space *mapping, |
2074 | loff_t start, loff_t end, int sync_mode); | 2074 | loff_t start, loff_t end, int sync_mode); |
2075 | extern int filemap_fdatawrite_range(struct address_space *mapping, | 2075 | extern int filemap_fdatawrite_range(struct address_space *mapping, |
2076 | loff_t start, loff_t end); | 2076 | loff_t start, loff_t end); |
2077 | 2077 | ||
2078 | extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); | 2078 | extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); |
2079 | extern void sync_supers(void); | 2079 | extern void sync_supers(void); |
2080 | extern void sync_filesystems(int wait); | 2080 | extern void sync_filesystems(int wait); |
2081 | extern void emergency_sync(void); | 2081 | extern void emergency_sync(void); |
2082 | extern void emergency_remount(void); | 2082 | extern void emergency_remount(void); |
2083 | extern int do_remount_sb(struct super_block *sb, int flags, | 2083 | extern int do_remount_sb(struct super_block *sb, int flags, |
2084 | void *data, int force); | 2084 | void *data, int force); |
2085 | #ifdef CONFIG_BLOCK | 2085 | #ifdef CONFIG_BLOCK |
2086 | extern sector_t bmap(struct inode *, sector_t); | 2086 | extern sector_t bmap(struct inode *, sector_t); |
2087 | #endif | 2087 | #endif |
2088 | extern int notify_change(struct dentry *, struct iattr *); | 2088 | extern int notify_change(struct dentry *, struct iattr *); |
2089 | extern int inode_permission(struct inode *, int); | 2089 | extern int inode_permission(struct inode *, int); |
2090 | extern int generic_permission(struct inode *, int, | 2090 | extern int generic_permission(struct inode *, int, |
2091 | int (*check_acl)(struct inode *, int)); | 2091 | int (*check_acl)(struct inode *, int)); |
2092 | 2092 | ||
2093 | static inline bool execute_ok(struct inode *inode) | 2093 | static inline bool execute_ok(struct inode *inode) |
2094 | { | 2094 | { |
2095 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); | 2095 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); |
2096 | } | 2096 | } |
2097 | 2097 | ||
2098 | extern int get_write_access(struct inode *); | 2098 | extern int get_write_access(struct inode *); |
2099 | extern int deny_write_access(struct file *); | 2099 | extern int deny_write_access(struct file *); |
2100 | static inline void put_write_access(struct inode * inode) | 2100 | static inline void put_write_access(struct inode * inode) |
2101 | { | 2101 | { |
2102 | atomic_dec(&inode->i_writecount); | 2102 | atomic_dec(&inode->i_writecount); |
2103 | } | 2103 | } |
2104 | static inline void allow_write_access(struct file *file) | 2104 | static inline void allow_write_access(struct file *file) |
2105 | { | 2105 | { |
2106 | if (file) | 2106 | if (file) |
2107 | atomic_inc(&file->f_path.dentry->d_inode->i_writecount); | 2107 | atomic_inc(&file->f_path.dentry->d_inode->i_writecount); |
2108 | } | 2108 | } |
2109 | extern int do_pipe_flags(int *, int); | 2109 | extern int do_pipe_flags(int *, int); |
2110 | extern struct file *create_read_pipe(struct file *f, int flags); | 2110 | extern struct file *create_read_pipe(struct file *f, int flags); |
2111 | extern struct file *create_write_pipe(int flags); | 2111 | extern struct file *create_write_pipe(int flags); |
2112 | extern void free_write_pipe(struct file *); | 2112 | extern void free_write_pipe(struct file *); |
2113 | 2113 | ||
2114 | extern struct file *do_filp_open(int dfd, const char *pathname, | 2114 | extern struct file *do_filp_open(int dfd, const char *pathname, |
2115 | int open_flag, int mode, int acc_mode); | 2115 | int open_flag, int mode, int acc_mode); |
2116 | extern int may_open(struct path *, int, int); | 2116 | extern int may_open(struct path *, int, int); |
2117 | 2117 | ||
2118 | extern int kernel_read(struct file *, unsigned long, char *, unsigned long); | 2118 | extern int kernel_read(struct file *, unsigned long, char *, unsigned long); |
2119 | extern struct file * open_exec(const char *); | 2119 | extern struct file * open_exec(const char *); |
2120 | 2120 | ||
2121 | /* fs/dcache.c -- generic fs support functions */ | 2121 | /* fs/dcache.c -- generic fs support functions */ |
2122 | extern int is_subdir(struct dentry *, struct dentry *); | 2122 | extern int is_subdir(struct dentry *, struct dentry *); |
2123 | extern ino_t find_inode_number(struct dentry *, struct qstr *); | 2123 | extern ino_t find_inode_number(struct dentry *, struct qstr *); |
2124 | 2124 | ||
2125 | #include <linux/err.h> | 2125 | #include <linux/err.h> |
2126 | 2126 | ||
2127 | /* needed for stackable file system support */ | 2127 | /* needed for stackable file system support */ |
2128 | extern loff_t default_llseek(struct file *file, loff_t offset, int origin); | 2128 | extern loff_t default_llseek(struct file *file, loff_t offset, int origin); |
2129 | 2129 | ||
2130 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); | 2130 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); |
2131 | 2131 | ||
2132 | extern struct inode * inode_init_always(struct super_block *, struct inode *); | 2132 | extern struct inode * inode_init_always(struct super_block *, struct inode *); |
2133 | extern void inode_init_once(struct inode *); | 2133 | extern void inode_init_once(struct inode *); |
2134 | extern void inode_add_to_lists(struct super_block *, struct inode *); | 2134 | extern void inode_add_to_lists(struct super_block *, struct inode *); |
2135 | extern void iput(struct inode *); | 2135 | extern void iput(struct inode *); |
2136 | extern struct inode * igrab(struct inode *); | 2136 | extern struct inode * igrab(struct inode *); |
2137 | extern ino_t iunique(struct super_block *, ino_t); | 2137 | extern ino_t iunique(struct super_block *, ino_t); |
2138 | extern int inode_needs_sync(struct inode *inode); | 2138 | extern int inode_needs_sync(struct inode *inode); |
2139 | extern void generic_delete_inode(struct inode *inode); | 2139 | extern void generic_delete_inode(struct inode *inode); |
2140 | extern void generic_drop_inode(struct inode *inode); | 2140 | extern void generic_drop_inode(struct inode *inode); |
2141 | 2141 | ||
2142 | extern struct inode *ilookup5_nowait(struct super_block *sb, | 2142 | extern struct inode *ilookup5_nowait(struct super_block *sb, |
2143 | unsigned long hashval, int (*test)(struct inode *, void *), | 2143 | unsigned long hashval, int (*test)(struct inode *, void *), |
2144 | void *data); | 2144 | void *data); |
2145 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, | 2145 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, |
2146 | int (*test)(struct inode *, void *), void *data); | 2146 | int (*test)(struct inode *, void *), void *data); |
2147 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); | 2147 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); |
2148 | 2148 | ||
2149 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); | 2149 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); |
2150 | extern struct inode * iget_locked(struct super_block *, unsigned long); | 2150 | extern struct inode * iget_locked(struct super_block *, unsigned long); |
2151 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); | 2151 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); |
2152 | extern int insert_inode_locked(struct inode *); | 2152 | extern int insert_inode_locked(struct inode *); |
2153 | extern void unlock_new_inode(struct inode *); | 2153 | extern void unlock_new_inode(struct inode *); |
2154 | 2154 | ||
2155 | extern void __iget(struct inode * inode); | 2155 | extern void __iget(struct inode * inode); |
2156 | extern void iget_failed(struct inode *); | 2156 | extern void iget_failed(struct inode *); |
2157 | extern void clear_inode(struct inode *); | 2157 | extern void clear_inode(struct inode *); |
2158 | extern void destroy_inode(struct inode *); | 2158 | extern void destroy_inode(struct inode *); |
2159 | extern struct inode *new_inode(struct super_block *); | 2159 | extern struct inode *new_inode(struct super_block *); |
2160 | extern int should_remove_suid(struct dentry *); | 2160 | extern int should_remove_suid(struct dentry *); |
2161 | extern int file_remove_suid(struct file *); | 2161 | extern int file_remove_suid(struct file *); |
2162 | 2162 | ||
2163 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); | 2163 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); |
2164 | extern void remove_inode_hash(struct inode *); | 2164 | extern void remove_inode_hash(struct inode *); |
2165 | static inline void insert_inode_hash(struct inode *inode) { | 2165 | static inline void insert_inode_hash(struct inode *inode) { |
2166 | __insert_inode_hash(inode, inode->i_ino); | 2166 | __insert_inode_hash(inode, inode->i_ino); |
2167 | } | 2167 | } |
2168 | 2168 | ||
2169 | extern struct file * get_empty_filp(void); | 2169 | extern struct file * get_empty_filp(void); |
2170 | extern void file_move(struct file *f, struct list_head *list); | 2170 | extern void file_move(struct file *f, struct list_head *list); |
2171 | extern void file_kill(struct file *f); | 2171 | extern void file_kill(struct file *f); |
2172 | #ifdef CONFIG_BLOCK | 2172 | #ifdef CONFIG_BLOCK |
2173 | struct bio; | 2173 | struct bio; |
2174 | extern void submit_bio(int, struct bio *); | 2174 | extern void submit_bio(int, struct bio *); |
2175 | extern int bdev_read_only(struct block_device *); | 2175 | extern int bdev_read_only(struct block_device *); |
2176 | #endif | 2176 | #endif |
2177 | extern int set_blocksize(struct block_device *, int); | 2177 | extern int set_blocksize(struct block_device *, int); |
2178 | extern int sb_set_blocksize(struct super_block *, int); | 2178 | extern int sb_set_blocksize(struct super_block *, int); |
2179 | extern int sb_min_blocksize(struct super_block *, int); | 2179 | extern int sb_min_blocksize(struct super_block *, int); |
2180 | extern int sb_has_dirty_inodes(struct super_block *); | 2180 | extern int sb_has_dirty_inodes(struct super_block *); |
2181 | 2181 | ||
2182 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); | 2182 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); |
2183 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); | 2183 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); |
2184 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); | 2184 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); |
2185 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); | 2185 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); |
2186 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2186 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2187 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2187 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2188 | extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, | 2188 | extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, |
2189 | unsigned long, loff_t); | 2189 | unsigned long, loff_t); |
2190 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, | 2190 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, |
2191 | unsigned long *, loff_t, loff_t *, size_t, size_t); | 2191 | unsigned long *, loff_t, loff_t *, size_t, size_t); |
2192 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, | 2192 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, |
2193 | unsigned long, loff_t, loff_t *, size_t, ssize_t); | 2193 | unsigned long, loff_t, loff_t *, size_t, ssize_t); |
2194 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); | 2194 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); |
2195 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); | 2195 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); |
2196 | extern int generic_segment_checks(const struct iovec *iov, | 2196 | extern int generic_segment_checks(const struct iovec *iov, |
2197 | unsigned long *nr_segs, size_t *count, int access_flags); | 2197 | unsigned long *nr_segs, size_t *count, int access_flags); |
2198 | 2198 | ||
2199 | /* fs/splice.c */ | 2199 | /* fs/splice.c */ |
2200 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, | 2200 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, |
2201 | struct pipe_inode_info *, size_t, unsigned int); | 2201 | struct pipe_inode_info *, size_t, unsigned int); |
2202 | extern ssize_t default_file_splice_read(struct file *, loff_t *, | 2202 | extern ssize_t default_file_splice_read(struct file *, loff_t *, |
2203 | struct pipe_inode_info *, size_t, unsigned int); | 2203 | struct pipe_inode_info *, size_t, unsigned int); |
2204 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, | 2204 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, |
2205 | struct file *, loff_t *, size_t, unsigned int); | 2205 | struct file *, loff_t *, size_t, unsigned int); |
2206 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2206 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
2207 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2207 | struct file *out, loff_t *, size_t len, unsigned int flags); |
2208 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | 2208 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, |
2209 | size_t len, unsigned int flags); | 2209 | size_t len, unsigned int flags); |
2210 | 2210 | ||
2211 | extern void | 2211 | extern void |
2212 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); | 2212 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); |
2213 | extern loff_t no_llseek(struct file *file, loff_t offset, int origin); | 2213 | extern loff_t no_llseek(struct file *file, loff_t offset, int origin); |
2214 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); | 2214 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); |
2215 | extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, | 2215 | extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, |
2216 | int origin); | 2216 | int origin); |
2217 | extern int generic_file_open(struct inode * inode, struct file * filp); | 2217 | extern int generic_file_open(struct inode * inode, struct file * filp); |
2218 | extern int nonseekable_open(struct inode * inode, struct file * filp); | 2218 | extern int nonseekable_open(struct inode * inode, struct file * filp); |
2219 | 2219 | ||
2220 | #ifdef CONFIG_FS_XIP | 2220 | #ifdef CONFIG_FS_XIP |
2221 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, | 2221 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, |
2222 | loff_t *ppos); | 2222 | loff_t *ppos); |
2223 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); | 2223 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); |
2224 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, | 2224 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, |
2225 | size_t len, loff_t *ppos); | 2225 | size_t len, loff_t *ppos); |
2226 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); | 2226 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); |
2227 | #else | 2227 | #else |
2228 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | 2228 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) |
2229 | { | 2229 | { |
2230 | return 0; | 2230 | return 0; |
2231 | } | 2231 | } |
2232 | #endif | 2232 | #endif |
2233 | 2233 | ||
2234 | #ifdef CONFIG_BLOCK | 2234 | #ifdef CONFIG_BLOCK |
2235 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2235 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
2236 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2236 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
2237 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2237 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
2238 | int lock_type); | 2238 | int lock_type); |
2239 | 2239 | ||
2240 | enum { | 2240 | enum { |
2241 | DIO_LOCKING = 1, /* need locking between buffered and direct access */ | 2241 | DIO_LOCKING = 1, /* need locking between buffered and direct access */ |
2242 | DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ | 2242 | DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ |
2243 | DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ | 2243 | DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ |
2244 | }; | 2244 | }; |
2245 | 2245 | ||
2246 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | 2246 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, |
2247 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, | 2247 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, |
2248 | loff_t offset, unsigned long nr_segs, get_block_t get_block, | 2248 | loff_t offset, unsigned long nr_segs, get_block_t get_block, |
2249 | dio_iodone_t end_io) | 2249 | dio_iodone_t end_io) |
2250 | { | 2250 | { |
2251 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2251 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2252 | nr_segs, get_block, end_io, DIO_LOCKING); | 2252 | nr_segs, get_block, end_io, DIO_LOCKING); |
2253 | } | 2253 | } |
2254 | 2254 | ||
2255 | static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, | 2255 | static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, |
2256 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, | 2256 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, |
2257 | loff_t offset, unsigned long nr_segs, get_block_t get_block, | 2257 | loff_t offset, unsigned long nr_segs, get_block_t get_block, |
2258 | dio_iodone_t end_io) | 2258 | dio_iodone_t end_io) |
2259 | { | 2259 | { |
2260 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2260 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2261 | nr_segs, get_block, end_io, DIO_NO_LOCKING); | 2261 | nr_segs, get_block, end_io, DIO_NO_LOCKING); |
2262 | } | 2262 | } |
2263 | 2263 | ||
2264 | static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, | 2264 | static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, |
2265 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, | 2265 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, |
2266 | loff_t offset, unsigned long nr_segs, get_block_t get_block, | 2266 | loff_t offset, unsigned long nr_segs, get_block_t get_block, |
2267 | dio_iodone_t end_io) | 2267 | dio_iodone_t end_io) |
2268 | { | 2268 | { |
2269 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2269 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2270 | nr_segs, get_block, end_io, DIO_OWN_LOCKING); | 2270 | nr_segs, get_block, end_io, DIO_OWN_LOCKING); |
2271 | } | 2271 | } |
2272 | #endif | 2272 | #endif |
2273 | 2273 | ||
2274 | extern const struct file_operations generic_ro_fops; | 2274 | extern const struct file_operations generic_ro_fops; |
2275 | 2275 | ||
2276 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) | 2276 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) |
2277 | 2277 | ||
2278 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); | 2278 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); |
2279 | extern int vfs_follow_link(struct nameidata *, const char *); | 2279 | extern int vfs_follow_link(struct nameidata *, const char *); |
2280 | extern int page_readlink(struct dentry *, char __user *, int); | 2280 | extern int page_readlink(struct dentry *, char __user *, int); |
2281 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2281 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
2282 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2282 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
2283 | extern int __page_symlink(struct inode *inode, const char *symname, int len, | 2283 | extern int __page_symlink(struct inode *inode, const char *symname, int len, |
2284 | int nofs); | 2284 | int nofs); |
2285 | extern int page_symlink(struct inode *inode, const char *symname, int len); | 2285 | extern int page_symlink(struct inode *inode, const char *symname, int len); |
2286 | extern const struct inode_operations page_symlink_inode_operations; | 2286 | extern const struct inode_operations page_symlink_inode_operations; |
2287 | extern int generic_readlink(struct dentry *, char __user *, int); | 2287 | extern int generic_readlink(struct dentry *, char __user *, int); |
2288 | extern void generic_fillattr(struct inode *, struct kstat *); | 2288 | extern void generic_fillattr(struct inode *, struct kstat *); |
2289 | extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2289 | extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2290 | void inode_add_bytes(struct inode *inode, loff_t bytes); | 2290 | void inode_add_bytes(struct inode *inode, loff_t bytes); |
2291 | void inode_sub_bytes(struct inode *inode, loff_t bytes); | 2291 | void inode_sub_bytes(struct inode *inode, loff_t bytes); |
2292 | loff_t inode_get_bytes(struct inode *inode); | 2292 | loff_t inode_get_bytes(struct inode *inode); |
2293 | void inode_set_bytes(struct inode *inode, loff_t bytes); | 2293 | void inode_set_bytes(struct inode *inode, loff_t bytes); |
2294 | 2294 | ||
2295 | extern int vfs_readdir(struct file *, filldir_t, void *); | 2295 | extern int vfs_readdir(struct file *, filldir_t, void *); |
2296 | 2296 | ||
2297 | extern int vfs_stat(char __user *, struct kstat *); | 2297 | extern int vfs_stat(char __user *, struct kstat *); |
2298 | extern int vfs_lstat(char __user *, struct kstat *); | 2298 | extern int vfs_lstat(char __user *, struct kstat *); |
2299 | extern int vfs_fstat(unsigned int, struct kstat *); | 2299 | extern int vfs_fstat(unsigned int, struct kstat *); |
2300 | extern int vfs_fstatat(int , char __user *, struct kstat *, int); | 2300 | extern int vfs_fstatat(int , char __user *, struct kstat *, int); |
2301 | 2301 | ||
2302 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | 2302 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, |
2303 | unsigned long arg); | 2303 | unsigned long arg); |
2304 | extern int __generic_block_fiemap(struct inode *inode, | 2304 | extern int __generic_block_fiemap(struct inode *inode, |
2305 | struct fiemap_extent_info *fieinfo, u64 start, | 2305 | struct fiemap_extent_info *fieinfo, u64 start, |
2306 | u64 len, get_block_t *get_block); | 2306 | u64 len, get_block_t *get_block); |
2307 | extern int generic_block_fiemap(struct inode *inode, | 2307 | extern int generic_block_fiemap(struct inode *inode, |
2308 | struct fiemap_extent_info *fieinfo, u64 start, | 2308 | struct fiemap_extent_info *fieinfo, u64 start, |
2309 | u64 len, get_block_t *get_block); | 2309 | u64 len, get_block_t *get_block); |
2310 | 2310 | ||
2311 | extern void get_filesystem(struct file_system_type *fs); | 2311 | extern void get_filesystem(struct file_system_type *fs); |
2312 | extern void put_filesystem(struct file_system_type *fs); | 2312 | extern void put_filesystem(struct file_system_type *fs); |
2313 | extern struct file_system_type *get_fs_type(const char *name); | 2313 | extern struct file_system_type *get_fs_type(const char *name); |
2314 | extern struct super_block *get_super(struct block_device *); | 2314 | extern struct super_block *get_super(struct block_device *); |
2315 | extern struct super_block *user_get_super(dev_t); | 2315 | extern struct super_block *user_get_super(dev_t); |
2316 | extern void drop_super(struct super_block *sb); | 2316 | extern void drop_super(struct super_block *sb); |
2317 | 2317 | ||
2318 | extern int dcache_dir_open(struct inode *, struct file *); | 2318 | extern int dcache_dir_open(struct inode *, struct file *); |
2319 | extern int dcache_dir_close(struct inode *, struct file *); | 2319 | extern int dcache_dir_close(struct inode *, struct file *); |
2320 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); | 2320 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); |
2321 | extern int dcache_readdir(struct file *, void *, filldir_t); | 2321 | extern int dcache_readdir(struct file *, void *, filldir_t); |
2322 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2322 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2323 | extern int simple_statfs(struct dentry *, struct kstatfs *); | 2323 | extern int simple_statfs(struct dentry *, struct kstatfs *); |
2324 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); | 2324 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); |
2325 | extern int simple_unlink(struct inode *, struct dentry *); | 2325 | extern int simple_unlink(struct inode *, struct dentry *); |
2326 | extern int simple_rmdir(struct inode *, struct dentry *); | 2326 | extern int simple_rmdir(struct inode *, struct dentry *); |
2327 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 2327 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
2328 | extern int simple_sync_file(struct file *, struct dentry *, int); | 2328 | extern int simple_sync_file(struct file *, struct dentry *, int); |
2329 | extern int simple_empty(struct dentry *); | 2329 | extern int simple_empty(struct dentry *); |
2330 | extern int simple_readpage(struct file *file, struct page *page); | 2330 | extern int simple_readpage(struct file *file, struct page *page); |
2331 | extern int simple_prepare_write(struct file *file, struct page *page, | 2331 | extern int simple_prepare_write(struct file *file, struct page *page, |
2332 | unsigned offset, unsigned to); | 2332 | unsigned offset, unsigned to); |
2333 | extern int simple_write_begin(struct file *file, struct address_space *mapping, | 2333 | extern int simple_write_begin(struct file *file, struct address_space *mapping, |
2334 | loff_t pos, unsigned len, unsigned flags, | 2334 | loff_t pos, unsigned len, unsigned flags, |
2335 | struct page **pagep, void **fsdata); | 2335 | struct page **pagep, void **fsdata); |
2336 | extern int simple_write_end(struct file *file, struct address_space *mapping, | 2336 | extern int simple_write_end(struct file *file, struct address_space *mapping, |
2337 | loff_t pos, unsigned len, unsigned copied, | 2337 | loff_t pos, unsigned len, unsigned copied, |
2338 | struct page *page, void *fsdata); | 2338 | struct page *page, void *fsdata); |
2339 | 2339 | ||
2340 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); | 2340 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); |
2341 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); | 2341 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); |
2342 | extern const struct file_operations simple_dir_operations; | 2342 | extern const struct file_operations simple_dir_operations; |
2343 | extern const struct inode_operations simple_dir_inode_operations; | 2343 | extern const struct inode_operations simple_dir_inode_operations; |
2344 | struct tree_descr { char *name; const struct file_operations *ops; int mode; }; | 2344 | struct tree_descr { char *name; const struct file_operations *ops; int mode; }; |
2345 | struct dentry *d_alloc_name(struct dentry *, const char *); | 2345 | struct dentry *d_alloc_name(struct dentry *, const char *); |
2346 | extern int simple_fill_super(struct super_block *, int, struct tree_descr *); | 2346 | extern int simple_fill_super(struct super_block *, int, struct tree_descr *); |
2347 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); | 2347 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); |
2348 | extern void simple_release_fs(struct vfsmount **mount, int *count); | 2348 | extern void simple_release_fs(struct vfsmount **mount, int *count); |
2349 | 2349 | ||
2350 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, | 2350 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, |
2351 | loff_t *ppos, const void *from, size_t available); | 2351 | loff_t *ppos, const void *from, size_t available); |
2352 | 2352 | ||
2353 | #ifdef CONFIG_MIGRATION | 2353 | #ifdef CONFIG_MIGRATION |
2354 | extern int buffer_migrate_page(struct address_space *, | 2354 | extern int buffer_migrate_page(struct address_space *, |
2355 | struct page *, struct page *); | 2355 | struct page *, struct page *); |
2356 | #else | 2356 | #else |
2357 | #define buffer_migrate_page NULL | 2357 | #define buffer_migrate_page NULL |
2358 | #endif | 2358 | #endif |
2359 | 2359 | ||
2360 | extern int inode_change_ok(struct inode *, struct iattr *); | 2360 | extern int inode_change_ok(struct inode *, struct iattr *); |
2361 | extern int __must_check inode_setattr(struct inode *, struct iattr *); | 2361 | extern int __must_check inode_setattr(struct inode *, struct iattr *); |
2362 | 2362 | ||
2363 | extern void file_update_time(struct file *file); | 2363 | extern void file_update_time(struct file *file); |
2364 | 2364 | ||
2365 | extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); | 2365 | extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); |
2366 | extern void save_mount_options(struct super_block *sb, char *options); | 2366 | extern void save_mount_options(struct super_block *sb, char *options); |
2367 | extern void replace_mount_options(struct super_block *sb, char *options); | 2367 | extern void replace_mount_options(struct super_block *sb, char *options); |
2368 | 2368 | ||
2369 | static inline ino_t parent_ino(struct dentry *dentry) | 2369 | static inline ino_t parent_ino(struct dentry *dentry) |
2370 | { | 2370 | { |
2371 | ino_t res; | 2371 | ino_t res; |
2372 | 2372 | ||
2373 | spin_lock(&dentry->d_lock); | 2373 | spin_lock(&dentry->d_lock); |
2374 | res = dentry->d_parent->d_inode->i_ino; | 2374 | res = dentry->d_parent->d_inode->i_ino; |
2375 | spin_unlock(&dentry->d_lock); | 2375 | spin_unlock(&dentry->d_lock); |
2376 | return res; | 2376 | return res; |
2377 | } | 2377 | } |
2378 | 2378 | ||
2379 | /* Transaction based IO helpers */ | 2379 | /* Transaction based IO helpers */ |
2380 | 2380 | ||
2381 | /* | 2381 | /* |
2382 | * An argresp is stored in an allocated page and holds the | 2382 | * An argresp is stored in an allocated page and holds the |
2383 | * size of the argument or response, along with its content | 2383 | * size of the argument or response, along with its content |
2384 | */ | 2384 | */ |
2385 | struct simple_transaction_argresp { | 2385 | struct simple_transaction_argresp { |
2386 | ssize_t size; | 2386 | ssize_t size; |
2387 | char data[0]; | 2387 | char data[0]; |
2388 | }; | 2388 | }; |
2389 | 2389 | ||
2390 | #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) | 2390 | #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) |
2391 | 2391 | ||
2392 | char *simple_transaction_get(struct file *file, const char __user *buf, | 2392 | char *simple_transaction_get(struct file *file, const char __user *buf, |
2393 | size_t size); | 2393 | size_t size); |
2394 | ssize_t simple_transaction_read(struct file *file, char __user *buf, | 2394 | ssize_t simple_transaction_read(struct file *file, char __user *buf, |
2395 | size_t size, loff_t *pos); | 2395 | size_t size, loff_t *pos); |
2396 | int simple_transaction_release(struct inode *inode, struct file *file); | 2396 | int simple_transaction_release(struct inode *inode, struct file *file); |
2397 | 2397 | ||
2398 | void simple_transaction_set(struct file *file, size_t n); | 2398 | void simple_transaction_set(struct file *file, size_t n); |
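As a hedged sketch (not part of this header), the transaction helpers above are typically combined in a file's write method; my_handle_command() below is a hypothetical parser, everything else uses only the declarations shown:

static ssize_t my_transaction_write(struct file *file, const char __user *buf,
				    size_t size, loff_t *pos)
{
	/* Copy the user's argument into a fresh page attached to the file. */
	char *data = simple_transaction_get(file, buf, size);

	if (IS_ERR(data))
		return PTR_ERR(data);

	/* Hypothetical: parse the command and write the reply back into 'data'. */
	my_handle_command(data);

	/* Publish the reply size so simple_transaction_read() can serve it. */
	simple_transaction_set(file, strlen(data));
	return size;
}

simple_transaction_read() and simple_transaction_release() would then serve as the .read and .release methods of the same file_operations.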
2399 | 2399 | ||
2400 | /* | 2400 | /* |
2401 | * simple attribute files | 2401 | * simple attribute files |
2402 | * | 2402 | * |
2403 | * These attributes behave similarly to those in sysfs: | 2403 | * These attributes behave similarly to those in sysfs: |
2404 | * | 2404 | * |
2405 | * Writing to an attribute immediately sets a value; an open file can be | 2405 | * Writing to an attribute immediately sets a value; an open file can be |
2406 | * written to multiple times. | 2406 | * written to multiple times. |
2407 | * | 2407 | * |
2408 | * Reading from an attribute creates a buffer from the value that might get | 2408 | * Reading from an attribute creates a buffer from the value that might get |
2409 | * read with multiple read calls. When the attribute has been read | 2409 | * read with multiple read calls. When the attribute has been read |
2410 | * completely, no further read calls are possible until the file is opened | 2410 | * completely, no further read calls are possible until the file is opened |
2411 | * again. | 2411 | * again. |
2412 | * | 2412 | * |
2413 | * All attributes contain a text representation of a numeric value | 2413 | * All attributes contain a text representation of a numeric value |
2414 | * that is accessed with the get() and set() functions. | 2414 | * that is accessed with the get() and set() functions. |
2415 | */ | 2415 | */ |
2416 | #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ | 2416 | #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ |
2417 | static int __fops ## _open(struct inode *inode, struct file *file) \ | 2417 | static int __fops ## _open(struct inode *inode, struct file *file) \ |
2418 | { \ | 2418 | { \ |
2419 | __simple_attr_check_format(__fmt, 0ull); \ | 2419 | __simple_attr_check_format(__fmt, 0ull); \ |
2420 | return simple_attr_open(inode, file, __get, __set, __fmt); \ | 2420 | return simple_attr_open(inode, file, __get, __set, __fmt); \ |
2421 | } \ | 2421 | } \ |
2422 | static struct file_operations __fops = { \ | 2422 | static struct file_operations __fops = { \ |
2423 | .owner = THIS_MODULE, \ | 2423 | .owner = THIS_MODULE, \ |
2424 | .open = __fops ## _open, \ | 2424 | .open = __fops ## _open, \ |
2425 | .release = simple_attr_release, \ | 2425 | .release = simple_attr_release, \ |
2426 | .read = simple_attr_read, \ | 2426 | .read = simple_attr_read, \ |
2427 | .write = simple_attr_write, \ | 2427 | .write = simple_attr_write, \ |
2428 | }; | 2428 | }; |
2429 | 2429 | ||
2430 | static inline void __attribute__((format(printf, 1, 2))) | 2430 | static inline void __attribute__((format(printf, 1, 2))) |
2431 | __simple_attr_check_format(const char *fmt, ...) | 2431 | __simple_attr_check_format(const char *fmt, ...) |
2432 | { | 2432 | { |
2433 | /* don't do anything, just let the compiler check the arguments; */ | 2433 | /* don't do anything, just let the compiler check the arguments; */ |
2434 | } | 2434 | } |
2435 | 2435 | ||
2436 | int simple_attr_open(struct inode *inode, struct file *file, | 2436 | int simple_attr_open(struct inode *inode, struct file *file, |
2437 | int (*get)(void *, u64 *), int (*set)(void *, u64), | 2437 | int (*get)(void *, u64 *), int (*set)(void *, u64), |
2438 | const char *fmt); | 2438 | const char *fmt); |
2439 | int simple_attr_release(struct inode *inode, struct file *file); | 2439 | int simple_attr_release(struct inode *inode, struct file *file); |
2440 | ssize_t simple_attr_read(struct file *file, char __user *buf, | 2440 | ssize_t simple_attr_read(struct file *file, char __user *buf, |
2441 | size_t len, loff_t *ppos); | 2441 | size_t len, loff_t *ppos); |
2442 | ssize_t simple_attr_write(struct file *file, const char __user *buf, | 2442 | ssize_t simple_attr_write(struct file *file, const char __user *buf, |
2443 | size_t len, loff_t *ppos); | 2443 | size_t len, loff_t *ppos); |
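For illustration, a minimal attribute built on DEFINE_SIMPLE_ATTRIBUTE above might look like the following sketch; the my_counter names are invented for the example:

static u64 my_counter;

static int my_counter_get(void *data, u64 *val)
{
	*val = *(u64 *)data;		/* 'data' is supplied at file creation */
	return 0;
}

static int my_counter_set(void *data, u64 val)
{
	*(u64 *)data = val;
	return 0;
}

/* Expands to my_counter_fops_open() plus a my_counter_fops file_operations. */
DEFINE_SIMPLE_ATTRIBUTE(my_counter_fops, my_counter_get, my_counter_set, "%llu\n");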
2444 | 2444 | ||
2445 | struct ctl_table; | 2445 | struct ctl_table; |
2446 | int proc_nr_files(struct ctl_table *table, int write, struct file *filp, | 2446 | int proc_nr_files(struct ctl_table *table, int write, struct file *filp, |
2447 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2447 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2448 | 2448 | ||
2449 | int __init get_filesystem_list(char *buf); | 2449 | int __init get_filesystem_list(char *buf); |
2450 | 2450 | ||
2451 | #endif /* __KERNEL__ */ | 2451 | #endif /* __KERNEL__ */ |
2452 | #endif /* _LINUX_FS_H */ | 2452 | #endif /* _LINUX_FS_H */ |
2453 | 2453 |
include/linux/writeback.h
1 | /* | 1 | /* |
2 | * include/linux/writeback.h | 2 | * include/linux/writeback.h |
3 | */ | 3 | */ |
4 | #ifndef WRITEBACK_H | 4 | #ifndef WRITEBACK_H |
5 | #define WRITEBACK_H | 5 | #define WRITEBACK_H |
6 | 6 | ||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | 9 | ||
10 | struct backing_dev_info; | 10 | struct backing_dev_info; |
11 | 11 | ||
12 | extern spinlock_t inode_lock; | 12 | extern spinlock_t inode_lock; |
13 | extern struct list_head inode_in_use; | 13 | extern struct list_head inode_in_use; |
14 | extern struct list_head inode_unused; | 14 | extern struct list_head inode_unused; |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * Yes, writeback.h requires sched.h | 17 | * Yes, writeback.h requires sched.h |
18 | * It is included above. | 18 | * It is included above. |
19 | */ | 19 | */ |
20 | static inline int task_is_pdflush(struct task_struct *task) | 20 | static inline int task_is_pdflush(struct task_struct *task) |
21 | { | 21 | { |
22 | return task->flags & PF_FLUSHER; | 22 | return task->flags & PF_FLUSHER; |
23 | } | 23 | } |
24 | 24 | ||
25 | #define current_is_pdflush() task_is_pdflush(current) | 25 | #define current_is_pdflush() task_is_pdflush(current) |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * fs/fs-writeback.c | 28 | * fs/fs-writeback.c |
29 | */ | 29 | */ |
30 | enum writeback_sync_modes { | 30 | enum writeback_sync_modes { |
31 | WB_SYNC_NONE, /* Don't wait on anything */ | 31 | WB_SYNC_NONE, /* Don't wait on anything */ |
32 | WB_SYNC_ALL, /* Wait on every mapping */ | 32 | WB_SYNC_ALL, /* Wait on every mapping */ |
33 | }; | 33 | }; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * A control structure which tells the writeback code what to do. These are | 36 | * A control structure which tells the writeback code what to do. These are |
37 | * always on the stack, and hence need no locking. They are always initialised | 37 | * always on the stack, and hence need no locking. They are always initialised |
38 | * in a manner such that unspecified fields are set to zero. | 38 | * in a manner such that unspecified fields are set to zero. |
39 | */ | 39 | */ |
40 | struct writeback_control { | 40 | struct writeback_control { |
41 | struct backing_dev_info *bdi; /* If !NULL, only write back this | 41 | struct backing_dev_info *bdi; /* If !NULL, only write back this |
42 | queue */ | 42 | queue */ |
43 | enum writeback_sync_modes sync_mode; | 43 | enum writeback_sync_modes sync_mode; |
44 | unsigned long *older_than_this; /* If !NULL, only write back inodes | 44 | unsigned long *older_than_this; /* If !NULL, only write back inodes |
45 | older than this */ | 45 | older than this */ |
46 | long nr_to_write; /* Write this many pages, and decrement | 46 | long nr_to_write; /* Write this many pages, and decrement |
47 | this for each page written */ | 47 | this for each page written */ |
48 | long pages_skipped; /* Pages which were not written */ | 48 | long pages_skipped; /* Pages which were not written */ |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * For a_ops->writepages(): if start or end is non-zero then this is | 51 | * For a_ops->writepages(): if start or end is non-zero then this is |
52 | * a hint that the filesystem need only write out the pages inside that | 52 | * a hint that the filesystem need only write out the pages inside that |
53 | * byterange. The byte at `end' is included in the writeout request. | 53 | * byterange. The byte at `end' is included in the writeout request. |
54 | */ | 54 | */ |
55 | loff_t range_start; | 55 | loff_t range_start; |
56 | loff_t range_end; | 56 | loff_t range_end; |
57 | 57 | ||
58 | unsigned nonblocking:1; /* Don't get stuck on request queues */ | 58 | unsigned nonblocking:1; /* Don't get stuck on request queues */ |
59 | unsigned encountered_congestion:1; /* An output: a queue is full */ | 59 | unsigned encountered_congestion:1; /* An output: a queue is full */ |
60 | unsigned for_kupdate:1; /* A kupdate writeback */ | 60 | unsigned for_kupdate:1; /* A kupdate writeback */ |
61 | unsigned for_reclaim:1; /* Invoked from the page allocator */ | 61 | unsigned for_reclaim:1; /* Invoked from the page allocator */ |
62 | unsigned for_writepages:1; /* This is a writepages() call */ | 62 | unsigned for_writepages:1; /* This is a writepages() call */ |
63 | unsigned range_cyclic:1; /* range_start is cyclic */ | 63 | unsigned range_cyclic:1; /* range_start is cyclic */ |
64 | unsigned more_io:1; /* more io to be dispatched */ | 64 | unsigned more_io:1; /* more io to be dispatched */ |
65 | /* | 65 | /* |
66 | * write_cache_pages() won't update wbc->nr_to_write and | 66 | * write_cache_pages() won't update wbc->nr_to_write and |
67 | * mapping->writeback_index if no_nrwrite_index_update | 67 | * mapping->writeback_index if no_nrwrite_index_update |
68 | * is set. write_cache_pages() may write more than we | 68 | * is set. write_cache_pages() may write more than we |
69 | * requested and we want to make sure nr_to_write and | 69 | * requested and we want to make sure nr_to_write and |
70 | * writeback_index are updated in a consistent manner | 70 | * writeback_index are updated in a consistent manner |
71 | * so we use a single control to update them | 71 | * so we use a single control to update them |
72 | */ | 72 | */ |
73 | unsigned no_nrwrite_index_update:1; | 73 | unsigned no_nrwrite_index_update:1; |
74 | }; | 74 | }; |
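
Because a writeback_control always lives on the caller's stack and unspecified fields must be zero, callers build one with designated initializers. A sketch of the conventional WB_SYNC_ALL (data-integrity) usage over the whole byterange; the function name is illustrative:

#include <linux/kernel.h>       /* LONG_MAX, LLONG_MAX */
#include <linux/writeback.h>

/* Sketch: write back every dirty page of one mapping and wait on the
 * I/O.  Fields not named here stay zero, per the rule above. */
static int sync_whole_mapping(struct address_space *mapping)
{
        struct writeback_control wbc = {
                .sync_mode   = WB_SYNC_ALL,     /* wait on every page */
                .nr_to_write = LONG_MAX,        /* no page budget */
                .range_start = 0,               /* whole byterange... */
                .range_end   = LLONG_MAX,       /* ...`end' inclusive */
        };

        return do_writepages(mapping, &wbc);
}
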
75 | 75 | ||
76 | /* | 76 | /* |
77 | * fs/fs-writeback.c | 77 | * fs/fs-writeback.c |
78 | */ | 78 | */ |
79 | void writeback_inodes(struct writeback_control *wbc); | 79 | void writeback_inodes(struct writeback_control *wbc); |
80 | int inode_wait(void *); | 80 | int inode_wait(void *); |
81 | void sync_inodes_sb(struct super_block *, int wait); | 81 | void sync_inodes_sb(struct super_block *, int wait); |
82 | void sync_inodes(int wait); | ||
83 | 82 | ||
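
With sync_inodes() removed by this patch, callers sync inodes one superblock at a time through sync_inodes_sb(). The data-integrity idiom is two passes: a non-waiting pass to get as much I/O in flight as possible, then a waiting pass to make sure it has all completed. A hypothetical caller:

/* Sketch: the classic two-pass, per-superblock inode sync.  The first
 * pass (wait == 0) starts asynchronous writeback; the second
 * (wait == 1) waits for it all to finish. */
static void sync_one_sb_fully(struct super_block *sb)
{
        sync_inodes_sb(sb, 0);  /* kick off async writeback */
        sync_inodes_sb(sb, 1);  /* wait for completion */
}
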
84 | /* writeback.h requires fs.h; it, too, is not included from here. */ | 83 | /* writeback.h requires fs.h; it, too, is not included from here. */ |
85 | static inline void wait_on_inode(struct inode *inode) | 84 | static inline void wait_on_inode(struct inode *inode) |
86 | { | 85 | { |
87 | might_sleep(); | 86 | might_sleep(); |
88 | wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, | 87 | wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, |
89 | TASK_UNINTERRUPTIBLE); | 88 | TASK_UNINTERRUPTIBLE); |
90 | } | 89 | } |
91 | static inline void inode_sync_wait(struct inode *inode) | 90 | static inline void inode_sync_wait(struct inode *inode) |
92 | { | 91 | { |
93 | might_sleep(); | 92 | might_sleep(); |
94 | wait_on_bit(&inode->i_state, __I_SYNC, inode_wait, | 93 | wait_on_bit(&inode->i_state, __I_SYNC, inode_wait, |
95 | TASK_UNINTERRUPTIBLE); | 94 | TASK_UNINTERRUPTIBLE); |
96 | } | 95 | } |
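
The two helpers wait on different inode state bits: wait_on_inode() sleeps until I_LOCK clears (the inode is fully instantiated), while inode_sync_wait() sleeps until I_SYNC clears (no writeback of the inode is in flight). The tail of a per-inode data-integrity sync therefore looks roughly like this (a sketch; write_inode_now() is declared in linux/fs.h):

/* Sketch: finish syncing a single inode.  Write it out and wait on
 * its pages, then wait out any writeback another thread still has
 * in flight (I_SYNC). */
static void finish_inode_sync(struct inode *inode)
{
        write_inode_now(inode, 1);      /* write + wait on pages */
        inode_sync_wait(inode);         /* wait for I_SYNC to clear */
}
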
97 | 96 | ||
98 | 97 | ||
99 | /* | 98 | /* |
100 | * mm/page-writeback.c | 99 | * mm/page-writeback.c |
101 | */ | 100 | */ |
102 | int wakeup_pdflush(long nr_pages); | 101 | int wakeup_pdflush(long nr_pages); |
103 | void laptop_io_completion(void); | 102 | void laptop_io_completion(void); |
104 | void laptop_sync_completion(void); | 103 | void laptop_sync_completion(void); |
105 | void throttle_vm_writeout(gfp_t gfp_mask); | 104 | void throttle_vm_writeout(gfp_t gfp_mask); |
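
wakeup_pdflush(nr_pages) asks a pdflush thread to write back roughly nr_pages pages in the background; by convention, 0 means all dirty pages. It is built on pdflush_operation(), declared further down, which hands an arbitrary one-argument function to an idle pdflush thread and returns nonzero if none is free. A sketch of that pattern with a hypothetical payload:

#include <linux/writeback.h>

/* Sketch: a pdflush payload must take one unsigned long.  This one
 * starts non-waiting, cyclic background writeback of up to nr_pages
 * pages across all queues (wbc.bdi left NULL). */
static void flush_payload(unsigned long nr_pages)
{
        struct writeback_control wbc = {
                .sync_mode    = WB_SYNC_NONE,   /* don't wait on pages */
                .nr_to_write  = nr_pages,
                .nonblocking  = 1,              /* skip congested queues */
                .range_cyclic = 1,              /* resume where we stopped */
        };

        writeback_inodes(&wbc);
}

static void start_background_flush(long nr_pages)
{
        /* A negative return means no pdflush thread was idle, so fall
         * back to doing the work in this context. */
        if (pdflush_operation(flush_payload, nr_pages) < 0)
                flush_payload(nr_pages);
}
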
106 | 105 | ||
107 | /* These are exported to sysctl. */ | 106 | /* These are exported to sysctl. */ |
108 | extern int dirty_background_ratio; | 107 | extern int dirty_background_ratio; |
109 | extern unsigned long dirty_background_bytes; | 108 | extern unsigned long dirty_background_bytes; |
110 | extern int vm_dirty_ratio; | 109 | extern int vm_dirty_ratio; |
111 | extern unsigned long vm_dirty_bytes; | 110 | extern unsigned long vm_dirty_bytes; |
112 | extern unsigned int dirty_writeback_interval; | 111 | extern unsigned int dirty_writeback_interval; |
113 | extern unsigned int dirty_expire_interval; | 112 | extern unsigned int dirty_expire_interval; |
114 | extern int vm_highmem_is_dirtyable; | 113 | extern int vm_highmem_is_dirtyable; |
115 | extern int block_dump; | 114 | extern int block_dump; |
116 | extern int laptop_mode; | 115 | extern int laptop_mode; |
117 | 116 | ||
118 | extern unsigned long determine_dirtyable_memory(void); | 117 | extern unsigned long determine_dirtyable_memory(void); |
119 | 118 | ||
120 | extern int dirty_background_ratio_handler(struct ctl_table *table, int write, | 119 | extern int dirty_background_ratio_handler(struct ctl_table *table, int write, |
121 | struct file *filp, void __user *buffer, size_t *lenp, | 120 | struct file *filp, void __user *buffer, size_t *lenp, |
122 | loff_t *ppos); | 121 | loff_t *ppos); |
123 | extern int dirty_background_bytes_handler(struct ctl_table *table, int write, | 122 | extern int dirty_background_bytes_handler(struct ctl_table *table, int write, |
124 | struct file *filp, void __user *buffer, size_t *lenp, | 123 | struct file *filp, void __user *buffer, size_t *lenp, |
125 | loff_t *ppos); | 124 | loff_t *ppos); |
126 | extern int dirty_ratio_handler(struct ctl_table *table, int write, | 125 | extern int dirty_ratio_handler(struct ctl_table *table, int write, |
127 | struct file *filp, void __user *buffer, size_t *lenp, | 126 | struct file *filp, void __user *buffer, size_t *lenp, |
128 | loff_t *ppos); | 127 | loff_t *ppos); |
129 | extern int dirty_bytes_handler(struct ctl_table *table, int write, | 128 | extern int dirty_bytes_handler(struct ctl_table *table, int write, |
130 | struct file *filp, void __user *buffer, size_t *lenp, | 129 | struct file *filp, void __user *buffer, size_t *lenp, |
131 | loff_t *ppos); | 130 | loff_t *ppos); |
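
Each of these handlers backs one vm.* sysctl; dedicated handlers are needed (rather than plain proc_dointvec) partly because the ratio and bytes forms of each limit are mutually exclusive, so writing one is expected to clear the other. A sketch of how such a handler is wired into a ctl_table entry, loosely mirroring the vm.dirty_ratio wiring in kernel/sysctl.c (table name and fragment are illustrative):

#include <linux/sysctl.h>
#include <linux/writeback.h>

/* Sketch: a ctl_table fragment hooking vm.dirty_ratio to its
 * handler.  The empty entry terminates the table. */
static struct ctl_table vm_dirty_table[] = {
        {
                .procname     = "dirty_ratio",
                .data         = &vm_dirty_ratio,
                .maxlen       = sizeof(vm_dirty_ratio),
                .mode         = 0644,
                .proc_handler = &dirty_ratio_handler,
        },
        {}
};
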
132 | 131 | ||
133 | struct ctl_table; | 132 | struct ctl_table; |
134 | struct file; | 133 | struct file; |
135 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, | 134 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, |
136 | void __user *, size_t *, loff_t *); | 135 | void __user *, size_t *, loff_t *); |
137 | 136 | ||
138 | void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, | 137 | void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, |
139 | unsigned long *pbdi_dirty, struct backing_dev_info *bdi); | 138 | unsigned long *pbdi_dirty, struct backing_dev_info *bdi); |
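
get_dirty_limits() reports the global background and hard dirty thresholds, plus the given bdi's proportional share of the latter; balance_dirty_pages() compares a device's reclaimable and under-writeback pages against that share. A sketch of the same comparison, assuming the bdi_stat() accessors from linux/backing-dev.h:

#include <linux/backing-dev.h>
#include <linux/writeback.h>

/* Sketch: is this backing device over its share of the dirty memory
 * limits?  Mirrors the per-bdi check balance_dirty_pages() makes. */
static int bdi_over_its_limit(struct backing_dev_info *bdi)
{
        unsigned long background, dirty, bdi_thresh;

        get_dirty_limits(&background, &dirty, &bdi_thresh, bdi);
        return bdi_stat(bdi, BDI_RECLAIMABLE) +
               bdi_stat(bdi, BDI_WRITEBACK) > bdi_thresh;
}
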
140 | 139 | ||
141 | void page_writeback_init(void); | 140 | void page_writeback_init(void); |
142 | void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, | 141 | void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, |
143 | unsigned long nr_pages_dirtied); | 142 | unsigned long nr_pages_dirtied); |
144 | 143 | ||
145 | static inline void | 144 | static inline void |
146 | balance_dirty_pages_ratelimited(struct address_space *mapping) | 145 | balance_dirty_pages_ratelimited(struct address_space *mapping) |
147 | { | 146 | { |
148 | balance_dirty_pages_ratelimited_nr(mapping, 1); | 147 | balance_dirty_pages_ratelimited_nr(mapping, 1); |
149 | } | 148 | } |
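
The wrapper above is the common case: a path that dirties pages one at a time calls it once per page, and the internal ratelimiting ensures the expensive limit check in balance_dirty_pages() only runs occasionally. A sketch of a write path using it (helper name hypothetical):

#include <linux/mm.h>           /* set_page_dirty() */
#include <linux/writeback.h>

/* Sketch: dirty one page, then let the ratelimited balancer decide
 * whether this task must be throttled into doing writeback itself. */
static void dirty_one_page(struct address_space *mapping,
                           struct page *page)
{
        set_page_dirty(page);
        balance_dirty_pages_ratelimited(mapping);
}
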
150 | 149 | ||
151 | typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, | 150 | typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, |
152 | void *data); | 151 | void *data); |
153 | 152 | ||
154 | int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); | 153 | int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); |
155 | int generic_writepages(struct address_space *mapping, | 154 | int generic_writepages(struct address_space *mapping, |
156 | struct writeback_control *wbc); | 155 | struct writeback_control *wbc); |
157 | int write_cache_pages(struct address_space *mapping, | 156 | int write_cache_pages(struct address_space *mapping, |
158 | struct writeback_control *wbc, writepage_t writepage, | 157 | struct writeback_control *wbc, writepage_t writepage, |
159 | void *data); | 158 | void *data); |
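
write_cache_pages() owns the page-cache walk, page locking, and the wbc bookkeeping (nr_to_write, range handling, writeback_index); the caller supplies only the writepage_t callback that receives each dirty, locked page. generic_writepages() is exactly this pattern with the mapping's ->writepage as the callback. A minimal sketch of a filesystem ->writepages built the same way (the myfs_* names are hypothetical):

#include <linux/pagemap.h>
#include <linux/writeback.h>

/* Sketch: the callback gets each dirty page locked; delegating to
 * ->writepage() writes and unlocks it (or redirties and unlocks it
 * on failure). */
static int myfs_writepage_cb(struct page *page,
                             struct writeback_control *wbc, void *data)
{
        struct address_space *mapping = data;

        return mapping->a_ops->writepage(page, wbc);
}

static int myfs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        return write_cache_pages(mapping, wbc, myfs_writepage_cb, mapping);
}
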
160 | int do_writepages(struct address_space *mapping, struct writeback_control *wbc); | 159 | int do_writepages(struct address_space *mapping, struct writeback_control *wbc); |
161 | int sync_page_range(struct inode *inode, struct address_space *mapping, | 160 | int sync_page_range(struct inode *inode, struct address_space *mapping, |
162 | loff_t pos, loff_t count); | 161 | loff_t pos, loff_t count); |
163 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, | 162 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, |
164 | loff_t pos, loff_t count); | 163 | loff_t pos, loff_t count); |
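
sync_page_range() is the byterange data-integrity primitive: it writes out just the pages covering [pos, pos + count), historically syncing the inode's metadata under i_mutex before waiting on the pages; the _nolock variant assumes the caller already holds i_mutex. The tail of an O_SYNC write might use it roughly like this (a sketch, names illustrative):

#include <linux/fs.h>

/* Sketch: give an already-completed write O_SYNC semantics by
 * flushing exactly the bytes it touched. */
static int sync_after_write(struct file *file, loff_t pos, loff_t count)
{
        struct address_space *mapping = file->f_mapping;

        return sync_page_range(mapping->host, mapping, pos, count);
}
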
165 | void set_page_dirty_balance(struct page *page, int page_mkwrite); | 164 | void set_page_dirty_balance(struct page *page, int page_mkwrite); |
166 | void writeback_set_ratelimit(void); | 165 | void writeback_set_ratelimit(void); |
167 | 166 | ||
168 | /* pdflush.c */ | 167 | /* pdflush.c */ |
169 | extern int nr_pdflush_threads; /* Global so it can be exported to sysctl | 168 | extern int nr_pdflush_threads; /* Global so it can be exported to sysctl |
170 | read-only. */ | 169 | read-only. */ |
171 | 170 | ||
172 | 171 | ||
173 | #endif /* WRITEBACK_H */ | 172 | #endif /* WRITEBACK_H */ |
174 | 173 |