Commit bb8430a2c8fe2b726033017daadf73c69b0348ea

Authored by Christoph Hellwig
Committed by Linus Torvalds
1 parent 51ee4b84f5

locks: remove fl_copy_lock lock_manager operation

This one was only used for a nasty hack in nfsd, which has recently
been removed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 1 addition and 7 deletions

Documentation/filesystems/Locking
1 The text below describes the locking rules for VFS-related methods. 1 The text below describes the locking rules for VFS-related methods.
2 It is (believed to be) up-to-date. *Please*, if you change anything in 2 It is (believed to be) up-to-date. *Please*, if you change anything in
3 prototypes or locking protocols - update this file. And update the relevant 3 prototypes or locking protocols - update this file. And update the relevant
4 instances in the tree, don't leave that to maintainers of filesystems/devices/ 4 instances in the tree, don't leave that to maintainers of filesystems/devices/
5 etc. At the very least, put the list of dubious cases in the end of this file. 5 etc. At the very least, put the list of dubious cases in the end of this file.
6 Don't turn it into log - maintainers of out-of-the-tree code are supposed to 6 Don't turn it into log - maintainers of out-of-the-tree code are supposed to
7 be able to use diff(1). 7 be able to use diff(1).
8 Thing currently missing here: socket operations. Alexey? 8 Thing currently missing here: socket operations. Alexey?
9 9
10 --------------------------- dentry_operations -------------------------- 10 --------------------------- dentry_operations --------------------------
11 prototypes: 11 prototypes:
12 int (*d_revalidate)(struct dentry *, int); 12 int (*d_revalidate)(struct dentry *, int);
13 int (*d_hash) (struct dentry *, struct qstr *); 13 int (*d_hash) (struct dentry *, struct qstr *);
14 int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); 14 int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
15 int (*d_delete)(struct dentry *); 15 int (*d_delete)(struct dentry *);
16 void (*d_release)(struct dentry *); 16 void (*d_release)(struct dentry *);
17 void (*d_iput)(struct dentry *, struct inode *); 17 void (*d_iput)(struct dentry *, struct inode *);
18 	char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); 18 	char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
19 19
20 locking rules: 20 locking rules:
21 none have BKL 21 none have BKL
22 dcache_lock rename_lock ->d_lock may block 22 dcache_lock rename_lock ->d_lock may block
23 d_revalidate: no no no yes 23 d_revalidate: no no no yes
24 d_hash no no no yes 24 d_hash no no no yes
25 d_compare: no yes no no 25 d_compare: no yes no no
26 d_delete: yes no yes no 26 d_delete: yes no yes no
27 d_release: no no no yes 27 d_release: no no no yes
28 d_iput: no no no yes 28 d_iput: no no no yes
29 d_dname: no no no no 29 d_dname: no no no no
30 30
31 --------------------------- inode_operations --------------------------- 31 --------------------------- inode_operations ---------------------------
32 prototypes: 32 prototypes:
33 int (*create) (struct inode *,struct dentry *,int, struct nameidata *); 33 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
34 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid 34 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid
35 ata *); 35 ata *);
36 int (*link) (struct dentry *,struct inode *,struct dentry *); 36 int (*link) (struct dentry *,struct inode *,struct dentry *);
37 int (*unlink) (struct inode *,struct dentry *); 37 int (*unlink) (struct inode *,struct dentry *);
38 int (*symlink) (struct inode *,struct dentry *,const char *); 38 int (*symlink) (struct inode *,struct dentry *,const char *);
39 int (*mkdir) (struct inode *,struct dentry *,int); 39 int (*mkdir) (struct inode *,struct dentry *,int);
40 int (*rmdir) (struct inode *,struct dentry *); 40 int (*rmdir) (struct inode *,struct dentry *);
41 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 41 int (*mknod) (struct inode *,struct dentry *,int,dev_t);
42 int (*rename) (struct inode *, struct dentry *, 42 int (*rename) (struct inode *, struct dentry *,
43 struct inode *, struct dentry *); 43 struct inode *, struct dentry *);
44 int (*readlink) (struct dentry *, char __user *,int); 44 int (*readlink) (struct dentry *, char __user *,int);
45 int (*follow_link) (struct dentry *, struct nameidata *); 45 int (*follow_link) (struct dentry *, struct nameidata *);
46 void (*truncate) (struct inode *); 46 void (*truncate) (struct inode *);
47 int (*permission) (struct inode *, int, struct nameidata *); 47 int (*permission) (struct inode *, int, struct nameidata *);
48 int (*setattr) (struct dentry *, struct iattr *); 48 int (*setattr) (struct dentry *, struct iattr *);
49 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); 49 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
50 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 50 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
51 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 51 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
52 ssize_t (*listxattr) (struct dentry *, char *, size_t); 52 ssize_t (*listxattr) (struct dentry *, char *, size_t);
53 int (*removexattr) (struct dentry *, const char *); 53 int (*removexattr) (struct dentry *, const char *);
54 54
55 locking rules: 55 locking rules:
56 all may block, none have BKL 56 all may block, none have BKL
57 i_mutex(inode) 57 i_mutex(inode)
58 lookup: yes 58 lookup: yes
59 create: yes 59 create: yes
60 link: yes (both) 60 link: yes (both)
61 mknod: yes 61 mknod: yes
62 symlink: yes 62 symlink: yes
63 mkdir: yes 63 mkdir: yes
64 unlink: yes (both) 64 unlink: yes (both)
65 rmdir: yes (both) (see below) 65 rmdir: yes (both) (see below)
66 rename: yes (all) (see below) 66 rename: yes (all) (see below)
67 readlink: no 67 readlink: no
68 follow_link: no 68 follow_link: no
69 truncate: yes (see below) 69 truncate: yes (see below)
70 setattr: yes 70 setattr: yes
71 permission: no 71 permission: no
72 getattr: no 72 getattr: no
73 setxattr: yes 73 setxattr: yes
74 getxattr: no 74 getxattr: no
75 listxattr: no 75 listxattr: no
76 removexattr: yes 76 removexattr: yes
77 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 77 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
78 victim. 78 victim.
79 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. 79 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
80 ->truncate() is never called directly - it's a callback, not a 80 ->truncate() is never called directly - it's a callback, not a
81 method. It's called by vmtruncate() - library function normally used by 81 method. It's called by vmtruncate() - library function normally used by
82 ->setattr(). Locking information above applies to that call (i.e. is 82 ->setattr(). Locking information above applies to that call (i.e. is
83 inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been 83 inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
84 passed). 84 passed).
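
	As an illustration of the ->setattr()/vmtruncate() relationship, a
minimal sketch (hypothetical foo_* names, not taken from any in-tree
filesystem) might look like:

	static int foo_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = dentry->d_inode;
		int error;

		/* i_mutex is held by the caller, per the table above */
		error = inode_change_ok(inode, attr);
		if (error)
			return error;

		if (attr->ia_valid & ATTR_SIZE) {
			/* vmtruncate() is what ends up calling ->truncate() */
			error = vmtruncate(inode, attr->ia_size);
			if (error)
				return error;
		}
		/* ... apply the remaining attributes ... */
		return 0;
	}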
85 85
86 See Documentation/filesystems/directory-locking for more detailed discussion 86 See Documentation/filesystems/directory-locking for more detailed discussion
87 of the locking scheme for directory operations. 87 of the locking scheme for directory operations.
88 88
89 --------------------------- super_operations --------------------------- 89 --------------------------- super_operations ---------------------------
90 prototypes: 90 prototypes:
91 struct inode *(*alloc_inode)(struct super_block *sb); 91 struct inode *(*alloc_inode)(struct super_block *sb);
92 void (*destroy_inode)(struct inode *); 92 void (*destroy_inode)(struct inode *);
93 void (*dirty_inode) (struct inode *); 93 void (*dirty_inode) (struct inode *);
94 int (*write_inode) (struct inode *, int); 94 int (*write_inode) (struct inode *, int);
95 int (*drop_inode) (struct inode *); 95 int (*drop_inode) (struct inode *);
96 void (*evict_inode) (struct inode *); 96 void (*evict_inode) (struct inode *);
97 void (*put_super) (struct super_block *); 97 void (*put_super) (struct super_block *);
98 void (*write_super) (struct super_block *); 98 void (*write_super) (struct super_block *);
99 int (*sync_fs)(struct super_block *sb, int wait); 99 int (*sync_fs)(struct super_block *sb, int wait);
100 int (*freeze_fs) (struct super_block *); 100 int (*freeze_fs) (struct super_block *);
101 int (*unfreeze_fs) (struct super_block *); 101 int (*unfreeze_fs) (struct super_block *);
102 int (*statfs) (struct dentry *, struct kstatfs *); 102 int (*statfs) (struct dentry *, struct kstatfs *);
103 int (*remount_fs) (struct super_block *, int *, char *); 103 int (*remount_fs) (struct super_block *, int *, char *);
104 void (*umount_begin) (struct super_block *); 104 void (*umount_begin) (struct super_block *);
105 int (*show_options)(struct seq_file *, struct vfsmount *); 105 int (*show_options)(struct seq_file *, struct vfsmount *);
106 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 106 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
107 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 107 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
108 108
109 locking rules: 109 locking rules:
110 All may block [not true, see below] 110 All may block [not true, see below]
111 None have BKL 111 None have BKL
112 s_umount 112 s_umount
113 alloc_inode: 113 alloc_inode:
114 destroy_inode: 114 destroy_inode:
115 dirty_inode: (must not sleep) 115 dirty_inode: (must not sleep)
116 write_inode: 116 write_inode:
117 drop_inode: !!!inode_lock!!! 117 drop_inode: !!!inode_lock!!!
118 evict_inode: 118 evict_inode:
119 put_super: write 119 put_super: write
120 write_super: read 120 write_super: read
121 sync_fs: read 121 sync_fs: read
122 freeze_fs: read 122 freeze_fs: read
123 unfreeze_fs: read 123 unfreeze_fs: read
124 statfs: maybe(read) (see below) 124 statfs: maybe(read) (see below)
125 remount_fs: write 125 remount_fs: write
126 umount_begin: no 126 umount_begin: no
127 show_options: no (namespace_sem) 127 show_options: no (namespace_sem)
128 quota_read: no (see below) 128 quota_read: no (see below)
129 quota_write: no (see below) 129 quota_write: no (see below)
130 130
131 ->statfs() has s_umount (shared) when called by ustat(2) (native or 131 ->statfs() has s_umount (shared) when called by ustat(2) (native or
132 compat), but that's an accident of bad API; s_umount is used to pin 132 compat), but that's an accident of bad API; s_umount is used to pin
133 the superblock down when we only have dev_t given us by userland to 133 the superblock down when we only have dev_t given us by userland to
134 identify the superblock. Everything else (statfs(), fstatfs(), etc.) 134 identify the superblock. Everything else (statfs(), fstatfs(), etc.)
135 doesn't hold it when calling ->statfs() - superblock is pinned down 135 doesn't hold it when calling ->statfs() - superblock is pinned down
136 by resolving the pathname passed to syscall. 136 by resolving the pathname passed to syscall.
137 ->quota_read() and ->quota_write() functions are both guaranteed to 137 ->quota_read() and ->quota_write() functions are both guaranteed to
138 be the only ones operating on the quota file by the quota code (via 138 be the only ones operating on the quota file by the quota code (via
139 dqio_sem) (unless an admin really wants to screw up something and 139 dqio_sem) (unless an admin really wants to screw up something and
140 writes to quota files with quotas on). For other details about locking 140 writes to quota files with quotas on). For other details about locking
141 see also dquot_operations section. 141 see also dquot_operations section.
142 142
143 --------------------------- file_system_type --------------------------- 143 --------------------------- file_system_type ---------------------------
144 prototypes: 144 prototypes:
145 int (*get_sb) (struct file_system_type *, int, 145 int (*get_sb) (struct file_system_type *, int,
146 const char *, void *, struct vfsmount *); 146 const char *, void *, struct vfsmount *);
147 void (*kill_sb) (struct super_block *); 147 void (*kill_sb) (struct super_block *);
148 locking rules: 148 locking rules:
149 may block BKL 149 may block BKL
150 get_sb yes no 150 get_sb yes no
151 kill_sb yes no 151 kill_sb yes no
152 152
153 ->get_sb() returns error or 0 with locked superblock attached to the vfsmount 153 ->get_sb() returns error or 0 with locked superblock attached to the vfsmount
154 (exclusive on ->s_umount). 154 (exclusive on ->s_umount).
155 ->kill_sb() takes a write-locked superblock, does all shutdown work on it, 155 ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
156 unlocks and drops the reference. 156 unlocks and drops the reference.
157 157
158 --------------------------- address_space_operations -------------------------- 158 --------------------------- address_space_operations --------------------------
159 prototypes: 159 prototypes:
160 int (*writepage)(struct page *page, struct writeback_control *wbc); 160 int (*writepage)(struct page *page, struct writeback_control *wbc);
161 int (*readpage)(struct file *, struct page *); 161 int (*readpage)(struct file *, struct page *);
162 int (*sync_page)(struct page *); 162 int (*sync_page)(struct page *);
163 int (*writepages)(struct address_space *, struct writeback_control *); 163 int (*writepages)(struct address_space *, struct writeback_control *);
164 int (*set_page_dirty)(struct page *page); 164 int (*set_page_dirty)(struct page *page);
165 int (*readpages)(struct file *filp, struct address_space *mapping, 165 int (*readpages)(struct file *filp, struct address_space *mapping,
166 struct list_head *pages, unsigned nr_pages); 166 struct list_head *pages, unsigned nr_pages);
167 int (*write_begin)(struct file *, struct address_space *mapping, 167 int (*write_begin)(struct file *, struct address_space *mapping,
168 loff_t pos, unsigned len, unsigned flags, 168 loff_t pos, unsigned len, unsigned flags,
169 struct page **pagep, void **fsdata); 169 struct page **pagep, void **fsdata);
170 int (*write_end)(struct file *, struct address_space *mapping, 170 int (*write_end)(struct file *, struct address_space *mapping,
171 loff_t pos, unsigned len, unsigned copied, 171 loff_t pos, unsigned len, unsigned copied,
172 struct page *page, void *fsdata); 172 struct page *page, void *fsdata);
173 sector_t (*bmap)(struct address_space *, sector_t); 173 sector_t (*bmap)(struct address_space *, sector_t);
174 int (*invalidatepage) (struct page *, unsigned long); 174 int (*invalidatepage) (struct page *, unsigned long);
175 int (*releasepage) (struct page *, int); 175 int (*releasepage) (struct page *, int);
176 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 176 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
177 loff_t offset, unsigned long nr_segs); 177 loff_t offset, unsigned long nr_segs);
178 int (*launder_page) (struct page *); 178 int (*launder_page) (struct page *);
179 179
180 locking rules: 180 locking rules:
181 All except set_page_dirty may block 181 All except set_page_dirty may block
182 182
183 BKL PageLocked(page) i_mutex 183 BKL PageLocked(page) i_mutex
184 writepage: no yes, unlocks (see below) 184 writepage: no yes, unlocks (see below)
185 readpage: no yes, unlocks 185 readpage: no yes, unlocks
186 sync_page: no maybe 186 sync_page: no maybe
187 writepages: no 187 writepages: no
188 set_page_dirty no no 188 set_page_dirty no no
189 readpages: no 189 readpages: no
190 write_begin: no locks the page yes 190 write_begin: no locks the page yes
191 write_end: no yes, unlocks yes 191 write_end: no yes, unlocks yes
192 perform_write: no n/a yes 192 perform_write: no n/a yes
193 bmap: no 193 bmap: no
194 invalidatepage: no yes 194 invalidatepage: no yes
195 releasepage: no yes 195 releasepage: no yes
196 direct_IO: no 196 direct_IO: no
197 launder_page: no yes 197 launder_page: no yes
198 198
199 ->write_begin(), ->write_end(), ->sync_page() and ->readpage() 199 ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
200 may be called from the request handler (/dev/loop). 200 may be called from the request handler (/dev/loop).
201 201
202 ->readpage() unlocks the page, either synchronously or via I/O 202 ->readpage() unlocks the page, either synchronously or via I/O
203 completion. 203 completion.
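
	For a typical block-based filesystem this is a one-liner delegating to
a library helper (foo_get_block is a placeholder for the filesystem's
get_block routine); the helper submits the read and the page is unlocked
from the I/O completion path:

	static int foo_readpage(struct file *file, struct page *page)
	{
		return mpage_readpage(page, foo_get_block);
	}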
204 204
205 ->readpages() populates the pagecache with the passed pages and starts 205 ->readpages() populates the pagecache with the passed pages and starts
206 I/O against them. They come unlocked upon I/O completion. 206 I/O against them. They come unlocked upon I/O completion.
207 207
208 ->writepage() is used for two purposes: for "memory cleansing" and for 208 ->writepage() is used for two purposes: for "memory cleansing" and for
209 "sync". These are quite different operations and the behaviour may differ 209 "sync". These are quite different operations and the behaviour may differ
210 depending upon the mode. 210 depending upon the mode.
211 211
212 If writepage is called for sync (wbc->sync_mode != WB_SYNC_NONE) then 212 If writepage is called for sync (wbc->sync_mode != WB_SYNC_NONE) then
213 it *must* start I/O against the page, even if that would involve 213 it *must* start I/O against the page, even if that would involve
214 blocking on in-progress I/O. 214 blocking on in-progress I/O.
215 215
216 If writepage is called for memory cleansing (sync_mode == 216 If writepage is called for memory cleansing (sync_mode ==
217 WB_SYNC_NONE) then its role is to get as much writeout underway as 217 WB_SYNC_NONE) then its role is to get as much writeout underway as
218 possible. So writepage should try to avoid blocking against 218 possible. So writepage should try to avoid blocking against
219 currently-in-progress I/O. 219 currently-in-progress I/O.
220 220
221 If the filesystem is not called for "sync" and it determines that it 221 If the filesystem is not called for "sync" and it determines that it
222 would need to block against in-progress I/O to be able to start new I/O 222 would need to block against in-progress I/O to be able to start new I/O
223 against the page the filesystem should redirty the page with 223 against the page the filesystem should redirty the page with
224 redirty_page_for_writepage(), then unlock the page and return zero. 224 redirty_page_for_writepage(), then unlock the page and return zero.
225 This may also be done to avoid internal deadlocks, but rarely. 225 This may also be done to avoid internal deadlocks, but rarely.
226 226
227 If the filesystem is called for sync then it must wait on any 227 If the filesystem is called for sync then it must wait on any
228 in-progress I/O and then start new I/O. 228 in-progress I/O and then start new I/O.
229 229
230 The filesystem should unlock the page synchronously, before returning to the 230 The filesystem should unlock the page synchronously, before returning to the
231 caller, unless ->writepage() returns special AOP_WRITEPAGE_ACTIVATE 231 caller, unless ->writepage() returns special AOP_WRITEPAGE_ACTIVATE
232 value. AOP_WRITEPAGE_ACTIVATE means that page cannot really be written out 232 value. AOP_WRITEPAGE_ACTIVATE means that page cannot really be written out
233 currently, and VM should stop calling ->writepage() on this page for some 233 currently, and VM should stop calling ->writepage() on this page for some
234 time. VM does this by moving page to the head of the active list, hence the 234 time. VM does this by moving page to the head of the active list, hence the
235 name. 235 name.
236 236
237 Unless the filesystem is going to redirty_page_for_writepage(), unlock the page 237 Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
238 and return zero, writepage *must* run set_page_writeback() against the page, 238 and return zero, writepage *must* run set_page_writeback() against the page,
239 followed by unlocking it. Once set_page_writeback() has been run against the 239 followed by unlocking it. Once set_page_writeback() has been run against the
240 page, write I/O can be submitted and the write I/O completion handler must run 240 page, write I/O can be submitted and the write I/O completion handler must run
241 end_page_writeback() once the I/O is complete. If no I/O is submitted, the 241 end_page_writeback() once the I/O is complete. If no I/O is submitted, the
242 filesystem must run end_page_writeback() against the page before returning from 242 filesystem must run end_page_writeback() against the page before returning from
243 writepage. 243 writepage.
244 244
245 That is: after 2.5.12, pages which are under writeout are *not* locked. Note, 245 That is: after 2.5.12, pages which are under writeout are *not* locked. Note,
246 if the filesystem needs the page to be locked during writeout, that is ok, too, 246 if the filesystem needs the page to be locked during writeout, that is ok, too,
247 the page is allowed to be unlocked at any point in time between the calls to 247 the page is allowed to be unlocked at any point in time between the calls to
248 set_page_writeback() and end_page_writeback(). 248 set_page_writeback() and end_page_writeback().
249 249
250 Note, failure to run either redirty_page_for_writepage() or the combination of 250 Note, failure to run either redirty_page_for_writepage() or the combination of
251 set_page_writeback()/end_page_writeback() on a page submitted to writepage 251 set_page_writeback()/end_page_writeback() on a page submitted to writepage
252 will leave the page itself marked clean but it will be tagged as dirty in the 252 will leave the page itself marked clean but it will be tagged as dirty in the
253 radix tree. This incoherency can lead to all sorts of hard-to-debug problems 253 radix tree. This incoherency can lead to all sorts of hard-to-debug problems
254 in the filesystem like having dirty inodes at umount and losing written data. 254 in the filesystem like having dirty inodes at umount and losing written data.
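
	Putting the rules above together, an illustrative ->writepage() sketch
(foo_would_block() and foo_submit_write() are hypothetical helpers; real
filesystems usually get this behaviour from library code such as
block_write_full_page()) could look like:

	static int foo_writepage(struct page *page, struct writeback_control *wbc)
	{
		/* memory cleansing: avoid blocking on in-progress I/O */
		if (wbc->sync_mode == WB_SYNC_NONE && foo_would_block(page)) {
			redirty_page_for_writepage(wbc, page);
			unlock_page(page);
			return 0;
		}

		set_page_writeback(page);
		unlock_page(page);	/* the page is not locked during writeout */
		if (foo_submit_write(page) < 0) {
			/* no I/O was submitted, so writeback must end here */
			end_page_writeback(page);
			return -EIO;
		}
		/* on success the completion handler calls end_page_writeback() */
		return 0;
	}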
255 255
256 ->sync_page() locking rules are not well-defined - usually it is called 256 ->sync_page() locking rules are not well-defined - usually it is called
257 with lock on page, but that is not guaranteed. Considering the currently 257 with lock on page, but that is not guaranteed. Considering the currently
258 existing instances of this method ->sync_page() itself doesn't look 258 existing instances of this method ->sync_page() itself doesn't look
259 well-defined... 259 well-defined...
260 260
261 ->writepages() is used for periodic writeback and for syscall-initiated 261 ->writepages() is used for periodic writeback and for syscall-initiated
262 sync operations. The address_space should start I/O against at least 262 sync operations. The address_space should start I/O against at least
263 *nr_to_write pages. *nr_to_write must be decremented for each page which is 263 *nr_to_write pages. *nr_to_write must be decremented for each page which is
264 written. The address_space implementation may write more (or less) pages 264 written. The address_space implementation may write more (or less) pages
265 than *nr_to_write asks for, but it should try to be reasonably close. If 265 than *nr_to_write asks for, but it should try to be reasonably close. If
266 nr_to_write is NULL, all dirty pages must be written. 266 nr_to_write is NULL, all dirty pages must be written.
267 267
268 writepages should _only_ write pages which are present on 268 writepages should _only_ write pages which are present on
269 mapping->io_pages. 269 mapping->io_pages.
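
	A simple block-based ->writepages() is usually just a wrapper around
the generic helper (foo_get_block again being a placeholder):

	static int foo_writepages(struct address_space *mapping,
				  struct writeback_control *wbc)
	{
		return mpage_writepages(mapping, wbc, foo_get_block);
	}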
270 270
271 ->set_page_dirty() is called from various places in the kernel 271 ->set_page_dirty() is called from various places in the kernel
272 when the target page is marked as needing writeback. It may be called 272 when the target page is marked as needing writeback. It may be called
273 under spinlock (it cannot block) and is sometimes called with the page 273 under spinlock (it cannot block) and is sometimes called with the page
274 not locked. 274 not locked.
275 275
276 ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some 276 ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
277 filesystems and by the swapper. The latter will eventually go away. All 277 filesystems and by the swapper. The latter will eventually go away. All
278 instances do not actually need the BKL. Please, keep it that way and don't 278 instances do not actually need the BKL. Please, keep it that way and don't
279 breed new callers. 279 breed new callers.
280 280
281 ->invalidatepage() is called when the filesystem must attempt to drop 281 ->invalidatepage() is called when the filesystem must attempt to drop
282 some or all of the buffers from the page when it is being truncated. It 282 some or all of the buffers from the page when it is being truncated. It
283 returns zero on success. If ->invalidatepage is zero, the kernel uses 283 returns zero on success. If ->invalidatepage is zero, the kernel uses
284 block_invalidatepage() instead. 284 block_invalidatepage() instead.
285 285
286 ->releasepage() is called when the kernel is about to try to drop the 286 ->releasepage() is called when the kernel is about to try to drop the
287 buffers from the page in preparation for freeing it. It returns zero to 287 buffers from the page in preparation for freeing it. It returns zero to
288 indicate that the buffers are (or may be) freeable. If ->releasepage is zero, 288 indicate that the buffers are (or may be) freeable. If ->releasepage is zero,
289 the kernel assumes that the fs has no private interest in the buffers. 289 the kernel assumes that the fs has no private interest in the buffers.
290 290
291 ->launder_page() may be called prior to releasing a page if 291 ->launder_page() may be called prior to releasing a page if
292 it is still found to be dirty. It returns zero if the page was successfully 292 it is still found to be dirty. It returns zero if the page was successfully
293 cleaned, or an error value if not. Note that in order to prevent the page 293 cleaned, or an error value if not. Note that in order to prevent the page
294 getting mapped back in and redirtied, it needs to be kept locked 294 getting mapped back in and redirtied, it needs to be kept locked
295 across the entire operation. 295 across the entire operation.
296 296
297 Note: currently almost all instances of address_space methods are 297 Note: currently almost all instances of address_space methods are
298 using BKL for internal serialization and that's one of the worst sources 298 using BKL for internal serialization and that's one of the worst sources
299 of contention. Normally they are calling library functions (in fs/buffer.c) 299 of contention. Normally they are calling library functions (in fs/buffer.c)
300 and pass foo_get_block() as a callback (on local block-based filesystems, 300 and pass foo_get_block() as a callback (on local block-based filesystems,
301 indeed). BKL is not needed for library stuff and is usually taken by 301 indeed). BKL is not needed for library stuff and is usually taken by
302 foo_get_block(). It's an overkill, since block bitmaps can be protected by 302 foo_get_block(). It's an overkill, since block bitmaps can be protected by
303 internal fs locking and real critical areas are much smaller than the areas 303 internal fs locking and real critical areas are much smaller than the areas
304 filesystems protect now. 304 filesystems protect now.
305 305
306 ----------------------- file_lock_operations ------------------------------ 306 ----------------------- file_lock_operations ------------------------------
307 prototypes: 307 prototypes:
308 void (*fl_insert)(struct file_lock *); /* lock insertion callback */ 308 void (*fl_insert)(struct file_lock *); /* lock insertion callback */
309 void (*fl_remove)(struct file_lock *); /* lock removal callback */ 309 void (*fl_remove)(struct file_lock *); /* lock removal callback */
310 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 310 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
311 void (*fl_release_private)(struct file_lock *); 311 void (*fl_release_private)(struct file_lock *);
312 312
313 313
314 locking rules: 314 locking rules:
315 BKL may block 315 BKL may block
316 fl_insert: yes no 316 fl_insert: yes no
317 fl_remove: yes no 317 fl_remove: yes no
318 fl_copy_lock: yes no 318 fl_copy_lock: yes no
319 fl_release_private: yes yes 319 fl_release_private: yes yes
320 320
321 ----------------------- lock_manager_operations --------------------------- 321 ----------------------- lock_manager_operations ---------------------------
322 prototypes: 322 prototypes:
323 int (*fl_compare_owner)(struct file_lock *, struct file_lock *); 323 int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
324 void (*fl_notify)(struct file_lock *); /* unblock callback */ 324 void (*fl_notify)(struct file_lock *); /* unblock callback */
325 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
326 void (*fl_release_private)(struct file_lock *); 325 void (*fl_release_private)(struct file_lock *);
327 void (*fl_break)(struct file_lock *); /* break_lease callback */ 326 void (*fl_break)(struct file_lock *); /* break_lease callback */
328 327
329 locking rules: 328 locking rules:
330 BKL may block 329 BKL may block
331 fl_compare_owner: yes no 330 fl_compare_owner: yes no
332 fl_notify: yes no 331 fl_notify: yes no
333 fl_copy_lock: yes no
334 fl_release_private: yes yes 332 fl_release_private: yes yes
335 fl_break: yes no 333 fl_break: yes no
336 334
337 Currently only NFSD and NLM provide instances of this class. None of 335 Currently only NFSD and NLM provide instances of this class. None of
338 them block. If you have out-of-tree instances - please, show up. Locking 336 them block. If you have out-of-tree instances - please, show up. Locking
339 in that area will change. 337 in that area will change.
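
	Purely for illustration, an instance of this class after this change
would only wire up the remaining methods (the foo_* names are hypothetical):

	static const struct lock_manager_operations foo_lock_ops = {
		.fl_compare_owner	= foo_compare_owner,
		.fl_notify		= foo_notify,
		.fl_release_private	= foo_release_private,
		.fl_break		= foo_break,
	};
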
340 --------------------------- buffer_head ----------------------------------- 338 --------------------------- buffer_head -----------------------------------
341 prototypes: 339 prototypes:
342 void (*b_end_io)(struct buffer_head *bh, int uptodate); 340 void (*b_end_io)(struct buffer_head *bh, int uptodate);
343 341
344 locking rules: 342 locking rules:
345 called from interrupts. In other words, extreme care is needed here. 343 called from interrupts. In other words, extreme care is needed here.
346 bh is locked, but that's all warranties we have here. Currently only RAID1, 344 bh is locked, but that's all warranties we have here. Currently only RAID1,
347 highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices 345 highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
348 call this method upon the IO completion. 346 call this method upon the IO completion.
349 347
350 --------------------------- block_device_operations ----------------------- 348 --------------------------- block_device_operations -----------------------
351 prototypes: 349 prototypes:
352 int (*open) (struct block_device *, fmode_t); 350 int (*open) (struct block_device *, fmode_t);
353 int (*release) (struct gendisk *, fmode_t); 351 int (*release) (struct gendisk *, fmode_t);
354 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 352 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
355 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 353 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
356 int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); 354 int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
357 int (*media_changed) (struct gendisk *); 355 int (*media_changed) (struct gendisk *);
358 void (*unlock_native_capacity) (struct gendisk *); 356 void (*unlock_native_capacity) (struct gendisk *);
359 int (*revalidate_disk) (struct gendisk *); 357 int (*revalidate_disk) (struct gendisk *);
360 int (*getgeo)(struct block_device *, struct hd_geometry *); 358 int (*getgeo)(struct block_device *, struct hd_geometry *);
361 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 359 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
362 360
363 locking rules: 361 locking rules:
364 BKL bd_mutex 362 BKL bd_mutex
365 open: no yes 363 open: no yes
366 release: no yes 364 release: no yes
367 ioctl: no no 365 ioctl: no no
368 compat_ioctl: no no 366 compat_ioctl: no no
369 direct_access: no no 367 direct_access: no no
370 media_changed: no no 368 media_changed: no no
371 unlock_native_capacity: no no 369 unlock_native_capacity: no no
372 revalidate_disk: no no 370 revalidate_disk: no no
373 getgeo: no no 371 getgeo: no no
374 swap_slot_free_notify: no no (see below) 372 swap_slot_free_notify: no no (see below)
375 373
376 media_changed, unlock_native_capacity and revalidate_disk are called only from 374 media_changed, unlock_native_capacity and revalidate_disk are called only from
377 check_disk_change(). 375 check_disk_change().
378 376
379 swap_slot_free_notify is called with swap_lock and sometimes the page lock 377 swap_slot_free_notify is called with swap_lock and sometimes the page lock
380 held. 378 held.
381 379
382 380
383 --------------------------- file_operations ------------------------------- 381 --------------------------- file_operations -------------------------------
384 prototypes: 382 prototypes:
385 loff_t (*llseek) (struct file *, loff_t, int); 383 loff_t (*llseek) (struct file *, loff_t, int);
386 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 384 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
387 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 385 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
388 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 386 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
389 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 387 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
390 int (*readdir) (struct file *, void *, filldir_t); 388 int (*readdir) (struct file *, void *, filldir_t);
391 unsigned int (*poll) (struct file *, struct poll_table_struct *); 389 unsigned int (*poll) (struct file *, struct poll_table_struct *);
392 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 390 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
393 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 391 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
394 int (*mmap) (struct file *, struct vm_area_struct *); 392 int (*mmap) (struct file *, struct vm_area_struct *);
395 int (*open) (struct inode *, struct file *); 393 int (*open) (struct inode *, struct file *);
396 int (*flush) (struct file *); 394 int (*flush) (struct file *);
397 int (*release) (struct inode *, struct file *); 395 int (*release) (struct inode *, struct file *);
398 int (*fsync) (struct file *, int datasync); 396 int (*fsync) (struct file *, int datasync);
399 int (*aio_fsync) (struct kiocb *, int datasync); 397 int (*aio_fsync) (struct kiocb *, int datasync);
400 int (*fasync) (int, struct file *, int); 398 int (*fasync) (int, struct file *, int);
401 int (*lock) (struct file *, int, struct file_lock *); 399 int (*lock) (struct file *, int, struct file_lock *);
402 ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, 400 ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
403 loff_t *); 401 loff_t *);
404 ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, 402 ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
405 loff_t *); 403 loff_t *);
406 ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, 404 ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
407 void __user *); 405 void __user *);
408 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, 406 ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
409 loff_t *, int); 407 loff_t *, int);
410 unsigned long (*get_unmapped_area)(struct file *, unsigned long, 408 unsigned long (*get_unmapped_area)(struct file *, unsigned long,
411 unsigned long, unsigned long, unsigned long); 409 unsigned long, unsigned long, unsigned long);
412 int (*check_flags)(int); 410 int (*check_flags)(int);
413 }; 411 };
414 412
415 locking rules: 413 locking rules:
416 All may block. 414 All may block.
417 BKL 415 BKL
418 llseek: no (see below) 416 llseek: no (see below)
419 read: no 417 read: no
420 aio_read: no 418 aio_read: no
421 write: no 419 write: no
422 aio_write: no 420 aio_write: no
423 readdir: no 421 readdir: no
424 poll: no 422 poll: no
425 unlocked_ioctl: no 423 unlocked_ioctl: no
426 compat_ioctl: no 424 compat_ioctl: no
427 mmap: no 425 mmap: no
428 open: no 426 open: no
429 flush: no 427 flush: no
430 release: no 428 release: no
431 fsync: no (see below) 429 fsync: no (see below)
432 aio_fsync: no 430 aio_fsync: no
433 fasync: no 431 fasync: no
434 lock: yes 432 lock: yes
435 readv: no 433 readv: no
436 writev: no 434 writev: no
437 sendfile: no 435 sendfile: no
438 sendpage: no 436 sendpage: no
439 get_unmapped_area: no 437 get_unmapped_area: no
440 check_flags: no 438 check_flags: no
441 439
442 ->llseek() locking has moved from llseek to the individual llseek 440 ->llseek() locking has moved from llseek to the individual llseek
443 implementations. If your fs is not using generic_file_llseek, you 441 implementations. If your fs is not using generic_file_llseek, you
444 need to acquire and release the appropriate locks in your ->llseek(). 442 need to acquire and release the appropriate locks in your ->llseek().
445 For many filesystems, it is probably safe to acquire the inode 443 For many filesystems, it is probably safe to acquire the inode
446 mutex or just to use i_size_read() instead. 444 mutex or just to use i_size_read() instead.
447 Note: this does not protect the file->f_pos against concurrent modifications 445 Note: this does not protect the file->f_pos against concurrent modifications
448 since this is something the userspace has to take care about. 446 since this is something the userspace has to take care about.
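
	For instance, a sketch of a ->llseek() that serializes on i_mutex
(assuming generic_file_llseek_unlocked(), which kernels of this vintage
provide; foo_llseek is hypothetical):

	static loff_t foo_llseek(struct file *file, loff_t offset, int origin)
	{
		struct inode *inode = file->f_mapping->host;
		loff_t ret;

		mutex_lock(&inode->i_mutex);
		ret = generic_file_llseek_unlocked(file, offset, origin);
		mutex_unlock(&inode->i_mutex);
		return ret;
	}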
449 447
450 Note: ext2_release() was *the* source of contention on fs-intensive 448 Note: ext2_release() was *the* source of contention on fs-intensive
451 loads and dropping BKL on ->release() helps to get rid of that (we still 449 loads and dropping BKL on ->release() helps to get rid of that (we still
452 grab BKL for cases when we close a file that had been opened r/w, but that 450 grab BKL for cases when we close a file that had been opened r/w, but that
453 can and should be done using the internal locking with smaller critical areas). 451 can and should be done using the internal locking with smaller critical areas).
454 Current worst offender is ext2_get_block()... 452 Current worst offender is ext2_get_block()...
455 453
456 ->fasync() is called without BKL protection, and is responsible for 454 ->fasync() is called without BKL protection, and is responsible for
457 maintaining the FASYNC bit in filp->f_flags. Most instances call 455 maintaining the FASYNC bit in filp->f_flags. Most instances call
458 fasync_helper(), which does that maintenance, so it's not normally 456 fasync_helper(), which does that maintenance, so it's not normally
459 something one needs to worry about. Return values > 0 will be mapped to 457 something one needs to worry about. Return values > 0 will be mapped to
460 zero in the VFS layer. 458 zero in the VFS layer.
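
	A minimal ->fasync() sketch (everything except fasync_helper() is a
hypothetical name):

	static int foo_fasync(int fd, struct file *filp, int on)
	{
		struct foo_device *dev = filp->private_data;

		/* fasync_helper() maintains FASYNC in filp->f_flags */
		return fasync_helper(fd, filp, on, &dev->fasync_queue);
	}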
461 459
462 ->readdir() and ->ioctl() on directories must be changed. Ideally we would 460 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
463 move ->readdir() to inode_operations and use a separate method for directory 461 move ->readdir() to inode_operations and use a separate method for directory
464 ->ioctl() or kill the latter completely. One of the problems is that for 462 ->ioctl() or kill the latter completely. One of the problems is that for
465 anything that resembles union-mount we won't have a struct file for all 463 anything that resembles union-mount we won't have a struct file for all
466 components. And there are other reasons why the current interface is a mess... 464 components. And there are other reasons why the current interface is a mess...
467 465
468 ->read on directories probably must go away - we should just enforce -EISDIR 466 ->read on directories probably must go away - we should just enforce -EISDIR
469 in sys_read() and friends. 467 in sys_read() and friends.
470 468
471 ->fsync() has i_mutex on inode. 469 ->fsync() has i_mutex on inode.
472 470
473 --------------------------- dquot_operations ------------------------------- 471 --------------------------- dquot_operations -------------------------------
474 prototypes: 472 prototypes:
475 int (*write_dquot) (struct dquot *); 473 int (*write_dquot) (struct dquot *);
476 int (*acquire_dquot) (struct dquot *); 474 int (*acquire_dquot) (struct dquot *);
477 int (*release_dquot) (struct dquot *); 475 int (*release_dquot) (struct dquot *);
478 int (*mark_dirty) (struct dquot *); 476 int (*mark_dirty) (struct dquot *);
479 int (*write_info) (struct super_block *, int); 477 int (*write_info) (struct super_block *, int);
480 478
481 These operations are intended to be more or less wrapping functions that ensure 479 These operations are intended to be more or less wrapping functions that ensure
482 a proper locking wrt the filesystem and call the generic quota operations. 480 a proper locking wrt the filesystem and call the generic quota operations.
483 481
484 What filesystem should expect from the generic quota functions: 482 What filesystem should expect from the generic quota functions:
485 483
486 FS recursion Held locks when called 484 FS recursion Held locks when called
487 write_dquot: yes dqonoff_sem or dqptr_sem 485 write_dquot: yes dqonoff_sem or dqptr_sem
488 acquire_dquot: yes dqonoff_sem or dqptr_sem 486 acquire_dquot: yes dqonoff_sem or dqptr_sem
489 release_dquot: yes dqonoff_sem or dqptr_sem 487 release_dquot: yes dqonoff_sem or dqptr_sem
490 mark_dirty: no - 488 mark_dirty: no -
491 write_info: yes dqonoff_sem 489 write_info: yes dqonoff_sem
492 490
493 FS recursion means calling ->quota_read() and ->quota_write() from superblock 491 FS recursion means calling ->quota_read() and ->quota_write() from superblock
494 operations. 492 operations.
495 493
496 More details about quota locking can be found in fs/dquot.c. 494 More details about quota locking can be found in fs/dquot.c.
497 495
498 --------------------------- vm_operations_struct ----------------------------- 496 --------------------------- vm_operations_struct -----------------------------
499 prototypes: 497 prototypes:
500 void (*open)(struct vm_area_struct*); 498 void (*open)(struct vm_area_struct*);
501 void (*close)(struct vm_area_struct*); 499 void (*close)(struct vm_area_struct*);
502 int (*fault)(struct vm_area_struct*, struct vm_fault *); 500 int (*fault)(struct vm_area_struct*, struct vm_fault *);
503 int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); 501 int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
504 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); 502 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
505 503
506 locking rules: 504 locking rules:
507 BKL mmap_sem PageLocked(page) 505 BKL mmap_sem PageLocked(page)
508 open: no yes 506 open: no yes
509 close: no yes 507 close: no yes
510 fault: no yes can return with page locked 508 fault: no yes can return with page locked
511 page_mkwrite: no yes can return with page locked 509 page_mkwrite: no yes can return with page locked
512 access: no yes 510 access: no yes
513 511
514 ->fault() is called when a previously not present pte is about 512 ->fault() is called when a previously not present pte is about
515 to be faulted in. The filesystem must find and return the page associated 513 to be faulted in. The filesystem must find and return the page associated
516 with the passed in "pgoff" in the vm_fault structure. If it is possible that 514 with the passed in "pgoff" in the vm_fault structure. If it is possible that
517 the page may be truncated and/or invalidated, then the filesystem must lock 515 the page may be truncated and/or invalidated, then the filesystem must lock
518 the page, then ensure it is not already truncated (the page lock will block 516 the page, then ensure it is not already truncated (the page lock will block
519 subsequent truncate), and then return with VM_FAULT_LOCKED, and the page 517 subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
520 locked. The VM will unlock the page. 518 locked. The VM will unlock the page.
521 519
522 ->page_mkwrite() is called when a previously read-only pte is 520 ->page_mkwrite() is called when a previously read-only pte is
523 about to become writeable. The filesystem again must ensure that there are 521 about to become writeable. The filesystem again must ensure that there are
524 no truncate/invalidate races, and then return with the page locked. If 522 no truncate/invalidate races, and then return with the page locked. If
525 the page has been truncated, the filesystem should not look up a new page 523 the page has been truncated, the filesystem should not look up a new page
526 like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which 524 like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
527 will cause the VM to retry the fault. 525 will cause the VM to retry the fault.
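
	As an example (hypothetical foo_page_mkwrite; the filesystem-specific
work is elided), the truncate check described above typically looks like:

	static int foo_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct page *page = vmf->page;
		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;

		lock_page(page);
		if (page->mapping != inode->i_mapping) {
			/* raced with truncate - let the VM retry the fault */
			unlock_page(page);
			return VM_FAULT_NOPAGE;
		}
		/* ... reserve blocks / mark the page writable ... */
		return VM_FAULT_LOCKED;	/* returned locked, the VM unlocks it */
	}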
528 526
529 ->access() is called when get_user_pages() fails in 527 ->access() is called when get_user_pages() fails in
530 access_process_vm(), typically used to debug a process through 528 access_process_vm(), typically used to debug a process through
531 /proc/pid/mem or ptrace. This function is needed only for 529 /proc/pid/mem or ptrace. This function is needed only for
532 VM_IO | VM_PFNMAP VMAs. 530 VM_IO | VM_PFNMAP VMAs.
533 531
534 ================================================================================ 532 ================================================================================
535 Dubious stuff 533 Dubious stuff
536 534
537 (if you break something or notice that it is broken and do not fix it yourself 535 (if you break something or notice that it is broken and do not fix it yourself
538 - at least put it here) 536 - at least put it here)
539 537
540 ipc/shm.c::shm_delete() - may need BKL. 538 ipc/shm.c::shm_delete() - may need BKL.
541 ->read() and ->write() in many drivers are (probably) missing BKL. 539 ->read() and ->write() in many drivers are (probably) missing BKL.
542 540
1 /* 1 /*
2 * linux/fs/locks.c 2 * linux/fs/locks.c
3 * 3 *
4 * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. 4 * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
5 * Doug Evans (dje@spiff.uucp), August 07, 1992 5 * Doug Evans (dje@spiff.uucp), August 07, 1992
6 * 6 *
7 * Deadlock detection added. 7 * Deadlock detection added.
8 * FIXME: one thing isn't handled yet: 8 * FIXME: one thing isn't handled yet:
9 * - mandatory locks (requires lots of changes elsewhere) 9 * - mandatory locks (requires lots of changes elsewhere)
10 * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994. 10 * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994.
11 * 11 *
12 * Miscellaneous edits, and a total rewrite of posix_lock_file() code. 12 * Miscellaneous edits, and a total rewrite of posix_lock_file() code.
13 * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 13 * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
14 * 14 *
15 * Converted file_lock_table to a linked list from an array, which eliminates 15 * Converted file_lock_table to a linked list from an array, which eliminates
16 * the limits on how many active file locks are open. 16 * the limits on how many active file locks are open.
17 * Chad Page (pageone@netcom.com), November 27, 1994 17 * Chad Page (pageone@netcom.com), November 27, 1994
18 * 18 *
19 * Removed dependency on file descriptors. dup()'ed file descriptors now 19 * Removed dependency on file descriptors. dup()'ed file descriptors now
20 * get the same locks as the original file descriptors, and a close() on 20 * get the same locks as the original file descriptors, and a close() on
21 * any file descriptor removes ALL the locks on the file for the current 21 * any file descriptor removes ALL the locks on the file for the current
22 * process. Since locks still depend on the process id, locks are inherited 22 * process. Since locks still depend on the process id, locks are inherited
23 * after an exec() but not after a fork(). This agrees with POSIX, and both 23 * after an exec() but not after a fork(). This agrees with POSIX, and both
24 * BSD and SVR4 practice. 24 * BSD and SVR4 practice.
25 * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995 25 * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995
26 * 26 *
27 * Scrapped free list which is redundant now that we allocate locks 27 * Scrapped free list which is redundant now that we allocate locks
28 * dynamically with kmalloc()/kfree(). 28 * dynamically with kmalloc()/kfree().
29 * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995 29 * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995
30 * 30 *
31 * Implemented two lock personalities - FL_FLOCK and FL_POSIX. 31 * Implemented two lock personalities - FL_FLOCK and FL_POSIX.
32 * 32 *
33 * FL_POSIX locks are created with calls to fcntl() and lockf() through the 33 * FL_POSIX locks are created with calls to fcntl() and lockf() through the
34 * fcntl() system call. They have the semantics described above. 34 * fcntl() system call. They have the semantics described above.
35 * 35 *
36 * FL_FLOCK locks are created with calls to flock(), through the flock() 36 * FL_FLOCK locks are created with calls to flock(), through the flock()
37 * system call, which is new. Old C libraries implement flock() via fcntl() 37 * system call, which is new. Old C libraries implement flock() via fcntl()
38 * and will continue to use the old, broken implementation. 38 * and will continue to use the old, broken implementation.
39 * 39 *
40 * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated 40 * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated
41 * with a file pointer (filp). As a result they can be shared by a parent 41 * with a file pointer (filp). As a result they can be shared by a parent
42 * process and its children after a fork(). They are removed when the last 42 * process and its children after a fork(). They are removed when the last
43 * file descriptor referring to the file pointer is closed (unless explicitly 43 * file descriptor referring to the file pointer is closed (unless explicitly
44 * unlocked). 44 * unlocked).
45 * 45 *
46 * FL_FLOCK locks never deadlock, an existing lock is always removed before 46 * FL_FLOCK locks never deadlock, an existing lock is always removed before
47 * upgrading from shared to exclusive (or vice versa). When this happens 47 * upgrading from shared to exclusive (or vice versa). When this happens
48 * any processes blocked by the current lock are woken up and allowed to 48 * any processes blocked by the current lock are woken up and allowed to
49 * run before the new lock is applied. 49 * run before the new lock is applied.
50 * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 50 * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
51 * 51 *
52 * Removed some race conditions in flock_lock_file(), marked other possible 52 * Removed some race conditions in flock_lock_file(), marked other possible
53 * races. Just grep for FIXME to see them. 53 * races. Just grep for FIXME to see them.
54 * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. 54 * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
55 * 55 *
56 * Addressed Dmitry's concerns. Deadlock checking no longer recursive. 56 * Addressed Dmitry's concerns. Deadlock checking no longer recursive.
57 * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep 57 * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep
58 * once we've checked for blocking and deadlocking. 58 * once we've checked for blocking and deadlocking.
59 * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996. 59 * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996.
60 * 60 *
61 * Initial implementation of mandatory locks. SunOS turned out to be 61 * Initial implementation of mandatory locks. SunOS turned out to be
62 * a rotten model, so I implemented the "obvious" semantics. 62 * a rotten model, so I implemented the "obvious" semantics.
63 * See 'Documentation/mandatory.txt' for details. 63 * See 'Documentation/mandatory.txt' for details.
64 * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. 64 * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
65 * 65 *
66 * Don't allow mandatory locks on mmap()'ed files. Added simple functions to 66 * Don't allow mandatory locks on mmap()'ed files. Added simple functions to
67 * check if a file has mandatory locks, used by mmap(), open() and creat() to 67 * check if a file has mandatory locks, used by mmap(), open() and creat() to
68 * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference 68 * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference
69 * Manual, Section 2. 69 * Manual, Section 2.
70 * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996. 70 * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996.
71 * 71 *
72 * Tidied up block list handling. Added '/proc/locks' interface. 72 * Tidied up block list handling. Added '/proc/locks' interface.
73 * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996. 73 * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996.
74 * 74 *
75 * Fixed deadlock condition for pathological code that mixes calls to 75 * Fixed deadlock condition for pathological code that mixes calls to
76 * flock() and fcntl(). 76 * flock() and fcntl().
77 * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996. 77 * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996.
78 * 78 *
79 * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use 79 * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use
80 * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to 80 * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to
81 * guarantee sensible behaviour in the case where file system modules might 81 * guarantee sensible behaviour in the case where file system modules might
82 * be compiled with different options than the kernel itself. 82 * be compiled with different options than the kernel itself.
83 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. 83 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
84 * 84 *
85 * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel 85 * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel
86 * (Thomas.Meckel@mni.fh-giessen.de) for spotting this. 86 * (Thomas.Meckel@mni.fh-giessen.de) for spotting this.
87 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. 87 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
88 * 88 *
89 * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK 89 * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK
90 * locks. Changed process synchronisation to avoid dereferencing locks that 90 * locks. Changed process synchronisation to avoid dereferencing locks that
91 * have already been freed. 91 * have already been freed.
92 * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996. 92 * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996.
93 * 93 *
94 * Made the block list a circular list to minimise searching in the list. 94 * Made the block list a circular list to minimise searching in the list.
95 * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996. 95 * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996.
96 * 96 *
97 * Made mandatory locking a mount option. Default is not to allow mandatory 97 * Made mandatory locking a mount option. Default is not to allow mandatory
98 * locking. 98 * locking.
99 * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. 99 * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996.
100 * 100 *
101 * Some adaptations for NFS support. 101 * Some adaptations for NFS support.
102 * Olaf Kirch (okir@monad.swb.de), Dec 1996, 102 * Olaf Kirch (okir@monad.swb.de), Dec 1996,
103 * 103 *
104 * Fixed /proc/locks interface so that we can't overrun the buffer we are handed. 104 * Fixed /proc/locks interface so that we can't overrun the buffer we are handed.
105 * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997. 105 * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997.
106 * 106 *
107 * Use slab allocator instead of kmalloc/kfree. 107 * Use slab allocator instead of kmalloc/kfree.
108 * Use generic list implementation from <linux/list.h>. 108 * Use generic list implementation from <linux/list.h>.
109 * Sped up posix_locks_deadlock by only considering blocked locks. 109 * Sped up posix_locks_deadlock by only considering blocked locks.
110 * Matthew Wilcox <willy@debian.org>, March, 2000. 110 * Matthew Wilcox <willy@debian.org>, March, 2000.
111 * 111 *
112 * Leases and LOCK_MAND 112 * Leases and LOCK_MAND
113 * Matthew Wilcox <willy@debian.org>, June, 2000. 113 * Matthew Wilcox <willy@debian.org>, June, 2000.
114 * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. 114 * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
115 */ 115 */
116 116
117 #include <linux/capability.h> 117 #include <linux/capability.h>
118 #include <linux/file.h> 118 #include <linux/file.h>
119 #include <linux/fdtable.h> 119 #include <linux/fdtable.h>
120 #include <linux/fs.h> 120 #include <linux/fs.h>
121 #include <linux/init.h> 121 #include <linux/init.h>
122 #include <linux/module.h> 122 #include <linux/module.h>
123 #include <linux/security.h> 123 #include <linux/security.h>
124 #include <linux/slab.h> 124 #include <linux/slab.h>
125 #include <linux/smp_lock.h> 125 #include <linux/smp_lock.h>
126 #include <linux/syscalls.h> 126 #include <linux/syscalls.h>
127 #include <linux/time.h> 127 #include <linux/time.h>
128 #include <linux/rcupdate.h> 128 #include <linux/rcupdate.h>
129 #include <linux/pid_namespace.h> 129 #include <linux/pid_namespace.h>
130 130
131 #include <asm/uaccess.h> 131 #include <asm/uaccess.h>
132 132
133 #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 133 #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
134 #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 134 #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
135 #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) 135 #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
136 136
137 int leases_enable = 1; 137 int leases_enable = 1;
138 int lease_break_time = 45; 138 int lease_break_time = 45;
139 139
140 #define for_each_lock(inode, lockp) \ 140 #define for_each_lock(inode, lockp) \
141 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) 141 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
142 142
143 static LIST_HEAD(file_lock_list); 143 static LIST_HEAD(file_lock_list);
144 static LIST_HEAD(blocked_list); 144 static LIST_HEAD(blocked_list);
145 static DEFINE_SPINLOCK(file_lock_lock); 145 static DEFINE_SPINLOCK(file_lock_lock);
146 146
147 /* 147 /*
148 * Protects the two list heads above, plus the inode->i_flock list 148 * Protects the two list heads above, plus the inode->i_flock list
149 * FIXME: should use a spinlock, once lockd and ceph are ready. 149 * FIXME: should use a spinlock, once lockd and ceph are ready.
150 */ 150 */
151 void lock_flocks(void) 151 void lock_flocks(void)
152 { 152 {
153 spin_lock(&file_lock_lock); 153 spin_lock(&file_lock_lock);
154 } 154 }
155 EXPORT_SYMBOL_GPL(lock_flocks); 155 EXPORT_SYMBOL_GPL(lock_flocks);
156 156
157 void unlock_flocks(void) 157 void unlock_flocks(void)
158 { 158 {
159 spin_unlock(&file_lock_lock); 159 spin_unlock(&file_lock_lock);
160 } 160 }
161 EXPORT_SYMBOL_GPL(unlock_flocks); 161 EXPORT_SYMBOL_GPL(unlock_flocks);
162 162
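For reference, lock_flocks()/unlock_flocks() are the serialisation points for the two global lists above and for every inode's i_flock chain; since the implementation is a spinlock here, nothing inside the critical section may sleep. A minimal sketch of a guarded walk, assuming it lives inside fs/locks.c so the IS_POSIX() macro is visible; the helper name count_posix_locks() is hypothetical and not part of the kernel:

    /* Hypothetical helper: count POSIX locks on an inode while holding
     * the file-lock spinlock via lock_flocks(). */
    static int count_posix_locks(struct inode *inode)
    {
            struct file_lock *fl;
            int n = 0;

            lock_flocks();          /* spinlock: no sleeping below */
            for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
                    if (IS_POSIX(fl))
                            n++;
            }
            unlock_flocks();
            return n;
    }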
163 static struct kmem_cache *filelock_cache __read_mostly; 163 static struct kmem_cache *filelock_cache __read_mostly;
164 164
165 /* Allocate an empty lock structure. */ 165 /* Allocate an empty lock structure. */
166 struct file_lock *locks_alloc_lock(void) 166 struct file_lock *locks_alloc_lock(void)
167 { 167 {
168 return kmem_cache_alloc(filelock_cache, GFP_KERNEL); 168 return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
169 } 169 }
170 EXPORT_SYMBOL_GPL(locks_alloc_lock); 170 EXPORT_SYMBOL_GPL(locks_alloc_lock);
171 171
172 void locks_release_private(struct file_lock *fl) 172 void locks_release_private(struct file_lock *fl)
173 { 173 {
174 if (fl->fl_ops) { 174 if (fl->fl_ops) {
175 if (fl->fl_ops->fl_release_private) 175 if (fl->fl_ops->fl_release_private)
176 fl->fl_ops->fl_release_private(fl); 176 fl->fl_ops->fl_release_private(fl);
177 fl->fl_ops = NULL; 177 fl->fl_ops = NULL;
178 } 178 }
179 if (fl->fl_lmops) { 179 if (fl->fl_lmops) {
180 if (fl->fl_lmops->fl_release_private) 180 if (fl->fl_lmops->fl_release_private)
181 fl->fl_lmops->fl_release_private(fl); 181 fl->fl_lmops->fl_release_private(fl);
182 fl->fl_lmops = NULL; 182 fl->fl_lmops = NULL;
183 } 183 }
184 184
185 } 185 }
186 EXPORT_SYMBOL_GPL(locks_release_private); 186 EXPORT_SYMBOL_GPL(locks_release_private);
187 187
188 /* Free a lock which is not in use. */ 188 /* Free a lock which is not in use. */
189 void locks_free_lock(struct file_lock *fl) 189 void locks_free_lock(struct file_lock *fl)
190 { 190 {
191 BUG_ON(waitqueue_active(&fl->fl_wait)); 191 BUG_ON(waitqueue_active(&fl->fl_wait));
192 BUG_ON(!list_empty(&fl->fl_block)); 192 BUG_ON(!list_empty(&fl->fl_block));
193 BUG_ON(!list_empty(&fl->fl_link)); 193 BUG_ON(!list_empty(&fl->fl_link));
194 194
195 locks_release_private(fl); 195 locks_release_private(fl);
196 kmem_cache_free(filelock_cache, fl); 196 kmem_cache_free(filelock_cache, fl);
197 } 197 }
198 EXPORT_SYMBOL(locks_free_lock); 198 EXPORT_SYMBOL(locks_free_lock);
199 199
200 void locks_init_lock(struct file_lock *fl) 200 void locks_init_lock(struct file_lock *fl)
201 { 201 {
202 INIT_LIST_HEAD(&fl->fl_link); 202 INIT_LIST_HEAD(&fl->fl_link);
203 INIT_LIST_HEAD(&fl->fl_block); 203 INIT_LIST_HEAD(&fl->fl_block);
204 init_waitqueue_head(&fl->fl_wait); 204 init_waitqueue_head(&fl->fl_wait);
205 fl->fl_next = NULL; 205 fl->fl_next = NULL;
206 fl->fl_fasync = NULL; 206 fl->fl_fasync = NULL;
207 fl->fl_owner = NULL; 207 fl->fl_owner = NULL;
208 fl->fl_pid = 0; 208 fl->fl_pid = 0;
209 fl->fl_nspid = NULL; 209 fl->fl_nspid = NULL;
210 fl->fl_file = NULL; 210 fl->fl_file = NULL;
211 fl->fl_flags = 0; 211 fl->fl_flags = 0;
212 fl->fl_type = 0; 212 fl->fl_type = 0;
213 fl->fl_start = fl->fl_end = 0; 213 fl->fl_start = fl->fl_end = 0;
214 fl->fl_ops = NULL; 214 fl->fl_ops = NULL;
215 fl->fl_lmops = NULL; 215 fl->fl_lmops = NULL;
216 } 216 }
217 217
218 EXPORT_SYMBOL(locks_init_lock); 218 EXPORT_SYMBOL(locks_init_lock);
219 219
220 /* 220 /*
221 * Initialises the fields of the file lock which are invariant for 221 * Initialises the fields of the file lock which are invariant for
222 * free file_locks. 222 * free file_locks.
223 */ 223 */
224 static void init_once(void *foo) 224 static void init_once(void *foo)
225 { 225 {
226 struct file_lock *lock = (struct file_lock *) foo; 226 struct file_lock *lock = (struct file_lock *) foo;
227 227
228 locks_init_lock(lock); 228 locks_init_lock(lock);
229 } 229 }
230 230
231 static void locks_copy_private(struct file_lock *new, struct file_lock *fl) 231 static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
232 { 232 {
233 if (fl->fl_ops) { 233 if (fl->fl_ops) {
234 if (fl->fl_ops->fl_copy_lock) 234 if (fl->fl_ops->fl_copy_lock)
235 fl->fl_ops->fl_copy_lock(new, fl); 235 fl->fl_ops->fl_copy_lock(new, fl);
236 new->fl_ops = fl->fl_ops; 236 new->fl_ops = fl->fl_ops;
237 } 237 }
238 if (fl->fl_lmops) { 238 if (fl->fl_lmops)
239 if (fl->fl_lmops->fl_copy_lock)
240 fl->fl_lmops->fl_copy_lock(new, fl);
241 new->fl_lmops = fl->fl_lmops; 239 new->fl_lmops = fl->fl_lmops;
242 }
243 } 240 }
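This hunk is the point of the patch: with the nfsd hack gone, lock managers no longer get an fl_copy_lock callback, and locks_copy_private() simply shares the fl_lmops pointer. A sketch of what a lock manager's ops table might look like after the change; the mylm_* names are hypothetical, and note that only fl_ops (file_lock_operations) still offers a per-lock copy hook:

    /* Hypothetical lock manager, for illustration only. */
    static int mylm_compare_owner(struct file_lock *fl1, struct file_lock *fl2)
    {
            /* placeholder ownership test; a real manager (e.g. lockd)
             * compares its own notion of the lock owner here */
            return fl1->fl_owner == fl2->fl_owner;
    }

    static const struct lock_manager_operations my_lock_ops = {
            .fl_compare_owner = mylm_compare_owner,
            /* no .fl_copy_lock member any more */
    };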
244 241
245 /* 242 /*
246 * Initialize a new lock from an existing file_lock structure. 243 * Initialize a new lock from an existing file_lock structure.
247 */ 244 */
248 void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) 245 void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
249 { 246 {
250 new->fl_owner = fl->fl_owner; 247 new->fl_owner = fl->fl_owner;
251 new->fl_pid = fl->fl_pid; 248 new->fl_pid = fl->fl_pid;
252 new->fl_file = NULL; 249 new->fl_file = NULL;
253 new->fl_flags = fl->fl_flags; 250 new->fl_flags = fl->fl_flags;
254 new->fl_type = fl->fl_type; 251 new->fl_type = fl->fl_type;
255 new->fl_start = fl->fl_start; 252 new->fl_start = fl->fl_start;
256 new->fl_end = fl->fl_end; 253 new->fl_end = fl->fl_end;
257 new->fl_ops = NULL; 254 new->fl_ops = NULL;
258 new->fl_lmops = NULL; 255 new->fl_lmops = NULL;
259 } 256 }
260 EXPORT_SYMBOL(__locks_copy_lock); 257 EXPORT_SYMBOL(__locks_copy_lock);
261 258
262 void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 259 void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
263 { 260 {
264 locks_release_private(new); 261 locks_release_private(new);
265 262
266 __locks_copy_lock(new, fl); 263 __locks_copy_lock(new, fl);
267 new->fl_file = fl->fl_file; 264 new->fl_file = fl->fl_file;
268 new->fl_ops = fl->fl_ops; 265 new->fl_ops = fl->fl_ops;
269 new->fl_lmops = fl->fl_lmops; 266 new->fl_lmops = fl->fl_lmops;
270 267
271 locks_copy_private(new, fl); 268 locks_copy_private(new, fl);
272 } 269 }
273 270
274 EXPORT_SYMBOL(locks_copy_lock); 271 EXPORT_SYMBOL(locks_copy_lock);
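Usage-wise, locks_copy_lock() is the full copy (fl_file, fl_ops, fl_lmops and any fs-private state via fl_ops->fl_copy_lock), while __locks_copy_lock() deliberately leaves fl_file and the ops pointers NULL. A small sketch of duplicating a lock, mirroring what flock_lock_file() does further down with a freshly allocated lock; example_dup_lock() is a hypothetical helper:

    /* Hypothetical helper: deep-ish copy of an existing lock. */
    static struct file_lock *example_dup_lock(struct file_lock *src)
    {
            struct file_lock *dup = locks_alloc_lock();

            if (!dup)
                    return NULL;
            locks_copy_lock(dup, src);      /* releases dup's (empty) private
                                             * state, then copies everything */
            return dup;
    }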
275 272
276 static inline int flock_translate_cmd(int cmd) { 273 static inline int flock_translate_cmd(int cmd) {
277 if (cmd & LOCK_MAND) 274 if (cmd & LOCK_MAND)
278 return cmd & (LOCK_MAND | LOCK_RW); 275 return cmd & (LOCK_MAND | LOCK_RW);
279 switch (cmd) { 276 switch (cmd) {
280 case LOCK_SH: 277 case LOCK_SH:
281 return F_RDLCK; 278 return F_RDLCK;
282 case LOCK_EX: 279 case LOCK_EX:
283 return F_WRLCK; 280 return F_WRLCK;
284 case LOCK_UN: 281 case LOCK_UN:
285 return F_UNLCK; 282 return F_UNLCK;
286 } 283 }
287 return -EINVAL; 284 return -EINVAL;
288 } 285 }
289 286
290 /* Fill in a file_lock structure with an appropriate FLOCK lock. */ 287 /* Fill in a file_lock structure with an appropriate FLOCK lock. */
291 static int flock_make_lock(struct file *filp, struct file_lock **lock, 288 static int flock_make_lock(struct file *filp, struct file_lock **lock,
292 unsigned int cmd) 289 unsigned int cmd)
293 { 290 {
294 struct file_lock *fl; 291 struct file_lock *fl;
295 int type = flock_translate_cmd(cmd); 292 int type = flock_translate_cmd(cmd);
296 if (type < 0) 293 if (type < 0)
297 return type; 294 return type;
298 295
299 fl = locks_alloc_lock(); 296 fl = locks_alloc_lock();
300 if (fl == NULL) 297 if (fl == NULL)
301 return -ENOMEM; 298 return -ENOMEM;
302 299
303 fl->fl_file = filp; 300 fl->fl_file = filp;
304 fl->fl_pid = current->tgid; 301 fl->fl_pid = current->tgid;
305 fl->fl_flags = FL_FLOCK; 302 fl->fl_flags = FL_FLOCK;
306 fl->fl_type = type; 303 fl->fl_type = type;
307 fl->fl_end = OFFSET_MAX; 304 fl->fl_end = OFFSET_MAX;
308 305
309 *lock = fl; 306 *lock = fl;
310 return 0; 307 return 0;
311 } 308 }
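From userspace, the path into flock_make_lock() is the flock(2) system call (for files without their own ->flock method); LOCK_SH/LOCK_EX/LOCK_UN arrive exactly as handled by flock_translate_cmd() above. An illustrative standalone program; the file name is arbitrary:

    #include <sys/file.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/flock-demo", O_CREAT | O_RDWR, 0644);

            if (fd < 0)
                    return 1;
            if (flock(fd, LOCK_EX) == 0)    /* becomes an F_WRLCK FL_FLOCK lock */
                    puts("exclusive flock taken");
            flock(fd, LOCK_UN);             /* becomes F_UNLCK */
            close(fd);
            return 0;
    }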
312 309
313 static int assign_type(struct file_lock *fl, int type) 310 static int assign_type(struct file_lock *fl, int type)
314 { 311 {
315 switch (type) { 312 switch (type) {
316 case F_RDLCK: 313 case F_RDLCK:
317 case F_WRLCK: 314 case F_WRLCK:
318 case F_UNLCK: 315 case F_UNLCK:
319 fl->fl_type = type; 316 fl->fl_type = type;
320 break; 317 break;
321 default: 318 default:
322 return -EINVAL; 319 return -EINVAL;
323 } 320 }
324 return 0; 321 return 0;
325 } 322 }
326 323
327 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX 324 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
328 * style lock. 325 * style lock.
329 */ 326 */
330 static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, 327 static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
331 struct flock *l) 328 struct flock *l)
332 { 329 {
333 off_t start, end; 330 off_t start, end;
334 331
335 switch (l->l_whence) { 332 switch (l->l_whence) {
336 case SEEK_SET: 333 case SEEK_SET:
337 start = 0; 334 start = 0;
338 break; 335 break;
339 case SEEK_CUR: 336 case SEEK_CUR:
340 start = filp->f_pos; 337 start = filp->f_pos;
341 break; 338 break;
342 case SEEK_END: 339 case SEEK_END:
343 start = i_size_read(filp->f_path.dentry->d_inode); 340 start = i_size_read(filp->f_path.dentry->d_inode);
344 break; 341 break;
345 default: 342 default:
346 return -EINVAL; 343 return -EINVAL;
347 } 344 }
348 345
349 /* POSIX-1996 leaves the case l->l_len < 0 undefined; 346 /* POSIX-1996 leaves the case l->l_len < 0 undefined;
350 POSIX-2001 defines it. */ 347 POSIX-2001 defines it. */
351 start += l->l_start; 348 start += l->l_start;
352 if (start < 0) 349 if (start < 0)
353 return -EINVAL; 350 return -EINVAL;
354 fl->fl_end = OFFSET_MAX; 351 fl->fl_end = OFFSET_MAX;
355 if (l->l_len > 0) { 352 if (l->l_len > 0) {
356 end = start + l->l_len - 1; 353 end = start + l->l_len - 1;
357 fl->fl_end = end; 354 fl->fl_end = end;
358 } else if (l->l_len < 0) { 355 } else if (l->l_len < 0) {
359 end = start - 1; 356 end = start - 1;
360 fl->fl_end = end; 357 fl->fl_end = end;
361 start += l->l_len; 358 start += l->l_len;
362 if (start < 0) 359 if (start < 0)
363 return -EINVAL; 360 return -EINVAL;
364 } 361 }
365 fl->fl_start = start; /* we record the absolute position */ 362 fl->fl_start = start; /* we record the absolute position */
366 if (fl->fl_end < fl->fl_start) 363 if (fl->fl_end < fl->fl_start)
367 return -EOVERFLOW; 364 return -EOVERFLOW;
368 365
369 fl->fl_owner = current->files; 366 fl->fl_owner = current->files;
370 fl->fl_pid = current->tgid; 367 fl->fl_pid = current->tgid;
371 fl->fl_file = filp; 368 fl->fl_file = filp;
372 fl->fl_flags = FL_POSIX; 369 fl->fl_flags = FL_POSIX;
373 fl->fl_ops = NULL; 370 fl->fl_ops = NULL;
374 fl->fl_lmops = NULL; 371 fl->fl_lmops = NULL;
375 372
376 return assign_type(fl, l->l_type); 373 return assign_type(fl, l->l_type);
377 } 374 }
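For orientation, the struct flock passed to fcntl(F_SETLK/F_SETLKW/F_GETLK) from userspace is what flock_to_posix_lock() converts into the absolute [fl_start, fl_end] range above; l_len == 0 leaves fl_end at OFFSET_MAX ("to end of file"). An illustrative blocking request for the first 4 KiB; the helper name is made up:

    #include <fcntl.h>
    #include <unistd.h>

    static int lock_first_4k(int fd)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 4096,       /* recorded as fl_start 0, fl_end 4095 */
            };

            return fcntl(fd, F_SETLKW, &fl);        /* blocking variant */
    }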
378 375
379 #if BITS_PER_LONG == 32 376 #if BITS_PER_LONG == 32
380 static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, 377 static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
381 struct flock64 *l) 378 struct flock64 *l)
382 { 379 {
383 loff_t start; 380 loff_t start;
384 381
385 switch (l->l_whence) { 382 switch (l->l_whence) {
386 case SEEK_SET: 383 case SEEK_SET:
387 start = 0; 384 start = 0;
388 break; 385 break;
389 case SEEK_CUR: 386 case SEEK_CUR:
390 start = filp->f_pos; 387 start = filp->f_pos;
391 break; 388 break;
392 case SEEK_END: 389 case SEEK_END:
393 start = i_size_read(filp->f_path.dentry->d_inode); 390 start = i_size_read(filp->f_path.dentry->d_inode);
394 break; 391 break;
395 default: 392 default:
396 return -EINVAL; 393 return -EINVAL;
397 } 394 }
398 395
399 start += l->l_start; 396 start += l->l_start;
400 if (start < 0) 397 if (start < 0)
401 return -EINVAL; 398 return -EINVAL;
402 fl->fl_end = OFFSET_MAX; 399 fl->fl_end = OFFSET_MAX;
403 if (l->l_len > 0) { 400 if (l->l_len > 0) {
404 fl->fl_end = start + l->l_len - 1; 401 fl->fl_end = start + l->l_len - 1;
405 } else if (l->l_len < 0) { 402 } else if (l->l_len < 0) {
406 fl->fl_end = start - 1; 403 fl->fl_end = start - 1;
407 start += l->l_len; 404 start += l->l_len;
408 if (start < 0) 405 if (start < 0)
409 return -EINVAL; 406 return -EINVAL;
410 } 407 }
411 fl->fl_start = start; /* we record the absolute position */ 408 fl->fl_start = start; /* we record the absolute position */
412 if (fl->fl_end < fl->fl_start) 409 if (fl->fl_end < fl->fl_start)
413 return -EOVERFLOW; 410 return -EOVERFLOW;
414 411
415 fl->fl_owner = current->files; 412 fl->fl_owner = current->files;
416 fl->fl_pid = current->tgid; 413 fl->fl_pid = current->tgid;
417 fl->fl_file = filp; 414 fl->fl_file = filp;
418 fl->fl_flags = FL_POSIX; 415 fl->fl_flags = FL_POSIX;
419 fl->fl_ops = NULL; 416 fl->fl_ops = NULL;
420 fl->fl_lmops = NULL; 417 fl->fl_lmops = NULL;
421 418
422 switch (l->l_type) { 419 switch (l->l_type) {
423 case F_RDLCK: 420 case F_RDLCK:
424 case F_WRLCK: 421 case F_WRLCK:
425 case F_UNLCK: 422 case F_UNLCK:
426 fl->fl_type = l->l_type; 423 fl->fl_type = l->l_type;
427 break; 424 break;
428 default: 425 default:
429 return -EINVAL; 426 return -EINVAL;
430 } 427 }
431 428
432 return (0); 429 return (0);
433 } 430 }
434 #endif 431 #endif
435 432
436 /* default lease lock manager operations */ 433 /* default lease lock manager operations */
437 static void lease_break_callback(struct file_lock *fl) 434 static void lease_break_callback(struct file_lock *fl)
438 { 435 {
439 kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); 436 kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
440 } 437 }
441 438
442 static void lease_release_private_callback(struct file_lock *fl) 439 static void lease_release_private_callback(struct file_lock *fl)
443 { 440 {
444 if (!fl->fl_file) 441 if (!fl->fl_file)
445 return; 442 return;
446 443
447 f_delown(fl->fl_file); 444 f_delown(fl->fl_file);
448 fl->fl_file->f_owner.signum = 0; 445 fl->fl_file->f_owner.signum = 0;
449 } 446 }
450 447
451 static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try) 448 static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
452 { 449 {
453 return fl->fl_file == try->fl_file; 450 return fl->fl_file == try->fl_file;
454 } 451 }
455 452
456 static const struct lock_manager_operations lease_manager_ops = { 453 static const struct lock_manager_operations lease_manager_ops = {
457 .fl_break = lease_break_callback, 454 .fl_break = lease_break_callback,
458 .fl_release_private = lease_release_private_callback, 455 .fl_release_private = lease_release_private_callback,
459 .fl_mylease = lease_mylease_callback, 456 .fl_mylease = lease_mylease_callback,
460 .fl_change = lease_modify, 457 .fl_change = lease_modify,
461 }; 458 };
462 459
463 /* 460 /*
464 * Initialize a lease, use the default lock manager operations 461 * Initialize a lease, use the default lock manager operations
465 */ 462 */
466 static int lease_init(struct file *filp, int type, struct file_lock *fl) 463 static int lease_init(struct file *filp, int type, struct file_lock *fl)
467 { 464 {
468 if (assign_type(fl, type) != 0) 465 if (assign_type(fl, type) != 0)
469 return -EINVAL; 466 return -EINVAL;
470 467
471 fl->fl_owner = current->files; 468 fl->fl_owner = current->files;
472 fl->fl_pid = current->tgid; 469 fl->fl_pid = current->tgid;
473 470
474 fl->fl_file = filp; 471 fl->fl_file = filp;
475 fl->fl_flags = FL_LEASE; 472 fl->fl_flags = FL_LEASE;
476 fl->fl_start = 0; 473 fl->fl_start = 0;
477 fl->fl_end = OFFSET_MAX; 474 fl->fl_end = OFFSET_MAX;
478 fl->fl_ops = NULL; 475 fl->fl_ops = NULL;
479 fl->fl_lmops = &lease_manager_ops; 476 fl->fl_lmops = &lease_manager_ops;
480 return 0; 477 return 0;
481 } 478 }
482 479
483 /* Allocate a file_lock initialised to this type of lease */ 480 /* Allocate a file_lock initialised to this type of lease */
484 static struct file_lock *lease_alloc(struct file *filp, int type) 481 static struct file_lock *lease_alloc(struct file *filp, int type)
485 { 482 {
486 struct file_lock *fl = locks_alloc_lock(); 483 struct file_lock *fl = locks_alloc_lock();
487 int error = -ENOMEM; 484 int error = -ENOMEM;
488 485
489 if (fl == NULL) 486 if (fl == NULL)
490 return ERR_PTR(error); 487 return ERR_PTR(error);
491 488
492 error = lease_init(filp, type, fl); 489 error = lease_init(filp, type, fl);
493 if (error) { 490 if (error) {
494 locks_free_lock(fl); 491 locks_free_lock(fl);
495 return ERR_PTR(error); 492 return ERR_PTR(error);
496 } 493 }
497 return fl; 494 return fl;
498 } 495 }
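For context, leases reach this code through fcntl(F_SETLEASE): the request is turned into an FL_LEASE file_lock by lease_alloc()/lease_init() above, with lease_manager_ops as fl_lmops, and lease_break_callback() is what later raises SIGIO when another open() breaks the lease. An illustrative userspace request for a read lease (which requires a descriptor opened read-only); the helper name is made up:

    #define _GNU_SOURCE             /* F_SETLEASE is Linux-specific */
    #include <fcntl.h>

    static int take_read_lease(int fd)
    {
            /* fd must have been opened O_RDONLY for an F_RDLCK lease */
            return fcntl(fd, F_SETLEASE, F_RDLCK);
    }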
499 496
500 /* Check if two locks overlap each other. 497 /* Check if two locks overlap each other.
501 */ 498 */
502 static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) 499 static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
503 { 500 {
504 return ((fl1->fl_end >= fl2->fl_start) && 501 return ((fl1->fl_end >= fl2->fl_start) &&
505 (fl2->fl_end >= fl1->fl_start)); 502 (fl2->fl_end >= fl1->fl_start));
506 } 503 }
507 504
508 /* 505 /*
509 * Check whether two locks have the same owner. 506 * Check whether two locks have the same owner.
510 */ 507 */
511 static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) 508 static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
512 { 509 {
513 if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) 510 if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
514 return fl2->fl_lmops == fl1->fl_lmops && 511 return fl2->fl_lmops == fl1->fl_lmops &&
515 fl1->fl_lmops->fl_compare_owner(fl1, fl2); 512 fl1->fl_lmops->fl_compare_owner(fl1, fl2);
516 return fl1->fl_owner == fl2->fl_owner; 513 return fl1->fl_owner == fl2->fl_owner;
517 } 514 }
518 515
519 /* Remove waiter from blocker's block list. 516 /* Remove waiter from blocker's block list.
520 * When blocker ends up pointing to itself then the list is empty. 517 * When blocker ends up pointing to itself then the list is empty.
521 */ 518 */
522 static void __locks_delete_block(struct file_lock *waiter) 519 static void __locks_delete_block(struct file_lock *waiter)
523 { 520 {
524 list_del_init(&waiter->fl_block); 521 list_del_init(&waiter->fl_block);
525 list_del_init(&waiter->fl_link); 522 list_del_init(&waiter->fl_link);
526 waiter->fl_next = NULL; 523 waiter->fl_next = NULL;
527 } 524 }
528 525
529 /* 526 /*
530 */ 527 */
531 static void locks_delete_block(struct file_lock *waiter) 528 static void locks_delete_block(struct file_lock *waiter)
532 { 529 {
533 lock_flocks(); 530 lock_flocks();
534 __locks_delete_block(waiter); 531 __locks_delete_block(waiter);
535 unlock_flocks(); 532 unlock_flocks();
536 } 533 }
537 534
538 /* Insert waiter into blocker's block list. 535 /* Insert waiter into blocker's block list.
539 * We use a circular list so that processes can be easily woken up in 536 * We use a circular list so that processes can be easily woken up in
540 * the order they blocked. The documentation doesn't require this but 537 * the order they blocked. The documentation doesn't require this but
541 * it seems like the reasonable thing to do. 538 * it seems like the reasonable thing to do.
542 */ 539 */
543 static void locks_insert_block(struct file_lock *blocker, 540 static void locks_insert_block(struct file_lock *blocker,
544 struct file_lock *waiter) 541 struct file_lock *waiter)
545 { 542 {
546 BUG_ON(!list_empty(&waiter->fl_block)); 543 BUG_ON(!list_empty(&waiter->fl_block));
547 list_add_tail(&waiter->fl_block, &blocker->fl_block); 544 list_add_tail(&waiter->fl_block, &blocker->fl_block);
548 waiter->fl_next = blocker; 545 waiter->fl_next = blocker;
549 if (IS_POSIX(blocker)) 546 if (IS_POSIX(blocker))
550 list_add(&waiter->fl_link, &blocked_list); 547 list_add(&waiter->fl_link, &blocked_list);
551 } 548 }
552 549
553 /* Wake up processes blocked waiting for blocker. 550 /* Wake up processes blocked waiting for blocker.
554 * If told to wait then schedule the processes until the block list 551 * If told to wait then schedule the processes until the block list
555 * is empty, otherwise empty the block list ourselves. 552 * is empty, otherwise empty the block list ourselves.
556 */ 553 */
557 static void locks_wake_up_blocks(struct file_lock *blocker) 554 static void locks_wake_up_blocks(struct file_lock *blocker)
558 { 555 {
559 while (!list_empty(&blocker->fl_block)) { 556 while (!list_empty(&blocker->fl_block)) {
560 struct file_lock *waiter; 557 struct file_lock *waiter;
561 558
562 waiter = list_first_entry(&blocker->fl_block, 559 waiter = list_first_entry(&blocker->fl_block,
563 struct file_lock, fl_block); 560 struct file_lock, fl_block);
564 __locks_delete_block(waiter); 561 __locks_delete_block(waiter);
565 if (waiter->fl_lmops && waiter->fl_lmops->fl_notify) 562 if (waiter->fl_lmops && waiter->fl_lmops->fl_notify)
566 waiter->fl_lmops->fl_notify(waiter); 563 waiter->fl_lmops->fl_notify(waiter);
567 else 564 else
568 wake_up(&waiter->fl_wait); 565 wake_up(&waiter->fl_wait);
569 } 566 }
570 } 567 }
571 568
572 /* Insert file lock fl into an inode's lock list at the position indicated 569 /* Insert file lock fl into an inode's lock list at the position indicated
573 * by pos. At the same time add the lock to the global file lock list. 570 * by pos. At the same time add the lock to the global file lock list.
574 */ 571 */
575 static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) 572 static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
576 { 573 {
577 list_add(&fl->fl_link, &file_lock_list); 574 list_add(&fl->fl_link, &file_lock_list);
578 575
579 fl->fl_nspid = get_pid(task_tgid(current)); 576 fl->fl_nspid = get_pid(task_tgid(current));
580 577
581 /* insert into file's list */ 578 /* insert into file's list */
582 fl->fl_next = *pos; 579 fl->fl_next = *pos;
583 *pos = fl; 580 *pos = fl;
584 } 581 }
585 582
586 /* 583 /*
587 * Delete a lock and then free it. 584 * Delete a lock and then free it.
588 * Wake up processes that are blocked waiting for this lock, 585 * Wake up processes that are blocked waiting for this lock,
589 * notify the FS that the lock has been cleared and 586 * notify the FS that the lock has been cleared and
590 * finally free the lock. 587 * finally free the lock.
591 */ 588 */
592 static void locks_delete_lock(struct file_lock **thisfl_p) 589 static void locks_delete_lock(struct file_lock **thisfl_p)
593 { 590 {
594 struct file_lock *fl = *thisfl_p; 591 struct file_lock *fl = *thisfl_p;
595 592
596 *thisfl_p = fl->fl_next; 593 *thisfl_p = fl->fl_next;
597 fl->fl_next = NULL; 594 fl->fl_next = NULL;
598 list_del_init(&fl->fl_link); 595 list_del_init(&fl->fl_link);
599 596
600 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); 597 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
601 if (fl->fl_fasync != NULL) { 598 if (fl->fl_fasync != NULL) {
602 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); 599 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
603 fl->fl_fasync = NULL; 600 fl->fl_fasync = NULL;
604 } 601 }
605 602
606 if (fl->fl_nspid) { 603 if (fl->fl_nspid) {
607 put_pid(fl->fl_nspid); 604 put_pid(fl->fl_nspid);
608 fl->fl_nspid = NULL; 605 fl->fl_nspid = NULL;
609 } 606 }
610 607
611 locks_wake_up_blocks(fl); 608 locks_wake_up_blocks(fl);
612 locks_free_lock(fl); 609 locks_free_lock(fl);
613 } 610 }
614 611
615 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality 612 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
616 * checks for shared/exclusive status of overlapping locks. 613 * checks for shared/exclusive status of overlapping locks.
617 */ 614 */
618 static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) 615 static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
619 { 616 {
620 if (sys_fl->fl_type == F_WRLCK) 617 if (sys_fl->fl_type == F_WRLCK)
621 return 1; 618 return 1;
622 if (caller_fl->fl_type == F_WRLCK) 619 if (caller_fl->fl_type == F_WRLCK)
623 return 1; 620 return 1;
624 return 0; 621 return 0;
625 } 622 }
626 623
627 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific 624 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
628 * checking before calling the locks_conflict(). 625 * checking before calling the locks_conflict().
629 */ 626 */
630 static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) 627 static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
631 { 628 {
632 /* POSIX locks owned by the same process do not conflict with 629 /* POSIX locks owned by the same process do not conflict with
633 * each other. 630 * each other.
634 */ 631 */
635 if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) 632 if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl))
636 return (0); 633 return (0);
637 634
638 /* Check whether they overlap */ 635 /* Check whether they overlap */
639 if (!locks_overlap(caller_fl, sys_fl)) 636 if (!locks_overlap(caller_fl, sys_fl))
640 return 0; 637 return 0;
641 638
642 return (locks_conflict(caller_fl, sys_fl)); 639 return (locks_conflict(caller_fl, sys_fl));
643 } 640 }
644 641
645 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific 642 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
646 * checking before calling the locks_conflict(). 643 * checking before calling the locks_conflict().
647 */ 644 */
648 static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) 645 static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
649 { 646 {
650 /* FLOCK locks referring to the same filp do not conflict with 647 /* FLOCK locks referring to the same filp do not conflict with
651 * each other. 648 * each other.
652 */ 649 */
653 if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) 650 if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
654 return (0); 651 return (0);
655 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) 652 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
656 return 0; 653 return 0;
657 654
658 return (locks_conflict(caller_fl, sys_fl)); 655 return (locks_conflict(caller_fl, sys_fl));
659 } 656 }
660 657
661 void 658 void
662 posix_test_lock(struct file *filp, struct file_lock *fl) 659 posix_test_lock(struct file *filp, struct file_lock *fl)
663 { 660 {
664 struct file_lock *cfl; 661 struct file_lock *cfl;
665 662
666 lock_flocks(); 663 lock_flocks();
667 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { 664 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
668 if (!IS_POSIX(cfl)) 665 if (!IS_POSIX(cfl))
669 continue; 666 continue;
670 if (posix_locks_conflict(fl, cfl)) 667 if (posix_locks_conflict(fl, cfl))
671 break; 668 break;
672 } 669 }
673 if (cfl) { 670 if (cfl) {
674 __locks_copy_lock(fl, cfl); 671 __locks_copy_lock(fl, cfl);
675 if (cfl->fl_nspid) 672 if (cfl->fl_nspid)
676 fl->fl_pid = pid_vnr(cfl->fl_nspid); 673 fl->fl_pid = pid_vnr(cfl->fl_nspid);
677 } else 674 } else
678 fl->fl_type = F_UNLCK; 675 fl->fl_type = F_UNLCK;
679 unlock_flocks(); 676 unlock_flocks();
680 return; 677 return;
681 } 678 }
682 EXPORT_SYMBOL(posix_test_lock); 679 EXPORT_SYMBOL(posix_test_lock);
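posix_test_lock() is the backend of fcntl(F_GETLK) for filesystems without their own ->lock method: if a conflicting POSIX lock exists, its description is copied back (note the pid_vnr() translation for locks taken in another pid namespace); otherwise l_type comes back as F_UNLCK. Illustrative userspace side; the helper name is made up:

    #include <fcntl.h>
    #include <stdio.h>

    static void report_conflict(int fd)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 0,          /* 0 = to end of file */
            };

            if (fcntl(fd, F_GETLK, &fl) == 0) {
                    if (fl.l_type == F_UNLCK)
                            puts("no conflicting lock");
                    else
                            printf("conflict held by pid %ld\n", (long)fl.l_pid);
            }
    }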
683 680
684 /* 681 /*
685 * Deadlock detection: 682 * Deadlock detection:
686 * 683 *
687 * We attempt to detect deadlocks that are due purely to posix file 684 * We attempt to detect deadlocks that are due purely to posix file
688 * locks. 685 * locks.
689 * 686 *
690 * We assume that a task can be waiting for at most one lock at a time. 687 * We assume that a task can be waiting for at most one lock at a time.
691 * So for any acquired lock, the process holding that lock may be 688 * So for any acquired lock, the process holding that lock may be
692 * waiting on at most one other lock. That lock in turn may be held by 689 * waiting on at most one other lock. That lock in turn may be held by
693 * someone waiting for at most one other lock. Given a requested lock 690 * someone waiting for at most one other lock. Given a requested lock
694 * caller_fl which is about to wait for a conflicting lock block_fl, we 691 * caller_fl which is about to wait for a conflicting lock block_fl, we
695 * follow this chain of waiters to ensure we are not about to create a 692 * follow this chain of waiters to ensure we are not about to create a
696 * cycle. 693 * cycle.
697 * 694 *
698 * Since we do this before we ever put a process to sleep on a lock, we 695 * Since we do this before we ever put a process to sleep on a lock, we
699 * are ensured that there is never a cycle; that is what guarantees that 696 * are ensured that there is never a cycle; that is what guarantees that
700 * the while() loop in posix_locks_deadlock() eventually completes. 697 * the while() loop in posix_locks_deadlock() eventually completes.
701 * 698 *
702 * Note: the above assumption may not be true when handling lock 699 * Note: the above assumption may not be true when handling lock
703 * requests from a broken NFS client. It may also fail in the presence 700 * requests from a broken NFS client. It may also fail in the presence
704 * of tasks (such as posix threads) sharing the same open file table. 701 * of tasks (such as posix threads) sharing the same open file table.
705 * 702 *
706 * To handle those cases, we just bail out after a few iterations. 703 * To handle those cases, we just bail out after a few iterations.
707 */ 704 */
708 705
709 #define MAX_DEADLK_ITERATIONS 10 706 #define MAX_DEADLK_ITERATIONS 10
710 707
711 /* Find a lock that the owner of the given block_fl is blocking on. */ 708 /* Find a lock that the owner of the given block_fl is blocking on. */
712 static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) 709 static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
713 { 710 {
714 struct file_lock *fl; 711 struct file_lock *fl;
715 712
716 list_for_each_entry(fl, &blocked_list, fl_link) { 713 list_for_each_entry(fl, &blocked_list, fl_link) {
717 if (posix_same_owner(fl, block_fl)) 714 if (posix_same_owner(fl, block_fl))
718 return fl->fl_next; 715 return fl->fl_next;
719 } 716 }
720 return NULL; 717 return NULL;
721 } 718 }
722 719
723 static int posix_locks_deadlock(struct file_lock *caller_fl, 720 static int posix_locks_deadlock(struct file_lock *caller_fl,
724 struct file_lock *block_fl) 721 struct file_lock *block_fl)
725 { 722 {
726 int i = 0; 723 int i = 0;
727 724
728 while ((block_fl = what_owner_is_waiting_for(block_fl))) { 725 while ((block_fl = what_owner_is_waiting_for(block_fl))) {
729 if (i++ > MAX_DEADLK_ITERATIONS) 726 if (i++ > MAX_DEADLK_ITERATIONS)
730 return 0; 727 return 0;
731 if (posix_same_owner(caller_fl, block_fl)) 728 if (posix_same_owner(caller_fl, block_fl))
732 return 1; 729 return 1;
733 } 730 }
734 return 0; 731 return 0;
735 } 732 }
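The walk above is what turns a classic ABBA locking pattern into -EDEADLK (see the posix_locks_deadlock() check in __posix_lock_file() further down). An illustrative userspace reproduction, deliberately crude about timing; the file name and the sleep-based synchronisation are just for the sketch:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/wait.h>

    static int lock_byte(int fd, off_t off, int cmd)
    {
            struct flock fl = {
                    .l_type = F_WRLCK, .l_whence = SEEK_SET,
                    .l_start = off, .l_len = 1,
            };
            return fcntl(fd, cmd, &fl);
    }

    int main(void)
    {
            int fd = open("/tmp/deadlock-demo", O_CREAT | O_RDWR, 0644);

            if (fd < 0)
                    return 1;
            if (fork() == 0) {                      /* child: byte 1, then byte 0 */
                    lock_byte(fd, 1, F_SETLK);
                    sleep(1);
                    if (lock_byte(fd, 0, F_SETLKW) < 0 && errno == EDEADLK)
                            puts("child: EDEADLK");
                    _exit(0);
            }
            lock_byte(fd, 0, F_SETLK);              /* parent: byte 0, then byte 1 */
            sleep(1);
            if (lock_byte(fd, 1, F_SETLKW) < 0 && errno == EDEADLK)
                    puts("parent: EDEADLK");
            close(fd);                              /* drops the parent's locks */
            wait(NULL);
            return 0;
    }

Whichever process issues its blocking request second is the one that sees EDEADLK; the other simply waits until the loser releases its byte.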
736 733
737 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks 734 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
738 * after any leases, but before any posix locks. 735 * after any leases, but before any posix locks.
739 * 736 *
740 * Note that if called with an FL_EXISTS argument, the caller may determine 737 * Note that if called with an FL_EXISTS argument, the caller may determine
741 * whether or not a lock was successfully freed by testing the return 738 * whether or not a lock was successfully freed by testing the return
742 * value for -ENOENT. 739 * value for -ENOENT.
743 */ 740 */
744 static int flock_lock_file(struct file *filp, struct file_lock *request) 741 static int flock_lock_file(struct file *filp, struct file_lock *request)
745 { 742 {
746 struct file_lock *new_fl = NULL; 743 struct file_lock *new_fl = NULL;
747 struct file_lock **before; 744 struct file_lock **before;
748 struct inode * inode = filp->f_path.dentry->d_inode; 745 struct inode * inode = filp->f_path.dentry->d_inode;
749 int error = 0; 746 int error = 0;
750 int found = 0; 747 int found = 0;
751 748
752 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 749 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
753 new_fl = locks_alloc_lock(); 750 new_fl = locks_alloc_lock();
754 if (!new_fl) 751 if (!new_fl)
755 return -ENOMEM; 752 return -ENOMEM;
756 } 753 }
757 754
758 lock_flocks(); 755 lock_flocks();
759 if (request->fl_flags & FL_ACCESS) 756 if (request->fl_flags & FL_ACCESS)
760 goto find_conflict; 757 goto find_conflict;
761 758
762 for_each_lock(inode, before) { 759 for_each_lock(inode, before) {
763 struct file_lock *fl = *before; 760 struct file_lock *fl = *before;
764 if (IS_POSIX(fl)) 761 if (IS_POSIX(fl))
765 break; 762 break;
766 if (IS_LEASE(fl)) 763 if (IS_LEASE(fl))
767 continue; 764 continue;
768 if (filp != fl->fl_file) 765 if (filp != fl->fl_file)
769 continue; 766 continue;
770 if (request->fl_type == fl->fl_type) 767 if (request->fl_type == fl->fl_type)
771 goto out; 768 goto out;
772 found = 1; 769 found = 1;
773 locks_delete_lock(before); 770 locks_delete_lock(before);
774 break; 771 break;
775 } 772 }
776 773
777 if (request->fl_type == F_UNLCK) { 774 if (request->fl_type == F_UNLCK) {
778 if ((request->fl_flags & FL_EXISTS) && !found) 775 if ((request->fl_flags & FL_EXISTS) && !found)
779 error = -ENOENT; 776 error = -ENOENT;
780 goto out; 777 goto out;
781 } 778 }
782 779
783 /* 780 /*
784 * If a higher-priority process was blocked on the old file lock, 781 * If a higher-priority process was blocked on the old file lock,
785 * give it the opportunity to lock the file. 782 * give it the opportunity to lock the file.
786 */ 783 */
787 if (found) { 784 if (found) {
788 unlock_flocks(); 785 unlock_flocks();
789 cond_resched(); 786 cond_resched();
790 lock_flocks(); 787 lock_flocks();
791 } 788 }
792 789
793 find_conflict: 790 find_conflict:
794 for_each_lock(inode, before) { 791 for_each_lock(inode, before) {
795 struct file_lock *fl = *before; 792 struct file_lock *fl = *before;
796 if (IS_POSIX(fl)) 793 if (IS_POSIX(fl))
797 break; 794 break;
798 if (IS_LEASE(fl)) 795 if (IS_LEASE(fl))
799 continue; 796 continue;
800 if (!flock_locks_conflict(request, fl)) 797 if (!flock_locks_conflict(request, fl))
801 continue; 798 continue;
802 error = -EAGAIN; 799 error = -EAGAIN;
803 if (!(request->fl_flags & FL_SLEEP)) 800 if (!(request->fl_flags & FL_SLEEP))
804 goto out; 801 goto out;
805 error = FILE_LOCK_DEFERRED; 802 error = FILE_LOCK_DEFERRED;
806 locks_insert_block(fl, request); 803 locks_insert_block(fl, request);
807 goto out; 804 goto out;
808 } 805 }
809 if (request->fl_flags & FL_ACCESS) 806 if (request->fl_flags & FL_ACCESS)
810 goto out; 807 goto out;
811 locks_copy_lock(new_fl, request); 808 locks_copy_lock(new_fl, request);
812 locks_insert_lock(before, new_fl); 809 locks_insert_lock(before, new_fl);
813 new_fl = NULL; 810 new_fl = NULL;
814 error = 0; 811 error = 0;
815 812
816 out: 813 out:
817 unlock_flocks(); 814 unlock_flocks();
818 if (new_fl) 815 if (new_fl)
819 locks_free_lock(new_fl); 816 locks_free_lock(new_fl);
820 return error; 817 return error;
821 } 818 }
822 819
823 static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 820 static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
824 { 821 {
825 struct file_lock *fl; 822 struct file_lock *fl;
826 struct file_lock *new_fl = NULL; 823 struct file_lock *new_fl = NULL;
827 struct file_lock *new_fl2 = NULL; 824 struct file_lock *new_fl2 = NULL;
828 struct file_lock *left = NULL; 825 struct file_lock *left = NULL;
829 struct file_lock *right = NULL; 826 struct file_lock *right = NULL;
830 struct file_lock **before; 827 struct file_lock **before;
831 int error, added = 0; 828 int error, added = 0;
832 829
833 /* 830 /*
834 * We may need two file_lock structures for this operation, 831 * We may need two file_lock structures for this operation,
835 * so we get them in advance to avoid races. 832 * so we get them in advance to avoid races.
836 * 833 *
837 * In some cases we can be sure that no new locks will be needed 834 * In some cases we can be sure that no new locks will be needed
838 */ 835 */
839 if (!(request->fl_flags & FL_ACCESS) && 836 if (!(request->fl_flags & FL_ACCESS) &&
840 (request->fl_type != F_UNLCK || 837 (request->fl_type != F_UNLCK ||
841 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { 838 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
842 new_fl = locks_alloc_lock(); 839 new_fl = locks_alloc_lock();
843 new_fl2 = locks_alloc_lock(); 840 new_fl2 = locks_alloc_lock();
844 } 841 }
845 842
846 lock_flocks(); 843 lock_flocks();
847 if (request->fl_type != F_UNLCK) { 844 if (request->fl_type != F_UNLCK) {
848 for_each_lock(inode, before) { 845 for_each_lock(inode, before) {
849 fl = *before; 846 fl = *before;
850 if (!IS_POSIX(fl)) 847 if (!IS_POSIX(fl))
851 continue; 848 continue;
852 if (!posix_locks_conflict(request, fl)) 849 if (!posix_locks_conflict(request, fl))
853 continue; 850 continue;
854 if (conflock) 851 if (conflock)
855 __locks_copy_lock(conflock, fl); 852 __locks_copy_lock(conflock, fl);
856 error = -EAGAIN; 853 error = -EAGAIN;
857 if (!(request->fl_flags & FL_SLEEP)) 854 if (!(request->fl_flags & FL_SLEEP))
858 goto out; 855 goto out;
859 error = -EDEADLK; 856 error = -EDEADLK;
860 if (posix_locks_deadlock(request, fl)) 857 if (posix_locks_deadlock(request, fl))
861 goto out; 858 goto out;
862 error = FILE_LOCK_DEFERRED; 859 error = FILE_LOCK_DEFERRED;
863 locks_insert_block(fl, request); 860 locks_insert_block(fl, request);
864 goto out; 861 goto out;
865 } 862 }
866 } 863 }
867 864
868 /* If we're just looking for a conflict, we're done. */ 865 /* If we're just looking for a conflict, we're done. */
869 error = 0; 866 error = 0;
870 if (request->fl_flags & FL_ACCESS) 867 if (request->fl_flags & FL_ACCESS)
871 goto out; 868 goto out;
872 869
873 /* 870 /*
874 * Find the first old lock with the same owner as the new lock. 871 * Find the first old lock with the same owner as the new lock.
875 */ 872 */
876 873
877 before = &inode->i_flock; 874 before = &inode->i_flock;
878 875
879 /* First skip locks owned by other processes. */ 876 /* First skip locks owned by other processes. */
880 while ((fl = *before) && (!IS_POSIX(fl) || 877 while ((fl = *before) && (!IS_POSIX(fl) ||
881 !posix_same_owner(request, fl))) { 878 !posix_same_owner(request, fl))) {
882 before = &fl->fl_next; 879 before = &fl->fl_next;
883 } 880 }
884 881
885 /* Process locks with this owner. */ 882 /* Process locks with this owner. */
886 while ((fl = *before) && posix_same_owner(request, fl)) { 883 while ((fl = *before) && posix_same_owner(request, fl)) {
887 /* Detect adjacent or overlapping regions (if same lock type) 884 /* Detect adjacent or overlapping regions (if same lock type)
888 */ 885 */
889 if (request->fl_type == fl->fl_type) { 886 if (request->fl_type == fl->fl_type) {
890 /* In all comparisons of start vs end, use 887 /* In all comparisons of start vs end, use
891 * "start - 1" rather than "end + 1". If end 888 * "start - 1" rather than "end + 1". If end
892 * is OFFSET_MAX, end + 1 will become negative. 889 * is OFFSET_MAX, end + 1 will become negative.
893 */ 890 */
894 if (fl->fl_end < request->fl_start - 1) 891 if (fl->fl_end < request->fl_start - 1)
895 goto next_lock; 892 goto next_lock;
896 /* If the next lock in the list has entirely bigger 893 /* If the next lock in the list has entirely bigger
897 * addresses than the new one, insert the lock here. 894 * addresses than the new one, insert the lock here.
898 */ 895 */
899 if (fl->fl_start - 1 > request->fl_end) 896 if (fl->fl_start - 1 > request->fl_end)
900 break; 897 break;
901 898
902 /* If we come here, the new and old lock are of the 899 /* If we come here, the new and old lock are of the
903 * same type and adjacent or overlapping. Make one 900 * same type and adjacent or overlapping. Make one
904 * lock yielding from the lower start address of both 901 * lock yielding from the lower start address of both
905 * locks to the higher end address. 902 * locks to the higher end address.
906 */ 903 */
907 if (fl->fl_start > request->fl_start) 904 if (fl->fl_start > request->fl_start)
908 fl->fl_start = request->fl_start; 905 fl->fl_start = request->fl_start;
909 else 906 else
910 request->fl_start = fl->fl_start; 907 request->fl_start = fl->fl_start;
911 if (fl->fl_end < request->fl_end) 908 if (fl->fl_end < request->fl_end)
912 fl->fl_end = request->fl_end; 909 fl->fl_end = request->fl_end;
913 else 910 else
914 request->fl_end = fl->fl_end; 911 request->fl_end = fl->fl_end;
915 if (added) { 912 if (added) {
916 locks_delete_lock(before); 913 locks_delete_lock(before);
917 continue; 914 continue;
918 } 915 }
919 request = fl; 916 request = fl;
920 added = 1; 917 added = 1;
921 } 918 }
922 else { 919 else {
923 /* Processing for different lock types is a bit 920 /* Processing for different lock types is a bit
924 * more complex. 921 * more complex.
925 */ 922 */
926 if (fl->fl_end < request->fl_start) 923 if (fl->fl_end < request->fl_start)
927 goto next_lock; 924 goto next_lock;
928 if (fl->fl_start > request->fl_end) 925 if (fl->fl_start > request->fl_end)
929 break; 926 break;
930 if (request->fl_type == F_UNLCK) 927 if (request->fl_type == F_UNLCK)
931 added = 1; 928 added = 1;
932 if (fl->fl_start < request->fl_start) 929 if (fl->fl_start < request->fl_start)
933 left = fl; 930 left = fl;
934 /* If the next lock in the list has a higher end 931 /* If the next lock in the list has a higher end
935 * address than the new one, insert the new one here. 932 * address than the new one, insert the new one here.
936 */ 933 */
937 if (fl->fl_end > request->fl_end) { 934 if (fl->fl_end > request->fl_end) {
938 right = fl; 935 right = fl;
939 break; 936 break;
940 } 937 }
941 if (fl->fl_start >= request->fl_start) { 938 if (fl->fl_start >= request->fl_start) {
942 /* The new lock completely replaces an old 939 /* The new lock completely replaces an old
943 * one (This may happen several times). 940 * one (This may happen several times).
944 */ 941 */
945 if (added) { 942 if (added) {
946 locks_delete_lock(before); 943 locks_delete_lock(before);
947 continue; 944 continue;
948 } 945 }
949 /* Replace the old lock with the new one. 946 /* Replace the old lock with the new one.
950 * Wake up anybody waiting for the old one, 947 * Wake up anybody waiting for the old one,
951 * as the change in lock type might satisfy 948 * as the change in lock type might satisfy
952 * their needs. 949 * their needs.
953 */ 950 */
954 locks_wake_up_blocks(fl); 951 locks_wake_up_blocks(fl);
955 fl->fl_start = request->fl_start; 952 fl->fl_start = request->fl_start;
956 fl->fl_end = request->fl_end; 953 fl->fl_end = request->fl_end;
957 fl->fl_type = request->fl_type; 954 fl->fl_type = request->fl_type;
958 locks_release_private(fl); 955 locks_release_private(fl);
959 locks_copy_private(fl, request); 956 locks_copy_private(fl, request);
960 request = fl; 957 request = fl;
961 added = 1; 958 added = 1;
962 } 959 }
963 } 960 }
964 /* Go on to next lock. 961 /* Go on to next lock.
965 */ 962 */
966 next_lock: 963 next_lock:
967 before = &fl->fl_next; 964 before = &fl->fl_next;
968 } 965 }
969 966
970 /* 967 /*
971 * The above code only modifies existing locks in case of 968 * The above code only modifies existing locks in case of
972 * merging or replacing. If new lock(s) need to be inserted 969 * merging or replacing. If new lock(s) need to be inserted
973 * all modifications are done below this, so it is still safe to 970 * all modifications are done below this, so it is still safe to
974 * bail out. 971 * bail out.
975 */ 972 */
976 error = -ENOLCK; /* "no luck" */ 973 error = -ENOLCK; /* "no luck" */
977 if (right && left == right && !new_fl2) 974 if (right && left == right && !new_fl2)
978 goto out; 975 goto out;
979 976
980 error = 0; 977 error = 0;
981 if (!added) { 978 if (!added) {
982 if (request->fl_type == F_UNLCK) { 979 if (request->fl_type == F_UNLCK) {
983 if (request->fl_flags & FL_EXISTS) 980 if (request->fl_flags & FL_EXISTS)
984 error = -ENOENT; 981 error = -ENOENT;
985 goto out; 982 goto out;
986 } 983 }
987 984
988 if (!new_fl) { 985 if (!new_fl) {
989 error = -ENOLCK; 986 error = -ENOLCK;
990 goto out; 987 goto out;
991 } 988 }
992 locks_copy_lock(new_fl, request); 989 locks_copy_lock(new_fl, request);
993 locks_insert_lock(before, new_fl); 990 locks_insert_lock(before, new_fl);
994 new_fl = NULL; 991 new_fl = NULL;
995 } 992 }
996 if (right) { 993 if (right) {
997 if (left == right) { 994 if (left == right) {
998 /* The new lock breaks the old one in two pieces, 995 /* The new lock breaks the old one in two pieces,
999 * so we have to use the second new lock. 996 * so we have to use the second new lock.
1000 */ 997 */
1001 left = new_fl2; 998 left = new_fl2;
1002 new_fl2 = NULL; 999 new_fl2 = NULL;
1003 locks_copy_lock(left, right); 1000 locks_copy_lock(left, right);
1004 locks_insert_lock(before, left); 1001 locks_insert_lock(before, left);
1005 } 1002 }
1006 right->fl_start = request->fl_end + 1; 1003 right->fl_start = request->fl_end + 1;
1007 locks_wake_up_blocks(right); 1004 locks_wake_up_blocks(right);
1008 } 1005 }
1009 if (left) { 1006 if (left) {
1010 left->fl_end = request->fl_start - 1; 1007 left->fl_end = request->fl_start - 1;
1011 locks_wake_up_blocks(left); 1008 locks_wake_up_blocks(left);
1012 } 1009 }
1013 out: 1010 out:
1014 unlock_flocks(); 1011 unlock_flocks();
1015 /* 1012 /*
1016 * Free any unused locks. 1013 * Free any unused locks.
1017 */ 1014 */
1018 if (new_fl) 1015 if (new_fl)
1019 locks_free_lock(new_fl); 1016 locks_free_lock(new_fl);
1020 if (new_fl2) 1017 if (new_fl2)
1021 locks_free_lock(new_fl2); 1018 locks_free_lock(new_fl2);
1022 return error; 1019 return error;
1023 } 1020 }
1024 1021
1025 /** 1022 /**
1026 * posix_lock_file - Apply a POSIX-style lock to a file 1023 * posix_lock_file - Apply a POSIX-style lock to a file
1027 * @filp: The file to apply the lock to 1024 * @filp: The file to apply the lock to
1028 * @fl: The lock to be applied 1025 * @fl: The lock to be applied
1029 * @conflock: Place to return a copy of the conflicting lock, if found. 1026 * @conflock: Place to return a copy of the conflicting lock, if found.
1030 * 1027 *
1031 * Add a POSIX style lock to a file. 1028 * Add a POSIX style lock to a file.
1032 * We merge adjacent & overlapping locks whenever possible. 1029 * We merge adjacent & overlapping locks whenever possible.
1033 * POSIX locks are sorted by owner task, then by starting address 1030 * POSIX locks are sorted by owner task, then by starting address
1034 * 1031 *
1035 * Note that if called with an FL_EXISTS argument, the caller may determine 1032 * Note that if called with an FL_EXISTS argument, the caller may determine
1036 * whether or not a lock was successfully freed by testing the return 1033 * whether or not a lock was successfully freed by testing the return
1037 * value for -ENOENT. 1034 * value for -ENOENT.
1038 */ 1035 */
1039 int posix_lock_file(struct file *filp, struct file_lock *fl, 1036 int posix_lock_file(struct file *filp, struct file_lock *fl,
1040 struct file_lock *conflock) 1037 struct file_lock *conflock)
1041 { 1038 {
1042 return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); 1039 return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
1043 } 1040 }
1044 EXPORT_SYMBOL(posix_lock_file); 1041 EXPORT_SYMBOL(posix_lock_file);
1045 1042
1046 /** 1043 /**
1047 * posix_lock_file_wait - Apply a POSIX-style lock to a file 1044 * posix_lock_file_wait - Apply a POSIX-style lock to a file
1048 * @filp: The file to apply the lock to 1045 * @filp: The file to apply the lock to
1049 * @fl: The lock to be applied 1046 * @fl: The lock to be applied
1050 * 1047 *
1051 * Add a POSIX style lock to a file. 1048 * Add a POSIX style lock to a file.
1052 * We merge adjacent & overlapping locks whenever possible. 1049 * We merge adjacent & overlapping locks whenever possible.
1053 * POSIX locks are sorted by owner task, then by starting address 1050 * POSIX locks are sorted by owner task, then by starting address
1054 */ 1051 */
1055 int posix_lock_file_wait(struct file *filp, struct file_lock *fl) 1052 int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1056 { 1053 {
1057 int error; 1054 int error;
1058 might_sleep (); 1055 might_sleep ();
1059 for (;;) { 1056 for (;;) {
1060 error = posix_lock_file(filp, fl, NULL); 1057 error = posix_lock_file(filp, fl, NULL);
1061 if (error != FILE_LOCK_DEFERRED) 1058 if (error != FILE_LOCK_DEFERRED)
1062 break; 1059 break;
1063 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1060 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1064 if (!error) 1061 if (!error)
1065 continue; 1062 continue;
1066 1063
1067 locks_delete_block(fl); 1064 locks_delete_block(fl);
1068 break; 1065 break;
1069 } 1066 }
1070 return error; 1067 return error;
1071 } 1068 }
1072 EXPORT_SYMBOL(posix_lock_file_wait); 1069 EXPORT_SYMBOL(posix_lock_file_wait);
1073 1070
1074 /** 1071 /**
1075 * locks_mandatory_locked - Check for an active lock 1072 * locks_mandatory_locked - Check for an active lock
1076 * @inode: the file to check 1073 * @inode: the file to check
1077 * 1074 *
1078 * Searches the inode's list of locks to find any POSIX locks which conflict. 1075 * Searches the inode's list of locks to find any POSIX locks which conflict.
1079 * This function is called from locks_verify_locked() only. 1076 * This function is called from locks_verify_locked() only.
1080 */ 1077 */
1081 int locks_mandatory_locked(struct inode *inode) 1078 int locks_mandatory_locked(struct inode *inode)
1082 { 1079 {
1083 fl_owner_t owner = current->files; 1080 fl_owner_t owner = current->files;
1084 struct file_lock *fl; 1081 struct file_lock *fl;
1085 1082
1086 /* 1083 /*
1087 * Search the lock list for this inode for any POSIX locks. 1084 * Search the lock list for this inode for any POSIX locks.
1088 */ 1085 */
1089 lock_flocks(); 1086 lock_flocks();
1090 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1087 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1091 if (!IS_POSIX(fl)) 1088 if (!IS_POSIX(fl))
1092 continue; 1089 continue;
1093 if (fl->fl_owner != owner) 1090 if (fl->fl_owner != owner)
1094 break; 1091 break;
1095 } 1092 }
1096 unlock_flocks(); 1093 unlock_flocks();
1097 return fl ? -EAGAIN : 0; 1094 return fl ? -EAGAIN : 0;
1098 } 1095 }
1099 1096
1100 /** 1097 /**
1101 * locks_mandatory_area - Check for a conflicting lock 1098 * locks_mandatory_area - Check for a conflicting lock
1102 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ 1099 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
1103 * for shared 1100 * for shared
1104 * @inode: the file to check 1101 * @inode: the file to check
1105 * @filp: how the file was opened (if it was) 1102 * @filp: how the file was opened (if it was)
1106 * @offset: start of area to check 1103 * @offset: start of area to check
1107 * @count: length of area to check 1104 * @count: length of area to check
1108 * 1105 *
1109 * Searches the inode's list of locks to find any POSIX locks which conflict. 1106 * Searches the inode's list of locks to find any POSIX locks which conflict.
1110 * This function is called from rw_verify_area() and 1107 * This function is called from rw_verify_area() and
1111 * locks_verify_truncate(). 1108 * locks_verify_truncate().
1112 */ 1109 */
1113 int locks_mandatory_area(int read_write, struct inode *inode, 1110 int locks_mandatory_area(int read_write, struct inode *inode,
1114 struct file *filp, loff_t offset, 1111 struct file *filp, loff_t offset,
1115 size_t count) 1112 size_t count)
1116 { 1113 {
1117 struct file_lock fl; 1114 struct file_lock fl;
1118 int error; 1115 int error;
1119 1116
1120 locks_init_lock(&fl); 1117 locks_init_lock(&fl);
1121 fl.fl_owner = current->files; 1118 fl.fl_owner = current->files;
1122 fl.fl_pid = current->tgid; 1119 fl.fl_pid = current->tgid;
1123 fl.fl_file = filp; 1120 fl.fl_file = filp;
1124 fl.fl_flags = FL_POSIX | FL_ACCESS; 1121 fl.fl_flags = FL_POSIX | FL_ACCESS;
1125 if (filp && !(filp->f_flags & O_NONBLOCK)) 1122 if (filp && !(filp->f_flags & O_NONBLOCK))
1126 fl.fl_flags |= FL_SLEEP; 1123 fl.fl_flags |= FL_SLEEP;
1127 fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; 1124 fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
1128 fl.fl_start = offset; 1125 fl.fl_start = offset;
1129 fl.fl_end = offset + count - 1; 1126 fl.fl_end = offset + count - 1;
1130 1127
1131 for (;;) { 1128 for (;;) {
1132 error = __posix_lock_file(inode, &fl, NULL); 1129 error = __posix_lock_file(inode, &fl, NULL);
1133 if (error != FILE_LOCK_DEFERRED) 1130 if (error != FILE_LOCK_DEFERRED)
1134 break; 1131 break;
1135 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); 1132 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
1136 if (!error) { 1133 if (!error) {
1137 /* 1134 /*
1138 * If we've been sleeping someone might have 1135 * If we've been sleeping someone might have
1139 * changed the permissions behind our back. 1136 * changed the permissions behind our back.
1140 */ 1137 */
1141 if (__mandatory_lock(inode)) 1138 if (__mandatory_lock(inode))
1142 continue; 1139 continue;
1143 } 1140 }
1144 1141
1145 locks_delete_block(&fl); 1142 locks_delete_block(&fl);
1146 break; 1143 break;
1147 } 1144 }
1148 1145
1149 return error; 1146 return error;
1150 } 1147 }
1151 1148
1152 EXPORT_SYMBOL(locks_mandatory_area); 1149 EXPORT_SYMBOL(locks_mandatory_area);
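For context, a minimal userspace sketch of the setup that routes I/O through locks_mandatory_locked()/locks_mandatory_area(): mandatory locking only applies when the filesystem is mounted with the "mand" option and the file's setgid bit is set with group-execute clear. The path below is hypothetical.

/* Illustrative only: mark a file for mandatory locking and take a
 * write lock on it.  Assumes a filesystem mounted with "-o mand";
 * the path is hypothetical. */
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = {
		.l_type = F_WRLCK,	/* exclusive record lock */
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,		/* 0 means "through EOF" */
	};
	int fd = open("/mnt/mand/data", O_RDWR);

	if (fd < 0)
		return 1;
	/* setgid set, group-execute clear: the mandatory-locking marker */
	if (fchmod(fd, S_ISGID | S_IRUSR | S_IWUSR) < 0)
		return 1;
	if (fcntl(fd, F_SETLK, &fl) < 0)
		return 1;
	/* While this lock is held, read()/write() by other processes on the
	 * locked range go through locks_mandatory_area(): they sleep, or
	 * fail with EAGAIN if the file was opened with O_NONBLOCK. */
	pause();
	return 0;
}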
1153 1150
1154 /* We already had a lease on this file; just change its type */ 1151 /* We already had a lease on this file; just change its type */
1155 int lease_modify(struct file_lock **before, int arg) 1152 int lease_modify(struct file_lock **before, int arg)
1156 { 1153 {
1157 struct file_lock *fl = *before; 1154 struct file_lock *fl = *before;
1158 int error = assign_type(fl, arg); 1155 int error = assign_type(fl, arg);
1159 1156
1160 if (error) 1157 if (error)
1161 return error; 1158 return error;
1162 locks_wake_up_blocks(fl); 1159 locks_wake_up_blocks(fl);
1163 if (arg == F_UNLCK) 1160 if (arg == F_UNLCK)
1164 locks_delete_lock(before); 1161 locks_delete_lock(before);
1165 return 0; 1162 return 0;
1166 } 1163 }
1167 1164
1168 EXPORT_SYMBOL(lease_modify); 1165 EXPORT_SYMBOL(lease_modify);
1169 1166
1170 static void time_out_leases(struct inode *inode) 1167 static void time_out_leases(struct inode *inode)
1171 { 1168 {
1172 struct file_lock **before; 1169 struct file_lock **before;
1173 struct file_lock *fl; 1170 struct file_lock *fl;
1174 1171
1175 before = &inode->i_flock; 1172 before = &inode->i_flock;
1176 while ((fl = *before) && IS_LEASE(fl) && (fl->fl_type & F_INPROGRESS)) { 1173 while ((fl = *before) && IS_LEASE(fl) && (fl->fl_type & F_INPROGRESS)) {
1177 if ((fl->fl_break_time == 0) 1174 if ((fl->fl_break_time == 0)
1178 || time_before(jiffies, fl->fl_break_time)) { 1175 || time_before(jiffies, fl->fl_break_time)) {
1179 before = &fl->fl_next; 1176 before = &fl->fl_next;
1180 continue; 1177 continue;
1181 } 1178 }
1182 lease_modify(before, fl->fl_type & ~F_INPROGRESS); 1179 lease_modify(before, fl->fl_type & ~F_INPROGRESS);
1183 if (fl == *before) /* lease_modify may have freed fl */ 1180 if (fl == *before) /* lease_modify may have freed fl */
1184 before = &fl->fl_next; 1181 before = &fl->fl_next;
1185 } 1182 }
1186 } 1183 }
1187 1184
1188 /** 1185 /**
1189 * __break_lease - revoke all outstanding leases on file 1186 * __break_lease - revoke all outstanding leases on file
1190 * @inode: the inode of the file whose leases are to be broken 1187 * @inode: the inode of the file whose leases are to be broken
1191 * @mode: the open mode (read or write) 1188 * @mode: the open mode (read or write)
1192 * 1189 *
1193 * break_lease (inlined for speed) has checked there already is at least 1190 * break_lease (inlined for speed) has checked there already is at least
1194 * some kind of lock (maybe a lease) on this file. Leases are broken on 1191 * some kind of lock (maybe a lease) on this file. Leases are broken on
1195 * a call to open() or truncate(). This function can sleep unless you 1192 * a call to open() or truncate(). This function can sleep unless you
1196 * specified %O_NONBLOCK to your open(). 1193 * specified %O_NONBLOCK to your open().
1197 */ 1194 */
1198 int __break_lease(struct inode *inode, unsigned int mode) 1195 int __break_lease(struct inode *inode, unsigned int mode)
1199 { 1196 {
1200 int error = 0, future; 1197 int error = 0, future;
1201 struct file_lock *new_fl, *flock; 1198 struct file_lock *new_fl, *flock;
1202 struct file_lock *fl; 1199 struct file_lock *fl;
1203 unsigned long break_time; 1200 unsigned long break_time;
1204 int i_have_this_lease = 0; 1201 int i_have_this_lease = 0;
1205 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1202 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1206 1203
1207 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); 1204 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1208 1205
1209 lock_flocks(); 1206 lock_flocks();
1210 1207
1211 time_out_leases(inode); 1208 time_out_leases(inode);
1212 1209
1213 flock = inode->i_flock; 1210 flock = inode->i_flock;
1214 if ((flock == NULL) || !IS_LEASE(flock)) 1211 if ((flock == NULL) || !IS_LEASE(flock))
1215 goto out; 1212 goto out;
1216 1213
1217 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) 1214 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
1218 if (fl->fl_owner == current->files) 1215 if (fl->fl_owner == current->files)
1219 i_have_this_lease = 1; 1216 i_have_this_lease = 1;
1220 1217
1221 if (want_write) { 1218 if (want_write) {
1222 /* If we want write access, we have to revoke any lease. */ 1219 /* If we want write access, we have to revoke any lease. */
1223 future = F_UNLCK | F_INPROGRESS; 1220 future = F_UNLCK | F_INPROGRESS;
1224 } else if (flock->fl_type & F_INPROGRESS) { 1221 } else if (flock->fl_type & F_INPROGRESS) {
1225 /* If the lease is already being broken, we just leave it */ 1222 /* If the lease is already being broken, we just leave it */
1226 future = flock->fl_type; 1223 future = flock->fl_type;
1227 } else if (flock->fl_type & F_WRLCK) { 1224 } else if (flock->fl_type & F_WRLCK) {
1228 /* Downgrade the exclusive lease to a read-only lease. */ 1225 /* Downgrade the exclusive lease to a read-only lease. */
1229 future = F_RDLCK | F_INPROGRESS; 1226 future = F_RDLCK | F_INPROGRESS;
1230 } else { 1227 } else {
1231 /* the existing lease was read-only, so we can read too. */ 1228 /* the existing lease was read-only, so we can read too. */
1232 goto out; 1229 goto out;
1233 } 1230 }
1234 1231
1235 if (IS_ERR(new_fl) && !i_have_this_lease 1232 if (IS_ERR(new_fl) && !i_have_this_lease
1236 && ((mode & O_NONBLOCK) == 0)) { 1233 && ((mode & O_NONBLOCK) == 0)) {
1237 error = PTR_ERR(new_fl); 1234 error = PTR_ERR(new_fl);
1238 goto out; 1235 goto out;
1239 } 1236 }
1240 1237
1241 break_time = 0; 1238 break_time = 0;
1242 if (lease_break_time > 0) { 1239 if (lease_break_time > 0) {
1243 break_time = jiffies + lease_break_time * HZ; 1240 break_time = jiffies + lease_break_time * HZ;
1244 if (break_time == 0) 1241 if (break_time == 0)
1245 break_time++; /* so that 0 means no break time */ 1242 break_time++; /* so that 0 means no break time */
1246 } 1243 }
1247 1244
1248 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { 1245 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
1249 if (fl->fl_type != future) { 1246 if (fl->fl_type != future) {
1250 fl->fl_type = future; 1247 fl->fl_type = future;
1251 fl->fl_break_time = break_time; 1248 fl->fl_break_time = break_time;
1252 /* lease must have lmops break callback */ 1249 /* lease must have lmops break callback */
1253 fl->fl_lmops->fl_break(fl); 1250 fl->fl_lmops->fl_break(fl);
1254 } 1251 }
1255 } 1252 }
1256 1253
1257 if (i_have_this_lease || (mode & O_NONBLOCK)) { 1254 if (i_have_this_lease || (mode & O_NONBLOCK)) {
1258 error = -EWOULDBLOCK; 1255 error = -EWOULDBLOCK;
1259 goto out; 1256 goto out;
1260 } 1257 }
1261 1258
1262 restart: 1259 restart:
1263 break_time = flock->fl_break_time; 1260 break_time = flock->fl_break_time;
1264 if (break_time != 0) { 1261 if (break_time != 0) {
1265 break_time -= jiffies; 1262 break_time -= jiffies;
1266 if (break_time == 0) 1263 if (break_time == 0)
1267 break_time++; 1264 break_time++;
1268 } 1265 }
1269 locks_insert_block(flock, new_fl); 1266 locks_insert_block(flock, new_fl);
1270 unlock_flocks(); 1267 unlock_flocks();
1271 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1268 error = wait_event_interruptible_timeout(new_fl->fl_wait,
1272 !new_fl->fl_next, break_time); 1269 !new_fl->fl_next, break_time);
1273 lock_flocks(); 1270 lock_flocks();
1274 __locks_delete_block(new_fl); 1271 __locks_delete_block(new_fl);
1275 if (error >= 0) { 1272 if (error >= 0) {
1276 if (error == 0) 1273 if (error == 0)
1277 time_out_leases(inode); 1274 time_out_leases(inode);
1278 /* Wait for the next lease that has not been broken yet */ 1275 /* Wait for the next lease that has not been broken yet */
1279 for (flock = inode->i_flock; flock && IS_LEASE(flock); 1276 for (flock = inode->i_flock; flock && IS_LEASE(flock);
1280 flock = flock->fl_next) { 1277 flock = flock->fl_next) {
1281 if (flock->fl_type & F_INPROGRESS) 1278 if (flock->fl_type & F_INPROGRESS)
1282 goto restart; 1279 goto restart;
1283 } 1280 }
1284 error = 0; 1281 error = 0;
1285 } 1282 }
1286 1283
1287 out: 1284 out:
1288 unlock_flocks(); 1285 unlock_flocks();
1289 if (!IS_ERR(new_fl)) 1286 if (!IS_ERR(new_fl))
1290 locks_free_lock(new_fl); 1287 locks_free_lock(new_fl);
1291 return error; 1288 return error;
1292 } 1289 }
1293 1290
1294 EXPORT_SYMBOL(__break_lease); 1291 EXPORT_SYMBOL(__break_lease);
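A hedged sketch of the opener's view of the lease break implemented above: an open that conflicts with an existing lease sleeps in __break_lease() until the holder relinquishes the lease or /proc/sys/fs/lease-break-time expires, while an O_NONBLOCK open fails immediately with EWOULDBLOCK. The path is hypothetical and another process is assumed to hold a write lease.

/* Illustrative only: a second process opening a leased file. */
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	/* Non-blocking attempt: __break_lease() starts breaking the lease
	 * and returns -EWOULDBLOCK straight away. */
	int fd = open("/tmp/leased-file", O_WRONLY | O_NONBLOCK);

	if (fd < 0 && errno == EWOULDBLOCK)
		printf("lease break in progress\n");

	/* Blocking attempt: sleeps until the lease holder downgrades or
	 * releases the lease, or the break times out. */
	fd = open("/tmp/leased-file", O_WRONLY);
	return fd < 0;
}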
1295 1292
1296 /** 1293 /**
1297 * lease_get_mtime - get the last modified time of an inode 1294 * lease_get_mtime - get the last modified time of an inode
1298 * @inode: the inode 1295 * @inode: the inode
1299 * @time: pointer to a timespec which will contain the last modified time 1296 * @time: pointer to a timespec which will contain the last modified time
1300 * 1297 *
1301 * This is to force NFS clients to flush their caches for files with 1298 * This is to force NFS clients to flush their caches for files with
1302 * exclusive leases. The justification is that if someone has an 1299 * exclusive leases. The justification is that if someone has an
1303 * exclusive lease, then they could be modifying it. 1300 * exclusive lease, then they could be modifying it.
1304 */ 1301 */
1305 void lease_get_mtime(struct inode *inode, struct timespec *time) 1302 void lease_get_mtime(struct inode *inode, struct timespec *time)
1306 { 1303 {
1307 struct file_lock *flock = inode->i_flock; 1304 struct file_lock *flock = inode->i_flock;
1308 if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK)) 1305 if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK))
1309 *time = current_fs_time(inode->i_sb); 1306 *time = current_fs_time(inode->i_sb);
1310 else 1307 else
1311 *time = inode->i_mtime; 1308 *time = inode->i_mtime;
1312 } 1309 }
1313 1310
1314 EXPORT_SYMBOL(lease_get_mtime); 1311 EXPORT_SYMBOL(lease_get_mtime);
1315 1312
1316 /** 1313 /**
1317 * fcntl_getlease - Enquire what lease is currently active 1314 * fcntl_getlease - Enquire what lease is currently active
1318 * @filp: the file 1315 * @filp: the file
1319 * 1316 *
1320 * The value returned by this function will be one of 1317 * The value returned by this function will be one of
1321 * (if no lease break is pending): 1318 * (if no lease break is pending):
1322 * 1319 *
1323 * %F_RDLCK to indicate a shared lease is held. 1320 * %F_RDLCK to indicate a shared lease is held.
1324 * 1321 *
1325 * %F_WRLCK to indicate an exclusive lease is held. 1322 * %F_WRLCK to indicate an exclusive lease is held.
1326 * 1323 *
1327 * %F_UNLCK to indicate no lease is held. 1324 * %F_UNLCK to indicate no lease is held.
1328 * 1325 *
1329 * (if a lease break is pending): 1326 * (if a lease break is pending):
1330 * 1327 *
1331 * %F_RDLCK to indicate an exclusive lease needs to be 1328 * %F_RDLCK to indicate an exclusive lease needs to be
1332 * changed to a shared lease (or removed). 1329 * changed to a shared lease (or removed).
1333 * 1330 *
1334 * %F_UNLCK to indicate the lease needs to be removed. 1331 * %F_UNLCK to indicate the lease needs to be removed.
1335 * 1332 *
1336 * XXX: sfr & willy disagree over whether F_INPROGRESS 1333 * XXX: sfr & willy disagree over whether F_INPROGRESS
1337 * should be returned to userspace. 1334 * should be returned to userspace.
1338 */ 1335 */
1339 int fcntl_getlease(struct file *filp) 1336 int fcntl_getlease(struct file *filp)
1340 { 1337 {
1341 struct file_lock *fl; 1338 struct file_lock *fl;
1342 int type = F_UNLCK; 1339 int type = F_UNLCK;
1343 1340
1344 lock_flocks(); 1341 lock_flocks();
1345 time_out_leases(filp->f_path.dentry->d_inode); 1342 time_out_leases(filp->f_path.dentry->d_inode);
1346 for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); 1343 for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
1347 fl = fl->fl_next) { 1344 fl = fl->fl_next) {
1348 if (fl->fl_file == filp) { 1345 if (fl->fl_file == filp) {
1349 type = fl->fl_type & ~F_INPROGRESS; 1346 type = fl->fl_type & ~F_INPROGRESS;
1350 break; 1347 break;
1351 } 1348 }
1352 } 1349 }
1353 unlock_flocks(); 1350 unlock_flocks();
1354 return type; 1351 return type;
1355 } 1352 }
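The userspace counterpart is a single fcntl(); a minimal sketch, interpreting the return values listed in the comment above:

/* Minimal sketch of querying a lease with F_GETLEASE. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

static void show_lease(int fd)
{
	switch (fcntl(fd, F_GETLEASE)) {
	case F_RDLCK:
		printf("shared lease (or exclusive lease being downgraded)\n");
		break;
	case F_WRLCK:
		printf("exclusive lease held\n");
		break;
	case F_UNLCK:
		printf("no lease (or lease being removed)\n");
		break;
	default:
		perror("F_GETLEASE");
	}
}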
1356 1353
1357 /** 1354 /**
1358 * generic_setlease - sets a lease on an open file 1355 * generic_setlease - sets a lease on an open file
1359 * @filp: file pointer 1356 * @filp: file pointer
1360 * @arg: type of lease to obtain 1357 * @arg: type of lease to obtain
1361 * @flp: input - file_lock to use, output - file_lock inserted 1358 * @flp: input - file_lock to use, output - file_lock inserted
1362 * 1359 *
1363 * The (input) flp->fl_lmops->fl_break function is required 1360 * The (input) flp->fl_lmops->fl_break function is required
1364 * by break_lease(). 1361 * by break_lease().
1365 * 1362 *
1366 * Called with file_lock_lock held. 1363 * Called with file_lock_lock held.
1367 */ 1364 */
1368 int generic_setlease(struct file *filp, long arg, struct file_lock **flp) 1365 int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1369 { 1366 {
1370 struct file_lock *fl, **before, **my_before = NULL, *lease; 1367 struct file_lock *fl, **before, **my_before = NULL, *lease;
1371 struct dentry *dentry = filp->f_path.dentry; 1368 struct dentry *dentry = filp->f_path.dentry;
1372 struct inode *inode = dentry->d_inode; 1369 struct inode *inode = dentry->d_inode;
1373 int error, rdlease_count = 0, wrlease_count = 0; 1370 int error, rdlease_count = 0, wrlease_count = 0;
1374 1371
1375 lease = *flp; 1372 lease = *flp;
1376 1373
1377 error = -EACCES; 1374 error = -EACCES;
1378 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) 1375 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
1379 goto out; 1376 goto out;
1380 error = -EINVAL; 1377 error = -EINVAL;
1381 if (!S_ISREG(inode->i_mode)) 1378 if (!S_ISREG(inode->i_mode))
1382 goto out; 1379 goto out;
1383 error = security_file_lock(filp, arg); 1380 error = security_file_lock(filp, arg);
1384 if (error) 1381 if (error)
1385 goto out; 1382 goto out;
1386 1383
1387 time_out_leases(inode); 1384 time_out_leases(inode);
1388 1385
1389 BUG_ON(!(*flp)->fl_lmops->fl_break); 1386 BUG_ON(!(*flp)->fl_lmops->fl_break);
1390 1387
1391 if (arg != F_UNLCK) { 1388 if (arg != F_UNLCK) {
1392 error = -EAGAIN; 1389 error = -EAGAIN;
1393 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1390 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1394 goto out; 1391 goto out;
1395 if ((arg == F_WRLCK) 1392 if ((arg == F_WRLCK)
1396 && ((atomic_read(&dentry->d_count) > 1) 1393 && ((atomic_read(&dentry->d_count) > 1)
1397 || (atomic_read(&inode->i_count) > 1))) 1394 || (atomic_read(&inode->i_count) > 1)))
1398 goto out; 1395 goto out;
1399 } 1396 }
1400 1397
1401 /* 1398 /*
1402 * At this point, we know that if there is an exclusive 1399 * At this point, we know that if there is an exclusive
1403 * lease on this file, then we hold it on this filp 1400 * lease on this file, then we hold it on this filp
1404 * (otherwise our open of this file would have blocked). 1401 * (otherwise our open of this file would have blocked).
1405 * And if we are trying to acquire an exclusive lease, 1402 * And if we are trying to acquire an exclusive lease,
1406 * then the file is not open by anyone (including us) 1403 * then the file is not open by anyone (including us)
1407 * except for this filp. 1404 * except for this filp.
1408 */ 1405 */
1409 for (before = &inode->i_flock; 1406 for (before = &inode->i_flock;
1410 ((fl = *before) != NULL) && IS_LEASE(fl); 1407 ((fl = *before) != NULL) && IS_LEASE(fl);
1411 before = &fl->fl_next) { 1408 before = &fl->fl_next) {
1412 if (lease->fl_lmops->fl_mylease(fl, lease)) 1409 if (lease->fl_lmops->fl_mylease(fl, lease))
1413 my_before = before; 1410 my_before = before;
1414 else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) 1411 else if (fl->fl_type == (F_INPROGRESS | F_UNLCK))
1415 /* 1412 /*
1416 * Someone is in the process of opening this 1413 * Someone is in the process of opening this
1417 * file for writing so we may not take an 1414 * file for writing so we may not take an
1418 * exclusive lease on it. 1415 * exclusive lease on it.
1419 */ 1416 */
1420 wrlease_count++; 1417 wrlease_count++;
1421 else 1418 else
1422 rdlease_count++; 1419 rdlease_count++;
1423 } 1420 }
1424 1421
1425 error = -EAGAIN; 1422 error = -EAGAIN;
1426 if ((arg == F_RDLCK && (wrlease_count > 0)) || 1423 if ((arg == F_RDLCK && (wrlease_count > 0)) ||
1427 (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0))) 1424 (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0)))
1428 goto out; 1425 goto out;
1429 1426
1430 if (my_before != NULL) { 1427 if (my_before != NULL) {
1431 error = lease->fl_lmops->fl_change(my_before, arg); 1428 error = lease->fl_lmops->fl_change(my_before, arg);
1432 if (!error) 1429 if (!error)
1433 *flp = *my_before; 1430 *flp = *my_before;
1434 goto out; 1431 goto out;
1435 } 1432 }
1436 1433
1437 if (arg == F_UNLCK) 1434 if (arg == F_UNLCK)
1438 goto out; 1435 goto out;
1439 1436
1440 error = -EINVAL; 1437 error = -EINVAL;
1441 if (!leases_enable) 1438 if (!leases_enable)
1442 goto out; 1439 goto out;
1443 1440
1444 locks_insert_lock(before, lease); 1441 locks_insert_lock(before, lease);
1445 return 0; 1442 return 0;
1446 1443
1447 out: 1444 out:
1448 return error; 1445 return error;
1449 } 1446 }
1450 EXPORT_SYMBOL(generic_setlease); 1447 EXPORT_SYMBOL(generic_setlease);
1451 1448
1452 static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) 1449 static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1453 { 1450 {
1454 if (filp->f_op && filp->f_op->setlease) 1451 if (filp->f_op && filp->f_op->setlease)
1455 return filp->f_op->setlease(filp, arg, lease); 1452 return filp->f_op->setlease(filp, arg, lease);
1456 else 1453 else
1457 return generic_setlease(filp, arg, lease); 1454 return generic_setlease(filp, arg, lease);
1458 } 1455 }
1459 1456
1460 /** 1457 /**
1461 * vfs_setlease - sets a lease on an open file 1458 * vfs_setlease - sets a lease on an open file
1462 * @filp: file pointer 1459 * @filp: file pointer
1463 * @arg: type of lease to obtain 1460 * @arg: type of lease to obtain
1464 * @lease: file_lock to use 1461 * @lease: file_lock to use
1465 * 1462 *
1466 * Call this to establish a lease on the file. 1463 * Call this to establish a lease on the file.
1467 * The (*lease)->fl_lmops->fl_break operation must be set; if not, 1464 * The (*lease)->fl_lmops->fl_break operation must be set; if not,
1468 * break_lease will oops! 1465 * break_lease will oops!
1469 * 1466 *
1470 * This will call the filesystem's setlease file method, if 1467 * This will call the filesystem's setlease file method, if
1471 * defined. Note that there is no getlease method; instead, the 1468 * defined. Note that there is no getlease method; instead, the
1472 * filesystem setlease method should call back to setlease() to 1469 * filesystem setlease method should call back to setlease() to
1473 * add a lease to the inode's lease list, where fcntl_getlease() can 1470 * add a lease to the inode's lease list, where fcntl_getlease() can
1474 * find it. Since fcntl_getlease() only reports whether the current 1471 * find it. Since fcntl_getlease() only reports whether the current
1475 * task holds a lease, a cluster filesystem need only do this for 1472 * task holds a lease, a cluster filesystem need only do this for
1476 * leases held by processes on this node. 1473 * leases held by processes on this node.
1477 * 1474 *
1478 * There is also no break_lease method; filesystems that 1475 * There is also no break_lease method; filesystems that
1479 * handle their own leases should break leases themselves from the 1476 * handle their own leases should break leases themselves from the
1480 * filesystem's open, create, and (on truncate) setattr methods. 1477 * filesystem's open, create, and (on truncate) setattr methods.
1481 * 1478 *
1482 * Warning: the current setlease methods exist only to disable 1479 * Warning: the current setlease methods exist only to disable
1483 * leases in certain cases. More vfs changes may be required to 1480 * leases in certain cases. More vfs changes may be required to
1484 * allow a full filesystem lease implementation. 1481 * allow a full filesystem lease implementation.
1485 */ 1482 */
1486 1483
1487 int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) 1484 int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1488 { 1485 {
1489 int error; 1486 int error;
1490 1487
1491 lock_flocks(); 1488 lock_flocks();
1492 error = __vfs_setlease(filp, arg, lease); 1489 error = __vfs_setlease(filp, arg, lease);
1493 unlock_flocks(); 1490 unlock_flocks();
1494 1491
1495 return error; 1492 return error;
1496 } 1493 }
1497 EXPORT_SYMBOL_GPL(vfs_setlease); 1494 EXPORT_SYMBOL_GPL(vfs_setlease);
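As a rough illustration of the warning above, a filesystem that cannot support leases might wire up a ->setlease method that simply refuses them; the "examplefs" names and the -EINVAL return are assumptions, not taken from any in-tree filesystem.

/* Sketch only: disable leases by overriding ->setlease. */
#include <linux/fs.h>
#include <linux/errno.h>

static int examplefs_setlease(struct file *filp, long arg,
			      struct file_lock **flp)
{
	return -EINVAL;		/* leases not supported here */
}

static const struct file_operations examplefs_file_operations = {
	/* ... read/write/open/release elided ... */
	.setlease	= examplefs_setlease,
};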
1498 1495
1499 static int do_fcntl_delete_lease(struct file *filp) 1496 static int do_fcntl_delete_lease(struct file *filp)
1500 { 1497 {
1501 struct file_lock fl, *flp = &fl; 1498 struct file_lock fl, *flp = &fl;
1502 1499
1503 lease_init(filp, F_UNLCK, flp); 1500 lease_init(filp, F_UNLCK, flp);
1504 1501
1505 return vfs_setlease(filp, F_UNLCK, &flp); 1502 return vfs_setlease(filp, F_UNLCK, &flp);
1506 } 1503 }
1507 1504
1508 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) 1505 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1509 { 1506 {
1510 struct file_lock *fl; 1507 struct file_lock *fl;
1511 struct fasync_struct *new; 1508 struct fasync_struct *new;
1512 struct inode *inode = filp->f_path.dentry->d_inode; 1509 struct inode *inode = filp->f_path.dentry->d_inode;
1513 int error; 1510 int error;
1514 1511
1515 fl = lease_alloc(filp, arg); 1512 fl = lease_alloc(filp, arg);
1516 if (IS_ERR(fl)) 1513 if (IS_ERR(fl))
1517 return PTR_ERR(fl); 1514 return PTR_ERR(fl);
1518 1515
1519 new = fasync_alloc(); 1516 new = fasync_alloc();
1520 if (!new) { 1517 if (!new) {
1521 locks_free_lock(fl); 1518 locks_free_lock(fl);
1522 return -ENOMEM; 1519 return -ENOMEM;
1523 } 1520 }
1524 lock_flocks(); 1521 lock_flocks();
1525 error = __vfs_setlease(filp, arg, &fl); 1522 error = __vfs_setlease(filp, arg, &fl);
1526 if (error) { 1523 if (error) {
1527 unlock_flocks(); 1524 unlock_flocks();
1528 locks_free_lock(fl); 1525 locks_free_lock(fl);
1529 goto out_free_fasync; 1526 goto out_free_fasync;
1530 } 1527 }
1531 1528
1532 /* 1529 /*
1533 * fasync_insert_entry() returns the old entry if any. 1530 * fasync_insert_entry() returns the old entry if any.
1534 * If there was no old entry, then it used 'new' and 1531 * If there was no old entry, then it used 'new' and
1535 * inserted it into the fasync list. Clear new so that 1532 * inserted it into the fasync list. Clear new so that
1536 * we don't release it here. 1533 * we don't release it here.
1537 */ 1534 */
1538 if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) 1535 if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
1539 new = NULL; 1536 new = NULL;
1540 1537
1541 if (error < 0) { 1538 if (error < 0) {
1542 /* remove lease just inserted by setlease */ 1539 /* remove lease just inserted by setlease */
1543 fl->fl_type = F_UNLCK | F_INPROGRESS; 1540 fl->fl_type = F_UNLCK | F_INPROGRESS;
1544 fl->fl_break_time = jiffies - 10; 1541 fl->fl_break_time = jiffies - 10;
1545 time_out_leases(inode); 1542 time_out_leases(inode);
1546 } else { 1543 } else {
1547 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 1544 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1548 } 1545 }
1549 unlock_flocks(); 1546 unlock_flocks();
1550 1547
1551 out_free_fasync: 1548 out_free_fasync:
1552 if (new) 1549 if (new)
1553 fasync_free(new); 1550 fasync_free(new);
1554 return error; 1551 return error;
1555 } 1552 }
1556 1553
1557 /** 1554 /**
1558 * fcntl_setlease - sets a lease on an open file 1555 * fcntl_setlease - sets a lease on an open file
1559 * @fd: open file descriptor 1556 * @fd: open file descriptor
1560 * @filp: file pointer 1557 * @filp: file pointer
1561 * @arg: type of lease to obtain 1558 * @arg: type of lease to obtain
1562 * 1559 *
1563 * Call this fcntl to establish a lease on the file. 1560 * Call this fcntl to establish a lease on the file.
1564 * Note that you also need to call %F_SETSIG to 1561 * Note that you also need to call %F_SETSIG to
1565 * receive a signal when the lease is broken. 1562 * receive a signal when the lease is broken.
1566 */ 1563 */
1567 int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1564 int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1568 { 1565 {
1569 if (arg == F_UNLCK) 1566 if (arg == F_UNLCK)
1570 return do_fcntl_delete_lease(filp); 1567 return do_fcntl_delete_lease(filp);
1571 return do_fcntl_add_lease(fd, filp, arg); 1568 return do_fcntl_add_lease(fd, filp, arg);
1572 } 1569 }
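A hedged userspace sketch of the holder's side, following the note above that F_SETSIG is also needed: take a read lease, ask for a real-time signal on lease break (so the siginfo carries the descriptor), and relinquish the lease when told to. The path is hypothetical; the caller must own the file or have CAP_LEASE, per generic_setlease() above.

/* Illustrative only: holding a lease and reacting to a break. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void lease_broken(int sig, siginfo_t *info, void *ctx)
{
	/* info->si_fd names the descriptor whose lease is being broken;
	 * finish any cached work, then unlock in main(). */
}

int main(void)
{
	struct sigaction sa;
	int fd = open("/tmp/leased-file", O_RDONLY);

	sa.sa_sigaction = lease_broken;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGRTMIN, &sa, NULL);

	if (fcntl(fd, F_SETSIG, SIGRTMIN) < 0 ||
	    fcntl(fd, F_SETLEASE, F_RDLCK) < 0)
		return 1;
	pause();				/* wait for a lease break */
	fcntl(fd, F_SETLEASE, F_UNLCK);		/* relinquish the lease */
	return 0;
}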
1573 1570
1574 /** 1571 /**
1575 * flock_lock_file_wait - Apply a FLOCK-style lock to a file 1572 * flock_lock_file_wait - Apply a FLOCK-style lock to a file
1576 * @filp: The file to apply the lock to 1573 * @filp: The file to apply the lock to
1577 * @fl: The lock to be applied 1574 * @fl: The lock to be applied
1578 * 1575 *
1579 * Add a FLOCK style lock to a file. 1576 * Add a FLOCK style lock to a file.
1580 */ 1577 */
1581 int flock_lock_file_wait(struct file *filp, struct file_lock *fl) 1578 int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1582 { 1579 {
1583 int error; 1580 int error;
1584 might_sleep(); 1581 might_sleep();
1585 for (;;) { 1582 for (;;) {
1586 error = flock_lock_file(filp, fl); 1583 error = flock_lock_file(filp, fl);
1587 if (error != FILE_LOCK_DEFERRED) 1584 if (error != FILE_LOCK_DEFERRED)
1588 break; 1585 break;
1589 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1586 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1590 if (!error) 1587 if (!error)
1591 continue; 1588 continue;
1592 1589
1593 locks_delete_block(fl); 1590 locks_delete_block(fl);
1594 break; 1591 break;
1595 } 1592 }
1596 return error; 1593 return error;
1597 } 1594 }
1598 1595
1599 EXPORT_SYMBOL(flock_lock_file_wait); 1596 EXPORT_SYMBOL(flock_lock_file_wait);
1600 1597
1601 /** 1598 /**
1602 * sys_flock - flock() system call. 1599 * sys_flock - flock() system call.
1603 * @fd: the file descriptor to lock. 1600 * @fd: the file descriptor to lock.
1604 * @cmd: the type of lock to apply. 1601 * @cmd: the type of lock to apply.
1605 * 1602 *
1606 * Apply a %FL_FLOCK style lock to an open file descriptor. 1603 * Apply a %FL_FLOCK style lock to an open file descriptor.
1607 * The @cmd can be one of 1604 * The @cmd can be one of
1608 * 1605 *
1609 * %LOCK_SH -- a shared lock. 1606 * %LOCK_SH -- a shared lock.
1610 * 1607 *
1611 * %LOCK_EX -- an exclusive lock. 1608 * %LOCK_EX -- an exclusive lock.
1612 * 1609 *
1613 * %LOCK_UN -- remove an existing lock. 1610 * %LOCK_UN -- remove an existing lock.
1614 * 1611 *
1615 * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes. 1612 * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes.
1616 * 1613 *
1617 * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other 1614 * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
1618 * processes read and write access respectively. 1615 * processes read and write access respectively.
1619 */ 1616 */
1620 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) 1617 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1621 { 1618 {
1622 struct file *filp; 1619 struct file *filp;
1623 struct file_lock *lock; 1620 struct file_lock *lock;
1624 int can_sleep, unlock; 1621 int can_sleep, unlock;
1625 int error; 1622 int error;
1626 1623
1627 error = -EBADF; 1624 error = -EBADF;
1628 filp = fget(fd); 1625 filp = fget(fd);
1629 if (!filp) 1626 if (!filp)
1630 goto out; 1627 goto out;
1631 1628
1632 can_sleep = !(cmd & LOCK_NB); 1629 can_sleep = !(cmd & LOCK_NB);
1633 cmd &= ~LOCK_NB; 1630 cmd &= ~LOCK_NB;
1634 unlock = (cmd == LOCK_UN); 1631 unlock = (cmd == LOCK_UN);
1635 1632
1636 if (!unlock && !(cmd & LOCK_MAND) && 1633 if (!unlock && !(cmd & LOCK_MAND) &&
1637 !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) 1634 !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
1638 goto out_putf; 1635 goto out_putf;
1639 1636
1640 error = flock_make_lock(filp, &lock, cmd); 1637 error = flock_make_lock(filp, &lock, cmd);
1641 if (error) 1638 if (error)
1642 goto out_putf; 1639 goto out_putf;
1643 if (can_sleep) 1640 if (can_sleep)
1644 lock->fl_flags |= FL_SLEEP; 1641 lock->fl_flags |= FL_SLEEP;
1645 1642
1646 error = security_file_lock(filp, lock->fl_type); 1643 error = security_file_lock(filp, lock->fl_type);
1647 if (error) 1644 if (error)
1648 goto out_free; 1645 goto out_free;
1649 1646
1650 if (filp->f_op && filp->f_op->flock) 1647 if (filp->f_op && filp->f_op->flock)
1651 error = filp->f_op->flock(filp, 1648 error = filp->f_op->flock(filp,
1652 (can_sleep) ? F_SETLKW : F_SETLK, 1649 (can_sleep) ? F_SETLKW : F_SETLK,
1653 lock); 1650 lock);
1654 else 1651 else
1655 error = flock_lock_file_wait(filp, lock); 1652 error = flock_lock_file_wait(filp, lock);
1656 1653
1657 out_free: 1654 out_free:
1658 locks_free_lock(lock); 1655 locks_free_lock(lock);
1659 1656
1660 out_putf: 1657 out_putf:
1661 fput(filp); 1658 fput(filp);
1662 out: 1659 out:
1663 return error; 1660 return error;
1664 } 1661 }
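The userspace counterpart of the syscall above; a minimal sketch that first tries a non-blocking exclusive lock and then falls back to sleeping (the FL_SLEEP path).

/* Minimal flock() usage sketch. */
#include <sys/file.h>
#include <errno.h>

static int lock_whole_file(int fd)
{
	if (flock(fd, LOCK_EX | LOCK_NB) == 0)
		return 0;			/* got the lock at once */
	if (errno != EWOULDBLOCK)
		return -1;			/* real error */
	return flock(fd, LOCK_EX);		/* sleep until available */
}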
1665 1662
1666 /** 1663 /**
1667 * vfs_test_lock - test file byte range lock 1664 * vfs_test_lock - test file byte range lock
1668 * @filp: The file to test lock for 1665 * @filp: The file to test lock for
1669 * @fl: The lock to test; also used to hold result 1666 * @fl: The lock to test; also used to hold result
1670 * 1667 *
1671 * Returns -ERRNO on failure. Indicates presence of conflicting lock by 1668 * Returns -ERRNO on failure. Indicates presence of conflicting lock by
1672 * setting fl->fl_type to something other than F_UNLCK. 1669 * setting fl->fl_type to something other than F_UNLCK.
1673 */ 1670 */
1674 int vfs_test_lock(struct file *filp, struct file_lock *fl) 1671 int vfs_test_lock(struct file *filp, struct file_lock *fl)
1675 { 1672 {
1676 if (filp->f_op && filp->f_op->lock) 1673 if (filp->f_op && filp->f_op->lock)
1677 return filp->f_op->lock(filp, F_GETLK, fl); 1674 return filp->f_op->lock(filp, F_GETLK, fl);
1678 posix_test_lock(filp, fl); 1675 posix_test_lock(filp, fl);
1679 return 0; 1676 return 0;
1680 } 1677 }
1681 EXPORT_SYMBOL_GPL(vfs_test_lock); 1678 EXPORT_SYMBOL_GPL(vfs_test_lock);
1682 1679
1683 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) 1680 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1684 { 1681 {
1685 flock->l_pid = fl->fl_pid; 1682 flock->l_pid = fl->fl_pid;
1686 #if BITS_PER_LONG == 32 1683 #if BITS_PER_LONG == 32
1687 /* 1684 /*
1688 * Make sure we can represent the posix lock via 1685 * Make sure we can represent the posix lock via
1689 * legacy 32bit flock. 1686 * legacy 32bit flock.
1690 */ 1687 */
1691 if (fl->fl_start > OFFT_OFFSET_MAX) 1688 if (fl->fl_start > OFFT_OFFSET_MAX)
1692 return -EOVERFLOW; 1689 return -EOVERFLOW;
1693 if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) 1690 if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
1694 return -EOVERFLOW; 1691 return -EOVERFLOW;
1695 #endif 1692 #endif
1696 flock->l_start = fl->fl_start; 1693 flock->l_start = fl->fl_start;
1697 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 1694 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1698 fl->fl_end - fl->fl_start + 1; 1695 fl->fl_end - fl->fl_start + 1;
1699 flock->l_whence = 0; 1696 flock->l_whence = 0;
1700 flock->l_type = fl->fl_type; 1697 flock->l_type = fl->fl_type;
1701 return 0; 1698 return 0;
1702 } 1699 }
1703 1700
1704 #if BITS_PER_LONG == 32 1701 #if BITS_PER_LONG == 32
1705 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) 1702 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
1706 { 1703 {
1707 flock->l_pid = fl->fl_pid; 1704 flock->l_pid = fl->fl_pid;
1708 flock->l_start = fl->fl_start; 1705 flock->l_start = fl->fl_start;
1709 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 1706 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1710 fl->fl_end - fl->fl_start + 1; 1707 fl->fl_end - fl->fl_start + 1;
1711 flock->l_whence = 0; 1708 flock->l_whence = 0;
1712 flock->l_type = fl->fl_type; 1709 flock->l_type = fl->fl_type;
1713 } 1710 }
1714 #endif 1711 #endif
1715 1712
1716 /* Report the first existing lock that would conflict with l. 1713 /* Report the first existing lock that would conflict with l.
1717 * This implements the F_GETLK command of fcntl(). 1714 * This implements the F_GETLK command of fcntl().
1718 */ 1715 */
1719 int fcntl_getlk(struct file *filp, struct flock __user *l) 1716 int fcntl_getlk(struct file *filp, struct flock __user *l)
1720 { 1717 {
1721 struct file_lock file_lock; 1718 struct file_lock file_lock;
1722 struct flock flock; 1719 struct flock flock;
1723 int error; 1720 int error;
1724 1721
1725 error = -EFAULT; 1722 error = -EFAULT;
1726 if (copy_from_user(&flock, l, sizeof(flock))) 1723 if (copy_from_user(&flock, l, sizeof(flock)))
1727 goto out; 1724 goto out;
1728 error = -EINVAL; 1725 error = -EINVAL;
1729 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) 1726 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
1730 goto out; 1727 goto out;
1731 1728
1732 error = flock_to_posix_lock(filp, &file_lock, &flock); 1729 error = flock_to_posix_lock(filp, &file_lock, &flock);
1733 if (error) 1730 if (error)
1734 goto out; 1731 goto out;
1735 1732
1736 error = vfs_test_lock(filp, &file_lock); 1733 error = vfs_test_lock(filp, &file_lock);
1737 if (error) 1734 if (error)
1738 goto out; 1735 goto out;
1739 1736
1740 flock.l_type = file_lock.fl_type; 1737 flock.l_type = file_lock.fl_type;
1741 if (file_lock.fl_type != F_UNLCK) { 1738 if (file_lock.fl_type != F_UNLCK) {
1742 error = posix_lock_to_flock(&flock, &file_lock); 1739 error = posix_lock_to_flock(&flock, &file_lock);
1743 if (error) 1740 if (error)
1744 goto out; 1741 goto out;
1745 } 1742 }
1746 error = -EFAULT; 1743 error = -EFAULT;
1747 if (!copy_to_user(l, &flock, sizeof(flock))) 1744 if (!copy_to_user(l, &flock, sizeof(flock)))
1748 error = 0; 1745 error = 0;
1749 out: 1746 out:
1750 return error; 1747 return error;
1751 } 1748 }
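A minimal userspace sketch of the F_GETLK command implemented above: l_type comes back as F_UNLCK if the probe lock could be placed, otherwise it describes the first conflicting lock.

/* Probe for a conflicting lock over the whole file. */
#include <fcntl.h>
#include <stdio.h>

static void probe_write_lock(int fd)
{
	struct flock fl = {
		.l_type = F_WRLCK,	/* "could I take a write lock?" */
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,		/* whole file */
	};

	if (fcntl(fd, F_GETLK, &fl) < 0) {
		perror("F_GETLK");
		return;
	}
	if (fl.l_type == F_UNLCK)
		printf("no conflicting lock\n");
	else
		printf("conflicting %s lock held by pid %ld\n",
		       fl.l_type == F_WRLCK ? "write" : "read",
		       (long)fl.l_pid);
}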
1752 1749
1753 /** 1750 /**
1754 * vfs_lock_file - file byte range lock 1751 * vfs_lock_file - file byte range lock
1755 * @filp: The file to apply the lock to 1752 * @filp: The file to apply the lock to
1756 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) 1753 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
1757 * @fl: The lock to be applied 1754 * @fl: The lock to be applied
1758 * @conf: Place to return a copy of the conflicting lock, if found. 1755 * @conf: Place to return a copy of the conflicting lock, if found.
1759 * 1756 *
1760 * A caller that doesn't care about the conflicting lock may pass NULL 1757 * A caller that doesn't care about the conflicting lock may pass NULL
1761 * as the final argument. 1758 * as the final argument.
1762 * 1759 *
1763 * If the filesystem defines a private ->lock() method, then @conf will 1760 * If the filesystem defines a private ->lock() method, then @conf will
1764 * be left unchanged; so a caller that cares should initialize it to 1761 * be left unchanged; so a caller that cares should initialize it to
1765 * some acceptable default. 1762 * some acceptable default.
1766 * 1763 *
1767 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX 1764 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
1768 * locks, the ->lock() interface may return asynchronously, before the lock has 1765 * locks, the ->lock() interface may return asynchronously, before the lock has
1769 * been granted or denied by the underlying filesystem, if (and only if) 1766 * been granted or denied by the underlying filesystem, if (and only if)
1770 * fl_grant is set. Callers expecting ->lock() to return asynchronously 1767 * fl_grant is set. Callers expecting ->lock() to return asynchronously
1771 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) 1768 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1772 * the request is for a blocking lock. When ->lock() does return asynchronously, 1769 * the request is for a blocking lock. When ->lock() does return asynchronously,
1773 * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock 1770 * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
1774 * request completes. 1771 * request completes.
1775 * If the request is for a non-blocking lock, the file system should return 1772 * If the request is for a non-blocking lock, the file system should return
1776 * FILE_LOCK_DEFERRED, then try to get the lock and call the callback routine 1773 * FILE_LOCK_DEFERRED, then try to get the lock and call the callback routine
1777 * with the result. If the request timed out, the callback routine will return a 1774 * with the result. If the request timed out, the callback routine will return a
1778 * nonzero return code and the file system should release the lock. The file 1775 * nonzero return code and the file system should release the lock. The file
1779 * system is also responsible for keeping a corresponding posix lock when it 1776 * system is also responsible for keeping a corresponding posix lock when it
1780 * grants a lock so the VFS can find out which locks are locally held and do 1777 * grants a lock so the VFS can find out which locks are locally held and do
1781 * the correct lock cleanup when required. 1778 * the correct lock cleanup when required.
1782 * The underlying filesystem must not drop the kernel lock or call 1779 * The underlying filesystem must not drop the kernel lock or call
1783 * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED 1780 * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
1784 * return code. 1781 * return code.
1785 */ 1782 */
1786 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 1783 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
1787 { 1784 {
1788 if (filp->f_op && filp->f_op->lock) 1785 if (filp->f_op && filp->f_op->lock)
1789 return filp->f_op->lock(filp, cmd, fl); 1786 return filp->f_op->lock(filp, cmd, fl);
1790 else 1787 else
1791 return posix_lock_file(filp, fl, conf); 1788 return posix_lock_file(filp, fl, conf);
1792 } 1789 }
1793 EXPORT_SYMBOL_GPL(vfs_lock_file); 1790 EXPORT_SYMBOL_GPL(vfs_lock_file);
1794 1791
1795 static int do_lock_file_wait(struct file *filp, unsigned int cmd, 1792 static int do_lock_file_wait(struct file *filp, unsigned int cmd,
1796 struct file_lock *fl) 1793 struct file_lock *fl)
1797 { 1794 {
1798 int error; 1795 int error;
1799 1796
1800 error = security_file_lock(filp, fl->fl_type); 1797 error = security_file_lock(filp, fl->fl_type);
1801 if (error) 1798 if (error)
1802 return error; 1799 return error;
1803 1800
1804 for (;;) { 1801 for (;;) {
1805 error = vfs_lock_file(filp, cmd, fl, NULL); 1802 error = vfs_lock_file(filp, cmd, fl, NULL);
1806 if (error != FILE_LOCK_DEFERRED) 1803 if (error != FILE_LOCK_DEFERRED)
1807 break; 1804 break;
1808 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1805 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1809 if (!error) 1806 if (!error)
1810 continue; 1807 continue;
1811 1808
1812 locks_delete_block(fl); 1809 locks_delete_block(fl);
1813 break; 1810 break;
1814 } 1811 }
1815 1812
1816 return error; 1813 return error;
1817 } 1814 }
1818 1815
1819 /* Apply the lock described by l to an open file descriptor. 1816 /* Apply the lock described by l to an open file descriptor.
1820 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1817 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1821 */ 1818 */
1822 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, 1819 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1823 struct flock __user *l) 1820 struct flock __user *l)
1824 { 1821 {
1825 struct file_lock *file_lock = locks_alloc_lock(); 1822 struct file_lock *file_lock = locks_alloc_lock();
1826 struct flock flock; 1823 struct flock flock;
1827 struct inode *inode; 1824 struct inode *inode;
1828 struct file *f; 1825 struct file *f;
1829 int error; 1826 int error;
1830 1827
1831 if (file_lock == NULL) 1828 if (file_lock == NULL)
1832 return -ENOLCK; 1829 return -ENOLCK;
1833 1830
1834 /* 1831 /*
1835 * This might block, so we do it before checking the inode. 1832 * This might block, so we do it before checking the inode.
1836 */ 1833 */
1837 error = -EFAULT; 1834 error = -EFAULT;
1838 if (copy_from_user(&flock, l, sizeof(flock))) 1835 if (copy_from_user(&flock, l, sizeof(flock)))
1839 goto out; 1836 goto out;
1840 1837
1841 inode = filp->f_path.dentry->d_inode; 1838 inode = filp->f_path.dentry->d_inode;
1842 1839
1843 /* Don't allow mandatory locks on files that may be memory mapped 1840 /* Don't allow mandatory locks on files that may be memory mapped
1844 * and shared. 1841 * and shared.
1845 */ 1842 */
1846 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { 1843 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
1847 error = -EAGAIN; 1844 error = -EAGAIN;
1848 goto out; 1845 goto out;
1849 } 1846 }
1850 1847
1851 again: 1848 again:
1852 error = flock_to_posix_lock(filp, file_lock, &flock); 1849 error = flock_to_posix_lock(filp, file_lock, &flock);
1853 if (error) 1850 if (error)
1854 goto out; 1851 goto out;
1855 if (cmd == F_SETLKW) { 1852 if (cmd == F_SETLKW) {
1856 file_lock->fl_flags |= FL_SLEEP; 1853 file_lock->fl_flags |= FL_SLEEP;
1857 } 1854 }
1858 1855
1859 error = -EBADF; 1856 error = -EBADF;
1860 switch (flock.l_type) { 1857 switch (flock.l_type) {
1861 case F_RDLCK: 1858 case F_RDLCK:
1862 if (!(filp->f_mode & FMODE_READ)) 1859 if (!(filp->f_mode & FMODE_READ))
1863 goto out; 1860 goto out;
1864 break; 1861 break;
1865 case F_WRLCK: 1862 case F_WRLCK:
1866 if (!(filp->f_mode & FMODE_WRITE)) 1863 if (!(filp->f_mode & FMODE_WRITE))
1867 goto out; 1864 goto out;
1868 break; 1865 break;
1869 case F_UNLCK: 1866 case F_UNLCK:
1870 break; 1867 break;
1871 default: 1868 default:
1872 error = -EINVAL; 1869 error = -EINVAL;
1873 goto out; 1870 goto out;
1874 } 1871 }
1875 1872
1876 error = do_lock_file_wait(filp, cmd, file_lock); 1873 error = do_lock_file_wait(filp, cmd, file_lock);
1877 1874
1878 /* 1875 /*
1879 * Attempt to detect a close/fcntl race and recover by 1876 * Attempt to detect a close/fcntl race and recover by
1880 * releasing the lock that was just acquired. 1877 * releasing the lock that was just acquired.
1881 */ 1878 */
1882 /* 1879 /*
1883 * we need that spin_lock here - it prevents reordering between 1880 * we need that spin_lock here - it prevents reordering between
1884 * update of inode->i_flock and check for it done in close(). 1881 * update of inode->i_flock and check for it done in close().
1885 * rcu_read_lock() wouldn't do. 1882 * rcu_read_lock() wouldn't do.
1886 */ 1883 */
1887 spin_lock(&current->files->file_lock); 1884 spin_lock(&current->files->file_lock);
1888 f = fcheck(fd); 1885 f = fcheck(fd);
1889 spin_unlock(&current->files->file_lock); 1886 spin_unlock(&current->files->file_lock);
1890 if (!error && f != filp && flock.l_type != F_UNLCK) { 1887 if (!error && f != filp && flock.l_type != F_UNLCK) {
1891 flock.l_type = F_UNLCK; 1888 flock.l_type = F_UNLCK;
1892 goto again; 1889 goto again;
1893 } 1890 }
1894 1891
1895 out: 1892 out:
1896 locks_free_lock(file_lock); 1893 locks_free_lock(file_lock);
1897 return error; 1894 return error;
1898 } 1895 }
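A minimal userspace sketch of F_SETLK/F_SETLKW as implemented above: lock a 100-byte record for writing (sleeping if necessary), modify it, then unlock.

/* Record locking with fcntl(); the record layout is hypothetical. */
#include <fcntl.h>
#include <unistd.h>

static int update_record(int fd)
{
	struct flock fl = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 100,		/* bytes 0..99 */
	};

	if (fcntl(fd, F_SETLKW, &fl) < 0)	/* may sleep (FL_SLEEP) */
		return -1;

	/* ... read, modify and write the record here ... */

	fl.l_type = F_UNLCK;
	return fcntl(fd, F_SETLK, &fl);
}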
1899 1896
1900 #if BITS_PER_LONG == 32 1897 #if BITS_PER_LONG == 32
1901 /* Report the first existing lock that would conflict with l. 1898 /* Report the first existing lock that would conflict with l.
1902 * This implements the F_GETLK command of fcntl(). 1899 * This implements the F_GETLK command of fcntl().
1903 */ 1900 */
1904 int fcntl_getlk64(struct file *filp, struct flock64 __user *l) 1901 int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
1905 { 1902 {
1906 struct file_lock file_lock; 1903 struct file_lock file_lock;
1907 struct flock64 flock; 1904 struct flock64 flock;
1908 int error; 1905 int error;
1909 1906
1910 error = -EFAULT; 1907 error = -EFAULT;
1911 if (copy_from_user(&flock, l, sizeof(flock))) 1908 if (copy_from_user(&flock, l, sizeof(flock)))
1912 goto out; 1909 goto out;
1913 error = -EINVAL; 1910 error = -EINVAL;
1914 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) 1911 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
1915 goto out; 1912 goto out;
1916 1913
1917 error = flock64_to_posix_lock(filp, &file_lock, &flock); 1914 error = flock64_to_posix_lock(filp, &file_lock, &flock);
1918 if (error) 1915 if (error)
1919 goto out; 1916 goto out;
1920 1917
1921 error = vfs_test_lock(filp, &file_lock); 1918 error = vfs_test_lock(filp, &file_lock);
1922 if (error) 1919 if (error)
1923 goto out; 1920 goto out;
1924 1921
1925 flock.l_type = file_lock.fl_type; 1922 flock.l_type = file_lock.fl_type;
1926 if (file_lock.fl_type != F_UNLCK) 1923 if (file_lock.fl_type != F_UNLCK)
1927 posix_lock_to_flock64(&flock, &file_lock); 1924 posix_lock_to_flock64(&flock, &file_lock);
1928 1925
1929 error = -EFAULT; 1926 error = -EFAULT;
1930 if (!copy_to_user(l, &flock, sizeof(flock))) 1927 if (!copy_to_user(l, &flock, sizeof(flock)))
1931 error = 0; 1928 error = 0;
1932 1929
1933 out: 1930 out:
1934 return error; 1931 return error;
1935 } 1932 }
1936 1933
1937 /* Apply the lock described by l to an open file descriptor. 1934 /* Apply the lock described by l to an open file descriptor.
1938 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1935 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1939 */ 1936 */
1940 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, 1937 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1941 struct flock64 __user *l) 1938 struct flock64 __user *l)
1942 { 1939 {
1943 struct file_lock *file_lock = locks_alloc_lock(); 1940 struct file_lock *file_lock = locks_alloc_lock();
1944 struct flock64 flock; 1941 struct flock64 flock;
1945 struct inode *inode; 1942 struct inode *inode;
1946 struct file *f; 1943 struct file *f;
1947 int error; 1944 int error;
1948 1945
1949 if (file_lock == NULL) 1946 if (file_lock == NULL)
1950 return -ENOLCK; 1947 return -ENOLCK;
1951 1948
1952 /* 1949 /*
1953 * This might block, so we do it before checking the inode. 1950 * This might block, so we do it before checking the inode.
1954 */ 1951 */
1955 error = -EFAULT; 1952 error = -EFAULT;
1956 if (copy_from_user(&flock, l, sizeof(flock))) 1953 if (copy_from_user(&flock, l, sizeof(flock)))
1957 goto out; 1954 goto out;
1958 1955
1959 inode = filp->f_path.dentry->d_inode; 1956 inode = filp->f_path.dentry->d_inode;
1960 1957
1961 /* Don't allow mandatory locks on files that may be memory mapped 1958 /* Don't allow mandatory locks on files that may be memory mapped
1962 * and shared. 1959 * and shared.
1963 */ 1960 */
1964 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { 1961 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
1965 error = -EAGAIN; 1962 error = -EAGAIN;
1966 goto out; 1963 goto out;
1967 } 1964 }
1968 1965
1969 again: 1966 again:
1970 error = flock64_to_posix_lock(filp, file_lock, &flock); 1967 error = flock64_to_posix_lock(filp, file_lock, &flock);
1971 if (error) 1968 if (error)
1972 goto out; 1969 goto out;
1973 if (cmd == F_SETLKW64) { 1970 if (cmd == F_SETLKW64) {
1974 file_lock->fl_flags |= FL_SLEEP; 1971 file_lock->fl_flags |= FL_SLEEP;
1975 } 1972 }
1976 1973
1977 error = -EBADF; 1974 error = -EBADF;
1978 switch (flock.l_type) { 1975 switch (flock.l_type) {
1979 case F_RDLCK: 1976 case F_RDLCK:
1980 if (!(filp->f_mode & FMODE_READ)) 1977 if (!(filp->f_mode & FMODE_READ))
1981 goto out; 1978 goto out;
1982 break; 1979 break;
1983 case F_WRLCK: 1980 case F_WRLCK:
1984 if (!(filp->f_mode & FMODE_WRITE)) 1981 if (!(filp->f_mode & FMODE_WRITE))
1985 goto out; 1982 goto out;
1986 break; 1983 break;
1987 case F_UNLCK: 1984 case F_UNLCK:
1988 break; 1985 break;
1989 default: 1986 default:
1990 error = -EINVAL; 1987 error = -EINVAL;
1991 goto out; 1988 goto out;
1992 } 1989 }
1993 1990
1994 error = do_lock_file_wait(filp, cmd, file_lock); 1991 error = do_lock_file_wait(filp, cmd, file_lock);
1995 1992
1996 /* 1993 /*
1997 * Attempt to detect a close/fcntl race and recover by 1994 * Attempt to detect a close/fcntl race and recover by
1998 * releasing the lock that was just acquired. 1995 * releasing the lock that was just acquired.
1999 */ 1996 */
2000 spin_lock(&current->files->file_lock); 1997 spin_lock(&current->files->file_lock);
2001 f = fcheck(fd); 1998 f = fcheck(fd);
2002 spin_unlock(&current->files->file_lock); 1999 spin_unlock(&current->files->file_lock);
2003 if (!error && f != filp && flock.l_type != F_UNLCK) { 2000 if (!error && f != filp && flock.l_type != F_UNLCK) {
2004 flock.l_type = F_UNLCK; 2001 flock.l_type = F_UNLCK;
2005 goto again; 2002 goto again;
2006 } 2003 }
2007 2004
2008 out: 2005 out:
2009 locks_free_lock(file_lock); 2006 locks_free_lock(file_lock);
2010 return error; 2007 return error;
2011 } 2008 }
2012 #endif /* BITS_PER_LONG == 32 */ 2009 #endif /* BITS_PER_LONG == 32 */
2013 2010
2014 /* 2011 /*
2015 * This function is called when the file is being removed 2012 * This function is called when the file is being removed
2016 * from the task's fd array. POSIX locks belonging to this task 2013 * from the task's fd array. POSIX locks belonging to this task
2017 * are deleted at this time. 2014 * are deleted at this time.
2018 */ 2015 */
2019 void locks_remove_posix(struct file *filp, fl_owner_t owner) 2016 void locks_remove_posix(struct file *filp, fl_owner_t owner)
2020 { 2017 {
2021 struct file_lock lock; 2018 struct file_lock lock;
2022 2019
2023 /* 2020 /*
2024 * If there are no locks held on this file, we don't need to call 2021 * If there are no locks held on this file, we don't need to call
2025 * posix_lock_file(). Another process could be setting a lock on this 2022 * posix_lock_file(). Another process could be setting a lock on this
2026 * file at the same time, but we wouldn't remove that lock anyway. 2023 * file at the same time, but we wouldn't remove that lock anyway.
2027 */ 2024 */
2028 if (!filp->f_path.dentry->d_inode->i_flock) 2025 if (!filp->f_path.dentry->d_inode->i_flock)
2029 return; 2026 return;
2030 2027
2031 lock.fl_type = F_UNLCK; 2028 lock.fl_type = F_UNLCK;
2032 lock.fl_flags = FL_POSIX | FL_CLOSE; 2029 lock.fl_flags = FL_POSIX | FL_CLOSE;
2033 lock.fl_start = 0; 2030 lock.fl_start = 0;
2034 lock.fl_end = OFFSET_MAX; 2031 lock.fl_end = OFFSET_MAX;
2035 lock.fl_owner = owner; 2032 lock.fl_owner = owner;
2036 lock.fl_pid = current->tgid; 2033 lock.fl_pid = current->tgid;
2037 lock.fl_file = filp; 2034 lock.fl_file = filp;
2038 lock.fl_ops = NULL; 2035 lock.fl_ops = NULL;
2039 lock.fl_lmops = NULL; 2036 lock.fl_lmops = NULL;
2040 2037
2041 vfs_lock_file(filp, F_SETLK, &lock, NULL); 2038 vfs_lock_file(filp, F_SETLK, &lock, NULL);
2042 2039
2043 if (lock.fl_ops && lock.fl_ops->fl_release_private) 2040 if (lock.fl_ops && lock.fl_ops->fl_release_private)
2044 lock.fl_ops->fl_release_private(&lock); 2041 lock.fl_ops->fl_release_private(&lock);
2045 } 2042 }
2046 2043
2047 EXPORT_SYMBOL(locks_remove_posix); 2044 EXPORT_SYMBOL(locks_remove_posix);
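The per-owner semantics implemented here have a well-known userspace consequence: closing any descriptor for a file drops all of the process's POSIX locks on that file, even locks taken through a different descriptor. A short illustrative sketch (hypothetical path):

/* Illustrative only: POSIX locks are per (process, file), not per fd. */
#include <fcntl.h>
#include <unistd.h>

static void demo(void)
{
	struct flock fl = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};
	int fd1 = open("/tmp/data", O_RDWR);
	int fd2 = open("/tmp/data", O_RDWR);	/* same file, second fd */

	fcntl(fd1, F_SETLK, &fl);	/* lock taken through fd1 */
	close(fd2);			/* locks_remove_posix() drops it */
	/* The lock set via fd1 is now gone. */
}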
2048 2045
2049 /* 2046 /*
2050 * This function is called on the last close of an open file. 2047 * This function is called on the last close of an open file.
2051 */ 2048 */
2052 void locks_remove_flock(struct file *filp) 2049 void locks_remove_flock(struct file *filp)
2053 { 2050 {
2054 struct inode * inode = filp->f_path.dentry->d_inode; 2051 struct inode * inode = filp->f_path.dentry->d_inode;
2055 struct file_lock *fl; 2052 struct file_lock *fl;
2056 struct file_lock **before; 2053 struct file_lock **before;
2057 2054
2058 if (!inode->i_flock) 2055 if (!inode->i_flock)
2059 return; 2056 return;
2060 2057
2061 if (filp->f_op && filp->f_op->flock) { 2058 if (filp->f_op && filp->f_op->flock) {
2062 struct file_lock fl = { 2059 struct file_lock fl = {
2063 .fl_pid = current->tgid, 2060 .fl_pid = current->tgid,
2064 .fl_file = filp, 2061 .fl_file = filp,
2065 .fl_flags = FL_FLOCK, 2062 .fl_flags = FL_FLOCK,
2066 .fl_type = F_UNLCK, 2063 .fl_type = F_UNLCK,
2067 .fl_end = OFFSET_MAX, 2064 .fl_end = OFFSET_MAX,
2068 }; 2065 };
2069 filp->f_op->flock(filp, F_SETLKW, &fl); 2066 filp->f_op->flock(filp, F_SETLKW, &fl);
2070 if (fl.fl_ops && fl.fl_ops->fl_release_private) 2067 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2071 fl.fl_ops->fl_release_private(&fl); 2068 fl.fl_ops->fl_release_private(&fl);
2072 } 2069 }
2073 2070
2074 lock_flocks(); 2071 lock_flocks();
2075 before = &inode->i_flock; 2072 before = &inode->i_flock;
2076 2073
2077 while ((fl = *before) != NULL) { 2074 while ((fl = *before) != NULL) {
2078 if (fl->fl_file == filp) { 2075 if (fl->fl_file == filp) {
2079 if (IS_FLOCK(fl)) { 2076 if (IS_FLOCK(fl)) {
2080 locks_delete_lock(before); 2077 locks_delete_lock(before);
2081 continue; 2078 continue;
2082 } 2079 }
2083 if (IS_LEASE(fl)) { 2080 if (IS_LEASE(fl)) {
2084 lease_modify(before, F_UNLCK); 2081 lease_modify(before, F_UNLCK);
2085 continue; 2082 continue;
2086 } 2083 }
2087 /* What? */ 2084 /* What? */
2088 BUG(); 2085 BUG();
2089 } 2086 }
2090 before = &fl->fl_next; 2087 before = &fl->fl_next;
2091 } 2088 }
2092 unlock_flocks(); 2089 unlock_flocks();
2093 } 2090 }
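By contrast with the POSIX case above, flock()-style locks are tied to the open file description and are only torn down here, on the last close; a hedged sketch (hypothetical path):

/* Illustrative only: an flock() lock survives duplicate descriptors. */
#include <sys/file.h>
#include <fcntl.h>
#include <unistd.h>

static void demo(void)
{
	int fd = open("/tmp/data", O_RDWR);
	int dup_fd = dup(fd);

	flock(fd, LOCK_EX);
	close(fd);	/* lock still held: dup_fd pins the struct file */
	close(dup_fd);	/* last close: locks_remove_flock() releases it */
}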
2094 2091
2095 /** 2092 /**
2096 * posix_unblock_lock - stop waiting for a file lock 2093 * posix_unblock_lock - stop waiting for a file lock
2097 * @filp: how the file was opened 2094 * @filp: how the file was opened
2098 * @waiter: the lock which was waiting 2095 * @waiter: the lock which was waiting
2099 * 2096 *
2100 * lockd needs to block waiting for locks. 2097 * lockd needs to block waiting for locks.
2101 */ 2098 */
2102 int 2099 int
2103 posix_unblock_lock(struct file *filp, struct file_lock *waiter) 2100 posix_unblock_lock(struct file *filp, struct file_lock *waiter)
2104 { 2101 {
2105 int status = 0; 2102 int status = 0;
2106 2103
2107 lock_flocks(); 2104 lock_flocks();
2108 if (waiter->fl_next) 2105 if (waiter->fl_next)
2109 __locks_delete_block(waiter); 2106 __locks_delete_block(waiter);
2110 else 2107 else
2111 status = -ENOENT; 2108 status = -ENOENT;
2112 unlock_flocks(); 2109 unlock_flocks();
2113 return status; 2110 return status;
2114 } 2111 }
2115 2112
2116 EXPORT_SYMBOL(posix_unblock_lock); 2113 EXPORT_SYMBOL(posix_unblock_lock);
2117 2114
2118 /** 2115 /**
2119 * vfs_cancel_lock - file byte range unblock lock 2116 * vfs_cancel_lock - file byte range unblock lock
2120 * @filp: The file to apply the unblock to 2117 * @filp: The file to apply the unblock to
2121 * @fl: The lock to be unblocked 2118 * @fl: The lock to be unblocked
2122 * 2119 *
2123 * Used by lock managers to cancel blocked requests 2120 * Used by lock managers to cancel blocked requests
2124 */ 2121 */
2125 int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 2122 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2126 { 2123 {
2127 if (filp->f_op && filp->f_op->lock) 2124 if (filp->f_op && filp->f_op->lock)
2128 return filp->f_op->lock(filp, F_CANCELLK, fl); 2125 return filp->f_op->lock(filp, F_CANCELLK, fl);
2129 return 0; 2126 return 0;
2130 } 2127 }
2131 2128
2132 EXPORT_SYMBOL_GPL(vfs_cancel_lock); 2129 EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2133 2130
2134 #ifdef CONFIG_PROC_FS 2131 #ifdef CONFIG_PROC_FS
2135 #include <linux/proc_fs.h> 2132 #include <linux/proc_fs.h>
2136 #include <linux/seq_file.h> 2133 #include <linux/seq_file.h>
2137 2134
2138 static void lock_get_status(struct seq_file *f, struct file_lock *fl, 2135 static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2139 loff_t id, char *pfx) 2136 loff_t id, char *pfx)
2140 { 2137 {
2141 struct inode *inode = NULL; 2138 struct inode *inode = NULL;
2142 unsigned int fl_pid; 2139 unsigned int fl_pid;
2143 2140
2144 if (fl->fl_nspid) 2141 if (fl->fl_nspid)
2145 fl_pid = pid_vnr(fl->fl_nspid); 2142 fl_pid = pid_vnr(fl->fl_nspid);
2146 else 2143 else
2147 fl_pid = fl->fl_pid; 2144 fl_pid = fl->fl_pid;
2148 2145
2149 if (fl->fl_file != NULL) 2146 if (fl->fl_file != NULL)
2150 inode = fl->fl_file->f_path.dentry->d_inode; 2147 inode = fl->fl_file->f_path.dentry->d_inode;
2151 2148
2152 seq_printf(f, "%lld:%s ", id, pfx); 2149 seq_printf(f, "%lld:%s ", id, pfx);
2153 if (IS_POSIX(fl)) { 2150 if (IS_POSIX(fl)) {
2154 seq_printf(f, "%6s %s ", 2151 seq_printf(f, "%6s %s ",
2155 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", 2152 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
2156 (inode == NULL) ? "*NOINODE*" : 2153 (inode == NULL) ? "*NOINODE*" :
2157 mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); 2154 mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
2158 } else if (IS_FLOCK(fl)) { 2155 } else if (IS_FLOCK(fl)) {
2159 if (fl->fl_type & LOCK_MAND) { 2156 if (fl->fl_type & LOCK_MAND) {
2160 seq_printf(f, "FLOCK MSNFS "); 2157 seq_printf(f, "FLOCK MSNFS ");
2161 } else { 2158 } else {
2162 seq_printf(f, "FLOCK ADVISORY "); 2159 seq_printf(f, "FLOCK ADVISORY ");
2163 } 2160 }
2164 } else if (IS_LEASE(fl)) { 2161 } else if (IS_LEASE(fl)) {
2165 seq_printf(f, "LEASE "); 2162 seq_printf(f, "LEASE ");
2166 if (fl->fl_type & F_INPROGRESS) 2163 if (fl->fl_type & F_INPROGRESS)
2167 seq_printf(f, "BREAKING "); 2164 seq_printf(f, "BREAKING ");
2168 else if (fl->fl_file) 2165 else if (fl->fl_file)
2169 seq_printf(f, "ACTIVE "); 2166 seq_printf(f, "ACTIVE ");
2170 else 2167 else
2171 seq_printf(f, "BREAKER "); 2168 seq_printf(f, "BREAKER ");
2172 } else { 2169 } else {
2173 seq_printf(f, "UNKNOWN UNKNOWN "); 2170 seq_printf(f, "UNKNOWN UNKNOWN ");
2174 } 2171 }
2175 if (fl->fl_type & LOCK_MAND) { 2172 if (fl->fl_type & LOCK_MAND) {
2176 seq_printf(f, "%s ", 2173 seq_printf(f, "%s ",
2177 (fl->fl_type & LOCK_READ) 2174 (fl->fl_type & LOCK_READ)
2178 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " 2175 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ "
2179 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); 2176 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
2180 } else { 2177 } else {
2181 seq_printf(f, "%s ", 2178 seq_printf(f, "%s ",
2182 (fl->fl_type & F_INPROGRESS) 2179 (fl->fl_type & F_INPROGRESS)
2183 ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " 2180 ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ "
2184 : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); 2181 : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ ");
2185 } 2182 }
2186 if (inode) { 2183 if (inode) {
2187 #ifdef WE_CAN_BREAK_LSLK_NOW 2184 #ifdef WE_CAN_BREAK_LSLK_NOW
2188 seq_printf(f, "%d %s:%ld ", fl_pid, 2185 seq_printf(f, "%d %s:%ld ", fl_pid,
2189 inode->i_sb->s_id, inode->i_ino); 2186 inode->i_sb->s_id, inode->i_ino);
2190 #else 2187 #else
2191 /* userspace relies on this representation of dev_t ;-( */ 2188 /* userspace relies on this representation of dev_t ;-( */
2192 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, 2189 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
2193 MAJOR(inode->i_sb->s_dev), 2190 MAJOR(inode->i_sb->s_dev),
2194 MINOR(inode->i_sb->s_dev), inode->i_ino); 2191 MINOR(inode->i_sb->s_dev), inode->i_ino);
2195 #endif 2192 #endif
2196 } else { 2193 } else {
2197 seq_printf(f, "%d <none>:0 ", fl_pid); 2194 seq_printf(f, "%d <none>:0 ", fl_pid);
2198 } 2195 }
2199 if (IS_POSIX(fl)) { 2196 if (IS_POSIX(fl)) {
2200 if (fl->fl_end == OFFSET_MAX) 2197 if (fl->fl_end == OFFSET_MAX)
2201 seq_printf(f, "%Ld EOF\n", fl->fl_start); 2198 seq_printf(f, "%Ld EOF\n", fl->fl_start);
2202 else 2199 else
2203 seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); 2200 seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2204 } else { 2201 } else {
2205 seq_printf(f, "0 EOF\n"); 2202 seq_printf(f, "0 EOF\n");
2206 } 2203 }
2207 } 2204 }
2208 2205
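For reference, the seq_printf() calls above produce the familiar /proc/locks record format; the values below are purely illustrative:

	1: POSIX  ADVISORY  WRITE 1234 08:01:393219 0 EOF
	2: FLOCK  ADVISORY  WRITE 1973 08:01:131090 0 EOF
	2: -> FLOCK  ADVISORY  WRITE 1974 08:01:131090 0 EOF

The "->" prefix (the pfx argument) marks requests blocked on the lock printed just above them.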
2209 static int locks_show(struct seq_file *f, void *v) 2206 static int locks_show(struct seq_file *f, void *v)
2210 { 2207 {
2211 struct file_lock *fl, *bfl; 2208 struct file_lock *fl, *bfl;
2212 2209
2213 fl = list_entry(v, struct file_lock, fl_link); 2210 fl = list_entry(v, struct file_lock, fl_link);
2214 2211
2215 lock_get_status(f, fl, *((loff_t *)f->private), ""); 2212 lock_get_status(f, fl, *((loff_t *)f->private), "");
2216 2213
2217 list_for_each_entry(bfl, &fl->fl_block, fl_block) 2214 list_for_each_entry(bfl, &fl->fl_block, fl_block)
2218 lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); 2215 lock_get_status(f, bfl, *((loff_t *)f->private), " ->");
2219 2216
2220 return 0; 2217 return 0;
2221 } 2218 }
2222 2219
2223 static void *locks_start(struct seq_file *f, loff_t *pos) 2220 static void *locks_start(struct seq_file *f, loff_t *pos)
2224 { 2221 {
2225 loff_t *p = f->private; 2222 loff_t *p = f->private;
2226 2223
2227 lock_flocks(); 2224 lock_flocks();
2228 *p = (*pos + 1); 2225 *p = (*pos + 1);
2229 return seq_list_start(&file_lock_list, *pos); 2226 return seq_list_start(&file_lock_list, *pos);
2230 } 2227 }
2231 2228
2232 static void *locks_next(struct seq_file *f, void *v, loff_t *pos) 2229 static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2233 { 2230 {
2234 loff_t *p = f->private; 2231 loff_t *p = f->private;
2235 ++*p; 2232 ++*p;
2236 return seq_list_next(v, &file_lock_list, pos); 2233 return seq_list_next(v, &file_lock_list, pos);
2237 } 2234 }
2238 2235
2239 static void locks_stop(struct seq_file *f, void *v) 2236 static void locks_stop(struct seq_file *f, void *v)
2240 { 2237 {
2241 unlock_flocks(); 2238 unlock_flocks();
2242 } 2239 }
2243 2240
2244 static const struct seq_operations locks_seq_operations = { 2241 static const struct seq_operations locks_seq_operations = {
2245 .start = locks_start, 2242 .start = locks_start,
2246 .next = locks_next, 2243 .next = locks_next,
2247 .stop = locks_stop, 2244 .stop = locks_stop,
2248 .show = locks_show, 2245 .show = locks_show,
2249 }; 2246 };
2250 2247
2251 static int locks_open(struct inode *inode, struct file *filp) 2248 static int locks_open(struct inode *inode, struct file *filp)
2252 { 2249 {
2253 return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); 2250 return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t));
2254 } 2251 }
2255 2252
2256 static const struct file_operations proc_locks_operations = { 2253 static const struct file_operations proc_locks_operations = {
2257 .open = locks_open, 2254 .open = locks_open,
2258 .read = seq_read, 2255 .read = seq_read,
2259 .llseek = seq_lseek, 2256 .llseek = seq_lseek,
2260 .release = seq_release_private, 2257 .release = seq_release_private,
2261 }; 2258 };
2262 2259
2263 static int __init proc_locks_init(void) 2260 static int __init proc_locks_init(void)
2264 { 2261 {
2265 proc_create("locks", 0, NULL, &proc_locks_operations); 2262 proc_create("locks", 0, NULL, &proc_locks_operations);
2266 return 0; 2263 return 0;
2267 } 2264 }
2268 module_init(proc_locks_init); 2265 module_init(proc_locks_init);
2269 #endif 2266 #endif
2270 2267
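Since the lock table is only exported through /proc/locks, the consumer side is ordinary userspace I/O. A minimal reader, as a sketch:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/locks", "r");

	if (!f) {
		perror("/proc/locks");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* one record per lock, blockers marked with "->" */
	fclose(f);
	return 0;
}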
2271 /** 2268 /**
2272 * lock_may_read - checks that the region is free of locks 2269 * lock_may_read - checks that the region is free of locks
2273 * @inode: the inode that is being read 2270 * @inode: the inode that is being read
2274 * @start: the first byte to read 2271 * @start: the first byte to read
2275 * @len: the number of bytes to read 2272 * @len: the number of bytes to read
2276 * 2273 *
2277 * Emulates Windows locking requirements. Whole-file 2274 * Emulates Windows locking requirements. Whole-file
2278 * mandatory locks (share modes) can prohibit a read and 2275 * mandatory locks (share modes) can prohibit a read and
2279 * byte-range POSIX locks can prohibit a read if they overlap. 2276 * byte-range POSIX locks can prohibit a read if they overlap.
2280 * 2277 *
2281 * N.B. this function is only ever called 2278 * N.B. this function is only ever called
2282 * from knfsd and ownership of locks is never checked. 2279 * from knfsd and ownership of locks is never checked.
2283 */ 2280 */
2284 int lock_may_read(struct inode *inode, loff_t start, unsigned long len) 2281 int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
2285 { 2282 {
2286 struct file_lock *fl; 2283 struct file_lock *fl;
2287 int result = 1; 2284 int result = 1;
2288 lock_flocks(); 2285 lock_flocks();
2289 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 2286 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
2290 if (IS_POSIX(fl)) { 2287 if (IS_POSIX(fl)) {
2291 if (fl->fl_type == F_RDLCK) 2288 if (fl->fl_type == F_RDLCK)
2292 continue; 2289 continue;
2293 if ((fl->fl_end < start) || (fl->fl_start > (start + len))) 2290 if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
2294 continue; 2291 continue;
2295 } else if (IS_FLOCK(fl)) { 2292 } else if (IS_FLOCK(fl)) {
2296 if (!(fl->fl_type & LOCK_MAND)) 2293 if (!(fl->fl_type & LOCK_MAND))
2297 continue; 2294 continue;
2298 if (fl->fl_type & LOCK_READ) 2295 if (fl->fl_type & LOCK_READ)
2299 continue; 2296 continue;
2300 } else 2297 } else
2301 continue; 2298 continue;
2302 result = 0; 2299 result = 0;
2303 break; 2300 break;
2304 } 2301 }
2305 unlock_flocks(); 2302 unlock_flocks();
2306 return result; 2303 return result;
2307 } 2304 }
2308 2305
2309 EXPORT_SYMBOL(lock_may_read); 2306 EXPORT_SYMBOL(lock_may_read);
2310 2307
2311 /** 2308 /**
2312 * lock_may_write - checks that the region is free of locks 2309 * lock_may_write - checks that the region is free of locks
2313 * @inode: the inode that is being written 2310 * @inode: the inode that is being written
2314 * @start: the first byte to write 2311 * @start: the first byte to write
2315 * @len: the number of bytes to write 2312 * @len: the number of bytes to write
2316 * 2313 *
2317 * Emulates Windows locking requirements. Whole-file 2314 * Emulates Windows locking requirements. Whole-file
2318 * mandatory locks (share modes) can prohibit a write and 2315 * mandatory locks (share modes) can prohibit a write and
2319 * byte-range POSIX locks can prohibit a write if they overlap. 2316 * byte-range POSIX locks can prohibit a write if they overlap.
2320 * 2317 *
2321 * N.B. this function is only ever called 2318 * N.B. this function is only ever called
2322 * from knfsd and ownership of locks is never checked. 2319 * from knfsd and ownership of locks is never checked.
2323 */ 2320 */
2324 int lock_may_write(struct inode *inode, loff_t start, unsigned long len) 2321 int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
2325 { 2322 {
2326 struct file_lock *fl; 2323 struct file_lock *fl;
2327 int result = 1; 2324 int result = 1;
2328 lock_flocks(); 2325 lock_flocks();
2329 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 2326 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
2330 if (IS_POSIX(fl)) { 2327 if (IS_POSIX(fl)) {
2331 if ((fl->fl_end < start) || (fl->fl_start > (start + len))) 2328 if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
2332 continue; 2329 continue;
2333 } else if (IS_FLOCK(fl)) { 2330 } else if (IS_FLOCK(fl)) {
2334 if (!(fl->fl_type & LOCK_MAND)) 2331 if (!(fl->fl_type & LOCK_MAND))
2335 continue; 2332 continue;
2336 if (fl->fl_type & LOCK_WRITE) 2333 if (fl->fl_type & LOCK_WRITE)
2337 continue; 2334 continue;
2338 } else 2335 } else
2339 continue; 2336 continue;
2340 result = 0; 2337 result = 0;
2341 break; 2338 break;
2342 } 2339 }
2343 unlock_flocks(); 2340 unlock_flocks();
2344 return result; 2341 return result;
2345 } 2342 }
2346 2343
2347 EXPORT_SYMBOL(lock_may_write); 2344 EXPORT_SYMBOL(lock_may_write);
2348 2345
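As the kernel-doc above stresses, these two helpers are only meaningful for knfsd-style callers that cannot name a lock owner. A hedged sketch of the calling pattern; the function name is invented for illustration:

/* Hedged sketch: reject a server-side read that overlaps a conflicting lock. */
static int nfsd_style_read_allowed(struct inode *inode, loff_t offset,
				   unsigned long count)
{
	if (!lock_may_read(inode, offset, count))
		return -EAGAIN;		/* region locked; client should retry */
	return 0;
}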
2349 static int __init filelock_init(void) 2346 static int __init filelock_init(void)
2350 { 2347 {
2351 filelock_cache = kmem_cache_create("file_lock_cache", 2348 filelock_cache = kmem_cache_create("file_lock_cache",
2352 sizeof(struct file_lock), 0, SLAB_PANIC, 2349 sizeof(struct file_lock), 0, SLAB_PANIC,
2353 init_once); 2350 init_once);
2354 return 0; 2351 return 0;
2355 } 2352 }
2356 2353
2357 core_initcall(filelock_init); 2354 core_initcall(filelock_init);
2358 2355
1 #ifndef _LINUX_FS_H 1 #ifndef _LINUX_FS_H
2 #define _LINUX_FS_H 2 #define _LINUX_FS_H
3 3
4 /* 4 /*
5 * This file has definitions for some important file table 5 * This file has definitions for some important file table
6 * structures etc. 6 * structures etc.
7 */ 7 */
8 8
9 #include <linux/limits.h> 9 #include <linux/limits.h>
10 #include <linux/ioctl.h> 10 #include <linux/ioctl.h>
11 #include <linux/blk_types.h> 11 #include <linux/blk_types.h>
12 #include <linux/types.h> 12 #include <linux/types.h>
13 13
14 /* 14 /*
15 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change 15 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
16 * the file limit at runtime and only root can increase the per-process 16 * the file limit at runtime and only root can increase the per-process
17 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 17 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
18 * upper limit on files-per-process. 18 * upper limit on files-per-process.
19 * 19 *
20 * Some programs (notably those using select()) may have to be 20 * Some programs (notably those using select()) may have to be
21 * recompiled to take full advantage of the new limits.. 21 * recompiled to take full advantage of the new limits..
22 */ 22 */
23 23
24 /* Fixed constants first: */ 24 /* Fixed constants first: */
25 #undef NR_OPEN 25 #undef NR_OPEN
26 #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ 26 #define INR_OPEN 1024 /* Initial setting for nfile rlimits */
27 27
28 #define BLOCK_SIZE_BITS 10 28 #define BLOCK_SIZE_BITS 10
29 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) 29 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
30 30
31 #define SEEK_SET 0 /* seek relative to beginning of file */ 31 #define SEEK_SET 0 /* seek relative to beginning of file */
32 #define SEEK_CUR 1 /* seek relative to current file position */ 32 #define SEEK_CUR 1 /* seek relative to current file position */
33 #define SEEK_END 2 /* seek relative to end of file */ 33 #define SEEK_END 2 /* seek relative to end of file */
34 #define SEEK_MAX SEEK_END 34 #define SEEK_MAX SEEK_END
35 35
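The SEEK_* constants are part of the userspace ABI, so they show up directly in lseek(2) callers. A small example that uses SEEK_END to find a file's size (the path is a placeholder; error handling is minimal):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>

int main(int argc, char **argv)
{
	int fd = open(argc > 1 ? argv[1] : "/etc/hostname", O_RDONLY);
	off_t size;

	if (fd < 0)
		return 1;
	size = lseek(fd, 0, SEEK_END);	/* offset just past the last byte */
	lseek(fd, 0, SEEK_SET);		/* rewind before reading */
	printf("%lld bytes\n", (long long)size);
	close(fd);
	return 0;
}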
36 struct fstrim_range { 36 struct fstrim_range {
37 uint64_t start; 37 uint64_t start;
38 uint64_t len; 38 uint64_t len;
39 uint64_t minlen; 39 uint64_t minlen;
40 }; 40 };
41 41
42 /* And dynamically-tunable limits and defaults: */ 42 /* And dynamically-tunable limits and defaults: */
43 struct files_stat_struct { 43 struct files_stat_struct {
44 unsigned long nr_files; /* read only */ 44 unsigned long nr_files; /* read only */
45 unsigned long nr_free_files; /* read only */ 45 unsigned long nr_free_files; /* read only */
46 unsigned long max_files; /* tunable */ 46 unsigned long max_files; /* tunable */
47 }; 47 };
48 48
49 struct inodes_stat_t { 49 struct inodes_stat_t {
50 int nr_inodes; 50 int nr_inodes;
51 int nr_unused; 51 int nr_unused;
52 int dummy[5]; /* padding for sysctl ABI compatibility */ 52 int dummy[5]; /* padding for sysctl ABI compatibility */
53 }; 53 };
54 54
55 55
56 #define NR_FILE 8192 /* this can well be larger on a larger system */ 56 #define NR_FILE 8192 /* this can well be larger on a larger system */
57 57
58 #define MAY_EXEC 1 58 #define MAY_EXEC 1
59 #define MAY_WRITE 2 59 #define MAY_WRITE 2
60 #define MAY_READ 4 60 #define MAY_READ 4
61 #define MAY_APPEND 8 61 #define MAY_APPEND 8
62 #define MAY_ACCESS 16 62 #define MAY_ACCESS 16
63 #define MAY_OPEN 32 63 #define MAY_OPEN 32
64 #define MAY_CHDIR 64 64 #define MAY_CHDIR 64
65 65
66 /* 66 /*
67 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond 67 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
68 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() 68 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
69 */ 69 */
70 70
71 /* file is open for reading */ 71 /* file is open for reading */
72 #define FMODE_READ ((__force fmode_t)0x1) 72 #define FMODE_READ ((__force fmode_t)0x1)
73 /* file is open for writing */ 73 /* file is open for writing */
74 #define FMODE_WRITE ((__force fmode_t)0x2) 74 #define FMODE_WRITE ((__force fmode_t)0x2)
75 /* file is seekable */ 75 /* file is seekable */
76 #define FMODE_LSEEK ((__force fmode_t)0x4) 76 #define FMODE_LSEEK ((__force fmode_t)0x4)
77 /* file can be accessed using pread */ 77 /* file can be accessed using pread */
78 #define FMODE_PREAD ((__force fmode_t)0x8) 78 #define FMODE_PREAD ((__force fmode_t)0x8)
79 /* file can be accessed using pwrite */ 79 /* file can be accessed using pwrite */
80 #define FMODE_PWRITE ((__force fmode_t)0x10) 80 #define FMODE_PWRITE ((__force fmode_t)0x10)
81 /* File is opened for execution with sys_execve / sys_uselib */ 81 /* File is opened for execution with sys_execve / sys_uselib */
82 #define FMODE_EXEC ((__force fmode_t)0x20) 82 #define FMODE_EXEC ((__force fmode_t)0x20)
83 /* File is opened with O_NDELAY (only set for block devices) */ 83 /* File is opened with O_NDELAY (only set for block devices) */
84 #define FMODE_NDELAY ((__force fmode_t)0x40) 84 #define FMODE_NDELAY ((__force fmode_t)0x40)
85 /* File is opened with O_EXCL (only set for block devices) */ 85 /* File is opened with O_EXCL (only set for block devices) */
86 #define FMODE_EXCL ((__force fmode_t)0x80) 86 #define FMODE_EXCL ((__force fmode_t)0x80)
87 /* File is opened using open(.., 3, ..) and is writeable only for ioctls 87 /* File is opened using open(.., 3, ..) and is writeable only for ioctls
88 (special hack for floppy.c) */ 88 (special hack for floppy.c) */
89 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) 89 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
90 90
91 /* 91 /*
92 * Don't update ctime and mtime. 92 * Don't update ctime and mtime.
93 * 93 *
94 * Currently a special hack for the XFS open_by_handle ioctl, but we'll 94 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
95 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 95 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
96 */ 96 */
97 #define FMODE_NOCMTIME ((__force fmode_t)0x800) 97 #define FMODE_NOCMTIME ((__force fmode_t)0x800)
98 98
99 /* Expect random access pattern */ 99 /* Expect random access pattern */
100 #define FMODE_RANDOM ((__force fmode_t)0x1000) 100 #define FMODE_RANDOM ((__force fmode_t)0x1000)
101 101
102 /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 102 /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
103 #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 103 #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
104 104
105 /* File was opened by fanotify and shouldn't generate fanotify events */ 105 /* File was opened by fanotify and shouldn't generate fanotify events */
106 #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 106 #define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
107 107
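The f_mode bits are tested throughout the VFS and in drivers; the helper below is a hedged, made-up example of the usual pattern (kernel context assumed):

/* Hedged sketch: gate a driver operation on how the file was opened. */
static int example_check_f_mode(struct file *filp)
{
	if (!(filp->f_mode & FMODE_READ))
		return -EBADF;		/* not opened for reading */
	if (filp->f_mode & FMODE_NDELAY)
		return -EAGAIN;		/* block device opened with O_NDELAY */
	return 0;
}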
108 /* 108 /*
109 * The below are the various read and write types that we support. Some of 109 * The below are the various read and write types that we support. Some of
110 * them include behavioral modifiers that send information down to the 110 * them include behavioral modifiers that send information down to the
111 * block layer and IO scheduler. Terminology: 111 * block layer and IO scheduler. Terminology:
112 * 112 *
113 * The block layer uses device plugging to defer IO a little bit, in 113 * The block layer uses device plugging to defer IO a little bit, in
114 * the hope that we will see more IO very shortly. This increases 114 * the hope that we will see more IO very shortly. This increases
115 * coalescing of adjacent IO and thus reduces the number of IOs we 115 * coalescing of adjacent IO and thus reduces the number of IOs we
116 * have to send to the device. It also allows for better queuing, 116 * have to send to the device. It also allows for better queuing,
117 * if the IO isn't mergeable. If the caller is going to be waiting 117 * if the IO isn't mergeable. If the caller is going to be waiting
118 * for the IO, then he must ensure that the device is unplugged so 118 * for the IO, then he must ensure that the device is unplugged so
119 * that the IO is dispatched to the driver. 119 * that the IO is dispatched to the driver.
120 * 120 *
121 * All IO is handled async in Linux. This is fine for background 121 * All IO is handled async in Linux. This is fine for background
122 * writes, but for reads or writes that someone waits for completion 122 * writes, but for reads or writes that someone waits for completion
123 * on, we want to notify the block layer and IO scheduler so that they 123 * on, we want to notify the block layer and IO scheduler so that they
124 * know about it. That allows them to make better scheduling 124 * know about it. That allows them to make better scheduling
125 * decisions. So when the below references 'sync' and 'async', it 125 * decisions. So when the below references 'sync' and 'async', it
126 * is referencing this priority hint. 126 * is referencing this priority hint.
127 * 127 *
128 * With that in mind, the available types are: 128 * With that in mind, the available types are:
129 * 129 *
130 * READ A normal read operation. Device will be plugged. 130 * READ A normal read operation. Device will be plugged.
131 * READ_SYNC A synchronous read. Device is not plugged, caller can 131 * READ_SYNC A synchronous read. Device is not plugged, caller can
132 * immediately wait on this read without caring about 132 * immediately wait on this read without caring about
133 * unplugging. 133 * unplugging.
134 * READA Used for read-ahead operations. Lower priority, and the 134 * READA Used for read-ahead operations. Lower priority, and the
135 * block layer could (in theory) choose to ignore this 135 * block layer could (in theory) choose to ignore this
136 * request if it runs into resource problems. 136 * request if it runs into resource problems.
137 * WRITE A normal async write. Device will be plugged. 137 * WRITE A normal async write. Device will be plugged.
138 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down 138 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
139 * the hint that someone will be waiting on this IO 139 * the hint that someone will be waiting on this IO
140 * shortly. The device must still be unplugged explicitly, 140 * shortly. The device must still be unplugged explicitly,
141 * WRITE_SYNC_PLUG does not do this as we could be 141 * WRITE_SYNC_PLUG does not do this as we could be
142 * submitting more writes before we actually wait on any 142 * submitting more writes before we actually wait on any
143 * of them. 143 * of them.
144 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device 144 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
145 * immediately after submission. The write equivalent 145 * immediately after submission. The write equivalent
146 * of READ_SYNC. 146 * of READ_SYNC.
147 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. 147 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
148 * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. 148 * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
149 * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on 149 * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
150 * non-volatile media on completion. 150 * non-volatile media on completion.
151 * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded 151 * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
152 * by a cache flush and data is guaranteed to be on 152 * by a cache flush and data is guaranteed to be on
153 * non-volatile media on completion. 153 * non-volatile media on completion.
154 * 154 *
155 */ 155 */
156 #define RW_MASK REQ_WRITE 156 #define RW_MASK REQ_WRITE
157 #define RWA_MASK REQ_RAHEAD 157 #define RWA_MASK REQ_RAHEAD
158 158
159 #define READ 0 159 #define READ 0
160 #define WRITE RW_MASK 160 #define WRITE RW_MASK
161 #define READA RWA_MASK 161 #define READA RWA_MASK
162 162
163 #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) 163 #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG)
164 #define READ_META (READ | REQ_META) 164 #define READ_META (READ | REQ_META)
165 #define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) 165 #define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE)
166 #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) 166 #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
167 #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) 167 #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC)
168 #define WRITE_META (WRITE | REQ_META) 168 #define WRITE_META (WRITE | REQ_META)
169 #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ 169 #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
170 REQ_FLUSH) 170 REQ_FLUSH)
171 #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ 171 #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
172 REQ_FUA) 172 REQ_FUA)
173 #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ 173 #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
174 REQ_FLUSH | REQ_FUA) 174 REQ_FLUSH | REQ_FUA)
175 175
176 #define SEL_IN 1 176 #define SEL_IN 1
177 #define SEL_OUT 2 177 #define SEL_OUT 2
178 #define SEL_EX 4 178 #define SEL_EX 4
179 179
180 /* public flags for file_system_type */ 180 /* public flags for file_system_type */
181 #define FS_REQUIRES_DEV 1 181 #define FS_REQUIRES_DEV 1
182 #define FS_BINARY_MOUNTDATA 2 182 #define FS_BINARY_MOUNTDATA 2
183 #define FS_HAS_SUBTYPE 4 183 #define FS_HAS_SUBTYPE 4
184 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 184 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
185 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() 185 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
186 * during rename() internally. 186 * during rename() internally.
187 */ 187 */
188 188
189 /* 189 /*
190 * These are the fs-independent mount-flags: up to 32 flags are supported 190 * These are the fs-independent mount-flags: up to 32 flags are supported
191 */ 191 */
192 #define MS_RDONLY 1 /* Mount read-only */ 192 #define MS_RDONLY 1 /* Mount read-only */
193 #define MS_NOSUID 2 /* Ignore suid and sgid bits */ 193 #define MS_NOSUID 2 /* Ignore suid and sgid bits */
194 #define MS_NODEV 4 /* Disallow access to device special files */ 194 #define MS_NODEV 4 /* Disallow access to device special files */
195 #define MS_NOEXEC 8 /* Disallow program execution */ 195 #define MS_NOEXEC 8 /* Disallow program execution */
196 #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ 196 #define MS_SYNCHRONOUS 16 /* Writes are synced at once */
197 #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ 197 #define MS_REMOUNT 32 /* Alter flags of a mounted FS */
198 #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ 198 #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
199 #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ 199 #define MS_DIRSYNC 128 /* Directory modifications are synchronous */
200 #define MS_NOATIME 1024 /* Do not update access times. */ 200 #define MS_NOATIME 1024 /* Do not update access times. */
201 #define MS_NODIRATIME 2048 /* Do not update directory access times */ 201 #define MS_NODIRATIME 2048 /* Do not update directory access times */
202 #define MS_BIND 4096 202 #define MS_BIND 4096
203 #define MS_MOVE 8192 203 #define MS_MOVE 8192
204 #define MS_REC 16384 204 #define MS_REC 16384
205 #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. 205 #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
206 MS_VERBOSE is deprecated. */ 206 MS_VERBOSE is deprecated. */
207 #define MS_SILENT 32768 207 #define MS_SILENT 32768
208 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ 208 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
209 #define MS_UNBINDABLE (1<<17) /* change to unbindable */ 209 #define MS_UNBINDABLE (1<<17) /* change to unbindable */
210 #define MS_PRIVATE (1<<18) /* change to private */ 210 #define MS_PRIVATE (1<<18) /* change to private */
211 #define MS_SLAVE (1<<19) /* change to slave */ 211 #define MS_SLAVE (1<<19) /* change to slave */
212 #define MS_SHARED (1<<20) /* change to shared */ 212 #define MS_SHARED (1<<20) /* change to shared */
213 #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ 213 #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
214 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ 214 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
215 #define MS_I_VERSION (1<<23) /* Update inode I_version field */ 215 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
216 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ 216 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
217 #define MS_BORN (1<<29) 217 #define MS_BORN (1<<29)
218 #define MS_ACTIVE (1<<30) 218 #define MS_ACTIVE (1<<30)
219 #define MS_NOUSER (1<<31) 219 #define MS_NOUSER (1<<31)
220 220
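The MS_* bits double as the flags argument of the mount(2) system call (userspace gets the same values via <sys/mount.h>). A sketch that remounts a filesystem read-only with atime updates disabled; the mount point is a placeholder and the call needs CAP_SYS_ADMIN:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Source and fstype are ignored for MS_REMOUNT, so NULL is fine. */
	if (mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY | MS_NOATIME, NULL)) {
		perror("mount");
		return 1;
	}
	return 0;
}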
221 /* 221 /*
222 * Superblock flags that can be altered by MS_REMOUNT 222 * Superblock flags that can be altered by MS_REMOUNT
223 */ 223 */
224 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) 224 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
225 225
226 /* 226 /*
227 * Old magic mount flag and mask 227 * Old magic mount flag and mask
228 */ 228 */
229 #define MS_MGC_VAL 0xC0ED0000 229 #define MS_MGC_VAL 0xC0ED0000
230 #define MS_MGC_MSK 0xffff0000 230 #define MS_MGC_MSK 0xffff0000
231 231
232 /* Inode flags - they have no relation to the superblock flags now */ 232 /* Inode flags - they have no relation to the superblock flags now */
233 233
234 #define S_SYNC 1 /* Writes are synced at once */ 234 #define S_SYNC 1 /* Writes are synced at once */
235 #define S_NOATIME 2 /* Do not update access times */ 235 #define S_NOATIME 2 /* Do not update access times */
236 #define S_APPEND 4 /* Append-only file */ 236 #define S_APPEND 4 /* Append-only file */
237 #define S_IMMUTABLE 8 /* Immutable file */ 237 #define S_IMMUTABLE 8 /* Immutable file */
238 #define S_DEAD 16 /* removed, but still open directory */ 238 #define S_DEAD 16 /* removed, but still open directory */
239 #define S_NOQUOTA 32 /* Inode is not counted to quota */ 239 #define S_NOQUOTA 32 /* Inode is not counted to quota */
240 #define S_DIRSYNC 64 /* Directory modifications are synchronous */ 240 #define S_DIRSYNC 64 /* Directory modifications are synchronous */
241 #define S_NOCMTIME 128 /* Do not update file c/mtime */ 241 #define S_NOCMTIME 128 /* Do not update file c/mtime */
242 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ 242 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
243 #define S_PRIVATE 512 /* Inode is fs-internal */ 243 #define S_PRIVATE 512 /* Inode is fs-internal */
244 #define S_IMA 1024 /* Inode has an associated IMA struct */ 244 #define S_IMA 1024 /* Inode has an associated IMA struct */
245 245
246 /* 246 /*
247 * Note that nosuid etc flags are inode-specific: setting some file-system 247 * Note that nosuid etc flags are inode-specific: setting some file-system
248 * flags just means all the inodes inherit those flags by default. It might be 248 * flags just means all the inodes inherit those flags by default. It might be
249 * possible to override it selectively if you really wanted to with some 249 * possible to override it selectively if you really wanted to with some
250 * ioctl() that is not currently implemented. 250 * ioctl() that is not currently implemented.
251 * 251 *
252 * Exception: MS_RDONLY is always applied to the entire file system. 252 * Exception: MS_RDONLY is always applied to the entire file system.
253 * 253 *
254 * Unfortunately, it is possible to change a filesystem's flags while it is mounted 254 * Unfortunately, it is possible to change a filesystem's flags while it is mounted
255 * with files in use. This means that the existing inodes will not have their 255 * with files in use. This means that the existing inodes will not have their
256 * i_flags updated. Hence, i_flags no longer inherit the superblock mount 256 * i_flags updated. Hence, i_flags no longer inherit the superblock mount
257 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org 257 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
258 */ 258 */
259 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) 259 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))
260 260
261 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) 261 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
262 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ 262 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \
263 ((inode)->i_flags & S_SYNC)) 263 ((inode)->i_flags & S_SYNC))
264 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ 264 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
265 ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) 265 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
266 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) 266 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
267 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) 267 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
268 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) 268 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
269 269
270 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) 270 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
271 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) 271 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
272 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) 272 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
273 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) 273 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
274 274
275 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) 275 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
276 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) 276 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
277 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) 277 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
278 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 278 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
279 #define IS_IMA(inode) ((inode)->i_flags & S_IMA) 279 #define IS_IMA(inode) ((inode)->i_flags & S_IMA)
280 280
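A hedged illustration of how the IS_* predicates are normally combined; the function is hypothetical and only shows the idiom (kernel context assumed):

/* Hedged sketch of a write-side policy check built from the IS_* macros. */
static int example_may_modify(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return -EROFS;		/* superblock mounted read-only */
	if (IS_IMMUTABLE(inode))
		return -EPERM;		/* per-inode immutable bit */
	if (IS_APPEND(inode))
		return -EPERM;		/* only appending writes are allowed */
	return 0;
}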
281 /* the read-only stuff doesn't really belong here, but any other place is 281 /* the read-only stuff doesn't really belong here, but any other place is
282 probably as bad and I don't want to create yet another include file. */ 282 probably as bad and I don't want to create yet another include file. */
283 283
284 #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ 284 #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
285 #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ 285 #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */
286 #define BLKRRPART _IO(0x12,95) /* re-read partition table */ 286 #define BLKRRPART _IO(0x12,95) /* re-read partition table */
287 #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ 287 #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
288 #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ 288 #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
289 #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ 289 #define BLKRASET _IO(0x12,98) /* set read ahead for block device */
290 #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ 290 #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */
291 #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ 291 #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
292 #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ 292 #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
293 #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ 293 #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
294 #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ 294 #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
295 #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ 295 #define BLKSSZGET _IO(0x12,104)/* get block device sector size */
296 #if 0 296 #if 0
297 #define BLKPG _IO(0x12,105)/* See blkpg.h */ 297 #define BLKPG _IO(0x12,105)/* See blkpg.h */
298 298
299 /* Some people are morons. Do not use sizeof! */ 299 /* Some people are morons. Do not use sizeof! */
300 300
301 #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ 301 #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */
302 #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ 302 #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */
303 /* This was here just to show that the number is taken - 303 /* This was here just to show that the number is taken -
304 probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ 304 probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
305 #endif 305 #endif
306 /* A jump here: 108-111 have been used for various private purposes. */ 306 /* A jump here: 108-111 have been used for various private purposes. */
307 #define BLKBSZGET _IOR(0x12,112,size_t) 307 #define BLKBSZGET _IOR(0x12,112,size_t)
308 #define BLKBSZSET _IOW(0x12,113,size_t) 308 #define BLKBSZSET _IOW(0x12,113,size_t)
309 #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ 309 #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
310 #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) 310 #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
311 #define BLKTRACESTART _IO(0x12,116) 311 #define BLKTRACESTART _IO(0x12,116)
312 #define BLKTRACESTOP _IO(0x12,117) 312 #define BLKTRACESTOP _IO(0x12,117)
313 #define BLKTRACETEARDOWN _IO(0x12,118) 313 #define BLKTRACETEARDOWN _IO(0x12,118)
314 #define BLKDISCARD _IO(0x12,119) 314 #define BLKDISCARD _IO(0x12,119)
315 #define BLKIOMIN _IO(0x12,120) 315 #define BLKIOMIN _IO(0x12,120)
316 #define BLKIOOPT _IO(0x12,121) 316 #define BLKIOOPT _IO(0x12,121)
317 #define BLKALIGNOFF _IO(0x12,122) 317 #define BLKALIGNOFF _IO(0x12,122)
318 #define BLKPBSZGET _IO(0x12,123) 318 #define BLKPBSZGET _IO(0x12,123)
319 #define BLKDISCARDZEROES _IO(0x12,124) 319 #define BLKDISCARDZEROES _IO(0x12,124)
320 #define BLKSECDISCARD _IO(0x12,125) 320 #define BLKSECDISCARD _IO(0x12,125)
321 321
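The BLK* numbers are issued with ioctl(2) against an open block device node. A small userspace sketch querying the device size and logical sector size; the device path is only a placeholder:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	uint64_t bytes;
	int ssz;
	int fd = open(argc > 1 ? argv[1] : "/dev/sda", O_RDONLY);

	if (fd < 0 || ioctl(fd, BLKGETSIZE64, &bytes) || ioctl(fd, BLKSSZGET, &ssz)) {
		perror("blk ioctl");
		return 1;
	}
	printf("%llu bytes, %d-byte logical sectors\n",
	       (unsigned long long)bytes, ssz);
	close(fd);
	return 0;
}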
322 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 322 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
323 #define FIBMAP _IO(0x00,1) /* bmap access */ 323 #define FIBMAP _IO(0x00,1) /* bmap access */
324 #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ 324 #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
325 #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ 325 #define FIFREEZE _IOWR('X', 119, int) /* Freeze */
326 #define FITHAW _IOWR('X', 120, int) /* Thaw */ 326 #define FITHAW _IOWR('X', 120, int) /* Thaw */
327 #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ 327 #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
328 328
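FITRIM is the consumer of the struct fstrim_range defined near the top of this header: the caller passes the byte range to trim and the kernel writes back how much was actually discarded. A userspace sketch trimming the filesystem that holds "/":

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	struct fstrim_range range;
	int fd = open("/", O_RDONLY);

	memset(&range, 0, sizeof(range));
	range.len = UINT64_MAX;			/* "as much as the fs allows" */

	if (fd < 0 || ioctl(fd, FITRIM, &range)) {
		perror("FITRIM");
		return 1;
	}
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}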
329 #define FS_IOC_GETFLAGS _IOR('f', 1, long) 329 #define FS_IOC_GETFLAGS _IOR('f', 1, long)
330 #define FS_IOC_SETFLAGS _IOW('f', 2, long) 330 #define FS_IOC_SETFLAGS _IOW('f', 2, long)
331 #define FS_IOC_GETVERSION _IOR('v', 1, long) 331 #define FS_IOC_GETVERSION _IOR('v', 1, long)
332 #define FS_IOC_SETVERSION _IOW('v', 2, long) 332 #define FS_IOC_SETVERSION _IOW('v', 2, long)
333 #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) 333 #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
334 #define FS_IOC32_GETFLAGS _IOR('f', 1, int) 334 #define FS_IOC32_GETFLAGS _IOR('f', 1, int)
335 #define FS_IOC32_SETFLAGS _IOW('f', 2, int) 335 #define FS_IOC32_SETFLAGS _IOW('f', 2, int)
336 #define FS_IOC32_GETVERSION _IOR('v', 1, int) 336 #define FS_IOC32_GETVERSION _IOR('v', 1, int)
337 #define FS_IOC32_SETVERSION _IOW('v', 2, int) 337 #define FS_IOC32_SETVERSION _IOW('v', 2, int)
338 338
339 /* 339 /*
340 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) 340 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
341 */ 341 */
342 #define FS_SECRM_FL 0x00000001 /* Secure deletion */ 342 #define FS_SECRM_FL 0x00000001 /* Secure deletion */
343 #define FS_UNRM_FL 0x00000002 /* Undelete */ 343 #define FS_UNRM_FL 0x00000002 /* Undelete */
344 #define FS_COMPR_FL 0x00000004 /* Compress file */ 344 #define FS_COMPR_FL 0x00000004 /* Compress file */
345 #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ 345 #define FS_SYNC_FL 0x00000008 /* Synchronous updates */
346 #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ 346 #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
347 #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ 347 #define FS_APPEND_FL 0x00000020 /* writes to file may only append */
348 #define FS_NODUMP_FL 0x00000040 /* do not dump file */ 348 #define FS_NODUMP_FL 0x00000040 /* do not dump file */
349 #define FS_NOATIME_FL 0x00000080 /* do not update atime */ 349 #define FS_NOATIME_FL 0x00000080 /* do not update atime */
350 /* Reserved for compression usage... */ 350 /* Reserved for compression usage... */
351 #define FS_DIRTY_FL 0x00000100 351 #define FS_DIRTY_FL 0x00000100
352 #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ 352 #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
353 #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ 353 #define FS_NOCOMP_FL 0x00000400 /* Don't compress */
354 #define FS_ECOMPR_FL 0x00000800 /* Compression error */ 354 #define FS_ECOMPR_FL 0x00000800 /* Compression error */
355 /* End compression flags --- maybe not all used */ 355 /* End compression flags --- maybe not all used */
356 #define FS_BTREE_FL 0x00001000 /* btree format dir */ 356 #define FS_BTREE_FL 0x00001000 /* btree format dir */
357 #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ 357 #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
358 #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ 358 #define FS_IMAGIC_FL 0x00002000 /* AFS directory */
359 #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ 359 #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
360 #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ 360 #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
361 #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ 361 #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
362 #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 362 #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
363 #define FS_EXTENT_FL 0x00080000 /* Extents */ 363 #define FS_EXTENT_FL 0x00080000 /* Extents */
364 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ 364 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
365 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ 365 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
366 366
367 #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ 367 #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
368 #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ 368 #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
369 369
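These FS_*_FL bits are exactly what the FS_IOC_GETFLAGS/FS_IOC_SETFLAGS ioctls above move between the kernel and tools like lsattr/chattr. A sketch that turns on the append-only bit (the file name is a placeholder; setting the flag requires CAP_LINUX_IMMUTABLE):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	long flags;
	int fd = open(argc > 1 ? argv[1] : "testfile", O_RDONLY);

	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags)) {
		perror("FS_IOC_GETFLAGS");
		return 1;
	}
	flags |= FS_APPEND_FL;			/* writes may only append from now on */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags)) {
		perror("FS_IOC_SETFLAGS");
		return 1;
	}
	close(fd);
	return 0;
}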
370 370
371 #define SYNC_FILE_RANGE_WAIT_BEFORE 1 371 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
372 #define SYNC_FILE_RANGE_WRITE 2 372 #define SYNC_FILE_RANGE_WRITE 2
373 #define SYNC_FILE_RANGE_WAIT_AFTER 4 373 #define SYNC_FILE_RANGE_WAIT_AFTER 4
374 374
375 #ifdef __KERNEL__ 375 #ifdef __KERNEL__
376 376
377 #include <linux/linkage.h> 377 #include <linux/linkage.h>
378 #include <linux/wait.h> 378 #include <linux/wait.h>
379 #include <linux/types.h> 379 #include <linux/types.h>
380 #include <linux/kdev_t.h> 380 #include <linux/kdev_t.h>
381 #include <linux/dcache.h> 381 #include <linux/dcache.h>
382 #include <linux/path.h> 382 #include <linux/path.h>
383 #include <linux/stat.h> 383 #include <linux/stat.h>
384 #include <linux/cache.h> 384 #include <linux/cache.h>
385 #include <linux/kobject.h> 385 #include <linux/kobject.h>
386 #include <linux/list.h> 386 #include <linux/list.h>
387 #include <linux/radix-tree.h> 387 #include <linux/radix-tree.h>
388 #include <linux/prio_tree.h> 388 #include <linux/prio_tree.h>
389 #include <linux/init.h> 389 #include <linux/init.h>
390 #include <linux/pid.h> 390 #include <linux/pid.h>
391 #include <linux/mutex.h> 391 #include <linux/mutex.h>
392 #include <linux/capability.h> 392 #include <linux/capability.h>
393 #include <linux/semaphore.h> 393 #include <linux/semaphore.h>
394 #include <linux/fiemap.h> 394 #include <linux/fiemap.h>
395 395
396 #include <asm/atomic.h> 396 #include <asm/atomic.h>
397 #include <asm/byteorder.h> 397 #include <asm/byteorder.h>
398 398
399 struct export_operations; 399 struct export_operations;
400 struct hd_geometry; 400 struct hd_geometry;
401 struct iovec; 401 struct iovec;
402 struct nameidata; 402 struct nameidata;
403 struct kiocb; 403 struct kiocb;
404 struct pipe_inode_info; 404 struct pipe_inode_info;
405 struct poll_table_struct; 405 struct poll_table_struct;
406 struct kstatfs; 406 struct kstatfs;
407 struct vm_area_struct; 407 struct vm_area_struct;
408 struct vfsmount; 408 struct vfsmount;
409 struct cred; 409 struct cred;
410 410
411 extern void __init inode_init(void); 411 extern void __init inode_init(void);
412 extern void __init inode_init_early(void); 412 extern void __init inode_init_early(void);
413 extern void __init files_init(unsigned long); 413 extern void __init files_init(unsigned long);
414 414
415 extern struct files_stat_struct files_stat; 415 extern struct files_stat_struct files_stat;
416 extern unsigned long get_max_files(void); 416 extern unsigned long get_max_files(void);
417 extern int sysctl_nr_open; 417 extern int sysctl_nr_open;
418 extern struct inodes_stat_t inodes_stat; 418 extern struct inodes_stat_t inodes_stat;
419 extern int leases_enable, lease_break_time; 419 extern int leases_enable, lease_break_time;
420 420
421 struct buffer_head; 421 struct buffer_head;
422 typedef int (get_block_t)(struct inode *inode, sector_t iblock, 422 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
423 struct buffer_head *bh_result, int create); 423 struct buffer_head *bh_result, int create);
424 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 424 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
425 ssize_t bytes, void *private, int ret, 425 ssize_t bytes, void *private, int ret,
426 bool is_async); 426 bool is_async);
427 427
428 /* 428 /*
429 * Attribute flags. These should be or-ed together to figure out what 429 * Attribute flags. These should be or-ed together to figure out what
430 * has been changed! 430 * has been changed!
431 */ 431 */
432 #define ATTR_MODE (1 << 0) 432 #define ATTR_MODE (1 << 0)
433 #define ATTR_UID (1 << 1) 433 #define ATTR_UID (1 << 1)
434 #define ATTR_GID (1 << 2) 434 #define ATTR_GID (1 << 2)
435 #define ATTR_SIZE (1 << 3) 435 #define ATTR_SIZE (1 << 3)
436 #define ATTR_ATIME (1 << 4) 436 #define ATTR_ATIME (1 << 4)
437 #define ATTR_MTIME (1 << 5) 437 #define ATTR_MTIME (1 << 5)
438 #define ATTR_CTIME (1 << 6) 438 #define ATTR_CTIME (1 << 6)
439 #define ATTR_ATIME_SET (1 << 7) 439 #define ATTR_ATIME_SET (1 << 7)
440 #define ATTR_MTIME_SET (1 << 8) 440 #define ATTR_MTIME_SET (1 << 8)
441 #define ATTR_FORCE (1 << 9) /* Not a change, but force the change through */ 441 #define ATTR_FORCE (1 << 9) /* Not a change, but force the change through */
442 #define ATTR_ATTR_FLAG (1 << 10) 442 #define ATTR_ATTR_FLAG (1 << 10)
443 #define ATTR_KILL_SUID (1 << 11) 443 #define ATTR_KILL_SUID (1 << 11)
444 #define ATTR_KILL_SGID (1 << 12) 444 #define ATTR_KILL_SGID (1 << 12)
445 #define ATTR_FILE (1 << 13) 445 #define ATTR_FILE (1 << 13)
446 #define ATTR_KILL_PRIV (1 << 14) 446 #define ATTR_KILL_PRIV (1 << 14)
447 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ 447 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
448 #define ATTR_TIMES_SET (1 << 16) 448 #define ATTR_TIMES_SET (1 << 16)
449 449
450 /* 450 /*
451 * This is the Inode Attributes structure, used for notify_change(). It 451 * This is the Inode Attributes structure, used for notify_change(). It
452 * uses the above definitions as flags, to know which values have changed. 452 * uses the above definitions as flags, to know which values have changed.
453 * Also, in this manner, a Filesystem can look at only the values it cares 453 * Also, in this manner, a Filesystem can look at only the values it cares
454 * about. Basically, these are the attributes that the VFS layer can 454 * about. Basically, these are the attributes that the VFS layer can
455 * request to change from the FS layer. 455 * request to change from the FS layer.
456 * 456 *
457 * Derek Atkins <warlord@MIT.EDU> 94-10-20 457 * Derek Atkins <warlord@MIT.EDU> 94-10-20
458 */ 458 */
459 struct iattr { 459 struct iattr {
460 unsigned int ia_valid; 460 unsigned int ia_valid;
461 umode_t ia_mode; 461 umode_t ia_mode;
462 uid_t ia_uid; 462 uid_t ia_uid;
463 gid_t ia_gid; 463 gid_t ia_gid;
464 loff_t ia_size; 464 loff_t ia_size;
465 struct timespec ia_atime; 465 struct timespec ia_atime;
466 struct timespec ia_mtime; 466 struct timespec ia_mtime;
467 struct timespec ia_ctime; 467 struct timespec ia_ctime;
468 468
469 /* 469 /*
470 * Not an attribute, but auxiliary info for filesystems wanting to 470 * Not an attribute, but auxiliary info for filesystems wanting to
471 * implement an ftruncate() like method. NOTE: filesystem should 471 * implement an ftruncate() like method. NOTE: filesystem should
472 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). 472 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
473 */ 473 */
474 struct file *ia_file; 474 struct file *ia_file;
475 }; 475 };
476 476
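A hedged kernel-side sketch of the iattr idiom: set only the ATTR_* bits for the fields that are actually valid and hand the structure to the VFS (notify_change() is the usual entry point; the helper name here is invented):

/* Hedged sketch: truncate to 'size' via the iattr machinery. */
static int example_set_size(struct dentry *dentry, loff_t size)
{
	struct iattr attr = {
		.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
		.ia_size  = size,
		.ia_ctime = CURRENT_TIME,
		.ia_mtime = CURRENT_TIME,
	};

	/* Callers are expected to hold i_mutex across notify_change(). */
	return notify_change(dentry, &attr);
}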
477 /* 477 /*
478 * Includes for diskquotas. 478 * Includes for diskquotas.
479 */ 479 */
480 #include <linux/quota.h> 480 #include <linux/quota.h>
481 481
482 /** 482 /**
483 * enum positive_aop_returns - aop return codes with specific semantics 483 * enum positive_aop_returns - aop return codes with specific semantics
484 * 484 *
485 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 485 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
486 * completed, that the page is still locked, and 486 * completed, that the page is still locked, and
487 * should be considered active. The VM uses this hint 487 * should be considered active. The VM uses this hint
488 * to return the page to the active list -- it won't 488 * to return the page to the active list -- it won't
489 * be a candidate for writeback again in the near 489 * be a candidate for writeback again in the near
490 * future. Other callers must be careful to unlock 490 * future. Other callers must be careful to unlock
491 * the page if they get this return. Returned by 491 * the page if they get this return. Returned by
492 * writepage(); 492 * writepage();
493 * 493 *
494 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 494 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
495 * unlocked it and the page might have been truncated. 495 * unlocked it and the page might have been truncated.
496 * The caller should back up to acquiring a new page and 496 * The caller should back up to acquiring a new page and
497 * trying again. The aop will be taking reasonable 497 * trying again. The aop will be taking reasonable
498 * precautions not to livelock. If the caller held a page 498 * precautions not to livelock. If the caller held a page
499 * reference, it should drop it before retrying. Returned 499 * reference, it should drop it before retrying. Returned
500 * by readpage(). 500 * by readpage().
501 * 501 *
502 * address_space_operation functions return these large constants to indicate 502 * address_space_operation functions return these large constants to indicate
503 * special semantics to the caller. These are much larger than the bytes in a 503 * special semantics to the caller. These are much larger than the bytes in a
504 * page to allow for functions that return the number of bytes operated on in a 504 * page to allow for functions that return the number of bytes operated on in a
505 * given page. 505 * given page.
506 */ 506 */
507 507
508 enum positive_aop_returns { 508 enum positive_aop_returns {
509 AOP_WRITEPAGE_ACTIVATE = 0x80000, 509 AOP_WRITEPAGE_ACTIVATE = 0x80000,
510 AOP_TRUNCATED_PAGE = 0x80001, 510 AOP_TRUNCATED_PAGE = 0x80001,
511 }; 511 };
512 512
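To make AOP_TRUNCATED_PAGE concrete, this is roughly the retry pattern the generic read path uses when ->readpage returns it; a hedged sketch, not verbatim kernel code:

/* Hedged sketch: populate one page-cache page, retrying on truncation races. */
static int example_read_one_page(struct file *filp, struct address_space *mapping,
				 pgoff_t index)
{
	struct page *page;
	int error;

retry:
	page = find_or_create_page(mapping, index, GFP_KERNEL);
	if (!page)
		return -ENOMEM;
	error = mapping->a_ops->readpage(filp, page);
	if (error == AOP_TRUNCATED_PAGE) {
		/* The aop unlocked the page and it may be gone: start over. */
		page_cache_release(page);
		goto retry;
	}
	page_cache_release(page);	/* the page itself stays in the page cache */
	return error;
}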
513 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ 513 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
514 #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ 514 #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
515 #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct 515 #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct
516 * helper code (eg buffer layer) 516 * helper code (eg buffer layer)
517 * to clear GFP_FS from alloc */ 517 * to clear GFP_FS from alloc */
518 518
519 /* 519 /*
520 * oh the beauties of C type declarations. 520 * oh the beauties of C type declarations.
521 */ 521 */
522 struct page; 522 struct page;
523 struct address_space; 523 struct address_space;
524 struct writeback_control; 524 struct writeback_control;
525 525
526 struct iov_iter { 526 struct iov_iter {
527 const struct iovec *iov; 527 const struct iovec *iov;
528 unsigned long nr_segs; 528 unsigned long nr_segs;
529 size_t iov_offset; 529 size_t iov_offset;
530 size_t count; 530 size_t count;
531 }; 531 };
532 532
533 size_t iov_iter_copy_from_user_atomic(struct page *page, 533 size_t iov_iter_copy_from_user_atomic(struct page *page,
534 struct iov_iter *i, unsigned long offset, size_t bytes); 534 struct iov_iter *i, unsigned long offset, size_t bytes);
535 size_t iov_iter_copy_from_user(struct page *page, 535 size_t iov_iter_copy_from_user(struct page *page,
536 struct iov_iter *i, unsigned long offset, size_t bytes); 536 struct iov_iter *i, unsigned long offset, size_t bytes);
537 void iov_iter_advance(struct iov_iter *i, size_t bytes); 537 void iov_iter_advance(struct iov_iter *i, size_t bytes);
538 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); 538 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
539 size_t iov_iter_single_seg_count(struct iov_iter *i); 539 size_t iov_iter_single_seg_count(struct iov_iter *i);
540 540
541 static inline void iov_iter_init(struct iov_iter *i, 541 static inline void iov_iter_init(struct iov_iter *i,
542 const struct iovec *iov, unsigned long nr_segs, 542 const struct iovec *iov, unsigned long nr_segs,
543 size_t count, size_t written) 543 size_t count, size_t written)
544 { 544 {
545 i->iov = iov; 545 i->iov = iov;
546 i->nr_segs = nr_segs; 546 i->nr_segs = nr_segs;
547 i->iov_offset = 0; 547 i->iov_offset = 0;
548 i->count = count + written; 548 i->count = count + written;
549 549
550 iov_iter_advance(i, written); 550 iov_iter_advance(i, written);
551 } 551 }
552 552
553 static inline size_t iov_iter_count(struct iov_iter *i) 553 static inline size_t iov_iter_count(struct iov_iter *i)
554 { 554 {
555 return i->count; 555 return i->count;
556 } 556 }
557 557
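The iov_iter helpers above are consumed by the generic write path; the fragment below is a hedged sketch of the usual initialise/advance pattern (the function is invented, and the actual copy step is elided):

/* Hedged sketch: walk a user iovec the way the write path does. */
static void example_walk_iov(const struct iovec *iov, unsigned long nr_segs,
			     size_t len)
{
	struct iov_iter i;

	iov_iter_init(&i, iov, nr_segs, len, 0);	/* nothing written yet */

	while (iov_iter_count(&i)) {
		size_t chunk = min_t(size_t, PAGE_SIZE, iov_iter_count(&i));

		/* ... copy 'chunk' bytes with iov_iter_copy_from_user() ... */
		iov_iter_advance(&i, chunk);
	}
}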
558 /* 558 /*
559 * "descriptor" for what we're up to with a read. 559 * "descriptor" for what we're up to with a read.
560 * This allows us to use the same read code yet 560 * This allows us to use the same read code yet
561 * have multiple different users of the data that 561 * have multiple different users of the data that
562 * we read from a file. 562 * we read from a file.
563 * 563 *
564 * The simplest case just copies the data to user 564 * The simplest case just copies the data to user
565 * mode. 565 * mode.
566 */ 566 */
567 typedef struct { 567 typedef struct {
568 size_t written; 568 size_t written;
569 size_t count; 569 size_t count;
570 union { 570 union {
571 char __user *buf; 571 char __user *buf;
572 void *data; 572 void *data;
573 } arg; 573 } arg;
574 int error; 574 int error;
575 } read_descriptor_t; 575 } read_descriptor_t;
576 576
577 typedef int (*read_actor_t)(read_descriptor_t *, struct page *, 577 typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
578 unsigned long, unsigned long); 578 unsigned long, unsigned long);
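
The simplest actor, as the comment above says, copies into the user buffer and accounts for how much was consumed. A hedged sketch along those lines (the function name is hypothetical; short-copy handling is simplified):

/* Hypothetical read actor: copy @size bytes at @offset into desc->arg.buf. */
static int example_read_actor(read_descriptor_t *desc, struct page *page,
			      unsigned long offset, unsigned long size)
{
	unsigned long left, count = desc->count;
	char *kaddr;

	if (size > count)
		size = count;

	kaddr = kmap(page);
	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
	kunmap(page);
	if (left) {
		size -= left;
		desc->error = -EFAULT;
	}

	desc->count = count - size;
	desc->written += size;
	desc->arg.buf += size;
	return size;
}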
579 579
580 struct address_space_operations { 580 struct address_space_operations {
581 int (*writepage)(struct page *page, struct writeback_control *wbc); 581 int (*writepage)(struct page *page, struct writeback_control *wbc);
582 int (*readpage)(struct file *, struct page *); 582 int (*readpage)(struct file *, struct page *);
583 void (*sync_page)(struct page *); 583 void (*sync_page)(struct page *);
584 584
585 /* Write back some dirty pages from this mapping. */ 585 /* Write back some dirty pages from this mapping. */
586 int (*writepages)(struct address_space *, struct writeback_control *); 586 int (*writepages)(struct address_space *, struct writeback_control *);
587 587
588 /* Set a page dirty. Return true if this dirtied it */ 588 /* Set a page dirty. Return true if this dirtied it */
589 int (*set_page_dirty)(struct page *page); 589 int (*set_page_dirty)(struct page *page);
590 590
591 int (*readpages)(struct file *filp, struct address_space *mapping, 591 int (*readpages)(struct file *filp, struct address_space *mapping,
592 struct list_head *pages, unsigned nr_pages); 592 struct list_head *pages, unsigned nr_pages);
593 593
594 int (*write_begin)(struct file *, struct address_space *mapping, 594 int (*write_begin)(struct file *, struct address_space *mapping,
595 loff_t pos, unsigned len, unsigned flags, 595 loff_t pos, unsigned len, unsigned flags,
596 struct page **pagep, void **fsdata); 596 struct page **pagep, void **fsdata);
597 int (*write_end)(struct file *, struct address_space *mapping, 597 int (*write_end)(struct file *, struct address_space *mapping,
598 loff_t pos, unsigned len, unsigned copied, 598 loff_t pos, unsigned len, unsigned copied,
599 struct page *page, void *fsdata); 599 struct page *page, void *fsdata);
600 600
601 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ 601 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
602 sector_t (*bmap)(struct address_space *, sector_t); 602 sector_t (*bmap)(struct address_space *, sector_t);
603 void (*invalidatepage) (struct page *, unsigned long); 603 void (*invalidatepage) (struct page *, unsigned long);
604 int (*releasepage) (struct page *, gfp_t); 604 int (*releasepage) (struct page *, gfp_t);
605 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 605 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
606 loff_t offset, unsigned long nr_segs); 606 loff_t offset, unsigned long nr_segs);
607 int (*get_xip_mem)(struct address_space *, pgoff_t, int, 607 int (*get_xip_mem)(struct address_space *, pgoff_t, int,
608 void **, unsigned long *); 608 void **, unsigned long *);
609 /* migrate the contents of a page to the specified target */ 609 /* migrate the contents of a page to the specified target */
610 int (*migratepage) (struct address_space *, 610 int (*migratepage) (struct address_space *,
611 struct page *, struct page *); 611 struct page *, struct page *);
612 int (*launder_page) (struct page *); 612 int (*launder_page) (struct page *);
613 int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 613 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
614 unsigned long); 614 unsigned long);
615 int (*error_remove_page)(struct address_space *, struct page *); 615 int (*error_remove_page)(struct address_space *, struct page *);
616 }; 616 };
617 617
618 /* 618 /*
619 * pagecache_write_begin/pagecache_write_end must be used by general code 619 * pagecache_write_begin/pagecache_write_end must be used by general code
620 * to write into the pagecache. 620 * to write into the pagecache.
621 */ 621 */
622 int pagecache_write_begin(struct file *, struct address_space *mapping, 622 int pagecache_write_begin(struct file *, struct address_space *mapping,
623 loff_t pos, unsigned len, unsigned flags, 623 loff_t pos, unsigned len, unsigned flags,
624 struct page **pagep, void **fsdata); 624 struct page **pagep, void **fsdata);
625 625
626 int pagecache_write_end(struct file *, struct address_space *mapping, 626 int pagecache_write_end(struct file *, struct address_space *mapping,
627 loff_t pos, unsigned len, unsigned copied, 627 loff_t pos, unsigned len, unsigned copied,
628 struct page *page, void *fsdata); 628 struct page *page, void *fsdata);
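
A hedged sketch of the expected calling pattern for one page-sized chunk: begin the write to obtain a prepared, locked page, copy the data in, then end the write to commit it. The wrapper name and the kernel-buffer source are assumptions for illustration only.

/* Hypothetical: write @len bytes (within one page) from @buf at @pos. */
static int example_pagecache_write(struct file *file, loff_t pos,
				   const char *buf, unsigned len)
{
	struct address_space *mapping = file->f_mapping;
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	struct page *page;
	void *fsdata;
	char *kaddr;
	int ret;

	ret = pagecache_write_begin(file, mapping, pos, len, 0,
				    &page, &fsdata);
	if (ret)
		return ret;

	kaddr = kmap(page);
	memcpy(kaddr + offset, buf, len);
	kunmap(page);

	ret = pagecache_write_end(file, mapping, pos, len, len,
				  page, fsdata);
	return ret < 0 ? ret : 0;
}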
629 629
630 struct backing_dev_info; 630 struct backing_dev_info;
631 struct address_space { 631 struct address_space {
632 struct inode *host; /* owner: inode, block_device */ 632 struct inode *host; /* owner: inode, block_device */
633 struct radix_tree_root page_tree; /* radix tree of all pages */ 633 struct radix_tree_root page_tree; /* radix tree of all pages */
634 spinlock_t tree_lock; /* and lock protecting it */ 634 spinlock_t tree_lock; /* and lock protecting it */
635 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 635 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
636 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 636 struct prio_tree_root i_mmap; /* tree of private and shared mappings */
637 struct list_head i_mmap_nonlinear; /* list VM_NONLINEAR mappings */ 637 struct list_head i_mmap_nonlinear; /* list VM_NONLINEAR mappings */
638 spinlock_t i_mmap_lock; /* protect tree, count, list */ 638 spinlock_t i_mmap_lock; /* protect tree, count, list */
639 unsigned int truncate_count; /* Cover race condition with truncate */ 639 unsigned int truncate_count; /* Cover race condition with truncate */
640 unsigned long nrpages; /* number of total pages */ 640 unsigned long nrpages; /* number of total pages */
641 pgoff_t writeback_index;/* writeback starts here */ 641 pgoff_t writeback_index;/* writeback starts here */
642 const struct address_space_operations *a_ops; /* methods */ 642 const struct address_space_operations *a_ops; /* methods */
643 unsigned long flags; /* error bits/gfp mask */ 643 unsigned long flags; /* error bits/gfp mask */
644 struct backing_dev_info *backing_dev_info; /* device readahead, etc */ 644 struct backing_dev_info *backing_dev_info; /* device readahead, etc */
645 spinlock_t private_lock; /* for use by the address_space */ 645 spinlock_t private_lock; /* for use by the address_space */
646 struct list_head private_list; /* ditto */ 646 struct list_head private_list; /* ditto */
647 struct address_space *assoc_mapping; /* ditto */ 647 struct address_space *assoc_mapping; /* ditto */
648 } __attribute__((aligned(sizeof(long)))); 648 } __attribute__((aligned(sizeof(long))));
649 /* 649 /*
650 * On most architectures that alignment is already the case; but 650 * On most architectures that alignment is already the case; but
651 * must be enforced here for CRIS, to let the least significant bit 651 * must be enforced here for CRIS, to let the least significant bit
652 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. 652 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
653 */ 653 */
654 654
655 struct block_device { 655 struct block_device {
656 dev_t bd_dev; /* not a kdev_t - it's a search key */ 656 dev_t bd_dev; /* not a kdev_t - it's a search key */
657 struct inode * bd_inode; /* will die */ 657 struct inode * bd_inode; /* will die */
658 struct super_block * bd_super; 658 struct super_block * bd_super;
659 int bd_openers; 659 int bd_openers;
660 struct mutex bd_mutex; /* open/close mutex */ 660 struct mutex bd_mutex; /* open/close mutex */
661 struct list_head bd_inodes; 661 struct list_head bd_inodes;
662 void * bd_claiming; 662 void * bd_claiming;
663 void * bd_holder; 663 void * bd_holder;
664 int bd_holders; 664 int bd_holders;
665 #ifdef CONFIG_SYSFS 665 #ifdef CONFIG_SYSFS
666 struct list_head bd_holder_list; 666 struct list_head bd_holder_list;
667 #endif 667 #endif
668 struct block_device * bd_contains; 668 struct block_device * bd_contains;
669 unsigned bd_block_size; 669 unsigned bd_block_size;
670 struct hd_struct * bd_part; 670 struct hd_struct * bd_part;
671 /* number of times partitions within this device have been opened. */ 671 /* number of times partitions within this device have been opened. */
672 unsigned bd_part_count; 672 unsigned bd_part_count;
673 int bd_invalidated; 673 int bd_invalidated;
674 struct gendisk * bd_disk; 674 struct gendisk * bd_disk;
675 struct list_head bd_list; 675 struct list_head bd_list;
676 /* 676 /*
677 * Private data. You must have bd_claim'ed the block_device 677 * Private data. You must have bd_claim'ed the block_device
678 * to use this. NOTE: bd_claim allows an owner to claim 678 * to use this. NOTE: bd_claim allows an owner to claim
679 * the same device multiple times, so the owner must take special 679 * the same device multiple times, so the owner must take special
680 * care to not mess up bd_private for that case. 680 * care to not mess up bd_private for that case.
681 */ 681 */
682 unsigned long bd_private; 682 unsigned long bd_private;
683 683
684 /* The counter of freeze processes */ 684 /* The counter of freeze processes */
685 int bd_fsfreeze_count; 685 int bd_fsfreeze_count;
686 /* Mutex for freeze */ 686 /* Mutex for freeze */
687 struct mutex bd_fsfreeze_mutex; 687 struct mutex bd_fsfreeze_mutex;
688 }; 688 };
689 689
690 /* 690 /*
691 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache 691 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
692 * radix trees 692 * radix trees
693 */ 693 */
694 #define PAGECACHE_TAG_DIRTY 0 694 #define PAGECACHE_TAG_DIRTY 0
695 #define PAGECACHE_TAG_WRITEBACK 1 695 #define PAGECACHE_TAG_WRITEBACK 1
696 #define PAGECACHE_TAG_TOWRITE 2 696 #define PAGECACHE_TAG_TOWRITE 2
697 697
698 int mapping_tagged(struct address_space *mapping, int tag); 698 int mapping_tagged(struct address_space *mapping, int tag);
699 699
700 /* 700 /*
701 * Might pages of this file be mapped into userspace? 701 * Might pages of this file be mapped into userspace?
702 */ 702 */
703 static inline int mapping_mapped(struct address_space *mapping) 703 static inline int mapping_mapped(struct address_space *mapping)
704 { 704 {
705 return !prio_tree_empty(&mapping->i_mmap) || 705 return !prio_tree_empty(&mapping->i_mmap) ||
706 !list_empty(&mapping->i_mmap_nonlinear); 706 !list_empty(&mapping->i_mmap_nonlinear);
707 } 707 }
708 708
709 /* 709 /*
710 * Might pages of this file have been modified in userspace? 710 * Might pages of this file have been modified in userspace?
711 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff 711 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
712 * marks vma as VM_SHARED if it is shared, and the file was opened for 712 * marks vma as VM_SHARED if it is shared, and the file was opened for
713 * writing, i.e. the vma may be mprotected writable even if now readonly. 713 * writing, i.e. the vma may be mprotected writable even if now readonly.
714 */ 714 */
715 static inline int mapping_writably_mapped(struct address_space *mapping) 715 static inline int mapping_writably_mapped(struct address_space *mapping)
716 { 716 {
717 return mapping->i_mmap_writable != 0; 717 return mapping->i_mmap_writable != 0;
718 } 718 }
719 719
720 /* 720 /*
721 * Use sequence counter to get consistent i_size on 32-bit processors. 721 * Use sequence counter to get consistent i_size on 32-bit processors.
722 */ 722 */
723 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 723 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
724 #include <linux/seqlock.h> 724 #include <linux/seqlock.h>
725 #define __NEED_I_SIZE_ORDERED 725 #define __NEED_I_SIZE_ORDERED
726 #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) 726 #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
727 #else 727 #else
728 #define i_size_ordered_init(inode) do { } while (0) 728 #define i_size_ordered_init(inode) do { } while (0)
729 #endif 729 #endif
730 730
731 struct posix_acl; 731 struct posix_acl;
732 #define ACL_NOT_CACHED ((void *)(-1)) 732 #define ACL_NOT_CACHED ((void *)(-1))
733 733
734 struct inode { 734 struct inode {
735 struct hlist_node i_hash; 735 struct hlist_node i_hash;
736 struct list_head i_wb_list; /* backing dev IO list */ 736 struct list_head i_wb_list; /* backing dev IO list */
737 struct list_head i_lru; /* inode LRU list */ 737 struct list_head i_lru; /* inode LRU list */
738 struct list_head i_sb_list; 738 struct list_head i_sb_list;
739 struct list_head i_dentry; 739 struct list_head i_dentry;
740 unsigned long i_ino; 740 unsigned long i_ino;
741 atomic_t i_count; 741 atomic_t i_count;
742 unsigned int i_nlink; 742 unsigned int i_nlink;
743 uid_t i_uid; 743 uid_t i_uid;
744 gid_t i_gid; 744 gid_t i_gid;
745 dev_t i_rdev; 745 dev_t i_rdev;
746 unsigned int i_blkbits; 746 unsigned int i_blkbits;
747 u64 i_version; 747 u64 i_version;
748 loff_t i_size; 748 loff_t i_size;
749 #ifdef __NEED_I_SIZE_ORDERED 749 #ifdef __NEED_I_SIZE_ORDERED
750 seqcount_t i_size_seqcount; 750 seqcount_t i_size_seqcount;
751 #endif 751 #endif
752 struct timespec i_atime; 752 struct timespec i_atime;
753 struct timespec i_mtime; 753 struct timespec i_mtime;
754 struct timespec i_ctime; 754 struct timespec i_ctime;
755 blkcnt_t i_blocks; 755 blkcnt_t i_blocks;
756 unsigned short i_bytes; 756 unsigned short i_bytes;
757 umode_t i_mode; 757 umode_t i_mode;
758 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 758 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
759 struct mutex i_mutex; 759 struct mutex i_mutex;
760 struct rw_semaphore i_alloc_sem; 760 struct rw_semaphore i_alloc_sem;
761 const struct inode_operations *i_op; 761 const struct inode_operations *i_op;
762 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 762 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
763 struct super_block *i_sb; 763 struct super_block *i_sb;
764 struct file_lock *i_flock; 764 struct file_lock *i_flock;
765 struct address_space *i_mapping; 765 struct address_space *i_mapping;
766 struct address_space i_data; 766 struct address_space i_data;
767 #ifdef CONFIG_QUOTA 767 #ifdef CONFIG_QUOTA
768 struct dquot *i_dquot[MAXQUOTAS]; 768 struct dquot *i_dquot[MAXQUOTAS];
769 #endif 769 #endif
770 struct list_head i_devices; 770 struct list_head i_devices;
771 union { 771 union {
772 struct pipe_inode_info *i_pipe; 772 struct pipe_inode_info *i_pipe;
773 struct block_device *i_bdev; 773 struct block_device *i_bdev;
774 struct cdev *i_cdev; 774 struct cdev *i_cdev;
775 }; 775 };
776 776
777 __u32 i_generation; 777 __u32 i_generation;
778 778
779 #ifdef CONFIG_FSNOTIFY 779 #ifdef CONFIG_FSNOTIFY
780 __u32 i_fsnotify_mask; /* all events this inode cares about */ 780 __u32 i_fsnotify_mask; /* all events this inode cares about */
781 struct hlist_head i_fsnotify_marks; 781 struct hlist_head i_fsnotify_marks;
782 #endif 782 #endif
783 783
784 unsigned long i_state; 784 unsigned long i_state;
785 unsigned long dirtied_when; /* jiffies of first dirtying */ 785 unsigned long dirtied_when; /* jiffies of first dirtying */
786 786
787 unsigned int i_flags; 787 unsigned int i_flags;
788 788
789 #ifdef CONFIG_IMA 789 #ifdef CONFIG_IMA
790 /* protected by i_lock */ 790 /* protected by i_lock */
791 unsigned int i_readcount; /* struct files open RO */ 791 unsigned int i_readcount; /* struct files open RO */
792 #endif 792 #endif
793 atomic_t i_writecount; 793 atomic_t i_writecount;
794 #ifdef CONFIG_SECURITY 794 #ifdef CONFIG_SECURITY
795 void *i_security; 795 void *i_security;
796 #endif 796 #endif
797 #ifdef CONFIG_FS_POSIX_ACL 797 #ifdef CONFIG_FS_POSIX_ACL
798 struct posix_acl *i_acl; 798 struct posix_acl *i_acl;
799 struct posix_acl *i_default_acl; 799 struct posix_acl *i_default_acl;
800 #endif 800 #endif
801 void *i_private; /* fs or device private pointer */ 801 void *i_private; /* fs or device private pointer */
802 }; 802 };
803 803
804 static inline int inode_unhashed(struct inode *inode) 804 static inline int inode_unhashed(struct inode *inode)
805 { 805 {
806 return hlist_unhashed(&inode->i_hash); 806 return hlist_unhashed(&inode->i_hash);
807 } 807 }
808 808
809 /* 809 /*
810 * inode->i_mutex nesting subclasses for the lock validator: 810 * inode->i_mutex nesting subclasses for the lock validator:
811 * 811 *
812 * 0: the object of the current VFS operation 812 * 0: the object of the current VFS operation
813 * 1: parent 813 * 1: parent
814 * 2: child/target 814 * 2: child/target
815 * 3: quota file 815 * 3: quota file
816 * 816 *
817 * The locking order between these classes is 817 * The locking order between these classes is
818 * parent -> child -> normal -> xattr -> quota 818 * parent -> child -> normal -> xattr -> quota
819 */ 819 */
820 enum inode_i_mutex_lock_class 820 enum inode_i_mutex_lock_class
821 { 821 {
822 I_MUTEX_NORMAL, 822 I_MUTEX_NORMAL,
823 I_MUTEX_PARENT, 823 I_MUTEX_PARENT,
824 I_MUTEX_CHILD, 824 I_MUTEX_CHILD,
825 I_MUTEX_XATTR, 825 I_MUTEX_XATTR,
826 I_MUTEX_QUOTA 826 I_MUTEX_QUOTA
827 }; 827 };
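
These subclasses are handed to mutex_lock_nested() so lockdep can tell a legitimate parent/child nesting apart from an actual i_mutex deadlock. A minimal sketch, with a hypothetical helper name:

/* Hypothetical: lock a directory and one of its children in order. */
static void example_lock_parent_and_child(struct inode *dir,
					  struct inode *child)
{
	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&child->i_mutex, I_MUTEX_CHILD);
}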
828 828
829 /* 829 /*
830 * NOTE: in a 32bit arch with a preemptable kernel and 830 * NOTE: in a 32bit arch with a preemptable kernel and
831 * an UP compile the i_size_read/write must be atomic 831 * an UP compile the i_size_read/write must be atomic
832 * with respect to the local cpu (unlike with preempt disabled), 832 * with respect to the local cpu (unlike with preempt disabled),
833 * but they don't need to be atomic with respect to other cpus like in 833 * but they don't need to be atomic with respect to other cpus like in
834 * true SMP (so they need to either locally disable irq around 834 * true SMP (so they need to either locally disable irq around
835 * the read, or, for example on x86, they can still be implemented as a 835 * the read, or, for example on x86, they can still be implemented as a
836 * cmpxchg8b without the need of the lock prefix). For SMP compiles 836 * cmpxchg8b without the need of the lock prefix). For SMP compiles
837 * and 64bit archs it makes no difference if preempt is enabled or not. 837 * and 64bit archs it makes no difference if preempt is enabled or not.
838 */ 838 */
839 static inline loff_t i_size_read(const struct inode *inode) 839 static inline loff_t i_size_read(const struct inode *inode)
840 { 840 {
841 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 841 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
842 loff_t i_size; 842 loff_t i_size;
843 unsigned int seq; 843 unsigned int seq;
844 844
845 do { 845 do {
846 seq = read_seqcount_begin(&inode->i_size_seqcount); 846 seq = read_seqcount_begin(&inode->i_size_seqcount);
847 i_size = inode->i_size; 847 i_size = inode->i_size;
848 } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); 848 } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
849 return i_size; 849 return i_size;
850 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 850 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
851 loff_t i_size; 851 loff_t i_size;
852 852
853 preempt_disable(); 853 preempt_disable();
854 i_size = inode->i_size; 854 i_size = inode->i_size;
855 preempt_enable(); 855 preempt_enable();
856 return i_size; 856 return i_size;
857 #else 857 #else
858 return inode->i_size; 858 return inode->i_size;
859 #endif 859 #endif
860 } 860 }
861 861
862 /* 862 /*
863 * NOTE: unlike i_size_read(), i_size_write() does need locking around it 863 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
864 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount 864 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
865 * can be lost, resulting in subsequent i_size_read() calls spinning forever. 865 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
866 */ 866 */
867 static inline void i_size_write(struct inode *inode, loff_t i_size) 867 static inline void i_size_write(struct inode *inode, loff_t i_size)
868 { 868 {
869 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 869 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
870 write_seqcount_begin(&inode->i_size_seqcount); 870 write_seqcount_begin(&inode->i_size_seqcount);
871 inode->i_size = i_size; 871 inode->i_size = i_size;
872 write_seqcount_end(&inode->i_size_seqcount); 872 write_seqcount_end(&inode->i_size_seqcount);
873 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 873 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
874 preempt_disable(); 874 preempt_disable();
875 inode->i_size = i_size; 875 inode->i_size = i_size;
876 preempt_enable(); 876 preempt_enable();
877 #else 877 #else
878 inode->i_size = i_size; 878 inode->i_size = i_size;
879 #endif 879 #endif
880 } 880 }
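
In practice that means an extending write publishes the new size with i_mutex held, for example (hypothetical helper):

/* Hypothetical: grow i_size under i_mutex, as the comment above requires. */
static void example_extend_size(struct inode *inode, loff_t new_size)
{
	mutex_lock(&inode->i_mutex);
	if (new_size > i_size_read(inode))
		i_size_write(inode, new_size);
	mutex_unlock(&inode->i_mutex);
}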
881 881
882 static inline unsigned iminor(const struct inode *inode) 882 static inline unsigned iminor(const struct inode *inode)
883 { 883 {
884 return MINOR(inode->i_rdev); 884 return MINOR(inode->i_rdev);
885 } 885 }
886 886
887 static inline unsigned imajor(const struct inode *inode) 887 static inline unsigned imajor(const struct inode *inode)
888 { 888 {
889 return MAJOR(inode->i_rdev); 889 return MAJOR(inode->i_rdev);
890 } 890 }
891 891
892 extern struct block_device *I_BDEV(struct inode *inode); 892 extern struct block_device *I_BDEV(struct inode *inode);
893 893
894 struct fown_struct { 894 struct fown_struct {
895 rwlock_t lock; /* protects pid, uid, euid fields */ 895 rwlock_t lock; /* protects pid, uid, euid fields */
896 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ 896 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
897 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ 897 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
898 uid_t uid, euid; /* uid/euid of process setting the owner */ 898 uid_t uid, euid; /* uid/euid of process setting the owner */
899 int signum; /* posix.1b rt signal to be delivered on IO */ 899 int signum; /* posix.1b rt signal to be delivered on IO */
900 }; 900 };
901 901
902 /* 902 /*
903 * Track a single file's readahead state 903 * Track a single file's readahead state
904 */ 904 */
905 struct file_ra_state { 905 struct file_ra_state {
906 pgoff_t start; /* where readahead started */ 906 pgoff_t start; /* where readahead started */
907 unsigned int size; /* # of readahead pages */ 907 unsigned int size; /* # of readahead pages */
908 unsigned int async_size; /* do asynchronous readahead when 908 unsigned int async_size; /* do asynchronous readahead when
909 there are only # of pages ahead */ 909 there are only # of pages ahead */
910 910
911 unsigned int ra_pages; /* Maximum readahead window */ 911 unsigned int ra_pages; /* Maximum readahead window */
912 unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ 912 unsigned int mmap_miss; /* Cache miss stat for mmap accesses */
913 loff_t prev_pos; /* Cache last read() position */ 913 loff_t prev_pos; /* Cache last read() position */
914 }; 914 };
915 915
916 /* 916 /*
917 * Check if @index falls in the readahead window. 917 * Check if @index falls in the readahead window.
918 */ 918 */
919 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) 919 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
920 { 920 {
921 return (index >= ra->start && 921 return (index >= ra->start &&
922 index < ra->start + ra->size); 922 index < ra->start + ra->size);
923 } 923 }
924 924
925 #define FILE_MNT_WRITE_TAKEN 1 925 #define FILE_MNT_WRITE_TAKEN 1
926 #define FILE_MNT_WRITE_RELEASED 2 926 #define FILE_MNT_WRITE_RELEASED 2
927 927
928 struct file { 928 struct file {
929 /* 929 /*
930 * fu_list becomes invalid after file_free is called and queued via 930 * fu_list becomes invalid after file_free is called and queued via
931 * fu_rcuhead for RCU freeing 931 * fu_rcuhead for RCU freeing
932 */ 932 */
933 union { 933 union {
934 struct list_head fu_list; 934 struct list_head fu_list;
935 struct rcu_head fu_rcuhead; 935 struct rcu_head fu_rcuhead;
936 } f_u; 936 } f_u;
937 struct path f_path; 937 struct path f_path;
938 #define f_dentry f_path.dentry 938 #define f_dentry f_path.dentry
939 #define f_vfsmnt f_path.mnt 939 #define f_vfsmnt f_path.mnt
940 const struct file_operations *f_op; 940 const struct file_operations *f_op;
941 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ 941 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
942 #ifdef CONFIG_SMP 942 #ifdef CONFIG_SMP
943 int f_sb_list_cpu; 943 int f_sb_list_cpu;
944 #endif 944 #endif
945 atomic_long_t f_count; 945 atomic_long_t f_count;
946 unsigned int f_flags; 946 unsigned int f_flags;
947 fmode_t f_mode; 947 fmode_t f_mode;
948 loff_t f_pos; 948 loff_t f_pos;
949 struct fown_struct f_owner; 949 struct fown_struct f_owner;
950 const struct cred *f_cred; 950 const struct cred *f_cred;
951 struct file_ra_state f_ra; 951 struct file_ra_state f_ra;
952 952
953 u64 f_version; 953 u64 f_version;
954 #ifdef CONFIG_SECURITY 954 #ifdef CONFIG_SECURITY
955 void *f_security; 955 void *f_security;
956 #endif 956 #endif
957 /* needed for tty driver, and maybe others */ 957 /* needed for tty driver, and maybe others */
958 void *private_data; 958 void *private_data;
959 959
960 #ifdef CONFIG_EPOLL 960 #ifdef CONFIG_EPOLL
961 /* Used by fs/eventpoll.c to link all the hooks to this file */ 961 /* Used by fs/eventpoll.c to link all the hooks to this file */
962 struct list_head f_ep_links; 962 struct list_head f_ep_links;
963 #endif /* #ifdef CONFIG_EPOLL */ 963 #endif /* #ifdef CONFIG_EPOLL */
964 struct address_space *f_mapping; 964 struct address_space *f_mapping;
965 #ifdef CONFIG_DEBUG_WRITECOUNT 965 #ifdef CONFIG_DEBUG_WRITECOUNT
966 unsigned long f_mnt_write_state; 966 unsigned long f_mnt_write_state;
967 #endif 967 #endif
968 }; 968 };
969 969
970 #define get_file(x) atomic_long_inc(&(x)->f_count) 970 #define get_file(x) atomic_long_inc(&(x)->f_count)
971 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 971 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
972 #define file_count(x) atomic_long_read(&(x)->f_count) 972 #define file_count(x) atomic_long_read(&(x)->f_count)
973 973
974 #ifdef CONFIG_DEBUG_WRITECOUNT 974 #ifdef CONFIG_DEBUG_WRITECOUNT
975 static inline void file_take_write(struct file *f) 975 static inline void file_take_write(struct file *f)
976 { 976 {
977 WARN_ON(f->f_mnt_write_state != 0); 977 WARN_ON(f->f_mnt_write_state != 0);
978 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; 978 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
979 } 979 }
980 static inline void file_release_write(struct file *f) 980 static inline void file_release_write(struct file *f)
981 { 981 {
982 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; 982 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
983 } 983 }
984 static inline void file_reset_write(struct file *f) 984 static inline void file_reset_write(struct file *f)
985 { 985 {
986 f->f_mnt_write_state = 0; 986 f->f_mnt_write_state = 0;
987 } 987 }
988 static inline void file_check_state(struct file *f) 988 static inline void file_check_state(struct file *f)
989 { 989 {
990 /* 990 /*
991 * At this point, either both or neither of these bits 991 * At this point, either both or neither of these bits
992 * should be set. 992 * should be set.
993 */ 993 */
994 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); 994 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
995 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); 995 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
996 } 996 }
997 static inline int file_check_writeable(struct file *f) 997 static inline int file_check_writeable(struct file *f)
998 { 998 {
999 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) 999 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
1000 return 0; 1000 return 0;
1001 printk(KERN_WARNING "writeable file with no " 1001 printk(KERN_WARNING "writeable file with no "
1002 "mnt_want_write()\n"); 1002 "mnt_want_write()\n");
1003 WARN_ON(1); 1003 WARN_ON(1);
1004 return -EINVAL; 1004 return -EINVAL;
1005 } 1005 }
1006 #else /* !CONFIG_DEBUG_WRITECOUNT */ 1006 #else /* !CONFIG_DEBUG_WRITECOUNT */
1007 static inline void file_take_write(struct file *filp) {} 1007 static inline void file_take_write(struct file *filp) {}
1008 static inline void file_release_write(struct file *filp) {} 1008 static inline void file_release_write(struct file *filp) {}
1009 static inline void file_reset_write(struct file *filp) {} 1009 static inline void file_reset_write(struct file *filp) {}
1010 static inline void file_check_state(struct file *filp) {} 1010 static inline void file_check_state(struct file *filp) {}
1011 static inline int file_check_writeable(struct file *filp) 1011 static inline int file_check_writeable(struct file *filp)
1012 { 1012 {
1013 return 0; 1013 return 0;
1014 } 1014 }
1015 #endif /* CONFIG_DEBUG_WRITECOUNT */ 1015 #endif /* CONFIG_DEBUG_WRITECOUNT */
1016 1016
1017 #define MAX_NON_LFS ((1UL<<31) - 1) 1017 #define MAX_NON_LFS ((1UL<<31) - 1)
1018 1018
1019 /* Page cache limit. The filesystems should put that into their s_maxbytes 1019 /* Page cache limit. The filesystems should put that into their s_maxbytes
1020 limits, otherwise bad things can happen in VM. */ 1020 limits, otherwise bad things can happen in VM. */
1021 #if BITS_PER_LONG==32 1021 #if BITS_PER_LONG==32
1022 #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1022 #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
1023 #elif BITS_PER_LONG==64 1023 #elif BITS_PER_LONG==64
1024 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1024 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
1025 #endif 1025 #endif
1026 1026
1027 #define FL_POSIX 1 1027 #define FL_POSIX 1
1028 #define FL_FLOCK 2 1028 #define FL_FLOCK 2
1029 #define FL_ACCESS 8 /* not trying to lock, just looking */ 1029 #define FL_ACCESS 8 /* not trying to lock, just looking */
1030 #define FL_EXISTS 16 /* when unlocking, test for existence */ 1030 #define FL_EXISTS 16 /* when unlocking, test for existence */
1031 #define FL_LEASE 32 /* lease held on this file */ 1031 #define FL_LEASE 32 /* lease held on this file */
1032 #define FL_CLOSE 64 /* unlock on close */ 1032 #define FL_CLOSE 64 /* unlock on close */
1033 #define FL_SLEEP 128 /* A blocking lock */ 1033 #define FL_SLEEP 128 /* A blocking lock */
1034 1034
1035 /* 1035 /*
1036 * Special return value from posix_lock_file() and vfs_lock_file() for 1036 * Special return value from posix_lock_file() and vfs_lock_file() for
1037 * asynchronous locking. 1037 * asynchronous locking.
1038 */ 1038 */
1039 #define FILE_LOCK_DEFERRED 1 1039 #define FILE_LOCK_DEFERRED 1
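
A blocking caller loops on this value: if the lock was deferred, it sleeps on fl_wait until the conflicting lock goes away and the entry is unblocked, then retries. A hedged sketch modelled on the *_wait helpers in fs/locks.c (error-path cleanup of the blocked entry is omitted):

/* Hypothetical: take a POSIX lock, sleeping until it can be granted. */
static int example_posix_lock_wait(struct file *filp, struct file_lock *fl)
{
	int error;

	for (;;) {
		error = posix_lock_file(filp, fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
		if (error)
			break;
	}
	return error;
}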
1040 1040
1041 /* 1041 /*
1042 * The POSIX file lock owner is determined by 1042 * The POSIX file lock owner is determined by
1043 * the "struct files_struct" in the thread group 1043 * the "struct files_struct" in the thread group
1044 * (or NULL for no owner - BSD locks). 1044 * (or NULL for no owner - BSD locks).
1045 * 1045 *
1046 * Lockd stuffs a "host" pointer into this. 1046 * Lockd stuffs a "host" pointer into this.
1047 */ 1047 */
1048 typedef struct files_struct *fl_owner_t; 1048 typedef struct files_struct *fl_owner_t;
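
In other words, a process-owned POSIX lock records the owning thread group's files_struct, while flock()-style locks leave the owner NULL. A minimal sketch (hypothetical helper):

/* Hypothetical: fill in ownership for a process-owned POSIX lock. */
static void example_init_posix_owner(struct file_lock *fl)
{
	fl->fl_owner = current->files;	/* NULL here would mean a BSD-style lock */
	fl->fl_pid = current->tgid;
}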
1049 1049
1050 struct file_lock_operations { 1050 struct file_lock_operations {
1051 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 1051 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
1052 void (*fl_release_private)(struct file_lock *); 1052 void (*fl_release_private)(struct file_lock *);
1053 }; 1053 };
1054 1054
1055 struct lock_manager_operations { 1055 struct lock_manager_operations {
1056 int (*fl_compare_owner)(struct file_lock *, struct file_lock *); 1056 int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
1057 void (*fl_notify)(struct file_lock *); /* unblock callback */ 1057 void (*fl_notify)(struct file_lock *); /* unblock callback */
1058 int (*fl_grant)(struct file_lock *, struct file_lock *, int); 1058 int (*fl_grant)(struct file_lock *, struct file_lock *, int);
1059 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
1060 void (*fl_release_private)(struct file_lock *); 1059 void (*fl_release_private)(struct file_lock *);
1061 void (*fl_break)(struct file_lock *); 1060 void (*fl_break)(struct file_lock *);
1062 int (*fl_mylease)(struct file_lock *, struct file_lock *); 1061 int (*fl_mylease)(struct file_lock *, struct file_lock *);
1063 int (*fl_change)(struct file_lock **, int); 1062 int (*fl_change)(struct file_lock **, int);
1064 }; 1063 };
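
This table is what the commit trims: with ->fl_copy_lock gone, lock copying is handled solely through file_lock_operations, and a lock manager such as lockd only supplies callbacks like the ones below. A hedged sketch with hypothetical callback names:

/* Hypothetical lock manager callbacks after the fl_copy_lock removal. */
static int example_lm_compare_owner(struct file_lock *fl1, struct file_lock *fl2)
{
	return fl1->fl_owner == fl2->fl_owner;
}

static void example_lm_notify(struct file_lock *fl)
{
	/* The blocked lock can be retried now; wake the waiter. */
	wake_up(&fl->fl_wait);
}

static const struct lock_manager_operations example_lm_ops = {
	.fl_compare_owner	= example_lm_compare_owner,
	.fl_notify		= example_lm_notify,
};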
1065 1064
1066 struct lock_manager { 1065 struct lock_manager {
1067 struct list_head list; 1066 struct list_head list;
1068 }; 1067 };
1069 1068
1070 void locks_start_grace(struct lock_manager *); 1069 void locks_start_grace(struct lock_manager *);
1071 void locks_end_grace(struct lock_manager *); 1070 void locks_end_grace(struct lock_manager *);
1072 int locks_in_grace(void); 1071 int locks_in_grace(void);
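
A lock manager that needs a reclaim grace period (nfsd/lockd style) embeds a struct lock_manager, starts the grace period while clients reclaim their locks and ends it afterwards; ordinary lock requests are refused in the meantime. A minimal sketch with hypothetical names:

/* Hypothetical grace-period bookkeeping for a lock manager. */
static struct lock_manager example_lock_manager;

static void example_reclaim_start(void)
{
	locks_start_grace(&example_lock_manager);
}

static void example_reclaim_done(void)
{
	locks_end_grace(&example_lock_manager);
}

static int example_may_grant(int is_reclaim)
{
	/* Only reclaims may be granted while any manager is in grace. */
	if (locks_in_grace() && !is_reclaim)
		return -EAGAIN;
	return 0;
}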
1073 1072
1074 /* that will die - we need it for nfs_lock_info */ 1073 /* that will die - we need it for nfs_lock_info */
1075 #include <linux/nfs_fs_i.h> 1074 #include <linux/nfs_fs_i.h>
1076 1075
1077 struct file_lock { 1076 struct file_lock {
1078 struct file_lock *fl_next; /* singly linked list for this inode */ 1077 struct file_lock *fl_next; /* singly linked list for this inode */
1079 struct list_head fl_link; /* doubly linked list of all locks */ 1078 struct list_head fl_link; /* doubly linked list of all locks */
1080 struct list_head fl_block; /* circular list of blocked processes */ 1079 struct list_head fl_block; /* circular list of blocked processes */
1081 fl_owner_t fl_owner; 1080 fl_owner_t fl_owner;
1082 unsigned char fl_flags; 1081 unsigned char fl_flags;
1083 unsigned char fl_type; 1082 unsigned char fl_type;
1084 unsigned int fl_pid; 1083 unsigned int fl_pid;
1085 struct pid *fl_nspid; 1084 struct pid *fl_nspid;
1086 wait_queue_head_t fl_wait; 1085 wait_queue_head_t fl_wait;
1087 struct file *fl_file; 1086 struct file *fl_file;
1088 loff_t fl_start; 1087 loff_t fl_start;
1089 loff_t fl_end; 1088 loff_t fl_end;
1090 1089
1091 struct fasync_struct * fl_fasync; /* for lease break notifications */ 1090 struct fasync_struct * fl_fasync; /* for lease break notifications */
1092 unsigned long fl_break_time; /* for nonblocking lease breaks */ 1091 unsigned long fl_break_time; /* for nonblocking lease breaks */
1093 1092
1094 const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ 1093 const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
1095 const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ 1094 const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
1096 union { 1095 union {
1097 struct nfs_lock_info nfs_fl; 1096 struct nfs_lock_info nfs_fl;
1098 struct nfs4_lock_info nfs4_fl; 1097 struct nfs4_lock_info nfs4_fl;
1099 struct { 1098 struct {
1100 struct list_head link; /* link in AFS vnode's pending_locks list */ 1099 struct list_head link; /* link in AFS vnode's pending_locks list */
1101 int state; /* state of grant or error if -ve */ 1100 int state; /* state of grant or error if -ve */
1102 } afs; 1101 } afs;
1103 } fl_u; 1102 } fl_u;
1104 }; 1103 };
1105 1104
1106 /* The following constant reflects the upper bound of the file/locking space */ 1105 /* The following constant reflects the upper bound of the file/locking space */
1107 #ifndef OFFSET_MAX 1106 #ifndef OFFSET_MAX
1108 #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) 1107 #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
1109 #define OFFSET_MAX INT_LIMIT(loff_t) 1108 #define OFFSET_MAX INT_LIMIT(loff_t)
1110 #define OFFT_OFFSET_MAX INT_LIMIT(off_t) 1109 #define OFFT_OFFSET_MAX INT_LIMIT(off_t)
1111 #endif 1110 #endif
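
For example, with a 64-bit loff_t, INT_LIMIT(loff_t) works out to ~((loff_t)1 << 63), i.e. 0x7fffffffffffffff, the largest representable positive file offset; OFFT_OFFSET_MAX is the same construction applied to off_t.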
1112 1111
1113 #include <linux/fcntl.h> 1112 #include <linux/fcntl.h>
1114 1113
1115 extern void send_sigio(struct fown_struct *fown, int fd, int band); 1114 extern void send_sigio(struct fown_struct *fown, int fd, int band);
1116 1115
1117 #ifdef CONFIG_FILE_LOCKING 1116 #ifdef CONFIG_FILE_LOCKING
1118 extern int fcntl_getlk(struct file *, struct flock __user *); 1117 extern int fcntl_getlk(struct file *, struct flock __user *);
1119 extern int fcntl_setlk(unsigned int, struct file *, unsigned int, 1118 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
1120 struct flock __user *); 1119 struct flock __user *);
1121 1120
1122 #if BITS_PER_LONG == 32 1121 #if BITS_PER_LONG == 32
1123 extern int fcntl_getlk64(struct file *, struct flock64 __user *); 1122 extern int fcntl_getlk64(struct file *, struct flock64 __user *);
1124 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, 1123 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
1125 struct flock64 __user *); 1124 struct flock64 __user *);
1126 #endif 1125 #endif
1127 1126
1128 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); 1127 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
1129 extern int fcntl_getlease(struct file *filp); 1128 extern int fcntl_getlease(struct file *filp);
1130 1129
1131 /* fs/locks.c */ 1130 /* fs/locks.c */
1132 void locks_free_lock(struct file_lock *fl); 1131 void locks_free_lock(struct file_lock *fl);
1133 extern void locks_init_lock(struct file_lock *); 1132 extern void locks_init_lock(struct file_lock *);
1134 extern struct file_lock * locks_alloc_lock(void); 1133 extern struct file_lock * locks_alloc_lock(void);
1135 extern void locks_copy_lock(struct file_lock *, struct file_lock *); 1134 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
1136 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); 1135 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
1137 extern void locks_remove_posix(struct file *, fl_owner_t); 1136 extern void locks_remove_posix(struct file *, fl_owner_t);
1138 extern void locks_remove_flock(struct file *); 1137 extern void locks_remove_flock(struct file *);
1139 extern void locks_release_private(struct file_lock *); 1138 extern void locks_release_private(struct file_lock *);
1140 extern void posix_test_lock(struct file *, struct file_lock *); 1139 extern void posix_test_lock(struct file *, struct file_lock *);
1141 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); 1140 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
1142 extern int posix_lock_file_wait(struct file *, struct file_lock *); 1141 extern int posix_lock_file_wait(struct file *, struct file_lock *);
1143 extern int posix_unblock_lock(struct file *, struct file_lock *); 1142 extern int posix_unblock_lock(struct file *, struct file_lock *);
1144 extern int vfs_test_lock(struct file *, struct file_lock *); 1143 extern int vfs_test_lock(struct file *, struct file_lock *);
1145 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); 1144 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
1146 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); 1145 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
1147 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); 1146 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
1148 extern int __break_lease(struct inode *inode, unsigned int flags); 1147 extern int __break_lease(struct inode *inode, unsigned int flags);
1149 extern void lease_get_mtime(struct inode *, struct timespec *time); 1148 extern void lease_get_mtime(struct inode *, struct timespec *time);
1150 extern int generic_setlease(struct file *, long, struct file_lock **); 1149 extern int generic_setlease(struct file *, long, struct file_lock **);
1151 extern int vfs_setlease(struct file *, long, struct file_lock **); 1150 extern int vfs_setlease(struct file *, long, struct file_lock **);
1152 extern int lease_modify(struct file_lock **, int); 1151 extern int lease_modify(struct file_lock **, int);
1153 extern int lock_may_read(struct inode *, loff_t start, unsigned long count); 1152 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
1154 extern int lock_may_write(struct inode *, loff_t start, unsigned long count); 1153 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
1155 extern void lock_flocks(void); 1154 extern void lock_flocks(void);
1156 extern void unlock_flocks(void); 1155 extern void unlock_flocks(void);
1157 #else /* !CONFIG_FILE_LOCKING */ 1156 #else /* !CONFIG_FILE_LOCKING */
1158 static inline int fcntl_getlk(struct file *file, struct flock __user *user) 1157 static inline int fcntl_getlk(struct file *file, struct flock __user *user)
1159 { 1158 {
1160 return -EINVAL; 1159 return -EINVAL;
1161 } 1160 }
1162 1161
1163 static inline int fcntl_setlk(unsigned int fd, struct file *file, 1162 static inline int fcntl_setlk(unsigned int fd, struct file *file,
1164 unsigned int cmd, struct flock __user *user) 1163 unsigned int cmd, struct flock __user *user)
1165 { 1164 {
1166 return -EACCES; 1165 return -EACCES;
1167 } 1166 }
1168 1167
1169 #if BITS_PER_LONG == 32 1168 #if BITS_PER_LONG == 32
1170 static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) 1169 static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
1171 { 1170 {
1172 return -EINVAL; 1171 return -EINVAL;
1173 } 1172 }
1174 1173
1175 static inline int fcntl_setlk64(unsigned int fd, struct file *file, 1174 static inline int fcntl_setlk64(unsigned int fd, struct file *file,
1176 unsigned int cmd, struct flock64 __user *user) 1175 unsigned int cmd, struct flock64 __user *user)
1177 { 1176 {
1178 return -EACCES; 1177 return -EACCES;
1179 } 1178 }
1180 #endif 1179 #endif
1181 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1180 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1182 { 1181 {
1183 return 0; 1182 return 0;
1184 } 1183 }
1185 1184
1186 static inline int fcntl_getlease(struct file *filp) 1185 static inline int fcntl_getlease(struct file *filp)
1187 { 1186 {
1188 return 0; 1187 return 0;
1189 } 1188 }
1190 1189
1191 static inline void locks_init_lock(struct file_lock *fl) 1190 static inline void locks_init_lock(struct file_lock *fl)
1192 { 1191 {
1193 return; 1192 return;
1194 } 1193 }
1195 1194
1196 static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1195 static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1197 { 1196 {
1198 return; 1197 return;
1199 } 1198 }
1200 1199
1201 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1200 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1202 { 1201 {
1203 return; 1202 return;
1204 } 1203 }
1205 1204
1206 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) 1205 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
1207 { 1206 {
1208 return; 1207 return;
1209 } 1208 }
1210 1209
1211 static inline void locks_remove_flock(struct file *filp) 1210 static inline void locks_remove_flock(struct file *filp)
1212 { 1211 {
1213 return; 1212 return;
1214 } 1213 }
1215 1214
1216 static inline void posix_test_lock(struct file *filp, struct file_lock *fl) 1215 static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
1217 { 1216 {
1218 return; 1217 return;
1219 } 1218 }
1220 1219
1221 static inline int posix_lock_file(struct file *filp, struct file_lock *fl, 1220 static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
1222 struct file_lock *conflock) 1221 struct file_lock *conflock)
1223 { 1222 {
1224 return -ENOLCK; 1223 return -ENOLCK;
1225 } 1224 }
1226 1225
1227 static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) 1226 static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1228 { 1227 {
1229 return -ENOLCK; 1228 return -ENOLCK;
1230 } 1229 }
1231 1230
1232 static inline int posix_unblock_lock(struct file *filp, 1231 static inline int posix_unblock_lock(struct file *filp,
1233 struct file_lock *waiter) 1232 struct file_lock *waiter)
1234 { 1233 {
1235 return -ENOENT; 1234 return -ENOENT;
1236 } 1235 }
1237 1236
1238 static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) 1237 static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
1239 { 1238 {
1240 return 0; 1239 return 0;
1241 } 1240 }
1242 1241
1243 static inline int vfs_lock_file(struct file *filp, unsigned int cmd, 1242 static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
1244 struct file_lock *fl, struct file_lock *conf) 1243 struct file_lock *fl, struct file_lock *conf)
1245 { 1244 {
1246 return -ENOLCK; 1245 return -ENOLCK;
1247 } 1246 }
1248 1247
1249 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 1248 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
1250 { 1249 {
1251 return 0; 1250 return 0;
1252 } 1251 }
1253 1252
1254 static inline int flock_lock_file_wait(struct file *filp, 1253 static inline int flock_lock_file_wait(struct file *filp,
1255 struct file_lock *request) 1254 struct file_lock *request)
1256 { 1255 {
1257 return -ENOLCK; 1256 return -ENOLCK;
1258 } 1257 }
1259 1258
1260 static inline int __break_lease(struct inode *inode, unsigned int mode) 1259 static inline int __break_lease(struct inode *inode, unsigned int mode)
1261 { 1260 {
1262 return 0; 1261 return 0;
1263 } 1262 }
1264 1263
1265 static inline void lease_get_mtime(struct inode *inode, struct timespec *time) 1264 static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
1266 { 1265 {
1267 return; 1266 return;
1268 } 1267 }
1269 1268
1270 static inline int generic_setlease(struct file *filp, long arg, 1269 static inline int generic_setlease(struct file *filp, long arg,
1271 struct file_lock **flp) 1270 struct file_lock **flp)
1272 { 1271 {
1273 return -EINVAL; 1272 return -EINVAL;
1274 } 1273 }
1275 1274
1276 static inline int vfs_setlease(struct file *filp, long arg, 1275 static inline int vfs_setlease(struct file *filp, long arg,
1277 struct file_lock **lease) 1276 struct file_lock **lease)
1278 { 1277 {
1279 return -EINVAL; 1278 return -EINVAL;
1280 } 1279 }
1281 1280
1282 static inline int lease_modify(struct file_lock **before, int arg) 1281 static inline int lease_modify(struct file_lock **before, int arg)
1283 { 1282 {
1284 return -EINVAL; 1283 return -EINVAL;
1285 } 1284 }
1286 1285
1287 static inline int lock_may_read(struct inode *inode, loff_t start, 1286 static inline int lock_may_read(struct inode *inode, loff_t start,
1288 unsigned long len) 1287 unsigned long len)
1289 { 1288 {
1290 return 1; 1289 return 1;
1291 } 1290 }
1292 1291
1293 static inline int lock_may_write(struct inode *inode, loff_t start, 1292 static inline int lock_may_write(struct inode *inode, loff_t start,
1294 unsigned long len) 1293 unsigned long len)
1295 { 1294 {
1296 return 1; 1295 return 1;
1297 } 1296 }
1298 1297
1299 static inline void lock_flocks(void) 1298 static inline void lock_flocks(void)
1300 { 1299 {
1301 } 1300 }
1302 1301
1303 static inline void unlock_flocks(void) 1302 static inline void unlock_flocks(void)
1304 { 1303 {
1305 } 1304 }
1306 1305
1307 #endif /* !CONFIG_FILE_LOCKING */ 1306 #endif /* !CONFIG_FILE_LOCKING */
1308 1307
1309 1308
1310 struct fasync_struct { 1309 struct fasync_struct {
1311 spinlock_t fa_lock; 1310 spinlock_t fa_lock;
1312 int magic; 1311 int magic;
1313 int fa_fd; 1312 int fa_fd;
1314 struct fasync_struct *fa_next; /* singly linked list */ 1313 struct fasync_struct *fa_next; /* singly linked list */
1315 struct file *fa_file; 1314 struct file *fa_file;
1316 struct rcu_head fa_rcu; 1315 struct rcu_head fa_rcu;
1317 }; 1316 };
1318 1317
1319 #define FASYNC_MAGIC 0x4601 1318 #define FASYNC_MAGIC 0x4601
1320 1319
1321 /* SMP safe fasync helpers: */ 1320 /* SMP safe fasync helpers: */
1322 extern int fasync_helper(int, struct file *, int, struct fasync_struct **); 1321 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1323 extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); 1322 extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
1324 extern int fasync_remove_entry(struct file *, struct fasync_struct **); 1323 extern int fasync_remove_entry(struct file *, struct fasync_struct **);
1325 extern struct fasync_struct *fasync_alloc(void); 1324 extern struct fasync_struct *fasync_alloc(void);
1326 extern void fasync_free(struct fasync_struct *); 1325 extern void fasync_free(struct fasync_struct *);
1327 1326
1328 /* can be called from interrupts */ 1327 /* can be called from interrupts */
1329 extern void kill_fasync(struct fasync_struct **, int, int); 1328 extern void kill_fasync(struct fasync_struct **, int, int);
1330 1329
1331 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); 1330 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1332 extern int f_setown(struct file *filp, unsigned long arg, int force); 1331 extern int f_setown(struct file *filp, unsigned long arg, int force);
1333 extern void f_delown(struct file *filp); 1332 extern void f_delown(struct file *filp);
1334 extern pid_t f_getown(struct file *filp); 1333 extern pid_t f_getown(struct file *filp);
1335 extern int send_sigurg(struct fown_struct *fown); 1334 extern int send_sigurg(struct fown_struct *fown);
1336 1335
1337 /* 1336 /*
1338 * Umount options 1337 * Umount options
1339 */ 1338 */
1340 1339
1341 #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ 1340 #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */
1342 #define MNT_DETACH 0x00000002 /* Just detach from the tree */ 1341 #define MNT_DETACH 0x00000002 /* Just detach from the tree */
1343 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ 1342 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */
1344 #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ 1343 #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */
1345 #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ 1344 #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */
1346 1345
1347 extern struct list_head super_blocks; 1346 extern struct list_head super_blocks;
1348 extern spinlock_t sb_lock; 1347 extern spinlock_t sb_lock;
1349 1348
1350 struct super_block { 1349 struct super_block {
1351 struct list_head s_list; /* Keep this first */ 1350 struct list_head s_list; /* Keep this first */
1352 dev_t s_dev; /* search index; _not_ kdev_t */ 1351 dev_t s_dev; /* search index; _not_ kdev_t */
1353 unsigned char s_dirt; 1352 unsigned char s_dirt;
1354 unsigned char s_blocksize_bits; 1353 unsigned char s_blocksize_bits;
1355 unsigned long s_blocksize; 1354 unsigned long s_blocksize;
1356 loff_t s_maxbytes; /* Max file size */ 1355 loff_t s_maxbytes; /* Max file size */
1357 struct file_system_type *s_type; 1356 struct file_system_type *s_type;
1358 const struct super_operations *s_op; 1357 const struct super_operations *s_op;
1359 const struct dquot_operations *dq_op; 1358 const struct dquot_operations *dq_op;
1360 const struct quotactl_ops *s_qcop; 1359 const struct quotactl_ops *s_qcop;
1361 const struct export_operations *s_export_op; 1360 const struct export_operations *s_export_op;
1362 unsigned long s_flags; 1361 unsigned long s_flags;
1363 unsigned long s_magic; 1362 unsigned long s_magic;
1364 struct dentry *s_root; 1363 struct dentry *s_root;
1365 struct rw_semaphore s_umount; 1364 struct rw_semaphore s_umount;
1366 struct mutex s_lock; 1365 struct mutex s_lock;
1367 int s_count; 1366 int s_count;
1368 atomic_t s_active; 1367 atomic_t s_active;
1369 #ifdef CONFIG_SECURITY 1368 #ifdef CONFIG_SECURITY
1370 void *s_security; 1369 void *s_security;
1371 #endif 1370 #endif
1372 const struct xattr_handler **s_xattr; 1371 const struct xattr_handler **s_xattr;
1373 1372
1374 struct list_head s_inodes; /* all inodes */ 1373 struct list_head s_inodes; /* all inodes */
1375 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ 1374 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
1376 #ifdef CONFIG_SMP 1375 #ifdef CONFIG_SMP
1377 struct list_head __percpu *s_files; 1376 struct list_head __percpu *s_files;
1378 #else 1377 #else
1379 struct list_head s_files; 1378 struct list_head s_files;
1380 #endif 1379 #endif
1381 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ 1380 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
1382 struct list_head s_dentry_lru; /* unused dentry lru */ 1381 struct list_head s_dentry_lru; /* unused dentry lru */
1383 int s_nr_dentry_unused; /* # of dentry on lru */ 1382 int s_nr_dentry_unused; /* # of dentry on lru */
1384 1383
1385 struct block_device *s_bdev; 1384 struct block_device *s_bdev;
1386 struct backing_dev_info *s_bdi; 1385 struct backing_dev_info *s_bdi;
1387 struct mtd_info *s_mtd; 1386 struct mtd_info *s_mtd;
1388 struct list_head s_instances; 1387 struct list_head s_instances;
1389 struct quota_info s_dquot; /* Diskquota specific options */ 1388 struct quota_info s_dquot; /* Diskquota specific options */
1390 1389
1391 int s_frozen; 1390 int s_frozen;
1392 wait_queue_head_t s_wait_unfrozen; 1391 wait_queue_head_t s_wait_unfrozen;
1393 1392
1394 char s_id[32]; /* Informational name */ 1393 char s_id[32]; /* Informational name */
1395 1394
1396 void *s_fs_info; /* Filesystem private info */ 1395 void *s_fs_info; /* Filesystem private info */
1397 fmode_t s_mode; 1396 fmode_t s_mode;
1398 1397
1399 /* Granularity of c/m/atime in ns. 1398 /* Granularity of c/m/atime in ns.
1400 Cannot be worse than a second */ 1399 Cannot be worse than a second */
1401 u32 s_time_gran; 1400 u32 s_time_gran;
1402 1401
1403 /* 1402 /*
1404 * The next field is for VFS *only*. No filesystems have any business 1403 * The next field is for VFS *only*. No filesystems have any business
1405 * even looking at it. You have been warned. 1404 * even looking at it. You have been warned.
1406 */ 1405 */
1407 struct mutex s_vfs_rename_mutex; /* Kludge */ 1406 struct mutex s_vfs_rename_mutex; /* Kludge */
1408 1407
1409 /* 1408 /*
1410 * Filesystem subtype. If non-empty the filesystem type field 1409 * Filesystem subtype. If non-empty the filesystem type field
1411 * in /proc/mounts will be "type.subtype" 1410 * in /proc/mounts will be "type.subtype"
1412 */ 1411 */
1413 char *s_subtype; 1412 char *s_subtype;
1414 1413
1415 /* 1414 /*
1416 * Saved mount options for lazy filesystems using 1415 * Saved mount options for lazy filesystems using
1417 * generic_show_options() 1416 * generic_show_options()
1418 */ 1417 */
1419 char __rcu *s_options; 1418 char __rcu *s_options;
1420 }; 1419 };
1421 1420
1422 extern struct timespec current_fs_time(struct super_block *sb); 1421 extern struct timespec current_fs_time(struct super_block *sb);
1423 1422
1424 /* 1423 /*
1425 * Snapshotting support. 1424 * Snapshotting support.
1426 */ 1425 */
1427 enum { 1426 enum {
1428 SB_UNFROZEN = 0, 1427 SB_UNFROZEN = 0,
1429 SB_FREEZE_WRITE = 1, 1428 SB_FREEZE_WRITE = 1,
1430 SB_FREEZE_TRANS = 2, 1429 SB_FREEZE_TRANS = 2,
1431 }; 1430 };
1432 1431
1433 #define vfs_check_frozen(sb, level) \ 1432 #define vfs_check_frozen(sb, level) \
1434 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1433 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
1435 1434
1436 #define get_fs_excl() atomic_inc(&current->fs_excl) 1435 #define get_fs_excl() atomic_inc(&current->fs_excl)
1437 #define put_fs_excl() atomic_dec(&current->fs_excl) 1436 #define put_fs_excl() atomic_dec(&current->fs_excl)
1438 #define has_fs_excl() atomic_read(&current->fs_excl) 1437 #define has_fs_excl() atomic_read(&current->fs_excl)
1439 1438
1440 #define is_owner_or_cap(inode) \ 1439 #define is_owner_or_cap(inode) \
1441 ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) 1440 ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER))
1442 1441
1443 /* not quite ready to be deprecated, but... */ 1442 /* not quite ready to be deprecated, but... */
1444 extern void lock_super(struct super_block *); 1443 extern void lock_super(struct super_block *);
1445 extern void unlock_super(struct super_block *); 1444 extern void unlock_super(struct super_block *);
1446 1445
1447 /* 1446 /*
1448 * VFS helper functions. 1447 * VFS helper functions.
1449 */ 1448 */
1450 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); 1449 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
1451 extern int vfs_mkdir(struct inode *, struct dentry *, int); 1450 extern int vfs_mkdir(struct inode *, struct dentry *, int);
1452 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); 1451 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
1453 extern int vfs_symlink(struct inode *, struct dentry *, const char *); 1452 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
1454 extern int vfs_link(struct dentry *, struct inode *, struct dentry *); 1453 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
1455 extern int vfs_rmdir(struct inode *, struct dentry *); 1454 extern int vfs_rmdir(struct inode *, struct dentry *);
1456 extern int vfs_unlink(struct inode *, struct dentry *); 1455 extern int vfs_unlink(struct inode *, struct dentry *);
1457 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 1456 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
1458 1457
1459 /* 1458 /*
1460 * VFS dentry helper functions. 1459 * VFS dentry helper functions.
1461 */ 1460 */
1462 extern void dentry_unhash(struct dentry *dentry); 1461 extern void dentry_unhash(struct dentry *dentry);
1463 1462
1464 /* 1463 /*
1465 * VFS file helper functions. 1464 * VFS file helper functions.
1466 */ 1465 */
1467 extern int file_permission(struct file *, int); 1466 extern int file_permission(struct file *, int);
1468 extern void inode_init_owner(struct inode *inode, const struct inode *dir, 1467 extern void inode_init_owner(struct inode *inode, const struct inode *dir,
1469 mode_t mode); 1468 mode_t mode);
1470 /* 1469 /*
1471 * VFS FS_IOC_FIEMAP helper definitions. 1470 * VFS FS_IOC_FIEMAP helper definitions.
1472 */ 1471 */
1473 struct fiemap_extent_info { 1472 struct fiemap_extent_info {
1474 unsigned int fi_flags; /* Flags as passed from user */ 1473 unsigned int fi_flags; /* Flags as passed from user */
1475 unsigned int fi_extents_mapped; /* Number of mapped extents */ 1474 unsigned int fi_extents_mapped; /* Number of mapped extents */
1476 unsigned int fi_extents_max; /* Size of fiemap_extent array */ 1475 unsigned int fi_extents_max; /* Size of fiemap_extent array */
1477 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent 1476 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
1478 * array */ 1477 * array */
1479 }; 1478 };
1480 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, 1479 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
1481 u64 phys, u64 len, u32 flags); 1480 u64 phys, u64 len, u32 flags);
1482 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); 1481 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
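
A ->fiemap implementation is expected to validate the caller's flags with fiemap_check_flags() and report extents through fiemap_fill_next_extent(). A hedged sketch (the 1:1 logical-to-physical mapping is purely illustrative; the FIEMAP_* constants come from <linux/fiemap.h>):

        static int myfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                               u64 start, u64 len)
        {
                int ret;

                /* Reject any flags this filesystem does not support. */
                ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
                if (ret)
                        return ret;

                /* Report a single extent; a real implementation walks its block map. */
                ret = fiemap_fill_next_extent(fieinfo, start, start, len,
                                              FIEMAP_EXTENT_LAST);
                return ret < 0 ? ret : 0;
        }
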
1483 1482
1484 /* 1483 /*
1485 * File types 1484 * File types
1486 * 1485 *
1487 * NOTE! These match bits 12..15 of stat.st_mode 1486 * NOTE! These match bits 12..15 of stat.st_mode
1488 * (ie "(i_mode >> 12) & 15"). 1487 * (ie "(i_mode >> 12) & 15").
1489 */ 1488 */
1490 #define DT_UNKNOWN 0 1489 #define DT_UNKNOWN 0
1491 #define DT_FIFO 1 1490 #define DT_FIFO 1
1492 #define DT_CHR 2 1491 #define DT_CHR 2
1493 #define DT_DIR 4 1492 #define DT_DIR 4
1494 #define DT_BLK 6 1493 #define DT_BLK 6
1495 #define DT_REG 8 1494 #define DT_REG 8
1496 #define DT_LNK 10 1495 #define DT_LNK 10
1497 #define DT_SOCK 12 1496 #define DT_SOCK 12
1498 #define DT_WHT 14 1497 #define DT_WHT 14
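
The note above translates into a one-line conversion: for example S_IFREG is 0100000 octal, and (0100000 >> 12) & 15 == 8 == DT_REG. A helper along these lines (cf. dt_type() in fs/libfs.c):

        /* Derive the directory-entry type from an inode's mode bits. */
        static inline unsigned char mode_to_dt(umode_t mode)
        {
                return (mode >> 12) & 15;
        }
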
1499 1498
1500 /* 1499 /*
1501 * This is the "filldir" function type, used by readdir() to let 1500 * This is the "filldir" function type, used by readdir() to let
1502 * the kernel specify what kind of dirent layout it wants to have. 1501 * the kernel specify what kind of dirent layout it wants to have.
1503 * This allows the kernel to read directories into kernel space or 1502 * This allows the kernel to read directories into kernel space or
1504 * to have different dirent layouts depending on the binary type. 1503 * to have different dirent layouts depending on the binary type.
1505 */ 1504 */
1506 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); 1505 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
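
A ->readdir method calls the supplied filldir callback once per entry and stops as soon as it returns a negative value (the caller's buffer is full). A minimal hedged sketch, emitting only "." and ".." for a hypothetical myfs:

        static int myfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        {
                struct inode *inode = filp->f_path.dentry->d_inode;

                if (filp->f_pos == 0) {
                        if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0)
                                return 0;
                        filp->f_pos = 1;
                }
                if (filp->f_pos == 1) {
                        if (filldir(dirent, "..", 2, filp->f_pos,
                                    parent_ino(filp->f_path.dentry), DT_DIR) < 0)
                                return 0;
                        filp->f_pos = 2;
                }
                /* ...real directory entries would follow here... */
                return 0;
        }
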
1507 struct block_device_operations; 1506 struct block_device_operations;
1508 1507
1509 /* These macros are for out-of-kernel modules to test that 1508 /* These macros are for out-of-kernel modules to test that
1510 * the kernel supports the unlocked_ioctl and compat_ioctl 1509 * the kernel supports the unlocked_ioctl and compat_ioctl
1511 * fields in struct file_operations. */ 1510 * fields in struct file_operations. */
1512 #define HAVE_COMPAT_IOCTL 1 1511 #define HAVE_COMPAT_IOCTL 1
1513 #define HAVE_UNLOCKED_IOCTL 1 1512 #define HAVE_UNLOCKED_IOCTL 1
1514 1513
1515 /* 1514 /*
1516 * NOTE: 1515 * NOTE:
1517 * all file operations except setlease can be called without 1516 * all file operations except setlease can be called without
1518 * the big kernel lock held in all filesystems. 1517 * the big kernel lock held in all filesystems.
1519 */ 1518 */
1520 struct file_operations { 1519 struct file_operations {
1521 struct module *owner; 1520 struct module *owner;
1522 loff_t (*llseek) (struct file *, loff_t, int); 1521 loff_t (*llseek) (struct file *, loff_t, int);
1523 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 1522 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
1524 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 1523 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
1525 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1524 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1526 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1525 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1527 int (*readdir) (struct file *, void *, filldir_t); 1526 int (*readdir) (struct file *, void *, filldir_t);
1528 unsigned int (*poll) (struct file *, struct poll_table_struct *); 1527 unsigned int (*poll) (struct file *, struct poll_table_struct *);
1529 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 1528 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
1530 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 1529 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
1531 int (*mmap) (struct file *, struct vm_area_struct *); 1530 int (*mmap) (struct file *, struct vm_area_struct *);
1532 int (*open) (struct inode *, struct file *); 1531 int (*open) (struct inode *, struct file *);
1533 int (*flush) (struct file *, fl_owner_t id); 1532 int (*flush) (struct file *, fl_owner_t id);
1534 int (*release) (struct inode *, struct file *); 1533 int (*release) (struct inode *, struct file *);
1535 int (*fsync) (struct file *, int datasync); 1534 int (*fsync) (struct file *, int datasync);
1536 int (*aio_fsync) (struct kiocb *, int datasync); 1535 int (*aio_fsync) (struct kiocb *, int datasync);
1537 int (*fasync) (int, struct file *, int); 1536 int (*fasync) (int, struct file *, int);
1538 int (*lock) (struct file *, int, struct file_lock *); 1537 int (*lock) (struct file *, int, struct file_lock *);
1539 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); 1538 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
1540 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 1539 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
1541 int (*check_flags)(int); 1540 int (*check_flags)(int);
1542 int (*flock) (struct file *, int, struct file_lock *); 1541 int (*flock) (struct file *, int, struct file_lock *);
1543 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); 1542 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
1544 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); 1543 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
1545 int (*setlease)(struct file *, long, struct file_lock **); 1544 int (*setlease)(struct file *, long, struct file_lock **);
1546 }; 1545 };
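
A filesystem only fills in the operations it implements and leaves the rest NULL. A hedged example for regular files, built entirely from generic helpers exported by the VFS (the usual module boilerplate is assumed):

        const struct file_operations myfs_file_operations = {
                .owner          = THIS_MODULE,
                .llseek         = generic_file_llseek,
                .read           = do_sync_read,
                .write          = do_sync_write,
                .aio_read       = generic_file_aio_read,
                .aio_write      = generic_file_aio_write,
                .mmap           = generic_file_mmap,
                .fsync          = generic_file_fsync,
                .splice_read    = generic_file_splice_read,
        };
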
1547 1546
1548 struct inode_operations { 1547 struct inode_operations {
1549 int (*create) (struct inode *,struct dentry *,int, struct nameidata *); 1548 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
1550 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); 1549 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
1551 int (*link) (struct dentry *,struct inode *,struct dentry *); 1550 int (*link) (struct dentry *,struct inode *,struct dentry *);
1552 int (*unlink) (struct inode *,struct dentry *); 1551 int (*unlink) (struct inode *,struct dentry *);
1553 int (*symlink) (struct inode *,struct dentry *,const char *); 1552 int (*symlink) (struct inode *,struct dentry *,const char *);
1554 int (*mkdir) (struct inode *,struct dentry *,int); 1553 int (*mkdir) (struct inode *,struct dentry *,int);
1555 int (*rmdir) (struct inode *,struct dentry *); 1554 int (*rmdir) (struct inode *,struct dentry *);
1556 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 1555 int (*mknod) (struct inode *,struct dentry *,int,dev_t);
1557 int (*rename) (struct inode *, struct dentry *, 1556 int (*rename) (struct inode *, struct dentry *,
1558 struct inode *, struct dentry *); 1557 struct inode *, struct dentry *);
1559 int (*readlink) (struct dentry *, char __user *,int); 1558 int (*readlink) (struct dentry *, char __user *,int);
1560 void * (*follow_link) (struct dentry *, struct nameidata *); 1559 void * (*follow_link) (struct dentry *, struct nameidata *);
1561 void (*put_link) (struct dentry *, struct nameidata *, void *); 1560 void (*put_link) (struct dentry *, struct nameidata *, void *);
1562 void (*truncate) (struct inode *); 1561 void (*truncate) (struct inode *);
1563 int (*permission) (struct inode *, int); 1562 int (*permission) (struct inode *, int);
1564 int (*check_acl)(struct inode *, int); 1563 int (*check_acl)(struct inode *, int);
1565 int (*setattr) (struct dentry *, struct iattr *); 1564 int (*setattr) (struct dentry *, struct iattr *);
1566 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 1565 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1567 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 1566 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
1568 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 1567 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1569 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1568 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1570 int (*removexattr) (struct dentry *, const char *); 1569 int (*removexattr) (struct dentry *, const char *);
1571 void (*truncate_range)(struct inode *, loff_t, loff_t); 1570 void (*truncate_range)(struct inode *, loff_t, loff_t);
1572 long (*fallocate)(struct inode *inode, int mode, loff_t offset, 1571 long (*fallocate)(struct inode *inode, int mode, loff_t offset,
1573 loff_t len); 1572 loff_t len);
1574 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1573 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1575 u64 len); 1574 u64 len);
1576 }; 1575 };
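
For a simple in-memory directory, most of these can be served by the simple_* library helpers from fs/libfs.c; only creation needs filesystem-specific code. A hedged sketch (myfs_create and myfs_mkdir are hypothetical):

        static const struct inode_operations myfs_dir_inode_operations = {
                .create         = myfs_create,
                .lookup         = simple_lookup,
                .link           = simple_link,
                .unlink         = simple_unlink,
                .mkdir          = myfs_mkdir,
                .rmdir          = simple_rmdir,
                .rename         = simple_rename,
        };
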
1577 1576
1578 struct seq_file; 1577 struct seq_file;
1579 1578
1580 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 1579 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
1581 unsigned long nr_segs, unsigned long fast_segs, 1580 unsigned long nr_segs, unsigned long fast_segs,
1582 struct iovec *fast_pointer, 1581 struct iovec *fast_pointer,
1583 struct iovec **ret_pointer); 1582 struct iovec **ret_pointer);
1584 1583
1585 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); 1584 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1586 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); 1585 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
1587 extern ssize_t vfs_readv(struct file *, const struct iovec __user *, 1586 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1588 unsigned long, loff_t *); 1587 unsigned long, loff_t *);
1589 extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1588 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1590 unsigned long, loff_t *); 1589 unsigned long, loff_t *);
1591 1590
1592 struct super_operations { 1591 struct super_operations {
1593 struct inode *(*alloc_inode)(struct super_block *sb); 1592 struct inode *(*alloc_inode)(struct super_block *sb);
1594 void (*destroy_inode)(struct inode *); 1593 void (*destroy_inode)(struct inode *);
1595 1594
1596 void (*dirty_inode) (struct inode *); 1595 void (*dirty_inode) (struct inode *);
1597 int (*write_inode) (struct inode *, struct writeback_control *wbc); 1596 int (*write_inode) (struct inode *, struct writeback_control *wbc);
1598 int (*drop_inode) (struct inode *); 1597 int (*drop_inode) (struct inode *);
1599 void (*evict_inode) (struct inode *); 1598 void (*evict_inode) (struct inode *);
1600 void (*put_super) (struct super_block *); 1599 void (*put_super) (struct super_block *);
1601 void (*write_super) (struct super_block *); 1600 void (*write_super) (struct super_block *);
1602 int (*sync_fs)(struct super_block *sb, int wait); 1601 int (*sync_fs)(struct super_block *sb, int wait);
1603 int (*freeze_fs) (struct super_block *); 1602 int (*freeze_fs) (struct super_block *);
1604 int (*unfreeze_fs) (struct super_block *); 1603 int (*unfreeze_fs) (struct super_block *);
1605 int (*statfs) (struct dentry *, struct kstatfs *); 1604 int (*statfs) (struct dentry *, struct kstatfs *);
1606 int (*remount_fs) (struct super_block *, int *, char *); 1605 int (*remount_fs) (struct super_block *, int *, char *);
1607 void (*umount_begin) (struct super_block *); 1606 void (*umount_begin) (struct super_block *);
1608 1607
1609 int (*show_options)(struct seq_file *, struct vfsmount *); 1608 int (*show_options)(struct seq_file *, struct vfsmount *);
1610 int (*show_stats)(struct seq_file *, struct vfsmount *); 1609 int (*show_stats)(struct seq_file *, struct vfsmount *);
1611 #ifdef CONFIG_QUOTA 1610 #ifdef CONFIG_QUOTA
1612 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 1611 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1613 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1612 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1614 #endif 1613 #endif
1615 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1614 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1616 int (*trim_fs) (struct super_block *, struct fstrim_range *); 1615 int (*trim_fs) (struct super_block *, struct fstrim_range *);
1617 }; 1616 };
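
A small filesystem typically needs only a handful of these. A hedged example combining generic helpers with hypothetical myfs_* inode constructors:

        static const struct super_operations myfs_super_operations = {
                .alloc_inode    = myfs_alloc_inode,     /* hypothetical */
                .destroy_inode  = myfs_destroy_inode,   /* hypothetical */
                .evict_inode    = myfs_evict_inode,     /* hypothetical */
                .statfs         = simple_statfs,
                .drop_inode     = generic_delete_inode,
                .show_options   = generic_show_options,
        };
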
1618 1617
1619 /* 1618 /*
1620 * Inode state bits. Protected by inode_lock. 1619 * Inode state bits. Protected by inode_lock.
1621 * 1620 *
1622 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1621 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
1623 * I_DIRTY_DATASYNC and I_DIRTY_PAGES. 1622 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
1624 * 1623 *
1625 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, 1624 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
1626 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 1625 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
1627 * various stages of removing an inode. 1626 * various stages of removing an inode.
1628 * 1627 *
1629 * Two bits are used for locking and completion notification, I_NEW and I_SYNC. 1628 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
1630 * 1629 *
1631 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 1630 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
1632 * fdatasync(). i_atime is the usual cause. 1631 * fdatasync(). i_atime is the usual cause.
1633 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of 1632 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
1634 * these changes separately from I_DIRTY_SYNC so that we 1633 * these changes separately from I_DIRTY_SYNC so that we
1635 * don't have to write inode on fdatasync() when only 1634 * don't have to write inode on fdatasync() when only
1636 * mtime has changed in it. 1635 * mtime has changed in it.
1637 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 1636 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
1638 * I_NEW Serves as both a mutex and completion notification. 1637 * I_NEW Serves as both a mutex and completion notification.
1639 * New inodes set I_NEW. If two processes both create 1638 * New inodes set I_NEW. If two processes both create
1640 * the same inode, one of them will release its inode and 1639 * the same inode, one of them will release its inode and
1641 * wait for I_NEW to be released before returning. 1640 * wait for I_NEW to be released before returning.
1642 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can 1641 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
1643 * also cause waiting on I_NEW, without I_NEW actually 1642 * also cause waiting on I_NEW, without I_NEW actually
1644 * being set. find_inode() uses this to prevent returning 1643 * being set. find_inode() uses this to prevent returning
1645 * nearly-dead inodes. 1644 * nearly-dead inodes.
1646 * I_WILL_FREE Must be set when calling write_inode_now() if i_count 1645 * I_WILL_FREE Must be set when calling write_inode_now() if i_count
1647 * is zero. I_FREEING must be set when I_WILL_FREE is 1646 * is zero. I_FREEING must be set when I_WILL_FREE is
1648 * cleared. 1647 * cleared.
1649 * I_FREEING Set when inode is about to be freed but still has dirty 1648 * I_FREEING Set when inode is about to be freed but still has dirty
1650 * pages or buffers attached or the inode itself is still 1649 * pages or buffers attached or the inode itself is still
1651 * dirty. 1650 * dirty.
1652 * I_CLEAR Added by end_writeback(). In this state the inode is clean 1651 * I_CLEAR Added by end_writeback(). In this state the inode is clean
1653 * and can be destroyed. Inode keeps I_FREEING. 1652 * and can be destroyed. Inode keeps I_FREEING.
1654 * 1653 *
1655 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are 1654 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
1656 * prohibited for many purposes. iget() must wait for 1655 * prohibited for many purposes. iget() must wait for
1657 * the inode to be completely released, then create it 1656 * the inode to be completely released, then create it
1658 * anew. Other functions will just ignore such inodes, 1657 * anew. Other functions will just ignore such inodes,
1659 * if appropriate. I_NEW is used for waiting. 1658 * if appropriate. I_NEW is used for waiting.
1660 * 1659 *
1661 * I_SYNC Synchronized write of dirty inode data. The bit is 1660 * I_SYNC Synchronized write of dirty inode data. The bit is
1662 * set during data writeback, and cleared with a wakeup 1661 * set during data writeback, and cleared with a wakeup
1663 * on the bit address once it is done. 1662 * on the bit address once it is done.
1664 * 1663 *
1665 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1664 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1666 */ 1665 */
1667 #define I_DIRTY_SYNC (1 << 0) 1666 #define I_DIRTY_SYNC (1 << 0)
1668 #define I_DIRTY_DATASYNC (1 << 1) 1667 #define I_DIRTY_DATASYNC (1 << 1)
1669 #define I_DIRTY_PAGES (1 << 2) 1668 #define I_DIRTY_PAGES (1 << 2)
1670 #define __I_NEW 3 1669 #define __I_NEW 3
1671 #define I_NEW (1 << __I_NEW) 1670 #define I_NEW (1 << __I_NEW)
1672 #define I_WILL_FREE (1 << 4) 1671 #define I_WILL_FREE (1 << 4)
1673 #define I_FREEING (1 << 5) 1672 #define I_FREEING (1 << 5)
1674 #define I_CLEAR (1 << 6) 1673 #define I_CLEAR (1 << 6)
1675 #define __I_SYNC 7 1674 #define __I_SYNC 7
1676 #define I_SYNC (1 << __I_SYNC) 1675 #define I_SYNC (1 << __I_SYNC)
1677 #define I_REFERENCED (1 << 8) 1676 #define I_REFERENCED (1 << 8)
1678 1677
1679 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1678 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1680 1679
1681 extern void __mark_inode_dirty(struct inode *, int); 1680 extern void __mark_inode_dirty(struct inode *, int);
1682 static inline void mark_inode_dirty(struct inode *inode) 1681 static inline void mark_inode_dirty(struct inode *inode)
1683 { 1682 {
1684 __mark_inode_dirty(inode, I_DIRTY); 1683 __mark_inode_dirty(inode, I_DIRTY);
1685 } 1684 }
1686 1685
1687 static inline void mark_inode_dirty_sync(struct inode *inode) 1686 static inline void mark_inode_dirty_sync(struct inode *inode)
1688 { 1687 {
1689 __mark_inode_dirty(inode, I_DIRTY_SYNC); 1688 __mark_inode_dirty(inode, I_DIRTY_SYNC);
1690 } 1689 }
1691 1690
1692 /** 1691 /**
1693 * inc_nlink - directly increment an inode's link count 1692 * inc_nlink - directly increment an inode's link count
1694 * @inode: inode 1693 * @inode: inode
1695 * 1694 *
1696 * This is a low-level filesystem helper to replace any 1695 * This is a low-level filesystem helper to replace any
1697 * direct filesystem manipulation of i_nlink. Currently, 1696 * direct filesystem manipulation of i_nlink. Currently,
1698 * it is only here for parity with drop_nlink(). 1697 * it is only here for parity with drop_nlink().
1699 */ 1698 */
1700 static inline void inc_nlink(struct inode *inode) 1699 static inline void inc_nlink(struct inode *inode)
1701 { 1700 {
1702 inode->i_nlink++; 1701 inode->i_nlink++;
1703 } 1702 }
1704 1703
1705 static inline void inode_inc_link_count(struct inode *inode) 1704 static inline void inode_inc_link_count(struct inode *inode)
1706 { 1705 {
1707 inc_nlink(inode); 1706 inc_nlink(inode);
1708 mark_inode_dirty(inode); 1707 mark_inode_dirty(inode);
1709 } 1708 }
1710 1709
1711 /** 1710 /**
1712 * drop_nlink - directly drop an inode's link count 1711 * drop_nlink - directly drop an inode's link count
1713 * @inode: inode 1712 * @inode: inode
1714 * 1713 *
1715 * This is a low-level filesystem helper to replace any 1714 * This is a low-level filesystem helper to replace any
1716 * direct filesystem manipulation of i_nlink. In cases 1715 * direct filesystem manipulation of i_nlink. In cases
1717 * where we are attempting to track writes to the 1716 * where we are attempting to track writes to the
1718 * filesystem, a decrement to zero means an imminent 1717 * filesystem, a decrement to zero means an imminent
1719 * write when the file is truncated and actually unlinked 1718 * write when the file is truncated and actually unlinked
1720 * on the filesystem. 1719 * on the filesystem.
1721 */ 1720 */
1722 static inline void drop_nlink(struct inode *inode) 1721 static inline void drop_nlink(struct inode *inode)
1723 { 1722 {
1724 inode->i_nlink--; 1723 inode->i_nlink--;
1725 } 1724 }
1726 1725
1727 /** 1726 /**
1728 * clear_nlink - directly zero an inode's link count 1727 * clear_nlink - directly zero an inode's link count
1729 * @inode: inode 1728 * @inode: inode
1730 * 1729 *
1731 * This is a low-level filesystem helper to replace any 1730 * This is a low-level filesystem helper to replace any
1732 * direct filesystem manipulation of i_nlink. See 1731 * direct filesystem manipulation of i_nlink. See
1733 * drop_nlink() for why we care about i_nlink hitting zero. 1732 * drop_nlink() for why we care about i_nlink hitting zero.
1734 */ 1733 */
1735 static inline void clear_nlink(struct inode *inode) 1734 static inline void clear_nlink(struct inode *inode)
1736 { 1735 {
1737 inode->i_nlink = 0; 1736 inode->i_nlink = 0;
1738 } 1737 }
1739 1738
1740 static inline void inode_dec_link_count(struct inode *inode) 1739 static inline void inode_dec_link_count(struct inode *inode)
1741 { 1740 {
1742 drop_nlink(inode); 1741 drop_nlink(inode);
1743 mark_inode_dirty(inode); 1742 mark_inode_dirty(inode);
1744 } 1743 }
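
In practice the inode_{inc,dec}_link_count() pair is what simple filesystems call from ->link/->unlink, since it also marks the inode dirty in the same step. A hedged ->unlink sketch (myfs_delete_entry is a hypothetical directory-update helper):

        static int myfs_unlink(struct inode *dir, struct dentry *dentry)
        {
                struct inode *inode = dentry->d_inode;
                int err;

                err = myfs_delete_entry(dir, dentry);
                if (err)
                        return err;

                inode->i_ctime = dir->i_ctime;
                inode_dec_link_count(inode);    /* drop_nlink() + mark_inode_dirty() */
                return 0;
        }
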
1745 1744
1746 /** 1745 /**
1747 * inode_inc_iversion - increments i_version 1746 * inode_inc_iversion - increments i_version
1748 * @inode: inode that needs to be updated 1747 * @inode: inode that needs to be updated
1749 * 1748 *
1750 * Every time the inode is modified, the i_version field will be incremented. 1749 * Every time the inode is modified, the i_version field will be incremented.
1751 * The filesystem has to be mounted with the i_version flag. 1750 * The filesystem has to be mounted with the i_version flag.
1752 */ 1751 */
1753 1752
1754 static inline void inode_inc_iversion(struct inode *inode) 1753 static inline void inode_inc_iversion(struct inode *inode)
1755 { 1754 {
1756 spin_lock(&inode->i_lock); 1755 spin_lock(&inode->i_lock);
1757 inode->i_version++; 1756 inode->i_version++;
1758 spin_unlock(&inode->i_lock); 1757 spin_unlock(&inode->i_lock);
1759 } 1758 }
1760 1759
1761 extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); 1760 extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry);
1762 static inline void file_accessed(struct file *file) 1761 static inline void file_accessed(struct file *file)
1763 { 1762 {
1764 if (!(file->f_flags & O_NOATIME)) 1763 if (!(file->f_flags & O_NOATIME))
1765 touch_atime(file->f_path.mnt, file->f_path.dentry); 1764 touch_atime(file->f_path.mnt, file->f_path.dentry);
1766 } 1765 }
1767 1766
1768 int sync_inode(struct inode *inode, struct writeback_control *wbc); 1767 int sync_inode(struct inode *inode, struct writeback_control *wbc);
1769 int sync_inode_metadata(struct inode *inode, int wait); 1768 int sync_inode_metadata(struct inode *inode, int wait);
1770 1769
1771 struct file_system_type { 1770 struct file_system_type {
1772 const char *name; 1771 const char *name;
1773 int fs_flags; 1772 int fs_flags;
1774 int (*get_sb) (struct file_system_type *, int, 1773 int (*get_sb) (struct file_system_type *, int,
1775 const char *, void *, struct vfsmount *); 1774 const char *, void *, struct vfsmount *);
1776 struct dentry *(*mount) (struct file_system_type *, int, 1775 struct dentry *(*mount) (struct file_system_type *, int,
1777 const char *, void *); 1776 const char *, void *);
1778 void (*kill_sb) (struct super_block *); 1777 void (*kill_sb) (struct super_block *);
1779 struct module *owner; 1778 struct module *owner;
1780 struct file_system_type * next; 1779 struct file_system_type * next;
1781 struct list_head fs_supers; 1780 struct list_head fs_supers;
1782 1781
1783 struct lock_class_key s_lock_key; 1782 struct lock_class_key s_lock_key;
1784 struct lock_class_key s_umount_key; 1783 struct lock_class_key s_umount_key;
1785 struct lock_class_key s_vfs_rename_key; 1784 struct lock_class_key s_vfs_rename_key;
1786 1785
1787 struct lock_class_key i_lock_key; 1786 struct lock_class_key i_lock_key;
1788 struct lock_class_key i_mutex_key; 1787 struct lock_class_key i_mutex_key;
1789 struct lock_class_key i_mutex_dir_key; 1788 struct lock_class_key i_mutex_dir_key;
1790 struct lock_class_key i_alloc_sem_key; 1789 struct lock_class_key i_alloc_sem_key;
1791 }; 1790 };
1792 1791
1793 extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, 1792 extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
1794 void *data, int (*fill_super)(struct super_block *, void *, int)); 1793 void *data, int (*fill_super)(struct super_block *, void *, int));
1795 extern struct dentry *mount_bdev(struct file_system_type *fs_type, 1794 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
1796 int flags, const char *dev_name, void *data, 1795 int flags, const char *dev_name, void *data,
1797 int (*fill_super)(struct super_block *, void *, int)); 1796 int (*fill_super)(struct super_block *, void *, int));
1798 extern int get_sb_bdev(struct file_system_type *fs_type, 1797 extern int get_sb_bdev(struct file_system_type *fs_type,
1799 int flags, const char *dev_name, void *data, 1798 int flags, const char *dev_name, void *data,
1800 int (*fill_super)(struct super_block *, void *, int), 1799 int (*fill_super)(struct super_block *, void *, int),
1801 struct vfsmount *mnt); 1800 struct vfsmount *mnt);
1802 extern struct dentry *mount_single(struct file_system_type *fs_type, 1801 extern struct dentry *mount_single(struct file_system_type *fs_type,
1803 int flags, void *data, 1802 int flags, void *data,
1804 int (*fill_super)(struct super_block *, void *, int)); 1803 int (*fill_super)(struct super_block *, void *, int));
1805 extern int get_sb_single(struct file_system_type *fs_type, 1804 extern int get_sb_single(struct file_system_type *fs_type,
1806 int flags, void *data, 1805 int flags, void *data,
1807 int (*fill_super)(struct super_block *, void *, int), 1806 int (*fill_super)(struct super_block *, void *, int),
1808 struct vfsmount *mnt); 1807 struct vfsmount *mnt);
1809 extern struct dentry *mount_nodev(struct file_system_type *fs_type, 1808 extern struct dentry *mount_nodev(struct file_system_type *fs_type,
1810 int flags, void *data, 1809 int flags, void *data,
1811 int (*fill_super)(struct super_block *, void *, int)); 1810 int (*fill_super)(struct super_block *, void *, int));
1812 extern int get_sb_nodev(struct file_system_type *fs_type, 1811 extern int get_sb_nodev(struct file_system_type *fs_type,
1813 int flags, void *data, 1812 int flags, void *data,
1814 int (*fill_super)(struct super_block *, void *, int), 1813 int (*fill_super)(struct super_block *, void *, int),
1815 struct vfsmount *mnt); 1814 struct vfsmount *mnt);
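
At this point in the tree both the older get_sb_*() entry points and the newer mount_*() helpers coexist. A hedged sketch of a virtual (no backing device) filesystem using the new style; every myfs_* name is hypothetical:

        static struct dentry *myfs_mount(struct file_system_type *fs_type,
                                         int flags, const char *dev_name, void *data)
        {
                return mount_nodev(fs_type, flags, data, myfs_fill_super);
        }

        static struct file_system_type myfs_fs_type = {
                .owner          = THIS_MODULE,
                .name           = "myfs",
                .mount          = myfs_mount,
                .kill_sb        = kill_litter_super,
        };

Module init would then call register_filesystem(&myfs_fs_type), and module exit unregister_filesystem(&myfs_fs_type).
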
1816 void generic_shutdown_super(struct super_block *sb); 1815 void generic_shutdown_super(struct super_block *sb);
1817 void kill_block_super(struct super_block *sb); 1816 void kill_block_super(struct super_block *sb);
1818 void kill_anon_super(struct super_block *sb); 1817 void kill_anon_super(struct super_block *sb);
1819 void kill_litter_super(struct super_block *sb); 1818 void kill_litter_super(struct super_block *sb);
1820 void deactivate_super(struct super_block *sb); 1819 void deactivate_super(struct super_block *sb);
1821 void deactivate_locked_super(struct super_block *sb); 1820 void deactivate_locked_super(struct super_block *sb);
1822 int set_anon_super(struct super_block *s, void *data); 1821 int set_anon_super(struct super_block *s, void *data);
1823 struct super_block *sget(struct file_system_type *type, 1822 struct super_block *sget(struct file_system_type *type,
1824 int (*test)(struct super_block *,void *), 1823 int (*test)(struct super_block *,void *),
1825 int (*set)(struct super_block *,void *), 1824 int (*set)(struct super_block *,void *),
1826 void *data); 1825 void *data);
1827 extern struct dentry *mount_pseudo(struct file_system_type *, char *, 1826 extern struct dentry *mount_pseudo(struct file_system_type *, char *,
1828 const struct super_operations *ops, unsigned long); 1827 const struct super_operations *ops, unsigned long);
1829 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); 1828 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
1830 1829
1831 static inline void sb_mark_dirty(struct super_block *sb) 1830 static inline void sb_mark_dirty(struct super_block *sb)
1832 { 1831 {
1833 sb->s_dirt = 1; 1832 sb->s_dirt = 1;
1834 } 1833 }
1835 static inline void sb_mark_clean(struct super_block *sb) 1834 static inline void sb_mark_clean(struct super_block *sb)
1836 { 1835 {
1837 sb->s_dirt = 0; 1836 sb->s_dirt = 0;
1838 } 1837 }
1839 static inline int sb_is_dirty(struct super_block *sb) 1838 static inline int sb_is_dirty(struct super_block *sb)
1840 { 1839 {
1841 return sb->s_dirt; 1840 return sb->s_dirt;
1842 } 1841 }
1843 1842
1844 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ 1843 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
1845 #define fops_get(fops) \ 1844 #define fops_get(fops) \
1846 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) 1845 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
1847 #define fops_put(fops) \ 1846 #define fops_put(fops) \
1848 do { if (fops) module_put((fops)->owner); } while(0) 1847 do { if (fops) module_put((fops)->owner); } while(0)
1849 1848
1850 extern int register_filesystem(struct file_system_type *); 1849 extern int register_filesystem(struct file_system_type *);
1851 extern int unregister_filesystem(struct file_system_type *); 1850 extern int unregister_filesystem(struct file_system_type *);
1852 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); 1851 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
1853 #define kern_mount(type) kern_mount_data(type, NULL) 1852 #define kern_mount(type) kern_mount_data(type, NULL)
1854 extern int may_umount_tree(struct vfsmount *); 1853 extern int may_umount_tree(struct vfsmount *);
1855 extern int may_umount(struct vfsmount *); 1854 extern int may_umount(struct vfsmount *);
1856 extern long do_mount(char *, char *, char *, unsigned long, void *); 1855 extern long do_mount(char *, char *, char *, unsigned long, void *);
1857 extern struct vfsmount *collect_mounts(struct path *); 1856 extern struct vfsmount *collect_mounts(struct path *);
1858 extern void drop_collected_mounts(struct vfsmount *); 1857 extern void drop_collected_mounts(struct vfsmount *);
1859 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1858 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1860 struct vfsmount *); 1859 struct vfsmount *);
1861 extern int vfs_statfs(struct path *, struct kstatfs *); 1860 extern int vfs_statfs(struct path *, struct kstatfs *);
1862 extern int statfs_by_dentry(struct dentry *, struct kstatfs *); 1861 extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
1863 extern int freeze_super(struct super_block *super); 1862 extern int freeze_super(struct super_block *super);
1864 extern int thaw_super(struct super_block *super); 1863 extern int thaw_super(struct super_block *super);
1865 1864
1866 extern int current_umask(void); 1865 extern int current_umask(void);
1867 1866
1868 /* /sys/fs */ 1867 /* /sys/fs */
1869 extern struct kobject *fs_kobj; 1868 extern struct kobject *fs_kobj;
1870 1869
1871 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 1870 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
1872 extern int rw_verify_area(int, struct file *, loff_t *, size_t); 1871 extern int rw_verify_area(int, struct file *, loff_t *, size_t);
1873 1872
1874 #define FLOCK_VERIFY_READ 1 1873 #define FLOCK_VERIFY_READ 1
1875 #define FLOCK_VERIFY_WRITE 2 1874 #define FLOCK_VERIFY_WRITE 2
1876 1875
1877 #ifdef CONFIG_FILE_LOCKING 1876 #ifdef CONFIG_FILE_LOCKING
1878 extern int locks_mandatory_locked(struct inode *); 1877 extern int locks_mandatory_locked(struct inode *);
1879 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 1878 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
1880 1879
1881 /* 1880 /*
1882 * Candidates for mandatory locking have the setgid bit set 1881 * Candidates for mandatory locking have the setgid bit set
1883 * but no group execute bit - an otherwise meaningless combination. 1882 * but no group execute bit - an otherwise meaningless combination.
1884 */ 1883 */
1885 1884
1886 static inline int __mandatory_lock(struct inode *ino) 1885 static inline int __mandatory_lock(struct inode *ino)
1887 { 1886 {
1888 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; 1887 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
1889 } 1888 }
1890 1889
1891 /* 1890 /*
1892 * ... and these candidates should be on an MS_MANDLOCK-mounted fs, 1891 * ... and these candidates should be on an MS_MANDLOCK-mounted fs,
1893 * otherwise they will be advisory locks 1892 * otherwise they will be advisory locks
1894 */ 1893 */
1895 1894
1896 static inline int mandatory_lock(struct inode *ino) 1895 static inline int mandatory_lock(struct inode *ino)
1897 { 1896 {
1898 return IS_MANDLOCK(ino) && __mandatory_lock(ino); 1897 return IS_MANDLOCK(ino) && __mandatory_lock(ino);
1899 } 1898 }
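
Concretely, a regular file created with mode 02644 (setgid on, group execute off) on an MS_MANDLOCK ("-o mand") mount gets mandatory-locking semantics. An illustrative helper mirroring the test above:

        /* True for modes such as S_IFREG | S_ISGID | 0644 (i.e. chmod 2644). */
        static inline int mode_wants_mandatory_lock(umode_t mode)
        {
                return (mode & (S_ISGID | S_IXGRP)) == S_ISGID;
        }
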
1900 1899
1901 static inline int locks_verify_locked(struct inode *inode) 1900 static inline int locks_verify_locked(struct inode *inode)
1902 { 1901 {
1903 if (mandatory_lock(inode)) 1902 if (mandatory_lock(inode))
1904 return locks_mandatory_locked(inode); 1903 return locks_mandatory_locked(inode);
1905 return 0; 1904 return 0;
1906 } 1905 }
1907 1906
1908 static inline int locks_verify_truncate(struct inode *inode, 1907 static inline int locks_verify_truncate(struct inode *inode,
1909 struct file *filp, 1908 struct file *filp,
1910 loff_t size) 1909 loff_t size)
1911 { 1910 {
1912 if (inode->i_flock && mandatory_lock(inode)) 1911 if (inode->i_flock && mandatory_lock(inode))
1913 return locks_mandatory_area( 1912 return locks_mandatory_area(
1914 FLOCK_VERIFY_WRITE, inode, filp, 1913 FLOCK_VERIFY_WRITE, inode, filp,
1915 size < inode->i_size ? size : inode->i_size, 1914 size < inode->i_size ? size : inode->i_size,
1916 (size < inode->i_size ? inode->i_size - size 1915 (size < inode->i_size ? inode->i_size - size
1917 : size - inode->i_size) 1916 : size - inode->i_size)
1918 ); 1917 );
1919 return 0; 1918 return 0;
1920 } 1919 }
1921 1920
1922 static inline int break_lease(struct inode *inode, unsigned int mode) 1921 static inline int break_lease(struct inode *inode, unsigned int mode)
1923 { 1922 {
1924 if (inode->i_flock) 1923 if (inode->i_flock)
1925 return __break_lease(inode, mode); 1924 return __break_lease(inode, mode);
1926 return 0; 1925 return 0;
1927 } 1926 }
1928 #else /* !CONFIG_FILE_LOCKING */ 1927 #else /* !CONFIG_FILE_LOCKING */
1929 static inline int locks_mandatory_locked(struct inode *inode) 1928 static inline int locks_mandatory_locked(struct inode *inode)
1930 { 1929 {
1931 return 0; 1930 return 0;
1932 } 1931 }
1933 1932
1934 static inline int locks_mandatory_area(int rw, struct inode *inode, 1933 static inline int locks_mandatory_area(int rw, struct inode *inode,
1935 struct file *filp, loff_t offset, 1934 struct file *filp, loff_t offset,
1936 size_t count) 1935 size_t count)
1937 { 1936 {
1938 return 0; 1937 return 0;
1939 } 1938 }
1940 1939
1941 static inline int __mandatory_lock(struct inode *inode) 1940 static inline int __mandatory_lock(struct inode *inode)
1942 { 1941 {
1943 return 0; 1942 return 0;
1944 } 1943 }
1945 1944
1946 static inline int mandatory_lock(struct inode *inode) 1945 static inline int mandatory_lock(struct inode *inode)
1947 { 1946 {
1948 return 0; 1947 return 0;
1949 } 1948 }
1950 1949
1951 static inline int locks_verify_locked(struct inode *inode) 1950 static inline int locks_verify_locked(struct inode *inode)
1952 { 1951 {
1953 return 0; 1952 return 0;
1954 } 1953 }
1955 1954
1956 static inline int locks_verify_truncate(struct inode *inode, struct file *filp, 1955 static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
1957 size_t size) 1956 size_t size)
1958 { 1957 {
1959 return 0; 1958 return 0;
1960 } 1959 }
1961 1960
1962 static inline int break_lease(struct inode *inode, unsigned int mode) 1961 static inline int break_lease(struct inode *inode, unsigned int mode)
1963 { 1962 {
1964 return 0; 1963 return 0;
1965 } 1964 }
1966 1965
1967 #endif /* CONFIG_FILE_LOCKING */ 1966 #endif /* CONFIG_FILE_LOCKING */
1968 1967
1969 /* fs/open.c */ 1968 /* fs/open.c */
1970 1969
1971 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, 1970 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
1972 struct file *filp); 1971 struct file *filp);
1973 extern int do_fallocate(struct file *file, int mode, loff_t offset, 1972 extern int do_fallocate(struct file *file, int mode, loff_t offset,
1974 loff_t len); 1973 loff_t len);
1975 extern long do_sys_open(int dfd, const char __user *filename, int flags, 1974 extern long do_sys_open(int dfd, const char __user *filename, int flags,
1976 int mode); 1975 int mode);
1977 extern struct file *filp_open(const char *, int, int); 1976 extern struct file *filp_open(const char *, int, int);
1978 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 1977 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1979 const struct cred *); 1978 const struct cred *);
1980 extern int filp_close(struct file *, fl_owner_t id); 1979 extern int filp_close(struct file *, fl_owner_t id);
1981 extern char * getname(const char __user *); 1980 extern char * getname(const char __user *);
1982 1981
1983 /* fs/ioctl.c */ 1982 /* fs/ioctl.c */
1984 1983
1985 extern int ioctl_preallocate(struct file *filp, void __user *argp); 1984 extern int ioctl_preallocate(struct file *filp, void __user *argp);
1986 1985
1987 /* fs/dcache.c */ 1986 /* fs/dcache.c */
1988 extern void __init vfs_caches_init_early(void); 1987 extern void __init vfs_caches_init_early(void);
1989 extern void __init vfs_caches_init(unsigned long); 1988 extern void __init vfs_caches_init(unsigned long);
1990 1989
1991 extern struct kmem_cache *names_cachep; 1990 extern struct kmem_cache *names_cachep;
1992 1991
1993 #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) 1992 #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp))
1994 #define __getname() __getname_gfp(GFP_KERNEL) 1993 #define __getname() __getname_gfp(GFP_KERNEL)
1995 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) 1994 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
1996 #ifndef CONFIG_AUDITSYSCALL 1995 #ifndef CONFIG_AUDITSYSCALL
1997 #define putname(name) __putname(name) 1996 #define putname(name) __putname(name)
1998 #else 1997 #else
1999 extern void putname(const char *name); 1998 extern void putname(const char *name);
2000 #endif 1999 #endif
2001 2000
2002 #ifdef CONFIG_BLOCK 2001 #ifdef CONFIG_BLOCK
2003 extern int register_blkdev(unsigned int, const char *); 2002 extern int register_blkdev(unsigned int, const char *);
2004 extern void unregister_blkdev(unsigned int, const char *); 2003 extern void unregister_blkdev(unsigned int, const char *);
2005 extern struct block_device *bdget(dev_t); 2004 extern struct block_device *bdget(dev_t);
2006 extern struct block_device *bdgrab(struct block_device *bdev); 2005 extern struct block_device *bdgrab(struct block_device *bdev);
2007 extern void bd_set_size(struct block_device *, loff_t size); 2006 extern void bd_set_size(struct block_device *, loff_t size);
2008 extern void bd_forget(struct inode *inode); 2007 extern void bd_forget(struct inode *inode);
2009 extern void bdput(struct block_device *); 2008 extern void bdput(struct block_device *);
2010 extern struct block_device *open_by_devnum(dev_t, fmode_t); 2009 extern struct block_device *open_by_devnum(dev_t, fmode_t);
2011 extern void invalidate_bdev(struct block_device *); 2010 extern void invalidate_bdev(struct block_device *);
2012 extern int sync_blockdev(struct block_device *bdev); 2011 extern int sync_blockdev(struct block_device *bdev);
2013 extern struct super_block *freeze_bdev(struct block_device *); 2012 extern struct super_block *freeze_bdev(struct block_device *);
2014 extern void emergency_thaw_all(void); 2013 extern void emergency_thaw_all(void);
2015 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); 2014 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
2016 extern int fsync_bdev(struct block_device *); 2015 extern int fsync_bdev(struct block_device *);
2017 #else 2016 #else
2018 static inline void bd_forget(struct inode *inode) {} 2017 static inline void bd_forget(struct inode *inode) {}
2019 static inline int sync_blockdev(struct block_device *bdev) { return 0; } 2018 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
2020 static inline void invalidate_bdev(struct block_device *bdev) {} 2019 static inline void invalidate_bdev(struct block_device *bdev) {}
2021 2020
2022 static inline struct super_block *freeze_bdev(struct block_device *sb) 2021 static inline struct super_block *freeze_bdev(struct block_device *sb)
2023 { 2022 {
2024 return NULL; 2023 return NULL;
2025 } 2024 }
2026 2025
2027 static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) 2026 static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
2028 { 2027 {
2029 return 0; 2028 return 0;
2030 } 2029 }
2031 #endif 2030 #endif
2032 extern int sync_filesystem(struct super_block *); 2031 extern int sync_filesystem(struct super_block *);
2033 extern const struct file_operations def_blk_fops; 2032 extern const struct file_operations def_blk_fops;
2034 extern const struct file_operations def_chr_fops; 2033 extern const struct file_operations def_chr_fops;
2035 extern const struct file_operations bad_sock_fops; 2034 extern const struct file_operations bad_sock_fops;
2036 extern const struct file_operations def_fifo_fops; 2035 extern const struct file_operations def_fifo_fops;
2037 #ifdef CONFIG_BLOCK 2036 #ifdef CONFIG_BLOCK
2038 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 2037 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
2039 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 2038 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
2040 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 2039 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
2041 extern int blkdev_get(struct block_device *, fmode_t); 2040 extern int blkdev_get(struct block_device *, fmode_t);
2042 extern int blkdev_put(struct block_device *, fmode_t); 2041 extern int blkdev_put(struct block_device *, fmode_t);
2043 extern int bd_claim(struct block_device *, void *); 2042 extern int bd_claim(struct block_device *, void *);
2044 extern void bd_release(struct block_device *); 2043 extern void bd_release(struct block_device *);
2045 #ifdef CONFIG_SYSFS 2044 #ifdef CONFIG_SYSFS
2046 extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); 2045 extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
2047 extern void bd_release_from_disk(struct block_device *, struct gendisk *); 2046 extern void bd_release_from_disk(struct block_device *, struct gendisk *);
2048 #else 2047 #else
2049 #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) 2048 #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
2050 #define bd_release_from_disk(bdev, disk) bd_release(bdev) 2049 #define bd_release_from_disk(bdev, disk) bd_release(bdev)
2051 #endif 2050 #endif
2052 #endif 2051 #endif
2053 2052
2054 /* fs/char_dev.c */ 2053 /* fs/char_dev.c */
2055 #define CHRDEV_MAJOR_HASH_SIZE 255 2054 #define CHRDEV_MAJOR_HASH_SIZE 255
2056 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); 2055 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
2057 extern int register_chrdev_region(dev_t, unsigned, const char *); 2056 extern int register_chrdev_region(dev_t, unsigned, const char *);
2058 extern int __register_chrdev(unsigned int major, unsigned int baseminor, 2057 extern int __register_chrdev(unsigned int major, unsigned int baseminor,
2059 unsigned int count, const char *name, 2058 unsigned int count, const char *name,
2060 const struct file_operations *fops); 2059 const struct file_operations *fops);
2061 extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, 2060 extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
2062 unsigned int count, const char *name); 2061 unsigned int count, const char *name);
2063 extern void unregister_chrdev_region(dev_t, unsigned); 2062 extern void unregister_chrdev_region(dev_t, unsigned);
2064 extern void chrdev_show(struct seq_file *,off_t); 2063 extern void chrdev_show(struct seq_file *,off_t);
2065 2064
2066 static inline int register_chrdev(unsigned int major, const char *name, 2065 static inline int register_chrdev(unsigned int major, const char *name,
2067 const struct file_operations *fops) 2066 const struct file_operations *fops)
2068 { 2067 {
2069 return __register_chrdev(major, 0, 256, name, fops); 2068 return __register_chrdev(major, 0, 256, name, fops);
2070 } 2069 }
2071 2070
2072 static inline void unregister_chrdev(unsigned int major, const char *name) 2071 static inline void unregister_chrdev(unsigned int major, const char *name)
2073 { 2072 {
2074 __unregister_chrdev(major, 0, 256, name); 2073 __unregister_chrdev(major, 0, 256, name);
2075 } 2074 }
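
These wrappers claim minors 0..255 of the given major. A hedged driver-style usage, where example_fops is assumed to be defined elsewhere and a major of 0 asks the kernel to pick a free one:

        static int example_major;

        static int __init example_init(void)
        {
                example_major = register_chrdev(0, "example", &example_fops);
                if (example_major < 0)
                        return example_major;
                return 0;
        }

        static void __exit example_exit(void)
        {
                unregister_chrdev(example_major, "example");
        }
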
2076 2075
2077 /* fs/block_dev.c */ 2076 /* fs/block_dev.c */
2078 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 2077 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
2079 #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ 2078 #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
2080 2079
2081 #ifdef CONFIG_BLOCK 2080 #ifdef CONFIG_BLOCK
2082 #define BLKDEV_MAJOR_HASH_SIZE 255 2081 #define BLKDEV_MAJOR_HASH_SIZE 255
2083 extern const char *__bdevname(dev_t, char *buffer); 2082 extern const char *__bdevname(dev_t, char *buffer);
2084 extern const char *bdevname(struct block_device *bdev, char *buffer); 2083 extern const char *bdevname(struct block_device *bdev, char *buffer);
2085 extern struct block_device *lookup_bdev(const char *); 2084 extern struct block_device *lookup_bdev(const char *);
2086 extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); 2085 extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
2087 extern void close_bdev_exclusive(struct block_device *, fmode_t); 2086 extern void close_bdev_exclusive(struct block_device *, fmode_t);
2088 extern void blkdev_show(struct seq_file *,off_t); 2087 extern void blkdev_show(struct seq_file *,off_t);
2089 2088
2090 #else 2089 #else
2091 #define BLKDEV_MAJOR_HASH_SIZE 0 2090 #define BLKDEV_MAJOR_HASH_SIZE 0
2092 #endif 2091 #endif
2093 2092
2094 extern void init_special_inode(struct inode *, umode_t, dev_t); 2093 extern void init_special_inode(struct inode *, umode_t, dev_t);
2095 2094
2096 /* Invalid inode operations -- fs/bad_inode.c */ 2095 /* Invalid inode operations -- fs/bad_inode.c */
2097 extern void make_bad_inode(struct inode *); 2096 extern void make_bad_inode(struct inode *);
2098 extern int is_bad_inode(struct inode *); 2097 extern int is_bad_inode(struct inode *);
2099 2098
2100 extern const struct file_operations read_pipefifo_fops; 2099 extern const struct file_operations read_pipefifo_fops;
2101 extern const struct file_operations write_pipefifo_fops; 2100 extern const struct file_operations write_pipefifo_fops;
2102 extern const struct file_operations rdwr_pipefifo_fops; 2101 extern const struct file_operations rdwr_pipefifo_fops;
2103 2102
2104 extern int fs_may_remount_ro(struct super_block *); 2103 extern int fs_may_remount_ro(struct super_block *);
2105 2104
2106 #ifdef CONFIG_BLOCK 2105 #ifdef CONFIG_BLOCK
2107 /* 2106 /*
2108 * return READ, READA, or WRITE 2107 * return READ, READA, or WRITE
2109 */ 2108 */
2110 #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) 2109 #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK))
2111 2110
2112 /* 2111 /*
2113 * return data direction, READ or WRITE 2112 * return data direction, READ or WRITE
2114 */ 2113 */
2115 #define bio_data_dir(bio) ((bio)->bi_rw & 1) 2114 #define bio_data_dir(bio) ((bio)->bi_rw & 1)
2116 2115
2117 extern void check_disk_size_change(struct gendisk *disk, 2116 extern void check_disk_size_change(struct gendisk *disk,
2118 struct block_device *bdev); 2117 struct block_device *bdev);
2119 extern int revalidate_disk(struct gendisk *); 2118 extern int revalidate_disk(struct gendisk *);
2120 extern int check_disk_change(struct block_device *); 2119 extern int check_disk_change(struct block_device *);
2121 extern int __invalidate_device(struct block_device *); 2120 extern int __invalidate_device(struct block_device *);
2122 extern int invalidate_partition(struct gendisk *, int); 2121 extern int invalidate_partition(struct gendisk *, int);
2123 #endif 2122 #endif
2124 unsigned long invalidate_mapping_pages(struct address_space *mapping, 2123 unsigned long invalidate_mapping_pages(struct address_space *mapping,
2125 pgoff_t start, pgoff_t end); 2124 pgoff_t start, pgoff_t end);
2126 2125
2127 static inline void invalidate_remote_inode(struct inode *inode) 2126 static inline void invalidate_remote_inode(struct inode *inode)
2128 { 2127 {
2129 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2128 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2130 S_ISLNK(inode->i_mode)) 2129 S_ISLNK(inode->i_mode))
2131 invalidate_mapping_pages(inode->i_mapping, 0, -1); 2130 invalidate_mapping_pages(inode->i_mapping, 0, -1);
2132 } 2131 }
2133 extern int invalidate_inode_pages2(struct address_space *mapping); 2132 extern int invalidate_inode_pages2(struct address_space *mapping);
2134 extern int invalidate_inode_pages2_range(struct address_space *mapping, 2133 extern int invalidate_inode_pages2_range(struct address_space *mapping,
2135 pgoff_t start, pgoff_t end); 2134 pgoff_t start, pgoff_t end);
2136 extern int write_inode_now(struct inode *, int); 2135 extern int write_inode_now(struct inode *, int);
2137 extern int filemap_fdatawrite(struct address_space *); 2136 extern int filemap_fdatawrite(struct address_space *);
2138 extern int filemap_flush(struct address_space *); 2137 extern int filemap_flush(struct address_space *);
2139 extern int filemap_fdatawait(struct address_space *); 2138 extern int filemap_fdatawait(struct address_space *);
2140 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, 2139 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2141 loff_t lend); 2140 loff_t lend);
2142 extern int filemap_write_and_wait(struct address_space *mapping); 2141 extern int filemap_write_and_wait(struct address_space *mapping);
2143 extern int filemap_write_and_wait_range(struct address_space *mapping, 2142 extern int filemap_write_and_wait_range(struct address_space *mapping,
2144 loff_t lstart, loff_t lend); 2143 loff_t lstart, loff_t lend);
2145 extern int __filemap_fdatawrite_range(struct address_space *mapping, 2144 extern int __filemap_fdatawrite_range(struct address_space *mapping,
2146 loff_t start, loff_t end, int sync_mode); 2145 loff_t start, loff_t end, int sync_mode);
2147 extern int filemap_fdatawrite_range(struct address_space *mapping, 2146 extern int filemap_fdatawrite_range(struct address_space *mapping,
2148 loff_t start, loff_t end); 2147 loff_t start, loff_t end);
2149 2148
2150 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, 2149 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
2151 int datasync); 2150 int datasync);
2152 extern int vfs_fsync(struct file *file, int datasync); 2151 extern int vfs_fsync(struct file *file, int datasync);
2153 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); 2152 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
2154 extern void sync_supers(void); 2153 extern void sync_supers(void);
2155 extern void emergency_sync(void); 2154 extern void emergency_sync(void);
2156 extern void emergency_remount(void); 2155 extern void emergency_remount(void);
2157 #ifdef CONFIG_BLOCK 2156 #ifdef CONFIG_BLOCK
2158 extern sector_t bmap(struct inode *, sector_t); 2157 extern sector_t bmap(struct inode *, sector_t);
2159 #endif 2158 #endif
2160 extern int notify_change(struct dentry *, struct iattr *); 2159 extern int notify_change(struct dentry *, struct iattr *);
2161 extern int inode_permission(struct inode *, int); 2160 extern int inode_permission(struct inode *, int);
2162 extern int generic_permission(struct inode *, int, 2161 extern int generic_permission(struct inode *, int,
2163 int (*check_acl)(struct inode *, int)); 2162 int (*check_acl)(struct inode *, int));
2164 2163
2165 static inline bool execute_ok(struct inode *inode) 2164 static inline bool execute_ok(struct inode *inode)
2166 { 2165 {
2167 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); 2166 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2168 } 2167 }
2169 2168
2170 extern int get_write_access(struct inode *); 2169 extern int get_write_access(struct inode *);
2171 extern int deny_write_access(struct file *); 2170 extern int deny_write_access(struct file *);
2172 static inline void put_write_access(struct inode * inode) 2171 static inline void put_write_access(struct inode * inode)
2173 { 2172 {
2174 atomic_dec(&inode->i_writecount); 2173 atomic_dec(&inode->i_writecount);
2175 } 2174 }
2176 static inline void allow_write_access(struct file *file) 2175 static inline void allow_write_access(struct file *file)
2177 { 2176 {
2178 if (file) 2177 if (file)
2179 atomic_inc(&file->f_path.dentry->d_inode->i_writecount); 2178 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
2180 } 2179 }
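deny_write_access() and allow_write_access() form a pair around i_writecount; this is the same pairing exec uses to keep a running binary read-only. A hedged sketch with a hypothetical helper:

	/* Pin @file against writers for a while; deny_write_access() fails
	 * with -ETXTBSY if the file is already open for writing somewhere. */
	static int demo_with_no_writers(struct file *file)
	{
		int err;

		err = deny_write_access(file);
		if (err)
			return err;

		/* ... use the file knowing nobody can open it for writing ... */

		allow_write_access(file);	/* drop the i_writecount claim */
		return 0;
	}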
2181 extern int do_pipe_flags(int *, int); 2180 extern int do_pipe_flags(int *, int);
2182 extern struct file *create_read_pipe(struct file *f, int flags); 2181 extern struct file *create_read_pipe(struct file *f, int flags);
2183 extern struct file *create_write_pipe(int flags); 2182 extern struct file *create_write_pipe(int flags);
2184 extern void free_write_pipe(struct file *); 2183 extern void free_write_pipe(struct file *);
2185 2184
2186 extern struct file *do_filp_open(int dfd, const char *pathname, 2185 extern struct file *do_filp_open(int dfd, const char *pathname,
2187 int open_flag, int mode, int acc_mode); 2186 int open_flag, int mode, int acc_mode);
2188 extern int may_open(struct path *, int, int); 2187 extern int may_open(struct path *, int, int);
2189 2188
2190 extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2189 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2191 extern struct file * open_exec(const char *); 2190 extern struct file * open_exec(const char *);
2192 2191
2193 /* fs/dcache.c -- generic fs support functions */ 2192 /* fs/dcache.c -- generic fs support functions */
2194 extern int is_subdir(struct dentry *, struct dentry *); 2193 extern int is_subdir(struct dentry *, struct dentry *);
2195 extern int path_is_under(struct path *, struct path *); 2194 extern int path_is_under(struct path *, struct path *);
2196 extern ino_t find_inode_number(struct dentry *, struct qstr *); 2195 extern ino_t find_inode_number(struct dentry *, struct qstr *);
2197 2196
2198 #include <linux/err.h> 2197 #include <linux/err.h>
2199 2198
2200 /* needed for stackable file system support */ 2199 /* needed for stackable file system support */
2201 extern loff_t default_llseek(struct file *file, loff_t offset, int origin); 2200 extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
2202 2201
2203 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); 2202 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
2204 2203
2205 extern int inode_init_always(struct super_block *, struct inode *); 2204 extern int inode_init_always(struct super_block *, struct inode *);
2206 extern void inode_init_once(struct inode *); 2205 extern void inode_init_once(struct inode *);
2207 extern void ihold(struct inode * inode); 2206 extern void ihold(struct inode * inode);
2208 extern void iput(struct inode *); 2207 extern void iput(struct inode *);
2209 extern struct inode * igrab(struct inode *); 2208 extern struct inode * igrab(struct inode *);
2210 extern ino_t iunique(struct super_block *, ino_t); 2209 extern ino_t iunique(struct super_block *, ino_t);
2211 extern int inode_needs_sync(struct inode *inode); 2210 extern int inode_needs_sync(struct inode *inode);
2212 extern int generic_delete_inode(struct inode *inode); 2211 extern int generic_delete_inode(struct inode *inode);
2213 extern int generic_drop_inode(struct inode *inode); 2212 extern int generic_drop_inode(struct inode *inode);
2214 2213
2215 extern struct inode *ilookup5_nowait(struct super_block *sb, 2214 extern struct inode *ilookup5_nowait(struct super_block *sb,
2216 unsigned long hashval, int (*test)(struct inode *, void *), 2215 unsigned long hashval, int (*test)(struct inode *, void *),
2217 void *data); 2216 void *data);
2218 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 2217 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
2219 int (*test)(struct inode *, void *), void *data); 2218 int (*test)(struct inode *, void *), void *data);
2220 extern struct inode *ilookup(struct super_block *sb, unsigned long ino); 2219 extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
2221 2220
2222 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); 2221 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
2223 extern struct inode * iget_locked(struct super_block *, unsigned long); 2222 extern struct inode * iget_locked(struct super_block *, unsigned long);
2224 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); 2223 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
2225 extern int insert_inode_locked(struct inode *); 2224 extern int insert_inode_locked(struct inode *);
2226 extern void unlock_new_inode(struct inode *); 2225 extern void unlock_new_inode(struct inode *);
2227 extern unsigned int get_next_ino(void); 2226 extern unsigned int get_next_ino(void);
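iget_locked() and unlock_new_inode() are normally used in the pattern below; a sketch assuming a hypothetical demo_iget() that would read the on-disk inode in the elided part:

	static struct inode *demo_iget(struct super_block *sb, unsigned long ino)
	{
		struct inode *inode;

		inode = iget_locked(sb, ino);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		if (!(inode->i_state & I_NEW))
			return inode;		/* found in the inode cache */

		/* ... read the on-disk inode, set i_mode, i_op, i_fop, ... */

		unlock_new_inode(inode);	/* clear I_NEW, wake up waiters */
		return inode;
	}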
2228 2227
2229 extern void __iget(struct inode * inode); 2228 extern void __iget(struct inode * inode);
2230 extern void iget_failed(struct inode *); 2229 extern void iget_failed(struct inode *);
2231 extern void end_writeback(struct inode *); 2230 extern void end_writeback(struct inode *);
2232 extern void __destroy_inode(struct inode *); 2231 extern void __destroy_inode(struct inode *);
2233 extern struct inode *new_inode(struct super_block *); 2232 extern struct inode *new_inode(struct super_block *);
2234 extern int should_remove_suid(struct dentry *); 2233 extern int should_remove_suid(struct dentry *);
2235 extern int file_remove_suid(struct file *); 2234 extern int file_remove_suid(struct file *);
2236 2235
2237 extern void __insert_inode_hash(struct inode *, unsigned long hashval); 2236 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
2238 extern void remove_inode_hash(struct inode *); 2237 extern void remove_inode_hash(struct inode *);
2239 static inline void insert_inode_hash(struct inode *inode) 2238 static inline void insert_inode_hash(struct inode *inode)
2240 { 2239 {
2241 __insert_inode_hash(inode, inode->i_ino); 2240 __insert_inode_hash(inode, inode->i_ino);
2242 } 2241 }
2243 extern void inode_sb_list_add(struct inode *inode); 2242 extern void inode_sb_list_add(struct inode *inode);
2244 2243
2245 #ifdef CONFIG_BLOCK 2244 #ifdef CONFIG_BLOCK
2246 extern void submit_bio(int, struct bio *); 2245 extern void submit_bio(int, struct bio *);
2247 extern int bdev_read_only(struct block_device *); 2246 extern int bdev_read_only(struct block_device *);
2248 #endif 2247 #endif
2249 extern int set_blocksize(struct block_device *, int); 2248 extern int set_blocksize(struct block_device *, int);
2250 extern int sb_set_blocksize(struct super_block *, int); 2249 extern int sb_set_blocksize(struct super_block *, int);
2251 extern int sb_min_blocksize(struct super_block *, int); 2250 extern int sb_min_blocksize(struct super_block *, int);
2252 2251
2253 extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2252 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2254 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2253 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2255 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 2254 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
2256 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2255 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2257 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2256 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2258 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, 2257 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
2259 loff_t *); 2258 loff_t *);
2260 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2259 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2261 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2260 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
2262 unsigned long *, loff_t, loff_t *, size_t, size_t); 2261 unsigned long *, loff_t, loff_t *, size_t, size_t);
2263 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, 2262 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
2264 unsigned long, loff_t, loff_t *, size_t, ssize_t); 2263 unsigned long, loff_t, loff_t *, size_t, ssize_t);
2265 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2264 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
2266 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2265 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
2267 extern int generic_segment_checks(const struct iovec *iov, 2266 extern int generic_segment_checks(const struct iovec *iov,
2268 unsigned long *nr_segs, size_t *count, int access_flags); 2267 unsigned long *nr_segs, size_t *count, int access_flags);
2269 2268
2270 /* fs/block_dev.c */ 2269 /* fs/block_dev.c */
2271 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 2270 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
2272 unsigned long nr_segs, loff_t pos); 2271 unsigned long nr_segs, loff_t pos);
2273 extern int blkdev_fsync(struct file *filp, int datasync); 2272 extern int blkdev_fsync(struct file *filp, int datasync);
2274 2273
2275 /* fs/splice.c */ 2274 /* fs/splice.c */
2276 extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2275 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
2277 struct pipe_inode_info *, size_t, unsigned int); 2276 struct pipe_inode_info *, size_t, unsigned int);
2278 extern ssize_t default_file_splice_read(struct file *, loff_t *, 2277 extern ssize_t default_file_splice_read(struct file *, loff_t *,
2279 struct pipe_inode_info *, size_t, unsigned int); 2278 struct pipe_inode_info *, size_t, unsigned int);
2280 extern ssize_t generic_file_splice_write(struct pipe_inode_info *, 2279 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
2281 struct file *, loff_t *, size_t, unsigned int); 2280 struct file *, loff_t *, size_t, unsigned int);
2282 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, 2281 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
2283 struct file *out, loff_t *, size_t len, unsigned int flags); 2282 struct file *out, loff_t *, size_t len, unsigned int flags);
2284 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 2283 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
2285 size_t len, unsigned int flags); 2284 size_t len, unsigned int flags);
2286 2285
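For a regular file backed by the page cache, the generic aio and splice helpers above (plus the llseek and fsync helpers declared further down) are typically just wired into file_operations. A sketch assuming 2.6.37-era member names and a hypothetical demo filesystem:

	static const struct file_operations demo_file_operations = {
		.llseek		= generic_file_llseek,
		.read		= do_sync_read,
		.aio_read	= generic_file_aio_read,
		.write		= do_sync_write,
		.aio_write	= generic_file_aio_write,
		.mmap		= generic_file_mmap,
		.splice_read	= generic_file_splice_read,
		.splice_write	= generic_file_splice_write,
		.fsync		= generic_file_fsync,
	};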
2287 extern void 2286 extern void
2288 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 2287 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
2289 extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); 2288 extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
2290 extern loff_t no_llseek(struct file *file, loff_t offset, int origin); 2289 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
2291 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); 2290 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
2292 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, 2291 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
2293 int origin); 2292 int origin);
2294 extern int generic_file_open(struct inode * inode, struct file * filp); 2293 extern int generic_file_open(struct inode * inode, struct file * filp);
2295 extern int nonseekable_open(struct inode * inode, struct file * filp); 2294 extern int nonseekable_open(struct inode * inode, struct file * filp);
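nonseekable_open() and no_llseek() go together for stream-like files where the file position has no meaning; a hedged sketch:

	/* Open method for a stream-like file: nonseekable_open() clears
	 * FMODE_LSEEK/FMODE_PREAD/FMODE_PWRITE and always succeeds. */
	static int demo_stream_open(struct inode *inode, struct file *filp)
	{
		return nonseekable_open(inode, filp);
	}

	/* ... paired with .llseek = no_llseek in the file_operations, so that
	 * lseek(2) fails with -ESPIPE instead of silently moving f_pos. */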
2296 2295
2297 #ifdef CONFIG_FS_XIP 2296 #ifdef CONFIG_FS_XIP
2298 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, 2297 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
2299 loff_t *ppos); 2298 loff_t *ppos);
2300 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); 2299 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
2301 extern ssize_t xip_file_write(struct file *filp, const char __user *buf, 2300 extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
2302 size_t len, loff_t *ppos); 2301 size_t len, loff_t *ppos);
2303 extern int xip_truncate_page(struct address_space *mapping, loff_t from); 2302 extern int xip_truncate_page(struct address_space *mapping, loff_t from);
2304 #else 2303 #else
2305 static inline int xip_truncate_page(struct address_space *mapping, loff_t from) 2304 static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
2306 { 2305 {
2307 return 0; 2306 return 0;
2308 } 2307 }
2309 #endif 2308 #endif
2310 2309
2311 #ifdef CONFIG_BLOCK 2310 #ifdef CONFIG_BLOCK
2312 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, 2311 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
2313 loff_t file_offset); 2312 loff_t file_offset);
2314 2313
2315 enum { 2314 enum {
2316 /* need locking between buffered and direct access */ 2315 /* need locking between buffered and direct access */
2317 DIO_LOCKING = 0x01, 2316 DIO_LOCKING = 0x01,
2318 2317
2319 /* filesystem does not support filling holes */ 2318 /* filesystem does not support filling holes */
2320 DIO_SKIP_HOLES = 0x02, 2319 DIO_SKIP_HOLES = 0x02,
2321 }; 2320 };
2322 2321
2323 void dio_end_io(struct bio *bio, int error); 2322 void dio_end_io(struct bio *bio, int error);
2324 2323
2325 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2324 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2326 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2325 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2327 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 2326 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
2328 dio_submit_t submit_io, int flags); 2327 dio_submit_t submit_io, int flags);
2329 2328
2330 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, 2329 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2331 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2330 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2332 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2331 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2333 dio_iodone_t end_io) 2332 dio_iodone_t end_io)
2334 { 2333 {
2335 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2334 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2336 nr_segs, get_block, end_io, NULL, 2335 nr_segs, get_block, end_io, NULL,
2337 DIO_LOCKING | DIO_SKIP_HOLES); 2336 DIO_LOCKING | DIO_SKIP_HOLES);
2338 } 2337 }
2339 #endif 2338 #endif
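A filesystem's ->direct_IO address_space operation is usually a thin wrapper around the blockdev_direct_IO() inline above; a sketch assuming the 2.6.37-era ->direct_IO prototype and a hypothetical demo_get_block get_block_t:

	static ssize_t demo_direct_IO(int rw, struct kiocb *iocb,
				      const struct iovec *iov, loff_t offset,
				      unsigned long nr_segs)
	{
		struct inode *inode = iocb->ki_filp->f_mapping->host;

		/* DIO_LOCKING | DIO_SKIP_HOLES are implied by the wrapper */
		return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
					  iov, offset, nr_segs, demo_get_block,
					  NULL);
	}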
2340 2339
2341 extern const struct file_operations generic_ro_fops; 2340 extern const struct file_operations generic_ro_fops;
2342 2341
2343 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2342 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
2344 2343
2345 extern int vfs_readlink(struct dentry *, char __user *, int, const char *); 2344 extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
2346 extern int vfs_follow_link(struct nameidata *, const char *); 2345 extern int vfs_follow_link(struct nameidata *, const char *);
2347 extern int page_readlink(struct dentry *, char __user *, int); 2346 extern int page_readlink(struct dentry *, char __user *, int);
2348 extern void *page_follow_link_light(struct dentry *, struct nameidata *); 2347 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
2349 extern void page_put_link(struct dentry *, struct nameidata *, void *); 2348 extern void page_put_link(struct dentry *, struct nameidata *, void *);
2350 extern int __page_symlink(struct inode *inode, const char *symname, int len, 2349 extern int __page_symlink(struct inode *inode, const char *symname, int len,
2351 int nofs); 2350 int nofs);
2352 extern int page_symlink(struct inode *inode, const char *symname, int len); 2351 extern int page_symlink(struct inode *inode, const char *symname, int len);
2353 extern const struct inode_operations page_symlink_inode_operations; 2352 extern const struct inode_operations page_symlink_inode_operations;
2354 extern int generic_readlink(struct dentry *, char __user *, int); 2353 extern int generic_readlink(struct dentry *, char __user *, int);
2355 extern void generic_fillattr(struct inode *, struct kstat *); 2354 extern void generic_fillattr(struct inode *, struct kstat *);
2356 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2355 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2357 void __inode_add_bytes(struct inode *inode, loff_t bytes); 2356 void __inode_add_bytes(struct inode *inode, loff_t bytes);
2358 void inode_add_bytes(struct inode *inode, loff_t bytes); 2357 void inode_add_bytes(struct inode *inode, loff_t bytes);
2359 void inode_sub_bytes(struct inode *inode, loff_t bytes); 2358 void inode_sub_bytes(struct inode *inode, loff_t bytes);
2360 loff_t inode_get_bytes(struct inode *inode); 2359 loff_t inode_get_bytes(struct inode *inode);
2361 void inode_set_bytes(struct inode *inode, loff_t bytes); 2360 void inode_set_bytes(struct inode *inode, loff_t bytes);
2362 2361
2363 extern int vfs_readdir(struct file *, filldir_t, void *); 2362 extern int vfs_readdir(struct file *, filldir_t, void *);
2364 2363
2365 extern int vfs_stat(const char __user *, struct kstat *); 2364 extern int vfs_stat(const char __user *, struct kstat *);
2366 extern int vfs_lstat(const char __user *, struct kstat *); 2365 extern int vfs_lstat(const char __user *, struct kstat *);
2367 extern int vfs_fstat(unsigned int, struct kstat *); 2366 extern int vfs_fstat(unsigned int, struct kstat *);
2368 extern int vfs_fstatat(int , const char __user *, struct kstat *, int); 2367 extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
2369 2368
2370 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, 2369 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
2371 unsigned long arg); 2370 unsigned long arg);
2372 extern int __generic_block_fiemap(struct inode *inode, 2371 extern int __generic_block_fiemap(struct inode *inode,
2373 struct fiemap_extent_info *fieinfo, 2372 struct fiemap_extent_info *fieinfo,
2374 loff_t start, loff_t len, 2373 loff_t start, loff_t len,
2375 get_block_t *get_block); 2374 get_block_t *get_block);
2376 extern int generic_block_fiemap(struct inode *inode, 2375 extern int generic_block_fiemap(struct inode *inode,
2377 struct fiemap_extent_info *fieinfo, u64 start, 2376 struct fiemap_extent_info *fieinfo, u64 start,
2378 u64 len, get_block_t *get_block); 2377 u64 len, get_block_t *get_block);
2379 2378
2380 extern void get_filesystem(struct file_system_type *fs); 2379 extern void get_filesystem(struct file_system_type *fs);
2381 extern void put_filesystem(struct file_system_type *fs); 2380 extern void put_filesystem(struct file_system_type *fs);
2382 extern struct file_system_type *get_fs_type(const char *name); 2381 extern struct file_system_type *get_fs_type(const char *name);
2383 extern struct super_block *get_super(struct block_device *); 2382 extern struct super_block *get_super(struct block_device *);
2384 extern struct super_block *get_active_super(struct block_device *bdev); 2383 extern struct super_block *get_active_super(struct block_device *bdev);
2385 extern struct super_block *user_get_super(dev_t); 2384 extern struct super_block *user_get_super(dev_t);
2386 extern void drop_super(struct super_block *sb); 2385 extern void drop_super(struct super_block *sb);
2387 extern void iterate_supers(void (*)(struct super_block *, void *), void *); 2386 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
2388 2387
2389 extern int dcache_dir_open(struct inode *, struct file *); 2388 extern int dcache_dir_open(struct inode *, struct file *);
2390 extern int dcache_dir_close(struct inode *, struct file *); 2389 extern int dcache_dir_close(struct inode *, struct file *);
2391 extern loff_t dcache_dir_lseek(struct file *, loff_t, int); 2390 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
2392 extern int dcache_readdir(struct file *, void *, filldir_t); 2391 extern int dcache_readdir(struct file *, void *, filldir_t);
2393 extern int simple_setattr(struct dentry *, struct iattr *); 2392 extern int simple_setattr(struct dentry *, struct iattr *);
2394 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2393 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2395 extern int simple_statfs(struct dentry *, struct kstatfs *); 2394 extern int simple_statfs(struct dentry *, struct kstatfs *);
2396 extern int simple_link(struct dentry *, struct inode *, struct dentry *); 2395 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
2397 extern int simple_unlink(struct inode *, struct dentry *); 2396 extern int simple_unlink(struct inode *, struct dentry *);
2398 extern int simple_rmdir(struct inode *, struct dentry *); 2397 extern int simple_rmdir(struct inode *, struct dentry *);
2399 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 2398 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2400 extern int noop_fsync(struct file *, int); 2399 extern int noop_fsync(struct file *, int);
2401 extern int simple_empty(struct dentry *); 2400 extern int simple_empty(struct dentry *);
2402 extern int simple_readpage(struct file *file, struct page *page); 2401 extern int simple_readpage(struct file *file, struct page *page);
2403 extern int simple_write_begin(struct file *file, struct address_space *mapping, 2402 extern int simple_write_begin(struct file *file, struct address_space *mapping,
2404 loff_t pos, unsigned len, unsigned flags, 2403 loff_t pos, unsigned len, unsigned flags,
2405 struct page **pagep, void **fsdata); 2404 struct page **pagep, void **fsdata);
2406 extern int simple_write_end(struct file *file, struct address_space *mapping, 2405 extern int simple_write_end(struct file *file, struct address_space *mapping,
2407 loff_t pos, unsigned len, unsigned copied, 2406 loff_t pos, unsigned len, unsigned copied,
2408 struct page *page, void *fsdata); 2407 struct page *page, void *fsdata);
2409 2408
2410 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); 2409 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
2411 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); 2410 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
2412 extern const struct file_operations simple_dir_operations; 2411 extern const struct file_operations simple_dir_operations;
2413 extern const struct inode_operations simple_dir_inode_operations; 2412 extern const struct inode_operations simple_dir_inode_operations;
2414 struct tree_descr { char *name; const struct file_operations *ops; int mode; }; 2413 struct tree_descr { char *name; const struct file_operations *ops; int mode; };
2415 struct dentry *d_alloc_name(struct dentry *, const char *); 2414 struct dentry *d_alloc_name(struct dentry *, const char *);
2416 extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); 2415 extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
2417 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); 2416 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
2418 extern void simple_release_fs(struct vfsmount **mount, int *count); 2417 extern void simple_release_fs(struct vfsmount **mount, int *count);
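simple_fill_super() builds a flat pseudo-filesystem from a tree_descr table: an entry's array index becomes its inode number, index 1 is taken by the root, and an entry with an empty name terminates the table. A sketch assuming hypothetical DEMO_SUPER_MAGIC and demo_file_ops:

	static int demo_fill_super(struct super_block *sb, void *data, int silent)
	{
		static struct tree_descr demo_files[] = {
			/* index 1 belongs to the root inode, so start at 2 */
			[2] = { "status",  &demo_file_ops, S_IRUGO },
			[3] = { "control", &demo_file_ops, S_IRUGO | S_IWUSR },
			{ "" }	/* empty name terminates the array */
		};

		return simple_fill_super(sb, DEMO_SUPER_MAGIC, demo_files);
	}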
2419 2418
2420 extern ssize_t simple_read_from_buffer(void __user *to, size_t count, 2419 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
2421 loff_t *ppos, const void *from, size_t available); 2420 loff_t *ppos, const void *from, size_t available);
2422 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 2421 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
2423 const void __user *from, size_t count); 2422 const void __user *from, size_t count);
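simple_read_from_buffer() handles the *ppos bookkeeping and partial copies for the common "expose a small buffer" case; a hedged sketch with a hypothetical demo_msg:

	static const char demo_msg[] = "hello from the kernel\n";

	static ssize_t demo_read(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
	{
		/* copies at most @count bytes starting at *ppos, updates *ppos */
		return simple_read_from_buffer(buf, count, ppos,
					       demo_msg, sizeof(demo_msg) - 1);
	}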
2424 2423
2425 extern int generic_file_fsync(struct file *, int); 2424 extern int generic_file_fsync(struct file *, int);
2426 2425
2427 extern int generic_check_addressable(unsigned, u64); 2426 extern int generic_check_addressable(unsigned, u64);
2428 2427
2429 #ifdef CONFIG_MIGRATION 2428 #ifdef CONFIG_MIGRATION
2430 extern int buffer_migrate_page(struct address_space *, 2429 extern int buffer_migrate_page(struct address_space *,
2431 struct page *, struct page *); 2430 struct page *, struct page *);
2432 #else 2431 #else
2433 #define buffer_migrate_page NULL 2432 #define buffer_migrate_page NULL
2434 #endif 2433 #endif
2435 2434
2436 extern int inode_change_ok(const struct inode *, struct iattr *); 2435 extern int inode_change_ok(const struct inode *, struct iattr *);
2437 extern int inode_newsize_ok(const struct inode *, loff_t offset); 2436 extern int inode_newsize_ok(const struct inode *, loff_t offset);
2438 extern void setattr_copy(struct inode *inode, const struct iattr *attr); 2437 extern void setattr_copy(struct inode *inode, const struct iattr *attr);
2439 2438
2440 extern void file_update_time(struct file *file); 2439 extern void file_update_time(struct file *file);
2441 2440
2442 extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); 2441 extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
2443 extern void save_mount_options(struct super_block *sb, char *options); 2442 extern void save_mount_options(struct super_block *sb, char *options);
2444 extern void replace_mount_options(struct super_block *sb, char *options); 2443 extern void replace_mount_options(struct super_block *sb, char *options);
2445 2444
2446 static inline ino_t parent_ino(struct dentry *dentry) 2445 static inline ino_t parent_ino(struct dentry *dentry)
2447 { 2446 {
2448 ino_t res; 2447 ino_t res;
2449 2448
2450 spin_lock(&dentry->d_lock); 2449 spin_lock(&dentry->d_lock);
2451 res = dentry->d_parent->d_inode->i_ino; 2450 res = dentry->d_parent->d_inode->i_ino;
2452 spin_unlock(&dentry->d_lock); 2451 spin_unlock(&dentry->d_lock);
2453 return res; 2452 return res;
2454 } 2453 }
2455 2454
2456 /* Transaction based IO helpers */ 2455 /* Transaction based IO helpers */
2457 2456
2458 /* 2457 /*
2459 * An argresp is stored in an allocated page and holds the 2458 * An argresp is stored in an allocated page and holds the
2460 * size of the argument or response, along with its content 2459 * size of the argument or response, along with its content
2461 */ 2460 */
2462 struct simple_transaction_argresp { 2461 struct simple_transaction_argresp {
2463 ssize_t size; 2462 ssize_t size;
2464 char data[0]; 2463 char data[0];
2465 }; 2464 };
2466 2465
2467 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) 2466 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
2468 2467
2469 char *simple_transaction_get(struct file *file, const char __user *buf, 2468 char *simple_transaction_get(struct file *file, const char __user *buf,
2470 size_t size); 2469 size_t size);
2471 ssize_t simple_transaction_read(struct file *file, char __user *buf, 2470 ssize_t simple_transaction_read(struct file *file, char __user *buf,
2472 size_t size, loff_t *pos); 2471 size_t size, loff_t *pos);
2473 int simple_transaction_release(struct inode *inode, struct file *file); 2472 int simple_transaction_release(struct inode *inode, struct file *file);
2474 2473
2475 void simple_transaction_set(struct file *file, size_t n); 2474 void simple_transaction_set(struct file *file, size_t n);
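The simple_transaction_* helpers implement a write-command/read-reply protocol in a single page: the write handler obtains the copied-in buffer from simple_transaction_get(), overwrites it with the reply, and publishes the reply size with simple_transaction_set(); subsequent reads are served by simple_transaction_read(). A sketch assuming a hypothetical demo_handle_command():

	static ssize_t demo_transaction_write(struct file *file,
					      const char __user *buf,
					      size_t size, loff_t *pos)
	{
		ssize_t reply_len;
		char *data = simple_transaction_get(file, buf, size);

		if (IS_ERR(data))
			return PTR_ERR(data);

		reply_len = demo_handle_command(data, size); /* rewrites data in place */
		if (reply_len < 0)
			return reply_len;

		simple_transaction_set(file, reply_len);	/* reply size for read() */
		return size;
	}

	static const struct file_operations demo_transaction_fops = {
		.write		= demo_transaction_write,
		.read		= simple_transaction_read,
		.release	= simple_transaction_release,
		.llseek		= default_llseek,
	};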
2476 2475
2477 /* 2476 /*
2478 * simple attribute files 2477 * simple attribute files
2479 * 2478 *
2480 * These attributes behave similarly to those in sysfs: 2479 * These attributes behave similarly to those in sysfs:
2481 * 2480 *
2482 * Writing to an attribute immediately sets a value; an open file can be 2481 * Writing to an attribute immediately sets a value; an open file can be
2483 * written to multiple times. 2482 * written to multiple times.
2484 * 2483 *
2485 * Reading from an attribute creates a buffer from the value that might get 2484 * Reading from an attribute creates a buffer from the value that might get
2486 * read with multiple read calls. When the attribute has been read 2485 * read with multiple read calls. When the attribute has been read
2487 * completely, no further read calls are possible until the file is opened 2486 * completely, no further read calls are possible until the file is opened
2488 * again. 2487 * again.
2489 * 2488 *
2490 * All attributes contain a text representation of a numeric value 2489 * All attributes contain a text representation of a numeric value
2491 * that is accessed with the get() and set() functions. 2490 * that is accessed with the get() and set() functions.
2492 */ 2491 */
2493 #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ 2492 #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
2494 static int __fops ## _open(struct inode *inode, struct file *file) \ 2493 static int __fops ## _open(struct inode *inode, struct file *file) \
2495 { \ 2494 { \
2496 __simple_attr_check_format(__fmt, 0ull); \ 2495 __simple_attr_check_format(__fmt, 0ull); \
2497 return simple_attr_open(inode, file, __get, __set, __fmt); \ 2496 return simple_attr_open(inode, file, __get, __set, __fmt); \
2498 } \ 2497 } \
2499 static const struct file_operations __fops = { \ 2498 static const struct file_operations __fops = { \
2500 .owner = THIS_MODULE, \ 2499 .owner = THIS_MODULE, \
2501 .open = __fops ## _open, \ 2500 .open = __fops ## _open, \
2502 .release = simple_attr_release, \ 2501 .release = simple_attr_release, \
2503 .read = simple_attr_read, \ 2502 .read = simple_attr_read, \
2504 .write = simple_attr_write, \ 2503 .write = simple_attr_write, \
2505 .llseek = generic_file_llseek, \ 2504 .llseek = generic_file_llseek, \
2506 }; 2505 };
2507 2506
2508 static inline void __attribute__((format(printf, 1, 2))) 2507 static inline void __attribute__((format(printf, 1, 2)))
2509 __simple_attr_check_format(const char *fmt, ...) 2508 __simple_attr_check_format(const char *fmt, ...)
2510 { 2509 {
2511 /* don't do anything, just let the compiler check the arguments; */ 2510 /* don't do anything, just let the compiler check the arguments; */
2512 } 2511 }
2513 2512
2514 int simple_attr_open(struct inode *inode, struct file *file, 2513 int simple_attr_open(struct inode *inode, struct file *file,
2515 int (*get)(void *, u64 *), int (*set)(void *, u64), 2514 int (*get)(void *, u64 *), int (*set)(void *, u64),
2516 const char *fmt); 2515 const char *fmt);
2517 int simple_attr_release(struct inode *inode, struct file *file); 2516 int simple_attr_release(struct inode *inode, struct file *file);
2518 ssize_t simple_attr_read(struct file *file, char __user *buf, 2517 ssize_t simple_attr_read(struct file *file, char __user *buf,
2519 size_t len, loff_t *ppos); 2518 size_t len, loff_t *ppos);
2520 ssize_t simple_attr_write(struct file *file, const char __user *buf, 2519 ssize_t simple_attr_write(struct file *file, const char __user *buf,
2521 size_t len, loff_t *ppos); 2520 size_t len, loff_t *ppos);
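Putting the pieces together, DEFINE_SIMPLE_ATTRIBUTE() generates the open method and file_operations for a single numeric value. A sketch exposing a hypothetical u64 demo_counter; the data pointer the get/set callbacks receive comes from inode->i_private, which is what e.g. debugfs_create_file() stores from its data argument:

	static u64 demo_counter;

	static int demo_counter_get(void *data, u64 *val)
	{
		*val = *(u64 *)data;
		return 0;
	}

	static int demo_counter_set(void *data, u64 val)
	{
		*(u64 *)data = val;
		return 0;
	}

	/* expands to demo_counter_fops_open() and demo_counter_fops */
	DEFINE_SIMPLE_ATTRIBUTE(demo_counter_fops, demo_counter_get,
				demo_counter_set, "%llu\n");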
2522 2521
2523 struct ctl_table; 2522 struct ctl_table;
2524 int proc_nr_files(struct ctl_table *table, int write, 2523 int proc_nr_files(struct ctl_table *table, int write,
2525 void __user *buffer, size_t *lenp, loff_t *ppos); 2524 void __user *buffer, size_t *lenp, loff_t *ppos);
2526 int proc_nr_dentry(struct ctl_table *table, int write, 2525 int proc_nr_dentry(struct ctl_table *table, int write,
2527 void __user *buffer, size_t *lenp, loff_t *ppos); 2526 void __user *buffer, size_t *lenp, loff_t *ppos);
2528 int proc_nr_inodes(struct ctl_table *table, int write, 2527 int proc_nr_inodes(struct ctl_table *table, int write,
2529 void __user *buffer, size_t *lenp, loff_t *ppos); 2528 void __user *buffer, size_t *lenp, loff_t *ppos);
2530 int __init get_filesystem_list(char *buf); 2529 int __init get_filesystem_list(char *buf);
2531 2530
2532 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) 2531 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
2533 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ 2532 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
2534 (flag & FMODE_NONOTIFY))) 2533 (flag & FMODE_NONOTIFY)))
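As a quick worked example of the two macros (assuming the usual values MAY_READ=4, MAY_WRITE=2, FMODE_READ=1, FMODE_WRITE=2):

	/* ACC_MODE() indexes a 4-byte table with the two O_ACCMODE bits:
	 *   ACC_MODE(O_RDONLY) == 04   (MAY_READ)
	 *   ACC_MODE(O_WRONLY) == 02   (MAY_WRITE)
	 *   ACC_MODE(O_RDWR)   == 06   (MAY_READ | MAY_WRITE)
	 *
	 * OPEN_FMODE() relies on (flags + 1) mapping the access mode onto
	 * FMODE_ bits while preserving FMODE_NONOTIFY:
	 *   OPEN_FMODE(O_RDONLY) == FMODE_READ
	 *   OPEN_FMODE(O_WRONLY) == FMODE_WRITE
	 *   OPEN_FMODE(O_RDWR)   == FMODE_READ | FMODE_WRITE
	 */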
2535 2534
2536 #endif /* __KERNEL__ */ 2535 #endif /* __KERNEL__ */
2537 #endif /* _LINUX_FS_H */ 2536 #endif /* _LINUX_FS_H */
2538 2537