Commit bb8430a2c8fe2b726033017daadf73c69b0348ea
Committed by
Linus Torvalds
1 parent
51ee4b84f5
Exists in
master
and in
20 other branches
locks: remove fl_copy_lock lock_manager operation
This one was only used for a nasty hack in nfsd, which has recently been removed. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 1 addition and 7 deletions Inline Diff
Documentation/filesystems/Locking
1 | The text below describes the locking rules for VFS-related methods. | 1 | The text below describes the locking rules for VFS-related methods. |
2 | It is (believed to be) up-to-date. *Please*, if you change anything in | 2 | It is (believed to be) up-to-date. *Please*, if you change anything in |
3 | prototypes or locking protocols - update this file. And update the relevant | 3 | prototypes or locking protocols - update this file. And update the relevant |
4 | instances in the tree, don't leave that to maintainers of filesystems/devices/ | 4 | instances in the tree, don't leave that to maintainers of filesystems/devices/ |
5 | etc. At the very least, put the list of dubious cases in the end of this file. | 5 | etc. At the very least, put the list of dubious cases in the end of this file. |
6 | Don't turn it into log - maintainers of out-of-the-tree code are supposed to | 6 | Don't turn it into log - maintainers of out-of-the-tree code are supposed to |
7 | be able to use diff(1). | 7 | be able to use diff(1). |
8 | Thing currently missing here: socket operations. Alexey? | 8 | Thing currently missing here: socket operations. Alexey? |
9 | 9 | ||
10 | --------------------------- dentry_operations -------------------------- | 10 | --------------------------- dentry_operations -------------------------- |
11 | prototypes: | 11 | prototypes: |
12 | int (*d_revalidate)(struct dentry *, int); | 12 | int (*d_revalidate)(struct dentry *, int); |
13 | int (*d_hash) (struct dentry *, struct qstr *); | 13 | int (*d_hash) (struct dentry *, struct qstr *); |
14 | int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); | 14 | int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); |
15 | int (*d_delete)(struct dentry *); | 15 | int (*d_delete)(struct dentry *); |
16 | void (*d_release)(struct dentry *); | 16 | void (*d_release)(struct dentry *); |
17 | void (*d_iput)(struct dentry *, struct inode *); | 17 | void (*d_iput)(struct dentry *, struct inode *); |
18 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); | 18 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); |
19 | 19 | ||
20 | locking rules: | 20 | locking rules: |
21 | none have BKL | 21 | none have BKL |
22 | dcache_lock rename_lock ->d_lock may block | 22 | dcache_lock rename_lock ->d_lock may block |
23 | d_revalidate: no no no yes | 23 | d_revalidate: no no no yes |
24 | d_hash no no no yes | 24 | d_hash no no no yes |
25 | d_compare: no yes no no | 25 | d_compare: no yes no no |
26 | d_delete: yes no yes no | 26 | d_delete: yes no yes no |
27 | d_release: no no no yes | 27 | d_release: no no no yes |
28 | d_iput: no no no yes | 28 | d_iput: no no no yes |
29 | d_dname: no no no no | 29 | d_dname: no no no no |
30 | 30 | ||
31 | --------------------------- inode_operations --------------------------- | 31 | --------------------------- inode_operations --------------------------- |
32 | prototypes: | 32 | prototypes: |
33 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); | 33 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); |
34 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid | 34 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid |
35 | ata *); | 35 | ata *); |
36 | int (*link) (struct dentry *,struct inode *,struct dentry *); | 36 | int (*link) (struct dentry *,struct inode *,struct dentry *); |
37 | int (*unlink) (struct inode *,struct dentry *); | 37 | int (*unlink) (struct inode *,struct dentry *); |
38 | int (*symlink) (struct inode *,struct dentry *,const char *); | 38 | int (*symlink) (struct inode *,struct dentry *,const char *); |
39 | int (*mkdir) (struct inode *,struct dentry *,int); | 39 | int (*mkdir) (struct inode *,struct dentry *,int); |
40 | int (*rmdir) (struct inode *,struct dentry *); | 40 | int (*rmdir) (struct inode *,struct dentry *); |
41 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); | 41 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); |
42 | int (*rename) (struct inode *, struct dentry *, | 42 | int (*rename) (struct inode *, struct dentry *, |
43 | struct inode *, struct dentry *); | 43 | struct inode *, struct dentry *); |
44 | int (*readlink) (struct dentry *, char __user *,int); | 44 | int (*readlink) (struct dentry *, char __user *,int); |
45 | int (*follow_link) (struct dentry *, struct nameidata *); | 45 | int (*follow_link) (struct dentry *, struct nameidata *); |
46 | void (*truncate) (struct inode *); | 46 | void (*truncate) (struct inode *); |
47 | int (*permission) (struct inode *, int, struct nameidata *); | 47 | int (*permission) (struct inode *, int, struct nameidata *); |
48 | int (*setattr) (struct dentry *, struct iattr *); | 48 | int (*setattr) (struct dentry *, struct iattr *); |
49 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); | 49 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); |
50 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 50 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
51 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); | 51 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); |
52 | ssize_t (*listxattr) (struct dentry *, char *, size_t); | 52 | ssize_t (*listxattr) (struct dentry *, char *, size_t); |
53 | int (*removexattr) (struct dentry *, const char *); | 53 | int (*removexattr) (struct dentry *, const char *); |
54 | 54 | ||
55 | locking rules: | 55 | locking rules: |
56 | all may block, none have BKL | 56 | all may block, none have BKL |
57 | i_mutex(inode) | 57 | i_mutex(inode) |
58 | lookup: yes | 58 | lookup: yes |
59 | create: yes | 59 | create: yes |
60 | link: yes (both) | 60 | link: yes (both) |
61 | mknod: yes | 61 | mknod: yes |
62 | symlink: yes | 62 | symlink: yes |
63 | mkdir: yes | 63 | mkdir: yes |
64 | unlink: yes (both) | 64 | unlink: yes (both) |
65 | rmdir: yes (both) (see below) | 65 | rmdir: yes (both) (see below) |
66 | rename: yes (all) (see below) | 66 | rename: yes (all) (see below) |
67 | readlink: no | 67 | readlink: no |
68 | follow_link: no | 68 | follow_link: no |
69 | truncate: yes (see below) | 69 | truncate: yes (see below) |
70 | setattr: yes | 70 | setattr: yes |
71 | permission: no | 71 | permission: no |
72 | getattr: no | 72 | getattr: no |
73 | setxattr: yes | 73 | setxattr: yes |
74 | getxattr: no | 74 | getxattr: no |
75 | listxattr: no | 75 | listxattr: no |
76 | removexattr: yes | 76 | removexattr: yes |
77 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | 77 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on |
78 | victim. | 78 | victim. |
79 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. | 79 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. |
80 | ->truncate() is never called directly - it's a callback, not a | 80 | ->truncate() is never called directly - it's a callback, not a |
81 | method. It's called by vmtruncate() - library function normally used by | 81 | method. It's called by vmtruncate() - library function normally used by |
82 | ->setattr(). Locking information above applies to that call (i.e. is | 82 | ->setattr(). Locking information above applies to that call (i.e. is |
83 | inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been | 83 | inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been |
84 | passed). | 84 | passed). |
85 | 85 | ||
86 | See Documentation/filesystems/directory-locking for more detailed discussion | 86 | See Documentation/filesystems/directory-locking for more detailed discussion |
87 | of the locking scheme for directory operations. | 87 | of the locking scheme for directory operations. |
88 | 88 | ||
89 | --------------------------- super_operations --------------------------- | 89 | --------------------------- super_operations --------------------------- |
90 | prototypes: | 90 | prototypes: |
91 | struct inode *(*alloc_inode)(struct super_block *sb); | 91 | struct inode *(*alloc_inode)(struct super_block *sb); |
92 | void (*destroy_inode)(struct inode *); | 92 | void (*destroy_inode)(struct inode *); |
93 | void (*dirty_inode) (struct inode *); | 93 | void (*dirty_inode) (struct inode *); |
94 | int (*write_inode) (struct inode *, int); | 94 | int (*write_inode) (struct inode *, int); |
95 | int (*drop_inode) (struct inode *); | 95 | int (*drop_inode) (struct inode *); |
96 | void (*evict_inode) (struct inode *); | 96 | void (*evict_inode) (struct inode *); |
97 | void (*put_super) (struct super_block *); | 97 | void (*put_super) (struct super_block *); |
98 | void (*write_super) (struct super_block *); | 98 | void (*write_super) (struct super_block *); |
99 | int (*sync_fs)(struct super_block *sb, int wait); | 99 | int (*sync_fs)(struct super_block *sb, int wait); |
100 | int (*freeze_fs) (struct super_block *); | 100 | int (*freeze_fs) (struct super_block *); |
101 | int (*unfreeze_fs) (struct super_block *); | 101 | int (*unfreeze_fs) (struct super_block *); |
102 | int (*statfs) (struct dentry *, struct kstatfs *); | 102 | int (*statfs) (struct dentry *, struct kstatfs *); |
103 | int (*remount_fs) (struct super_block *, int *, char *); | 103 | int (*remount_fs) (struct super_block *, int *, char *); |
104 | void (*umount_begin) (struct super_block *); | 104 | void (*umount_begin) (struct super_block *); |
105 | int (*show_options)(struct seq_file *, struct vfsmount *); | 105 | int (*show_options)(struct seq_file *, struct vfsmount *); |
106 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 106 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
107 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 107 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
108 | 108 | ||
109 | locking rules: | 109 | locking rules: |
110 | All may block [not true, see below] | 110 | All may block [not true, see below] |
111 | None have BKL | 111 | None have BKL |
112 | s_umount | 112 | s_umount |
113 | alloc_inode: | 113 | alloc_inode: |
114 | destroy_inode: | 114 | destroy_inode: |
115 | dirty_inode: (must not sleep) | 115 | dirty_inode: (must not sleep) |
116 | write_inode: | 116 | write_inode: |
117 | drop_inode: !!!inode_lock!!! | 117 | drop_inode: !!!inode_lock!!! |
118 | evict_inode: | 118 | evict_inode: |
119 | put_super: write | 119 | put_super: write |
120 | write_super: read | 120 | write_super: read |
121 | sync_fs: read | 121 | sync_fs: read |
122 | freeze_fs: read | 122 | freeze_fs: read |
123 | unfreeze_fs: read | 123 | unfreeze_fs: read |
124 | statfs: maybe(read) (see below) | 124 | statfs: maybe(read) (see below) |
125 | remount_fs: write | 125 | remount_fs: write |
126 | umount_begin: no | 126 | umount_begin: no |
127 | show_options: no (namespace_sem) | 127 | show_options: no (namespace_sem) |
128 | quota_read: no (see below) | 128 | quota_read: no (see below) |
129 | quota_write: no (see below) | 129 | quota_write: no (see below) |
130 | 130 | ||
131 | ->statfs() has s_umount (shared) when called by ustat(2) (native or | 131 | ->statfs() has s_umount (shared) when called by ustat(2) (native or |
132 | compat), but that's an accident of bad API; s_umount is used to pin | 132 | compat), but that's an accident of bad API; s_umount is used to pin |
133 | the superblock down when we only have dev_t given us by userland to | 133 | the superblock down when we only have dev_t given us by userland to |
134 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) | 134 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) |
135 | doesn't hold it when calling ->statfs() - superblock is pinned down | 135 | doesn't hold it when calling ->statfs() - superblock is pinned down |
136 | by resolving the pathname passed to syscall. | 136 | by resolving the pathname passed to syscall. |
137 | ->quota_read() and ->quota_write() functions are both guaranteed to | 137 | ->quota_read() and ->quota_write() functions are both guaranteed to |
138 | be the only ones operating on the quota file by the quota code (via | 138 | be the only ones operating on the quota file by the quota code (via |
139 | dqio_sem) (unless an admin really wants to screw up something and | 139 | dqio_sem) (unless an admin really wants to screw up something and |
140 | writes to quota files with quotas on). For other details about locking | 140 | writes to quota files with quotas on). For other details about locking |
141 | see also dquot_operations section. | 141 | see also dquot_operations section. |
142 | 142 | ||
143 | --------------------------- file_system_type --------------------------- | 143 | --------------------------- file_system_type --------------------------- |
144 | prototypes: | 144 | prototypes: |
145 | int (*get_sb) (struct file_system_type *, int, | 145 | int (*get_sb) (struct file_system_type *, int, |
146 | const char *, void *, struct vfsmount *); | 146 | const char *, void *, struct vfsmount *); |
147 | void (*kill_sb) (struct super_block *); | 147 | void (*kill_sb) (struct super_block *); |
148 | locking rules: | 148 | locking rules: |
149 | may block BKL | 149 | may block BKL |
150 | get_sb yes no | 150 | get_sb yes no |
151 | kill_sb yes no | 151 | kill_sb yes no |
152 | 152 | ||
153 | ->get_sb() returns error or 0 with locked superblock attached to the vfsmount | 153 | ->get_sb() returns error or 0 with locked superblock attached to the vfsmount |
154 | (exclusive on ->s_umount). | 154 | (exclusive on ->s_umount). |
155 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, | 155 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, |
156 | unlocks and drops the reference. | 156 | unlocks and drops the reference. |
157 | 157 | ||
158 | --------------------------- address_space_operations -------------------------- | 158 | --------------------------- address_space_operations -------------------------- |
159 | prototypes: | 159 | prototypes: |
160 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 160 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
161 | int (*readpage)(struct file *, struct page *); | 161 | int (*readpage)(struct file *, struct page *); |
162 | int (*sync_page)(struct page *); | 162 | int (*sync_page)(struct page *); |
163 | int (*writepages)(struct address_space *, struct writeback_control *); | 163 | int (*writepages)(struct address_space *, struct writeback_control *); |
164 | int (*set_page_dirty)(struct page *page); | 164 | int (*set_page_dirty)(struct page *page); |
165 | int (*readpages)(struct file *filp, struct address_space *mapping, | 165 | int (*readpages)(struct file *filp, struct address_space *mapping, |
166 | struct list_head *pages, unsigned nr_pages); | 166 | struct list_head *pages, unsigned nr_pages); |
167 | int (*write_begin)(struct file *, struct address_space *mapping, | 167 | int (*write_begin)(struct file *, struct address_space *mapping, |
168 | loff_t pos, unsigned len, unsigned flags, | 168 | loff_t pos, unsigned len, unsigned flags, |
169 | struct page **pagep, void **fsdata); | 169 | struct page **pagep, void **fsdata); |
170 | int (*write_end)(struct file *, struct address_space *mapping, | 170 | int (*write_end)(struct file *, struct address_space *mapping, |
171 | loff_t pos, unsigned len, unsigned copied, | 171 | loff_t pos, unsigned len, unsigned copied, |
172 | struct page *page, void *fsdata); | 172 | struct page *page, void *fsdata); |
173 | sector_t (*bmap)(struct address_space *, sector_t); | 173 | sector_t (*bmap)(struct address_space *, sector_t); |
174 | int (*invalidatepage) (struct page *, unsigned long); | 174 | int (*invalidatepage) (struct page *, unsigned long); |
175 | int (*releasepage) (struct page *, int); | 175 | int (*releasepage) (struct page *, int); |
176 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 176 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
177 | loff_t offset, unsigned long nr_segs); | 177 | loff_t offset, unsigned long nr_segs); |
178 | int (*launder_page) (struct page *); | 178 | int (*launder_page) (struct page *); |
179 | 179 | ||
180 | locking rules: | 180 | locking rules: |
181 | All except set_page_dirty may block | 181 | All except set_page_dirty may block |
182 | 182 | ||
183 | BKL PageLocked(page) i_mutex | 183 | BKL PageLocked(page) i_mutex |
184 | writepage: no yes, unlocks (see below) | 184 | writepage: no yes, unlocks (see below) |
185 | readpage: no yes, unlocks | 185 | readpage: no yes, unlocks |
186 | sync_page: no maybe | 186 | sync_page: no maybe |
187 | writepages: no | 187 | writepages: no |
188 | set_page_dirty no no | 188 | set_page_dirty no no |
189 | readpages: no | 189 | readpages: no |
190 | write_begin: no locks the page yes | 190 | write_begin: no locks the page yes |
191 | write_end: no yes, unlocks yes | 191 | write_end: no yes, unlocks yes |
192 | perform_write: no n/a yes | 192 | perform_write: no n/a yes |
193 | bmap: no | 193 | bmap: no |
194 | invalidatepage: no yes | 194 | invalidatepage: no yes |
195 | releasepage: no yes | 195 | releasepage: no yes |
196 | direct_IO: no | 196 | direct_IO: no |
197 | launder_page: no yes | 197 | launder_page: no yes |
198 | 198 | ||
199 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() | 199 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() |
200 | may be called from the request handler (/dev/loop). | 200 | may be called from the request handler (/dev/loop). |
201 | 201 | ||
202 | ->readpage() unlocks the page, either synchronously or via I/O | 202 | ->readpage() unlocks the page, either synchronously or via I/O |
203 | completion. | 203 | completion. |
204 | 204 | ||
205 | ->readpages() populates the pagecache with the passed pages and starts | 205 | ->readpages() populates the pagecache with the passed pages and starts |
206 | I/O against them. They come unlocked upon I/O completion. | 206 | I/O against them. They come unlocked upon I/O completion. |
207 | 207 | ||
208 | ->writepage() is used for two purposes: for "memory cleansing" and for | 208 | ->writepage() is used for two purposes: for "memory cleansing" and for |
209 | "sync". These are quite different operations and the behaviour may differ | 209 | "sync". These are quite different operations and the behaviour may differ |
210 | depending upon the mode. | 210 | depending upon the mode. |
211 | 211 | ||
212 | If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then | 212 | If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then |
213 | it *must* start I/O against the page, even if that would involve | 213 | it *must* start I/O against the page, even if that would involve |
214 | blocking on in-progress I/O. | 214 | blocking on in-progress I/O. |
215 | 215 | ||
216 | If writepage is called for memory cleansing (sync_mode == | 216 | If writepage is called for memory cleansing (sync_mode == |
217 | WBC_SYNC_NONE) then its role is to get as much writeout underway as | 217 | WBC_SYNC_NONE) then its role is to get as much writeout underway as |
218 | possible. So writepage should try to avoid blocking against | 218 | possible. So writepage should try to avoid blocking against |
219 | currently-in-progress I/O. | 219 | currently-in-progress I/O. |
220 | 220 | ||
221 | If the filesystem is not called for "sync" and it determines that it | 221 | If the filesystem is not called for "sync" and it determines that it |
222 | would need to block against in-progress I/O to be able to start new I/O | 222 | would need to block against in-progress I/O to be able to start new I/O |
223 | against the page the filesystem should redirty the page with | 223 | against the page the filesystem should redirty the page with |
224 | redirty_page_for_writepage(), then unlock the page and return zero. | 224 | redirty_page_for_writepage(), then unlock the page and return zero. |
225 | This may also be done to avoid internal deadlocks, but rarely. | 225 | This may also be done to avoid internal deadlocks, but rarely. |
226 | 226 | ||
227 | If the filesystem is called for sync then it must wait on any | 227 | If the filesystem is called for sync then it must wait on any |
228 | in-progress I/O and then start new I/O. | 228 | in-progress I/O and then start new I/O. |
229 | 229 | ||
230 | The filesystem should unlock the page synchronously, before returning to the | 230 | The filesystem should unlock the page synchronously, before returning to the |
231 | caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE | 231 | caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE |
232 | value. WRITEPAGE_ACTIVATE means that page cannot really be written out | 232 | value. WRITEPAGE_ACTIVATE means that page cannot really be written out |
233 | currently, and VM should stop calling ->writepage() on this page for some | 233 | currently, and VM should stop calling ->writepage() on this page for some |
234 | time. VM does this by moving page to the head of the active list, hence the | 234 | time. VM does this by moving page to the head of the active list, hence the |
235 | name. | 235 | name. |
236 | 236 | ||
237 | Unless the filesystem is going to redirty_page_for_writepage(), unlock the page | 237 | Unless the filesystem is going to redirty_page_for_writepage(), unlock the page |
238 | and return zero, writepage *must* run set_page_writeback() against the page, | 238 | and return zero, writepage *must* run set_page_writeback() against the page, |
239 | followed by unlocking it. Once set_page_writeback() has been run against the | 239 | followed by unlocking it. Once set_page_writeback() has been run against the |
240 | page, write I/O can be submitted and the write I/O completion handler must run | 240 | page, write I/O can be submitted and the write I/O completion handler must run |
241 | end_page_writeback() once the I/O is complete. If no I/O is submitted, the | 241 | end_page_writeback() once the I/O is complete. If no I/O is submitted, the |
242 | filesystem must run end_page_writeback() against the page before returning from | 242 | filesystem must run end_page_writeback() against the page before returning from |
243 | writepage. | 243 | writepage. |
244 | 244 | ||
245 | That is: after 2.5.12, pages which are under writeout are *not* locked. Note, | 245 | That is: after 2.5.12, pages which are under writeout are *not* locked. Note, |
246 | if the filesystem needs the page to be locked during writeout, that is ok, too, | 246 | if the filesystem needs the page to be locked during writeout, that is ok, too, |
247 | the page is allowed to be unlocked at any point in time between the calls to | 247 | the page is allowed to be unlocked at any point in time between the calls to |
248 | set_page_writeback() and end_page_writeback(). | 248 | set_page_writeback() and end_page_writeback(). |
249 | 249 | ||
250 | Note, failure to run either redirty_page_for_writepage() or the combination of | 250 | Note, failure to run either redirty_page_for_writepage() or the combination of |
251 | set_page_writeback()/end_page_writeback() on a page submitted to writepage | 251 | set_page_writeback()/end_page_writeback() on a page submitted to writepage |
252 | will leave the page itself marked clean but it will be tagged as dirty in the | 252 | will leave the page itself marked clean but it will be tagged as dirty in the |
253 | radix tree. This incoherency can lead to all sorts of hard-to-debug problems | 253 | radix tree. This incoherency can lead to all sorts of hard-to-debug problems |
254 | in the filesystem like having dirty inodes at umount and losing written data. | 254 | in the filesystem like having dirty inodes at umount and losing written data. |
255 | 255 | ||
256 | ->sync_page() locking rules are not well-defined - usually it is called | 256 | ->sync_page() locking rules are not well-defined - usually it is called |
257 | with lock on page, but that is not guaranteed. Considering the currently | 257 | with lock on page, but that is not guaranteed. Considering the currently |
258 | existing instances of this method ->sync_page() itself doesn't look | 258 | existing instances of this method ->sync_page() itself doesn't look |
259 | well-defined... | 259 | well-defined... |
260 | 260 | ||
261 | ->writepages() is used for periodic writeback and for syscall-initiated | 261 | ->writepages() is used for periodic writeback and for syscall-initiated |
262 | sync operations. The address_space should start I/O against at least | 262 | sync operations. The address_space should start I/O against at least |
263 | *nr_to_write pages. *nr_to_write must be decremented for each page which is | 263 | *nr_to_write pages. *nr_to_write must be decremented for each page which is |
264 | written. The address_space implementation may write more (or less) pages | 264 | written. The address_space implementation may write more (or less) pages |
265 | than *nr_to_write asks for, but it should try to be reasonably close. If | 265 | than *nr_to_write asks for, but it should try to be reasonably close. If |
266 | nr_to_write is NULL, all dirty pages must be written. | 266 | nr_to_write is NULL, all dirty pages must be written. |
267 | 267 | ||
268 | writepages should _only_ write pages which are present on | 268 | writepages should _only_ write pages which are present on |
269 | mapping->io_pages. | 269 | mapping->io_pages. |
270 | 270 | ||
271 | ->set_page_dirty() is called from various places in the kernel | 271 | ->set_page_dirty() is called from various places in the kernel |
272 | when the target page is marked as needing writeback. It may be called | 272 | when the target page is marked as needing writeback. It may be called |
273 | under spinlock (it cannot block) and is sometimes called with the page | 273 | under spinlock (it cannot block) and is sometimes called with the page |
274 | not locked. | 274 | not locked. |
275 | 275 | ||
276 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some | 276 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some |
277 | filesystems and by the swapper. The latter will eventually go away. All | 277 | filesystems and by the swapper. The latter will eventually go away. All |
278 | instances do not actually need the BKL. Please, keep it that way and don't | 278 | instances do not actually need the BKL. Please, keep it that way and don't |
279 | breed new callers. | 279 | breed new callers. |
280 | 280 | ||
281 | ->invalidatepage() is called when the filesystem must attempt to drop | 281 | ->invalidatepage() is called when the filesystem must attempt to drop |
282 | some or all of the buffers from the page when it is being truncated. It | 282 | some or all of the buffers from the page when it is being truncated. It |
283 | returns zero on success. If ->invalidatepage is zero, the kernel uses | 283 | returns zero on success. If ->invalidatepage is zero, the kernel uses |
284 | block_invalidatepage() instead. | 284 | block_invalidatepage() instead. |
285 | 285 | ||
286 | ->releasepage() is called when the kernel is about to try to drop the | 286 | ->releasepage() is called when the kernel is about to try to drop the |
287 | buffers from the page in preparation for freeing it. It returns zero to | 287 | buffers from the page in preparation for freeing it. It returns zero to |
288 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, | 288 | indicate that the buffers are (or may be) freeable. If ->releasepage is zero, |
289 | the kernel assumes that the fs has no private interest in the buffers. | 289 | the kernel assumes that the fs has no private interest in the buffers. |
290 | 290 | ||
291 | ->launder_page() may be called prior to releasing a page if | 291 | ->launder_page() may be called prior to releasing a page if |
292 | it is still found to be dirty. It returns zero if the page was successfully | 292 | it is still found to be dirty. It returns zero if the page was successfully |
293 | cleaned, or an error value if not. Note that in order to prevent the page | 293 | cleaned, or an error value if not. Note that in order to prevent the page |
294 | getting mapped back in and redirtied, it needs to be kept locked | 294 | getting mapped back in and redirtied, it needs to be kept locked |
295 | across the entire operation. | 295 | across the entire operation. |
296 | 296 | ||
297 | Note: currently almost all instances of address_space methods are | 297 | Note: currently almost all instances of address_space methods are |
298 | using BKL for internal serialization and that's one of the worst sources | 298 | using BKL for internal serialization and that's one of the worst sources |
299 | of contention. Normally they are calling library functions (in fs/buffer.c) | 299 | of contention. Normally they are calling library functions (in fs/buffer.c) |
300 | and pass foo_get_block() as a callback (on local block-based filesystems, | 300 | and pass foo_get_block() as a callback (on local block-based filesystems, |
301 | indeed). BKL is not needed for library stuff and is usually taken by | 301 | indeed). BKL is not needed for library stuff and is usually taken by |
302 | foo_get_block(). It's an overkill, since block bitmaps can be protected by | 302 | foo_get_block(). It's an overkill, since block bitmaps can be protected by |
303 | internal fs locking and real critical areas are much smaller than the areas | 303 | internal fs locking and real critical areas are much smaller than the areas |
304 | filesystems protect now. | 304 | filesystems protect now. |
305 | 305 | ||
306 | ----------------------- file_lock_operations ------------------------------ | 306 | ----------------------- file_lock_operations ------------------------------ |
307 | prototypes: | 307 | prototypes: |
308 | void (*fl_insert)(struct file_lock *); /* lock insertion callback */ | 308 | void (*fl_insert)(struct file_lock *); /* lock insertion callback */ |
309 | void (*fl_remove)(struct file_lock *); /* lock removal callback */ | 309 | void (*fl_remove)(struct file_lock *); /* lock removal callback */ |
310 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 310 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
311 | void (*fl_release_private)(struct file_lock *); | 311 | void (*fl_release_private)(struct file_lock *); |
312 | 312 | ||
313 | 313 | ||
314 | locking rules: | 314 | locking rules: |
315 | BKL may block | 315 | BKL may block |
316 | fl_insert: yes no | 316 | fl_insert: yes no |
317 | fl_remove: yes no | 317 | fl_remove: yes no |
318 | fl_copy_lock: yes no | 318 | fl_copy_lock: yes no |
319 | fl_release_private: yes yes | 319 | fl_release_private: yes yes |
320 | 320 | ||
321 | ----------------------- lock_manager_operations --------------------------- | 321 | ----------------------- lock_manager_operations --------------------------- |
322 | prototypes: | 322 | prototypes: |
323 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | 323 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); |
324 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | 324 | void (*fl_notify)(struct file_lock *); /* unblock callback */ |
325 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | ||
326 | void (*fl_release_private)(struct file_lock *); | 325 | void (*fl_release_private)(struct file_lock *); |
327 | void (*fl_break)(struct file_lock *); /* break_lease callback */ | 326 | void (*fl_break)(struct file_lock *); /* break_lease callback */ |
328 | 327 | ||
329 | locking rules: | 328 | locking rules: |
330 | BKL may block | 329 | BKL may block |
331 | fl_compare_owner: yes no | 330 | fl_compare_owner: yes no |
332 | fl_notify: yes no | 331 | fl_notify: yes no |
333 | fl_copy_lock: yes no | ||
334 | fl_release_private: yes yes | 332 | fl_release_private: yes yes |
335 | fl_break: yes no | 333 | fl_break: yes no |
336 | 334 | ||
337 | Currently only NFSD and NLM provide instances of this class. None of | 335 | Currently only NFSD and NLM provide instances of this class. None of |
338 | them block. If you have out-of-tree instances - please, show up. Locking | 336 | them block. If you have out-of-tree instances - please, show up. Locking |
339 | in that area will change. | 337 | in that area will change. |
340 | --------------------------- buffer_head ----------------------------------- | 338 | --------------------------- buffer_head ----------------------------------- |
341 | prototypes: | 339 | prototypes: |
342 | void (*b_end_io)(struct buffer_head *bh, int uptodate); | 340 | void (*b_end_io)(struct buffer_head *bh, int uptodate); |
343 | 341 | ||
344 | locking rules: | 342 | locking rules: |
345 | called from interrupts. In other words, extreme care is needed here. | 343 | called from interrupts. In other words, extreme care is needed here. |
346 | bh is locked, but that's all warranties we have here. Currently only RAID1, | 344 | bh is locked, but that's all warranties we have here. Currently only RAID1, |
347 | highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices | 345 | highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices |
348 | call this method upon the IO completion. | 346 | call this method upon the IO completion. |
349 | 347 | ||
350 | --------------------------- block_device_operations ----------------------- | 348 | --------------------------- block_device_operations ----------------------- |
351 | prototypes: | 349 | prototypes: |
352 | int (*open) (struct block_device *, fmode_t); | 350 | int (*open) (struct block_device *, fmode_t); |
353 | int (*release) (struct gendisk *, fmode_t); | 351 | int (*release) (struct gendisk *, fmode_t); |
354 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 352 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
355 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 353 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
356 | int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); | 354 | int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); |
357 | int (*media_changed) (struct gendisk *); | 355 | int (*media_changed) (struct gendisk *); |
358 | void (*unlock_native_capacity) (struct gendisk *); | 356 | void (*unlock_native_capacity) (struct gendisk *); |
359 | int (*revalidate_disk) (struct gendisk *); | 357 | int (*revalidate_disk) (struct gendisk *); |
360 | int (*getgeo)(struct block_device *, struct hd_geometry *); | 358 | int (*getgeo)(struct block_device *, struct hd_geometry *); |
361 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 359 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
362 | 360 | ||
363 | locking rules: | 361 | locking rules: |
364 | BKL bd_mutex | 362 | BKL bd_mutex |
365 | open: no yes | 363 | open: no yes |
366 | release: no yes | 364 | release: no yes |
367 | ioctl: no no | 365 | ioctl: no no |
368 | compat_ioctl: no no | 366 | compat_ioctl: no no |
369 | direct_access: no no | 367 | direct_access: no no |
370 | media_changed: no no | 368 | media_changed: no no |
371 | unlock_native_capacity: no no | 369 | unlock_native_capacity: no no |
372 | revalidate_disk: no no | 370 | revalidate_disk: no no |
373 | getgeo: no no | 371 | getgeo: no no |
374 | swap_slot_free_notify: no no (see below) | 372 | swap_slot_free_notify: no no (see below) |
375 | 373 | ||
376 | media_changed, unlock_native_capacity and revalidate_disk are called only from | 374 | media_changed, unlock_native_capacity and revalidate_disk are called only from |
377 | check_disk_change(). | 375 | check_disk_change(). |
378 | 376 | ||
379 | swap_slot_free_notify is called with swap_lock and sometimes the page lock | 377 | swap_slot_free_notify is called with swap_lock and sometimes the page lock |
380 | held. | 378 | held. |
381 | 379 | ||
382 | 380 | ||
383 | --------------------------- file_operations ------------------------------- | 381 | --------------------------- file_operations ------------------------------- |
384 | prototypes: | 382 | prototypes: |
385 | loff_t (*llseek) (struct file *, loff_t, int); | 383 | loff_t (*llseek) (struct file *, loff_t, int); |
386 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); | 384 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); |
387 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); | 385 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
388 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 386 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
389 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 387 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
390 | int (*readdir) (struct file *, void *, filldir_t); | 388 | int (*readdir) (struct file *, void *, filldir_t); |
391 | unsigned int (*poll) (struct file *, struct poll_table_struct *); | 389 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
392 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); | 390 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
393 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); | 391 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); |
394 | int (*mmap) (struct file *, struct vm_area_struct *); | 392 | int (*mmap) (struct file *, struct vm_area_struct *); |
395 | int (*open) (struct inode *, struct file *); | 393 | int (*open) (struct inode *, struct file *); |
396 | int (*flush) (struct file *); | 394 | int (*flush) (struct file *); |
397 | int (*release) (struct inode *, struct file *); | 395 | int (*release) (struct inode *, struct file *); |
398 | int (*fsync) (struct file *, int datasync); | 396 | int (*fsync) (struct file *, int datasync); |
399 | int (*aio_fsync) (struct kiocb *, int datasync); | 397 | int (*aio_fsync) (struct kiocb *, int datasync); |
400 | int (*fasync) (int, struct file *, int); | 398 | int (*fasync) (int, struct file *, int); |
401 | int (*lock) (struct file *, int, struct file_lock *); | 399 | int (*lock) (struct file *, int, struct file_lock *); |
402 | ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, | 400 | ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, |
403 | loff_t *); | 401 | loff_t *); |
404 | ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, | 402 | ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, |
405 | loff_t *); | 403 | loff_t *); |
406 | ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, | 404 | ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, |
407 | void __user *); | 405 | void __user *); |
408 | ssize_t (*sendpage) (struct file *, struct page *, int, size_t, | 406 | ssize_t (*sendpage) (struct file *, struct page *, int, size_t, |
409 | loff_t *, int); | 407 | loff_t *, int); |
410 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, | 408 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, |
411 | unsigned long, unsigned long, unsigned long); | 409 | unsigned long, unsigned long, unsigned long); |
412 | int (*check_flags)(int); | 410 | int (*check_flags)(int); |
413 | }; | 411 | }; |
414 | 412 | ||
415 | locking rules: | 413 | locking rules: |
416 | All may block. | 414 | All may block. |
417 | BKL | 415 | BKL |
418 | llseek: no (see below) | 416 | llseek: no (see below) |
419 | read: no | 417 | read: no |
420 | aio_read: no | 418 | aio_read: no |
421 | write: no | 419 | write: no |
422 | aio_write: no | 420 | aio_write: no |
423 | readdir: no | 421 | readdir: no |
424 | poll: no | 422 | poll: no |
425 | unlocked_ioctl: no | 423 | unlocked_ioctl: no |
426 | compat_ioctl: no | 424 | compat_ioctl: no |
427 | mmap: no | 425 | mmap: no |
428 | open: no | 426 | open: no |
429 | flush: no | 427 | flush: no |
430 | release: no | 428 | release: no |
431 | fsync: no (see below) | 429 | fsync: no (see below) |
432 | aio_fsync: no | 430 | aio_fsync: no |
433 | fasync: no | 431 | fasync: no |
434 | lock: yes | 432 | lock: yes |
435 | readv: no | 433 | readv: no |
436 | writev: no | 434 | writev: no |
437 | sendfile: no | 435 | sendfile: no |
438 | sendpage: no | 436 | sendpage: no |
439 | get_unmapped_area: no | 437 | get_unmapped_area: no |
440 | check_flags: no | 438 | check_flags: no |
441 | 439 | ||
442 | ->llseek() locking has moved from llseek to the individual llseek | 440 | ->llseek() locking has moved from llseek to the individual llseek |
443 | implementations. If your fs is not using generic_file_llseek, you | 441 | implementations. If your fs is not using generic_file_llseek, you |
444 | need to acquire and release the appropriate locks in your ->llseek(). | 442 | need to acquire and release the appropriate locks in your ->llseek(). |
445 | For many filesystems, it is probably safe to acquire the inode | 443 | For many filesystems, it is probably safe to acquire the inode |
446 | mutex or just to use i_size_read() instead. | 444 | mutex or just to use i_size_read() instead. |
447 | Note: this does not protect the file->f_pos against concurrent modifications | 445 | Note: this does not protect the file->f_pos against concurrent modifications |
448 | since this is something the userspace has to take care about. | 446 | since this is something the userspace has to take care about. |
449 | 447 | ||
450 | Note: ext2_release() was *the* source of contention on fs-intensive | 448 | Note: ext2_release() was *the* source of contention on fs-intensive |
451 | loads and dropping BKL on ->release() helps to get rid of that (we still | 449 | loads and dropping BKL on ->release() helps to get rid of that (we still |
452 | grab BKL for cases when we close a file that had been opened r/w, but that | 450 | grab BKL for cases when we close a file that had been opened r/w, but that |
453 | can and should be done using the internal locking with smaller critical areas). | 451 | can and should be done using the internal locking with smaller critical areas). |
454 | Current worst offender is ext2_get_block()... | 452 | Current worst offender is ext2_get_block()... |
455 | 453 | ||
456 | ->fasync() is called without BKL protection, and is responsible for | 454 | ->fasync() is called without BKL protection, and is responsible for |
457 | maintaining the FASYNC bit in filp->f_flags. Most instances call | 455 | maintaining the FASYNC bit in filp->f_flags. Most instances call |
458 | fasync_helper(), which does that maintenance, so it's not normally | 456 | fasync_helper(), which does that maintenance, so it's not normally |
459 | something one needs to worry about. Return values > 0 will be mapped to | 457 | something one needs to worry about. Return values > 0 will be mapped to |
460 | zero in the VFS layer. | 458 | zero in the VFS layer. |
461 | 459 | ||
462 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would | 460 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would |
463 | move ->readdir() to inode_operations and use a separate method for directory | 461 | move ->readdir() to inode_operations and use a separate method for directory |
464 | ->ioctl() or kill the latter completely. One of the problems is that for | 462 | ->ioctl() or kill the latter completely. One of the problems is that for |
465 | anything that resembles union-mount we won't have a struct file for all | 463 | anything that resembles union-mount we won't have a struct file for all |
466 | components. And there are other reasons why the current interface is a mess... | 464 | components. And there are other reasons why the current interface is a mess... |
467 | 465 | ||
468 | ->read on directories probably must go away - we should just enforce -EISDIR | 466 | ->read on directories probably must go away - we should just enforce -EISDIR |
469 | in sys_read() and friends. | 467 | in sys_read() and friends. |
470 | 468 | ||
471 | ->fsync() has i_mutex on inode. | 469 | ->fsync() has i_mutex on inode. |
472 | 470 | ||
473 | --------------------------- dquot_operations ------------------------------- | 471 | --------------------------- dquot_operations ------------------------------- |
474 | prototypes: | 472 | prototypes: |
475 | int (*write_dquot) (struct dquot *); | 473 | int (*write_dquot) (struct dquot *); |
476 | int (*acquire_dquot) (struct dquot *); | 474 | int (*acquire_dquot) (struct dquot *); |
477 | int (*release_dquot) (struct dquot *); | 475 | int (*release_dquot) (struct dquot *); |
478 | int (*mark_dirty) (struct dquot *); | 476 | int (*mark_dirty) (struct dquot *); |
479 | int (*write_info) (struct super_block *, int); | 477 | int (*write_info) (struct super_block *, int); |
480 | 478 | ||
481 | These operations are intended to be more or less wrapping functions that ensure | 479 | These operations are intended to be more or less wrapping functions that ensure |
482 | a proper locking wrt the filesystem and call the generic quota operations. | 480 | a proper locking wrt the filesystem and call the generic quota operations. |
483 | 481 | ||
484 | What filesystem should expect from the generic quota functions: | 482 | What filesystem should expect from the generic quota functions: |
485 | 483 | ||
486 | FS recursion Held locks when called | 484 | FS recursion Held locks when called |
487 | write_dquot: yes dqonoff_sem or dqptr_sem | 485 | write_dquot: yes dqonoff_sem or dqptr_sem |
488 | acquire_dquot: yes dqonoff_sem or dqptr_sem | 486 | acquire_dquot: yes dqonoff_sem or dqptr_sem |
489 | release_dquot: yes dqonoff_sem or dqptr_sem | 487 | release_dquot: yes dqonoff_sem or dqptr_sem |
490 | mark_dirty: no - | 488 | mark_dirty: no - |
491 | write_info: yes dqonoff_sem | 489 | write_info: yes dqonoff_sem |
492 | 490 | ||
493 | FS recursion means calling ->quota_read() and ->quota_write() from superblock | 491 | FS recursion means calling ->quota_read() and ->quota_write() from superblock |
494 | operations. | 492 | operations. |
495 | 493 | ||
496 | More details about quota locking can be found in fs/dquot.c. | 494 | More details about quota locking can be found in fs/dquot.c. |
497 | 495 | ||
498 | --------------------------- vm_operations_struct ----------------------------- | 496 | --------------------------- vm_operations_struct ----------------------------- |
499 | prototypes: | 497 | prototypes: |
500 | void (*open)(struct vm_area_struct*); | 498 | void (*open)(struct vm_area_struct*); |
501 | void (*close)(struct vm_area_struct*); | 499 | void (*close)(struct vm_area_struct*); |
502 | int (*fault)(struct vm_area_struct*, struct vm_fault *); | 500 | int (*fault)(struct vm_area_struct*, struct vm_fault *); |
503 | int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); | 501 | int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); |
504 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | 502 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); |
505 | 503 | ||
506 | locking rules: | 504 | locking rules: |
507 | BKL mmap_sem PageLocked(page) | 505 | BKL mmap_sem PageLocked(page) |
508 | open: no yes | 506 | open: no yes |
509 | close: no yes | 507 | close: no yes |
510 | fault: no yes can return with page locked | 508 | fault: no yes can return with page locked |
511 | page_mkwrite: no yes can return with page locked | 509 | page_mkwrite: no yes can return with page locked |
512 | access: no yes | 510 | access: no yes |
513 | 511 | ||
514 | ->fault() is called when a previously not present pte is about | 512 | ->fault() is called when a previously not present pte is about |
515 | to be faulted in. The filesystem must find and return the page associated | 513 | to be faulted in. The filesystem must find and return the page associated |
516 | with the passed in "pgoff" in the vm_fault structure. If it is possible that | 514 | with the passed in "pgoff" in the vm_fault structure. If it is possible that |
517 | the page may be truncated and/or invalidated, then the filesystem must lock | 515 | the page may be truncated and/or invalidated, then the filesystem must lock |
518 | the page, then ensure it is not already truncated (the page lock will block | 516 | the page, then ensure it is not already truncated (the page lock will block |
519 | subsequent truncate), and then return with VM_FAULT_LOCKED, and the page | 517 | subsequent truncate), and then return with VM_FAULT_LOCKED, and the page |
520 | locked. The VM will unlock the page. | 518 | locked. The VM will unlock the page. |
521 | 519 | ||
522 | ->page_mkwrite() is called when a previously read-only pte is | 520 | ->page_mkwrite() is called when a previously read-only pte is |
523 | about to become writeable. The filesystem again must ensure that there are | 521 | about to become writeable. The filesystem again must ensure that there are |
524 | no truncate/invalidate races, and then return with the page locked. If | 522 | no truncate/invalidate races, and then return with the page locked. If |
525 | the page has been truncated, the filesystem should not look up a new page | 523 | the page has been truncated, the filesystem should not look up a new page |
526 | like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which | 524 | like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which |
527 | will cause the VM to retry the fault. | 525 | will cause the VM to retry the fault. |
528 | 526 | ||
529 | ->access() is called when get_user_pages() fails in | 527 | ->access() is called when get_user_pages() fails in |
530 | access_process_vm(), typically used to debug a process through | 528 | access_process_vm(), typically used to debug a process through |
531 | /proc/pid/mem or ptrace. This function is needed only for | 529 | /proc/pid/mem or ptrace. This function is needed only for |
532 | VM_IO | VM_PFNMAP VMAs. | 530 | VM_IO | VM_PFNMAP VMAs. |
533 | 531 | ||
534 | ================================================================================ | 532 | ================================================================================ |
535 | Dubious stuff | 533 | Dubious stuff |
536 | 534 | ||
537 | (if you break something or notice that it is broken and do not fix it yourself | 535 | (if you break something or notice that it is broken and do not fix it yourself |
538 | - at least put it here) | 536 | - at least put it here) |
539 | 537 | ||
540 | ipc/shm.c::shm_delete() - may need BKL. | 538 | ipc/shm.c::shm_delete() - may need BKL. |
541 | ->read() and ->write() in many drivers are (probably) missing BKL. | 539 | ->read() and ->write() in many drivers are (probably) missing BKL. |
542 | 540 |
fs/locks.c
1 | /* | 1 | /* |
2 | * linux/fs/locks.c | 2 | * linux/fs/locks.c |
3 | * | 3 | * |
4 | * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. | 4 | * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. |
5 | * Doug Evans (dje@spiff.uucp), August 07, 1992 | 5 | * Doug Evans (dje@spiff.uucp), August 07, 1992 |
6 | * | 6 | * |
7 | * Deadlock detection added. | 7 | * Deadlock detection added. |
8 | * FIXME: one thing isn't handled yet: | 8 | * FIXME: one thing isn't handled yet: |
9 | * - mandatory locks (requires lots of changes elsewhere) | 9 | * - mandatory locks (requires lots of changes elsewhere) |
10 | * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994. | 10 | * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994. |
11 | * | 11 | * |
12 | * Miscellaneous edits, and a total rewrite of posix_lock_file() code. | 12 | * Miscellaneous edits, and a total rewrite of posix_lock_file() code. |
13 | * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 | 13 | * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 |
14 | * | 14 | * |
15 | * Converted file_lock_table to a linked list from an array, which eliminates | 15 | * Converted file_lock_table to a linked list from an array, which eliminates |
16 | * the limits on how many active file locks are open. | 16 | * the limits on how many active file locks are open. |
17 | * Chad Page (pageone@netcom.com), November 27, 1994 | 17 | * Chad Page (pageone@netcom.com), November 27, 1994 |
18 | * | 18 | * |
19 | * Removed dependency on file descriptors. dup()'ed file descriptors now | 19 | * Removed dependency on file descriptors. dup()'ed file descriptors now |
20 | * get the same locks as the original file descriptors, and a close() on | 20 | * get the same locks as the original file descriptors, and a close() on |
21 | * any file descriptor removes ALL the locks on the file for the current | 21 | * any file descriptor removes ALL the locks on the file for the current |
22 | * process. Since locks still depend on the process id, locks are inherited | 22 | * process. Since locks still depend on the process id, locks are inherited |
23 | * after an exec() but not after a fork(). This agrees with POSIX, and both | 23 | * after an exec() but not after a fork(). This agrees with POSIX, and both |
24 | * BSD and SVR4 practice. | 24 | * BSD and SVR4 practice. |
25 | * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995 | 25 | * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995 |
26 | * | 26 | * |
27 | * Scrapped free list which is redundant now that we allocate locks | 27 | * Scrapped free list which is redundant now that we allocate locks |
28 | * dynamically with kmalloc()/kfree(). | 28 | * dynamically with kmalloc()/kfree(). |
29 | * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995 | 29 | * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995 |
30 | * | 30 | * |
31 | * Implemented two lock personalities - FL_FLOCK and FL_POSIX. | 31 | * Implemented two lock personalities - FL_FLOCK and FL_POSIX. |
32 | * | 32 | * |
33 | * FL_POSIX locks are created with calls to fcntl() and lockf() through the | 33 | * FL_POSIX locks are created with calls to fcntl() and lockf() through the |
34 | * fcntl() system call. They have the semantics described above. | 34 | * fcntl() system call. They have the semantics described above. |
35 | * | 35 | * |
36 | * FL_FLOCK locks are created with calls to flock(), through the flock() | 36 | * FL_FLOCK locks are created with calls to flock(), through the flock() |
37 | * system call, which is new. Old C libraries implement flock() via fcntl() | 37 | * system call, which is new. Old C libraries implement flock() via fcntl() |
38 | * and will continue to use the old, broken implementation. | 38 | * and will continue to use the old, broken implementation. |
39 | * | 39 | * |
40 | * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated | 40 | * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated |
41 | * with a file pointer (filp). As a result they can be shared by a parent | 41 | * with a file pointer (filp). As a result they can be shared by a parent |
42 | * process and its children after a fork(). They are removed when the last | 42 | * process and its children after a fork(). They are removed when the last |
43 | * file descriptor referring to the file pointer is closed (unless explicitly | 43 | * file descriptor referring to the file pointer is closed (unless explicitly |
44 | * unlocked). | 44 | * unlocked). |
45 | * | 45 | * |
46 | * FL_FLOCK locks never deadlock, an existing lock is always removed before | 46 | * FL_FLOCK locks never deadlock, an existing lock is always removed before |
47 | * upgrading from shared to exclusive (or vice versa). When this happens | 47 | * upgrading from shared to exclusive (or vice versa). When this happens |
48 | * any processes blocked by the current lock are woken up and allowed to | 48 | * any processes blocked by the current lock are woken up and allowed to |
49 | * run before the new lock is applied. | 49 | * run before the new lock is applied. |
50 | * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 | 50 | * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 |
51 | * | 51 | * |
52 | * Removed some race conditions in flock_lock_file(), marked other possible | 52 | * Removed some race conditions in flock_lock_file(), marked other possible |
53 | * races. Just grep for FIXME to see them. | 53 | * races. Just grep for FIXME to see them. |
54 | * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. | 54 | * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. |
55 | * | 55 | * |
56 | * Addressed Dmitry's concerns. Deadlock checking no longer recursive. | 56 | * Addressed Dmitry's concerns. Deadlock checking no longer recursive. |
57 | * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep | 57 | * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep |
58 | * once we've checked for blocking and deadlocking. | 58 | * once we've checked for blocking and deadlocking. |
59 | * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996. | 59 | * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996. |
60 | * | 60 | * |
61 | * Initial implementation of mandatory locks. SunOS turned out to be | 61 | * Initial implementation of mandatory locks. SunOS turned out to be |
62 | * a rotten model, so I implemented the "obvious" semantics. | 62 | * a rotten model, so I implemented the "obvious" semantics. |
63 | * See 'Documentation/mandatory.txt' for details. | 63 | * See 'Documentation/mandatory.txt' for details. |
64 | * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. | 64 | * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. |
65 | * | 65 | * |
66 | * Don't allow mandatory locks on mmap()'ed files. Added simple functions to | 66 | * Don't allow mandatory locks on mmap()'ed files. Added simple functions to |
67 | * check if a file has mandatory locks, used by mmap(), open() and creat() to | 67 | * check if a file has mandatory locks, used by mmap(), open() and creat() to |
68 | * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference | 68 | * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference |
69 | * Manual, Section 2. | 69 | * Manual, Section 2. |
70 | * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996. | 70 | * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996. |
71 | * | 71 | * |
72 | * Tidied up block list handling. Added '/proc/locks' interface. | 72 | * Tidied up block list handling. Added '/proc/locks' interface. |
73 | * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996. | 73 | * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996. |
74 | * | 74 | * |
75 | * Fixed deadlock condition for pathological code that mixes calls to | 75 | * Fixed deadlock condition for pathological code that mixes calls to |
76 | * flock() and fcntl(). | 76 | * flock() and fcntl(). |
77 | * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996. | 77 | * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996. |
78 | * | 78 | * |
79 | * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use | 79 | * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use |
80 | * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to | 80 | * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to |
81 | * guarantee sensible behaviour in the case where file system modules might | 81 | * guarantee sensible behaviour in the case where file system modules might |
82 | * be compiled with different options than the kernel itself. | 82 | * be compiled with different options than the kernel itself. |
83 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. | 83 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. |
84 | * | 84 | * |
85 | * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel | 85 | * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel |
86 | * (Thomas.Meckel@mni.fh-giessen.de) for spotting this. | 86 | * (Thomas.Meckel@mni.fh-giessen.de) for spotting this. |
87 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. | 87 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. |
88 | * | 88 | * |
89 | * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK | 89 | * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK |
90 | * locks. Changed process synchronisation to avoid dereferencing locks that | 90 | * locks. Changed process synchronisation to avoid dereferencing locks that |
91 | * have already been freed. | 91 | * have already been freed. |
92 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996. | 92 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996. |
93 | * | 93 | * |
94 | * Made the block list a circular list to minimise searching in the list. | 94 | * Made the block list a circular list to minimise searching in the list. |
95 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996. | 95 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996. |
96 | * | 96 | * |
97 | * Made mandatory locking a mount option. Default is not to allow mandatory | 97 | * Made mandatory locking a mount option. Default is not to allow mandatory |
98 | * locking. | 98 | * locking. |
99 | * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. | 99 | * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. |
100 | * | 100 | * |
101 | * Some adaptations for NFS support. | 101 | * Some adaptations for NFS support. |
102 | * Olaf Kirch (okir@monad.swb.de), Dec 1996, | 102 | * Olaf Kirch (okir@monad.swb.de), Dec 1996, |
103 | * | 103 | * |
104 | * Fixed /proc/locks interface so that we can't overrun the buffer we are handed. | 104 | * Fixed /proc/locks interface so that we can't overrun the buffer we are handed. |
105 | * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997. | 105 | * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997. |
106 | * | 106 | * |
107 | * Use slab allocator instead of kmalloc/kfree. | 107 | * Use slab allocator instead of kmalloc/kfree. |
108 | * Use generic list implementation from <linux/list.h>. | 108 | * Use generic list implementation from <linux/list.h>. |
109 | * Sped up posix_locks_deadlock by only considering blocked locks. | 109 | * Sped up posix_locks_deadlock by only considering blocked locks. |
110 | * Matthew Wilcox <willy@debian.org>, March, 2000. | 110 | * Matthew Wilcox <willy@debian.org>, March, 2000. |
111 | * | 111 | * |
112 | * Leases and LOCK_MAND | 112 | * Leases and LOCK_MAND |
113 | * Matthew Wilcox <willy@debian.org>, June, 2000. | 113 | * Matthew Wilcox <willy@debian.org>, June, 2000. |
114 | * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. | 114 | * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. |
115 | */ | 115 | */ |
116 | 116 | ||
117 | #include <linux/capability.h> | 117 | #include <linux/capability.h> |
118 | #include <linux/file.h> | 118 | #include <linux/file.h> |
119 | #include <linux/fdtable.h> | 119 | #include <linux/fdtable.h> |
120 | #include <linux/fs.h> | 120 | #include <linux/fs.h> |
121 | #include <linux/init.h> | 121 | #include <linux/init.h> |
122 | #include <linux/module.h> | 122 | #include <linux/module.h> |
123 | #include <linux/security.h> | 123 | #include <linux/security.h> |
124 | #include <linux/slab.h> | 124 | #include <linux/slab.h> |
125 | #include <linux/smp_lock.h> | 125 | #include <linux/smp_lock.h> |
126 | #include <linux/syscalls.h> | 126 | #include <linux/syscalls.h> |
127 | #include <linux/time.h> | 127 | #include <linux/time.h> |
128 | #include <linux/rcupdate.h> | 128 | #include <linux/rcupdate.h> |
129 | #include <linux/pid_namespace.h> | 129 | #include <linux/pid_namespace.h> |
130 | 130 | ||
131 | #include <asm/uaccess.h> | 131 | #include <asm/uaccess.h> |
132 | 132 | ||
133 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) | 133 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) |
134 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) | 134 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) |
135 | #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) | 135 | #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) |
136 | 136 | ||
137 | int leases_enable = 1; | 137 | int leases_enable = 1; |
138 | int lease_break_time = 45; | 138 | int lease_break_time = 45; |
139 | 139 | ||
140 | #define for_each_lock(inode, lockp) \ | 140 | #define for_each_lock(inode, lockp) \ |
141 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) | 141 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) |
142 | 142 | ||
143 | static LIST_HEAD(file_lock_list); | 143 | static LIST_HEAD(file_lock_list); |
144 | static LIST_HEAD(blocked_list); | 144 | static LIST_HEAD(blocked_list); |
145 | static DEFINE_SPINLOCK(file_lock_lock); | 145 | static DEFINE_SPINLOCK(file_lock_lock); |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * Protects the two list heads above, plus the inode->i_flock list | 148 | * Protects the two list heads above, plus the inode->i_flock list |
149 | * FIXME: should use a spinlock, once lockd and ceph are ready. | 149 | * FIXME: should use a spinlock, once lockd and ceph are ready. |
150 | */ | 150 | */ |
/*
 * Acquire the global file-lock lock described above.  Callers must pair
 * this with unlock_flocks() and must not sleep while holding it.
 */
void lock_flocks(void)
{
	spin_lock(&file_lock_lock);
}
EXPORT_SYMBOL_GPL(lock_flocks);
156 | 156 | ||
/* Release the global file-lock lock taken by lock_flocks(). */
void unlock_flocks(void)
{
	spin_unlock(&file_lock_lock);
}
EXPORT_SYMBOL_GPL(unlock_flocks);
162 | 162 | ||
163 | static struct kmem_cache *filelock_cache __read_mostly; | 163 | static struct kmem_cache *filelock_cache __read_mostly; |
164 | 164 | ||
/*
 * Allocate an empty lock structure.  The slab constructor (init_once)
 * has already run locks_init_lock() on the object, so the caller gets a
 * fully initialised lock.  May sleep (GFP_KERNEL); returns NULL on OOM.
 */
struct file_lock *locks_alloc_lock(void)
{
	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(locks_alloc_lock);
171 | 171 | ||
172 | void locks_release_private(struct file_lock *fl) | 172 | void locks_release_private(struct file_lock *fl) |
173 | { | 173 | { |
174 | if (fl->fl_ops) { | 174 | if (fl->fl_ops) { |
175 | if (fl->fl_ops->fl_release_private) | 175 | if (fl->fl_ops->fl_release_private) |
176 | fl->fl_ops->fl_release_private(fl); | 176 | fl->fl_ops->fl_release_private(fl); |
177 | fl->fl_ops = NULL; | 177 | fl->fl_ops = NULL; |
178 | } | 178 | } |
179 | if (fl->fl_lmops) { | 179 | if (fl->fl_lmops) { |
180 | if (fl->fl_lmops->fl_release_private) | 180 | if (fl->fl_lmops->fl_release_private) |
181 | fl->fl_lmops->fl_release_private(fl); | 181 | fl->fl_lmops->fl_release_private(fl); |
182 | fl->fl_lmops = NULL; | 182 | fl->fl_lmops = NULL; |
183 | } | 183 | } |
184 | 184 | ||
185 | } | 185 | } |
186 | EXPORT_SYMBOL_GPL(locks_release_private); | 186 | EXPORT_SYMBOL_GPL(locks_release_private); |
187 | 187 | ||
/*
 * Free a lock which is not in use.  The BUG_ONs assert that nobody is
 * still sleeping on the lock and that it has been unlinked from both
 * the blocker list and the global lock list before it is freed.
 */
void locks_free_lock(struct file_lock *fl)
{
	BUG_ON(waitqueue_active(&fl->fl_wait));
	BUG_ON(!list_empty(&fl->fl_block));
	BUG_ON(!list_empty(&fl->fl_link));

	/* drop fl_ops/fl_lmops private state before returning to the slab */
	locks_release_private(fl);
	kmem_cache_free(filelock_cache, fl);
}
EXPORT_SYMBOL(locks_free_lock);
199 | 199 | ||
200 | void locks_init_lock(struct file_lock *fl) | 200 | void locks_init_lock(struct file_lock *fl) |
201 | { | 201 | { |
202 | INIT_LIST_HEAD(&fl->fl_link); | 202 | INIT_LIST_HEAD(&fl->fl_link); |
203 | INIT_LIST_HEAD(&fl->fl_block); | 203 | INIT_LIST_HEAD(&fl->fl_block); |
204 | init_waitqueue_head(&fl->fl_wait); | 204 | init_waitqueue_head(&fl->fl_wait); |
205 | fl->fl_next = NULL; | 205 | fl->fl_next = NULL; |
206 | fl->fl_fasync = NULL; | 206 | fl->fl_fasync = NULL; |
207 | fl->fl_owner = NULL; | 207 | fl->fl_owner = NULL; |
208 | fl->fl_pid = 0; | 208 | fl->fl_pid = 0; |
209 | fl->fl_nspid = NULL; | 209 | fl->fl_nspid = NULL; |
210 | fl->fl_file = NULL; | 210 | fl->fl_file = NULL; |
211 | fl->fl_flags = 0; | 211 | fl->fl_flags = 0; |
212 | fl->fl_type = 0; | 212 | fl->fl_type = 0; |
213 | fl->fl_start = fl->fl_end = 0; | 213 | fl->fl_start = fl->fl_end = 0; |
214 | fl->fl_ops = NULL; | 214 | fl->fl_ops = NULL; |
215 | fl->fl_lmops = NULL; | 215 | fl->fl_lmops = NULL; |
216 | } | 216 | } |
217 | 217 | ||
218 | EXPORT_SYMBOL(locks_init_lock); | 218 | EXPORT_SYMBOL(locks_init_lock); |
219 | 219 | ||
/*
 * Slab constructor: put a freshly created file_lock into its
 * well-defined free state.  Runs once per object, not per allocation.
 */
static void init_once(void *foo)
{
	locks_init_lock(foo);
}
230 | 230 | ||
/*
 * Copy the private (filesystem / lock-manager) state from @fl to @new.
 * The filesystem may clone per-lock data via fl_ops->fl_copy_lock;
 * lock-manager operations have no per-lock copy hook, so only the
 * fl_lmops pointer itself is propagated.
 */
static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
{
	if (fl->fl_ops) {
		if (fl->fl_ops->fl_copy_lock)
			fl->fl_ops->fl_copy_lock(new, fl);
		new->fl_ops = fl->fl_ops;
	}
	if (fl->fl_lmops)
		new->fl_lmops = fl->fl_lmops;
}
244 | 241 | ||
245 | /* | 242 | /* |
246 | * Initialize a new lock from an existing file_lock structure. | 243 | * Initialize a new lock from an existing file_lock structure. |
247 | */ | 244 | */ |
248 | void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) | 245 | void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) |
249 | { | 246 | { |
250 | new->fl_owner = fl->fl_owner; | 247 | new->fl_owner = fl->fl_owner; |
251 | new->fl_pid = fl->fl_pid; | 248 | new->fl_pid = fl->fl_pid; |
252 | new->fl_file = NULL; | 249 | new->fl_file = NULL; |
253 | new->fl_flags = fl->fl_flags; | 250 | new->fl_flags = fl->fl_flags; |
254 | new->fl_type = fl->fl_type; | 251 | new->fl_type = fl->fl_type; |
255 | new->fl_start = fl->fl_start; | 252 | new->fl_start = fl->fl_start; |
256 | new->fl_end = fl->fl_end; | 253 | new->fl_end = fl->fl_end; |
257 | new->fl_ops = NULL; | 254 | new->fl_ops = NULL; |
258 | new->fl_lmops = NULL; | 255 | new->fl_lmops = NULL; |
259 | } | 256 | } |
260 | EXPORT_SYMBOL(__locks_copy_lock); | 257 | EXPORT_SYMBOL(__locks_copy_lock); |
261 | 258 | ||
/*
 * Copy @fl into @new, including the file pointer and the private
 * fl_ops/fl_lmops state.  Any private state already held by @new is
 * released first, so this is safe on a previously populated lock.
 * Note the ordering: release must precede the field-wise copy.
 */
void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
	locks_release_private(new);

	__locks_copy_lock(new, fl);
	new->fl_file = fl->fl_file;
	new->fl_ops = fl->fl_ops;
	new->fl_lmops = fl->fl_lmops;

	/* let the owners of the private state duplicate it into @new */
	locks_copy_private(new, fl);
}

EXPORT_SYMBOL(locks_copy_lock);
275 | 272 | ||
276 | static inline int flock_translate_cmd(int cmd) { | 273 | static inline int flock_translate_cmd(int cmd) { |
277 | if (cmd & LOCK_MAND) | 274 | if (cmd & LOCK_MAND) |
278 | return cmd & (LOCK_MAND | LOCK_RW); | 275 | return cmd & (LOCK_MAND | LOCK_RW); |
279 | switch (cmd) { | 276 | switch (cmd) { |
280 | case LOCK_SH: | 277 | case LOCK_SH: |
281 | return F_RDLCK; | 278 | return F_RDLCK; |
282 | case LOCK_EX: | 279 | case LOCK_EX: |
283 | return F_WRLCK; | 280 | return F_WRLCK; |
284 | case LOCK_UN: | 281 | case LOCK_UN: |
285 | return F_UNLCK; | 282 | return F_UNLCK; |
286 | } | 283 | } |
287 | return -EINVAL; | 284 | return -EINVAL; |
288 | } | 285 | } |
289 | 286 | ||
290 | /* Fill in a file_lock structure with an appropriate FLOCK lock. */ | 287 | /* Fill in a file_lock structure with an appropriate FLOCK lock. */ |
291 | static int flock_make_lock(struct file *filp, struct file_lock **lock, | 288 | static int flock_make_lock(struct file *filp, struct file_lock **lock, |
292 | unsigned int cmd) | 289 | unsigned int cmd) |
293 | { | 290 | { |
294 | struct file_lock *fl; | 291 | struct file_lock *fl; |
295 | int type = flock_translate_cmd(cmd); | 292 | int type = flock_translate_cmd(cmd); |
296 | if (type < 0) | 293 | if (type < 0) |
297 | return type; | 294 | return type; |
298 | 295 | ||
299 | fl = locks_alloc_lock(); | 296 | fl = locks_alloc_lock(); |
300 | if (fl == NULL) | 297 | if (fl == NULL) |
301 | return -ENOMEM; | 298 | return -ENOMEM; |
302 | 299 | ||
303 | fl->fl_file = filp; | 300 | fl->fl_file = filp; |
304 | fl->fl_pid = current->tgid; | 301 | fl->fl_pid = current->tgid; |
305 | fl->fl_flags = FL_FLOCK; | 302 | fl->fl_flags = FL_FLOCK; |
306 | fl->fl_type = type; | 303 | fl->fl_type = type; |
307 | fl->fl_end = OFFSET_MAX; | 304 | fl->fl_end = OFFSET_MAX; |
308 | 305 | ||
309 | *lock = fl; | 306 | *lock = fl; |
310 | return 0; | 307 | return 0; |
311 | } | 308 | } |
312 | 309 | ||
313 | static int assign_type(struct file_lock *fl, int type) | 310 | static int assign_type(struct file_lock *fl, int type) |
314 | { | 311 | { |
315 | switch (type) { | 312 | switch (type) { |
316 | case F_RDLCK: | 313 | case F_RDLCK: |
317 | case F_WRLCK: | 314 | case F_WRLCK: |
318 | case F_UNLCK: | 315 | case F_UNLCK: |
319 | fl->fl_type = type; | 316 | fl->fl_type = type; |
320 | break; | 317 | break; |
321 | default: | 318 | default: |
322 | return -EINVAL; | 319 | return -EINVAL; |
323 | } | 320 | } |
324 | return 0; | 321 | return 0; |
325 | } | 322 | } |
326 | 323 | ||
327 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX | 324 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX |
328 | * style lock. | 325 | * style lock. |
329 | */ | 326 | */ |
330 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | 327 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, |
331 | struct flock *l) | 328 | struct flock *l) |
332 | { | 329 | { |
333 | off_t start, end; | 330 | off_t start, end; |
334 | 331 | ||
335 | switch (l->l_whence) { | 332 | switch (l->l_whence) { |
336 | case SEEK_SET: | 333 | case SEEK_SET: |
337 | start = 0; | 334 | start = 0; |
338 | break; | 335 | break; |
339 | case SEEK_CUR: | 336 | case SEEK_CUR: |
340 | start = filp->f_pos; | 337 | start = filp->f_pos; |
341 | break; | 338 | break; |
342 | case SEEK_END: | 339 | case SEEK_END: |
343 | start = i_size_read(filp->f_path.dentry->d_inode); | 340 | start = i_size_read(filp->f_path.dentry->d_inode); |
344 | break; | 341 | break; |
345 | default: | 342 | default: |
346 | return -EINVAL; | 343 | return -EINVAL; |
347 | } | 344 | } |
348 | 345 | ||
349 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; | 346 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; |
350 | POSIX-2001 defines it. */ | 347 | POSIX-2001 defines it. */ |
351 | start += l->l_start; | 348 | start += l->l_start; |
352 | if (start < 0) | 349 | if (start < 0) |
353 | return -EINVAL; | 350 | return -EINVAL; |
354 | fl->fl_end = OFFSET_MAX; | 351 | fl->fl_end = OFFSET_MAX; |
355 | if (l->l_len > 0) { | 352 | if (l->l_len > 0) { |
356 | end = start + l->l_len - 1; | 353 | end = start + l->l_len - 1; |
357 | fl->fl_end = end; | 354 | fl->fl_end = end; |
358 | } else if (l->l_len < 0) { | 355 | } else if (l->l_len < 0) { |
359 | end = start - 1; | 356 | end = start - 1; |
360 | fl->fl_end = end; | 357 | fl->fl_end = end; |
361 | start += l->l_len; | 358 | start += l->l_len; |
362 | if (start < 0) | 359 | if (start < 0) |
363 | return -EINVAL; | 360 | return -EINVAL; |
364 | } | 361 | } |
365 | fl->fl_start = start; /* we record the absolute position */ | 362 | fl->fl_start = start; /* we record the absolute position */ |
366 | if (fl->fl_end < fl->fl_start) | 363 | if (fl->fl_end < fl->fl_start) |
367 | return -EOVERFLOW; | 364 | return -EOVERFLOW; |
368 | 365 | ||
369 | fl->fl_owner = current->files; | 366 | fl->fl_owner = current->files; |
370 | fl->fl_pid = current->tgid; | 367 | fl->fl_pid = current->tgid; |
371 | fl->fl_file = filp; | 368 | fl->fl_file = filp; |
372 | fl->fl_flags = FL_POSIX; | 369 | fl->fl_flags = FL_POSIX; |
373 | fl->fl_ops = NULL; | 370 | fl->fl_ops = NULL; |
374 | fl->fl_lmops = NULL; | 371 | fl->fl_lmops = NULL; |
375 | 372 | ||
376 | return assign_type(fl, l->l_type); | 373 | return assign_type(fl, l->l_type); |
377 | } | 374 | } |
378 | 375 | ||
379 | #if BITS_PER_LONG == 32 | 376 | #if BITS_PER_LONG == 32 |
/*
 * 32-bit variant of flock_to_posix_lock(): identical validation and
 * copy, but takes a "struct flock64" so full 64-bit offsets survive in
 * loff_t on kernels where off_t is only 32 bits wide.
 */
static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
				 struct flock64 *l)
{
	loff_t start;

	/* Resolve l_whence into an absolute starting offset. */
	switch (l->l_whence) {
	case SEEK_SET:
		start = 0;
		break;
	case SEEK_CUR:
		start = filp->f_pos;
		break;
	case SEEK_END:
		start = i_size_read(filp->f_path.dentry->d_inode);
		break;
	default:
		return -EINVAL;
	}

	start += l->l_start;
	if (start < 0)
		return -EINVAL;
	fl->fl_end = OFFSET_MAX;	/* l_len == 0 means "to end of file" */
	if (l->l_len > 0) {
		fl->fl_end = start + l->l_len - 1;
	} else if (l->l_len < 0) {
		/* negative l_len locks the bytes *preceding* start */
		fl->fl_end = start - 1;
		start += l->l_len;
		if (start < 0)
			return -EINVAL;
	}
	fl->fl_start = start;	/* we record the absolute position */
	if (fl->fl_end < fl->fl_start)
		return -EOVERFLOW;

	fl->fl_owner = current->files;
	fl->fl_pid = current->tgid;
	fl->fl_file = filp;
	fl->fl_flags = FL_POSIX;
	fl->fl_ops = NULL;
	fl->fl_lmops = NULL;

	/* inline type validation: same accepted set as assign_type() */
	switch (l->l_type) {
	case F_RDLCK:
	case F_WRLCK:
	case F_UNLCK:
		fl->fl_type = l->l_type;
		break;
	default:
		return -EINVAL;
	}

	return (0);
}
434 | #endif | 431 | #endif |
435 | 432 | ||
/* default lease lock manager operations */

/*
 * fl_break for leases: asynchronously notify the lease holder via
 * SIGIO/POLL_MSG that the lease needs to be broken.
 */
static void lease_break_callback(struct file_lock *fl)
{
	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
}
441 | 438 | ||
442 | static void lease_release_private_callback(struct file_lock *fl) | 439 | static void lease_release_private_callback(struct file_lock *fl) |
443 | { | 440 | { |
444 | if (!fl->fl_file) | 441 | if (!fl->fl_file) |
445 | return; | 442 | return; |
446 | 443 | ||
447 | f_delown(fl->fl_file); | 444 | f_delown(fl->fl_file); |
448 | fl->fl_file->f_owner.signum = 0; | 445 | fl->fl_file->f_owner.signum = 0; |
449 | } | 446 | } |
450 | 447 | ||
/*
 * fl_mylease for leases: a candidate lease @try matches an existing
 * lease @fl iff both refer to the same open file.
 */
static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
{
	return fl->fl_file == try->fl_file;
}
455 | 452 | ||
/* Lock-manager callback table used by all leases set up in this file. */
static const struct lock_manager_operations lease_manager_ops = {
	.fl_break = lease_break_callback,
	.fl_release_private = lease_release_private_callback,
	.fl_mylease = lease_mylease_callback,
	.fl_change = lease_modify,
};
462 | 459 | ||
463 | /* | 460 | /* |
464 | * Initialize a lease, use the default lock manager operations | 461 | * Initialize a lease, use the default lock manager operations |
465 | */ | 462 | */ |
466 | static int lease_init(struct file *filp, int type, struct file_lock *fl) | 463 | static int lease_init(struct file *filp, int type, struct file_lock *fl) |
467 | { | 464 | { |
468 | if (assign_type(fl, type) != 0) | 465 | if (assign_type(fl, type) != 0) |
469 | return -EINVAL; | 466 | return -EINVAL; |
470 | 467 | ||
471 | fl->fl_owner = current->files; | 468 | fl->fl_owner = current->files; |
472 | fl->fl_pid = current->tgid; | 469 | fl->fl_pid = current->tgid; |
473 | 470 | ||
474 | fl->fl_file = filp; | 471 | fl->fl_file = filp; |
475 | fl->fl_flags = FL_LEASE; | 472 | fl->fl_flags = FL_LEASE; |
476 | fl->fl_start = 0; | 473 | fl->fl_start = 0; |
477 | fl->fl_end = OFFSET_MAX; | 474 | fl->fl_end = OFFSET_MAX; |
478 | fl->fl_ops = NULL; | 475 | fl->fl_ops = NULL; |
479 | fl->fl_lmops = &lease_manager_ops; | 476 | fl->fl_lmops = &lease_manager_ops; |
480 | return 0; | 477 | return 0; |
481 | } | 478 | } |
482 | 479 | ||
483 | /* Allocate a file_lock initialised to this type of lease */ | 480 | /* Allocate a file_lock initialised to this type of lease */ |
484 | static struct file_lock *lease_alloc(struct file *filp, int type) | 481 | static struct file_lock *lease_alloc(struct file *filp, int type) |
485 | { | 482 | { |
486 | struct file_lock *fl = locks_alloc_lock(); | 483 | struct file_lock *fl = locks_alloc_lock(); |
487 | int error = -ENOMEM; | 484 | int error = -ENOMEM; |
488 | 485 | ||
489 | if (fl == NULL) | 486 | if (fl == NULL) |
490 | return ERR_PTR(error); | 487 | return ERR_PTR(error); |
491 | 488 | ||
492 | error = lease_init(filp, type, fl); | 489 | error = lease_init(filp, type, fl); |
493 | if (error) { | 490 | if (error) { |
494 | locks_free_lock(fl); | 491 | locks_free_lock(fl); |
495 | return ERR_PTR(error); | 492 | return ERR_PTR(error); |
496 | } | 493 | } |
497 | return fl; | 494 | return fl; |
498 | } | 495 | } |
499 | 496 | ||
500 | /* Check if two locks overlap each other. | 497 | /* Check if two locks overlap each other. |
501 | */ | 498 | */ |
502 | static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) | 499 | static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) |
503 | { | 500 | { |
504 | return ((fl1->fl_end >= fl2->fl_start) && | 501 | return ((fl1->fl_end >= fl2->fl_start) && |
505 | (fl2->fl_end >= fl1->fl_start)); | 502 | (fl2->fl_end >= fl1->fl_start)); |
506 | } | 503 | } |
507 | 504 | ||
/*
 * Check whether two locks have the same owner.
 *
 * A lock manager can supply its own ownership comparison via
 * fl_compare_owner; in that case both locks must come from the same
 * manager before the callback's verdict counts.  Otherwise ownership is
 * a plain comparison of the fl_owner pointers.
 */
static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
	if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
		return fl2->fl_lmops == fl1->fl_lmops &&
			fl1->fl_lmops->fl_compare_owner(fl1, fl2);
	return fl1->fl_owner == fl2->fl_owner;
}
518 | 515 | ||
/* Remove waiter from blocker's block list.
 * When blocker ends up pointing to itself then the list is empty.
 *
 * Also drops the waiter from the global blocked_list and clears its
 * link to the blocker.  Caller must hold lock_flocks() — see the
 * locked wrapper locks_delete_block().
 */
static void __locks_delete_block(struct file_lock *waiter)
{
	list_del_init(&waiter->fl_block);
	list_del_init(&waiter->fl_link);
	waiter->fl_next = NULL;
}
528 | 525 | ||
/*
 * Locked wrapper around __locks_delete_block(): take the global
 * file-lock lock, unlink @waiter from its blocker, release the lock.
 */
static void locks_delete_block(struct file_lock *waiter)
{
	lock_flocks();
	__locks_delete_block(waiter);
	unlock_flocks();
}
537 | 534 | ||
/* Insert waiter into blocker's block list.
 * We use a circular list so that processes can be easily woken up in
 * the order they blocked. The documentation doesn't require this but
 * it seems like the reasonable thing to do.
 *
 * POSIX waiters additionally go on the global blocked_list, which is
 * what the posix deadlock-detection code examines (see the comment
 * below posix_test_lock()).  Caller must hold lock_flocks().
 */
static void locks_insert_block(struct file_lock *blocker,
					struct file_lock *waiter)
{
	/* a waiter can only be blocked on one lock at a time */
	BUG_ON(!list_empty(&waiter->fl_block));
	list_add_tail(&waiter->fl_block, &blocker->fl_block);
	waiter->fl_next = blocker;
	if (IS_POSIX(blocker))
		list_add(&waiter->fl_link, &blocked_list);
}
552 | 549 | ||
/* Wake up processes blocked waiting for blocker.
 * If told to wait then schedule the processes until the block list
 * is empty, otherwise empty the block list ourselves.
 *
 * Each waiter is unlinked before it is woken, so a waiter that runs
 * immediately cannot find itself still on the list.
 */
static void locks_wake_up_blocks(struct file_lock *blocker)
{
	while (!list_empty(&blocker->fl_block)) {
		struct file_lock *waiter;

		waiter = list_first_entry(&blocker->fl_block,
				struct file_lock, fl_block);
		__locks_delete_block(waiter);
		/*
		 * A lock manager may want a callback (fl_notify) instead
		 * of a plain wakeup; otherwise wake the sleeping task.
		 */
		if (waiter->fl_lmops && waiter->fl_lmops->fl_notify)
			waiter->fl_lmops->fl_notify(waiter);
		else
			wake_up(&waiter->fl_wait);
	}
}
571 | 568 | ||
/* Insert file lock fl into an inode's lock list at the position indicated
 * by pos. At the same time add the lock to the global file lock list.
 *
 * Takes a reference on the owner's tgid pid so the owning process can
 * later be reported relative to a pid namespace (see the pid_vnr()
 * use in posix_test_lock()); released in locks_delete_lock().
 */
static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
{
	list_add(&fl->fl_link, &file_lock_list);

	fl->fl_nspid = get_pid(task_tgid(current));

	/* insert into file's list */
	fl->fl_next = *pos;
	*pos = fl;
}
585 | 582 | ||
/*
 * Delete a lock and then free it.
 * Wake up processes that are blocked waiting for this lock,
 * notify the FS that the lock has been cleared and
 * finally free the lock.
 */
static void locks_delete_lock(struct file_lock **thisfl_p)
{
	struct file_lock *fl = *thisfl_p;

	/* unlink from the inode chain and the global file lock list */
	*thisfl_p = fl->fl_next;
	fl->fl_next = NULL;
	list_del_init(&fl->fl_link);

	/* tear down any fasync notification state attached to the lock */
	fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
	if (fl->fl_fasync != NULL) {
		printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
		fl->fl_fasync = NULL;
	}

	if (fl->fl_nspid) {
		/* drop the pid reference taken in locks_insert_lock() */
		put_pid(fl->fl_nspid);
		fl->fl_nspid = NULL;
	}

	locks_wake_up_blocks(fl);
	locks_free_lock(fl);
}
614 | 611 | ||
615 | /* Determine if lock sys_fl blocks lock caller_fl. Common functionality | 612 | /* Determine if lock sys_fl blocks lock caller_fl. Common functionality |
616 | * checks for shared/exclusive status of overlapping locks. | 613 | * checks for shared/exclusive status of overlapping locks. |
617 | */ | 614 | */ |
618 | static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) | 615 | static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) |
619 | { | 616 | { |
620 | if (sys_fl->fl_type == F_WRLCK) | 617 | if (sys_fl->fl_type == F_WRLCK) |
621 | return 1; | 618 | return 1; |
622 | if (caller_fl->fl_type == F_WRLCK) | 619 | if (caller_fl->fl_type == F_WRLCK) |
623 | return 1; | 620 | return 1; |
624 | return 0; | 621 | return 0; |
625 | } | 622 | } |
626 | 623 | ||
/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
 * checking before calling the locks_conflict().
 */
static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{
	/* only POSIX locks participate here */
	if (!IS_POSIX(sys_fl))
		return 0;

	/* POSIX locks owned by the same process never conflict */
	if (posix_same_owner(caller_fl, sys_fl))
		return 0;

	/* disjoint byte ranges cannot conflict */
	if (!locks_overlap(caller_fl, sys_fl))
		return 0;

	return locks_conflict(caller_fl, sys_fl);
}
644 | 641 | ||
645 | /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific | 642 | /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific |
646 | * checking before calling the locks_conflict(). | 643 | * checking before calling the locks_conflict(). |
647 | */ | 644 | */ |
648 | static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) | 645 | static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) |
649 | { | 646 | { |
650 | /* FLOCK locks referring to the same filp do not conflict with | 647 | /* FLOCK locks referring to the same filp do not conflict with |
651 | * each other. | 648 | * each other. |
652 | */ | 649 | */ |
653 | if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) | 650 | if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) |
654 | return (0); | 651 | return (0); |
655 | if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) | 652 | if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) |
656 | return 0; | 653 | return 0; |
657 | 654 | ||
658 | return (locks_conflict(caller_fl, sys_fl)); | 655 | return (locks_conflict(caller_fl, sys_fl)); |
659 | } | 656 | } |
660 | 657 | ||
661 | void | 658 | void |
662 | posix_test_lock(struct file *filp, struct file_lock *fl) | 659 | posix_test_lock(struct file *filp, struct file_lock *fl) |
663 | { | 660 | { |
664 | struct file_lock *cfl; | 661 | struct file_lock *cfl; |
665 | 662 | ||
666 | lock_flocks(); | 663 | lock_flocks(); |
667 | for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { | 664 | for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { |
668 | if (!IS_POSIX(cfl)) | 665 | if (!IS_POSIX(cfl)) |
669 | continue; | 666 | continue; |
670 | if (posix_locks_conflict(fl, cfl)) | 667 | if (posix_locks_conflict(fl, cfl)) |
671 | break; | 668 | break; |
672 | } | 669 | } |
673 | if (cfl) { | 670 | if (cfl) { |
674 | __locks_copy_lock(fl, cfl); | 671 | __locks_copy_lock(fl, cfl); |
675 | if (cfl->fl_nspid) | 672 | if (cfl->fl_nspid) |
676 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | 673 | fl->fl_pid = pid_vnr(cfl->fl_nspid); |
677 | } else | 674 | } else |
678 | fl->fl_type = F_UNLCK; | 675 | fl->fl_type = F_UNLCK; |
679 | unlock_flocks(); | 676 | unlock_flocks(); |
680 | return; | 677 | return; |
681 | } | 678 | } |
682 | EXPORT_SYMBOL(posix_test_lock); | 679 | EXPORT_SYMBOL(posix_test_lock); |
683 | 680 | ||
/*
 * Deadlock detection:
 *
 * We attempt to detect deadlocks that are due purely to posix file
 * locks.
 *
 * We assume that a task can be waiting for at most one lock at a time.
 * So for any acquired lock, the process holding that lock may be
 * waiting on at most one other lock. That lock in turn may be held by
 * someone waiting for at most one other lock. Given a requested lock
 * caller_fl which is about to wait for a conflicting lock block_fl, we
 * follow this chain of waiters to ensure we are not about to create a
 * cycle.
 *
 * Since we do this before we ever put a process to sleep on a lock, we
 * are ensured that there is never a cycle; that is what guarantees that
 * the while() loop in posix_locks_deadlock() eventually completes.
 *
 * Note: the above assumption may not be true when handling lock
 * requests from a broken NFS client. It may also fail in the presence
 * of tasks (such as posix threads) sharing the same open file table.
 *
 * To handle those cases, we just bail out after a few iterations.
 */

/* Upper bound on the waiter-chain walk in posix_locks_deadlock();
 * guards against the broken-client / shared-file-table cases above. */
#define MAX_DEADLK_ITERATIONS 10

/* Find a lock that the owner of the given block_fl is blocking on.
 * Scans the global blocked_list for an entry with the same owner as
 * block_fl and returns the lock that waiter is queued behind (fl_next),
 * or NULL if that owner is not currently blocked on anything.
 * Called under lock_flocks() via posix_locks_deadlock() from
 * __posix_lock_file().
 */
static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
{
	struct file_lock *fl;

	list_for_each_entry(fl, &blocked_list, fl_link) {
		if (posix_same_owner(fl, block_fl))
			return fl->fl_next;
	}
	return NULL;
}
722 | 719 | ||
723 | static int posix_locks_deadlock(struct file_lock *caller_fl, | 720 | static int posix_locks_deadlock(struct file_lock *caller_fl, |
724 | struct file_lock *block_fl) | 721 | struct file_lock *block_fl) |
725 | { | 722 | { |
726 | int i = 0; | 723 | int i = 0; |
727 | 724 | ||
728 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { | 725 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { |
729 | if (i++ > MAX_DEADLK_ITERATIONS) | 726 | if (i++ > MAX_DEADLK_ITERATIONS) |
730 | return 0; | 727 | return 0; |
731 | if (posix_same_owner(caller_fl, block_fl)) | 728 | if (posix_same_owner(caller_fl, block_fl)) |
732 | return 1; | 729 | return 1; |
733 | } | 730 | } |
734 | return 0; | 731 | return 0; |
735 | } | 732 | } |
736 | 733 | ||
/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
 * after any leases, but before any posix locks.
 *
 * Note that if called with an FL_EXISTS argument, the caller may determine
 * whether or not a lock was successfully freed by testing the return
 * value for -ENOENT.
 */
static int flock_lock_file(struct file *filp, struct file_lock *request)
{
	struct file_lock *new_fl = NULL;
	struct file_lock **before;
	struct inode * inode = filp->f_path.dentry->d_inode;
	int error = 0;
	int found = 0;

	/* Allocate outside the spinlock; only needed when we may actually
	 * insert a lock (not for a pure conflict probe or an unlock). */
	if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
		new_fl = locks_alloc_lock();
		if (!new_fl)
			return -ENOMEM;
	}

	lock_flocks();
	if (request->fl_flags & FL_ACCESS)
		goto find_conflict;

	/* Remove any FLOCK lock this filp already holds; a new flock()
	 * request replaces (upgrades/downgrades) the existing lock. */
	for_each_lock(inode, before) {
		struct file_lock *fl = *before;
		if (IS_POSIX(fl))
			break;
		if (IS_LEASE(fl))
			continue;
		if (filp != fl->fl_file)
			continue;
		if (request->fl_type == fl->fl_type)
			goto out;	/* already hold this type: nothing to do */
		found = 1;
		locks_delete_lock(before);
		break;
	}

	if (request->fl_type == F_UNLCK) {
		if ((request->fl_flags & FL_EXISTS) && !found)
			error = -ENOENT;
		goto out;
	}

	/*
	 * If a higher-priority process was blocked on the old file lock,
	 * give it the opportunity to lock the file.
	 */
	if (found) {
		unlock_flocks();
		cond_resched();
		lock_flocks();
	}

find_conflict:
	/* Scan the FLOCK region of the list for a conflicting lock. */
	for_each_lock(inode, before) {
		struct file_lock *fl = *before;
		if (IS_POSIX(fl))
			break;
		if (IS_LEASE(fl))
			continue;
		if (!flock_locks_conflict(request, fl))
			continue;
		error = -EAGAIN;
		if (!(request->fl_flags & FL_SLEEP))
			goto out;
		/* Blocking request: queue it behind the conflicting lock. */
		error = FILE_LOCK_DEFERRED;
		locks_insert_block(fl, request);
		goto out;
	}
	if (request->fl_flags & FL_ACCESS)
		goto out;
	locks_copy_lock(new_fl, request);
	locks_insert_lock(before, new_fl);
	new_fl = NULL;	/* ownership transferred to the inode's lock list */
	error = 0;

out:
	unlock_flocks();
	if (new_fl)
		locks_free_lock(new_fl);
	return error;
}
822 | 819 | ||
/* Core POSIX byte-range lock engine: apply, merge, split or remove the
 * lock described by @request on @inode's lock list.
 * With FL_ACCESS set, only checks for conflicts and never modifies the
 * list. On conflict, a copy of the blocking lock is returned through
 * @conflock (if non-NULL) and -EAGAIN, -EDEADLK or FILE_LOCK_DEFERRED
 * is returned depending on FL_SLEEP and deadlock detection.
 */
static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
{
	struct file_lock *fl;
	struct file_lock *new_fl = NULL;
	struct file_lock *new_fl2 = NULL;
	struct file_lock *left = NULL;
	struct file_lock *right = NULL;
	struct file_lock **before;
	int error, added = 0;

	/*
	 * We may need two file_lock structures for this operation,
	 * so we get them in advance to avoid races.
	 *
	 * In some cases we can be sure, that no new locks will be needed
	 * (a conflict probe, or an unlock of the whole file range).
	 * Allocation failure is not checked here; it is handled below via
	 * the !new_fl and !new_fl2 tests, after we know a lock is needed.
	 */
	if (!(request->fl_flags & FL_ACCESS) &&
	    (request->fl_type != F_UNLCK ||
	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
		new_fl = locks_alloc_lock();
		new_fl2 = locks_alloc_lock();
	}

	lock_flocks();
	if (request->fl_type != F_UNLCK) {
		/* Look for a conflicting POSIX lock anywhere on the inode. */
		for_each_lock(inode, before) {
			fl = *before;
			if (!IS_POSIX(fl))
				continue;
			if (!posix_locks_conflict(request, fl))
				continue;
			if (conflock)
				__locks_copy_lock(conflock, fl);
			error = -EAGAIN;
			if (!(request->fl_flags & FL_SLEEP))
				goto out;
			error = -EDEADLK;
			if (posix_locks_deadlock(request, fl))
				goto out;
			/* Safe to sleep: queue behind the conflicting lock. */
			error = FILE_LOCK_DEFERRED;
			locks_insert_block(fl, request);
			goto out;
		}
	}

	/* If we're just looking for a conflict, we're done. */
	error = 0;
	if (request->fl_flags & FL_ACCESS)
		goto out;

	/*
	 * Find the first old lock with the same owner as the new lock.
	 */

	before = &inode->i_flock;

	/* First skip locks owned by other processes.  */
	while ((fl = *before) && (!IS_POSIX(fl) ||
				  !posix_same_owner(request, fl))) {
		before = &fl->fl_next;
	}

	/* Process locks with this owner.  */
	while ((fl = *before) && posix_same_owner(request, fl)) {
		/* Detect adjacent or overlapping regions (if same lock type)
		 */
		if (request->fl_type == fl->fl_type) {
			/* In all comparisons of start vs end, use
			 * "start - 1" rather than "end + 1". If end
			 * is OFFSET_MAX, end + 1 will become negative.
			 */
			if (fl->fl_end < request->fl_start - 1)
				goto next_lock;
			/* If the next lock in the list has entirely bigger
			 * addresses than the new one, insert the lock here.
			 */
			if (fl->fl_start - 1 > request->fl_end)
				break;

			/* If we come here, the new and old lock are of the
			 * same type and adjacent or overlapping. Make one
			 * lock yielding from the lower start address of both
			 * locks to the higher end address.
			 */
			if (fl->fl_start > request->fl_start)
				fl->fl_start = request->fl_start;
			else
				request->fl_start = fl->fl_start;
			if (fl->fl_end < request->fl_end)
				fl->fl_end = request->fl_end;
			else
				request->fl_end = fl->fl_end;
			if (added) {
				/* Already absorbed into an earlier lock:
				 * this one is now redundant. */
				locks_delete_lock(before);
				continue;
			}
			request = fl;
			added = 1;
		}
		else {
			/* Processing for different lock types is a bit
			 * more complex.
			 */
			if (fl->fl_end < request->fl_start)
				goto next_lock;
			if (fl->fl_start > request->fl_end)
				break;
			if (request->fl_type == F_UNLCK)
				added = 1;
			if (fl->fl_start < request->fl_start)
				left = fl;
			/* If the next lock in the list has a higher end
			 * address than the new one, insert the new one here.
			 */
			if (fl->fl_end > request->fl_end) {
				right = fl;
				break;
			}
			if (fl->fl_start >= request->fl_start) {
				/* The new lock completely replaces an old
				 * one (This may happen several times).
				 */
				if (added) {
					locks_delete_lock(before);
					continue;
				}
				/* Replace the old lock with the new one.
				 * Wake up anybody waiting for the old one,
				 * as the change in lock type might satisfy
				 * their needs.
				 */
				locks_wake_up_blocks(fl);
				fl->fl_start = request->fl_start;
				fl->fl_end = request->fl_end;
				fl->fl_type = request->fl_type;
				locks_release_private(fl);
				locks_copy_private(fl, request);
				request = fl;
				added = 1;
			}
		}
		/* Go on to next lock.
		 */
	next_lock:
		before = &fl->fl_next;
	}

	/*
	 * The above code only modifies existing locks in case of
	 * merging or replacing. If new lock(s) need to be inserted
	 * all modifications are done below this, so it's safe yet to
	 * bail out.
	 */
	error = -ENOLCK; /* "no luck" */
	if (right && left == right && !new_fl2)
		goto out;

	error = 0;
	if (!added) {
		if (request->fl_type == F_UNLCK) {
			if (request->fl_flags & FL_EXISTS)
				error = -ENOENT;
			goto out;
		}

		if (!new_fl) {
			error = -ENOLCK;
			goto out;
		}
		locks_copy_lock(new_fl, request);
		locks_insert_lock(before, new_fl);
		new_fl = NULL;
	}
	if (right) {
		if (left == right) {
			/* The new lock breaks the old one in two pieces,
			 * so we have to use the second new lock.
			 */
			left = new_fl2;
			new_fl2 = NULL;
			locks_copy_lock(left, right);
			locks_insert_lock(before, left);
		}
		right->fl_start = request->fl_end + 1;
		locks_wake_up_blocks(right);
	}
	if (left) {
		left->fl_end = request->fl_start - 1;
		locks_wake_up_blocks(left);
	}
 out:
	unlock_flocks();
	/*
	 * Free any unused locks.
	 */
	if (new_fl)
		locks_free_lock(new_fl);
	if (new_fl2)
		locks_free_lock(new_fl2);
	return error;
}
1024 | 1021 | ||
1025 | /** | 1022 | /** |
1026 | * posix_lock_file - Apply a POSIX-style lock to a file | 1023 | * posix_lock_file - Apply a POSIX-style lock to a file |
1027 | * @filp: The file to apply the lock to | 1024 | * @filp: The file to apply the lock to |
1028 | * @fl: The lock to be applied | 1025 | * @fl: The lock to be applied |
1029 | * @conflock: Place to return a copy of the conflicting lock, if found. | 1026 | * @conflock: Place to return a copy of the conflicting lock, if found. |
1030 | * | 1027 | * |
1031 | * Add a POSIX style lock to a file. | 1028 | * Add a POSIX style lock to a file. |
1032 | * We merge adjacent & overlapping locks whenever possible. | 1029 | * We merge adjacent & overlapping locks whenever possible. |
1033 | * POSIX locks are sorted by owner task, then by starting address | 1030 | * POSIX locks are sorted by owner task, then by starting address |
1034 | * | 1031 | * |
1035 | * Note that if called with an FL_EXISTS argument, the caller may determine | 1032 | * Note that if called with an FL_EXISTS argument, the caller may determine |
1036 | * whether or not a lock was successfully freed by testing the return | 1033 | * whether or not a lock was successfully freed by testing the return |
1037 | * value for -ENOENT. | 1034 | * value for -ENOENT. |
1038 | */ | 1035 | */ |
1039 | int posix_lock_file(struct file *filp, struct file_lock *fl, | 1036 | int posix_lock_file(struct file *filp, struct file_lock *fl, |
1040 | struct file_lock *conflock) | 1037 | struct file_lock *conflock) |
1041 | { | 1038 | { |
1042 | return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); | 1039 | return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); |
1043 | } | 1040 | } |
1044 | EXPORT_SYMBOL(posix_lock_file); | 1041 | EXPORT_SYMBOL(posix_lock_file); |
1045 | 1042 | ||
1046 | /** | 1043 | /** |
1047 | * posix_lock_file_wait - Apply a POSIX-style lock to a file | 1044 | * posix_lock_file_wait - Apply a POSIX-style lock to a file |
1048 | * @filp: The file to apply the lock to | 1045 | * @filp: The file to apply the lock to |
1049 | * @fl: The lock to be applied | 1046 | * @fl: The lock to be applied |
1050 | * | 1047 | * |
1051 | * Add a POSIX style lock to a file. | 1048 | * Add a POSIX style lock to a file. |
1052 | * We merge adjacent & overlapping locks whenever possible. | 1049 | * We merge adjacent & overlapping locks whenever possible. |
1053 | * POSIX locks are sorted by owner task, then by starting address | 1050 | * POSIX locks are sorted by owner task, then by starting address |
1054 | */ | 1051 | */ |
1055 | int posix_lock_file_wait(struct file *filp, struct file_lock *fl) | 1052 | int posix_lock_file_wait(struct file *filp, struct file_lock *fl) |
1056 | { | 1053 | { |
1057 | int error; | 1054 | int error; |
1058 | might_sleep (); | 1055 | might_sleep (); |
1059 | for (;;) { | 1056 | for (;;) { |
1060 | error = posix_lock_file(filp, fl, NULL); | 1057 | error = posix_lock_file(filp, fl, NULL); |
1061 | if (error != FILE_LOCK_DEFERRED) | 1058 | if (error != FILE_LOCK_DEFERRED) |
1062 | break; | 1059 | break; |
1063 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); | 1060 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); |
1064 | if (!error) | 1061 | if (!error) |
1065 | continue; | 1062 | continue; |
1066 | 1063 | ||
1067 | locks_delete_block(fl); | 1064 | locks_delete_block(fl); |
1068 | break; | 1065 | break; |
1069 | } | 1066 | } |
1070 | return error; | 1067 | return error; |
1071 | } | 1068 | } |
1072 | EXPORT_SYMBOL(posix_lock_file_wait); | 1069 | EXPORT_SYMBOL(posix_lock_file_wait); |
1073 | 1070 | ||
1074 | /** | 1071 | /** |
1075 | * locks_mandatory_locked - Check for an active lock | 1072 | * locks_mandatory_locked - Check for an active lock |
1076 | * @inode: the file to check | 1073 | * @inode: the file to check |
1077 | * | 1074 | * |
1078 | * Searches the inode's list of locks to find any POSIX locks which conflict. | 1075 | * Searches the inode's list of locks to find any POSIX locks which conflict. |
1079 | * This function is called from locks_verify_locked() only. | 1076 | * This function is called from locks_verify_locked() only. |
1080 | */ | 1077 | */ |
1081 | int locks_mandatory_locked(struct inode *inode) | 1078 | int locks_mandatory_locked(struct inode *inode) |
1082 | { | 1079 | { |
1083 | fl_owner_t owner = current->files; | 1080 | fl_owner_t owner = current->files; |
1084 | struct file_lock *fl; | 1081 | struct file_lock *fl; |
1085 | 1082 | ||
1086 | /* | 1083 | /* |
1087 | * Search the lock list for this inode for any POSIX locks. | 1084 | * Search the lock list for this inode for any POSIX locks. |
1088 | */ | 1085 | */ |
1089 | lock_flocks(); | 1086 | lock_flocks(); |
1090 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1087 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1091 | if (!IS_POSIX(fl)) | 1088 | if (!IS_POSIX(fl)) |
1092 | continue; | 1089 | continue; |
1093 | if (fl->fl_owner != owner) | 1090 | if (fl->fl_owner != owner) |
1094 | break; | 1091 | break; |
1095 | } | 1092 | } |
1096 | unlock_flocks(); | 1093 | unlock_flocks(); |
1097 | return fl ? -EAGAIN : 0; | 1094 | return fl ? -EAGAIN : 0; |
1098 | } | 1095 | } |
1099 | 1096 | ||
/**
 * locks_mandatory_area - Check for a conflicting lock
 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
 *		for shared
 * @inode: the file to check
 * @filp: how the file was opened (if it was)
 * @offset: start of area to check
 * @count: length of area to check
 *
 * Searches the inode's list of locks to find any POSIX locks which conflict.
 * This function is called from rw_verify_area() and
 * locks_verify_truncate().
 */
int locks_mandatory_area(int read_write, struct inode *inode,
			 struct file *filp, loff_t offset,
			 size_t count)
{
	struct file_lock fl;
	int error;

	/* Build an on-stack FL_ACCESS probe covering [offset, offset+count-1].
	 * FL_ACCESS makes __posix_lock_file() check for conflicts only and
	 * never insert the lock into the list. */
	locks_init_lock(&fl);
	fl.fl_owner = current->files;
	fl.fl_pid = current->tgid;
	fl.fl_file = filp;
	fl.fl_flags = FL_POSIX | FL_ACCESS;
	/* Block on conflicts unless the file was opened O_NONBLOCK. */
	if (filp && !(filp->f_flags & O_NONBLOCK))
		fl.fl_flags |= FL_SLEEP;
	fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
	fl.fl_start = offset;
	fl.fl_end = offset + count - 1;

	for (;;) {
		error = __posix_lock_file(inode, &fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
		if (!error) {
			/*
			 * If we've been sleeping someone might have
			 * changed the permissions behind our back.
			 */
			if (__mandatory_lock(inode))
				continue;
		}

		/* Interrupted, or mandatory locking was turned off: stop
		 * waiting and detach from the blocker's wait list. */
		locks_delete_block(&fl);
		break;
	}

	return error;
}

EXPORT_SYMBOL(locks_mandatory_area);
1153 | 1150 | ||
1154 | /* We already had a lease on this file; just change its type */ | 1151 | /* We already had a lease on this file; just change its type */ |
1155 | int lease_modify(struct file_lock **before, int arg) | 1152 | int lease_modify(struct file_lock **before, int arg) |
1156 | { | 1153 | { |
1157 | struct file_lock *fl = *before; | 1154 | struct file_lock *fl = *before; |
1158 | int error = assign_type(fl, arg); | 1155 | int error = assign_type(fl, arg); |
1159 | 1156 | ||
1160 | if (error) | 1157 | if (error) |
1161 | return error; | 1158 | return error; |
1162 | locks_wake_up_blocks(fl); | 1159 | locks_wake_up_blocks(fl); |
1163 | if (arg == F_UNLCK) | 1160 | if (arg == F_UNLCK) |
1164 | locks_delete_lock(before); | 1161 | locks_delete_lock(before); |
1165 | return 0; | 1162 | return 0; |
1166 | } | 1163 | } |
1167 | 1164 | ||
1168 | EXPORT_SYMBOL(lease_modify); | 1165 | EXPORT_SYMBOL(lease_modify); |
1169 | 1166 | ||
/*
 * time_out_leases - expire leases whose break timeout has elapsed
 * @inode: inode whose lease list is scanned
 *
 * Walks the leading IS_LEASE() entries of inode->i_flock.  Only leases
 * that are mid-break (F_INPROGRESS set) are considered; of those, any
 * whose fl_break_time has passed is forced to its target type via
 * lease_modify().  fl_break_time == 0 means "no timeout" and is skipped.
 *
 * Callers in this file invoke this with lock_flocks() held.
 */
static void time_out_leases(struct inode *inode)
{
	struct file_lock **before;
	struct file_lock *fl;

	before = &inode->i_flock;
	while ((fl = *before) && IS_LEASE(fl) && (fl->fl_type & F_INPROGRESS)) {
		if ((fl->fl_break_time == 0)
				|| time_before(jiffies, fl->fl_break_time)) {
			/* Not yet expired (or no deadline): keep walking. */
			before = &fl->fl_next;
			continue;
		}
		lease_modify(before, fl->fl_type & ~F_INPROGRESS);
		if (fl == *before)	/* lease_modify may have freed fl */
			before = &fl->fl_next;
	}
}
1187 | 1184 | ||
/**
 * __break_lease	-	revoke all outstanding leases on file
 * @inode: the inode of the file to return
 * @mode: the open mode (read or write)
 *
 * break_lease (inlined for speed) has checked there already is at least
 * some kind of lock (maybe a lease) on this file.  Leases are broken on
 * a call to open() or truncate().  This function can sleep unless you
 * specified %O_NONBLOCK to your open().
 */
int __break_lease(struct inode *inode, unsigned int mode)
{
	int error = 0, future;
	struct file_lock *new_fl, *flock;
	struct file_lock *fl;
	unsigned long break_time;
	int i_have_this_lease = 0;
	int want_write = (mode & O_ACCMODE) != O_RDONLY;

	/* Allocate before taking the lock; may be an ERR_PTR on failure. */
	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);

	lock_flocks();

	time_out_leases(inode);

	flock = inode->i_flock;
	if ((flock == NULL) || !IS_LEASE(flock))
		goto out;

	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
		if (fl->fl_owner == current->files)
			i_have_this_lease = 1;

	if (want_write) {
		/* If we want write access, we have to revoke any lease. */
		future = F_UNLCK | F_INPROGRESS;
	} else if (flock->fl_type & F_INPROGRESS) {
		/* If the lease is already being broken, we just leave it */
		future = flock->fl_type;
	} else if (flock->fl_type & F_WRLCK) {
		/* Downgrade the exclusive lease to a read-only lease. */
		future = F_RDLCK | F_INPROGRESS;
	} else {
		/* the existing lease was read-only, so we can read too. */
		goto out;
	}

	/*
	 * Allocation failure only matters if we are actually going to
	 * block on new_fl below; lease holders and O_NONBLOCK openers
	 * never reach that wait.
	 */
	if (IS_ERR(new_fl) && !i_have_this_lease
			&& ((mode & O_NONBLOCK) == 0)) {
		error = PTR_ERR(new_fl);
		goto out;
	}

	break_time = 0;
	if (lease_break_time > 0) {
		break_time = jiffies + lease_break_time * HZ;
		if (break_time == 0)
			break_time++;	/* so that 0 means no break time */
	}

	/* Mark every lease as breaking and notify its holder. */
	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
		if (fl->fl_type != future) {
			fl->fl_type = future;
			fl->fl_break_time = break_time;
			/* lease must have lmops break callback */
			fl->fl_lmops->fl_break(fl);
		}
	}

	if (i_have_this_lease || (mode & O_NONBLOCK)) {
		error = -EWOULDBLOCK;
		goto out;
	}

restart:
	/* Convert the absolute deadline into a relative timeout (0 = forever). */
	break_time = flock->fl_break_time;
	if (break_time != 0) {
		break_time -= jiffies;
		if (break_time == 0)
			break_time++;
	}
	/* Sleep with the file-lock lock dropped until released or timed out. */
	locks_insert_block(flock, new_fl);
	unlock_flocks();
	error = wait_event_interruptible_timeout(new_fl->fl_wait,
						!new_fl->fl_next, break_time);
	lock_flocks();
	__locks_delete_block(new_fl);
	if (error >= 0) {
		if (error == 0)
			time_out_leases(inode);
		/* Wait for the next lease that has not been broken yet */
		for (flock = inode->i_flock; flock && IS_LEASE(flock);
				flock = flock->fl_next) {
			if (flock->fl_type & F_INPROGRESS)
				goto restart;
		}
		error = 0;
	}

out:
	unlock_flocks();
	if (!IS_ERR(new_fl))
		locks_free_lock(new_fl);
	return error;
}

EXPORT_SYMBOL(__break_lease);
1295 | 1292 | ||
1296 | /** | 1293 | /** |
1297 | * lease_get_mtime - get the last modified time of an inode | 1294 | * lease_get_mtime - get the last modified time of an inode |
1298 | * @inode: the inode | 1295 | * @inode: the inode |
1299 | * @time: pointer to a timespec which will contain the last modified time | 1296 | * @time: pointer to a timespec which will contain the last modified time |
1300 | * | 1297 | * |
1301 | * This is to force NFS clients to flush their caches for files with | 1298 | * This is to force NFS clients to flush their caches for files with |
1302 | * exclusive leases. The justification is that if someone has an | 1299 | * exclusive leases. The justification is that if someone has an |
1303 | * exclusive lease, then they could be modifying it. | 1300 | * exclusive lease, then they could be modifying it. |
1304 | */ | 1301 | */ |
1305 | void lease_get_mtime(struct inode *inode, struct timespec *time) | 1302 | void lease_get_mtime(struct inode *inode, struct timespec *time) |
1306 | { | 1303 | { |
1307 | struct file_lock *flock = inode->i_flock; | 1304 | struct file_lock *flock = inode->i_flock; |
1308 | if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK)) | 1305 | if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK)) |
1309 | *time = current_fs_time(inode->i_sb); | 1306 | *time = current_fs_time(inode->i_sb); |
1310 | else | 1307 | else |
1311 | *time = inode->i_mtime; | 1308 | *time = inode->i_mtime; |
1312 | } | 1309 | } |
1313 | 1310 | ||
1314 | EXPORT_SYMBOL(lease_get_mtime); | 1311 | EXPORT_SYMBOL(lease_get_mtime); |
1315 | 1312 | ||
1316 | /** | 1313 | /** |
1317 | * fcntl_getlease - Enquire what lease is currently active | 1314 | * fcntl_getlease - Enquire what lease is currently active |
1318 | * @filp: the file | 1315 | * @filp: the file |
1319 | * | 1316 | * |
1320 | * The value returned by this function will be one of | 1317 | * The value returned by this function will be one of |
1321 | * (if no lease break is pending): | 1318 | * (if no lease break is pending): |
1322 | * | 1319 | * |
1323 | * %F_RDLCK to indicate a shared lease is held. | 1320 | * %F_RDLCK to indicate a shared lease is held. |
1324 | * | 1321 | * |
1325 | * %F_WRLCK to indicate an exclusive lease is held. | 1322 | * %F_WRLCK to indicate an exclusive lease is held. |
1326 | * | 1323 | * |
1327 | * %F_UNLCK to indicate no lease is held. | 1324 | * %F_UNLCK to indicate no lease is held. |
1328 | * | 1325 | * |
1329 | * (if a lease break is pending): | 1326 | * (if a lease break is pending): |
1330 | * | 1327 | * |
1331 | * %F_RDLCK to indicate an exclusive lease needs to be | 1328 | * %F_RDLCK to indicate an exclusive lease needs to be |
1332 | * changed to a shared lease (or removed). | 1329 | * changed to a shared lease (or removed). |
1333 | * | 1330 | * |
1334 | * %F_UNLCK to indicate the lease needs to be removed. | 1331 | * %F_UNLCK to indicate the lease needs to be removed. |
1335 | * | 1332 | * |
1336 | * XXX: sfr & willy disagree over whether F_INPROGRESS | 1333 | * XXX: sfr & willy disagree over whether F_INPROGRESS |
1337 | * should be returned to userspace. | 1334 | * should be returned to userspace. |
1338 | */ | 1335 | */ |
1339 | int fcntl_getlease(struct file *filp) | 1336 | int fcntl_getlease(struct file *filp) |
1340 | { | 1337 | { |
1341 | struct file_lock *fl; | 1338 | struct file_lock *fl; |
1342 | int type = F_UNLCK; | 1339 | int type = F_UNLCK; |
1343 | 1340 | ||
1344 | lock_flocks(); | 1341 | lock_flocks(); |
1345 | time_out_leases(filp->f_path.dentry->d_inode); | 1342 | time_out_leases(filp->f_path.dentry->d_inode); |
1346 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); | 1343 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); |
1347 | fl = fl->fl_next) { | 1344 | fl = fl->fl_next) { |
1348 | if (fl->fl_file == filp) { | 1345 | if (fl->fl_file == filp) { |
1349 | type = fl->fl_type & ~F_INPROGRESS; | 1346 | type = fl->fl_type & ~F_INPROGRESS; |
1350 | break; | 1347 | break; |
1351 | } | 1348 | } |
1352 | } | 1349 | } |
1353 | unlock_flocks(); | 1350 | unlock_flocks(); |
1354 | return type; | 1351 | return type; |
1355 | } | 1352 | } |
1356 | 1353 | ||
/**
 * generic_setlease	-	sets a lease on an open file
 * @filp: file pointer
 * @arg: type of lease to obtain
 * @flp: input - file_lock to use, output - file_lock inserted
 *
 * The (input) flp->fl_lmops->fl_break function is required
 * by break_lease().
 *
 * Called with file_lock_lock held.
 */
int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
{
	struct file_lock *fl, **before, **my_before = NULL, *lease;
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	int error, rdlease_count = 0, wrlease_count = 0;

	lease = *flp;

	/* Only the file's owner (or CAP_LEASE) may lease a regular file. */
	error = -EACCES;
	if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
		goto out;
	error = -EINVAL;
	if (!S_ISREG(inode->i_mode))
		goto out;
	error = security_file_lock(filp, arg);
	if (error)
		goto out;

	/* Expire stale leases so the conflict counts below are accurate. */
	time_out_leases(inode);

	BUG_ON(!(*flp)->fl_lmops->fl_break);

	if (arg != F_UNLCK) {
		/*
		 * A read lease conflicts with current writers; a write
		 * lease conflicts with any other reference to the file.
		 */
		error = -EAGAIN;
		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
			goto out;
		if ((arg == F_WRLCK)
		    && ((atomic_read(&dentry->d_count) > 1)
			|| (atomic_read(&inode->i_count) > 1)))
			goto out;
	}

	/*
	 * At this point, we know that if there is an exclusive
	 * lease on this file, then we hold it on this filp
	 * (otherwise our open of this file would have blocked).
	 * And if we are trying to acquire an exclusive lease,
	 * then the file is not open by anyone (including us)
	 * except for this filp.
	 */
	for (before = &inode->i_flock;
			((fl = *before) != NULL) && IS_LEASE(fl);
			before = &fl->fl_next) {
		if (lease->fl_lmops->fl_mylease(fl, lease))
			my_before = before;
		else if (fl->fl_type == (F_INPROGRESS | F_UNLCK))
			/*
			 * Someone is in the process of opening this
			 * file for writing so we may not take an
			 * exclusive lease on it.
			 */
			wrlease_count++;
		else
			rdlease_count++;
	}

	error = -EAGAIN;
	if ((arg == F_RDLCK && (wrlease_count > 0)) ||
	    (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0)))
		goto out;

	/* We already hold a lease on this file: just change its type. */
	if (my_before != NULL) {
		error = lease->fl_lmops->fl_change(my_before, arg);
		if (!error)
			*flp = *my_before;
		goto out;
	}

	/* F_UNLCK with no lease of ours to remove: fall out (-EAGAIN). */
	if (arg == F_UNLCK)
		goto out;

	error = -EINVAL;
	if (!leases_enable)
		goto out;

	/* Insert the caller-supplied lock; the lease list now owns it. */
	locks_insert_lock(before, lease);
	return 0;

out:
	return error;
}
EXPORT_SYMBOL(generic_setlease);
1451 | 1448 | ||
1452 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | 1449 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
1453 | { | 1450 | { |
1454 | if (filp->f_op && filp->f_op->setlease) | 1451 | if (filp->f_op && filp->f_op->setlease) |
1455 | return filp->f_op->setlease(filp, arg, lease); | 1452 | return filp->f_op->setlease(filp, arg, lease); |
1456 | else | 1453 | else |
1457 | return generic_setlease(filp, arg, lease); | 1454 | return generic_setlease(filp, arg, lease); |
1458 | } | 1455 | } |
1459 | 1456 | ||
/**
 * vfs_setlease        -       sets a lease on an open file
 * @filp: file pointer
 * @arg: type of lease to obtain
 * @lease: file_lock to use
 *
 * Call this to establish a lease on the file.
 * The (*lease)->fl_lmops->fl_break operation must be set; if not,
 * break_lease will oops!
 *
 * This will call the filesystem's setlease file method, if
 * defined.  Note that there is no getlease method; instead, the
 * filesystem setlease method should call back to setlease() to
 * add a lease to the inode's lease list, where fcntl_getlease() can
 * find it.  Since fcntl_getlease() only reports whether the current
 * task holds a lease, a cluster filesystem need only do this for
 * leases held by processes on this node.
 *
 * There is also no break_lease method; filesystems that
 * handle their own leases should break leases themselves from the
 * filesystem's open, create, and (on truncate) setattr methods.
 *
 * Warning: the only current setlease methods exist only to disable
 * leases in certain cases.  More vfs changes may be required to
 * allow a full filesystem lease implementation.
 */
int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
{
	int ret;

	/* Serialize against all other file-lock operations. */
	lock_flocks();
	ret = __vfs_setlease(filp, arg, lease);
	unlock_flocks();
	return ret;
}
EXPORT_SYMBOL_GPL(vfs_setlease);
1498 | 1495 | ||
1499 | static int do_fcntl_delete_lease(struct file *filp) | 1496 | static int do_fcntl_delete_lease(struct file *filp) |
1500 | { | 1497 | { |
1501 | struct file_lock fl, *flp = &fl; | 1498 | struct file_lock fl, *flp = &fl; |
1502 | 1499 | ||
1503 | lease_init(filp, F_UNLCK, flp); | 1500 | lease_init(filp, F_UNLCK, flp); |
1504 | 1501 | ||
1505 | return vfs_setlease(filp, F_UNLCK, &flp); | 1502 | return vfs_setlease(filp, F_UNLCK, &flp); |
1506 | } | 1503 | } |
1507 | 1504 | ||
/*
 * do_fcntl_add_lease - install a new lease (F_RDLCK/F_WRLCK) for fcntl()
 * @fd: descriptor the fasync notification is registered against
 * @filp: open file to lease
 * @arg: lease type requested
 *
 * Allocates the lease and a fasync entry up front (outside the
 * file-lock lock), installs the lease via __vfs_setlease(), hooks up
 * SIGIO delivery, and sets the lease owner.  Returns 0 or -errno.
 */
static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
{
	struct file_lock *fl;
	struct fasync_struct *new;
	struct inode *inode = filp->f_path.dentry->d_inode;
	int error;

	fl = lease_alloc(filp, arg);
	if (IS_ERR(fl))
		return PTR_ERR(fl);

	/* Pre-allocate so we never allocate under lock_flocks(). */
	new = fasync_alloc();
	if (!new) {
		locks_free_lock(fl);
		return -ENOMEM;
	}
	lock_flocks();
	error = __vfs_setlease(filp, arg, &fl);
	if (error) {
		unlock_flocks();
		locks_free_lock(fl);
		goto out_free_fasync;
	}

	/*
	 * fasync_insert_entry() returns the old entry if any.
	 * If there was no old entry, then it used 'new' and
	 * inserted it into the fasync list. Clear new so that
	 * we don't release it here.
	 */
	if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
		new = NULL;

	/*
	 * NOTE(review): error is 0 here (any non-zero value took the
	 * goto above), so this branch appears unreachable — confirm
	 * against fasync_insert_entry()'s contract.
	 */
	if (error < 0) {
		/* remove lease just inserted by setlease */
		fl->fl_type = F_UNLCK | F_INPROGRESS;
		fl->fl_break_time = jiffies - 10;
		time_out_leases(inode);
	} else {
		/* Deliver lease-break signals to the current task. */
		error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
	}
	unlock_flocks();

out_free_fasync:
	if (new)
		fasync_free(new);
	return error;
}
1556 | 1553 | ||
1557 | /** | 1554 | /** |
1558 | * fcntl_setlease - sets a lease on an open file | 1555 | * fcntl_setlease - sets a lease on an open file |
1559 | * @fd: open file descriptor | 1556 | * @fd: open file descriptor |
1560 | * @filp: file pointer | 1557 | * @filp: file pointer |
1561 | * @arg: type of lease to obtain | 1558 | * @arg: type of lease to obtain |
1562 | * | 1559 | * |
1563 | * Call this fcntl to establish a lease on the file. | 1560 | * Call this fcntl to establish a lease on the file. |
1564 | * Note that you also need to call %F_SETSIG to | 1561 | * Note that you also need to call %F_SETSIG to |
1565 | * receive a signal when the lease is broken. | 1562 | * receive a signal when the lease is broken. |
1566 | */ | 1563 | */ |
1567 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1564 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1568 | { | 1565 | { |
1569 | if (arg == F_UNLCK) | 1566 | if (arg == F_UNLCK) |
1570 | return do_fcntl_delete_lease(filp); | 1567 | return do_fcntl_delete_lease(filp); |
1571 | return do_fcntl_add_lease(fd, filp, arg); | 1568 | return do_fcntl_add_lease(fd, filp, arg); |
1572 | } | 1569 | } |
1573 | 1570 | ||
1574 | /** | 1571 | /** |
1575 | * flock_lock_file_wait - Apply a FLOCK-style lock to a file | 1572 | * flock_lock_file_wait - Apply a FLOCK-style lock to a file |
1576 | * @filp: The file to apply the lock to | 1573 | * @filp: The file to apply the lock to |
1577 | * @fl: The lock to be applied | 1574 | * @fl: The lock to be applied |
1578 | * | 1575 | * |
1579 | * Add a FLOCK style lock to a file. | 1576 | * Add a FLOCK style lock to a file. |
1580 | */ | 1577 | */ |
1581 | int flock_lock_file_wait(struct file *filp, struct file_lock *fl) | 1578 | int flock_lock_file_wait(struct file *filp, struct file_lock *fl) |
1582 | { | 1579 | { |
1583 | int error; | 1580 | int error; |
1584 | might_sleep(); | 1581 | might_sleep(); |
1585 | for (;;) { | 1582 | for (;;) { |
1586 | error = flock_lock_file(filp, fl); | 1583 | error = flock_lock_file(filp, fl); |
1587 | if (error != FILE_LOCK_DEFERRED) | 1584 | if (error != FILE_LOCK_DEFERRED) |
1588 | break; | 1585 | break; |
1589 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); | 1586 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); |
1590 | if (!error) | 1587 | if (!error) |
1591 | continue; | 1588 | continue; |
1592 | 1589 | ||
1593 | locks_delete_block(fl); | 1590 | locks_delete_block(fl); |
1594 | break; | 1591 | break; |
1595 | } | 1592 | } |
1596 | return error; | 1593 | return error; |
1597 | } | 1594 | } |
1598 | 1595 | ||
1599 | EXPORT_SYMBOL(flock_lock_file_wait); | 1596 | EXPORT_SYMBOL(flock_lock_file_wait); |
1600 | 1597 | ||
/**
 * sys_flock: - flock() system call.
 * @fd: the file descriptor to lock.
 * @cmd: the type of lock to apply.
 *
 * Apply a %FL_FLOCK style lock to an open file descriptor.
 * The @cmd can be one of
 *
 *	%LOCK_SH -- a shared lock.
 *
 *	%LOCK_EX -- an exclusive lock.
 *
 *	%LOCK_UN -- remove an existing lock.
 *
 *	%LOCK_MAND -- a `mandatory' flock.  This exists to emulate Windows Share Modes.
 *
 * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
 * processes read and write access respectively.
 */
SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
{
	struct file *filp;
	struct file_lock *lock;
	int can_sleep, unlock;
	int error;

	error = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	/* LOCK_NB is a modifier, not a lock type; strip it after noting it. */
	can_sleep = !(cmd & LOCK_NB);
	cmd &= ~LOCK_NB;
	unlock = (cmd == LOCK_UN);

	/* Taking a non-LOCK_MAND lock requires the file be open read or write
	 * (failure falls through with error still -EBADF). */
	if (!unlock && !(cmd & LOCK_MAND) &&
	    !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
		goto out_putf;

	error = flock_make_lock(filp, &lock, cmd);
	if (error)
		goto out_putf;
	if (can_sleep)
		lock->fl_flags |= FL_SLEEP;

	error = security_file_lock(filp, lock->fl_type);
	if (error)
		goto out_free;

	/* Filesystems (e.g. network fs) may supply their own flock method. */
	if (filp->f_op && filp->f_op->flock)
		error = filp->f_op->flock(filp,
					  (can_sleep) ? F_SETLKW : F_SETLK,
					  lock);
	else
		error = flock_lock_file_wait(filp, lock);

 out_free:
	locks_free_lock(lock);

 out_putf:
	fput(filp);
 out:
	return error;
}
1665 | 1662 | ||
1666 | /** | 1663 | /** |
1667 | * vfs_test_lock - test file byte range lock | 1664 | * vfs_test_lock - test file byte range lock |
1668 | * @filp: The file to test lock for | 1665 | * @filp: The file to test lock for |
1669 | * @fl: The lock to test; also used to hold result | 1666 | * @fl: The lock to test; also used to hold result |
1670 | * | 1667 | * |
1671 | * Returns -ERRNO on failure. Indicates presence of conflicting lock by | 1668 | * Returns -ERRNO on failure. Indicates presence of conflicting lock by |
1672 | * setting conf->fl_type to something other than F_UNLCK. | 1669 | * setting conf->fl_type to something other than F_UNLCK. |
1673 | */ | 1670 | */ |
1674 | int vfs_test_lock(struct file *filp, struct file_lock *fl) | 1671 | int vfs_test_lock(struct file *filp, struct file_lock *fl) |
1675 | { | 1672 | { |
1676 | if (filp->f_op && filp->f_op->lock) | 1673 | if (filp->f_op && filp->f_op->lock) |
1677 | return filp->f_op->lock(filp, F_GETLK, fl); | 1674 | return filp->f_op->lock(filp, F_GETLK, fl); |
1678 | posix_test_lock(filp, fl); | 1675 | posix_test_lock(filp, fl); |
1679 | return 0; | 1676 | return 0; |
1680 | } | 1677 | } |
1681 | EXPORT_SYMBOL_GPL(vfs_test_lock); | 1678 | EXPORT_SYMBOL_GPL(vfs_test_lock); |
1682 | 1679 | ||
1683 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) | 1680 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) |
1684 | { | 1681 | { |
1685 | flock->l_pid = fl->fl_pid; | 1682 | flock->l_pid = fl->fl_pid; |
1686 | #if BITS_PER_LONG == 32 | 1683 | #if BITS_PER_LONG == 32 |
1687 | /* | 1684 | /* |
1688 | * Make sure we can represent the posix lock via | 1685 | * Make sure we can represent the posix lock via |
1689 | * legacy 32bit flock. | 1686 | * legacy 32bit flock. |
1690 | */ | 1687 | */ |
1691 | if (fl->fl_start > OFFT_OFFSET_MAX) | 1688 | if (fl->fl_start > OFFT_OFFSET_MAX) |
1692 | return -EOVERFLOW; | 1689 | return -EOVERFLOW; |
1693 | if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) | 1690 | if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) |
1694 | return -EOVERFLOW; | 1691 | return -EOVERFLOW; |
1695 | #endif | 1692 | #endif |
1696 | flock->l_start = fl->fl_start; | 1693 | flock->l_start = fl->fl_start; |
1697 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : | 1694 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : |
1698 | fl->fl_end - fl->fl_start + 1; | 1695 | fl->fl_end - fl->fl_start + 1; |
1699 | flock->l_whence = 0; | 1696 | flock->l_whence = 0; |
1700 | flock->l_type = fl->fl_type; | 1697 | flock->l_type = fl->fl_type; |
1701 | return 0; | 1698 | return 0; |
1702 | } | 1699 | } |
1703 | 1700 | ||
#if BITS_PER_LONG == 32
/*
 * 64-bit variant of posix_lock_to_flock(): a flock64 can always hold the
 * range, so no overflow check is needed and the function returns void.
 */
static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
{
	flock->l_pid = fl->fl_pid;
	flock->l_start = fl->fl_start;
	/* l_len == 0 is the userspace encoding for "to end of file". */
	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
		fl->fl_end - fl->fl_start + 1;
	flock->l_whence = 0;
	flock->l_type = fl->fl_type;
}
#endif
1715 | 1712 | ||
1716 | /* Report the first existing lock that would conflict with l. | 1713 | /* Report the first existing lock that would conflict with l. |
1717 | * This implements the F_GETLK command of fcntl(). | 1714 | * This implements the F_GETLK command of fcntl(). |
1718 | */ | 1715 | */ |
1719 | int fcntl_getlk(struct file *filp, struct flock __user *l) | 1716 | int fcntl_getlk(struct file *filp, struct flock __user *l) |
1720 | { | 1717 | { |
1721 | struct file_lock file_lock; | 1718 | struct file_lock file_lock; |
1722 | struct flock flock; | 1719 | struct flock flock; |
1723 | int error; | 1720 | int error; |
1724 | 1721 | ||
1725 | error = -EFAULT; | 1722 | error = -EFAULT; |
1726 | if (copy_from_user(&flock, l, sizeof(flock))) | 1723 | if (copy_from_user(&flock, l, sizeof(flock))) |
1727 | goto out; | 1724 | goto out; |
1728 | error = -EINVAL; | 1725 | error = -EINVAL; |
1729 | if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) | 1726 | if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) |
1730 | goto out; | 1727 | goto out; |
1731 | 1728 | ||
1732 | error = flock_to_posix_lock(filp, &file_lock, &flock); | 1729 | error = flock_to_posix_lock(filp, &file_lock, &flock); |
1733 | if (error) | 1730 | if (error) |
1734 | goto out; | 1731 | goto out; |
1735 | 1732 | ||
1736 | error = vfs_test_lock(filp, &file_lock); | 1733 | error = vfs_test_lock(filp, &file_lock); |
1737 | if (error) | 1734 | if (error) |
1738 | goto out; | 1735 | goto out; |
1739 | 1736 | ||
1740 | flock.l_type = file_lock.fl_type; | 1737 | flock.l_type = file_lock.fl_type; |
1741 | if (file_lock.fl_type != F_UNLCK) { | 1738 | if (file_lock.fl_type != F_UNLCK) { |
1742 | error = posix_lock_to_flock(&flock, &file_lock); | 1739 | error = posix_lock_to_flock(&flock, &file_lock); |
1743 | if (error) | 1740 | if (error) |
1744 | goto out; | 1741 | goto out; |
1745 | } | 1742 | } |
1746 | error = -EFAULT; | 1743 | error = -EFAULT; |
1747 | if (!copy_to_user(l, &flock, sizeof(flock))) | 1744 | if (!copy_to_user(l, &flock, sizeof(flock))) |
1748 | error = 0; | 1745 | error = 0; |
1749 | out: | 1746 | out: |
1750 | return error; | 1747 | return error; |
1751 | } | 1748 | } |
1752 | 1749 | ||
1753 | /** | 1750 | /** |
1754 | * vfs_lock_file - file byte range lock | 1751 | * vfs_lock_file - file byte range lock |
1755 | * @filp: The file to apply the lock to | 1752 | * @filp: The file to apply the lock to |
1756 | * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) | 1753 | * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) |
1757 | * @fl: The lock to be applied | 1754 | * @fl: The lock to be applied |
1758 | * @conf: Place to return a copy of the conflicting lock, if found. | 1755 | * @conf: Place to return a copy of the conflicting lock, if found. |
1759 | * | 1756 | * |
1760 | * A caller that doesn't care about the conflicting lock may pass NULL | 1757 | * A caller that doesn't care about the conflicting lock may pass NULL |
1761 | * as the final argument. | 1758 | * as the final argument. |
1762 | * | 1759 | * |
1763 | * If the filesystem defines a private ->lock() method, then @conf will | 1760 | * If the filesystem defines a private ->lock() method, then @conf will |
1764 | * be left unchanged; so a caller that cares should initialize it to | 1761 | * be left unchanged; so a caller that cares should initialize it to |
1765 | * some acceptable default. | 1762 | * some acceptable default. |
1766 | * | 1763 | * |
1767 | * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX | 1764 | * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX |
1768 | * locks, the ->lock() interface may return asynchronously, before the lock has | 1765 | * locks, the ->lock() interface may return asynchronously, before the lock has |
1769 | * been granted or denied by the underlying filesystem, if (and only if) | 1766 | * been granted or denied by the underlying filesystem, if (and only if) |
1770 | * fl_grant is set. Callers expecting ->lock() to return asynchronously | 1767 | * fl_grant is set. Callers expecting ->lock() to return asynchronously |
1771 | * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) | 1768 | * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) |
1772 | * the request is for a blocking lock. When ->lock() does return asynchronously, | 1769 | * the request is for a blocking lock. When ->lock() does return asynchronously, |
1773 | * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock | 1770 | * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock |
1774 | * request completes. | 1771 | * request completes. |
1775 | * If the request is for non-blocking lock the file system should return | 1772 | * If the request is for non-blocking lock the file system should return |
1776 | * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine | 1773 | * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine |
1777 | * with the result. If the request timed out the callback routine will return a | 1774 | * with the result. If the request timed out the callback routine will return a |
1778 | * nonzero return code and the file system should release the lock. The file | 1775 | * nonzero return code and the file system should release the lock. The file |
1779 | * system is also responsible to keep a corresponding posix lock when it | 1776 | * system is also responsible to keep a corresponding posix lock when it |
1780 | * grants a lock so the VFS can find out which locks are locally held and do | 1777 | * grants a lock so the VFS can find out which locks are locally held and do |
1781 | * the correct lock cleanup when required. | 1778 | * the correct lock cleanup when required. |
1782 | * The underlying filesystem must not drop the kernel lock or call | 1779 | * The underlying filesystem must not drop the kernel lock or call |
1783 | * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED | 1780 | * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED |
1784 | * return code. | 1781 | * return code. |
1785 | */ | 1782 | */ |
1786 | int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) | 1783 | int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) |
1787 | { | 1784 | { |
1788 | if (filp->f_op && filp->f_op->lock) | 1785 | if (filp->f_op && filp->f_op->lock) |
1789 | return filp->f_op->lock(filp, cmd, fl); | 1786 | return filp->f_op->lock(filp, cmd, fl); |
1790 | else | 1787 | else |
1791 | return posix_lock_file(filp, fl, conf); | 1788 | return posix_lock_file(filp, fl, conf); |
1792 | } | 1789 | } |
1793 | EXPORT_SYMBOL_GPL(vfs_lock_file); | 1790 | EXPORT_SYMBOL_GPL(vfs_lock_file); |
1794 | 1791 | ||
1795 | static int do_lock_file_wait(struct file *filp, unsigned int cmd, | 1792 | static int do_lock_file_wait(struct file *filp, unsigned int cmd, |
1796 | struct file_lock *fl) | 1793 | struct file_lock *fl) |
1797 | { | 1794 | { |
1798 | int error; | 1795 | int error; |
1799 | 1796 | ||
1800 | error = security_file_lock(filp, fl->fl_type); | 1797 | error = security_file_lock(filp, fl->fl_type); |
1801 | if (error) | 1798 | if (error) |
1802 | return error; | 1799 | return error; |
1803 | 1800 | ||
1804 | for (;;) { | 1801 | for (;;) { |
1805 | error = vfs_lock_file(filp, cmd, fl, NULL); | 1802 | error = vfs_lock_file(filp, cmd, fl, NULL); |
1806 | if (error != FILE_LOCK_DEFERRED) | 1803 | if (error != FILE_LOCK_DEFERRED) |
1807 | break; | 1804 | break; |
1808 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); | 1805 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); |
1809 | if (!error) | 1806 | if (!error) |
1810 | continue; | 1807 | continue; |
1811 | 1808 | ||
1812 | locks_delete_block(fl); | 1809 | locks_delete_block(fl); |
1813 | break; | 1810 | break; |
1814 | } | 1811 | } |
1815 | 1812 | ||
1816 | return error; | 1813 | return error; |
1817 | } | 1814 | } |
1818 | 1815 | ||
1819 | /* Apply the lock described by l to an open file descriptor. | 1816 | /* Apply the lock described by l to an open file descriptor. |
1820 | * This implements both the F_SETLK and F_SETLKW commands of fcntl(). | 1817 | * This implements both the F_SETLK and F_SETLKW commands of fcntl(). |
1821 | */ | 1818 | */ |
1822 | int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, | 1819 | int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, |
1823 | struct flock __user *l) | 1820 | struct flock __user *l) |
1824 | { | 1821 | { |
1825 | struct file_lock *file_lock = locks_alloc_lock(); | 1822 | struct file_lock *file_lock = locks_alloc_lock(); |
1826 | struct flock flock; | 1823 | struct flock flock; |
1827 | struct inode *inode; | 1824 | struct inode *inode; |
1828 | struct file *f; | 1825 | struct file *f; |
1829 | int error; | 1826 | int error; |
1830 | 1827 | ||
1831 | if (file_lock == NULL) | 1828 | if (file_lock == NULL) |
1832 | return -ENOLCK; | 1829 | return -ENOLCK; |
1833 | 1830 | ||
1834 | /* | 1831 | /* |
1835 | * This might block, so we do it before checking the inode. | 1832 | * This might block, so we do it before checking the inode. |
1836 | */ | 1833 | */ |
1837 | error = -EFAULT; | 1834 | error = -EFAULT; |
1838 | if (copy_from_user(&flock, l, sizeof(flock))) | 1835 | if (copy_from_user(&flock, l, sizeof(flock))) |
1839 | goto out; | 1836 | goto out; |
1840 | 1837 | ||
1841 | inode = filp->f_path.dentry->d_inode; | 1838 | inode = filp->f_path.dentry->d_inode; |
1842 | 1839 | ||
1843 | /* Don't allow mandatory locks on files that may be memory mapped | 1840 | /* Don't allow mandatory locks on files that may be memory mapped |
1844 | * and shared. | 1841 | * and shared. |
1845 | */ | 1842 | */ |
1846 | if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { | 1843 | if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { |
1847 | error = -EAGAIN; | 1844 | error = -EAGAIN; |
1848 | goto out; | 1845 | goto out; |
1849 | } | 1846 | } |
1850 | 1847 | ||
1851 | again: | 1848 | again: |
1852 | error = flock_to_posix_lock(filp, file_lock, &flock); | 1849 | error = flock_to_posix_lock(filp, file_lock, &flock); |
1853 | if (error) | 1850 | if (error) |
1854 | goto out; | 1851 | goto out; |
1855 | if (cmd == F_SETLKW) { | 1852 | if (cmd == F_SETLKW) { |
1856 | file_lock->fl_flags |= FL_SLEEP; | 1853 | file_lock->fl_flags |= FL_SLEEP; |
1857 | } | 1854 | } |
1858 | 1855 | ||
1859 | error = -EBADF; | 1856 | error = -EBADF; |
1860 | switch (flock.l_type) { | 1857 | switch (flock.l_type) { |
1861 | case F_RDLCK: | 1858 | case F_RDLCK: |
1862 | if (!(filp->f_mode & FMODE_READ)) | 1859 | if (!(filp->f_mode & FMODE_READ)) |
1863 | goto out; | 1860 | goto out; |
1864 | break; | 1861 | break; |
1865 | case F_WRLCK: | 1862 | case F_WRLCK: |
1866 | if (!(filp->f_mode & FMODE_WRITE)) | 1863 | if (!(filp->f_mode & FMODE_WRITE)) |
1867 | goto out; | 1864 | goto out; |
1868 | break; | 1865 | break; |
1869 | case F_UNLCK: | 1866 | case F_UNLCK: |
1870 | break; | 1867 | break; |
1871 | default: | 1868 | default: |
1872 | error = -EINVAL; | 1869 | error = -EINVAL; |
1873 | goto out; | 1870 | goto out; |
1874 | } | 1871 | } |
1875 | 1872 | ||
1876 | error = do_lock_file_wait(filp, cmd, file_lock); | 1873 | error = do_lock_file_wait(filp, cmd, file_lock); |
1877 | 1874 | ||
1878 | /* | 1875 | /* |
1879 | * Attempt to detect a close/fcntl race and recover by | 1876 | * Attempt to detect a close/fcntl race and recover by |
1880 | * releasing the lock that was just acquired. | 1877 | * releasing the lock that was just acquired. |
1881 | */ | 1878 | */ |
1882 | /* | 1879 | /* |
1883 | * we need that spin_lock here - it prevents reordering between | 1880 | * we need that spin_lock here - it prevents reordering between |
1884 | * update of inode->i_flock and check for it done in close(). | 1881 | * update of inode->i_flock and check for it done in close(). |
1885 | * rcu_read_lock() wouldn't do. | 1882 | * rcu_read_lock() wouldn't do. |
1886 | */ | 1883 | */ |
1887 | spin_lock(¤t->files->file_lock); | 1884 | spin_lock(¤t->files->file_lock); |
1888 | f = fcheck(fd); | 1885 | f = fcheck(fd); |
1889 | spin_unlock(¤t->files->file_lock); | 1886 | spin_unlock(¤t->files->file_lock); |
1890 | if (!error && f != filp && flock.l_type != F_UNLCK) { | 1887 | if (!error && f != filp && flock.l_type != F_UNLCK) { |
1891 | flock.l_type = F_UNLCK; | 1888 | flock.l_type = F_UNLCK; |
1892 | goto again; | 1889 | goto again; |
1893 | } | 1890 | } |
1894 | 1891 | ||
1895 | out: | 1892 | out: |
1896 | locks_free_lock(file_lock); | 1893 | locks_free_lock(file_lock); |
1897 | return error; | 1894 | return error; |
1898 | } | 1895 | } |
1899 | 1896 | ||
#if BITS_PER_LONG == 32
/* Report the first existing lock that would conflict with l.
 * This implements the F_GETLK command of fcntl().
 */
int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
{
	struct file_lock file_lock;
	struct flock64 flock;
	int error;

	error = -EFAULT;
	if (copy_from_user(&flock, l, sizeof(flock)))
		goto out;
	/* Only read/write lock types are meaningful as a query. */
	error = -EINVAL;
	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
		goto out;

	error = flock64_to_posix_lock(filp, &file_lock, &flock);
	if (error)
		goto out;

	error = vfs_test_lock(filp, &file_lock);
	if (error)
		goto out;

	/* F_UNLCK here means "no conflicting lock found". */
	flock.l_type = file_lock.fl_type;
	if (file_lock.fl_type != F_UNLCK)
		posix_lock_to_flock64(&flock, &file_lock);

	error = -EFAULT;
	if (!copy_to_user(l, &flock, sizeof(flock)))
		error = 0;

out:
	return error;
}

/* Apply the lock described by l to an open file descriptor.
 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
 */
int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
		struct flock64 __user *l)
{
	struct file_lock *file_lock = locks_alloc_lock();
	struct flock64 flock;
	struct inode *inode;
	struct file *f;
	int error;

	if (file_lock == NULL)
		return -ENOLCK;

	/*
	 * This might block, so we do it before checking the inode.
	 */
	error = -EFAULT;
	if (copy_from_user(&flock, l, sizeof(flock)))
		goto out;

	inode = filp->f_path.dentry->d_inode;

	/* Don't allow mandatory locks on files that may be memory mapped
	 * and shared.
	 */
	if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
		error = -EAGAIN;
		goto out;
	}

again:
	error = flock64_to_posix_lock(filp, file_lock, &flock);
	if (error)
		goto out;
	if (cmd == F_SETLKW64) {
		file_lock->fl_flags |= FL_SLEEP;
	}

	/* The requested lock type must match the file's open mode. */
	error = -EBADF;
	switch (flock.l_type) {
	case F_RDLCK:
		if (!(filp->f_mode & FMODE_READ))
			goto out;
		break;
	case F_WRLCK:
		if (!(filp->f_mode & FMODE_WRITE))
			goto out;
		break;
	case F_UNLCK:
		break;
	default:
		error = -EINVAL;
		goto out;
	}

	error = do_lock_file_wait(filp, cmd, file_lock);

	/*
	 * Attempt to detect a close/fcntl race and recover by
	 * releasing the lock that was just acquired.
	 */
	spin_lock(&current->files->file_lock);
	f = fcheck(fd);
	spin_unlock(&current->files->file_lock);
	if (!error && f != filp && flock.l_type != F_UNLCK) {
		flock.l_type = F_UNLCK;
		goto again;
	}

out:
	locks_free_lock(file_lock);
	return error;
}
#endif /* BITS_PER_LONG == 32 */
2013 | 2010 | ||
2014 | /* | 2011 | /* |
2015 | * This function is called when the file is being removed | 2012 | * This function is called when the file is being removed |
2016 | * from the task's fd array. POSIX locks belonging to this task | 2013 | * from the task's fd array. POSIX locks belonging to this task |
2017 | * are deleted at this time. | 2014 | * are deleted at this time. |
2018 | */ | 2015 | */ |
2019 | void locks_remove_posix(struct file *filp, fl_owner_t owner) | 2016 | void locks_remove_posix(struct file *filp, fl_owner_t owner) |
2020 | { | 2017 | { |
2021 | struct file_lock lock; | 2018 | struct file_lock lock; |
2022 | 2019 | ||
2023 | /* | 2020 | /* |
2024 | * If there are no locks held on this file, we don't need to call | 2021 | * If there are no locks held on this file, we don't need to call |
2025 | * posix_lock_file(). Another process could be setting a lock on this | 2022 | * posix_lock_file(). Another process could be setting a lock on this |
2026 | * file at the same time, but we wouldn't remove that lock anyway. | 2023 | * file at the same time, but we wouldn't remove that lock anyway. |
2027 | */ | 2024 | */ |
2028 | if (!filp->f_path.dentry->d_inode->i_flock) | 2025 | if (!filp->f_path.dentry->d_inode->i_flock) |
2029 | return; | 2026 | return; |
2030 | 2027 | ||
2031 | lock.fl_type = F_UNLCK; | 2028 | lock.fl_type = F_UNLCK; |
2032 | lock.fl_flags = FL_POSIX | FL_CLOSE; | 2029 | lock.fl_flags = FL_POSIX | FL_CLOSE; |
2033 | lock.fl_start = 0; | 2030 | lock.fl_start = 0; |
2034 | lock.fl_end = OFFSET_MAX; | 2031 | lock.fl_end = OFFSET_MAX; |
2035 | lock.fl_owner = owner; | 2032 | lock.fl_owner = owner; |
2036 | lock.fl_pid = current->tgid; | 2033 | lock.fl_pid = current->tgid; |
2037 | lock.fl_file = filp; | 2034 | lock.fl_file = filp; |
2038 | lock.fl_ops = NULL; | 2035 | lock.fl_ops = NULL; |
2039 | lock.fl_lmops = NULL; | 2036 | lock.fl_lmops = NULL; |
2040 | 2037 | ||
2041 | vfs_lock_file(filp, F_SETLK, &lock, NULL); | 2038 | vfs_lock_file(filp, F_SETLK, &lock, NULL); |
2042 | 2039 | ||
2043 | if (lock.fl_ops && lock.fl_ops->fl_release_private) | 2040 | if (lock.fl_ops && lock.fl_ops->fl_release_private) |
2044 | lock.fl_ops->fl_release_private(&lock); | 2041 | lock.fl_ops->fl_release_private(&lock); |
2045 | } | 2042 | } |
2046 | 2043 | ||
2047 | EXPORT_SYMBOL(locks_remove_posix); | 2044 | EXPORT_SYMBOL(locks_remove_posix); |
2048 | 2045 | ||
2049 | /* | 2046 | /* |
2050 | * This function is called on the last close of an open file. | 2047 | * This function is called on the last close of an open file. |
2051 | */ | 2048 | */ |
2052 | void locks_remove_flock(struct file *filp) | 2049 | void locks_remove_flock(struct file *filp) |
2053 | { | 2050 | { |
2054 | struct inode * inode = filp->f_path.dentry->d_inode; | 2051 | struct inode * inode = filp->f_path.dentry->d_inode; |
2055 | struct file_lock *fl; | 2052 | struct file_lock *fl; |
2056 | struct file_lock **before; | 2053 | struct file_lock **before; |
2057 | 2054 | ||
2058 | if (!inode->i_flock) | 2055 | if (!inode->i_flock) |
2059 | return; | 2056 | return; |
2060 | 2057 | ||
2061 | if (filp->f_op && filp->f_op->flock) { | 2058 | if (filp->f_op && filp->f_op->flock) { |
2062 | struct file_lock fl = { | 2059 | struct file_lock fl = { |
2063 | .fl_pid = current->tgid, | 2060 | .fl_pid = current->tgid, |
2064 | .fl_file = filp, | 2061 | .fl_file = filp, |
2065 | .fl_flags = FL_FLOCK, | 2062 | .fl_flags = FL_FLOCK, |
2066 | .fl_type = F_UNLCK, | 2063 | .fl_type = F_UNLCK, |
2067 | .fl_end = OFFSET_MAX, | 2064 | .fl_end = OFFSET_MAX, |
2068 | }; | 2065 | }; |
2069 | filp->f_op->flock(filp, F_SETLKW, &fl); | 2066 | filp->f_op->flock(filp, F_SETLKW, &fl); |
2070 | if (fl.fl_ops && fl.fl_ops->fl_release_private) | 2067 | if (fl.fl_ops && fl.fl_ops->fl_release_private) |
2071 | fl.fl_ops->fl_release_private(&fl); | 2068 | fl.fl_ops->fl_release_private(&fl); |
2072 | } | 2069 | } |
2073 | 2070 | ||
2074 | lock_flocks(); | 2071 | lock_flocks(); |
2075 | before = &inode->i_flock; | 2072 | before = &inode->i_flock; |
2076 | 2073 | ||
2077 | while ((fl = *before) != NULL) { | 2074 | while ((fl = *before) != NULL) { |
2078 | if (fl->fl_file == filp) { | 2075 | if (fl->fl_file == filp) { |
2079 | if (IS_FLOCK(fl)) { | 2076 | if (IS_FLOCK(fl)) { |
2080 | locks_delete_lock(before); | 2077 | locks_delete_lock(before); |
2081 | continue; | 2078 | continue; |
2082 | } | 2079 | } |
2083 | if (IS_LEASE(fl)) { | 2080 | if (IS_LEASE(fl)) { |
2084 | lease_modify(before, F_UNLCK); | 2081 | lease_modify(before, F_UNLCK); |
2085 | continue; | 2082 | continue; |
2086 | } | 2083 | } |
2087 | /* What? */ | 2084 | /* What? */ |
2088 | BUG(); | 2085 | BUG(); |
2089 | } | 2086 | } |
2090 | before = &fl->fl_next; | 2087 | before = &fl->fl_next; |
2091 | } | 2088 | } |
2092 | unlock_flocks(); | 2089 | unlock_flocks(); |
2093 | } | 2090 | } |
2094 | 2091 | ||
2095 | /** | 2092 | /** |
2096 | * posix_unblock_lock - stop waiting for a file lock | 2093 | * posix_unblock_lock - stop waiting for a file lock |
2097 | * @filp: how the file was opened | 2094 | * @filp: how the file was opened |
2098 | * @waiter: the lock which was waiting | 2095 | * @waiter: the lock which was waiting |
2099 | * | 2096 | * |
2100 | * lockd needs to block waiting for locks. | 2097 | * lockd needs to block waiting for locks. |
2101 | */ | 2098 | */ |
2102 | int | 2099 | int |
2103 | posix_unblock_lock(struct file *filp, struct file_lock *waiter) | 2100 | posix_unblock_lock(struct file *filp, struct file_lock *waiter) |
2104 | { | 2101 | { |
2105 | int status = 0; | 2102 | int status = 0; |
2106 | 2103 | ||
2107 | lock_flocks(); | 2104 | lock_flocks(); |
2108 | if (waiter->fl_next) | 2105 | if (waiter->fl_next) |
2109 | __locks_delete_block(waiter); | 2106 | __locks_delete_block(waiter); |
2110 | else | 2107 | else |
2111 | status = -ENOENT; | 2108 | status = -ENOENT; |
2112 | unlock_flocks(); | 2109 | unlock_flocks(); |
2113 | return status; | 2110 | return status; |
2114 | } | 2111 | } |
2115 | 2112 | ||
2116 | EXPORT_SYMBOL(posix_unblock_lock); | 2113 | EXPORT_SYMBOL(posix_unblock_lock); |
2117 | 2114 | ||
2118 | /** | 2115 | /** |
2119 | * vfs_cancel_lock - file byte range unblock lock | 2116 | * vfs_cancel_lock - file byte range unblock lock |
2120 | * @filp: The file to apply the unblock to | 2117 | * @filp: The file to apply the unblock to |
2121 | * @fl: The lock to be unblocked | 2118 | * @fl: The lock to be unblocked |
2122 | * | 2119 | * |
2123 | * Used by lock managers to cancel blocked requests | 2120 | * Used by lock managers to cancel blocked requests |
2124 | */ | 2121 | */ |
2125 | int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 2122 | int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
2126 | { | 2123 | { |
2127 | if (filp->f_op && filp->f_op->lock) | 2124 | if (filp->f_op && filp->f_op->lock) |
2128 | return filp->f_op->lock(filp, F_CANCELLK, fl); | 2125 | return filp->f_op->lock(filp, F_CANCELLK, fl); |
2129 | return 0; | 2126 | return 0; |
2130 | } | 2127 | } |
2131 | 2128 | ||
2132 | EXPORT_SYMBOL_GPL(vfs_cancel_lock); | 2129 | EXPORT_SYMBOL_GPL(vfs_cancel_lock); |
2133 | 2130 | ||
2134 | #ifdef CONFIG_PROC_FS | 2131 | #ifdef CONFIG_PROC_FS |
2135 | #include <linux/proc_fs.h> | 2132 | #include <linux/proc_fs.h> |
2136 | #include <linux/seq_file.h> | 2133 | #include <linux/seq_file.h> |
2137 | 2134 | ||
2138 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, | 2135 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, |
2139 | loff_t id, char *pfx) | 2136 | loff_t id, char *pfx) |
2140 | { | 2137 | { |
2141 | struct inode *inode = NULL; | 2138 | struct inode *inode = NULL; |
2142 | unsigned int fl_pid; | 2139 | unsigned int fl_pid; |
2143 | 2140 | ||
2144 | if (fl->fl_nspid) | 2141 | if (fl->fl_nspid) |
2145 | fl_pid = pid_vnr(fl->fl_nspid); | 2142 | fl_pid = pid_vnr(fl->fl_nspid); |
2146 | else | 2143 | else |
2147 | fl_pid = fl->fl_pid; | 2144 | fl_pid = fl->fl_pid; |
2148 | 2145 | ||
2149 | if (fl->fl_file != NULL) | 2146 | if (fl->fl_file != NULL) |
2150 | inode = fl->fl_file->f_path.dentry->d_inode; | 2147 | inode = fl->fl_file->f_path.dentry->d_inode; |
2151 | 2148 | ||
2152 | seq_printf(f, "%lld:%s ", id, pfx); | 2149 | seq_printf(f, "%lld:%s ", id, pfx); |
2153 | if (IS_POSIX(fl)) { | 2150 | if (IS_POSIX(fl)) { |
2154 | seq_printf(f, "%6s %s ", | 2151 | seq_printf(f, "%6s %s ", |
2155 | (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", | 2152 | (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", |
2156 | (inode == NULL) ? "*NOINODE*" : | 2153 | (inode == NULL) ? "*NOINODE*" : |
2157 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); | 2154 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); |
2158 | } else if (IS_FLOCK(fl)) { | 2155 | } else if (IS_FLOCK(fl)) { |
2159 | if (fl->fl_type & LOCK_MAND) { | 2156 | if (fl->fl_type & LOCK_MAND) { |
2160 | seq_printf(f, "FLOCK MSNFS "); | 2157 | seq_printf(f, "FLOCK MSNFS "); |
2161 | } else { | 2158 | } else { |
2162 | seq_printf(f, "FLOCK ADVISORY "); | 2159 | seq_printf(f, "FLOCK ADVISORY "); |
2163 | } | 2160 | } |
2164 | } else if (IS_LEASE(fl)) { | 2161 | } else if (IS_LEASE(fl)) { |
2165 | seq_printf(f, "LEASE "); | 2162 | seq_printf(f, "LEASE "); |
2166 | if (fl->fl_type & F_INPROGRESS) | 2163 | if (fl->fl_type & F_INPROGRESS) |
2167 | seq_printf(f, "BREAKING "); | 2164 | seq_printf(f, "BREAKING "); |
2168 | else if (fl->fl_file) | 2165 | else if (fl->fl_file) |
2169 | seq_printf(f, "ACTIVE "); | 2166 | seq_printf(f, "ACTIVE "); |
2170 | else | 2167 | else |
2171 | seq_printf(f, "BREAKER "); | 2168 | seq_printf(f, "BREAKER "); |
2172 | } else { | 2169 | } else { |
2173 | seq_printf(f, "UNKNOWN UNKNOWN "); | 2170 | seq_printf(f, "UNKNOWN UNKNOWN "); |
2174 | } | 2171 | } |
2175 | if (fl->fl_type & LOCK_MAND) { | 2172 | if (fl->fl_type & LOCK_MAND) { |
2176 | seq_printf(f, "%s ", | 2173 | seq_printf(f, "%s ", |
2177 | (fl->fl_type & LOCK_READ) | 2174 | (fl->fl_type & LOCK_READ) |
2178 | ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " | 2175 | ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " |
2179 | : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); | 2176 | : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); |
2180 | } else { | 2177 | } else { |
2181 | seq_printf(f, "%s ", | 2178 | seq_printf(f, "%s ", |
2182 | (fl->fl_type & F_INPROGRESS) | 2179 | (fl->fl_type & F_INPROGRESS) |
2183 | ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " | 2180 | ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " |
2184 | : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); | 2181 | : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); |
2185 | } | 2182 | } |
2186 | if (inode) { | 2183 | if (inode) { |
2187 | #ifdef WE_CAN_BREAK_LSLK_NOW | 2184 | #ifdef WE_CAN_BREAK_LSLK_NOW |
2188 | seq_printf(f, "%d %s:%ld ", fl_pid, | 2185 | seq_printf(f, "%d %s:%ld ", fl_pid, |
2189 | inode->i_sb->s_id, inode->i_ino); | 2186 | inode->i_sb->s_id, inode->i_ino); |
2190 | #else | 2187 | #else |
2191 | /* userspace relies on this representation of dev_t ;-( */ | 2188 | /* userspace relies on this representation of dev_t ;-( */ |
2192 | seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, | 2189 | seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, |
2193 | MAJOR(inode->i_sb->s_dev), | 2190 | MAJOR(inode->i_sb->s_dev), |
2194 | MINOR(inode->i_sb->s_dev), inode->i_ino); | 2191 | MINOR(inode->i_sb->s_dev), inode->i_ino); |
2195 | #endif | 2192 | #endif |
2196 | } else { | 2193 | } else { |
2197 | seq_printf(f, "%d <none>:0 ", fl_pid); | 2194 | seq_printf(f, "%d <none>:0 ", fl_pid); |
2198 | } | 2195 | } |
2199 | if (IS_POSIX(fl)) { | 2196 | if (IS_POSIX(fl)) { |
2200 | if (fl->fl_end == OFFSET_MAX) | 2197 | if (fl->fl_end == OFFSET_MAX) |
2201 | seq_printf(f, "%Ld EOF\n", fl->fl_start); | 2198 | seq_printf(f, "%Ld EOF\n", fl->fl_start); |
2202 | else | 2199 | else |
2203 | seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); | 2200 | seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); |
2204 | } else { | 2201 | } else { |
2205 | seq_printf(f, "0 EOF\n"); | 2202 | seq_printf(f, "0 EOF\n"); |
2206 | } | 2203 | } |
2207 | } | 2204 | } |
2208 | 2205 | ||
2209 | static int locks_show(struct seq_file *f, void *v) | 2206 | static int locks_show(struct seq_file *f, void *v) |
2210 | { | 2207 | { |
2211 | struct file_lock *fl, *bfl; | 2208 | struct file_lock *fl, *bfl; |
2212 | 2209 | ||
2213 | fl = list_entry(v, struct file_lock, fl_link); | 2210 | fl = list_entry(v, struct file_lock, fl_link); |
2214 | 2211 | ||
2215 | lock_get_status(f, fl, *((loff_t *)f->private), ""); | 2212 | lock_get_status(f, fl, *((loff_t *)f->private), ""); |
2216 | 2213 | ||
2217 | list_for_each_entry(bfl, &fl->fl_block, fl_block) | 2214 | list_for_each_entry(bfl, &fl->fl_block, fl_block) |
2218 | lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); | 2215 | lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); |
2219 | 2216 | ||
2220 | return 0; | 2217 | return 0; |
2221 | } | 2218 | } |
2222 | 2219 | ||
2223 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2220 | static void *locks_start(struct seq_file *f, loff_t *pos) |
2224 | { | 2221 | { |
2225 | loff_t *p = f->private; | 2222 | loff_t *p = f->private; |
2226 | 2223 | ||
2227 | lock_flocks(); | 2224 | lock_flocks(); |
2228 | *p = (*pos + 1); | 2225 | *p = (*pos + 1); |
2229 | return seq_list_start(&file_lock_list, *pos); | 2226 | return seq_list_start(&file_lock_list, *pos); |
2230 | } | 2227 | } |
2231 | 2228 | ||
2232 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | 2229 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) |
2233 | { | 2230 | { |
2234 | loff_t *p = f->private; | 2231 | loff_t *p = f->private; |
2235 | ++*p; | 2232 | ++*p; |
2236 | return seq_list_next(v, &file_lock_list, pos); | 2233 | return seq_list_next(v, &file_lock_list, pos); |
2237 | } | 2234 | } |
2238 | 2235 | ||
2239 | static void locks_stop(struct seq_file *f, void *v) | 2236 | static void locks_stop(struct seq_file *f, void *v) |
2240 | { | 2237 | { |
2241 | unlock_flocks(); | 2238 | unlock_flocks(); |
2242 | } | 2239 | } |
2243 | 2240 | ||
2244 | static const struct seq_operations locks_seq_operations = { | 2241 | static const struct seq_operations locks_seq_operations = { |
2245 | .start = locks_start, | 2242 | .start = locks_start, |
2246 | .next = locks_next, | 2243 | .next = locks_next, |
2247 | .stop = locks_stop, | 2244 | .stop = locks_stop, |
2248 | .show = locks_show, | 2245 | .show = locks_show, |
2249 | }; | 2246 | }; |
2250 | 2247 | ||
2251 | static int locks_open(struct inode *inode, struct file *filp) | 2248 | static int locks_open(struct inode *inode, struct file *filp) |
2252 | { | 2249 | { |
2253 | return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); | 2250 | return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); |
2254 | } | 2251 | } |
2255 | 2252 | ||
2256 | static const struct file_operations proc_locks_operations = { | 2253 | static const struct file_operations proc_locks_operations = { |
2257 | .open = locks_open, | 2254 | .open = locks_open, |
2258 | .read = seq_read, | 2255 | .read = seq_read, |
2259 | .llseek = seq_lseek, | 2256 | .llseek = seq_lseek, |
2260 | .release = seq_release_private, | 2257 | .release = seq_release_private, |
2261 | }; | 2258 | }; |
2262 | 2259 | ||
2263 | static int __init proc_locks_init(void) | 2260 | static int __init proc_locks_init(void) |
2264 | { | 2261 | { |
2265 | proc_create("locks", 0, NULL, &proc_locks_operations); | 2262 | proc_create("locks", 0, NULL, &proc_locks_operations); |
2266 | return 0; | 2263 | return 0; |
2267 | } | 2264 | } |
2268 | module_init(proc_locks_init); | 2265 | module_init(proc_locks_init); |
2269 | #endif | 2266 | #endif |
2270 | 2267 | ||
2271 | /** | 2268 | /** |
2272 | * lock_may_read - checks that the region is free of locks | 2269 | * lock_may_read - checks that the region is free of locks |
2273 | * @inode: the inode that is being read | 2270 | * @inode: the inode that is being read |
2274 | * @start: the first byte to read | 2271 | * @start: the first byte to read |
2275 | * @len: the number of bytes to read | 2272 | * @len: the number of bytes to read |
2276 | * | 2273 | * |
2277 | * Emulates Windows locking requirements. Whole-file | 2274 | * Emulates Windows locking requirements. Whole-file |
2278 | * mandatory locks (share modes) can prohibit a read and | 2275 | * mandatory locks (share modes) can prohibit a read and |
2279 | * byte-range POSIX locks can prohibit a read if they overlap. | 2276 | * byte-range POSIX locks can prohibit a read if they overlap. |
2280 | * | 2277 | * |
2281 | * N.B. this function is only ever called | 2278 | * N.B. this function is only ever called |
2282 | * from knfsd and ownership of locks is never checked. | 2279 | * from knfsd and ownership of locks is never checked. |
2283 | */ | 2280 | */ |
2284 | int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | 2281 | int lock_may_read(struct inode *inode, loff_t start, unsigned long len) |
2285 | { | 2282 | { |
2286 | struct file_lock *fl; | 2283 | struct file_lock *fl; |
2287 | int result = 1; | 2284 | int result = 1; |
2288 | lock_flocks(); | 2285 | lock_flocks(); |
2289 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2286 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2290 | if (IS_POSIX(fl)) { | 2287 | if (IS_POSIX(fl)) { |
2291 | if (fl->fl_type == F_RDLCK) | 2288 | if (fl->fl_type == F_RDLCK) |
2292 | continue; | 2289 | continue; |
2293 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2290 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
2294 | continue; | 2291 | continue; |
2295 | } else if (IS_FLOCK(fl)) { | 2292 | } else if (IS_FLOCK(fl)) { |
2296 | if (!(fl->fl_type & LOCK_MAND)) | 2293 | if (!(fl->fl_type & LOCK_MAND)) |
2297 | continue; | 2294 | continue; |
2298 | if (fl->fl_type & LOCK_READ) | 2295 | if (fl->fl_type & LOCK_READ) |
2299 | continue; | 2296 | continue; |
2300 | } else | 2297 | } else |
2301 | continue; | 2298 | continue; |
2302 | result = 0; | 2299 | result = 0; |
2303 | break; | 2300 | break; |
2304 | } | 2301 | } |
2305 | unlock_flocks(); | 2302 | unlock_flocks(); |
2306 | return result; | 2303 | return result; |
2307 | } | 2304 | } |
2308 | 2305 | ||
2309 | EXPORT_SYMBOL(lock_may_read); | 2306 | EXPORT_SYMBOL(lock_may_read); |
2310 | 2307 | ||
2311 | /** | 2308 | /** |
2312 | * lock_may_write - checks that the region is free of locks | 2309 | * lock_may_write - checks that the region is free of locks |
2313 | * @inode: the inode that is being written | 2310 | * @inode: the inode that is being written |
2314 | * @start: the first byte to write | 2311 | * @start: the first byte to write |
2315 | * @len: the number of bytes to write | 2312 | * @len: the number of bytes to write |
2316 | * | 2313 | * |
2317 | * Emulates Windows locking requirements. Whole-file | 2314 | * Emulates Windows locking requirements. Whole-file |
2318 | * mandatory locks (share modes) can prohibit a write and | 2315 | * mandatory locks (share modes) can prohibit a write and |
2319 | * byte-range POSIX locks can prohibit a write if they overlap. | 2316 | * byte-range POSIX locks can prohibit a write if they overlap. |
2320 | * | 2317 | * |
2321 | * N.B. this function is only ever called | 2318 | * N.B. this function is only ever called |
2322 | * from knfsd and ownership of locks is never checked. | 2319 | * from knfsd and ownership of locks is never checked. |
2323 | */ | 2320 | */ |
2324 | int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | 2321 | int lock_may_write(struct inode *inode, loff_t start, unsigned long len) |
2325 | { | 2322 | { |
2326 | struct file_lock *fl; | 2323 | struct file_lock *fl; |
2327 | int result = 1; | 2324 | int result = 1; |
2328 | lock_flocks(); | 2325 | lock_flocks(); |
2329 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2326 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2330 | if (IS_POSIX(fl)) { | 2327 | if (IS_POSIX(fl)) { |
2331 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2328 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
2332 | continue; | 2329 | continue; |
2333 | } else if (IS_FLOCK(fl)) { | 2330 | } else if (IS_FLOCK(fl)) { |
2334 | if (!(fl->fl_type & LOCK_MAND)) | 2331 | if (!(fl->fl_type & LOCK_MAND)) |
2335 | continue; | 2332 | continue; |
2336 | if (fl->fl_type & LOCK_WRITE) | 2333 | if (fl->fl_type & LOCK_WRITE) |
2337 | continue; | 2334 | continue; |
2338 | } else | 2335 | } else |
2339 | continue; | 2336 | continue; |
2340 | result = 0; | 2337 | result = 0; |
2341 | break; | 2338 | break; |
2342 | } | 2339 | } |
2343 | unlock_flocks(); | 2340 | unlock_flocks(); |
2344 | return result; | 2341 | return result; |
2345 | } | 2342 | } |
2346 | 2343 | ||
2347 | EXPORT_SYMBOL(lock_may_write); | 2344 | EXPORT_SYMBOL(lock_may_write); |
2348 | 2345 | ||
2349 | static int __init filelock_init(void) | 2346 | static int __init filelock_init(void) |
2350 | { | 2347 | { |
2351 | filelock_cache = kmem_cache_create("file_lock_cache", | 2348 | filelock_cache = kmem_cache_create("file_lock_cache", |
2352 | sizeof(struct file_lock), 0, SLAB_PANIC, | 2349 | sizeof(struct file_lock), 0, SLAB_PANIC, |
2353 | init_once); | 2350 | init_once); |
2354 | return 0; | 2351 | return 0; |
2355 | } | 2352 | } |
2356 | 2353 | ||
2357 | core_initcall(filelock_init); | 2354 | core_initcall(filelock_init); |
2358 | 2355 |
include/linux/fs.h
1 | #ifndef _LINUX_FS_H | 1 | #ifndef _LINUX_FS_H |
2 | #define _LINUX_FS_H | 2 | #define _LINUX_FS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This file has definitions for some important file table | 5 | * This file has definitions for some important file table |
6 | * structures etc. | 6 | * structures etc. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/limits.h> | 9 | #include <linux/limits.h> |
10 | #include <linux/ioctl.h> | 10 | #include <linux/ioctl.h> |
11 | #include <linux/blk_types.h> | 11 | #include <linux/blk_types.h> |
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change | 15 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change |
16 | * the file limit at runtime and only root can increase the per-process | 16 | * the file limit at runtime and only root can increase the per-process |
17 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute | 17 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
18 | * upper limit on files-per-process. | 18 | * upper limit on files-per-process. |
19 | * | 19 | * |
20 | * Some programs (notably those using select()) may have to be | 20 | * Some programs (notably those using select()) may have to be |
21 | * recompiled to take full advantage of the new limits.. | 21 | * recompiled to take full advantage of the new limits.. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | /* Fixed constants first: */ | 24 | /* Fixed constants first: */ |
25 | #undef NR_OPEN | 25 | #undef NR_OPEN |
26 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ | 26 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ |
27 | 27 | ||
28 | #define BLOCK_SIZE_BITS 10 | 28 | #define BLOCK_SIZE_BITS 10 |
29 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) | 29 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) |
30 | 30 | ||
31 | #define SEEK_SET 0 /* seek relative to beginning of file */ | 31 | #define SEEK_SET 0 /* seek relative to beginning of file */ |
32 | #define SEEK_CUR 1 /* seek relative to current file position */ | 32 | #define SEEK_CUR 1 /* seek relative to current file position */ |
33 | #define SEEK_END 2 /* seek relative to end of file */ | 33 | #define SEEK_END 2 /* seek relative to end of file */ |
34 | #define SEEK_MAX SEEK_END | 34 | #define SEEK_MAX SEEK_END |
35 | 35 | ||
36 | struct fstrim_range { | 36 | struct fstrim_range { |
37 | uint64_t start; | 37 | uint64_t start; |
38 | uint64_t len; | 38 | uint64_t len; |
39 | uint64_t minlen; | 39 | uint64_t minlen; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | /* And dynamically-tunable limits and defaults: */ | 42 | /* And dynamically-tunable limits and defaults: */ |
43 | struct files_stat_struct { | 43 | struct files_stat_struct { |
44 | unsigned long nr_files; /* read only */ | 44 | unsigned long nr_files; /* read only */ |
45 | unsigned long nr_free_files; /* read only */ | 45 | unsigned long nr_free_files; /* read only */ |
46 | unsigned long max_files; /* tunable */ | 46 | unsigned long max_files; /* tunable */ |
47 | }; | 47 | }; |
48 | 48 | ||
49 | struct inodes_stat_t { | 49 | struct inodes_stat_t { |
50 | int nr_inodes; | 50 | int nr_inodes; |
51 | int nr_unused; | 51 | int nr_unused; |
52 | int dummy[5]; /* padding for sysctl ABI compatibility */ | 52 | int dummy[5]; /* padding for sysctl ABI compatibility */ |
53 | }; | 53 | }; |
54 | 54 | ||
55 | 55 | ||
56 | #define NR_FILE 8192 /* this can well be larger on a larger system */ | 56 | #define NR_FILE 8192 /* this can well be larger on a larger system */ |
57 | 57 | ||
58 | #define MAY_EXEC 1 | 58 | #define MAY_EXEC 1 |
59 | #define MAY_WRITE 2 | 59 | #define MAY_WRITE 2 |
60 | #define MAY_READ 4 | 60 | #define MAY_READ 4 |
61 | #define MAY_APPEND 8 | 61 | #define MAY_APPEND 8 |
62 | #define MAY_ACCESS 16 | 62 | #define MAY_ACCESS 16 |
63 | #define MAY_OPEN 32 | 63 | #define MAY_OPEN 32 |
64 | #define MAY_CHDIR 64 | 64 | #define MAY_CHDIR 64 |
65 | 65 | ||
66 | /* | 66 | /* |
67 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond | 67 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond |
68 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() | 68 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() |
69 | */ | 69 | */ |
70 | 70 | ||
71 | /* file is open for reading */ | 71 | /* file is open for reading */ |
72 | #define FMODE_READ ((__force fmode_t)0x1) | 72 | #define FMODE_READ ((__force fmode_t)0x1) |
73 | /* file is open for writing */ | 73 | /* file is open for writing */ |
74 | #define FMODE_WRITE ((__force fmode_t)0x2) | 74 | #define FMODE_WRITE ((__force fmode_t)0x2) |
75 | /* file is seekable */ | 75 | /* file is seekable */ |
76 | #define FMODE_LSEEK ((__force fmode_t)0x4) | 76 | #define FMODE_LSEEK ((__force fmode_t)0x4) |
77 | /* file can be accessed using pread */ | 77 | /* file can be accessed using pread */ |
78 | #define FMODE_PREAD ((__force fmode_t)0x8) | 78 | #define FMODE_PREAD ((__force fmode_t)0x8) |
79 | /* file can be accessed using pwrite */ | 79 | /* file can be accessed using pwrite */ |
80 | #define FMODE_PWRITE ((__force fmode_t)0x10) | 80 | #define FMODE_PWRITE ((__force fmode_t)0x10) |
81 | /* File is opened for execution with sys_execve / sys_uselib */ | 81 | /* File is opened for execution with sys_execve / sys_uselib */ |
82 | #define FMODE_EXEC ((__force fmode_t)0x20) | 82 | #define FMODE_EXEC ((__force fmode_t)0x20) |
83 | /* File is opened with O_NDELAY (only set for block devices) */ | 83 | /* File is opened with O_NDELAY (only set for block devices) */ |
84 | #define FMODE_NDELAY ((__force fmode_t)0x40) | 84 | #define FMODE_NDELAY ((__force fmode_t)0x40) |
85 | /* File is opened with O_EXCL (only set for block devices) */ | 85 | /* File is opened with O_EXCL (only set for block devices) */ |
86 | #define FMODE_EXCL ((__force fmode_t)0x80) | 86 | #define FMODE_EXCL ((__force fmode_t)0x80) |
87 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls | 87 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls |
88 | (specialy hack for floppy.c) */ | 88 | (specialy hack for floppy.c) */ |
89 | #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) | 89 | #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Don't update ctime and mtime. | 92 | * Don't update ctime and mtime. |
93 | * | 93 | * |
94 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll | 94 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll |
95 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. | 95 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. |
96 | */ | 96 | */ |
97 | #define FMODE_NOCMTIME ((__force fmode_t)0x800) | 97 | #define FMODE_NOCMTIME ((__force fmode_t)0x800) |
98 | 98 | ||
99 | /* Expect random access pattern */ | 99 | /* Expect random access pattern */ |
100 | #define FMODE_RANDOM ((__force fmode_t)0x1000) | 100 | #define FMODE_RANDOM ((__force fmode_t)0x1000) |
101 | 101 | ||
102 | /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ | 102 | /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ |
103 | #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) | 103 | #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) |
104 | 104 | ||
105 | /* File was opened by fanotify and shouldn't generate fanotify events */ | 105 | /* File was opened by fanotify and shouldn't generate fanotify events */ |
106 | #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) | 106 | #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * The below are the various read and write types that we support. Some of | 109 | * The below are the various read and write types that we support. Some of |
110 | * them include behavioral modifiers that send information down to the | 110 | * them include behavioral modifiers that send information down to the |
111 | * block layer and IO scheduler. Terminology: | 111 | * block layer and IO scheduler. Terminology: |
112 | * | 112 | * |
113 | * The block layer uses device plugging to defer IO a little bit, in | 113 | * The block layer uses device plugging to defer IO a little bit, in |
114 | * the hope that we will see more IO very shortly. This increases | 114 | * the hope that we will see more IO very shortly. This increases |
115 | * coalescing of adjacent IO and thus reduces the number of IOs we | 115 | * coalescing of adjacent IO and thus reduces the number of IOs we |
116 | * have to send to the device. It also allows for better queuing, | 116 | * have to send to the device. It also allows for better queuing, |
117 | * if the IO isn't mergeable. If the caller is going to be waiting | 117 | * if the IO isn't mergeable. If the caller is going to be waiting |
118 | * for the IO, then he must ensure that the device is unplugged so | 118 | * for the IO, then he must ensure that the device is unplugged so |
119 | * that the IO is dispatched to the driver. | 119 | * that the IO is dispatched to the driver. |
120 | * | 120 | * |
121 | * All IO is handled async in Linux. This is fine for background | 121 | * All IO is handled async in Linux. This is fine for background |
122 | * writes, but for reads or writes that someone waits for completion | 122 | * writes, but for reads or writes that someone waits for completion |
123 | * on, we want to notify the block layer and IO scheduler so that they | 123 | * on, we want to notify the block layer and IO scheduler so that they |
124 | * know about it. That allows them to make better scheduling | 124 | * know about it. That allows them to make better scheduling |
125 | * decisions. So when the below references 'sync' and 'async', it | 125 | * decisions. So when the below references 'sync' and 'async', it |
126 | * is referencing this priority hint. | 126 | * is referencing this priority hint. |
127 | * | 127 | * |
128 | * With that in mind, the available types are: | 128 | * With that in mind, the available types are: |
129 | * | 129 | * |
130 | * READ A normal read operation. Device will be plugged. | 130 | * READ A normal read operation. Device will be plugged. |
131 | * READ_SYNC A synchronous read. Device is not plugged, caller can | 131 | * READ_SYNC A synchronous read. Device is not plugged, caller can |
132 | * immediately wait on this read without caring about | 132 | * immediately wait on this read without caring about |
133 | * unplugging. | 133 | * unplugging. |
134 | * READA Used for read-ahead operations. Lower priority, and the | 134 | * READA Used for read-ahead operations. Lower priority, and the |
135 | * block layer could (in theory) choose to ignore this | 135 | * block layer could (in theory) choose to ignore this |
136 | * request if it runs into resource problems. | 136 | * request if it runs into resource problems. |
137 | * WRITE A normal async write. Device will be plugged. | 137 | * WRITE A normal async write. Device will be plugged. |
138 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down | 138 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down |
139 | * the hint that someone will be waiting on this IO | 139 | * the hint that someone will be waiting on this IO |
140 | * shortly. The device must still be unplugged explicitly, | 140 | * shortly. The device must still be unplugged explicitly, |
141 | * WRITE_SYNC_PLUG does not do this as we could be | 141 | * WRITE_SYNC_PLUG does not do this as we could be |
142 | * submitting more writes before we actually wait on any | 142 | * submitting more writes before we actually wait on any |
143 | * of them. | 143 | * of them. |
144 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device | 144 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device |
145 | * immediately after submission. The write equivalent | 145 | * immediately after submission. The write equivalent |
146 | * of READ_SYNC. | 146 | * of READ_SYNC. |
147 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. | 147 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. |
148 | * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. | 148 | * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. |
149 | * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on | 149 | * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on |
150 | * non-volatile media on completion. | 150 | * non-volatile media on completion. |
151 | * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded | 151 | * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded |
152 | * by a cache flush and data is guaranteed to be on | 152 | * by a cache flush and data is guaranteed to be on |
153 | * non-volatile media on completion. | 153 | * non-volatile media on completion. |
154 | * | 154 | * |
155 | */ | 155 | */ |
156 | #define RW_MASK REQ_WRITE | 156 | #define RW_MASK REQ_WRITE |
157 | #define RWA_MASK REQ_RAHEAD | 157 | #define RWA_MASK REQ_RAHEAD |
158 | 158 | ||
159 | #define READ 0 | 159 | #define READ 0 |
160 | #define WRITE RW_MASK | 160 | #define WRITE RW_MASK |
161 | #define READA RWA_MASK | 161 | #define READA RWA_MASK |
162 | 162 | ||
163 | #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) | 163 | #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) |
164 | #define READ_META (READ | REQ_META) | 164 | #define READ_META (READ | REQ_META) |
165 | #define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) | 165 | #define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) |
166 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) | 166 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) |
167 | #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) | 167 | #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) |
168 | #define WRITE_META (WRITE | REQ_META) | 168 | #define WRITE_META (WRITE | REQ_META) |
169 | #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | 169 | #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
170 | REQ_FLUSH) | 170 | REQ_FLUSH) |
171 | #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | 171 | #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
172 | REQ_FUA) | 172 | REQ_FUA) |
173 | #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | 173 | #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
174 | REQ_FLUSH | REQ_FUA) | 174 | REQ_FLUSH | REQ_FUA) |
175 | 175 | ||
176 | #define SEL_IN 1 | 176 | #define SEL_IN 1 |
177 | #define SEL_OUT 2 | 177 | #define SEL_OUT 2 |
178 | #define SEL_EX 4 | 178 | #define SEL_EX 4 |
179 | 179 | ||
180 | /* public flags for file_system_type */ | 180 | /* public flags for file_system_type */ |
181 | #define FS_REQUIRES_DEV 1 | 181 | #define FS_REQUIRES_DEV 1 |
182 | #define FS_BINARY_MOUNTDATA 2 | 182 | #define FS_BINARY_MOUNTDATA 2 |
183 | #define FS_HAS_SUBTYPE 4 | 183 | #define FS_HAS_SUBTYPE 4 |
184 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 184 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
185 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() | 185 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() |
186 | * during rename() internally. | 186 | * during rename() internally. |
187 | */ | 187 | */ |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * These are the fs-independent mount-flags: up to 32 flags are supported | 190 | * These are the fs-independent mount-flags: up to 32 flags are supported |
191 | */ | 191 | */ |
192 | #define MS_RDONLY 1 /* Mount read-only */ | 192 | #define MS_RDONLY 1 /* Mount read-only */ |
193 | #define MS_NOSUID 2 /* Ignore suid and sgid bits */ | 193 | #define MS_NOSUID 2 /* Ignore suid and sgid bits */ |
194 | #define MS_NODEV 4 /* Disallow access to device special files */ | 194 | #define MS_NODEV 4 /* Disallow access to device special files */ |
195 | #define MS_NOEXEC 8 /* Disallow program execution */ | 195 | #define MS_NOEXEC 8 /* Disallow program execution */ |
196 | #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ | 196 | #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ |
/* Mount flags (MS_*): passed in by mount(2) and kept in sb->s_flags. */
#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
#define MS_NOATIME	1024	/* Do not update access times. */
#define MS_NODIRATIME	2048	/* Do not update directory access times */
#define MS_BIND		4096
#define MS_MOVE		8192
#define MS_REC		16384
#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
				   MS_VERBOSE is deprecated. */
#define MS_SILENT	32768
#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
#define MS_PRIVATE	(1<<18)	/* change to private */
#define MS_SLAVE	(1<<19)	/* change to slave */
#define MS_SHARED	(1<<20)	/* change to shared */
#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
#define MS_KERNMOUNT	(1<<22)	/* this is a kern_mount call */
#define MS_I_VERSION	(1<<23)	/* Update inode I_version field */
#define MS_STRICTATIME	(1<<24)	/* Always perform atime updates */
#define MS_BORN		(1<<29)
#define MS_ACTIVE	(1<<30)
#define MS_NOUSER	(1<<31)

/*
 * Superblock flags that can be altered by MS_REMOUNT
 */
#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)

/*
 * Old magic mount flag and mask
 */
#define MS_MGC_VAL 0xC0ED0000
#define MS_MGC_MSK 0xffff0000
/* Inode flags - they have nothing to superblock flags now */

#define S_SYNC		1	/* Writes are synced at once */
#define S_NOATIME	2	/* Do not update access times */
#define S_APPEND	4	/* Append-only file */
#define S_IMMUTABLE	8	/* Immutable file */
#define S_DEAD		16	/* removed, but still open directory */
#define S_NOQUOTA	32	/* Inode is not counted to quota */
#define S_DIRSYNC	64	/* Directory modifications are synchronous */
#define S_NOCMTIME	128	/* Do not update file c/mtime */
#define S_SWAPFILE	256	/* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	512	/* Inode is fs-internal */
#define S_IMA		1024	/* Inode has an associated IMA struct */
/*
 * Note that nosuid etc flags are inode-specific: setting some file-system
 * flags just means all the inodes inherit those flags by default. It might be
 * possible to override it selectively if you really wanted to with some
 * ioctl() that is not currently implemented.
 *
 * Exception: MS_RDONLY is always applied to the entire file system.
 *
 * Unfortunately, it is possible to change a filesystems flags with it mounted
 * with files in use. This means that all of the inodes will not have their
 * i_flags updated. Hence, i_flags no longer inherit the superblock mount
 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
 */
/* Test a superblock (mount) flag via the inode's superblock. */
#define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))

#define IS_RDONLY(inode)	((inode)->i_sb->s_flags & MS_RDONLY)
/* IS_SYNC/IS_DIRSYNC check both the mount flag and the per-inode flag. */
#define IS_SYNC(inode)		(__IS_FLG(inode, MS_SYNCHRONOUS) || \
					((inode)->i_flags & S_SYNC))
#define IS_DIRSYNC(inode)	(__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
					((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode)	__IS_FLG(inode, MS_MANDLOCK)
#define IS_NOATIME(inode)	__IS_FLG(inode, MS_RDONLY|MS_NOATIME)
#define IS_I_VERSION(inode)	__IS_FLG(inode, MS_I_VERSION)

#define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode)	((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
#define IS_POSIXACL(inode)	__IS_FLG(inode, MS_POSIXACL)

#define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode)	((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode)		((inode)->i_flags & S_IMA)

/* the read-only stuff doesn't really belong here, but any other place is
   probably as bad and I don't want to create yet another include file. */

#define BLKROSET   _IO(0x12,93)	/* set device read-only (0 = read-write) */
#define BLKROGET   _IO(0x12,94)	/* get read-only status (0 = read_write) */
#define BLKRRPART  _IO(0x12,95)	/* re-read partition table */
#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
#define BLKFLSBUF  _IO(0x12,97)	/* flush buffer cache */
#define BLKRASET   _IO(0x12,98)	/* set read ahead for block device */
#define BLKRAGET   _IO(0x12,99)	/* get current read ahead setting */
#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
#if 0
#define BLKPG      _IO(0x12,105)/* See blkpg.h */

/* Some people are morons.  Do not use sizeof! */

#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
/* This was here just to show that the number is taken -
   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
#endif
/* A jump here: 108-111 have been used for various private purposes. */
#define BLKBSZGET  _IOR(0x12,112,size_t)
#define BLKBSZSET  _IOW(0x12,113,size_t)
#define BLKGETSIZE64 _IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
#define BLKTRACESTART _IO(0x12,116)
#define BLKTRACESTOP _IO(0x12,117)
#define BLKTRACETEARDOWN _IO(0x12,118)
#define BLKDISCARD _IO(0x12,119)
#define BLKIOMIN _IO(0x12,120)
#define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123)
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)

#define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
#define FIBMAP	   _IO(0x00,1)	/* bmap access */
#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */

#define FS_IOC_GETFLAGS			_IOR('f', 1, long)
#define FS_IOC_SETFLAGS			_IOW('f', 2, long)
#define FS_IOC_GETVERSION		_IOR('v', 1, long)
#define FS_IOC_SETVERSION		_IOW('v', 2, long)
#define FS_IOC_FIEMAP			_IOWR('f', 11, struct fiemap)
#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
#define FS_IOC32_SETVERSION		_IOW('v', 2, int)

/*
 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
 */
#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
#define	FS_UNRM_FL			0x00000002 /* Undelete */
#define	FS_COMPR_FL			0x00000004 /* Compress file */
#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
#define FS_NODUMP_FL			0x00000040 /* do not dump file */
#define FS_NOATIME_FL			0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define FS_DIRTY_FL			0x00000100
#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
#define FS_ECOMPR_FL			0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */
#define FS_BTREE_FL			0x00001000 /* btree format dir */
#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
#define FS_EXTENT_FL			0x00080000 /* Extents */
#define FS_DIRECTIO_FL			0x00100000 /* Use direct i/o */
#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */

#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */


/* Flag bits for sync_file_range(2). */
#define SYNC_FILE_RANGE_WAIT_BEFORE	1
#define SYNC_FILE_RANGE_WRITE		2
#define SYNC_FILE_RANGE_WAIT_AFTER	4

375 | #ifdef __KERNEL__ | 375 | #ifdef __KERNEL__ |
376 | 376 | ||
377 | #include <linux/linkage.h> | 377 | #include <linux/linkage.h> |
378 | #include <linux/wait.h> | 378 | #include <linux/wait.h> |
379 | #include <linux/types.h> | 379 | #include <linux/types.h> |
380 | #include <linux/kdev_t.h> | 380 | #include <linux/kdev_t.h> |
381 | #include <linux/dcache.h> | 381 | #include <linux/dcache.h> |
382 | #include <linux/path.h> | 382 | #include <linux/path.h> |
383 | #include <linux/stat.h> | 383 | #include <linux/stat.h> |
384 | #include <linux/cache.h> | 384 | #include <linux/cache.h> |
385 | #include <linux/kobject.h> | 385 | #include <linux/kobject.h> |
386 | #include <linux/list.h> | 386 | #include <linux/list.h> |
387 | #include <linux/radix-tree.h> | 387 | #include <linux/radix-tree.h> |
388 | #include <linux/prio_tree.h> | 388 | #include <linux/prio_tree.h> |
389 | #include <linux/init.h> | 389 | #include <linux/init.h> |
390 | #include <linux/pid.h> | 390 | #include <linux/pid.h> |
391 | #include <linux/mutex.h> | 391 | #include <linux/mutex.h> |
392 | #include <linux/capability.h> | 392 | #include <linux/capability.h> |
393 | #include <linux/semaphore.h> | 393 | #include <linux/semaphore.h> |
394 | #include <linux/fiemap.h> | 394 | #include <linux/fiemap.h> |
395 | 395 | ||
396 | #include <asm/atomic.h> | 396 | #include <asm/atomic.h> |
397 | #include <asm/byteorder.h> | 397 | #include <asm/byteorder.h> |
398 | 398 | ||
399 | struct export_operations; | 399 | struct export_operations; |
400 | struct hd_geometry; | 400 | struct hd_geometry; |
401 | struct iovec; | 401 | struct iovec; |
402 | struct nameidata; | 402 | struct nameidata; |
403 | struct kiocb; | 403 | struct kiocb; |
404 | struct pipe_inode_info; | 404 | struct pipe_inode_info; |
405 | struct poll_table_struct; | 405 | struct poll_table_struct; |
406 | struct kstatfs; | 406 | struct kstatfs; |
407 | struct vm_area_struct; | 407 | struct vm_area_struct; |
408 | struct vfsmount; | 408 | struct vfsmount; |
409 | struct cred; | 409 | struct cred; |
410 | 410 | ||
411 | extern void __init inode_init(void); | 411 | extern void __init inode_init(void); |
412 | extern void __init inode_init_early(void); | 412 | extern void __init inode_init_early(void); |
413 | extern void __init files_init(unsigned long); | 413 | extern void __init files_init(unsigned long); |
414 | 414 | ||
415 | extern struct files_stat_struct files_stat; | 415 | extern struct files_stat_struct files_stat; |
416 | extern unsigned long get_max_files(void); | 416 | extern unsigned long get_max_files(void); |
417 | extern int sysctl_nr_open; | 417 | extern int sysctl_nr_open; |
418 | extern struct inodes_stat_t inodes_stat; | 418 | extern struct inodes_stat_t inodes_stat; |
419 | extern int leases_enable, lease_break_time; | 419 | extern int leases_enable, lease_break_time; |
420 | 420 | ||
421 | struct buffer_head; | 421 | struct buffer_head; |
422 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, | 422 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, |
423 | struct buffer_head *bh_result, int create); | 423 | struct buffer_head *bh_result, int create); |
424 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | 424 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, |
425 | ssize_t bytes, void *private, int ret, | 425 | ssize_t bytes, void *private, int ret, |
426 | bool is_async); | 426 | bool is_async); |
427 | 427 | ||
/*
 * Attribute flags.  These should be or-ed together to figure out what
 * has been changed!
 */
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
#define ATTR_ATTR_FLAG	(1 << 10)
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)

450 | /* | 450 | /* |
451 | * This is the Inode Attributes structure, used for notify_change(). It | 451 | * This is the Inode Attributes structure, used for notify_change(). It |
452 | * uses the above definitions as flags, to know which values have changed. | 452 | * uses the above definitions as flags, to know which values have changed. |
453 | * Also, in this manner, a Filesystem can look at only the values it cares | 453 | * Also, in this manner, a Filesystem can look at only the values it cares |
454 | * about. Basically, these are the attributes that the VFS layer can | 454 | * about. Basically, these are the attributes that the VFS layer can |
455 | * request to change from the FS layer. | 455 | * request to change from the FS layer. |
456 | * | 456 | * |
457 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 | 457 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 |
458 | */ | 458 | */ |
459 | struct iattr { | 459 | struct iattr { |
460 | unsigned int ia_valid; | 460 | unsigned int ia_valid; |
461 | umode_t ia_mode; | 461 | umode_t ia_mode; |
462 | uid_t ia_uid; | 462 | uid_t ia_uid; |
463 | gid_t ia_gid; | 463 | gid_t ia_gid; |
464 | loff_t ia_size; | 464 | loff_t ia_size; |
465 | struct timespec ia_atime; | 465 | struct timespec ia_atime; |
466 | struct timespec ia_mtime; | 466 | struct timespec ia_mtime; |
467 | struct timespec ia_ctime; | 467 | struct timespec ia_ctime; |
468 | 468 | ||
469 | /* | 469 | /* |
470 | * Not an attribute, but an auxilary info for filesystems wanting to | 470 | * Not an attribute, but an auxilary info for filesystems wanting to |
471 | * implement an ftruncate() like method. NOTE: filesystem should | 471 | * implement an ftruncate() like method. NOTE: filesystem should |
472 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). | 472 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). |
473 | */ | 473 | */ |
474 | struct file *ia_file; | 474 | struct file *ia_file; |
475 | }; | 475 | }; |
476 | 476 | ||
477 | /* | 477 | /* |
478 | * Includes for diskquotas. | 478 | * Includes for diskquotas. |
479 | */ | 479 | */ |
480 | #include <linux/quota.h> | 480 | #include <linux/quota.h> |
481 | 481 | ||
/**
 * enum positive_aop_returns - aop return codes with specific semantics
 *
 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
 * 			    completed, that the page is still locked, and
 * 			    should be considered active.  The VM uses this hint
 * 			    to return the page to the active list -- it won't
 * 			    be a candidate for writeback again in the near
 * 			    future.  Other callers must be careful to unlock
 * 			    the page if they get this return.  Returned by
 * 			    writepage();
 *
 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
 * 			unlocked it and the page might have been truncated.
 * 			The caller should back up to acquiring a new page and
 * 			trying again.  The aop will be taking reasonable
 * 			precautions not to livelock.  If the caller held a page
 * 			reference, it should drop it before retrying.  Returned
 * 			by readpage().
 *
 * address_space_operation functions return these large constants to indicate
 * special semantics to the caller.  These are much larger than the bytes in a
 * page to allow for functions that return the number of bytes operated on in a
 * given page.
 */

enum positive_aop_returns {
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	AOP_TRUNCATED_PAGE	= 0x80001,
};

#define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
#define AOP_FLAG_CONT_EXPAND		0x0002 /* called from cont_expand */
#define AOP_FLAG_NOFS			0x0004 /* used by filesystem to direct
						* helper code (eg buffer layer)
						* to clear GFP_FS from alloc */

/*
 * oh the beauties of C type declarations.
 */
struct page;
struct address_space;
struct writeback_control;

/*
 * Iterator over an array of iovec segments.  @iov/@nr_segs describe the
 * segment array, @iov_offset the offset within it, and @count the number
 * of bytes still to be processed (see iov_iter_init/iov_iter_advance).
 */
struct iov_iter {
	const struct iovec *iov;
	unsigned long nr_segs;
	size_t iov_offset;
	size_t count;
};

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes);
size_t iov_iter_copy_from_user(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
size_t iov_iter_single_seg_count(struct iov_iter *i);

/*
 * Set up @i over @iov: total length is @count bytes beyond the @written
 * bytes already transferred, then the iterator is advanced by @written.
 */
static inline void iov_iter_init(struct iov_iter *i,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count, size_t written)
{
	i->nr_segs = nr_segs;
	i->iov = iov;
	i->iov_offset = 0;
	i->count = written + count;

	/* skip the portion the caller reports as already done */
	iov_iter_advance(i, written);
}

/* Bytes remaining in the iteration. */
static inline size_t iov_iter_count(struct iov_iter *i)
{
	return i->count;
}

557 | 557 | ||
558 | /* | 558 | /* |
559 | * "descriptor" for what we're up to with a read. | 559 | * "descriptor" for what we're up to with a read. |
560 | * This allows us to use the same read code yet | 560 | * This allows us to use the same read code yet |
561 | * have multiple different users of the data that | 561 | * have multiple different users of the data that |
562 | * we read from a file. | 562 | * we read from a file. |
563 | * | 563 | * |
564 | * The simplest case just copies the data to user | 564 | * The simplest case just copies the data to user |
565 | * mode. | 565 | * mode. |
566 | */ | 566 | */ |
567 | typedef struct { | 567 | typedef struct { |
568 | size_t written; | 568 | size_t written; |
569 | size_t count; | 569 | size_t count; |
570 | union { | 570 | union { |
571 | char __user *buf; | 571 | char __user *buf; |
572 | void *data; | 572 | void *data; |
573 | } arg; | 573 | } arg; |
574 | int error; | 574 | int error; |
575 | } read_descriptor_t; | 575 | } read_descriptor_t; |
576 | 576 | ||
577 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, | 577 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, |
578 | unsigned long, unsigned long); | 578 | unsigned long, unsigned long); |
579 | 579 | ||
580 | struct address_space_operations { | 580 | struct address_space_operations { |
581 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 581 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
582 | int (*readpage)(struct file *, struct page *); | 582 | int (*readpage)(struct file *, struct page *); |
583 | void (*sync_page)(struct page *); | 583 | void (*sync_page)(struct page *); |
584 | 584 | ||
585 | /* Write back some dirty pages from this mapping. */ | 585 | /* Write back some dirty pages from this mapping. */ |
586 | int (*writepages)(struct address_space *, struct writeback_control *); | 586 | int (*writepages)(struct address_space *, struct writeback_control *); |
587 | 587 | ||
588 | /* Set a page dirty. Return true if this dirtied it */ | 588 | /* Set a page dirty. Return true if this dirtied it */ |
589 | int (*set_page_dirty)(struct page *page); | 589 | int (*set_page_dirty)(struct page *page); |
590 | 590 | ||
591 | int (*readpages)(struct file *filp, struct address_space *mapping, | 591 | int (*readpages)(struct file *filp, struct address_space *mapping, |
592 | struct list_head *pages, unsigned nr_pages); | 592 | struct list_head *pages, unsigned nr_pages); |
593 | 593 | ||
594 | int (*write_begin)(struct file *, struct address_space *mapping, | 594 | int (*write_begin)(struct file *, struct address_space *mapping, |
595 | loff_t pos, unsigned len, unsigned flags, | 595 | loff_t pos, unsigned len, unsigned flags, |
596 | struct page **pagep, void **fsdata); | 596 | struct page **pagep, void **fsdata); |
597 | int (*write_end)(struct file *, struct address_space *mapping, | 597 | int (*write_end)(struct file *, struct address_space *mapping, |
598 | loff_t pos, unsigned len, unsigned copied, | 598 | loff_t pos, unsigned len, unsigned copied, |
599 | struct page *page, void *fsdata); | 599 | struct page *page, void *fsdata); |
600 | 600 | ||
601 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ | 601 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ |
602 | sector_t (*bmap)(struct address_space *, sector_t); | 602 | sector_t (*bmap)(struct address_space *, sector_t); |
603 | void (*invalidatepage) (struct page *, unsigned long); | 603 | void (*invalidatepage) (struct page *, unsigned long); |
604 | int (*releasepage) (struct page *, gfp_t); | 604 | int (*releasepage) (struct page *, gfp_t); |
605 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 605 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
606 | loff_t offset, unsigned long nr_segs); | 606 | loff_t offset, unsigned long nr_segs); |
607 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, | 607 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, |
608 | void **, unsigned long *); | 608 | void **, unsigned long *); |
609 | /* migrate the contents of a page to the specified target */ | 609 | /* migrate the contents of a page to the specified target */ |
610 | int (*migratepage) (struct address_space *, | 610 | int (*migratepage) (struct address_space *, |
611 | struct page *, struct page *); | 611 | struct page *, struct page *); |
612 | int (*launder_page) (struct page *); | 612 | int (*launder_page) (struct page *); |
613 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, | 613 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, |
614 | unsigned long); | 614 | unsigned long); |
615 | int (*error_remove_page)(struct address_space *, struct page *); | 615 | int (*error_remove_page)(struct address_space *, struct page *); |
616 | }; | 616 | }; |
617 | 617 | ||
618 | /* | 618 | /* |
619 | * pagecache_write_begin/pagecache_write_end must be used by general code | 619 | * pagecache_write_begin/pagecache_write_end must be used by general code |
620 | * to write into the pagecache. | 620 | * to write into the pagecache. |
621 | */ | 621 | */ |
622 | int pagecache_write_begin(struct file *, struct address_space *mapping, | 622 | int pagecache_write_begin(struct file *, struct address_space *mapping, |
623 | loff_t pos, unsigned len, unsigned flags, | 623 | loff_t pos, unsigned len, unsigned flags, |
624 | struct page **pagep, void **fsdata); | 624 | struct page **pagep, void **fsdata); |
625 | 625 | ||
626 | int pagecache_write_end(struct file *, struct address_space *mapping, | 626 | int pagecache_write_end(struct file *, struct address_space *mapping, |
627 | loff_t pos, unsigned len, unsigned copied, | 627 | loff_t pos, unsigned len, unsigned copied, |
628 | struct page *page, void *fsdata); | 628 | struct page *page, void *fsdata); |
629 | 629 | ||
630 | struct backing_dev_info; | 630 | struct backing_dev_info; |
631 | struct address_space { | 631 | struct address_space { |
632 | struct inode *host; /* owner: inode, block_device */ | 632 | struct inode *host; /* owner: inode, block_device */ |
633 | struct radix_tree_root page_tree; /* radix tree of all pages */ | 633 | struct radix_tree_root page_tree; /* radix tree of all pages */ |
634 | spinlock_t tree_lock; /* and lock protecting it */ | 634 | spinlock_t tree_lock; /* and lock protecting it */ |
635 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ | 635 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ |
636 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ | 636 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ |
637 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ | 637 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ |
638 | spinlock_t i_mmap_lock; /* protect tree, count, list */ | 638 | spinlock_t i_mmap_lock; /* protect tree, count, list */ |
639 | unsigned int truncate_count; /* Cover race condition with truncate */ | 639 | unsigned int truncate_count; /* Cover race condition with truncate */ |
640 | unsigned long nrpages; /* number of total pages */ | 640 | unsigned long nrpages; /* number of total pages */ |
641 | pgoff_t writeback_index;/* writeback starts here */ | 641 | pgoff_t writeback_index;/* writeback starts here */ |
642 | const struct address_space_operations *a_ops; /* methods */ | 642 | const struct address_space_operations *a_ops; /* methods */ |
643 | unsigned long flags; /* error bits/gfp mask */ | 643 | unsigned long flags; /* error bits/gfp mask */ |
644 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ | 644 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ |
645 | spinlock_t private_lock; /* for use by the address_space */ | 645 | spinlock_t private_lock; /* for use by the address_space */ |
646 | struct list_head private_list; /* ditto */ | 646 | struct list_head private_list; /* ditto */ |
647 | struct address_space *assoc_mapping; /* ditto */ | 647 | struct address_space *assoc_mapping; /* ditto */ |
648 | } __attribute__((aligned(sizeof(long)))); | 648 | } __attribute__((aligned(sizeof(long)))); |
649 | /* | 649 | /* |
650 | * On most architectures that alignment is already the case; but | 650 | * On most architectures that alignment is already the case; but |
651 | * must be enforced here for CRIS, to let the least signficant bit | 651 | * must be enforced here for CRIS, to let the least signficant bit |
652 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. | 652 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. |
653 | */ | 653 | */ |
654 | 654 | ||
655 | struct block_device { | 655 | struct block_device { |
656 | dev_t bd_dev; /* not a kdev_t - it's a search key */ | 656 | dev_t bd_dev; /* not a kdev_t - it's a search key */ |
657 | struct inode * bd_inode; /* will die */ | 657 | struct inode * bd_inode; /* will die */ |
658 | struct super_block * bd_super; | 658 | struct super_block * bd_super; |
659 | int bd_openers; | 659 | int bd_openers; |
660 | struct mutex bd_mutex; /* open/close mutex */ | 660 | struct mutex bd_mutex; /* open/close mutex */ |
661 | struct list_head bd_inodes; | 661 | struct list_head bd_inodes; |
662 | void * bd_claiming; | 662 | void * bd_claiming; |
663 | void * bd_holder; | 663 | void * bd_holder; |
664 | int bd_holders; | 664 | int bd_holders; |
665 | #ifdef CONFIG_SYSFS | 665 | #ifdef CONFIG_SYSFS |
666 | struct list_head bd_holder_list; | 666 | struct list_head bd_holder_list; |
667 | #endif | 667 | #endif |
668 | struct block_device * bd_contains; | 668 | struct block_device * bd_contains; |
669 | unsigned bd_block_size; | 669 | unsigned bd_block_size; |
670 | struct hd_struct * bd_part; | 670 | struct hd_struct * bd_part; |
671 | /* number of times partitions within this device have been opened. */ | 671 | /* number of times partitions within this device have been opened. */ |
672 | unsigned bd_part_count; | 672 | unsigned bd_part_count; |
673 | int bd_invalidated; | 673 | int bd_invalidated; |
674 | struct gendisk * bd_disk; | 674 | struct gendisk * bd_disk; |
675 | struct list_head bd_list; | 675 | struct list_head bd_list; |
676 | /* | 676 | /* |
677 | * Private data. You must have bd_claim'ed the block_device | 677 | * Private data. You must have bd_claim'ed the block_device |
678 | * to use this. NOTE: bd_claim allows an owner to claim | 678 | * to use this. NOTE: bd_claim allows an owner to claim |
679 | * the same device multiple times, the owner must take special | 679 | * the same device multiple times, the owner must take special |
680 | * care to not mess up bd_private for that case. | 680 | * care to not mess up bd_private for that case. |
681 | */ | 681 | */ |
682 | unsigned long bd_private; | 682 | unsigned long bd_private; |
683 | 683 | ||
684 | /* The counter of freeze processes */ | 684 | /* The counter of freeze processes */ |
685 | int bd_fsfreeze_count; | 685 | int bd_fsfreeze_count; |
686 | /* Mutex for freeze */ | 686 | /* Mutex for freeze */ |
687 | struct mutex bd_fsfreeze_mutex; | 687 | struct mutex bd_fsfreeze_mutex; |
688 | }; | 688 | }; |
689 | 689 | ||
690 | /* | 690 | /* |
691 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache | 691 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache |
692 | * radix trees | 692 | * radix trees |
693 | */ | 693 | */ |
694 | #define PAGECACHE_TAG_DIRTY 0 | 694 | #define PAGECACHE_TAG_DIRTY 0 |
695 | #define PAGECACHE_TAG_WRITEBACK 1 | 695 | #define PAGECACHE_TAG_WRITEBACK 1 |
696 | #define PAGECACHE_TAG_TOWRITE 2 | 696 | #define PAGECACHE_TAG_TOWRITE 2 |
697 | 697 | ||
698 | int mapping_tagged(struct address_space *mapping, int tag); | 698 | int mapping_tagged(struct address_space *mapping, int tag); |
699 | 699 | ||
700 | /* | 700 | /* |
701 | * Might pages of this file be mapped into userspace? | 701 | * Might pages of this file be mapped into userspace? |
702 | */ | 702 | */ |
703 | static inline int mapping_mapped(struct address_space *mapping) | 703 | static inline int mapping_mapped(struct address_space *mapping) |
704 | { | 704 | { |
705 | return !prio_tree_empty(&mapping->i_mmap) || | 705 | return !prio_tree_empty(&mapping->i_mmap) || |
706 | !list_empty(&mapping->i_mmap_nonlinear); | 706 | !list_empty(&mapping->i_mmap_nonlinear); |
707 | } | 707 | } |
708 | 708 | ||
709 | /* | 709 | /* |
710 | * Might pages of this file have been modified in userspace? | 710 | * Might pages of this file have been modified in userspace? |
711 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff | 711 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff |
712 | * marks vma as VM_SHARED if it is shared, and the file was opened for | 712 | * marks vma as VM_SHARED if it is shared, and the file was opened for |
713 | * writing i.e. vma may be mprotected writable even if now readonly. | 713 | * writing i.e. vma may be mprotected writable even if now readonly. |
714 | */ | 714 | */ |
715 | static inline int mapping_writably_mapped(struct address_space *mapping) | 715 | static inline int mapping_writably_mapped(struct address_space *mapping) |
716 | { | 716 | { |
717 | return mapping->i_mmap_writable != 0; | 717 | return mapping->i_mmap_writable != 0; |
718 | } | 718 | } |
719 | 719 | ||
720 | /* | 720 | /* |
721 | * Use sequence counter to get consistent i_size on 32-bit processors. | 721 | * Use sequence counter to get consistent i_size on 32-bit processors. |
722 | */ | 722 | */ |
723 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 723 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
724 | #include <linux/seqlock.h> | 724 | #include <linux/seqlock.h> |
725 | #define __NEED_I_SIZE_ORDERED | 725 | #define __NEED_I_SIZE_ORDERED |
726 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) | 726 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) |
727 | #else | 727 | #else |
728 | #define i_size_ordered_init(inode) do { } while (0) | 728 | #define i_size_ordered_init(inode) do { } while (0) |
729 | #endif | 729 | #endif |
730 | 730 | ||
731 | struct posix_acl; | 731 | struct posix_acl; |
732 | #define ACL_NOT_CACHED ((void *)(-1)) | 732 | #define ACL_NOT_CACHED ((void *)(-1)) |
733 | 733 | ||
734 | struct inode { | 734 | struct inode { |
735 | struct hlist_node i_hash; | 735 | struct hlist_node i_hash; |
736 | struct list_head i_wb_list; /* backing dev IO list */ | 736 | struct list_head i_wb_list; /* backing dev IO list */ |
737 | struct list_head i_lru; /* inode LRU list */ | 737 | struct list_head i_lru; /* inode LRU list */ |
738 | struct list_head i_sb_list; | 738 | struct list_head i_sb_list; |
739 | struct list_head i_dentry; | 739 | struct list_head i_dentry; |
740 | unsigned long i_ino; | 740 | unsigned long i_ino; |
741 | atomic_t i_count; | 741 | atomic_t i_count; |
742 | unsigned int i_nlink; | 742 | unsigned int i_nlink; |
743 | uid_t i_uid; | 743 | uid_t i_uid; |
744 | gid_t i_gid; | 744 | gid_t i_gid; |
745 | dev_t i_rdev; | 745 | dev_t i_rdev; |
746 | unsigned int i_blkbits; | 746 | unsigned int i_blkbits; |
747 | u64 i_version; | 747 | u64 i_version; |
748 | loff_t i_size; | 748 | loff_t i_size; |
749 | #ifdef __NEED_I_SIZE_ORDERED | 749 | #ifdef __NEED_I_SIZE_ORDERED |
750 | seqcount_t i_size_seqcount; | 750 | seqcount_t i_size_seqcount; |
751 | #endif | 751 | #endif |
752 | struct timespec i_atime; | 752 | struct timespec i_atime; |
753 | struct timespec i_mtime; | 753 | struct timespec i_mtime; |
754 | struct timespec i_ctime; | 754 | struct timespec i_ctime; |
755 | blkcnt_t i_blocks; | 755 | blkcnt_t i_blocks; |
756 | unsigned short i_bytes; | 756 | unsigned short i_bytes; |
757 | umode_t i_mode; | 757 | umode_t i_mode; |
758 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ | 758 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ |
759 | struct mutex i_mutex; | 759 | struct mutex i_mutex; |
760 | struct rw_semaphore i_alloc_sem; | 760 | struct rw_semaphore i_alloc_sem; |
761 | const struct inode_operations *i_op; | 761 | const struct inode_operations *i_op; |
762 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ | 762 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ |
763 | struct super_block *i_sb; | 763 | struct super_block *i_sb; |
764 | struct file_lock *i_flock; | 764 | struct file_lock *i_flock; |
765 | struct address_space *i_mapping; | 765 | struct address_space *i_mapping; |
766 | struct address_space i_data; | 766 | struct address_space i_data; |
767 | #ifdef CONFIG_QUOTA | 767 | #ifdef CONFIG_QUOTA |
768 | struct dquot *i_dquot[MAXQUOTAS]; | 768 | struct dquot *i_dquot[MAXQUOTAS]; |
769 | #endif | 769 | #endif |
770 | struct list_head i_devices; | 770 | struct list_head i_devices; |
771 | union { | 771 | union { |
772 | struct pipe_inode_info *i_pipe; | 772 | struct pipe_inode_info *i_pipe; |
773 | struct block_device *i_bdev; | 773 | struct block_device *i_bdev; |
774 | struct cdev *i_cdev; | 774 | struct cdev *i_cdev; |
775 | }; | 775 | }; |
776 | 776 | ||
777 | __u32 i_generation; | 777 | __u32 i_generation; |
778 | 778 | ||
779 | #ifdef CONFIG_FSNOTIFY | 779 | #ifdef CONFIG_FSNOTIFY |
780 | __u32 i_fsnotify_mask; /* all events this inode cares about */ | 780 | __u32 i_fsnotify_mask; /* all events this inode cares about */ |
781 | struct hlist_head i_fsnotify_marks; | 781 | struct hlist_head i_fsnotify_marks; |
782 | #endif | 782 | #endif |
783 | 783 | ||
784 | unsigned long i_state; | 784 | unsigned long i_state; |
785 | unsigned long dirtied_when; /* jiffies of first dirtying */ | 785 | unsigned long dirtied_when; /* jiffies of first dirtying */ |
786 | 786 | ||
787 | unsigned int i_flags; | 787 | unsigned int i_flags; |
788 | 788 | ||
789 | #ifdef CONFIG_IMA | 789 | #ifdef CONFIG_IMA |
790 | /* protected by i_lock */ | 790 | /* protected by i_lock */ |
791 | unsigned int i_readcount; /* struct files open RO */ | 791 | unsigned int i_readcount; /* struct files open RO */ |
792 | #endif | 792 | #endif |
793 | atomic_t i_writecount; | 793 | atomic_t i_writecount; |
794 | #ifdef CONFIG_SECURITY | 794 | #ifdef CONFIG_SECURITY |
795 | void *i_security; | 795 | void *i_security; |
796 | #endif | 796 | #endif |
797 | #ifdef CONFIG_FS_POSIX_ACL | 797 | #ifdef CONFIG_FS_POSIX_ACL |
798 | struct posix_acl *i_acl; | 798 | struct posix_acl *i_acl; |
799 | struct posix_acl *i_default_acl; | 799 | struct posix_acl *i_default_acl; |
800 | #endif | 800 | #endif |
801 | void *i_private; /* fs or device private pointer */ | 801 | void *i_private; /* fs or device private pointer */ |
802 | }; | 802 | }; |
803 | 803 | ||
804 | static inline int inode_unhashed(struct inode *inode) | 804 | static inline int inode_unhashed(struct inode *inode) |
805 | { | 805 | { |
806 | return hlist_unhashed(&inode->i_hash); | 806 | return hlist_unhashed(&inode->i_hash); |
807 | } | 807 | } |
808 | 808 | ||
809 | /* | 809 | /* |
810 | * inode->i_mutex nesting subclasses for the lock validator: | 810 | * inode->i_mutex nesting subclasses for the lock validator: |
811 | * | 811 | * |
812 | * 0: the object of the current VFS operation | 812 | * 0: the object of the current VFS operation |
813 | * 1: parent | 813 | * 1: parent |
814 | * 2: child/target | 814 | * 2: child/target |
815 | * 3: quota file | 815 | * 3: quota file |
816 | * | 816 | * |
817 | * The locking order between these classes is | 817 | * The locking order between these classes is |
818 | * parent -> child -> normal -> xattr -> quota | 818 | * parent -> child -> normal -> xattr -> quota |
819 | */ | 819 | */ |
820 | enum inode_i_mutex_lock_class | 820 | enum inode_i_mutex_lock_class |
821 | { | 821 | { |
822 | I_MUTEX_NORMAL, | 822 | I_MUTEX_NORMAL, |
823 | I_MUTEX_PARENT, | 823 | I_MUTEX_PARENT, |
824 | I_MUTEX_CHILD, | 824 | I_MUTEX_CHILD, |
825 | I_MUTEX_XATTR, | 825 | I_MUTEX_XATTR, |
826 | I_MUTEX_QUOTA | 826 | I_MUTEX_QUOTA |
827 | }; | 827 | }; |
828 | 828 | ||
829 | /* | 829 | /* |
830 | * NOTE: in a 32bit arch with a preemptable kernel and | 830 | * NOTE: in a 32bit arch with a preemptable kernel and |
831 | * an UP compile the i_size_read/write must be atomic | 831 | * an UP compile the i_size_read/write must be atomic |
832 | * with respect to the local cpu (unlike with preempt disabled), | 832 | * with respect to the local cpu (unlike with preempt disabled), |
833 | * but they don't need to be atomic with respect to other cpus like in | 833 | * but they don't need to be atomic with respect to other cpus like in |
834 | * true SMP (so they need either to either locally disable irq around | 834 | * true SMP (so they need either to either locally disable irq around |
835 | * the read or for example on x86 they can be still implemented as a | 835 | * the read or for example on x86 they can be still implemented as a |
836 | * cmpxchg8b without the need of the lock prefix). For SMP compiles | 836 | * cmpxchg8b without the need of the lock prefix). For SMP compiles |
837 | * and 64bit archs it makes no difference if preempt is enabled or not. | 837 | * and 64bit archs it makes no difference if preempt is enabled or not. |
838 | */ | 838 | */ |
839 | static inline loff_t i_size_read(const struct inode *inode) | 839 | static inline loff_t i_size_read(const struct inode *inode) |
840 | { | 840 | { |
841 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 841 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
842 | loff_t i_size; | 842 | loff_t i_size; |
843 | unsigned int seq; | 843 | unsigned int seq; |
844 | 844 | ||
845 | do { | 845 | do { |
846 | seq = read_seqcount_begin(&inode->i_size_seqcount); | 846 | seq = read_seqcount_begin(&inode->i_size_seqcount); |
847 | i_size = inode->i_size; | 847 | i_size = inode->i_size; |
848 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); | 848 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); |
849 | return i_size; | 849 | return i_size; |
850 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 850 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
851 | loff_t i_size; | 851 | loff_t i_size; |
852 | 852 | ||
853 | preempt_disable(); | 853 | preempt_disable(); |
854 | i_size = inode->i_size; | 854 | i_size = inode->i_size; |
855 | preempt_enable(); | 855 | preempt_enable(); |
856 | return i_size; | 856 | return i_size; |
857 | #else | 857 | #else |
858 | return inode->i_size; | 858 | return inode->i_size; |
859 | #endif | 859 | #endif |
860 | } | 860 | } |
861 | 861 | ||
862 | /* | 862 | /* |
863 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it | 863 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it |
864 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount | 864 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount |
865 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. | 865 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. |
866 | */ | 866 | */ |
867 | static inline void i_size_write(struct inode *inode, loff_t i_size) | 867 | static inline void i_size_write(struct inode *inode, loff_t i_size) |
868 | { | 868 | { |
869 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 869 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
870 | write_seqcount_begin(&inode->i_size_seqcount); | 870 | write_seqcount_begin(&inode->i_size_seqcount); |
871 | inode->i_size = i_size; | 871 | inode->i_size = i_size; |
872 | write_seqcount_end(&inode->i_size_seqcount); | 872 | write_seqcount_end(&inode->i_size_seqcount); |
873 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 873 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
874 | preempt_disable(); | 874 | preempt_disable(); |
875 | inode->i_size = i_size; | 875 | inode->i_size = i_size; |
876 | preempt_enable(); | 876 | preempt_enable(); |
877 | #else | 877 | #else |
878 | inode->i_size = i_size; | 878 | inode->i_size = i_size; |
879 | #endif | 879 | #endif |
880 | } | 880 | } |
881 | 881 | ||
882 | static inline unsigned iminor(const struct inode *inode) | 882 | static inline unsigned iminor(const struct inode *inode) |
883 | { | 883 | { |
884 | return MINOR(inode->i_rdev); | 884 | return MINOR(inode->i_rdev); |
885 | } | 885 | } |
886 | 886 | ||
887 | static inline unsigned imajor(const struct inode *inode) | 887 | static inline unsigned imajor(const struct inode *inode) |
888 | { | 888 | { |
889 | return MAJOR(inode->i_rdev); | 889 | return MAJOR(inode->i_rdev); |
890 | } | 890 | } |
891 | 891 | ||
892 | extern struct block_device *I_BDEV(struct inode *inode); | 892 | extern struct block_device *I_BDEV(struct inode *inode); |
893 | 893 | ||
894 | struct fown_struct { | 894 | struct fown_struct { |
895 | rwlock_t lock; /* protects pid, uid, euid fields */ | 895 | rwlock_t lock; /* protects pid, uid, euid fields */ |
896 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ | 896 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ |
897 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ | 897 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ |
898 | uid_t uid, euid; /* uid/euid of process setting the owner */ | 898 | uid_t uid, euid; /* uid/euid of process setting the owner */ |
899 | int signum; /* posix.1b rt signal to be delivered on IO */ | 899 | int signum; /* posix.1b rt signal to be delivered on IO */ |
900 | }; | 900 | }; |
901 | 901 | ||
902 | /* | 902 | /* |
903 | * Track a single file's readahead state | 903 | * Track a single file's readahead state |
904 | */ | 904 | */ |
905 | struct file_ra_state { | 905 | struct file_ra_state { |
906 | pgoff_t start; /* where readahead started */ | 906 | pgoff_t start; /* where readahead started */ |
907 | unsigned int size; /* # of readahead pages */ | 907 | unsigned int size; /* # of readahead pages */ |
908 | unsigned int async_size; /* do asynchronous readahead when | 908 | unsigned int async_size; /* do asynchronous readahead when |
909 | there are only # of pages ahead */ | 909 | there are only # of pages ahead */ |
910 | 910 | ||
911 | unsigned int ra_pages; /* Maximum readahead window */ | 911 | unsigned int ra_pages; /* Maximum readahead window */ |
912 | unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ | 912 | unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ |
913 | loff_t prev_pos; /* Cache last read() position */ | 913 | loff_t prev_pos; /* Cache last read() position */ |
914 | }; | 914 | }; |
915 | 915 | ||
916 | /* | 916 | /* |
917 | * Check if @index falls in the readahead windows. | 917 | * Check if @index falls in the readahead windows. |
918 | */ | 918 | */ |
919 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) | 919 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) |
920 | { | 920 | { |
921 | return (index >= ra->start && | 921 | return (index >= ra->start && |
922 | index < ra->start + ra->size); | 922 | index < ra->start + ra->size); |
923 | } | 923 | } |
924 | 924 | ||
925 | #define FILE_MNT_WRITE_TAKEN 1 | 925 | #define FILE_MNT_WRITE_TAKEN 1 |
926 | #define FILE_MNT_WRITE_RELEASED 2 | 926 | #define FILE_MNT_WRITE_RELEASED 2 |
927 | 927 | ||
928 | struct file { | 928 | struct file { |
929 | /* | 929 | /* |
930 | * fu_list becomes invalid after file_free is called and queued via | 930 | * fu_list becomes invalid after file_free is called and queued via |
931 | * fu_rcuhead for RCU freeing | 931 | * fu_rcuhead for RCU freeing |
932 | */ | 932 | */ |
933 | union { | 933 | union { |
934 | struct list_head fu_list; | 934 | struct list_head fu_list; |
935 | struct rcu_head fu_rcuhead; | 935 | struct rcu_head fu_rcuhead; |
936 | } f_u; | 936 | } f_u; |
937 | struct path f_path; | 937 | struct path f_path; |
938 | #define f_dentry f_path.dentry | 938 | #define f_dentry f_path.dentry |
939 | #define f_vfsmnt f_path.mnt | 939 | #define f_vfsmnt f_path.mnt |
940 | const struct file_operations *f_op; | 940 | const struct file_operations *f_op; |
941 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ | 941 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ |
942 | #ifdef CONFIG_SMP | 942 | #ifdef CONFIG_SMP |
943 | int f_sb_list_cpu; | 943 | int f_sb_list_cpu; |
944 | #endif | 944 | #endif |
945 | atomic_long_t f_count; | 945 | atomic_long_t f_count; |
946 | unsigned int f_flags; | 946 | unsigned int f_flags; |
947 | fmode_t f_mode; | 947 | fmode_t f_mode; |
948 | loff_t f_pos; | 948 | loff_t f_pos; |
949 | struct fown_struct f_owner; | 949 | struct fown_struct f_owner; |
950 | const struct cred *f_cred; | 950 | const struct cred *f_cred; |
951 | struct file_ra_state f_ra; | 951 | struct file_ra_state f_ra; |
952 | 952 | ||
953 | u64 f_version; | 953 | u64 f_version; |
954 | #ifdef CONFIG_SECURITY | 954 | #ifdef CONFIG_SECURITY |
955 | void *f_security; | 955 | void *f_security; |
956 | #endif | 956 | #endif |
957 | /* needed for tty driver, and maybe others */ | 957 | /* needed for tty driver, and maybe others */ |
958 | void *private_data; | 958 | void *private_data; |
959 | 959 | ||
960 | #ifdef CONFIG_EPOLL | 960 | #ifdef CONFIG_EPOLL |
961 | /* Used by fs/eventpoll.c to link all the hooks to this file */ | 961 | /* Used by fs/eventpoll.c to link all the hooks to this file */ |
962 | struct list_head f_ep_links; | 962 | struct list_head f_ep_links; |
963 | #endif /* #ifdef CONFIG_EPOLL */ | 963 | #endif /* #ifdef CONFIG_EPOLL */ |
964 | struct address_space *f_mapping; | 964 | struct address_space *f_mapping; |
965 | #ifdef CONFIG_DEBUG_WRITECOUNT | 965 | #ifdef CONFIG_DEBUG_WRITECOUNT |
966 | unsigned long f_mnt_write_state; | 966 | unsigned long f_mnt_write_state; |
967 | #endif | 967 | #endif |
968 | }; | 968 | }; |
969 | 969 | ||
970 | #define get_file(x) atomic_long_inc(&(x)->f_count) | 970 | #define get_file(x) atomic_long_inc(&(x)->f_count) |
971 | #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) | 971 | #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) |
972 | #define file_count(x) atomic_long_read(&(x)->f_count) | 972 | #define file_count(x) atomic_long_read(&(x)->f_count) |
973 | 973 | ||
974 | #ifdef CONFIG_DEBUG_WRITECOUNT | 974 | #ifdef CONFIG_DEBUG_WRITECOUNT |
975 | static inline void file_take_write(struct file *f) | 975 | static inline void file_take_write(struct file *f) |
976 | { | 976 | { |
977 | WARN_ON(f->f_mnt_write_state != 0); | 977 | WARN_ON(f->f_mnt_write_state != 0); |
978 | f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; | 978 | f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; |
979 | } | 979 | } |
980 | static inline void file_release_write(struct file *f) | 980 | static inline void file_release_write(struct file *f) |
981 | { | 981 | { |
982 | f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; | 982 | f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; |
983 | } | 983 | } |
984 | static inline void file_reset_write(struct file *f) | 984 | static inline void file_reset_write(struct file *f) |
985 | { | 985 | { |
986 | f->f_mnt_write_state = 0; | 986 | f->f_mnt_write_state = 0; |
987 | } | 987 | } |
988 | static inline void file_check_state(struct file *f) | 988 | static inline void file_check_state(struct file *f) |
989 | { | 989 | { |
990 | /* | 990 | /* |
991 | * At this point, either both or neither of these bits | 991 | * At this point, either both or neither of these bits |
992 | * should be set. | 992 | * should be set. |
993 | */ | 993 | */ |
994 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); | 994 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); |
995 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); | 995 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); |
996 | } | 996 | } |
997 | static inline int file_check_writeable(struct file *f) | 997 | static inline int file_check_writeable(struct file *f) |
998 | { | 998 | { |
999 | if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) | 999 | if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) |
1000 | return 0; | 1000 | return 0; |
1001 | printk(KERN_WARNING "writeable file with no " | 1001 | printk(KERN_WARNING "writeable file with no " |
1002 | "mnt_want_write()\n"); | 1002 | "mnt_want_write()\n"); |
1003 | WARN_ON(1); | 1003 | WARN_ON(1); |
1004 | return -EINVAL; | 1004 | return -EINVAL; |
1005 | } | 1005 | } |
1006 | #else /* !CONFIG_DEBUG_WRITECOUNT */ | 1006 | #else /* !CONFIG_DEBUG_WRITECOUNT */ |
1007 | static inline void file_take_write(struct file *filp) {} | 1007 | static inline void file_take_write(struct file *filp) {} |
1008 | static inline void file_release_write(struct file *filp) {} | 1008 | static inline void file_release_write(struct file *filp) {} |
1009 | static inline void file_reset_write(struct file *filp) {} | 1009 | static inline void file_reset_write(struct file *filp) {} |
1010 | static inline void file_check_state(struct file *filp) {} | 1010 | static inline void file_check_state(struct file *filp) {} |
1011 | static inline int file_check_writeable(struct file *filp) | 1011 | static inline int file_check_writeable(struct file *filp) |
1012 | { | 1012 | { |
1013 | return 0; | 1013 | return 0; |
1014 | } | 1014 | } |
1015 | #endif /* CONFIG_DEBUG_WRITECOUNT */ | 1015 | #endif /* CONFIG_DEBUG_WRITECOUNT */ |
1016 | 1016 | ||
1017 | #define MAX_NON_LFS ((1UL<<31) - 1) | 1017 | #define MAX_NON_LFS ((1UL<<31) - 1) |
1018 | 1018 | ||
1019 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 1019 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
1020 | limits, otherwise bad things can happen in VM. */ | 1020 | limits, otherwise bad things can happen in VM. */ |
1021 | #if BITS_PER_LONG==32 | 1021 | #if BITS_PER_LONG==32 |
1022 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 1022 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
1023 | #elif BITS_PER_LONG==64 | 1023 | #elif BITS_PER_LONG==64 |
1024 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL | 1024 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL |
1025 | #endif | 1025 | #endif |
1026 | 1026 | ||
1027 | #define FL_POSIX 1 | 1027 | #define FL_POSIX 1 |
1028 | #define FL_FLOCK 2 | 1028 | #define FL_FLOCK 2 |
1029 | #define FL_ACCESS 8 /* not trying to lock, just looking */ | 1029 | #define FL_ACCESS 8 /* not trying to lock, just looking */ |
1030 | #define FL_EXISTS 16 /* when unlocking, test for existence */ | 1030 | #define FL_EXISTS 16 /* when unlocking, test for existence */ |
1031 | #define FL_LEASE 32 /* lease held on this file */ | 1031 | #define FL_LEASE 32 /* lease held on this file */ |
1032 | #define FL_CLOSE 64 /* unlock on close */ | 1032 | #define FL_CLOSE 64 /* unlock on close */ |
1033 | #define FL_SLEEP 128 /* A blocking lock */ | 1033 | #define FL_SLEEP 128 /* A blocking lock */ |
1034 | 1034 | ||
1035 | /* | 1035 | /* |
1036 | * Special return value from posix_lock_file() and vfs_lock_file() for | 1036 | * Special return value from posix_lock_file() and vfs_lock_file() for |
1037 | * asynchronous locking. | 1037 | * asynchronous locking. |
1038 | */ | 1038 | */ |
1039 | #define FILE_LOCK_DEFERRED 1 | 1039 | #define FILE_LOCK_DEFERRED 1 |
1040 | 1040 | ||
1041 | /* | 1041 | /* |
1042 | * The POSIX file lock owner is determined by | 1042 | * The POSIX file lock owner is determined by |
1043 | * the "struct files_struct" in the thread group | 1043 | * the "struct files_struct" in the thread group |
1044 | * (or NULL for no owner - BSD locks). | 1044 | * (or NULL for no owner - BSD locks). |
1045 | * | 1045 | * |
1046 | * Lockd stuffs a "host" pointer into this. | 1046 | * Lockd stuffs a "host" pointer into this. |
1047 | */ | 1047 | */ |
1048 | typedef struct files_struct *fl_owner_t; | 1048 | typedef struct files_struct *fl_owner_t; |
1049 | 1049 | ||
1050 | struct file_lock_operations { | 1050 | struct file_lock_operations { |
1051 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 1051 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
1052 | void (*fl_release_private)(struct file_lock *); | 1052 | void (*fl_release_private)(struct file_lock *); |
1053 | }; | 1053 | }; |
1054 | 1054 | ||
1055 | struct lock_manager_operations { | 1055 | struct lock_manager_operations { |
1056 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | 1056 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); |
1057 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | 1057 | void (*fl_notify)(struct file_lock *); /* unblock callback */ |
1058 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); | 1058 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); |
1059 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | ||
1060 | void (*fl_release_private)(struct file_lock *); | 1059 | void (*fl_release_private)(struct file_lock *); |
1061 | void (*fl_break)(struct file_lock *); | 1060 | void (*fl_break)(struct file_lock *); |
1062 | int (*fl_mylease)(struct file_lock *, struct file_lock *); | 1061 | int (*fl_mylease)(struct file_lock *, struct file_lock *); |
1063 | int (*fl_change)(struct file_lock **, int); | 1062 | int (*fl_change)(struct file_lock **, int); |
1064 | }; | 1063 | }; |
1065 | 1064 | ||
1066 | struct lock_manager { | 1065 | struct lock_manager { |
1067 | struct list_head list; | 1066 | struct list_head list; |
1068 | }; | 1067 | }; |
1069 | 1068 | ||
1070 | void locks_start_grace(struct lock_manager *); | 1069 | void locks_start_grace(struct lock_manager *); |
1071 | void locks_end_grace(struct lock_manager *); | 1070 | void locks_end_grace(struct lock_manager *); |
1072 | int locks_in_grace(void); | 1071 | int locks_in_grace(void); |
1073 | 1072 | ||
1074 | /* that will die - we need it for nfs_lock_info */ | 1073 | /* that will die - we need it for nfs_lock_info */ |
1075 | #include <linux/nfs_fs_i.h> | 1074 | #include <linux/nfs_fs_i.h> |
1076 | 1075 | ||
1077 | struct file_lock { | 1076 | struct file_lock { |
1078 | struct file_lock *fl_next; /* singly linked list for this inode */ | 1077 | struct file_lock *fl_next; /* singly linked list for this inode */ |
1079 | struct list_head fl_link; /* doubly linked list of all locks */ | 1078 | struct list_head fl_link; /* doubly linked list of all locks */ |
1080 | struct list_head fl_block; /* circular list of blocked processes */ | 1079 | struct list_head fl_block; /* circular list of blocked processes */ |
1081 | fl_owner_t fl_owner; | 1080 | fl_owner_t fl_owner; |
1082 | unsigned char fl_flags; | 1081 | unsigned char fl_flags; |
1083 | unsigned char fl_type; | 1082 | unsigned char fl_type; |
1084 | unsigned int fl_pid; | 1083 | unsigned int fl_pid; |
1085 | struct pid *fl_nspid; | 1084 | struct pid *fl_nspid; |
1086 | wait_queue_head_t fl_wait; | 1085 | wait_queue_head_t fl_wait; |
1087 | struct file *fl_file; | 1086 | struct file *fl_file; |
1088 | loff_t fl_start; | 1087 | loff_t fl_start; |
1089 | loff_t fl_end; | 1088 | loff_t fl_end; |
1090 | 1089 | ||
1091 | struct fasync_struct * fl_fasync; /* for lease break notifications */ | 1090 | struct fasync_struct * fl_fasync; /* for lease break notifications */ |
1092 | unsigned long fl_break_time; /* for nonblocking lease breaks */ | 1091 | unsigned long fl_break_time; /* for nonblocking lease breaks */ |
1093 | 1092 | ||
1094 | const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ | 1093 | const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ |
1095 | const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ | 1094 | const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ |
1096 | union { | 1095 | union { |
1097 | struct nfs_lock_info nfs_fl; | 1096 | struct nfs_lock_info nfs_fl; |
1098 | struct nfs4_lock_info nfs4_fl; | 1097 | struct nfs4_lock_info nfs4_fl; |
1099 | struct { | 1098 | struct { |
1100 | struct list_head link; /* link in AFS vnode's pending_locks list */ | 1099 | struct list_head link; /* link in AFS vnode's pending_locks list */ |
1101 | int state; /* state of grant or error if -ve */ | 1100 | int state; /* state of grant or error if -ve */ |
1102 | } afs; | 1101 | } afs; |
1103 | } fl_u; | 1102 | } fl_u; |
1104 | }; | 1103 | }; |
1105 | 1104 | ||
1106 | /* The following constant reflects the upper bound of the file/locking space */ | 1105 | /* The following constant reflects the upper bound of the file/locking space */ |
1107 | #ifndef OFFSET_MAX | 1106 | #ifndef OFFSET_MAX |
1108 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) | 1107 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) |
1109 | #define OFFSET_MAX INT_LIMIT(loff_t) | 1108 | #define OFFSET_MAX INT_LIMIT(loff_t) |
1110 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) | 1109 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) |
1111 | #endif | 1110 | #endif |
1112 | 1111 | ||
1113 | #include <linux/fcntl.h> | 1112 | #include <linux/fcntl.h> |
1114 | 1113 | ||
1115 | extern void send_sigio(struct fown_struct *fown, int fd, int band); | 1114 | extern void send_sigio(struct fown_struct *fown, int fd, int band); |
1116 | 1115 | ||
1117 | #ifdef CONFIG_FILE_LOCKING | 1116 | #ifdef CONFIG_FILE_LOCKING |
1118 | extern int fcntl_getlk(struct file *, struct flock __user *); | 1117 | extern int fcntl_getlk(struct file *, struct flock __user *); |
1119 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, | 1118 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, |
1120 | struct flock __user *); | 1119 | struct flock __user *); |
1121 | 1120 | ||
1122 | #if BITS_PER_LONG == 32 | 1121 | #if BITS_PER_LONG == 32 |
1123 | extern int fcntl_getlk64(struct file *, struct flock64 __user *); | 1122 | extern int fcntl_getlk64(struct file *, struct flock64 __user *); |
1124 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, | 1123 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, |
1125 | struct flock64 __user *); | 1124 | struct flock64 __user *); |
1126 | #endif | 1125 | #endif |
1127 | 1126 | ||
1128 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); | 1127 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); |
1129 | extern int fcntl_getlease(struct file *filp); | 1128 | extern int fcntl_getlease(struct file *filp); |
1130 | 1129 | ||
1131 | /* fs/locks.c */ | 1130 | /* fs/locks.c */ |
1132 | void locks_free_lock(struct file_lock *fl); | 1131 | void locks_free_lock(struct file_lock *fl); |
1133 | extern void locks_init_lock(struct file_lock *); | 1132 | extern void locks_init_lock(struct file_lock *); |
1134 | extern struct file_lock * locks_alloc_lock(void); | 1133 | extern struct file_lock * locks_alloc_lock(void); |
1135 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); | 1134 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); |
1136 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); | 1135 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); |
1137 | extern void locks_remove_posix(struct file *, fl_owner_t); | 1136 | extern void locks_remove_posix(struct file *, fl_owner_t); |
1138 | extern void locks_remove_flock(struct file *); | 1137 | extern void locks_remove_flock(struct file *); |
1139 | extern void locks_release_private(struct file_lock *); | 1138 | extern void locks_release_private(struct file_lock *); |
1140 | extern void posix_test_lock(struct file *, struct file_lock *); | 1139 | extern void posix_test_lock(struct file *, struct file_lock *); |
1141 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); | 1140 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); |
1142 | extern int posix_lock_file_wait(struct file *, struct file_lock *); | 1141 | extern int posix_lock_file_wait(struct file *, struct file_lock *); |
1143 | extern int posix_unblock_lock(struct file *, struct file_lock *); | 1142 | extern int posix_unblock_lock(struct file *, struct file_lock *); |
1144 | extern int vfs_test_lock(struct file *, struct file_lock *); | 1143 | extern int vfs_test_lock(struct file *, struct file_lock *); |
1145 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); | 1144 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); |
1146 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); | 1145 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); |
1147 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); | 1146 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); |
1148 | extern int __break_lease(struct inode *inode, unsigned int flags); | 1147 | extern int __break_lease(struct inode *inode, unsigned int flags); |
1149 | extern void lease_get_mtime(struct inode *, struct timespec *time); | 1148 | extern void lease_get_mtime(struct inode *, struct timespec *time); |
1150 | extern int generic_setlease(struct file *, long, struct file_lock **); | 1149 | extern int generic_setlease(struct file *, long, struct file_lock **); |
1151 | extern int vfs_setlease(struct file *, long, struct file_lock **); | 1150 | extern int vfs_setlease(struct file *, long, struct file_lock **); |
1152 | extern int lease_modify(struct file_lock **, int); | 1151 | extern int lease_modify(struct file_lock **, int); |
1153 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); | 1152 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); |
1154 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); | 1153 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); |
1155 | extern void lock_flocks(void); | 1154 | extern void lock_flocks(void); |
1156 | extern void unlock_flocks(void); | 1155 | extern void unlock_flocks(void); |
1157 | #else /* !CONFIG_FILE_LOCKING */ | 1156 | #else /* !CONFIG_FILE_LOCKING */ |
1158 | static inline int fcntl_getlk(struct file *file, struct flock __user *user) | 1157 | static inline int fcntl_getlk(struct file *file, struct flock __user *user) |
1159 | { | 1158 | { |
1160 | return -EINVAL; | 1159 | return -EINVAL; |
1161 | } | 1160 | } |
1162 | 1161 | ||
1163 | static inline int fcntl_setlk(unsigned int fd, struct file *file, | 1162 | static inline int fcntl_setlk(unsigned int fd, struct file *file, |
1164 | unsigned int cmd, struct flock __user *user) | 1163 | unsigned int cmd, struct flock __user *user) |
1165 | { | 1164 | { |
1166 | return -EACCES; | 1165 | return -EACCES; |
1167 | } | 1166 | } |
1168 | 1167 | ||
1169 | #if BITS_PER_LONG == 32 | 1168 | #if BITS_PER_LONG == 32 |
1170 | static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) | 1169 | static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) |
1171 | { | 1170 | { |
1172 | return -EINVAL; | 1171 | return -EINVAL; |
1173 | } | 1172 | } |
1174 | 1173 | ||
1175 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, | 1174 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, |
1176 | unsigned int cmd, struct flock64 __user *user) | 1175 | unsigned int cmd, struct flock64 __user *user) |
1177 | { | 1176 | { |
1178 | return -EACCES; | 1177 | return -EACCES; |
1179 | } | 1178 | } |
1180 | #endif | 1179 | #endif |
1181 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1180 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1182 | { | 1181 | { |
1183 | return 0; | 1182 | return 0; |
1184 | } | 1183 | } |
1185 | 1184 | ||
1186 | static inline int fcntl_getlease(struct file *filp) | 1185 | static inline int fcntl_getlease(struct file *filp) |
1187 | { | 1186 | { |
1188 | return 0; | 1187 | return 0; |
1189 | } | 1188 | } |
1190 | 1189 | ||
1191 | static inline void locks_init_lock(struct file_lock *fl) | 1190 | static inline void locks_init_lock(struct file_lock *fl) |
1192 | { | 1191 | { |
1193 | return; | 1192 | return; |
1194 | } | 1193 | } |
1195 | 1194 | ||
1196 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1195 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1197 | { | 1196 | { |
1198 | return; | 1197 | return; |
1199 | } | 1198 | } |
1200 | 1199 | ||
1201 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1200 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1202 | { | 1201 | { |
1203 | return; | 1202 | return; |
1204 | } | 1203 | } |
1205 | 1204 | ||
1206 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) | 1205 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) |
1207 | { | 1206 | { |
1208 | return; | 1207 | return; |
1209 | } | 1208 | } |
1210 | 1209 | ||
1211 | static inline void locks_remove_flock(struct file *filp) | 1210 | static inline void locks_remove_flock(struct file *filp) |
1212 | { | 1211 | { |
1213 | return; | 1212 | return; |
1214 | } | 1213 | } |
1215 | 1214 | ||
1216 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) | 1215 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) |
1217 | { | 1216 | { |
1218 | return; | 1217 | return; |
1219 | } | 1218 | } |
1220 | 1219 | ||
1221 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, | 1220 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, |
1222 | struct file_lock *conflock) | 1221 | struct file_lock *conflock) |
1223 | { | 1222 | { |
1224 | return -ENOLCK; | 1223 | return -ENOLCK; |
1225 | } | 1224 | } |
1226 | 1225 | ||
1227 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) | 1226 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) |
1228 | { | 1227 | { |
1229 | return -ENOLCK; | 1228 | return -ENOLCK; |
1230 | } | 1229 | } |
1231 | 1230 | ||
1232 | static inline int posix_unblock_lock(struct file *filp, | 1231 | static inline int posix_unblock_lock(struct file *filp, |
1233 | struct file_lock *waiter) | 1232 | struct file_lock *waiter) |
1234 | { | 1233 | { |
1235 | return -ENOENT; | 1234 | return -ENOENT; |
1236 | } | 1235 | } |
1237 | 1236 | ||
1238 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) | 1237 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) |
1239 | { | 1238 | { |
1240 | return 0; | 1239 | return 0; |
1241 | } | 1240 | } |
1242 | 1241 | ||
1243 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, | 1242 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, |
1244 | struct file_lock *fl, struct file_lock *conf) | 1243 | struct file_lock *fl, struct file_lock *conf) |
1245 | { | 1244 | { |
1246 | return -ENOLCK; | 1245 | return -ENOLCK; |
1247 | } | 1246 | } |
1248 | 1247 | ||
1249 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 1248 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
1250 | { | 1249 | { |
1251 | return 0; | 1250 | return 0; |
1252 | } | 1251 | } |
1253 | 1252 | ||
1254 | static inline int flock_lock_file_wait(struct file *filp, | 1253 | static inline int flock_lock_file_wait(struct file *filp, |
1255 | struct file_lock *request) | 1254 | struct file_lock *request) |
1256 | { | 1255 | { |
1257 | return -ENOLCK; | 1256 | return -ENOLCK; |
1258 | } | 1257 | } |
1259 | 1258 | ||
1260 | static inline int __break_lease(struct inode *inode, unsigned int mode) | 1259 | static inline int __break_lease(struct inode *inode, unsigned int mode) |
1261 | { | 1260 | { |
1262 | return 0; | 1261 | return 0; |
1263 | } | 1262 | } |
1264 | 1263 | ||
1265 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) | 1264 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) |
1266 | { | 1265 | { |
1267 | return; | 1266 | return; |
1268 | } | 1267 | } |
1269 | 1268 | ||
1270 | static inline int generic_setlease(struct file *filp, long arg, | 1269 | static inline int generic_setlease(struct file *filp, long arg, |
1271 | struct file_lock **flp) | 1270 | struct file_lock **flp) |
1272 | { | 1271 | { |
1273 | return -EINVAL; | 1272 | return -EINVAL; |
1274 | } | 1273 | } |
1275 | 1274 | ||
1276 | static inline int vfs_setlease(struct file *filp, long arg, | 1275 | static inline int vfs_setlease(struct file *filp, long arg, |
1277 | struct file_lock **lease) | 1276 | struct file_lock **lease) |
1278 | { | 1277 | { |
1279 | return -EINVAL; | 1278 | return -EINVAL; |
1280 | } | 1279 | } |
1281 | 1280 | ||
1282 | static inline int lease_modify(struct file_lock **before, int arg) | 1281 | static inline int lease_modify(struct file_lock **before, int arg) |
1283 | { | 1282 | { |
1284 | return -EINVAL; | 1283 | return -EINVAL; |
1285 | } | 1284 | } |
1286 | 1285 | ||
1287 | static inline int lock_may_read(struct inode *inode, loff_t start, | 1286 | static inline int lock_may_read(struct inode *inode, loff_t start, |
1288 | unsigned long len) | 1287 | unsigned long len) |
1289 | { | 1288 | { |
1290 | return 1; | 1289 | return 1; |
1291 | } | 1290 | } |
1292 | 1291 | ||
1293 | static inline int lock_may_write(struct inode *inode, loff_t start, | 1292 | static inline int lock_may_write(struct inode *inode, loff_t start, |
1294 | unsigned long len) | 1293 | unsigned long len) |
1295 | { | 1294 | { |
1296 | return 1; | 1295 | return 1; |
1297 | } | 1296 | } |
1298 | 1297 | ||
1299 | static inline void lock_flocks(void) | 1298 | static inline void lock_flocks(void) |
1300 | { | 1299 | { |
1301 | } | 1300 | } |
1302 | 1301 | ||
1303 | static inline void unlock_flocks(void) | 1302 | static inline void unlock_flocks(void) |
1304 | { | 1303 | { |
1305 | } | 1304 | } |
1306 | 1305 | ||
1307 | #endif /* !CONFIG_FILE_LOCKING */ | 1306 | #endif /* !CONFIG_FILE_LOCKING */ |
1308 | 1307 | ||
1309 | 1308 | ||
1310 | struct fasync_struct { | 1309 | struct fasync_struct { |
1311 | spinlock_t fa_lock; | 1310 | spinlock_t fa_lock; |
1312 | int magic; | 1311 | int magic; |
1313 | int fa_fd; | 1312 | int fa_fd; |
1314 | struct fasync_struct *fa_next; /* singly linked list */ | 1313 | struct fasync_struct *fa_next; /* singly linked list */ |
1315 | struct file *fa_file; | 1314 | struct file *fa_file; |
1316 | struct rcu_head fa_rcu; | 1315 | struct rcu_head fa_rcu; |
1317 | }; | 1316 | }; |
1318 | 1317 | ||
1319 | #define FASYNC_MAGIC 0x4601 | 1318 | #define FASYNC_MAGIC 0x4601 |
1320 | 1319 | ||
1321 | /* SMP safe fasync helpers: */ | 1320 | /* SMP safe fasync helpers: */ |
1322 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); | 1321 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); |
1323 | extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); | 1322 | extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); |
1324 | extern int fasync_remove_entry(struct file *, struct fasync_struct **); | 1323 | extern int fasync_remove_entry(struct file *, struct fasync_struct **); |
1325 | extern struct fasync_struct *fasync_alloc(void); | 1324 | extern struct fasync_struct *fasync_alloc(void); |
1326 | extern void fasync_free(struct fasync_struct *); | 1325 | extern void fasync_free(struct fasync_struct *); |
1327 | 1326 | ||
1328 | /* can be called from interrupts */ | 1327 | /* can be called from interrupts */ |
1329 | extern void kill_fasync(struct fasync_struct **, int, int); | 1328 | extern void kill_fasync(struct fasync_struct **, int, int); |
1330 | 1329 | ||
1331 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); | 1330 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); |
1332 | extern int f_setown(struct file *filp, unsigned long arg, int force); | 1331 | extern int f_setown(struct file *filp, unsigned long arg, int force); |
1333 | extern void f_delown(struct file *filp); | 1332 | extern void f_delown(struct file *filp); |
1334 | extern pid_t f_getown(struct file *filp); | 1333 | extern pid_t f_getown(struct file *filp); |
1335 | extern int send_sigurg(struct fown_struct *fown); | 1334 | extern int send_sigurg(struct fown_struct *fown); |
1336 | 1335 | ||
1337 | /* | 1336 | /* |
1338 | * Umount options | 1337 | * Umount options |
1339 | */ | 1338 | */ |
1340 | 1339 | ||
1341 | #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ | 1340 | #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ |
1342 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ | 1341 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ |
1343 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ | 1342 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ |
1344 | #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ | 1343 | #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ |
1345 | #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ | 1344 | #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ |
1346 | 1345 | ||
1347 | extern struct list_head super_blocks; | 1346 | extern struct list_head super_blocks; |
1348 | extern spinlock_t sb_lock; | 1347 | extern spinlock_t sb_lock; |
1349 | 1348 | ||
1350 | struct super_block { | 1349 | struct super_block { |
1351 | struct list_head s_list; /* Keep this first */ | 1350 | struct list_head s_list; /* Keep this first */ |
1352 | dev_t s_dev; /* search index; _not_ kdev_t */ | 1351 | dev_t s_dev; /* search index; _not_ kdev_t */ |
1353 | unsigned char s_dirt; | 1352 | unsigned char s_dirt; |
1354 | unsigned char s_blocksize_bits; | 1353 | unsigned char s_blocksize_bits; |
1355 | unsigned long s_blocksize; | 1354 | unsigned long s_blocksize; |
1356 | loff_t s_maxbytes; /* Max file size */ | 1355 | loff_t s_maxbytes; /* Max file size */ |
1357 | struct file_system_type *s_type; | 1356 | struct file_system_type *s_type; |
1358 | const struct super_operations *s_op; | 1357 | const struct super_operations *s_op; |
1359 | const struct dquot_operations *dq_op; | 1358 | const struct dquot_operations *dq_op; |
1360 | const struct quotactl_ops *s_qcop; | 1359 | const struct quotactl_ops *s_qcop; |
1361 | const struct export_operations *s_export_op; | 1360 | const struct export_operations *s_export_op; |
1362 | unsigned long s_flags; | 1361 | unsigned long s_flags; |
1363 | unsigned long s_magic; | 1362 | unsigned long s_magic; |
1364 | struct dentry *s_root; | 1363 | struct dentry *s_root; |
1365 | struct rw_semaphore s_umount; | 1364 | struct rw_semaphore s_umount; |
1366 | struct mutex s_lock; | 1365 | struct mutex s_lock; |
1367 | int s_count; | 1366 | int s_count; |
1368 | atomic_t s_active; | 1367 | atomic_t s_active; |
1369 | #ifdef CONFIG_SECURITY | 1368 | #ifdef CONFIG_SECURITY |
1370 | void *s_security; | 1369 | void *s_security; |
1371 | #endif | 1370 | #endif |
1372 | const struct xattr_handler **s_xattr; | 1371 | const struct xattr_handler **s_xattr; |
1373 | 1372 | ||
1374 | struct list_head s_inodes; /* all inodes */ | 1373 | struct list_head s_inodes; /* all inodes */ |
1375 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ | 1374 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ |
1376 | #ifdef CONFIG_SMP | 1375 | #ifdef CONFIG_SMP |
1377 | struct list_head __percpu *s_files; | 1376 | struct list_head __percpu *s_files; |
1378 | #else | 1377 | #else |
1379 | struct list_head s_files; | 1378 | struct list_head s_files; |
1380 | #endif | 1379 | #endif |
1381 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ | 1380 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ |
1382 | struct list_head s_dentry_lru; /* unused dentry lru */ | 1381 | struct list_head s_dentry_lru; /* unused dentry lru */ |
1383 | int s_nr_dentry_unused; /* # of dentry on lru */ | 1382 | int s_nr_dentry_unused; /* # of dentry on lru */ |
1384 | 1383 | ||
1385 | struct block_device *s_bdev; | 1384 | struct block_device *s_bdev; |
1386 | struct backing_dev_info *s_bdi; | 1385 | struct backing_dev_info *s_bdi; |
1387 | struct mtd_info *s_mtd; | 1386 | struct mtd_info *s_mtd; |
1388 | struct list_head s_instances; | 1387 | struct list_head s_instances; |
1389 | struct quota_info s_dquot; /* Diskquota specific options */ | 1388 | struct quota_info s_dquot; /* Diskquota specific options */ |
1390 | 1389 | ||
1391 | int s_frozen; | 1390 | int s_frozen; |
1392 | wait_queue_head_t s_wait_unfrozen; | 1391 | wait_queue_head_t s_wait_unfrozen; |
1393 | 1392 | ||
1394 | char s_id[32]; /* Informational name */ | 1393 | char s_id[32]; /* Informational name */ |
1395 | 1394 | ||
1396 | void *s_fs_info; /* Filesystem private info */ | 1395 | void *s_fs_info; /* Filesystem private info */ |
1397 | fmode_t s_mode; | 1396 | fmode_t s_mode; |
1398 | 1397 | ||
1399 | /* Granularity of c/m/atime in ns. | 1398 | /* Granularity of c/m/atime in ns. |
1400 | Cannot be worse than a second */ | 1399 | Cannot be worse than a second */ |
1401 | u32 s_time_gran; | 1400 | u32 s_time_gran; |
1402 | 1401 | ||
1403 | /* | 1402 | /* |
1404 | * The next field is for VFS *only*. No filesystems have any business | 1403 | * The next field is for VFS *only*. No filesystems have any business |
1405 | * even looking at it. You had been warned. | 1404 | * even looking at it. You had been warned. |
1406 | */ | 1405 | */ |
1407 | struct mutex s_vfs_rename_mutex; /* Kludge */ | 1406 | struct mutex s_vfs_rename_mutex; /* Kludge */ |
1408 | 1407 | ||
1409 | /* | 1408 | /* |
1410 | * Filesystem subtype. If non-empty the filesystem type field | 1409 | * Filesystem subtype. If non-empty the filesystem type field |
1411 | * in /proc/mounts will be "type.subtype" | 1410 | * in /proc/mounts will be "type.subtype" |
1412 | */ | 1411 | */ |
1413 | char *s_subtype; | 1412 | char *s_subtype; |
1414 | 1413 | ||
1415 | /* | 1414 | /* |
1416 | * Saved mount options for lazy filesystems using | 1415 | * Saved mount options for lazy filesystems using |
1417 | * generic_show_options() | 1416 | * generic_show_options() |
1418 | */ | 1417 | */ |
1419 | char __rcu *s_options; | 1418 | char __rcu *s_options; |
1420 | }; | 1419 | }; |
1421 | 1420 | ||
1422 | extern struct timespec current_fs_time(struct super_block *sb); | 1421 | extern struct timespec current_fs_time(struct super_block *sb); |
1423 | 1422 | ||
1424 | /* | 1423 | /* |
1425 | * Snapshotting support. | 1424 | * Snapshotting support. |
1426 | */ | 1425 | */ |
1427 | enum { | 1426 | enum { |
1428 | SB_UNFROZEN = 0, | 1427 | SB_UNFROZEN = 0, |
1429 | SB_FREEZE_WRITE = 1, | 1428 | SB_FREEZE_WRITE = 1, |
1430 | SB_FREEZE_TRANS = 2, | 1429 | SB_FREEZE_TRANS = 2, |
1431 | }; | 1430 | }; |
1432 | 1431 | ||
1433 | #define vfs_check_frozen(sb, level) \ | 1432 | #define vfs_check_frozen(sb, level) \ |
1434 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | 1433 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) |
1435 | 1434 | ||
1436 | #define get_fs_excl() atomic_inc(¤t->fs_excl) | 1435 | #define get_fs_excl() atomic_inc(¤t->fs_excl) |
1437 | #define put_fs_excl() atomic_dec(¤t->fs_excl) | 1436 | #define put_fs_excl() atomic_dec(¤t->fs_excl) |
1438 | #define has_fs_excl() atomic_read(¤t->fs_excl) | 1437 | #define has_fs_excl() atomic_read(¤t->fs_excl) |
1439 | 1438 | ||
1440 | #define is_owner_or_cap(inode) \ | 1439 | #define is_owner_or_cap(inode) \ |
1441 | ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) | 1440 | ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) |
1442 | 1441 | ||
1443 | /* not quite ready to be deprecated, but... */ | 1442 | /* not quite ready to be deprecated, but... */ |
1444 | extern void lock_super(struct super_block *); | 1443 | extern void lock_super(struct super_block *); |
1445 | extern void unlock_super(struct super_block *); | 1444 | extern void unlock_super(struct super_block *); |
1446 | 1445 | ||
1447 | /* | 1446 | /* |
1448 | * VFS helper functions.. | 1447 | * VFS helper functions.. |
1449 | */ | 1448 | */ |
1450 | extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); | 1449 | extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); |
1451 | extern int vfs_mkdir(struct inode *, struct dentry *, int); | 1450 | extern int vfs_mkdir(struct inode *, struct dentry *, int); |
1452 | extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); | 1451 | extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); |
1453 | extern int vfs_symlink(struct inode *, struct dentry *, const char *); | 1452 | extern int vfs_symlink(struct inode *, struct dentry *, const char *); |
1454 | extern int vfs_link(struct dentry *, struct inode *, struct dentry *); | 1453 | extern int vfs_link(struct dentry *, struct inode *, struct dentry *); |
1455 | extern int vfs_rmdir(struct inode *, struct dentry *); | 1454 | extern int vfs_rmdir(struct inode *, struct dentry *); |
1456 | extern int vfs_unlink(struct inode *, struct dentry *); | 1455 | extern int vfs_unlink(struct inode *, struct dentry *); |
1457 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 1456 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
1458 | 1457 | ||
1459 | /* | 1458 | /* |
1460 | * VFS dentry helper functions. | 1459 | * VFS dentry helper functions. |
1461 | */ | 1460 | */ |
1462 | extern void dentry_unhash(struct dentry *dentry); | 1461 | extern void dentry_unhash(struct dentry *dentry); |
1463 | 1462 | ||
1464 | /* | 1463 | /* |
1465 | * VFS file helper functions. | 1464 | * VFS file helper functions. |
1466 | */ | 1465 | */ |
1467 | extern int file_permission(struct file *, int); | 1466 | extern int file_permission(struct file *, int); |
1468 | extern void inode_init_owner(struct inode *inode, const struct inode *dir, | 1467 | extern void inode_init_owner(struct inode *inode, const struct inode *dir, |
1469 | mode_t mode); | 1468 | mode_t mode); |
/*
 * VFS FS_IOC_FIEMAP helper definitions.
 *
 * State handed to ->fiemap() (see struct inode_operations below) and
 * filled in extent-by-extent via fiemap_fill_next_extent().
 */
struct fiemap_extent_info {
	unsigned int fi_flags;		/* Flags as passed from user */
	unsigned int fi_extents_mapped;	/* Number of mapped extents */
	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
	struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
						 * array */
};
1480 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | 1479 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, |
1481 | u64 phys, u64 len, u32 flags); | 1480 | u64 phys, u64 len, u32 flags); |
1482 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | 1481 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); |
1483 | 1482 | ||
/*
 * File types
 *
 * NOTE! These match bits 12..15 of stat.st_mode
 * (ie "(i_mode >> 12) & 15").
 */
#define DT_UNKNOWN	0	/* type could not be determined */
#define DT_FIFO		1	/* named pipe (FIFO) */
#define DT_CHR		2	/* character device */
#define DT_DIR		4	/* directory */
#define DT_BLK		6	/* block device */
#define DT_REG		8	/* regular file */
#define DT_LNK		10	/* symbolic link */
#define DT_SOCK		12	/* UNIX domain socket */
#define DT_WHT		14	/* whiteout */
1499 | 1498 | ||
/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 *
 * NOTE(review): arguments appear to be (buf, name, namelen, offset,
 * ino, d_type) — confirm against the ->readdir() callers.
 */
typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
struct block_device_operations;	/* forward declaration only */

/* These macros are for out of kernel modules to test that
 * the kernel supports the unlocked_ioctl and compat_ioctl
 * fields in struct file_operations. */
#define HAVE_COMPAT_IOCTL 1
#define HAVE_UNLOCKED_IOCTL 1
1514 | 1513 | ||
/*
 * NOTE:
 * all file operations except setlease can be called without
 * the big kernel lock held in all filesystems.
 */
struct file_operations {
	struct module *owner;	/* module implementing these operations */
	loff_t (*llseek) (struct file *, loff_t, int);
	/* synchronous and vectored-aio reads/writes */
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
	/* directory iteration (see filldir_t above) */
	int (*readdir) (struct file *, void *, filldir_t);
	unsigned int (*poll) (struct file *, struct poll_table_struct *);
	/* ioctls: BKL-free and 32-bit-compat variants */
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	/* open/close lifecycle */
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, int datasync);
	int (*aio_fsync) (struct kiocb *, int datasync);
	int (*fasync) (int, struct file *, int);
	/* POSIX locks and BSD flock */
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	/* splice support */
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	/* leases: the one op still called with the BKL held (see NOTE above) */
	int (*setlease)(struct file *, long, struct file_lock **);
};
1547 | 1546 | ||
/*
 * Per-inode operations supplied by a filesystem: directory namespace
 * manipulation, symlink handling, attribute/xattr access and extent
 * mapping.
 */
struct inode_operations {
	/* namespace operations on directory inodes */
	int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
	int (*link) (struct dentry *,struct inode *,struct dentry *);
	int (*unlink) (struct inode *,struct dentry *);
	int (*symlink) (struct inode *,struct dentry *,const char *);
	int (*mkdir) (struct inode *,struct dentry *,int);
	int (*rmdir) (struct inode *,struct dentry *);
	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
	int (*rename) (struct inode *, struct dentry *,
			struct inode *, struct dentry *);
	/* symlink traversal */
	int (*readlink) (struct dentry *, char __user *,int);
	void * (*follow_link) (struct dentry *, struct nameidata *);
	void (*put_link) (struct dentry *, struct nameidata *, void *);
	void (*truncate) (struct inode *);
	/* permission / attribute handling */
	int (*permission) (struct inode *, int);
	int (*check_acl)(struct inode *, int);
	int (*setattr) (struct dentry *, struct iattr *);
	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
	/* extended attributes */
	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
	ssize_t (*listxattr) (struct dentry *, char *, size_t);
	int (*removexattr) (struct dentry *, const char *);
	void (*truncate_range)(struct inode *, loff_t, loff_t);
	long (*fallocate)(struct inode *inode, int mode, loff_t offset,
			  loff_t len);
	/* extent mapping for FS_IOC_FIEMAP (see struct fiemap_extent_info) */
	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
		      u64 len);
};
1577 | 1576 | ||
1578 | struct seq_file; | 1577 | struct seq_file; |
1579 | 1578 | ||
1580 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, | 1579 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, |
1581 | unsigned long nr_segs, unsigned long fast_segs, | 1580 | unsigned long nr_segs, unsigned long fast_segs, |
1582 | struct iovec *fast_pointer, | 1581 | struct iovec *fast_pointer, |
1583 | struct iovec **ret_pointer); | 1582 | struct iovec **ret_pointer); |
1584 | 1583 | ||
1585 | extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); | 1584 | extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); |
1586 | extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); | 1585 | extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); |
1587 | extern ssize_t vfs_readv(struct file *, const struct iovec __user *, | 1586 | extern ssize_t vfs_readv(struct file *, const struct iovec __user *, |
1588 | unsigned long, loff_t *); | 1587 | unsigned long, loff_t *); |
1589 | extern ssize_t vfs_writev(struct file *, const struct iovec __user *, | 1588 | extern ssize_t vfs_writev(struct file *, const struct iovec __user *, |
1590 | unsigned long, loff_t *); | 1589 | unsigned long, loff_t *); |
1591 | 1590 | ||
/*
 * Superblock-level operations supplied by a filesystem: inode
 * allocation/teardown, write-back, freeze/unfreeze, statistics,
 * remount and (optionally) quota I/O.
 */
struct super_operations {
	/* inode object lifecycle */
	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);

	/* inode write-back */
	void (*dirty_inode) (struct inode *);
	int (*write_inode) (struct inode *, struct writeback_control *wbc);
	int (*drop_inode) (struct inode *);
	void (*evict_inode) (struct inode *);
	/* superblock lifecycle and synchronisation */
	void (*put_super) (struct super_block *);
	void (*write_super) (struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	int (*freeze_fs) (struct super_block *);
	int (*unfreeze_fs) (struct super_block *);
	int (*statfs) (struct dentry *, struct kstatfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*umount_begin) (struct super_block *);

	/* /proc/mounts style reporting */
	int (*show_options)(struct seq_file *, struct vfsmount *);
	int (*show_stats)(struct seq_file *, struct vfsmount *);
#ifdef CONFIG_QUOTA
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
	int (*trim_fs) (struct super_block *, struct fstrim_range *);
};
1618 | 1617 | ||
/*
 * Inode state bits.  Protected by inode_lock.
 *
 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
 *
 * Four bits define the lifetime of an inode.  Initially, inodes are I_NEW,
 * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
 * various stages of removing an inode.
 *
 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
 *
 * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
 *			fdatasync().  i_atime is the usual cause.
 * I_DIRTY_DATASYNC	Data-related inode changes pending.  We keep track of
 *			these changes separately from I_DIRTY_SYNC so that we
 *			don't have to write inode on fdatasync() when only
 *			mtime has changed in it.
 * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
 * I_NEW		Serves as both a mutex and completion notification.
 *			New inodes set I_NEW.  If two processes both create
 *			the same inode, one of them will release its inode and
 *			wait for I_NEW to be released before returning.
 *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
 *			also cause waiting on I_NEW, without I_NEW actually
 *			being set.  find_inode() uses this to prevent returning
 *			nearly-dead inodes.
 * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
 *			is zero.  I_FREEING must be set when I_WILL_FREE is
 *			cleared.
 * I_FREEING		Set when inode is about to be freed but still has dirty
 *			pages or buffers attached or the inode itself is still
 *			dirty.
 * I_CLEAR		Added by end_writeback().  In this state the inode is clean
 *			and can be destroyed.  Inode keeps I_FREEING.
 *
 *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
 *			prohibited for many purposes.  iget() must wait for
 *			the inode to be completely released, then create it
 *			anew.  Other functions will just ignore such inodes,
 *			if appropriate.  I_NEW is used for waiting.
 *
 * I_SYNC		Synchonized write of dirty inode data.  The bits is
 *			set during data writeback, and cleared with a wakeup
 *			on the bit address once it is done.
 *
 * Q: What is the difference between I_WILL_FREE and I_FREEING?
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
/* __I_NEW / __I_SYNC are the bit numbers, for wait-on-bit primitives */
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
/*
 * NOTE(review): I_REFERENCED is not covered by the comment above;
 * presumably it marks recently-used inodes for reclaim — confirm.
 */
#define I_REFERENCED		(1 << 8)

/* "dirty in any way" mask: union of the three dirty bits above */
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1680 | 1679 | ||
1681 | extern void __mark_inode_dirty(struct inode *, int); | 1680 | extern void __mark_inode_dirty(struct inode *, int); |
/* Mark the inode fully dirty: I_DIRTY covers all three dirty bits above. */
static inline void mark_inode_dirty(struct inode *inode)
{
	__mark_inode_dirty(inode, I_DIRTY);
}
1686 | 1685 | ||
/*
 * Mark the inode dirty for sync() purposes only (I_DIRTY_SYNC): per the
 * state-bit comment above, it need not be written on fdatasync(); i_atime
 * updates are the usual cause.
 */
static inline void mark_inode_dirty_sync(struct inode *inode)
{
	__mark_inode_dirty(inode, I_DIRTY_SYNC);
}
1691 | 1690 | ||
/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink. Currently,
 * it is only here for parity with dec_nlink().
 *
 * Does not mark the inode dirty; use inode_inc_link_count() when
 * the change must also be scheduled for write-back.
 */
static inline void inc_nlink(struct inode *inode)
{
	inode->i_nlink++;
}
1704 | 1703 | ||
/* inc_nlink() plus mark_inode_dirty(): bump i_nlink and queue write-back. */
static inline void inode_inc_link_count(struct inode *inode)
{
	inc_nlink(inode);
	mark_inode_dirty(inode);
}
1710 | 1709 | ||
/**
 * drop_nlink - directly drop an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink. In cases
 * where we are attempting to track writes to the
 * filesystem, a decrement to zero means an imminent
 * write when the file is truncated and actually unlinked
 * on the filesystem.
 *
 * Does not mark the inode dirty; use inode_dec_link_count() when
 * the change must also be scheduled for write-back.
 */
static inline void drop_nlink(struct inode *inode)
{
	inode->i_nlink--;
}
1726 | 1725 | ||
/**
 * clear_nlink - directly zero an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink. See
 * drop_nlink() for why we care about i_nlink hitting zero.
 *
 * Like the other helpers here, this does not mark the inode dirty.
 */
static inline void clear_nlink(struct inode *inode)
{
	inode->i_nlink = 0;
}
1739 | 1738 | ||
/* drop_nlink() plus mark_inode_dirty(): drop i_nlink and queue write-back. */
static inline void inode_dec_link_count(struct inode *inode)
{
	drop_nlink(inode);
	mark_inode_dirty(inode);
}
1745 | 1744 | ||
/**
 * inode_inc_iversion - increments i_version
 * @inode: inode that needs to be updated
 *
 * Every time the inode is modified, the i_version field will be incremented.
 * The filesystem has to be mounted with the i_version flag.
 */

static inline void inode_inc_iversion(struct inode *inode)
{
	/* i_lock serializes the read-modify-write of i_version */
	spin_lock(&inode->i_lock);
	inode->i_version++;
	spin_unlock(&inode->i_lock);
}
1760 | 1759 | ||
1761 | extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); | 1760 | extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); |
1762 | static inline void file_accessed(struct file *file) | 1761 | static inline void file_accessed(struct file *file) |
1763 | { | 1762 | { |
1764 | if (!(file->f_flags & O_NOATIME)) | 1763 | if (!(file->f_flags & O_NOATIME)) |
1765 | touch_atime(file->f_path.mnt, file->f_path.dentry); | 1764 | touch_atime(file->f_path.mnt, file->f_path.dentry); |
1766 | } | 1765 | } |
1767 | 1766 | ||
1768 | int sync_inode(struct inode *inode, struct writeback_control *wbc); | 1767 | int sync_inode(struct inode *inode, struct writeback_control *wbc); |
1769 | int sync_inode_metadata(struct inode *inode, int wait); | 1768 | int sync_inode_metadata(struct inode *inode, int wait); |
1770 | 1769 | ||
/*
 * Descriptor for a registered filesystem type; linked into the global
 * list via ->next, with all live superblocks of the type on ->fs_supers.
 */
struct file_system_type {
	const char *name;	/* filesystem type name */
	int fs_flags;
	/*
	 * Mount entry points.  NOTE(review): both get_sb() and mount()
	 * appear here; get_sb() looks like the older interface — confirm
	 * which one a given filesystem is expected to implement.
	 */
	int (*get_sb) (struct file_system_type *, int,
		       const char *, void *, struct vfsmount *);
	struct dentry *(*mount) (struct file_system_type *, int,
		       const char *, void *);
	void (*kill_sb) (struct super_block *);
	struct module *owner;	/* module implementing this fs type */
	struct file_system_type * next;
	struct list_head fs_supers;	/* superblocks of this type */

	/* lockdep class keys: one set of lock classes per fs type */
	struct lock_class_key s_lock_key;
	struct lock_class_key s_umount_key;
	struct lock_class_key s_vfs_rename_key;

	struct lock_class_key i_lock_key;
	struct lock_class_key i_mutex_key;
	struct lock_class_key i_mutex_dir_key;
	struct lock_class_key i_alloc_sem_key;
};
1792 | 1791 | ||
1793 | extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, | 1792 | extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, |
1794 | void *data, int (*fill_super)(struct super_block *, void *, int)); | 1793 | void *data, int (*fill_super)(struct super_block *, void *, int)); |
1795 | extern struct dentry *mount_bdev(struct file_system_type *fs_type, | 1794 | extern struct dentry *mount_bdev(struct file_system_type *fs_type, |
1796 | int flags, const char *dev_name, void *data, | 1795 | int flags, const char *dev_name, void *data, |
1797 | int (*fill_super)(struct super_block *, void *, int)); | 1796 | int (*fill_super)(struct super_block *, void *, int)); |
1798 | extern int get_sb_bdev(struct file_system_type *fs_type, | 1797 | extern int get_sb_bdev(struct file_system_type *fs_type, |
1799 | int flags, const char *dev_name, void *data, | 1798 | int flags, const char *dev_name, void *data, |
1800 | int (*fill_super)(struct super_block *, void *, int), | 1799 | int (*fill_super)(struct super_block *, void *, int), |
1801 | struct vfsmount *mnt); | 1800 | struct vfsmount *mnt); |
1802 | extern struct dentry *mount_single(struct file_system_type *fs_type, | 1801 | extern struct dentry *mount_single(struct file_system_type *fs_type, |
1803 | int flags, void *data, | 1802 | int flags, void *data, |
1804 | int (*fill_super)(struct super_block *, void *, int)); | 1803 | int (*fill_super)(struct super_block *, void *, int)); |
1805 | extern int get_sb_single(struct file_system_type *fs_type, | 1804 | extern int get_sb_single(struct file_system_type *fs_type, |
1806 | int flags, void *data, | 1805 | int flags, void *data, |
1807 | int (*fill_super)(struct super_block *, void *, int), | 1806 | int (*fill_super)(struct super_block *, void *, int), |
1808 | struct vfsmount *mnt); | 1807 | struct vfsmount *mnt); |
1809 | extern struct dentry *mount_nodev(struct file_system_type *fs_type, | 1808 | extern struct dentry *mount_nodev(struct file_system_type *fs_type, |
1810 | int flags, void *data, | 1809 | int flags, void *data, |
1811 | int (*fill_super)(struct super_block *, void *, int)); | 1810 | int (*fill_super)(struct super_block *, void *, int)); |
1812 | extern int get_sb_nodev(struct file_system_type *fs_type, | 1811 | extern int get_sb_nodev(struct file_system_type *fs_type, |
1813 | int flags, void *data, | 1812 | int flags, void *data, |
1814 | int (*fill_super)(struct super_block *, void *, int), | 1813 | int (*fill_super)(struct super_block *, void *, int), |
1815 | struct vfsmount *mnt); | 1814 | struct vfsmount *mnt); |
1816 | void generic_shutdown_super(struct super_block *sb); | 1815 | void generic_shutdown_super(struct super_block *sb); |
1817 | void kill_block_super(struct super_block *sb); | 1816 | void kill_block_super(struct super_block *sb); |
1818 | void kill_anon_super(struct super_block *sb); | 1817 | void kill_anon_super(struct super_block *sb); |
1819 | void kill_litter_super(struct super_block *sb); | 1818 | void kill_litter_super(struct super_block *sb); |
1820 | void deactivate_super(struct super_block *sb); | 1819 | void deactivate_super(struct super_block *sb); |
1821 | void deactivate_locked_super(struct super_block *sb); | 1820 | void deactivate_locked_super(struct super_block *sb); |
1822 | int set_anon_super(struct super_block *s, void *data); | 1821 | int set_anon_super(struct super_block *s, void *data); |
1823 | struct super_block *sget(struct file_system_type *type, | 1822 | struct super_block *sget(struct file_system_type *type, |
1824 | int (*test)(struct super_block *,void *), | 1823 | int (*test)(struct super_block *,void *), |
1825 | int (*set)(struct super_block *,void *), | 1824 | int (*set)(struct super_block *,void *), |
1826 | void *data); | 1825 | void *data); |
1827 | extern struct dentry *mount_pseudo(struct file_system_type *, char *, | 1826 | extern struct dentry *mount_pseudo(struct file_system_type *, char *, |
1828 | const struct super_operations *ops, unsigned long); | 1827 | const struct super_operations *ops, unsigned long); |
1829 | extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); | 1828 | extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); |
1830 | 1829 | ||
1831 | static inline void sb_mark_dirty(struct super_block *sb) | 1830 | static inline void sb_mark_dirty(struct super_block *sb) |
1832 | { | 1831 | { |
1833 | sb->s_dirt = 1; | 1832 | sb->s_dirt = 1; |
1834 | } | 1833 | } |
1835 | static inline void sb_mark_clean(struct super_block *sb) | 1834 | static inline void sb_mark_clean(struct super_block *sb) |
1836 | { | 1835 | { |
1837 | sb->s_dirt = 0; | 1836 | sb->s_dirt = 0; |
1838 | } | 1837 | } |
1839 | static inline int sb_is_dirty(struct super_block *sb) | 1838 | static inline int sb_is_dirty(struct super_block *sb) |
1840 | { | 1839 | { |
1841 | return sb->s_dirt; | 1840 | return sb->s_dirt; |
1842 | } | 1841 | } |
1843 | 1842 | ||
1844 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ | 1843 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ |
1845 | #define fops_get(fops) \ | 1844 | #define fops_get(fops) \ |
1846 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) | 1845 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) |
1847 | #define fops_put(fops) \ | 1846 | #define fops_put(fops) \ |
1848 | do { if (fops) module_put((fops)->owner); } while(0) | 1847 | do { if (fops) module_put((fops)->owner); } while(0) |
1849 | 1848 | ||
1850 | extern int register_filesystem(struct file_system_type *); | 1849 | extern int register_filesystem(struct file_system_type *); |
1851 | extern int unregister_filesystem(struct file_system_type *); | 1850 | extern int unregister_filesystem(struct file_system_type *); |
1852 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); | 1851 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); |
1853 | #define kern_mount(type) kern_mount_data(type, NULL) | 1852 | #define kern_mount(type) kern_mount_data(type, NULL) |
1854 | extern int may_umount_tree(struct vfsmount *); | 1853 | extern int may_umount_tree(struct vfsmount *); |
1855 | extern int may_umount(struct vfsmount *); | 1854 | extern int may_umount(struct vfsmount *); |
1856 | extern long do_mount(char *, char *, char *, unsigned long, void *); | 1855 | extern long do_mount(char *, char *, char *, unsigned long, void *); |
1857 | extern struct vfsmount *collect_mounts(struct path *); | 1856 | extern struct vfsmount *collect_mounts(struct path *); |
1858 | extern void drop_collected_mounts(struct vfsmount *); | 1857 | extern void drop_collected_mounts(struct vfsmount *); |
1859 | extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, | 1858 | extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, |
1860 | struct vfsmount *); | 1859 | struct vfsmount *); |
1861 | extern int vfs_statfs(struct path *, struct kstatfs *); | 1860 | extern int vfs_statfs(struct path *, struct kstatfs *); |
1862 | extern int statfs_by_dentry(struct dentry *, struct kstatfs *); | 1861 | extern int statfs_by_dentry(struct dentry *, struct kstatfs *); |
1863 | extern int freeze_super(struct super_block *super); | 1862 | extern int freeze_super(struct super_block *super); |
1864 | extern int thaw_super(struct super_block *super); | 1863 | extern int thaw_super(struct super_block *super); |
1865 | 1864 | ||
1866 | extern int current_umask(void); | 1865 | extern int current_umask(void); |
1867 | 1866 | ||
1868 | /* /sys/fs */ | 1867 | /* /sys/fs */ |
1869 | extern struct kobject *fs_kobj; | 1868 | extern struct kobject *fs_kobj; |
1870 | 1869 | ||
1871 | #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) | 1870 | #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) |
1872 | extern int rw_verify_area(int, struct file *, loff_t *, size_t); | 1871 | extern int rw_verify_area(int, struct file *, loff_t *, size_t); |
1873 | 1872 | ||
1874 | #define FLOCK_VERIFY_READ 1 | 1873 | #define FLOCK_VERIFY_READ 1 |
1875 | #define FLOCK_VERIFY_WRITE 2 | 1874 | #define FLOCK_VERIFY_WRITE 2 |
1876 | 1875 | ||
1877 | #ifdef CONFIG_FILE_LOCKING | 1876 | #ifdef CONFIG_FILE_LOCKING |
1878 | extern int locks_mandatory_locked(struct inode *); | 1877 | extern int locks_mandatory_locked(struct inode *); |
1879 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); | 1878 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); |
1880 | 1879 | ||
1881 | /* | 1880 | /* |
1882 | * Candidates for mandatory locking have the setgid bit set | 1881 | * Candidates for mandatory locking have the setgid bit set |
1883 | * but no group execute bit - an otherwise meaningless combination. | 1882 | * but no group execute bit - an otherwise meaningless combination. |
1884 | */ | 1883 | */ |
1885 | 1884 | ||
1886 | static inline int __mandatory_lock(struct inode *ino) | 1885 | static inline int __mandatory_lock(struct inode *ino) |
1887 | { | 1886 | { |
1888 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; | 1887 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; |
1889 | } | 1888 | } |
1890 | 1889 | ||
1891 | /* | 1890 | /* |
1892 | * ... and these candidates should be on MS_MANDLOCK mounted fs, | 1891 | * ... and these candidates should be on MS_MANDLOCK mounted fs, |
1893 | * otherwise these will be advisory locks | 1892 | * otherwise these will be advisory locks |
1894 | */ | 1893 | */ |
1895 | 1894 | ||
1896 | static inline int mandatory_lock(struct inode *ino) | 1895 | static inline int mandatory_lock(struct inode *ino) |
1897 | { | 1896 | { |
1898 | return IS_MANDLOCK(ino) && __mandatory_lock(ino); | 1897 | return IS_MANDLOCK(ino) && __mandatory_lock(ino); |
1899 | } | 1898 | } |
1900 | 1899 | ||
1901 | static inline int locks_verify_locked(struct inode *inode) | 1900 | static inline int locks_verify_locked(struct inode *inode) |
1902 | { | 1901 | { |
1903 | if (mandatory_lock(inode)) | 1902 | if (mandatory_lock(inode)) |
1904 | return locks_mandatory_locked(inode); | 1903 | return locks_mandatory_locked(inode); |
1905 | return 0; | 1904 | return 0; |
1906 | } | 1905 | } |
1907 | 1906 | ||
1908 | static inline int locks_verify_truncate(struct inode *inode, | 1907 | static inline int locks_verify_truncate(struct inode *inode, |
1909 | struct file *filp, | 1908 | struct file *filp, |
1910 | loff_t size) | 1909 | loff_t size) |
1911 | { | 1910 | { |
1912 | if (inode->i_flock && mandatory_lock(inode)) | 1911 | if (inode->i_flock && mandatory_lock(inode)) |
1913 | return locks_mandatory_area( | 1912 | return locks_mandatory_area( |
1914 | FLOCK_VERIFY_WRITE, inode, filp, | 1913 | FLOCK_VERIFY_WRITE, inode, filp, |
1915 | size < inode->i_size ? size : inode->i_size, | 1914 | size < inode->i_size ? size : inode->i_size, |
1916 | (size < inode->i_size ? inode->i_size - size | 1915 | (size < inode->i_size ? inode->i_size - size |
1917 | : size - inode->i_size) | 1916 | : size - inode->i_size) |
1918 | ); | 1917 | ); |
1919 | return 0; | 1918 | return 0; |
1920 | } | 1919 | } |
1921 | 1920 | ||
1922 | static inline int break_lease(struct inode *inode, unsigned int mode) | 1921 | static inline int break_lease(struct inode *inode, unsigned int mode) |
1923 | { | 1922 | { |
1924 | if (inode->i_flock) | 1923 | if (inode->i_flock) |
1925 | return __break_lease(inode, mode); | 1924 | return __break_lease(inode, mode); |
1926 | return 0; | 1925 | return 0; |
1927 | } | 1926 | } |
1928 | #else /* !CONFIG_FILE_LOCKING */ | 1927 | #else /* !CONFIG_FILE_LOCKING */ |
1929 | static inline int locks_mandatory_locked(struct inode *inode) | 1928 | static inline int locks_mandatory_locked(struct inode *inode) |
1930 | { | 1929 | { |
1931 | return 0; | 1930 | return 0; |
1932 | } | 1931 | } |
1933 | 1932 | ||
1934 | static inline int locks_mandatory_area(int rw, struct inode *inode, | 1933 | static inline int locks_mandatory_area(int rw, struct inode *inode, |
1935 | struct file *filp, loff_t offset, | 1934 | struct file *filp, loff_t offset, |
1936 | size_t count) | 1935 | size_t count) |
1937 | { | 1936 | { |
1938 | return 0; | 1937 | return 0; |
1939 | } | 1938 | } |
1940 | 1939 | ||
1941 | static inline int __mandatory_lock(struct inode *inode) | 1940 | static inline int __mandatory_lock(struct inode *inode) |
1942 | { | 1941 | { |
1943 | return 0; | 1942 | return 0; |
1944 | } | 1943 | } |
1945 | 1944 | ||
1946 | static inline int mandatory_lock(struct inode *inode) | 1945 | static inline int mandatory_lock(struct inode *inode) |
1947 | { | 1946 | { |
1948 | return 0; | 1947 | return 0; |
1949 | } | 1948 | } |
1950 | 1949 | ||
1951 | static inline int locks_verify_locked(struct inode *inode) | 1950 | static inline int locks_verify_locked(struct inode *inode) |
1952 | { | 1951 | { |
1953 | return 0; | 1952 | return 0; |
1954 | } | 1953 | } |
1955 | 1954 | ||
1956 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, | 1955 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, |
1957 | size_t size) | 1956 | size_t size) |
1958 | { | 1957 | { |
1959 | return 0; | 1958 | return 0; |
1960 | } | 1959 | } |
1961 | 1960 | ||
1962 | static inline int break_lease(struct inode *inode, unsigned int mode) | 1961 | static inline int break_lease(struct inode *inode, unsigned int mode) |
1963 | { | 1962 | { |
1964 | return 0; | 1963 | return 0; |
1965 | } | 1964 | } |
1966 | 1965 | ||
1967 | #endif /* CONFIG_FILE_LOCKING */ | 1966 | #endif /* CONFIG_FILE_LOCKING */ |
1968 | 1967 | ||
1969 | /* fs/open.c */ | 1968 | /* fs/open.c */ |
1970 | 1969 | ||
1971 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, | 1970 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, |
1972 | struct file *filp); | 1971 | struct file *filp); |
1973 | extern int do_fallocate(struct file *file, int mode, loff_t offset, | 1972 | extern int do_fallocate(struct file *file, int mode, loff_t offset, |
1974 | loff_t len); | 1973 | loff_t len); |
1975 | extern long do_sys_open(int dfd, const char __user *filename, int flags, | 1974 | extern long do_sys_open(int dfd, const char __user *filename, int flags, |
1976 | int mode); | 1975 | int mode); |
1977 | extern struct file *filp_open(const char *, int, int); | 1976 | extern struct file *filp_open(const char *, int, int); |
1978 | extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, | 1977 | extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, |
1979 | const struct cred *); | 1978 | const struct cred *); |
1980 | extern int filp_close(struct file *, fl_owner_t id); | 1979 | extern int filp_close(struct file *, fl_owner_t id); |
1981 | extern char * getname(const char __user *); | 1980 | extern char * getname(const char __user *); |
1982 | 1981 | ||
1983 | /* fs/ioctl.c */ | 1982 | /* fs/ioctl.c */ |
1984 | 1983 | ||
1985 | extern int ioctl_preallocate(struct file *filp, void __user *argp); | 1984 | extern int ioctl_preallocate(struct file *filp, void __user *argp); |
1986 | 1985 | ||
1987 | /* fs/dcache.c */ | 1986 | /* fs/dcache.c */ |
1988 | extern void __init vfs_caches_init_early(void); | 1987 | extern void __init vfs_caches_init_early(void); |
1989 | extern void __init vfs_caches_init(unsigned long); | 1988 | extern void __init vfs_caches_init(unsigned long); |
1990 | 1989 | ||
1991 | extern struct kmem_cache *names_cachep; | 1990 | extern struct kmem_cache *names_cachep; |
1992 | 1991 | ||
1993 | #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) | 1992 | #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) |
1994 | #define __getname() __getname_gfp(GFP_KERNEL) | 1993 | #define __getname() __getname_gfp(GFP_KERNEL) |
1995 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) | 1994 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) |
1996 | #ifndef CONFIG_AUDITSYSCALL | 1995 | #ifndef CONFIG_AUDITSYSCALL |
1997 | #define putname(name) __putname(name) | 1996 | #define putname(name) __putname(name) |
1998 | #else | 1997 | #else |
1999 | extern void putname(const char *name); | 1998 | extern void putname(const char *name); |
2000 | #endif | 1999 | #endif |
2001 | 2000 | ||
2002 | #ifdef CONFIG_BLOCK | 2001 | #ifdef CONFIG_BLOCK |
2003 | extern int register_blkdev(unsigned int, const char *); | 2002 | extern int register_blkdev(unsigned int, const char *); |
2004 | extern void unregister_blkdev(unsigned int, const char *); | 2003 | extern void unregister_blkdev(unsigned int, const char *); |
2005 | extern struct block_device *bdget(dev_t); | 2004 | extern struct block_device *bdget(dev_t); |
2006 | extern struct block_device *bdgrab(struct block_device *bdev); | 2005 | extern struct block_device *bdgrab(struct block_device *bdev); |
2007 | extern void bd_set_size(struct block_device *, loff_t size); | 2006 | extern void bd_set_size(struct block_device *, loff_t size); |
2008 | extern void bd_forget(struct inode *inode); | 2007 | extern void bd_forget(struct inode *inode); |
2009 | extern void bdput(struct block_device *); | 2008 | extern void bdput(struct block_device *); |
2010 | extern struct block_device *open_by_devnum(dev_t, fmode_t); | 2009 | extern struct block_device *open_by_devnum(dev_t, fmode_t); |
2011 | extern void invalidate_bdev(struct block_device *); | 2010 | extern void invalidate_bdev(struct block_device *); |
2012 | extern int sync_blockdev(struct block_device *bdev); | 2011 | extern int sync_blockdev(struct block_device *bdev); |
2013 | extern struct super_block *freeze_bdev(struct block_device *); | 2012 | extern struct super_block *freeze_bdev(struct block_device *); |
2014 | extern void emergency_thaw_all(void); | 2013 | extern void emergency_thaw_all(void); |
2015 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); | 2014 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); |
2016 | extern int fsync_bdev(struct block_device *); | 2015 | extern int fsync_bdev(struct block_device *); |
2017 | #else | 2016 | #else |
2018 | static inline void bd_forget(struct inode *inode) {} | 2017 | static inline void bd_forget(struct inode *inode) {} |
2019 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } | 2018 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } |
2020 | static inline void invalidate_bdev(struct block_device *bdev) {} | 2019 | static inline void invalidate_bdev(struct block_device *bdev) {} |
2021 | 2020 | ||
2022 | static inline struct super_block *freeze_bdev(struct block_device *sb) | 2021 | static inline struct super_block *freeze_bdev(struct block_device *sb) |
2023 | { | 2022 | { |
2024 | return NULL; | 2023 | return NULL; |
2025 | } | 2024 | } |
2026 | 2025 | ||
2027 | static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) | 2026 | static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) |
2028 | { | 2027 | { |
2029 | return 0; | 2028 | return 0; |
2030 | } | 2029 | } |
2031 | #endif | 2030 | #endif |
2032 | extern int sync_filesystem(struct super_block *); | 2031 | extern int sync_filesystem(struct super_block *); |
2033 | extern const struct file_operations def_blk_fops; | 2032 | extern const struct file_operations def_blk_fops; |
2034 | extern const struct file_operations def_chr_fops; | 2033 | extern const struct file_operations def_chr_fops; |
2035 | extern const struct file_operations bad_sock_fops; | 2034 | extern const struct file_operations bad_sock_fops; |
2036 | extern const struct file_operations def_fifo_fops; | 2035 | extern const struct file_operations def_fifo_fops; |
2037 | #ifdef CONFIG_BLOCK | 2036 | #ifdef CONFIG_BLOCK |
2038 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); | 2037 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); |
2039 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); | 2038 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); |
2040 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); | 2039 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); |
2041 | extern int blkdev_get(struct block_device *, fmode_t); | 2040 | extern int blkdev_get(struct block_device *, fmode_t); |
2042 | extern int blkdev_put(struct block_device *, fmode_t); | 2041 | extern int blkdev_put(struct block_device *, fmode_t); |
2043 | extern int bd_claim(struct block_device *, void *); | 2042 | extern int bd_claim(struct block_device *, void *); |
2044 | extern void bd_release(struct block_device *); | 2043 | extern void bd_release(struct block_device *); |
2045 | #ifdef CONFIG_SYSFS | 2044 | #ifdef CONFIG_SYSFS |
2046 | extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); | 2045 | extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); |
2047 | extern void bd_release_from_disk(struct block_device *, struct gendisk *); | 2046 | extern void bd_release_from_disk(struct block_device *, struct gendisk *); |
2048 | #else | 2047 | #else |
2049 | #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) | 2048 | #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) |
2050 | #define bd_release_from_disk(bdev, disk) bd_release(bdev) | 2049 | #define bd_release_from_disk(bdev, disk) bd_release(bdev) |
2051 | #endif | 2050 | #endif |
2052 | #endif | 2051 | #endif |
2053 | 2052 | ||
2054 | /* fs/char_dev.c */ | 2053 | /* fs/char_dev.c */ |
2055 | #define CHRDEV_MAJOR_HASH_SIZE 255 | 2054 | #define CHRDEV_MAJOR_HASH_SIZE 255 |
2056 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); | 2055 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); |
2057 | extern int register_chrdev_region(dev_t, unsigned, const char *); | 2056 | extern int register_chrdev_region(dev_t, unsigned, const char *); |
2058 | extern int __register_chrdev(unsigned int major, unsigned int baseminor, | 2057 | extern int __register_chrdev(unsigned int major, unsigned int baseminor, |
2059 | unsigned int count, const char *name, | 2058 | unsigned int count, const char *name, |
2060 | const struct file_operations *fops); | 2059 | const struct file_operations *fops); |
2061 | extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, | 2060 | extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, |
2062 | unsigned int count, const char *name); | 2061 | unsigned int count, const char *name); |
2063 | extern void unregister_chrdev_region(dev_t, unsigned); | 2062 | extern void unregister_chrdev_region(dev_t, unsigned); |
2064 | extern void chrdev_show(struct seq_file *,off_t); | 2063 | extern void chrdev_show(struct seq_file *,off_t); |
2065 | 2064 | ||
2066 | static inline int register_chrdev(unsigned int major, const char *name, | 2065 | static inline int register_chrdev(unsigned int major, const char *name, |
2067 | const struct file_operations *fops) | 2066 | const struct file_operations *fops) |
2068 | { | 2067 | { |
2069 | return __register_chrdev(major, 0, 256, name, fops); | 2068 | return __register_chrdev(major, 0, 256, name, fops); |
2070 | } | 2069 | } |
2071 | 2070 | ||
2072 | static inline void unregister_chrdev(unsigned int major, const char *name) | 2071 | static inline void unregister_chrdev(unsigned int major, const char *name) |
2073 | { | 2072 | { |
2074 | __unregister_chrdev(major, 0, 256, name); | 2073 | __unregister_chrdev(major, 0, 256, name); |
2075 | } | 2074 | } |
2076 | 2075 | ||
2077 | /* fs/block_dev.c */ | 2076 | /* fs/block_dev.c */ |
2078 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ | 2077 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ |
2079 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ | 2078 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ |
2080 | 2079 | ||
2081 | #ifdef CONFIG_BLOCK | 2080 | #ifdef CONFIG_BLOCK |
2082 | #define BLKDEV_MAJOR_HASH_SIZE 255 | 2081 | #define BLKDEV_MAJOR_HASH_SIZE 255 |
2083 | extern const char *__bdevname(dev_t, char *buffer); | 2082 | extern const char *__bdevname(dev_t, char *buffer); |
2084 | extern const char *bdevname(struct block_device *bdev, char *buffer); | 2083 | extern const char *bdevname(struct block_device *bdev, char *buffer); |
2085 | extern struct block_device *lookup_bdev(const char *); | 2084 | extern struct block_device *lookup_bdev(const char *); |
2086 | extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); | 2085 | extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); |
2087 | extern void close_bdev_exclusive(struct block_device *, fmode_t); | 2086 | extern void close_bdev_exclusive(struct block_device *, fmode_t); |
2088 | extern void blkdev_show(struct seq_file *,off_t); | 2087 | extern void blkdev_show(struct seq_file *,off_t); |
2089 | 2088 | ||
2090 | #else | 2089 | #else |
2091 | #define BLKDEV_MAJOR_HASH_SIZE 0 | 2090 | #define BLKDEV_MAJOR_HASH_SIZE 0 |
2092 | #endif | 2091 | #endif |
2093 | 2092 | ||
2094 | extern void init_special_inode(struct inode *, umode_t, dev_t); | 2093 | extern void init_special_inode(struct inode *, umode_t, dev_t); |
2095 | 2094 | ||
2096 | /* Invalid inode operations -- fs/bad_inode.c */ | 2095 | /* Invalid inode operations -- fs/bad_inode.c */ |
2097 | extern void make_bad_inode(struct inode *); | 2096 | extern void make_bad_inode(struct inode *); |
2098 | extern int is_bad_inode(struct inode *); | 2097 | extern int is_bad_inode(struct inode *); |
2099 | 2098 | ||
2100 | extern const struct file_operations read_pipefifo_fops; | 2099 | extern const struct file_operations read_pipefifo_fops; |
2101 | extern const struct file_operations write_pipefifo_fops; | 2100 | extern const struct file_operations write_pipefifo_fops; |
2102 | extern const struct file_operations rdwr_pipefifo_fops; | 2101 | extern const struct file_operations rdwr_pipefifo_fops; |
2103 | 2102 | ||
2104 | extern int fs_may_remount_ro(struct super_block *); | 2103 | extern int fs_may_remount_ro(struct super_block *); |
2105 | 2104 | ||
2106 | #ifdef CONFIG_BLOCK | 2105 | #ifdef CONFIG_BLOCK |
2107 | /* | 2106 | /* |
2108 | * return READ, READA, or WRITE | 2107 | * return READ, READA, or WRITE |
2109 | */ | 2108 | */ |
2110 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) | 2109 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) |
2111 | 2110 | ||
2112 | /* | 2111 | /* |
2113 | * return data direction, READ or WRITE | 2112 | * return data direction, READ or WRITE |
2114 | */ | 2113 | */ |
2115 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) | 2114 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) |
2116 | 2115 | ||
2117 | extern void check_disk_size_change(struct gendisk *disk, | 2116 | extern void check_disk_size_change(struct gendisk *disk, |
2118 | struct block_device *bdev); | 2117 | struct block_device *bdev); |
2119 | extern int revalidate_disk(struct gendisk *); | 2118 | extern int revalidate_disk(struct gendisk *); |
2120 | extern int check_disk_change(struct block_device *); | 2119 | extern int check_disk_change(struct block_device *); |
2121 | extern int __invalidate_device(struct block_device *); | 2120 | extern int __invalidate_device(struct block_device *); |
2122 | extern int invalidate_partition(struct gendisk *, int); | 2121 | extern int invalidate_partition(struct gendisk *, int); |
2123 | #endif | 2122 | #endif |
2124 | unsigned long invalidate_mapping_pages(struct address_space *mapping, | 2123 | unsigned long invalidate_mapping_pages(struct address_space *mapping, |
2125 | pgoff_t start, pgoff_t end); | 2124 | pgoff_t start, pgoff_t end); |
2126 | 2125 | ||
2127 | static inline void invalidate_remote_inode(struct inode *inode) | 2126 | static inline void invalidate_remote_inode(struct inode *inode) |
2128 | { | 2127 | { |
2129 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 2128 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2130 | S_ISLNK(inode->i_mode)) | 2129 | S_ISLNK(inode->i_mode)) |
2131 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 2130 | invalidate_mapping_pages(inode->i_mapping, 0, -1); |
2132 | } | 2131 | } |
2133 | extern int invalidate_inode_pages2(struct address_space *mapping); | 2132 | extern int invalidate_inode_pages2(struct address_space *mapping); |
2134 | extern int invalidate_inode_pages2_range(struct address_space *mapping, | 2133 | extern int invalidate_inode_pages2_range(struct address_space *mapping, |
2135 | pgoff_t start, pgoff_t end); | 2134 | pgoff_t start, pgoff_t end); |
2136 | extern int write_inode_now(struct inode *, int); | 2135 | extern int write_inode_now(struct inode *, int); |
2137 | extern int filemap_fdatawrite(struct address_space *); | 2136 | extern int filemap_fdatawrite(struct address_space *); |
2138 | extern int filemap_flush(struct address_space *); | 2137 | extern int filemap_flush(struct address_space *); |
2139 | extern int filemap_fdatawait(struct address_space *); | 2138 | extern int filemap_fdatawait(struct address_space *); |
2140 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, | 2139 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, |
2141 | loff_t lend); | 2140 | loff_t lend); |
2142 | extern int filemap_write_and_wait(struct address_space *mapping); | 2141 | extern int filemap_write_and_wait(struct address_space *mapping); |
2143 | extern int filemap_write_and_wait_range(struct address_space *mapping, | 2142 | extern int filemap_write_and_wait_range(struct address_space *mapping, |
2144 | loff_t lstart, loff_t lend); | 2143 | loff_t lstart, loff_t lend); |
2145 | extern int __filemap_fdatawrite_range(struct address_space *mapping, | 2144 | extern int __filemap_fdatawrite_range(struct address_space *mapping, |
2146 | loff_t start, loff_t end, int sync_mode); | 2145 | loff_t start, loff_t end, int sync_mode); |
2147 | extern int filemap_fdatawrite_range(struct address_space *mapping, | 2146 | extern int filemap_fdatawrite_range(struct address_space *mapping, |
2148 | loff_t start, loff_t end); | 2147 | loff_t start, loff_t end); |
2149 | 2148 | ||
2150 | extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, | 2149 | extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, |
2151 | int datasync); | 2150 | int datasync); |
2152 | extern int vfs_fsync(struct file *file, int datasync); | 2151 | extern int vfs_fsync(struct file *file, int datasync); |
2153 | extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); | 2152 | extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); |
2154 | extern void sync_supers(void); | 2153 | extern void sync_supers(void); |
2155 | extern void emergency_sync(void); | 2154 | extern void emergency_sync(void); |
2156 | extern void emergency_remount(void); | 2155 | extern void emergency_remount(void); |
2157 | #ifdef CONFIG_BLOCK | 2156 | #ifdef CONFIG_BLOCK |
2158 | extern sector_t bmap(struct inode *, sector_t); | 2157 | extern sector_t bmap(struct inode *, sector_t); |
2159 | #endif | 2158 | #endif |
2160 | extern int notify_change(struct dentry *, struct iattr *); | 2159 | extern int notify_change(struct dentry *, struct iattr *); |
2161 | extern int inode_permission(struct inode *, int); | 2160 | extern int inode_permission(struct inode *, int); |
2162 | extern int generic_permission(struct inode *, int, | 2161 | extern int generic_permission(struct inode *, int, |
2163 | int (*check_acl)(struct inode *, int)); | 2162 | int (*check_acl)(struct inode *, int)); |
2164 | 2163 | ||
2165 | static inline bool execute_ok(struct inode *inode) | 2164 | static inline bool execute_ok(struct inode *inode) |
2166 | { | 2165 | { |
2167 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); | 2166 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); |
2168 | } | 2167 | } |
2169 | 2168 | ||
2170 | extern int get_write_access(struct inode *); | 2169 | extern int get_write_access(struct inode *); |
2171 | extern int deny_write_access(struct file *); | 2170 | extern int deny_write_access(struct file *); |
2172 | static inline void put_write_access(struct inode * inode) | 2171 | static inline void put_write_access(struct inode * inode) |
2173 | { | 2172 | { |
2174 | atomic_dec(&inode->i_writecount); | 2173 | atomic_dec(&inode->i_writecount); |
2175 | } | 2174 | } |
2176 | static inline void allow_write_access(struct file *file) | 2175 | static inline void allow_write_access(struct file *file) |
2177 | { | 2176 | { |
2178 | if (file) | 2177 | if (file) |
2179 | atomic_inc(&file->f_path.dentry->d_inode->i_writecount); | 2178 | atomic_inc(&file->f_path.dentry->d_inode->i_writecount); |
2180 | } | 2179 | } |
2181 | extern int do_pipe_flags(int *, int); | 2180 | extern int do_pipe_flags(int *, int); |
2182 | extern struct file *create_read_pipe(struct file *f, int flags); | 2181 | extern struct file *create_read_pipe(struct file *f, int flags); |
2183 | extern struct file *create_write_pipe(int flags); | 2182 | extern struct file *create_write_pipe(int flags); |
2184 | extern void free_write_pipe(struct file *); | 2183 | extern void free_write_pipe(struct file *); |
2185 | 2184 | ||
2186 | extern struct file *do_filp_open(int dfd, const char *pathname, | 2185 | extern struct file *do_filp_open(int dfd, const char *pathname, |
2187 | int open_flag, int mode, int acc_mode); | 2186 | int open_flag, int mode, int acc_mode); |
2188 | extern int may_open(struct path *, int, int); | 2187 | extern int may_open(struct path *, int, int); |
2189 | 2188 | ||
2190 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2189 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
2191 | extern struct file * open_exec(const char *); | 2190 | extern struct file * open_exec(const char *); |
2192 | 2191 | ||
2193 | /* fs/dcache.c -- generic fs support functions */ | 2192 | /* fs/dcache.c -- generic fs support functions */ |
2194 | extern int is_subdir(struct dentry *, struct dentry *); | 2193 | extern int is_subdir(struct dentry *, struct dentry *); |
2195 | extern int path_is_under(struct path *, struct path *); | 2194 | extern int path_is_under(struct path *, struct path *); |
2196 | extern ino_t find_inode_number(struct dentry *, struct qstr *); | 2195 | extern ino_t find_inode_number(struct dentry *, struct qstr *); |
2197 | 2196 | ||
2198 | #include <linux/err.h> | 2197 | #include <linux/err.h> |
2199 | 2198 | ||
2200 | /* needed for stackable file system support */ | 2199 | /* needed for stackable file system support */ |
2201 | extern loff_t default_llseek(struct file *file, loff_t offset, int origin); | 2200 | extern loff_t default_llseek(struct file *file, loff_t offset, int origin); |
2202 | 2201 | ||
2203 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); | 2202 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); |
2204 | 2203 | ||
2205 | extern int inode_init_always(struct super_block *, struct inode *); | 2204 | extern int inode_init_always(struct super_block *, struct inode *); |
2206 | extern void inode_init_once(struct inode *); | 2205 | extern void inode_init_once(struct inode *); |
2207 | extern void ihold(struct inode * inode); | 2206 | extern void ihold(struct inode * inode); |
2208 | extern void iput(struct inode *); | 2207 | extern void iput(struct inode *); |
2209 | extern struct inode * igrab(struct inode *); | 2208 | extern struct inode * igrab(struct inode *); |
2210 | extern ino_t iunique(struct super_block *, ino_t); | 2209 | extern ino_t iunique(struct super_block *, ino_t); |
2211 | extern int inode_needs_sync(struct inode *inode); | 2210 | extern int inode_needs_sync(struct inode *inode); |
2212 | extern int generic_delete_inode(struct inode *inode); | 2211 | extern int generic_delete_inode(struct inode *inode); |
2213 | extern int generic_drop_inode(struct inode *inode); | 2212 | extern int generic_drop_inode(struct inode *inode); |
2214 | 2213 | ||
2215 | extern struct inode *ilookup5_nowait(struct super_block *sb, | 2214 | extern struct inode *ilookup5_nowait(struct super_block *sb, |
2216 | unsigned long hashval, int (*test)(struct inode *, void *), | 2215 | unsigned long hashval, int (*test)(struct inode *, void *), |
2217 | void *data); | 2216 | void *data); |
2218 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, | 2217 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, |
2219 | int (*test)(struct inode *, void *), void *data); | 2218 | int (*test)(struct inode *, void *), void *data); |
2220 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); | 2219 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); |
2221 | 2220 | ||
2222 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); | 2221 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); |
2223 | extern struct inode * iget_locked(struct super_block *, unsigned long); | 2222 | extern struct inode * iget_locked(struct super_block *, unsigned long); |
2224 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); | 2223 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); |
2225 | extern int insert_inode_locked(struct inode *); | 2224 | extern int insert_inode_locked(struct inode *); |
2226 | extern void unlock_new_inode(struct inode *); | 2225 | extern void unlock_new_inode(struct inode *); |
2227 | extern unsigned int get_next_ino(void); | 2226 | extern unsigned int get_next_ino(void); |
2228 | 2227 | ||
2229 | extern void __iget(struct inode * inode); | 2228 | extern void __iget(struct inode * inode); |
2230 | extern void iget_failed(struct inode *); | 2229 | extern void iget_failed(struct inode *); |
2231 | extern void end_writeback(struct inode *); | 2230 | extern void end_writeback(struct inode *); |
2232 | extern void __destroy_inode(struct inode *); | 2231 | extern void __destroy_inode(struct inode *); |
2233 | extern struct inode *new_inode(struct super_block *); | 2232 | extern struct inode *new_inode(struct super_block *); |
2234 | extern int should_remove_suid(struct dentry *); | 2233 | extern int should_remove_suid(struct dentry *); |
2235 | extern int file_remove_suid(struct file *); | 2234 | extern int file_remove_suid(struct file *); |
2236 | 2235 | ||
2237 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); | 2236 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); |
2238 | extern void remove_inode_hash(struct inode *); | 2237 | extern void remove_inode_hash(struct inode *); |
2239 | static inline void insert_inode_hash(struct inode *inode) | 2238 | static inline void insert_inode_hash(struct inode *inode) |
2240 | { | 2239 | { |
2241 | __insert_inode_hash(inode, inode->i_ino); | 2240 | __insert_inode_hash(inode, inode->i_ino); |
2242 | } | 2241 | } |
2243 | extern void inode_sb_list_add(struct inode *inode); | 2242 | extern void inode_sb_list_add(struct inode *inode); |
2244 | 2243 | ||
2245 | #ifdef CONFIG_BLOCK | 2244 | #ifdef CONFIG_BLOCK |
2246 | extern void submit_bio(int, struct bio *); | 2245 | extern void submit_bio(int, struct bio *); |
2247 | extern int bdev_read_only(struct block_device *); | 2246 | extern int bdev_read_only(struct block_device *); |
2248 | #endif | 2247 | #endif |
2249 | extern int set_blocksize(struct block_device *, int); | 2248 | extern int set_blocksize(struct block_device *, int); |
2250 | extern int sb_set_blocksize(struct super_block *, int); | 2249 | extern int sb_set_blocksize(struct super_block *, int); |
2251 | extern int sb_min_blocksize(struct super_block *, int); | 2250 | extern int sb_min_blocksize(struct super_block *, int); |
2252 | 2251 | ||
2253 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); | 2252 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); |
2254 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); | 2253 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); |
2255 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); | 2254 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); |
2256 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); | 2255 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); |
2257 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2256 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2258 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, | 2257 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, |
2259 | loff_t *); | 2258 | loff_t *); |
2260 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2259 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2261 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, | 2260 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, |
2262 | unsigned long *, loff_t, loff_t *, size_t, size_t); | 2261 | unsigned long *, loff_t, loff_t *, size_t, size_t); |
2263 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, | 2262 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, |
2264 | unsigned long, loff_t, loff_t *, size_t, ssize_t); | 2263 | unsigned long, loff_t, loff_t *, size_t, ssize_t); |
2265 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); | 2264 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); |
2266 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); | 2265 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); |
2267 | extern int generic_segment_checks(const struct iovec *iov, | 2266 | extern int generic_segment_checks(const struct iovec *iov, |
2268 | unsigned long *nr_segs, size_t *count, int access_flags); | 2267 | unsigned long *nr_segs, size_t *count, int access_flags); |
2269 | 2268 | ||
2270 | /* fs/block_dev.c */ | 2269 | /* fs/block_dev.c */ |
2271 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2270 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2272 | unsigned long nr_segs, loff_t pos); | 2271 | unsigned long nr_segs, loff_t pos); |
2273 | extern int blkdev_fsync(struct file *filp, int datasync); | 2272 | extern int blkdev_fsync(struct file *filp, int datasync); |
2274 | 2273 | ||
2275 | /* fs/splice.c */ | 2274 | /* fs/splice.c */ |
2276 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, | 2275 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, |
2277 | struct pipe_inode_info *, size_t, unsigned int); | 2276 | struct pipe_inode_info *, size_t, unsigned int); |
2278 | extern ssize_t default_file_splice_read(struct file *, loff_t *, | 2277 | extern ssize_t default_file_splice_read(struct file *, loff_t *, |
2279 | struct pipe_inode_info *, size_t, unsigned int); | 2278 | struct pipe_inode_info *, size_t, unsigned int); |
2280 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, | 2279 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, |
2281 | struct file *, loff_t *, size_t, unsigned int); | 2280 | struct file *, loff_t *, size_t, unsigned int); |
2282 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2281 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
2283 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2282 | struct file *out, loff_t *, size_t len, unsigned int flags); |
2284 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | 2283 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, |
2285 | size_t len, unsigned int flags); | 2284 | size_t len, unsigned int flags); |
2286 | 2285 | ||
2287 | extern void | 2286 | extern void |
2288 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); | 2287 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); |
2289 | extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); | 2288 | extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); |
2290 | extern loff_t no_llseek(struct file *file, loff_t offset, int origin); | 2289 | extern loff_t no_llseek(struct file *file, loff_t offset, int origin); |
2291 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); | 2290 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); |
2292 | extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, | 2291 | extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, |
2293 | int origin); | 2292 | int origin); |
2294 | extern int generic_file_open(struct inode * inode, struct file * filp); | 2293 | extern int generic_file_open(struct inode * inode, struct file * filp); |
2295 | extern int nonseekable_open(struct inode * inode, struct file * filp); | 2294 | extern int nonseekable_open(struct inode * inode, struct file * filp); |
2296 | 2295 | ||
2297 | #ifdef CONFIG_FS_XIP | 2296 | #ifdef CONFIG_FS_XIP |
2298 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, | 2297 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, |
2299 | loff_t *ppos); | 2298 | loff_t *ppos); |
2300 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); | 2299 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); |
2301 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, | 2300 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, |
2302 | size_t len, loff_t *ppos); | 2301 | size_t len, loff_t *ppos); |
2303 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); | 2302 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); |
2304 | #else | 2303 | #else |
2305 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | 2304 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) |
2306 | { | 2305 | { |
2307 | return 0; | 2306 | return 0; |
2308 | } | 2307 | } |
2309 | #endif | 2308 | #endif |
2310 | 2309 | ||
2311 | #ifdef CONFIG_BLOCK | 2310 | #ifdef CONFIG_BLOCK |
2312 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, | 2311 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, |
2313 | loff_t file_offset); | 2312 | loff_t file_offset); |
2314 | 2313 | ||
/*
 * Behaviour flags for __blockdev_direct_IO(); callers OR these together
 * in the 'flags' argument (see blockdev_direct_IO() below for the
 * default combination).
 */
enum {
	/* need locking between buffered and direct access */
	DIO_LOCKING	= 0x01,

	/* filesystem does not support filling holes */
	DIO_SKIP_HOLES	= 0x02,
};
2322 | 2321 | ||
2323 | void dio_end_io(struct bio *bio, int error); | 2322 | void dio_end_io(struct bio *bio, int error); |
2324 | 2323 | ||
2325 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2324 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
2326 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2325 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
2327 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2326 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
2328 | dio_submit_t submit_io, int flags); | 2327 | dio_submit_t submit_io, int flags); |
2329 | 2328 | ||
2330 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | 2329 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, |
2331 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, | 2330 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, |
2332 | loff_t offset, unsigned long nr_segs, get_block_t get_block, | 2331 | loff_t offset, unsigned long nr_segs, get_block_t get_block, |
2333 | dio_iodone_t end_io) | 2332 | dio_iodone_t end_io) |
2334 | { | 2333 | { |
2335 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2334 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2336 | nr_segs, get_block, end_io, NULL, | 2335 | nr_segs, get_block, end_io, NULL, |
2337 | DIO_LOCKING | DIO_SKIP_HOLES); | 2336 | DIO_LOCKING | DIO_SKIP_HOLES); |
2338 | } | 2337 | } |
2339 | #endif | 2338 | #endif |
2340 | 2339 | ||
2341 | extern const struct file_operations generic_ro_fops; | 2340 | extern const struct file_operations generic_ro_fops; |
2342 | 2341 | ||
2343 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) | 2342 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) |
2344 | 2343 | ||
2345 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); | 2344 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); |
2346 | extern int vfs_follow_link(struct nameidata *, const char *); | 2345 | extern int vfs_follow_link(struct nameidata *, const char *); |
2347 | extern int page_readlink(struct dentry *, char __user *, int); | 2346 | extern int page_readlink(struct dentry *, char __user *, int); |
2348 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2347 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
2349 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2348 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
2350 | extern int __page_symlink(struct inode *inode, const char *symname, int len, | 2349 | extern int __page_symlink(struct inode *inode, const char *symname, int len, |
2351 | int nofs); | 2350 | int nofs); |
2352 | extern int page_symlink(struct inode *inode, const char *symname, int len); | 2351 | extern int page_symlink(struct inode *inode, const char *symname, int len); |
2353 | extern const struct inode_operations page_symlink_inode_operations; | 2352 | extern const struct inode_operations page_symlink_inode_operations; |
2354 | extern int generic_readlink(struct dentry *, char __user *, int); | 2353 | extern int generic_readlink(struct dentry *, char __user *, int); |
2355 | extern void generic_fillattr(struct inode *, struct kstat *); | 2354 | extern void generic_fillattr(struct inode *, struct kstat *); |
2356 | extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2355 | extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2357 | void __inode_add_bytes(struct inode *inode, loff_t bytes); | 2356 | void __inode_add_bytes(struct inode *inode, loff_t bytes); |
2358 | void inode_add_bytes(struct inode *inode, loff_t bytes); | 2357 | void inode_add_bytes(struct inode *inode, loff_t bytes); |
2359 | void inode_sub_bytes(struct inode *inode, loff_t bytes); | 2358 | void inode_sub_bytes(struct inode *inode, loff_t bytes); |
2360 | loff_t inode_get_bytes(struct inode *inode); | 2359 | loff_t inode_get_bytes(struct inode *inode); |
2361 | void inode_set_bytes(struct inode *inode, loff_t bytes); | 2360 | void inode_set_bytes(struct inode *inode, loff_t bytes); |
2362 | 2361 | ||
2363 | extern int vfs_readdir(struct file *, filldir_t, void *); | 2362 | extern int vfs_readdir(struct file *, filldir_t, void *); |
2364 | 2363 | ||
2365 | extern int vfs_stat(const char __user *, struct kstat *); | 2364 | extern int vfs_stat(const char __user *, struct kstat *); |
2366 | extern int vfs_lstat(const char __user *, struct kstat *); | 2365 | extern int vfs_lstat(const char __user *, struct kstat *); |
2367 | extern int vfs_fstat(unsigned int, struct kstat *); | 2366 | extern int vfs_fstat(unsigned int, struct kstat *); |
2368 | extern int vfs_fstatat(int , const char __user *, struct kstat *, int); | 2367 | extern int vfs_fstatat(int , const char __user *, struct kstat *, int); |
2369 | 2368 | ||
2370 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | 2369 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, |
2371 | unsigned long arg); | 2370 | unsigned long arg); |
2372 | extern int __generic_block_fiemap(struct inode *inode, | 2371 | extern int __generic_block_fiemap(struct inode *inode, |
2373 | struct fiemap_extent_info *fieinfo, | 2372 | struct fiemap_extent_info *fieinfo, |
2374 | loff_t start, loff_t len, | 2373 | loff_t start, loff_t len, |
2375 | get_block_t *get_block); | 2374 | get_block_t *get_block); |
2376 | extern int generic_block_fiemap(struct inode *inode, | 2375 | extern int generic_block_fiemap(struct inode *inode, |
2377 | struct fiemap_extent_info *fieinfo, u64 start, | 2376 | struct fiemap_extent_info *fieinfo, u64 start, |
2378 | u64 len, get_block_t *get_block); | 2377 | u64 len, get_block_t *get_block); |
2379 | 2378 | ||
2380 | extern void get_filesystem(struct file_system_type *fs); | 2379 | extern void get_filesystem(struct file_system_type *fs); |
2381 | extern void put_filesystem(struct file_system_type *fs); | 2380 | extern void put_filesystem(struct file_system_type *fs); |
2382 | extern struct file_system_type *get_fs_type(const char *name); | 2381 | extern struct file_system_type *get_fs_type(const char *name); |
2383 | extern struct super_block *get_super(struct block_device *); | 2382 | extern struct super_block *get_super(struct block_device *); |
2384 | extern struct super_block *get_active_super(struct block_device *bdev); | 2383 | extern struct super_block *get_active_super(struct block_device *bdev); |
2385 | extern struct super_block *user_get_super(dev_t); | 2384 | extern struct super_block *user_get_super(dev_t); |
2386 | extern void drop_super(struct super_block *sb); | 2385 | extern void drop_super(struct super_block *sb); |
2387 | extern void iterate_supers(void (*)(struct super_block *, void *), void *); | 2386 | extern void iterate_supers(void (*)(struct super_block *, void *), void *); |
2388 | 2387 | ||
2389 | extern int dcache_dir_open(struct inode *, struct file *); | 2388 | extern int dcache_dir_open(struct inode *, struct file *); |
2390 | extern int dcache_dir_close(struct inode *, struct file *); | 2389 | extern int dcache_dir_close(struct inode *, struct file *); |
2391 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); | 2390 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); |
2392 | extern int dcache_readdir(struct file *, void *, filldir_t); | 2391 | extern int dcache_readdir(struct file *, void *, filldir_t); |
2393 | extern int simple_setattr(struct dentry *, struct iattr *); | 2392 | extern int simple_setattr(struct dentry *, struct iattr *); |
2394 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2393 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2395 | extern int simple_statfs(struct dentry *, struct kstatfs *); | 2394 | extern int simple_statfs(struct dentry *, struct kstatfs *); |
2396 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); | 2395 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); |
2397 | extern int simple_unlink(struct inode *, struct dentry *); | 2396 | extern int simple_unlink(struct inode *, struct dentry *); |
2398 | extern int simple_rmdir(struct inode *, struct dentry *); | 2397 | extern int simple_rmdir(struct inode *, struct dentry *); |
2399 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 2398 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
2400 | extern int noop_fsync(struct file *, int); | 2399 | extern int noop_fsync(struct file *, int); |
2401 | extern int simple_empty(struct dentry *); | 2400 | extern int simple_empty(struct dentry *); |
2402 | extern int simple_readpage(struct file *file, struct page *page); | 2401 | extern int simple_readpage(struct file *file, struct page *page); |
2403 | extern int simple_write_begin(struct file *file, struct address_space *mapping, | 2402 | extern int simple_write_begin(struct file *file, struct address_space *mapping, |
2404 | loff_t pos, unsigned len, unsigned flags, | 2403 | loff_t pos, unsigned len, unsigned flags, |
2405 | struct page **pagep, void **fsdata); | 2404 | struct page **pagep, void **fsdata); |
2406 | extern int simple_write_end(struct file *file, struct address_space *mapping, | 2405 | extern int simple_write_end(struct file *file, struct address_space *mapping, |
2407 | loff_t pos, unsigned len, unsigned copied, | 2406 | loff_t pos, unsigned len, unsigned copied, |
2408 | struct page *page, void *fsdata); | 2407 | struct page *page, void *fsdata); |
2409 | 2408 | ||
2410 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); | 2409 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); |
2411 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); | 2410 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); |
2412 | extern const struct file_operations simple_dir_operations; | 2411 | extern const struct file_operations simple_dir_operations; |
2413 | extern const struct inode_operations simple_dir_inode_operations; | 2412 | extern const struct inode_operations simple_dir_inode_operations; |
2414 | struct tree_descr { char *name; const struct file_operations *ops; int mode; }; | 2413 | struct tree_descr { char *name; const struct file_operations *ops; int mode; }; |
2415 | struct dentry *d_alloc_name(struct dentry *, const char *); | 2414 | struct dentry *d_alloc_name(struct dentry *, const char *); |
2416 | extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); | 2415 | extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); |
2417 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); | 2416 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); |
2418 | extern void simple_release_fs(struct vfsmount **mount, int *count); | 2417 | extern void simple_release_fs(struct vfsmount **mount, int *count); |
2419 | 2418 | ||
2420 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, | 2419 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, |
2421 | loff_t *ppos, const void *from, size_t available); | 2420 | loff_t *ppos, const void *from, size_t available); |
2422 | extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, | 2421 | extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, |
2423 | const void __user *from, size_t count); | 2422 | const void __user *from, size_t count); |
2424 | 2423 | ||
2425 | extern int generic_file_fsync(struct file *, int); | 2424 | extern int generic_file_fsync(struct file *, int); |
2426 | 2425 | ||
2427 | extern int generic_check_addressable(unsigned, u64); | 2426 | extern int generic_check_addressable(unsigned, u64); |
2428 | 2427 | ||
2429 | #ifdef CONFIG_MIGRATION | 2428 | #ifdef CONFIG_MIGRATION |
2430 | extern int buffer_migrate_page(struct address_space *, | 2429 | extern int buffer_migrate_page(struct address_space *, |
2431 | struct page *, struct page *); | 2430 | struct page *, struct page *); |
2432 | #else | 2431 | #else |
2433 | #define buffer_migrate_page NULL | 2432 | #define buffer_migrate_page NULL |
2434 | #endif | 2433 | #endif |
2435 | 2434 | ||
2436 | extern int inode_change_ok(const struct inode *, struct iattr *); | 2435 | extern int inode_change_ok(const struct inode *, struct iattr *); |
2437 | extern int inode_newsize_ok(const struct inode *, loff_t offset); | 2436 | extern int inode_newsize_ok(const struct inode *, loff_t offset); |
2438 | extern void setattr_copy(struct inode *inode, const struct iattr *attr); | 2437 | extern void setattr_copy(struct inode *inode, const struct iattr *attr); |
2439 | 2438 | ||
2440 | extern void file_update_time(struct file *file); | 2439 | extern void file_update_time(struct file *file); |
2441 | 2440 | ||
2442 | extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); | 2441 | extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); |
2443 | extern void save_mount_options(struct super_block *sb, char *options); | 2442 | extern void save_mount_options(struct super_block *sb, char *options); |
2444 | extern void replace_mount_options(struct super_block *sb, char *options); | 2443 | extern void replace_mount_options(struct super_block *sb, char *options); |
2445 | 2444 | ||
2446 | static inline ino_t parent_ino(struct dentry *dentry) | 2445 | static inline ino_t parent_ino(struct dentry *dentry) |
2447 | { | 2446 | { |
2448 | ino_t res; | 2447 | ino_t res; |
2449 | 2448 | ||
2450 | spin_lock(&dentry->d_lock); | 2449 | spin_lock(&dentry->d_lock); |
2451 | res = dentry->d_parent->d_inode->i_ino; | 2450 | res = dentry->d_parent->d_inode->i_ino; |
2452 | spin_unlock(&dentry->d_lock); | 2451 | spin_unlock(&dentry->d_lock); |
2453 | return res; | 2452 | return res; |
2454 | } | 2453 | } |
2455 | 2454 | ||
2456 | /* Transaction based IO helpers */ | 2455 | /* Transaction based IO helpers */ |
2457 | 2456 | ||
/*
 * An argresp is stored in an allocated page and holds the
 * size of the argument or response, along with its content.
 *
 * 'data' is a C99 flexible array member rather than the GNU
 * zero-length-array extension 'data[0]'.  A flexible array member does
 * not contribute to sizeof, exactly like the old form, so the
 * SIMPLE_TRANSACTION_LIMIT computation below is unaffected.
 */
struct simple_transaction_argresp {
	ssize_t size;	/* number of valid bytes in data[] */
	char data[];	/* payload, up to SIMPLE_TRANSACTION_LIMIT bytes */
};
2466 | 2465 | ||
2467 | #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) | 2466 | #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) |
2468 | 2467 | ||
2469 | char *simple_transaction_get(struct file *file, const char __user *buf, | 2468 | char *simple_transaction_get(struct file *file, const char __user *buf, |
2470 | size_t size); | 2469 | size_t size); |
2471 | ssize_t simple_transaction_read(struct file *file, char __user *buf, | 2470 | ssize_t simple_transaction_read(struct file *file, char __user *buf, |
2472 | size_t size, loff_t *pos); | 2471 | size_t size, loff_t *pos); |
2473 | int simple_transaction_release(struct inode *inode, struct file *file); | 2472 | int simple_transaction_release(struct inode *inode, struct file *file); |
2474 | 2473 | ||
2475 | void simple_transaction_set(struct file *file, size_t n); | 2474 | void simple_transaction_set(struct file *file, size_t n); |
2476 | 2475 | ||
2477 | /* | 2476 | /* |
2478 | * simple attribute files | 2477 | * simple attribute files |
2479 | * | 2478 | * |
2480 | * These attributes behave similar to those in sysfs: | 2479 | * These attributes behave similar to those in sysfs: |
2481 | * | 2480 | * |
2482 | * Writing to an attribute immediately sets a value, an open file can be | 2481 | * Writing to an attribute immediately sets a value, an open file can be |
2483 | * written to multiple times. | 2482 | * written to multiple times. |
2484 | * | 2483 | * |
2485 | * Reading from an attribute creates a buffer from the value that might get | 2484 | * Reading from an attribute creates a buffer from the value that might get |
2486 | * read with multiple read calls. When the attribute has been read | 2485 | * read with multiple read calls. When the attribute has been read |
2487 | * completely, no further read calls are possible until the file is opened | 2486 | * completely, no further read calls are possible until the file is opened |
2488 | * again. | 2487 | * again. |
2489 | * | 2488 | * |
2490 | * All attributes contain a text representation of a numeric value | 2489 | * All attributes contain a text representation of a numeric value |
2491 | * that are accessed with the get() and set() functions. | 2490 | * that are accessed with the get() and set() functions. |
2492 | */ | 2491 | */ |
/*
 * DEFINE_SIMPLE_ATTRIBUTE - define file_operations for a simple attribute
 * @__fops: name of the struct file_operations variable to emit
 * @__get:  int (*)(void *, u64 *) used to read the attribute's value
 * @__set:  int (*)(void *, u64) used to write the attribute's value
 * @__fmt:  printf format used to present the value (takes one u64)
 *
 * Expands to a static open helper (named <__fops>_open) plus a const
 * file_operations that routes open/release/read/write through the
 * simple_attr_* helpers declared below.  The call to
 * __simple_attr_check_format() does nothing at runtime; it exists so
 * the compiler type-checks @__fmt against a u64 argument at build time.
 */
#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
static int __fops ## _open(struct inode *inode, struct file *file)	\
{									\
	__simple_attr_check_format(__fmt, 0ull);			\
	return simple_attr_open(inode, file, __get, __set, __fmt);	\
}									\
static const struct file_operations __fops = {				\
	.owner	 = THIS_MODULE,						\
	.open	 = __fops ## _open,					\
	.release = simple_attr_release,					\
	.read	 = simple_attr_read,					\
	.write	 = simple_attr_write,					\
	.llseek	 = generic_file_llseek,					\
};
2507 | 2506 | ||
/*
 * Compile-time printf-format validation helper used by
 * DEFINE_SIMPLE_ATTRIBUTE.  The body is deliberately empty: only the
 * format attribute matters, letting the compiler check the format
 * string against its arguments.
 */
static inline void __attribute__((format(printf, 1, 2)))
__simple_attr_check_format(const char *fmt, ...)
{
	/* intentionally empty -- the compiler does all the work */
}
2513 | 2512 | ||
/*
 * Helpers backing DEFINE_SIMPLE_ATTRIBUTE-generated file_operations.
 * get/set read and write the attribute's u64 value; fmt is the printf
 * format used for its text representation.
 */
int simple_attr_open(struct inode *inode, struct file *file,
		     int (*get)(void *, u64 *), int (*set)(void *, u64),
		     const char *fmt);
int simple_attr_release(struct inode *inode, struct file *file);
/* Read/write the textual form of the value to/from user space. */
ssize_t simple_attr_read(struct file *file, char __user *buf,
			 size_t len, loff_t *ppos);
ssize_t simple_attr_write(struct file *file, const char __user *buf,
			  size_t len, loff_t *ppos);
2522 | 2521 | ||
struct ctl_table;	/* forward declaration; full definition lives in sysctl headers */
/*
 * sysctl proc handlers reporting VFS object counts (files, dentries,
 * inodes).  Declared here; defined elsewhere in fs/ — presumably next to
 * the counters they expose (verify against the implementation files).
 */
int proc_nr_files(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_dentry(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_inodes(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos);
/* Boot-time only (__init): fills buf with the registered filesystem list. */
int __init get_filesystem_list(char *buf);
2531 | 2530 | ||
/*
 * Map the O_ACCMODE bits of open flags to MAY_* permission bits by
 * indexing an octal-escaped lookup string (assuming the conventional
 * O_RDONLY=0, O_WRONLY=1, O_RDWR=2 encoding):
 *   index 0 -> 04 (read), 1 -> 02 (write), 2 -> 06 (read|write),
 *   index 3 -> 06 (invalid combination, treated as read|write).
 */
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
/*
 * Derive fmode_t from open(2) flags: (flag + 1) & O_ACCMODE turns the
 * 0/1/2 access-mode encoding into FMODE_READ/FMODE_WRITE bits, and the
 * FMODE_NONOTIFY bit is passed through unchanged.
 */
#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
					    (flag & FMODE_NONOTIFY)))
2535 | 2534 | ||
2536 | #endif /* __KERNEL__ */ | 2535 | #endif /* __KERNEL__ */ |
2537 | #endif /* _LINUX_FS_H */ | 2536 | #endif /* _LINUX_FS_H */ |
2538 | 2537 |