Blame view
fs/file_table.c
9.44 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 8 9 10 |
/* * linux/fs/file_table.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> |
9f3acc314 [PATCH] split lin... |
11 |
#include <linux/fdtable.h> |
1da177e4c Linux-2.6.12-rc2 |
12 13 |
#include <linux/init.h> #include <linux/module.h> |
1da177e4c Linux-2.6.12-rc2 |
14 15 16 |
#include <linux/fs.h> #include <linux/security.h> #include <linux/eventpoll.h> |
ab2af1f50 [PATCH] files: fi... |
17 |
#include <linux/rcupdate.h> |
1da177e4c Linux-2.6.12-rc2 |
18 |
#include <linux/mount.h> |
16f7e0fe2 [PATCH] capable/c... |
19 |
#include <linux/capability.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
#include <linux/cdev.h> |
0eeca2830 [PATCH] inotify |
21 |
#include <linux/fsnotify.h> |
529bf6be5 [PATCH] fix file ... |
22 |
#include <linux/sysctl.h> |
6416ccb78 fs: scale files_lock |
23 |
#include <linux/lglock.h> |
529bf6be5 [PATCH] fix file ... |
24 |
#include <linux/percpu_counter.h> |
6416ccb78 fs: scale files_lock |
25 |
#include <linux/percpu.h> |
4a9d4b024 switch fput to ta... |
26 27 |
#include <linux/hardirq.h> #include <linux/task_work.h> |
0552f879d Untangling ima me... |
28 |
#include <linux/ima.h> |
529bf6be5 [PATCH] fix file ... |
29 |
|
60063497a atomic: use <linu... |
30 |
#include <linux/atomic.h> |
1da177e4c Linux-2.6.12-rc2 |
31 |
|
e81e3f4dc fs: move get_empt... |
32 |
#include "internal.h" |
1da177e4c Linux-2.6.12-rc2 |
33 34 35 36 |
/* sysctl tunables... */ struct files_stat_struct files_stat = { .max_files = NR_FILE }; |
b6b3fdead filp_cachep can b... |
37 38 |
/* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; |
529bf6be5 [PATCH] fix file ... |
39 |
static struct percpu_counter nr_files __cacheline_aligned_in_smp; |
1da177e4c Linux-2.6.12-rc2 |
40 |
|
5c33b183a uninline file_fre... |
41 |
static void file_free_rcu(struct rcu_head *head) |
1da177e4c Linux-2.6.12-rc2 |
42 |
{ |
d76b0d9b2 CRED: Use creds i... |
43 44 45 |
struct file *f = container_of(head, struct file, f_u.fu_rcuhead); put_cred(f->f_cred); |
529bf6be5 [PATCH] fix file ... |
46 |
kmem_cache_free(filp_cachep, f); |
1da177e4c Linux-2.6.12-rc2 |
47 |
} |
529bf6be5 [PATCH] fix file ... |
48 |
static inline void file_free(struct file *f) |
1da177e4c Linux-2.6.12-rc2 |
49 |
{ |
529bf6be5 [PATCH] fix file ... |
50 |
percpu_counter_dec(&nr_files); |
ad775f5a8 [PATCH] r/o bind ... |
51 |
file_check_state(f); |
529bf6be5 [PATCH] fix file ... |
52 |
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); |
1da177e4c Linux-2.6.12-rc2 |
53 |
} |
529bf6be5 [PATCH] fix file ... |
54 55 56 |
/* * Return the total number of open files in the system */ |
518de9b39 fs: allow for mor... |
57 |
static long get_nr_files(void) |
1da177e4c Linux-2.6.12-rc2 |
58 |
{ |
529bf6be5 [PATCH] fix file ... |
59 |
return percpu_counter_read_positive(&nr_files); |
1da177e4c Linux-2.6.12-rc2 |
60 |
} |
529bf6be5 [PATCH] fix file ... |
61 62 63 |
/* * Return the maximum number of open files in the system */ |
518de9b39 fs: allow for mor... |
64 |
unsigned long get_max_files(void) |
ab2af1f50 [PATCH] files: fi... |
65 |
{ |
529bf6be5 [PATCH] fix file ... |
66 |
return files_stat.max_files; |
ab2af1f50 [PATCH] files: fi... |
67 |
} |
529bf6be5 [PATCH] fix file ... |
68 69 70 71 72 73 |
EXPORT_SYMBOL_GPL(get_max_files); /* * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
8d65af789 sysctl: remove "s... |
74 |
int proc_nr_files(ctl_table *table, int write, |
529bf6be5 [PATCH] fix file ... |
75 76 77 |
void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); |
518de9b39 fs: allow for mor... |
78 |
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
529bf6be5 [PATCH] fix file ... |
79 80 |
} #else |
8d65af789 sysctl: remove "s... |
81 |
int proc_nr_files(ctl_table *table, int write, |
529bf6be5 [PATCH] fix file ... |
82 83 84 85 86 |
void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } #endif |
ab2af1f50 [PATCH] files: fi... |
87 |
|
1da177e4c Linux-2.6.12-rc2 |
88 |
/* Find an unused file structure and return a pointer to it. |
1afc99bea propagate error f... |
89 90 |
* Returns an error pointer if some error happend e.g. we over file * structures limit, run out of memory or operation is not permitted. |
430e285e0 [PATCH] fix up ne... |
91 92 93 94 95 96 |
* * Be very careful using this. You are responsible for * getting write access to any mount that you might assign * to this filp, if it is opened for write. If this is not * done, you will imbalance int the mount's writer count * and a warning at __fput() time. |
1da177e4c Linux-2.6.12-rc2 |
97 98 99 |
*/ struct file *get_empty_filp(void) { |
86a264abe CRED: Wrap curren... |
100 |
const struct cred *cred = current_cred(); |
518de9b39 fs: allow for mor... |
101 |
static long old_max; |
1afc99bea propagate error f... |
102 103 |
struct file *f; int error; |
1da177e4c Linux-2.6.12-rc2 |
104 105 106 107 |
/* * Privileged users can go above max_files */ |
529bf6be5 [PATCH] fix file ... |
108 109 110 111 112 |
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { /* * percpu_counters are inaccurate. Do an expensive check before * we go and fail. */ |
52d9f3b40 lib: percpu_count... |
113 |
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files) |
529bf6be5 [PATCH] fix file ... |
114 115 |
goto over; } |
af4d2ecbf [PATCH] Fix of bo... |
116 |
|
4975e45ff fs: use kmem_cach... |
117 |
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); |
1afc99bea propagate error f... |
118 119 |
if (unlikely(!f)) return ERR_PTR(-ENOMEM); |
af4d2ecbf [PATCH] Fix of bo... |
120 |
|
529bf6be5 [PATCH] fix file ... |
121 |
percpu_counter_inc(&nr_files); |
78d297887 CRED: Fix kernel ... |
122 |
f->f_cred = get_cred(cred); |
1afc99bea propagate error f... |
123 124 125 126 127 |
error = security_file_alloc(f); if (unlikely(error)) { file_free(f); return ERR_PTR(error); } |
1da177e4c Linux-2.6.12-rc2 |
128 |
|
516e0cc56 [PATCH] f_count m... |
129 |
atomic_long_set(&f->f_count, 1); |
af4d2ecbf [PATCH] Fix of bo... |
130 |
rwlock_init(&f->f_owner.lock); |
684999149 Rename struct fil... |
131 |
spin_lock_init(&f->f_lock); |
5a6b7951b [PATCH] get_empty... |
132 |
eventpoll_init_file(f); |
af4d2ecbf [PATCH] Fix of bo... |
133 |
/* f->f_version: 0 */ |
af4d2ecbf [PATCH] Fix of bo... |
134 135 136 |
return f; over: |
1da177e4c Linux-2.6.12-rc2 |
137 |
/* Ran out of filps - report that */ |
529bf6be5 [PATCH] fix file ... |
138 |
if (get_nr_files() > old_max) { |
518de9b39 fs: allow for mor... |
139 140 |
pr_info("VFS: file-max limit %lu reached ", get_max_files()); |
529bf6be5 [PATCH] fix file ... |
141 |
old_max = get_nr_files(); |
1da177e4c Linux-2.6.12-rc2 |
142 |
} |
1afc99bea propagate error f... |
143 |
return ERR_PTR(-ENFILE); |
1da177e4c Linux-2.6.12-rc2 |
144 |
} |
ce8d2cdf3 r/o bind mounts: ... |
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
/** * alloc_file - allocate and initialize a 'struct file' * @mnt: the vfsmount on which the file will reside * @dentry: the dentry representing the new file * @mode: the mode with which the new file will be opened * @fop: the 'struct file_operations' for the new file * * Use this instead of get_empty_filp() to get a new * 'struct file'. Do so because of the same initialization * pitfalls reasons listed for init_file(). This is a * preferred interface to using init_file(). * * If all the callers of init_file() are eliminated, its * code should be moved into this function. */ |
2c48b9c45 switch alloc_file... |
160 161 |
struct file *alloc_file(struct path *path, fmode_t mode, const struct file_operations *fop) |
ce8d2cdf3 r/o bind mounts: ... |
162 163 |
{ struct file *file; |
ce8d2cdf3 r/o bind mounts: ... |
164 165 |
file = get_empty_filp(); |
1afc99bea propagate error f... |
166 |
if (IS_ERR(file)) |
39b652527 fs: Preserve erro... |
167 |
return file; |
ce8d2cdf3 r/o bind mounts: ... |
168 |
|
2c48b9c45 switch alloc_file... |
169 |
file->f_path = *path; |
dd37978c5 cache the value o... |
170 |
file->f_inode = path->dentry->d_inode; |
2c48b9c45 switch alloc_file... |
171 |
file->f_mapping = path->dentry->d_inode->i_mapping; |
ce8d2cdf3 r/o bind mounts: ... |
172 173 |
file->f_mode = mode; file->f_op = fop; |
4a3fd211c [PATCH] r/o bind ... |
174 175 176 177 178 179 180 |
/* * These mounts don't really matter in practice * for r/o bind mounts. They aren't userspace- * visible. We do this for consistency, and so * that we can do debugging checks at __fput() */ |
2c48b9c45 switch alloc_file... |
181 |
if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { |
ad775f5a8 [PATCH] r/o bind ... |
182 |
file_take_write(file); |
385e3ed4f alloc_file(): sim... |
183 |
WARN_ON(mnt_clone_write(path->mnt)); |
4a3fd211c [PATCH] r/o bind ... |
184 |
} |
890275b5e IMA: maintain i_r... |
185 186 |
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); |
3d1e46315 get rid of init_f... |
187 |
return file; |
ce8d2cdf3 r/o bind mounts: ... |
188 |
} |
73efc4681 re-export alloc_f... |
189 |
EXPORT_SYMBOL(alloc_file); |
ce8d2cdf3 r/o bind mounts: ... |
190 |
|
aceaf78da [PATCH] r/o bind ... |
191 192 193 194 195 196 197 198 |
/** * drop_file_write_access - give up ability to write to a file * @file: the file to which we will stop writing * * This is a central place which will give up the ability * to write to @file, along with access to write through * its vfsmount. */ |
b57ce9694 vfs: drop_file_wr... |
199 |
static void drop_file_write_access(struct file *file) |
aceaf78da [PATCH] r/o bind ... |
200 |
{ |
4a3fd211c [PATCH] r/o bind ... |
201 |
struct vfsmount *mnt = file->f_path.mnt; |
aceaf78da [PATCH] r/o bind ... |
202 203 204 205 |
struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; put_write_access(inode); |
ad775f5a8 [PATCH] r/o bind ... |
206 207 208 209 210 |
if (special_file(inode->i_mode)) return; if (file_check_writeable(file) != 0) return; |
eb04c2828 fs: Add freezing ... |
211 |
__mnt_drop_write(mnt); |
ad775f5a8 [PATCH] r/o bind ... |
212 |
file_release_write(file); |
aceaf78da [PATCH] r/o bind ... |
213 |
} |
aceaf78da [PATCH] r/o bind ... |
214 |
|
d7065da03 get rid of the ma... |
215 |
/* the real guts of fput() - releasing the last reference to file |
1da177e4c Linux-2.6.12-rc2 |
216 |
*/ |
d7065da03 get rid of the ma... |
217 |
static void __fput(struct file *file) |
1da177e4c Linux-2.6.12-rc2 |
218 |
{ |
0f7fc9e4d [PATCH] VFS: chan... |
219 220 |
struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; |
c77cecee5 Replace a bunch o... |
221 |
struct inode *inode = file->f_inode; |
1da177e4c Linux-2.6.12-rc2 |
222 223 |
might_sleep(); |
0eeca2830 [PATCH] inotify |
224 225 |
fsnotify_close(file); |
1da177e4c Linux-2.6.12-rc2 |
226 227 228 229 230 231 |
/* * The function eventpoll_release() should be the first called * in the file cleanup chain. */ eventpoll_release(file); locks_remove_flock(file); |
233e70f42 saner FASYNC hand... |
232 |
if (unlikely(file->f_flags & FASYNC)) { |
72c2d5319 file->f_op is nev... |
233 |
if (file->f_op->fasync) |
233e70f42 saner FASYNC hand... |
234 235 |
file->f_op->fasync(-1, file, 0); } |
4199d35cb vfs: move ima_fil... |
236 |
ima_file_free(file); |
72c2d5319 file->f_op is nev... |
237 |
if (file->f_op->release) |
1da177e4c Linux-2.6.12-rc2 |
238 239 |
file->f_op->release(inode, file); security_file_free(file); |
60ed8cf78 fix cdev leak on ... |
240 241 |
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { |
1da177e4c Linux-2.6.12-rc2 |
242 |
cdev_put(inode->i_cdev); |
60ed8cf78 fix cdev leak on ... |
243 |
} |
1da177e4c Linux-2.6.12-rc2 |
244 |
fops_put(file->f_op); |
609d7fa95 [PATCH] file: mod... |
245 |
put_pid(file->f_owner.pid); |
890275b5e IMA: maintain i_r... |
246 247 |
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); |
aceaf78da [PATCH] r/o bind ... |
248 249 |
if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); |
0f7fc9e4d [PATCH] VFS: chan... |
250 251 |
file->f_path.dentry = NULL; file->f_path.mnt = NULL; |
dd37978c5 cache the value o... |
252 |
file->f_inode = NULL; |
1da177e4c Linux-2.6.12-rc2 |
253 254 255 256 |
file_free(file); dput(dentry); mntput(mnt); } |
4f5e65a1c fput: turn "list_... |
257 |
static LLIST_HEAD(delayed_fput_list); |
4a9d4b024 switch fput to ta... |
258 259 |
static void delayed_fput(struct work_struct *unused) { |
4f5e65a1c fput: turn "list_... |
260 261 262 263 264 265 |
struct llist_node *node = llist_del_all(&delayed_fput_list); struct llist_node *next; for (; node; node = next) { next = llist_next(node); __fput(llist_entry(node, struct file, f_u.fu_llist)); |
4a9d4b024 switch fput to ta... |
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
} } static void ____fput(struct callback_head *work) { __fput(container_of(work, struct file, f_u.fu_rcuhead)); } /* * If kernel thread really needs to have the final fput() it has done * to complete, call this. The only user right now is the boot - we * *do* need to make sure our writes to binaries on initramfs has * not left us with opened struct file waiting for __fput() - execve() * won't work without that. Please, don't add more callers without * very good reasons; in particular, never call that with locks * held and never call that from a thread that might need to do * some work on any kind of umount. */ void flush_delayed_fput(void) { delayed_fput(NULL); } |
c7314d74f nfsd regression s... |
288 |
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); |
4a9d4b024 switch fput to ta... |
289 |
|
d7065da03 get rid of the ma... |
290 291 |
void fput(struct file *file) { |
4a9d4b024 switch fput to ta... |
292 293 |
if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; |
e7b2c4069 fput: task_work_a... |
294 |
|
e7b2c4069 fput: task_work_a... |
295 296 297 298 |
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) return; |
64372501e fs/file_table.c:f... |
299 300 |
/* * After this task has run exit_task_work(), |
be49b30a9 fs/file_table.c:f... |
301 |
* task_work_add() will fail. Fall through to delayed |
64372501e fs/file_table.c:f... |
302 303 |
* fput to avoid leaking *file. */ |
4a9d4b024 switch fput to ta... |
304 |
} |
4f5e65a1c fput: turn "list_... |
305 306 |
if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) |
c7314d74f nfsd regression s... |
307 |
schedule_delayed_work(&delayed_fput_work, 1); |
4a9d4b024 switch fput to ta... |
308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 |
} } /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without * risking deadlocks), need to wait for completion of __fput() and know * for this specific struct file it won't involve anything that would * need them. Use only if you really need it - at the very least, * don't blindly convert fput() by kernel thread to that. */ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; |
4a9d4b024 switch fput to ta... |
323 |
BUG_ON(!(task->flags & PF_KTHREAD)); |
d7065da03 get rid of the ma... |
324 |
__fput(file); |
4a9d4b024 switch fput to ta... |
325 |
} |
d7065da03 get rid of the ma... |
326 327 328 |
} EXPORT_SYMBOL(fput); |
1da177e4c Linux-2.6.12-rc2 |
329 330 |
void put_filp(struct file *file) { |
516e0cc56 [PATCH] f_count m... |
331 |
if (atomic_long_dec_and_test(&file->f_count)) { |
1da177e4c Linux-2.6.12-rc2 |
332 |
security_file_free(file); |
1da177e4c Linux-2.6.12-rc2 |
333 334 335 |
file_free(file); } } |
1da177e4c Linux-2.6.12-rc2 |
336 337 |
/*
 * Boot-time setup: create the "filp" slab cache, derive the default
 * files_stat.max_files from @mempages (never below NR_FILE), then run
 * files_defer_init() and initialise the nr_files per-cpu counter to 0.
 */
void __init files_init(unsigned long mempages)
{
	unsigned long default_limit;

	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
					SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/*
	 * One file with associated inode and dcache is very roughly 1K.
	 * Per default don't use more than 10% of our memory for files.
	 */
	default_limit = (mempages * (PAGE_SIZE / 1024)) / 10;
	files_stat.max_files = max_t(unsigned long, default_limit, NR_FILE);

	files_defer_init();
	percpu_counter_init(&nr_files, 0);
}