Blame view
fs/file_table.c
8.53 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 8 9 10 |
/* * linux/fs/file_table.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> |
9f3acc314 [PATCH] split lin... |
11 |
#include <linux/fdtable.h> |
1da177e4c Linux-2.6.12-rc2 |
12 13 |
#include <linux/init.h> #include <linux/module.h> |
1da177e4c Linux-2.6.12-rc2 |
14 15 16 |
#include <linux/fs.h> #include <linux/security.h> #include <linux/eventpoll.h> |
ab2af1f50 [PATCH] files: fi... |
17 |
#include <linux/rcupdate.h> |
1da177e4c Linux-2.6.12-rc2 |
18 |
#include <linux/mount.h> |
16f7e0fe2 [PATCH] capable/c... |
19 |
#include <linux/capability.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
#include <linux/cdev.h> |
0eeca2830 [PATCH] inotify |
21 |
#include <linux/fsnotify.h> |
529bf6be5 [PATCH] fix file ... |
22 23 |
#include <linux/sysctl.h> #include <linux/percpu_counter.h> |
6416ccb78 fs: scale files_lock |
24 |
#include <linux/percpu.h> |
4a9d4b024 switch fput to ta... |
25 26 |
#include <linux/hardirq.h> #include <linux/task_work.h> |
0552f879d Untangling ima me... |
27 |
#include <linux/ima.h> |
4248b0da4 fs, file table: r... |
28 |
#include <linux/swap.h> |
529bf6be5 [PATCH] fix file ... |
29 |
|
60063497a atomic: use <linu... |
30 |
#include <linux/atomic.h> |
1da177e4c Linux-2.6.12-rc2 |
31 |
|
e81e3f4dc fs: move get_empt... |
32 |
#include "internal.h" |
1da177e4c Linux-2.6.12-rc2 |
33 34 35 36 |
/* sysctl tunables... */ struct files_stat_struct files_stat = { .max_files = NR_FILE }; |
b6b3fdead filp_cachep can b... |
37 38 |
/* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; |
529bf6be5 [PATCH] fix file ... |
39 |
static struct percpu_counter nr_files __cacheline_aligned_in_smp; |
1da177e4c Linux-2.6.12-rc2 |
40 |
|
5c33b183a uninline file_fre... |
41 |
static void file_free_rcu(struct rcu_head *head) |
1da177e4c Linux-2.6.12-rc2 |
42 |
{ |
d76b0d9b2 CRED: Use creds i... |
43 44 45 |
struct file *f = container_of(head, struct file, f_u.fu_rcuhead); put_cred(f->f_cred); |
529bf6be5 [PATCH] fix file ... |
46 |
kmem_cache_free(filp_cachep, f); |
1da177e4c Linux-2.6.12-rc2 |
47 |
} |
529bf6be5 [PATCH] fix file ... |
48 |
static inline void file_free(struct file *f) |
1da177e4c Linux-2.6.12-rc2 |
49 |
{ |
529bf6be5 [PATCH] fix file ... |
50 51 |
percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); |
1da177e4c Linux-2.6.12-rc2 |
52 |
} |
529bf6be5 [PATCH] fix file ... |
53 54 55 |
/* * Return the total number of open files in the system */ |
518de9b39 fs: allow for mor... |
56 |
static long get_nr_files(void) |
1da177e4c Linux-2.6.12-rc2 |
57 |
{ |
529bf6be5 [PATCH] fix file ... |
58 |
return percpu_counter_read_positive(&nr_files); |
1da177e4c Linux-2.6.12-rc2 |
59 |
} |
529bf6be5 [PATCH] fix file ... |
60 61 62 |
/* * Return the maximum number of open files in the system */ |
518de9b39 fs: allow for mor... |
63 |
unsigned long get_max_files(void) |
ab2af1f50 [PATCH] files: fi... |
64 |
{ |
529bf6be5 [PATCH] fix file ... |
65 |
return files_stat.max_files; |
ab2af1f50 [PATCH] files: fi... |
66 |
} |
529bf6be5 [PATCH] fix file ... |
67 68 69 70 71 72 |
EXPORT_SYMBOL_GPL(get_max_files); /* * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
1f7e0616c fs: convert use o... |
73 |
int proc_nr_files(struct ctl_table *table, int write, |
529bf6be5 [PATCH] fix file ... |
74 75 76 |
void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); |
518de9b39 fs: allow for mor... |
77 |
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
529bf6be5 [PATCH] fix file ... |
78 79 |
} #else |
1f7e0616c fs: convert use o... |
80 |
int proc_nr_files(struct ctl_table *table, int write, |
529bf6be5 [PATCH] fix file ... |
81 82 83 84 85 |
void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } #endif |
ab2af1f50 [PATCH] files: fi... |
86 |
|
1da177e4c Linux-2.6.12-rc2 |
87 |
/* Find an unused file structure and return a pointer to it. |
1afc99bea propagate error f... |
88 89 |
* Returns an error pointer if some error happend e.g. we over file * structures limit, run out of memory or operation is not permitted. |
430e285e0 [PATCH] fix up ne... |
90 91 92 93 94 95 |
* * Be very careful using this. You are responsible for * getting write access to any mount that you might assign * to this filp, if it is opened for write. If this is not * done, you will imbalance int the mount's writer count * and a warning at __fput() time. |
1da177e4c Linux-2.6.12-rc2 |
96 97 98 |
*/ struct file *get_empty_filp(void) { |
86a264abe CRED: Wrap curren... |
99 |
const struct cred *cred = current_cred(); |
518de9b39 fs: allow for mor... |
100 |
static long old_max; |
1afc99bea propagate error f... |
101 102 |
struct file *f; int error; |
1da177e4c Linux-2.6.12-rc2 |
103 104 105 106 |
/* * Privileged users can go above max_files */ |
529bf6be5 [PATCH] fix file ... |
107 108 109 110 111 |
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { /* * percpu_counters are inaccurate. Do an expensive check before * we go and fail. */ |
52d9f3b40 lib: percpu_count... |
112 |
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files) |
529bf6be5 [PATCH] fix file ... |
113 114 |
goto over; } |
af4d2ecbf [PATCH] Fix of bo... |
115 |
|
4975e45ff fs: use kmem_cach... |
116 |
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); |
1afc99bea propagate error f... |
117 118 |
if (unlikely(!f)) return ERR_PTR(-ENOMEM); |
af4d2ecbf [PATCH] Fix of bo... |
119 |
|
529bf6be5 [PATCH] fix file ... |
120 |
percpu_counter_inc(&nr_files); |
78d297887 CRED: Fix kernel ... |
121 |
f->f_cred = get_cred(cred); |
1afc99bea propagate error f... |
122 123 124 125 126 |
error = security_file_alloc(f); if (unlikely(error)) { file_free(f); return ERR_PTR(error); } |
1da177e4c Linux-2.6.12-rc2 |
127 |
|
516e0cc56 [PATCH] f_count m... |
128 |
atomic_long_set(&f->f_count, 1); |
af4d2ecbf [PATCH] Fix of bo... |
129 |
rwlock_init(&f->f_owner.lock); |
684999149 Rename struct fil... |
130 |
spin_lock_init(&f->f_lock); |
9c225f265 vfs: atomic f_pos... |
131 |
mutex_init(&f->f_pos_lock); |
5a6b7951b [PATCH] get_empty... |
132 |
eventpoll_init_file(f); |
af4d2ecbf [PATCH] Fix of bo... |
133 |
/* f->f_version: 0 */ |
af4d2ecbf [PATCH] Fix of bo... |
134 135 136 |
return f; over: |
1da177e4c Linux-2.6.12-rc2 |
137 |
/* Ran out of filps - report that */ |
529bf6be5 [PATCH] fix file ... |
138 |
if (get_nr_files() > old_max) { |
518de9b39 fs: allow for mor... |
139 140 |
pr_info("VFS: file-max limit %lu reached ", get_max_files()); |
529bf6be5 [PATCH] fix file ... |
141 |
old_max = get_nr_files(); |
1da177e4c Linux-2.6.12-rc2 |
142 |
} |
1afc99bea propagate error f... |
143 |
return ERR_PTR(-ENFILE); |
1da177e4c Linux-2.6.12-rc2 |
144 |
} |
ce8d2cdf3 r/o bind mounts: ... |
145 146 |
/** * alloc_file - allocate and initialize a 'struct file' |
a457606a6 fs/file_table.c: ... |
147 148 |
* * @path: the (dentry, vfsmount) pair for the new file |
ce8d2cdf3 r/o bind mounts: ... |
149 150 |
* @mode: the mode with which the new file will be opened * @fop: the 'struct file_operations' for the new file |
ce8d2cdf3 r/o bind mounts: ... |
151 |
*/ |
2c48b9c45 switch alloc_file... |
152 153 |
struct file *alloc_file(struct path *path, fmode_t mode, const struct file_operations *fop) |
ce8d2cdf3 r/o bind mounts: ... |
154 155 |
{ struct file *file; |
ce8d2cdf3 r/o bind mounts: ... |
156 157 |
file = get_empty_filp(); |
1afc99bea propagate error f... |
158 |
if (IS_ERR(file)) |
39b652527 fs: Preserve erro... |
159 |
return file; |
ce8d2cdf3 r/o bind mounts: ... |
160 |
|
2c48b9c45 switch alloc_file... |
161 |
file->f_path = *path; |
dd37978c5 cache the value o... |
162 |
file->f_inode = path->dentry->d_inode; |
2c48b9c45 switch alloc_file... |
163 |
file->f_mapping = path->dentry->d_inode->i_mapping; |
293bc9822 new methods: ->re... |
164 |
if ((mode & FMODE_READ) && |
843631820 ->aio_read and ->... |
165 |
likely(fop->read || fop->read_iter)) |
7f7f25e82 replace checking ... |
166 |
mode |= FMODE_CAN_READ; |
293bc9822 new methods: ->re... |
167 |
if ((mode & FMODE_WRITE) && |
843631820 ->aio_read and ->... |
168 |
likely(fop->write || fop->write_iter)) |
7f7f25e82 replace checking ... |
169 |
mode |= FMODE_CAN_WRITE; |
ce8d2cdf3 r/o bind mounts: ... |
170 171 |
file->f_mode = mode; file->f_op = fop; |
890275b5e IMA: maintain i_r... |
172 173 |
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); |
3d1e46315 get rid of init_f... |
174 |
return file; |
ce8d2cdf3 r/o bind mounts: ... |
175 |
} |
73efc4681 re-export alloc_f... |
176 |
EXPORT_SYMBOL(alloc_file); |
ce8d2cdf3 r/o bind mounts: ... |
177 |
|
d7065da03 get rid of the ma... |
178 |
/* the real guts of fput() - releasing the last reference to file |
1da177e4c Linux-2.6.12-rc2 |
179 |
*/ |
d7065da03 get rid of the ma... |
180 |
static void __fput(struct file *file) |
1da177e4c Linux-2.6.12-rc2 |
181 |
{ |
0f7fc9e4d [PATCH] VFS: chan... |
182 183 |
struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; |
c77cecee5 Replace a bunch o... |
184 |
struct inode *inode = file->f_inode; |
1da177e4c Linux-2.6.12-rc2 |
185 186 |
might_sleep(); |
0eeca2830 [PATCH] inotify |
187 188 |
fsnotify_close(file); |
1da177e4c Linux-2.6.12-rc2 |
189 190 191 192 193 |
/* * The function eventpoll_release() should be the first called * in the file cleanup chain. */ eventpoll_release(file); |
78ed8a133 locks: rename loc... |
194 |
locks_remove_file(file); |
1da177e4c Linux-2.6.12-rc2 |
195 |
|
233e70f42 saner FASYNC hand... |
196 |
if (unlikely(file->f_flags & FASYNC)) { |
72c2d5319 file->f_op is nev... |
197 |
if (file->f_op->fasync) |
233e70f42 saner FASYNC hand... |
198 199 |
file->f_op->fasync(-1, file, 0); } |
4199d35cb vfs: move ima_fil... |
200 |
ima_file_free(file); |
72c2d5319 file->f_op is nev... |
201 |
if (file->f_op->release) |
1da177e4c Linux-2.6.12-rc2 |
202 203 |
file->f_op->release(inode, file); security_file_free(file); |
60ed8cf78 fix cdev leak on ... |
204 205 |
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { |
1da177e4c Linux-2.6.12-rc2 |
206 |
cdev_put(inode->i_cdev); |
60ed8cf78 fix cdev leak on ... |
207 |
} |
1da177e4c Linux-2.6.12-rc2 |
208 |
fops_put(file->f_op); |
609d7fa95 [PATCH] file: mod... |
209 |
put_pid(file->f_owner.pid); |
890275b5e IMA: maintain i_r... |
210 211 |
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); |
83f936c75 mark struct file ... |
212 213 214 215 |
if (file->f_mode & FMODE_WRITER) { put_write_access(inode); __mnt_drop_write(mnt); } |
0f7fc9e4d [PATCH] VFS: chan... |
216 217 |
file->f_path.dentry = NULL; file->f_path.mnt = NULL; |
dd37978c5 cache the value o... |
218 |
file->f_inode = NULL; |
1da177e4c Linux-2.6.12-rc2 |
219 220 221 222 |
file_free(file); dput(dentry); mntput(mnt); } |
4f5e65a1c fput: turn "list_... |
223 |
static LLIST_HEAD(delayed_fput_list); |
4a9d4b024 switch fput to ta... |
224 225 |
static void delayed_fput(struct work_struct *unused) { |
4f5e65a1c fput: turn "list_... |
226 227 228 229 230 231 |
struct llist_node *node = llist_del_all(&delayed_fput_list); struct llist_node *next; for (; node; node = next) { next = llist_next(node); __fput(llist_entry(node, struct file, f_u.fu_llist)); |
4a9d4b024 switch fput to ta... |
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
} } static void ____fput(struct callback_head *work) { __fput(container_of(work, struct file, f_u.fu_rcuhead)); } /* * If kernel thread really needs to have the final fput() it has done * to complete, call this. The only user right now is the boot - we * *do* need to make sure our writes to binaries on initramfs has * not left us with opened struct file waiting for __fput() - execve() * won't work without that. Please, don't add more callers without * very good reasons; in particular, never call that with locks * held and never call that from a thread that might need to do * some work on any kind of umount. */ void flush_delayed_fput(void) { delayed_fput(NULL); } |
c7314d74f nfsd regression s... |
254 |
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); |
4a9d4b024 switch fput to ta... |
255 |
|
d7065da03 get rid of the ma... |
256 257 |
void fput(struct file *file) { |
4a9d4b024 switch fput to ta... |
258 259 |
if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; |
e7b2c4069 fput: task_work_a... |
260 |
|
e7b2c4069 fput: task_work_a... |
261 262 263 264 |
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) return; |
64372501e fs/file_table.c:f... |
265 266 |
/* * After this task has run exit_task_work(), |
be49b30a9 fs/file_table.c:f... |
267 |
* task_work_add() will fail. Fall through to delayed |
64372501e fs/file_table.c:f... |
268 269 |
* fput to avoid leaking *file. */ |
4a9d4b024 switch fput to ta... |
270 |
} |
4f5e65a1c fput: turn "list_... |
271 272 |
if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) |
c7314d74f nfsd regression s... |
273 |
schedule_delayed_work(&delayed_fput_work, 1); |
4a9d4b024 switch fput to ta... |
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
} } /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without * risking deadlocks), need to wait for completion of __fput() and know * for this specific struct file it won't involve anything that would * need them. Use only if you really need it - at the very least, * don't blindly convert fput() by kernel thread to that. */ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; |
4a9d4b024 switch fput to ta... |
289 |
BUG_ON(!(task->flags & PF_KTHREAD)); |
d7065da03 get rid of the ma... |
290 |
__fput(file); |
4a9d4b024 switch fput to ta... |
291 |
} |
d7065da03 get rid of the ma... |
292 293 294 |
} EXPORT_SYMBOL(fput); |
1da177e4c Linux-2.6.12-rc2 |
295 296 |
void put_filp(struct file *file) { |
516e0cc56 [PATCH] f_count m... |
297 |
if (atomic_long_dec_and_test(&file->f_count)) { |
1da177e4c Linux-2.6.12-rc2 |
298 |
security_file_free(file); |
1da177e4c Linux-2.6.12-rc2 |
299 300 301 |
file_free(file); } } |
4248b0da4 fs, file table: r... |
302 |
/*
 * Boot-time setup: create the "filp" slab cache for struct file and
 * initialise the nr_files per-CPU counter to zero.
 */
void __init files_init(void)
{
	filp_cachep = kmem_cache_create("filp",
					sizeof(struct file),
					0,
					SLAB_HWCACHE_ALIGN | SLAB_PANIC,
					NULL);

	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
b6b3fdead filp_cachep can b... |
308 |
|
4248b0da4 fs, file table: r... |
309 310 311 312 313 314 315 316 317 318 319 |
/*
 * One file with associated inode and dcache is very roughly 1K. Per default
 * do not use more than 10% of our memory for files.
 *
 * The estimate sets aside 1.5x the pages currently in use (capped at
 * totalram_pages - 1) and never yields a limit below NR_FILE.
 */
void __init files_maxfiles_init(void)
{
	unsigned long reserved;
	unsigned long limit;

	reserved = (totalram_pages - nr_free_pages()) * 3 / 2;
	reserved = min(reserved, totalram_pages - 1);

	limit = ((totalram_pages - reserved) * (PAGE_SIZE / 1024)) / 10;

	files_stat.max_files = max_t(unsigned long, limit, NR_FILE);
}