Blame view
fs/file_table.c
8.56 KB
1da177e4c
|
1 2 3 4 5 6 7 8 9 10 |
/* * linux/fs/file_table.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> |
9f3acc314
|
11 |
#include <linux/fdtable.h> |
1da177e4c
|
12 13 |
#include <linux/init.h> #include <linux/module.h> |
1da177e4c
|
14 15 |
#include <linux/fs.h> #include <linux/security.h> |
5b825c3af
|
16 |
#include <linux/cred.h> |
1da177e4c
|
17 |
#include <linux/eventpoll.h> |
ab2af1f50
|
18 |
#include <linux/rcupdate.h> |
1da177e4c
|
19 |
#include <linux/mount.h> |
16f7e0fe2
|
20 |
#include <linux/capability.h> |
1da177e4c
|
21 |
#include <linux/cdev.h> |
0eeca2830
|
22 |
#include <linux/fsnotify.h> |
529bf6be5
|
23 24 |
#include <linux/sysctl.h> #include <linux/percpu_counter.h> |
6416ccb78
|
25 |
#include <linux/percpu.h> |
4a9d4b024
|
26 27 |
#include <linux/hardirq.h> #include <linux/task_work.h> |
0552f879d
|
28 |
#include <linux/ima.h> |
4248b0da4
|
29 |
#include <linux/swap.h> |
529bf6be5
|
30 |
|
60063497a
|
31 |
#include <linux/atomic.h> |
1da177e4c
|
32 |
|
e81e3f4dc
|
33 |
#include "internal.h" |
1da177e4c
|
34 35 36 37 |
/* sysctl tunables... */ struct files_stat_struct files_stat = { .max_files = NR_FILE }; |
b6b3fdead
|
38 39 |
/* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; |
529bf6be5
|
40 |
static struct percpu_counter nr_files __cacheline_aligned_in_smp; |
1da177e4c
|
41 |
|
5c33b183a
|
42 |
static void file_free_rcu(struct rcu_head *head) |
1da177e4c
|
43 |
{ |
d76b0d9b2
|
44 45 46 |
struct file *f = container_of(head, struct file, f_u.fu_rcuhead); put_cred(f->f_cred); |
529bf6be5
|
47 |
kmem_cache_free(filp_cachep, f); |
1da177e4c
|
48 |
} |
529bf6be5
|
49 |
static inline void file_free(struct file *f) |
1da177e4c
|
50 |
{ |
529bf6be5
|
51 52 |
percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); |
1da177e4c
|
53 |
} |
529bf6be5
|
54 55 56 |
/* * Return the total number of open files in the system */ |
518de9b39
|
57 |
static long get_nr_files(void) |
1da177e4c
|
58 |
{ |
529bf6be5
|
59 |
return percpu_counter_read_positive(&nr_files); |
1da177e4c
|
60 |
} |
529bf6be5
|
61 62 63 |
/* * Return the maximum number of open files in the system */ |
518de9b39
|
64 |
unsigned long get_max_files(void) |
ab2af1f50
|
65 |
{ |
529bf6be5
|
66 |
return files_stat.max_files; |
ab2af1f50
|
67 |
} |
529bf6be5
|
68 69 70 71 72 73 |
EXPORT_SYMBOL_GPL(get_max_files); /* * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
1f7e0616c
|
74 |
int proc_nr_files(struct ctl_table *table, int write, |
529bf6be5
|
75 76 77 |
void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); |
518de9b39
|
78 |
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
529bf6be5
|
79 80 |
} #else |
1f7e0616c
|
81 |
int proc_nr_files(struct ctl_table *table, int write, |
529bf6be5
|
82 83 84 85 86 |
void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } #endif |
ab2af1f50
|
87 |
|
1da177e4c
|
88 |
/* Find an unused file structure and return a pointer to it. |
1afc99bea
|
89 90 |
* Returns an error pointer if some error happend e.g. we over file * structures limit, run out of memory or operation is not permitted. |
430e285e0
|
91 92 93 94 95 96 |
* * Be very careful using this. You are responsible for * getting write access to any mount that you might assign * to this filp, if it is opened for write. If this is not * done, you will imbalance int the mount's writer count * and a warning at __fput() time. |
1da177e4c
|
97 98 99 |
*/ struct file *get_empty_filp(void) { |
86a264abe
|
100 |
const struct cred *cred = current_cred(); |
518de9b39
|
101 |
static long old_max; |
1afc99bea
|
102 103 |
struct file *f; int error; |
1da177e4c
|
104 105 106 107 |
/* * Privileged users can go above max_files */ |
529bf6be5
|
108 109 110 111 112 |
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { /* * percpu_counters are inaccurate. Do an expensive check before * we go and fail. */ |
52d9f3b40
|
113 |
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files) |
529bf6be5
|
114 115 |
goto over; } |
af4d2ecbf
|
116 |
|
4975e45ff
|
117 |
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); |
1afc99bea
|
118 119 |
if (unlikely(!f)) return ERR_PTR(-ENOMEM); |
af4d2ecbf
|
120 |
|
529bf6be5
|
121 |
percpu_counter_inc(&nr_files); |
78d297887
|
122 |
f->f_cred = get_cred(cred); |
1afc99bea
|
123 124 125 126 127 |
error = security_file_alloc(f); if (unlikely(error)) { file_free(f); return ERR_PTR(error); } |
1da177e4c
|
128 |
|
516e0cc56
|
129 |
atomic_long_set(&f->f_count, 1); |
af4d2ecbf
|
130 |
rwlock_init(&f->f_owner.lock); |
684999149
|
131 |
spin_lock_init(&f->f_lock); |
9c225f265
|
132 |
mutex_init(&f->f_pos_lock); |
5a6b7951b
|
133 |
eventpoll_init_file(f); |
af4d2ecbf
|
134 |
/* f->f_version: 0 */ |
af4d2ecbf
|
135 136 137 |
return f; over: |
1da177e4c
|
138 |
/* Ran out of filps - report that */ |
529bf6be5
|
139 |
if (get_nr_files() > old_max) { |
518de9b39
|
140 141 |
pr_info("VFS: file-max limit %lu reached ", get_max_files()); |
529bf6be5
|
142 |
old_max = get_nr_files(); |
1da177e4c
|
143 |
} |
1afc99bea
|
144 |
return ERR_PTR(-ENFILE); |
1da177e4c
|
145 |
} |
ce8d2cdf3
|
146 147 |
/** * alloc_file - allocate and initialize a 'struct file' |
a457606a6
|
148 149 |
* * @path: the (dentry, vfsmount) pair for the new file |
ce8d2cdf3
|
150 151 |
* @mode: the mode with which the new file will be opened * @fop: the 'struct file_operations' for the new file |
ce8d2cdf3
|
152 |
*/ |
a4141d7cf
|
153 |
struct file *alloc_file(const struct path *path, fmode_t mode, |
2c48b9c45
|
154 |
const struct file_operations *fop) |
ce8d2cdf3
|
155 156 |
{ struct file *file; |
ce8d2cdf3
|
157 158 |
file = get_empty_filp(); |
1afc99bea
|
159 |
if (IS_ERR(file)) |
39b652527
|
160 |
return file; |
ce8d2cdf3
|
161 |
|
2c48b9c45
|
162 |
file->f_path = *path; |
dd37978c5
|
163 |
file->f_inode = path->dentry->d_inode; |
2c48b9c45
|
164 |
file->f_mapping = path->dentry->d_inode->i_mapping; |
5660e13d2
|
165 |
file->f_wb_err = filemap_sample_wb_err(file->f_mapping); |
293bc9822
|
166 |
if ((mode & FMODE_READ) && |
843631820
|
167 |
likely(fop->read || fop->read_iter)) |
7f7f25e82
|
168 |
mode |= FMODE_CAN_READ; |
293bc9822
|
169 |
if ((mode & FMODE_WRITE) && |
843631820
|
170 |
likely(fop->write || fop->write_iter)) |
7f7f25e82
|
171 |
mode |= FMODE_CAN_WRITE; |
ce8d2cdf3
|
172 173 |
file->f_mode = mode; file->f_op = fop; |
890275b5e
|
174 175 |
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); |
3d1e46315
|
176 |
return file; |
ce8d2cdf3
|
177 |
} |
73efc4681
|
178 |
EXPORT_SYMBOL(alloc_file); |
ce8d2cdf3
|
179 |
|
d7065da03
|
180 |
/* the real guts of fput() - releasing the last reference to file |
1da177e4c
|
181 |
*/ |
d7065da03
|
182 |
static void __fput(struct file *file) |
1da177e4c
|
183 |
{ |
0f7fc9e4d
|
184 185 |
struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; |
c77cecee5
|
186 |
struct inode *inode = file->f_inode; |
1da177e4c
|
187 188 |
might_sleep(); |
0eeca2830
|
189 190 |
fsnotify_close(file); |
1da177e4c
|
191 192 193 194 195 |
/* * The function eventpoll_release() should be the first called * in the file cleanup chain. */ eventpoll_release(file); |
78ed8a133
|
196 |
locks_remove_file(file); |
1da177e4c
|
197 |
|
233e70f42
|
198 |
if (unlikely(file->f_flags & FASYNC)) { |
72c2d5319
|
199 |
if (file->f_op->fasync) |
233e70f42
|
200 201 |
file->f_op->fasync(-1, file, 0); } |
4199d35cb
|
202 |
ima_file_free(file); |
72c2d5319
|
203 |
if (file->f_op->release) |
1da177e4c
|
204 205 |
file->f_op->release(inode, file); security_file_free(file); |
60ed8cf78
|
206 207 |
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { |
1da177e4c
|
208 |
cdev_put(inode->i_cdev); |
60ed8cf78
|
209 |
} |
1da177e4c
|
210 |
fops_put(file->f_op); |
609d7fa95
|
211 |
put_pid(file->f_owner.pid); |
890275b5e
|
212 213 |
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); |
83f936c75
|
214 215 216 217 |
if (file->f_mode & FMODE_WRITER) { put_write_access(inode); __mnt_drop_write(mnt); } |
0f7fc9e4d
|
218 219 |
file->f_path.dentry = NULL; file->f_path.mnt = NULL; |
dd37978c5
|
220 |
file->f_inode = NULL; |
1da177e4c
|
221 222 223 224 |
file_free(file); dput(dentry); mntput(mnt); } |
4f5e65a1c
|
225 |
static LLIST_HEAD(delayed_fput_list); |
4a9d4b024
|
226 227 |
static void delayed_fput(struct work_struct *unused) { |
4f5e65a1c
|
228 |
struct llist_node *node = llist_del_all(&delayed_fput_list); |
b9ea557ee
|
229 |
struct file *f, *t; |
4f5e65a1c
|
230 |
|
b9ea557ee
|
231 232 |
llist_for_each_entry_safe(f, t, node, f_u.fu_llist) __fput(f); |
4a9d4b024
|
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
} static void ____fput(struct callback_head *work) { __fput(container_of(work, struct file, f_u.fu_rcuhead)); } /* * If kernel thread really needs to have the final fput() it has done * to complete, call this. The only user right now is the boot - we * *do* need to make sure our writes to binaries on initramfs has * not left us with opened struct file waiting for __fput() - execve() * won't work without that. Please, don't add more callers without * very good reasons; in particular, never call that with locks * held and never call that from a thread that might need to do * some work on any kind of umount. */ void flush_delayed_fput(void) { delayed_fput(NULL); } |
c7314d74f
|
254 |
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); |
4a9d4b024
|
255 |
|
d7065da03
|
256 257 |
void fput(struct file *file) { |
4a9d4b024
|
258 259 |
if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; |
e7b2c4069
|
260 |
|
e7b2c4069
|
261 262 263 264 |
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) return; |
64372501e
|
265 266 |
/* * After this task has run exit_task_work(), |
be49b30a9
|
267 |
* task_work_add() will fail. Fall through to delayed |
64372501e
|
268 269 |
* fput to avoid leaking *file. */ |
4a9d4b024
|
270 |
} |
4f5e65a1c
|
271 272 |
if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) |
c7314d74f
|
273 |
schedule_delayed_work(&delayed_fput_work, 1); |
4a9d4b024
|
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
} } /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without * risking deadlocks), need to wait for completion of __fput() and know * for this specific struct file it won't involve anything that would * need them. Use only if you really need it - at the very least, * don't blindly convert fput() by kernel thread to that. */ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; |
4a9d4b024
|
289 |
BUG_ON(!(task->flags & PF_KTHREAD)); |
d7065da03
|
290 |
__fput(file); |
4a9d4b024
|
291 |
} |
d7065da03
|
292 293 294 |
} EXPORT_SYMBOL(fput); |
1da177e4c
|
295 296 |
void put_filp(struct file *file) { |
516e0cc56
|
297 |
if (atomic_long_dec_and_test(&file->f_count)) { |
1da177e4c
|
298 |
security_file_free(file); |
1da177e4c
|
299 300 301 |
file_free(file); } } |
4248b0da4
|
302 |
/*
 * Boot-time setup: create the "filp" slab cache for struct file
 * (SLAB_PANIC is requested, so the result is not checked here) and
 * initialise the nr_files counter to zero.
 */
void __init files_init(void)
{
	filp_cachep = kmem_cache_create("filp",
					sizeof(struct file),
					0,
					SLAB_HWCACHE_ALIGN | SLAB_PANIC,
					NULL);

	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
b6b3fdead
|
308 |
|
4248b0da4
|
309 310 311 312 313 314 315 316 317 318 319 |
/*
 * One file with associated inode and dcache is very roughly 1K. Per default
 * do not use more than 10% of our memory for files.
 *
 * The reserve is taken as 1.5 times the pages that are not currently
 * free, capped at totalram_pages - 1; a tenth of the remainder (in KiB)
 * becomes the limit, which is never set below NR_FILE.
 */
void __init files_maxfiles_init(void)
{
	unsigned long reserved;
	unsigned long limit;

	reserved = (totalram_pages - nr_free_pages()) * 3/2;
	reserved = min(reserved, totalram_pages - 1);

	limit = ((totalram_pages - reserved) * (PAGE_SIZE / 1024)) / 10;

	files_stat.max_files = max_t(unsigned long, limit, NR_FILE);
}