Blame view

fs/file_table.c 8.53 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   *  linux/fs/file_table.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
   */
  
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
11
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
  #include <linux/init.h>
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
  #include <linux/fs.h>
  #include <linux/security.h>
  #include <linux/eventpoll.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
17
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/mount.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
19
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/cdev.h>
0eeca2830   Robert Love   [PATCH] inotify
21
  #include <linux/fsnotify.h>
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
22
23
  #include <linux/sysctl.h>
  #include <linux/percpu_counter.h>
6416ccb78   Nick Piggin   fs: scale files_lock
24
  #include <linux/percpu.h>
4a9d4b024   Al Viro   switch fput to ta...
25
26
  #include <linux/hardirq.h>
  #include <linux/task_work.h>
0552f879d   Al Viro   Untangling ima me...
27
  #include <linux/ima.h>
4248b0da4   Mel Gorman   fs, file table: r...
28
  #include <linux/swap.h>
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
29

60063497a   Arun Sharma   atomic: use <linu...
30
  #include <linux/atomic.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31

e81e3f4dc   Eric Paris   fs: move get_empt...
32
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
34
35
36
  /*
   * sysctl tunables.  max_files starts out at the compile-time NR_FILE
   * value and is recomputed by files_maxfiles_init(); nr_files is
   * refreshed by proc_nr_files() each time that handler runs.
   */
  struct files_stat_struct files_stat = {
  	.max_files = NR_FILE
  };
b6b3fdead   Eric Dumazet   filp_cachep can b...
37
38
  /* SLAB cache for file structures; created in files_init(). */
  static struct kmem_cache *filp_cachep __read_mostly;
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
39
  /*
   * Per-CPU counter of allocated file structures: incremented in
   * get_empty_filp(), decremented in file_free().
   */
  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40

5c33b183a   Al Viro   uninline file_fre...
41
  static void file_free_rcu(struct rcu_head *head)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
  {
d76b0d9b2   David Howells   CRED: Use creds i...
43
44
45
  	struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
  
  	put_cred(f->f_cred);
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
46
  	kmem_cache_free(filp_cachep, f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
48
  static inline void file_free(struct file *f)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
50
51
  	percpu_counter_dec(&nr_files);
  	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
53
54
55
  /*
   * Return the total number of open files in the system
   */
518de9b39   Eric Dumazet   fs: allow for mor...
56
  static long get_nr_files(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
57
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
58
  	return percpu_counter_read_positive(&nr_files);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
59
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
60
61
62
  /*
   * Return the maximum number of open files in the system
   */
518de9b39   Eric Dumazet   fs: allow for mor...
63
  unsigned long get_max_files(void)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
64
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
65
  	return files_stat.max_files;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
66
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
67
68
69
70
71
72
  EXPORT_SYMBOL_GPL(get_max_files);
  
  /*
   * Handle nr_files sysctl
   */
  #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
1f7e0616c   Joe Perches   fs: convert use o...
73
  int proc_nr_files(struct ctl_table *table, int write,
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
74
75
76
                       void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	files_stat.nr_files = get_nr_files();
518de9b39   Eric Dumazet   fs: allow for mor...
77
  	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
78
79
  }
  #else
1f7e0616c   Joe Perches   fs: convert use o...
80
  int proc_nr_files(struct ctl_table *table, int write,
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
81
82
83
84
85
                       void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return -ENOSYS;
  }
  #endif
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
86

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
  /* Find an unused file structure and return a pointer to it.
1afc99bea   Al Viro   propagate error f...
88
89
   * Returns an error pointer if some error happend e.g. we over file
   * structures limit, run out of memory or operation is not permitted.
430e285e0   Dave Hansen   [PATCH] fix up ne...
90
91
92
93
94
95
   *
   * Be very careful using this.  You are responsible for
   * getting write access to any mount that you might assign
   * to this filp, if it is opened for write.  If this is not
   * done, you will imbalance int the mount's writer count
   * and a warning at __fput() time.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
96
97
98
   */
  struct file *get_empty_filp(void)
  {
86a264abe   David Howells   CRED: Wrap curren...
99
  	const struct cred *cred = current_cred();
518de9b39   Eric Dumazet   fs: allow for mor...
100
  	static long old_max;
1afc99bea   Al Viro   propagate error f...
101
102
  	struct file *f;
  	int error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
105
106
  
  	/*
  	 * Privileged users can go above max_files
  	 */
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
107
108
109
110
111
  	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
  		/*
  		 * percpu_counters are inaccurate.  Do an expensive check before
  		 * we go and fail.
  		 */
52d9f3b40   Peter Zijlstra   lib: percpu_count...
112
  		if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
113
114
  			goto over;
  	}
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
115

4975e45ff   Denis Cheng   fs: use kmem_cach...
116
  	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
1afc99bea   Al Viro   propagate error f...
117
118
  	if (unlikely(!f))
  		return ERR_PTR(-ENOMEM);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
119

529bf6be5   Dipankar Sarma   [PATCH] fix file ...
120
  	percpu_counter_inc(&nr_files);
78d297887   Tetsuo Handa   CRED: Fix kernel ...
121
  	f->f_cred = get_cred(cred);
1afc99bea   Al Viro   propagate error f...
122
123
124
125
126
  	error = security_file_alloc(f);
  	if (unlikely(error)) {
  		file_free(f);
  		return ERR_PTR(error);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127

516e0cc56   Al Viro   [PATCH] f_count m...
128
  	atomic_long_set(&f->f_count, 1);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
129
  	rwlock_init(&f->f_owner.lock);
684999149   Jonathan Corbet   Rename struct fil...
130
  	spin_lock_init(&f->f_lock);
9c225f265   Linus Torvalds   vfs: atomic f_pos...
131
  	mutex_init(&f->f_pos_lock);
5a6b7951b   Benjamin LaHaise   [PATCH] get_empty...
132
  	eventpoll_init_file(f);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
133
  	/* f->f_version: 0 */
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
134
135
136
  	return f;
  
  over:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
  	/* Ran out of filps - report that */
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
138
  	if (get_nr_files() > old_max) {
518de9b39   Eric Dumazet   fs: allow for mor...
139
140
  		pr_info("VFS: file-max limit %lu reached
  ", get_max_files());
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
141
  		old_max = get_nr_files();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142
  	}
1afc99bea   Al Viro   propagate error f...
143
  	return ERR_PTR(-ENFILE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
144
  }
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
145
146
  /**
   * alloc_file - allocate and initialize a 'struct file'
a457606a6   Eric Biggers   fs/file_table.c: ...
147
148
   *
   * @path: the (dentry, vfsmount) pair for the new file
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
149
150
   * @mode: the mode with which the new file will be opened
   * @fop: the 'struct file_operations' for the new file
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
151
   */
2c48b9c45   Al Viro   switch alloc_file...
152
153
  struct file *alloc_file(struct path *path, fmode_t mode,
  		const struct file_operations *fop)
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
154
155
  {
  	struct file *file;
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
156
157
  
  	file = get_empty_filp();
1afc99bea   Al Viro   propagate error f...
158
  	if (IS_ERR(file))
39b652527   Anatol Pomozov   fs: Preserve erro...
159
  		return file;
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
160

2c48b9c45   Al Viro   switch alloc_file...
161
  	file->f_path = *path;
dd37978c5   Al Viro   cache the value o...
162
  	file->f_inode = path->dentry->d_inode;
2c48b9c45   Al Viro   switch alloc_file...
163
  	file->f_mapping = path->dentry->d_inode->i_mapping;
293bc9822   Al Viro   new methods: ->re...
164
  	if ((mode & FMODE_READ) &&
843631820   Al Viro   ->aio_read and ->...
165
  	     likely(fop->read || fop->read_iter))
7f7f25e82   Al Viro   replace checking ...
166
  		mode |= FMODE_CAN_READ;
293bc9822   Al Viro   new methods: ->re...
167
  	if ((mode & FMODE_WRITE) &&
843631820   Al Viro   ->aio_read and ->...
168
  	     likely(fop->write || fop->write_iter))
7f7f25e82   Al Viro   replace checking ...
169
  		mode |= FMODE_CAN_WRITE;
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
170
171
  	file->f_mode = mode;
  	file->f_op = fop;
890275b5e   Mimi Zohar   IMA: maintain i_r...
172
173
  	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
  		i_readcount_inc(path->dentry->d_inode);
3d1e46315   Al Viro   get rid of init_f...
174
  	return file;
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
175
  }
73efc4681   Roland Dreier   re-export alloc_f...
176
  EXPORT_SYMBOL(alloc_file);
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
177

d7065da03   Al Viro   get rid of the ma...
178
  /* the real guts of fput() - releasing the last reference to file
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
   */
d7065da03   Al Viro   get rid of the ma...
180
  static void __fput(struct file *file)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
  {
0f7fc9e4d   Josef "Jeff" Sipek   [PATCH] VFS: chan...
182
183
  	struct dentry *dentry = file->f_path.dentry;
  	struct vfsmount *mnt = file->f_path.mnt;
c77cecee5   David Howells   Replace a bunch o...
184
  	struct inode *inode = file->f_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
186
  
  	might_sleep();
0eeca2830   Robert Love   [PATCH] inotify
187
188
  
  	fsnotify_close(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
189
190
191
192
193
  	/*
  	 * The function eventpoll_release() should be the first called
  	 * in the file cleanup chain.
  	 */
  	eventpoll_release(file);
78ed8a133   Jeff Layton   locks: rename loc...
194
  	locks_remove_file(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195

233e70f42   Al Viro   saner FASYNC hand...
196
  	if (unlikely(file->f_flags & FASYNC)) {
72c2d5319   Al Viro   file->f_op is nev...
197
  		if (file->f_op->fasync)
233e70f42   Al Viro   saner FASYNC hand...
198
199
  			file->f_op->fasync(-1, file, 0);
  	}
4199d35cb   Mimi Zohar   vfs: move ima_fil...
200
  	ima_file_free(file);
72c2d5319   Al Viro   file->f_op is nev...
201
  	if (file->f_op->release)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
  		file->f_op->release(inode, file);
  	security_file_free(file);
60ed8cf78   Miklos Szeredi   fix cdev leak on ...
204
205
  	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
  		     !(file->f_mode & FMODE_PATH))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
  		cdev_put(inode->i_cdev);
60ed8cf78   Miklos Szeredi   fix cdev leak on ...
207
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
  	fops_put(file->f_op);
609d7fa95   Eric W. Biederman   [PATCH] file: mod...
209
  	put_pid(file->f_owner.pid);
890275b5e   Mimi Zohar   IMA: maintain i_r...
210
211
  	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
  		i_readcount_dec(inode);
83f936c75   Al Viro   mark struct file ...
212
213
214
215
  	if (file->f_mode & FMODE_WRITER) {
  		put_write_access(inode);
  		__mnt_drop_write(mnt);
  	}
0f7fc9e4d   Josef "Jeff" Sipek   [PATCH] VFS: chan...
216
217
  	file->f_path.dentry = NULL;
  	file->f_path.mnt = NULL;
dd37978c5   Al Viro   cache the value o...
218
  	file->f_inode = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219
220
221
222
  	file_free(file);
  	dput(dentry);
  	mntput(mnt);
  }
4f5e65a1c   Oleg Nesterov   fput: turn "list_...
223
  /* Files queued by fput() whose final __fput() is run by delayed_fput(). */
  static LLIST_HEAD(delayed_fput_list);
4a9d4b024   Al Viro   switch fput to ta...
224
225
  static void delayed_fput(struct work_struct *unused)
  {
4f5e65a1c   Oleg Nesterov   fput: turn "list_...
226
227
228
229
230
231
  	struct llist_node *node = llist_del_all(&delayed_fput_list);
  	struct llist_node *next;
  
  	for (; node; node = next) {
  		next = llist_next(node);
  		__fput(llist_entry(node, struct file, f_u.fu_llist));
4a9d4b024   Al Viro   switch fput to ta...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
  	}
  }
  
  /*
   * task_work callback installed by fput(): recover the struct file that
   * embeds @work and release it with __fput().
   */
  static void ____fput(struct callback_head *work)
  {
  	struct file *file = container_of(work, struct file, f_u.fu_rcuhead);
  
  	__fput(file);
  }
  
  /*
   * Run every final fput() that is still queued on delayed_fput_list, right
   * now, in the caller's context.
   *
   * This is for a kernel thread that really needs its final fput() to have
   * completed.  The only user right now is the boot code: writes to
   * binaries on initramfs must not leave an opened struct file waiting for
   * __fput(), because execve() won't work without that.
   *
   * Please don't add more callers without very good reasons.  In
   * particular, never call this with locks held, and never call it from a
   * thread that might need to do some work on any kind of umount.
   */
  void flush_delayed_fput(void)
  {
  	delayed_fput(NULL);
  }
c7314d74f   Al Viro   nfsd regression s...
254
  /* Delayed work item that runs delayed_fput(); scheduled from fput(). */
  static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
4a9d4b024   Al Viro   switch fput to ta...
255

d7065da03   Al Viro   get rid of the ma...
256
257
  void fput(struct file *file)
  {
4a9d4b024   Al Viro   switch fput to ta...
258
259
  	if (atomic_long_dec_and_test(&file->f_count)) {
  		struct task_struct *task = current;
e7b2c4069   Oleg Nesterov   fput: task_work_a...
260

e7b2c4069   Oleg Nesterov   fput: task_work_a...
261
262
263
264
  		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
  			init_task_work(&file->f_u.fu_rcuhead, ____fput);
  			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
  				return;
64372501e   Andrew Morton   fs/file_table.c:f...
265
266
  			/*
  			 * After this task has run exit_task_work(),
be49b30a9   Andrew Morton   fs/file_table.c:f...
267
  			 * task_work_add() will fail.  Fall through to delayed
64372501e   Andrew Morton   fs/file_table.c:f...
268
269
  			 * fput to avoid leaking *file.
  			 */
4a9d4b024   Al Viro   switch fput to ta...
270
  		}
4f5e65a1c   Oleg Nesterov   fput: turn "list_...
271
272
  
  		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
c7314d74f   Al Viro   nfsd regression s...
273
  			schedule_delayed_work(&delayed_fput_work, 1);
4a9d4b024   Al Viro   switch fput to ta...
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
  	}
  }
  
  /*
   * synchronous analog of fput(); for kernel threads that might be needed
   * in some umount() (and thus can't use flush_delayed_fput() without
   * risking deadlocks), need to wait for completion of __fput() and know
   * for this specific struct file it won't involve anything that would
   * need them.  Use only if you really need it - at the very least,
   * don't blindly convert fput() by kernel thread to that.
   */
  void __fput_sync(struct file *file)
  {
  	if (atomic_long_dec_and_test(&file->f_count)) {
  		struct task_struct *task = current;
4a9d4b024   Al Viro   switch fput to ta...
289
  		BUG_ON(!(task->flags & PF_KTHREAD));
d7065da03   Al Viro   get rid of the ma...
290
  		__fput(file);
4a9d4b024   Al Viro   switch fput to ta...
291
  	}
d7065da03   Al Viro   get rid of the ma...
292
293
294
  }
  
  EXPORT_SYMBOL(fput);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
295
296
  void put_filp(struct file *file)
  {
516e0cc56   Al Viro   [PATCH] f_count m...
297
  	if (atomic_long_dec_and_test(&file->f_count)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
298
  		security_file_free(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299
300
301
  		file_free(file);
  	}
  }
4248b0da4   Mel Gorman   fs, file table: r...
302
  /*
   * Boot-time setup: create the "filp" slab cache that backs struct file
   * (SLAB_PANIC, so creation failure is not handled here) and initialise
   * the nr_files per-CPU counter to zero.
   */
  void __init files_init(void)
  {
  	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
  			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
  }
b6b3fdead   Eric Dumazet   filp_cachep can b...
308

4248b0da4   Mel Gorman   fs, file table: r...
309
310
311
312
313
314
315
316
317
318
319
  /*
   * One file with associated inode and dcache is very roughly 1K. Per default
   * do not use more than 10% of our memory for files.
   *
   * The budget is taken from what remains after reserving 1.5 times the
   * pages currently in use (capped at totalram_pages - 1); the resulting
   * limit never drops below NR_FILE.
   */
  void __init files_maxfiles_init(void)
  {
  	unsigned long n;
  	unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
  
  	/* Keep the subtraction below from wrapping around. */
  	memreserve = min(memreserve, totalram_pages - 1);
  	/* pages -> KiB, then one tenth of that at ~1K per file */
  	n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
  
  	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
  }