Blame view

fs/file_table.c 8.57 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/fs/file_table.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
   */
  
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/file.h>
  #include <linux/init.h>
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
  #include <linux/fs.h>
  #include <linux/security.h>
  #include <linux/eventpoll.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
16
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
  #include <linux/mount.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
18
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/cdev.h>
0eeca2830   Robert Love   [PATCH] inotify
20
  #include <linux/fsnotify.h>
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
21
22
23
24
  #include <linux/sysctl.h>
  #include <linux/percpu_counter.h>
  
  #include <asm/atomic.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
28
29
  
  /*
   * sysctl tunables...
   *
   * max_files starts out as NR_FILE; files_init() recomputes it from the
   * amount of memory but never lets it drop below NR_FILE.
   * nr_files is refreshed from the per-cpu counter by proc_nr_files().
   */
  struct files_stat_struct files_stat = {
  	.max_files = NR_FILE
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
  /* public. Not pretty! */
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
31
  /*
   * NOTE(review): presumably the lock taken by file_list_lock() /
   * file_list_unlock() used further down - confirm against the header.
   */
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32

529bf6be5   Dipankar Sarma   [PATCH] fix file ...
33
  /*
   * Per-cpu count of allocated struct files: incremented in
   * get_empty_filp(), decremented in file_free().
   */
  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34

529bf6be5   Dipankar Sarma   [PATCH] fix file ...
35
  static inline void file_free_rcu(struct rcu_head *head)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
37
38
  	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
  	kmem_cache_free(filp_cachep, f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
39
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
40
  static inline void file_free(struct file *f)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
42
43
  	percpu_counter_dec(&nr_files);
  	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
45
46
47
48
  /*
   * Return the total number of open files in the system
   */
  static int get_nr_files(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
50
  	return percpu_counter_read_positive(&nr_files);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
51
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
52
53
54
55
  /*
   * Return the maximum number of open files in the system
   */
  int get_max_files(void)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
56
  {
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
57
  	return files_stat.max_files;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
58
  }
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  EXPORT_SYMBOL_GPL(get_max_files);
  
  /*
   * sysctl handler for the nr_files entry.
   *
   * With both CONFIG_SYSCTL and CONFIG_PROC_FS enabled, files_stat.nr_files
   * is refreshed from the per-cpu counter and the request is passed on to
   * proc_dointvec().  Otherwise the handler just returns -ENOSYS.
   */
  int proc_nr_files(ctl_table *table, int write, struct file *filp,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
  	files_stat.nr_files = get_nr_files();
  	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
  #else
  	return -ENOSYS;
  #endif
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
78

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79
80
81
82
83
84
  /* Find an unused file structure and return a pointer to it.
   * Returns NULL, if there are no more free file structures or
   * we run out of memory.
   */
  struct file *get_empty_filp(void)
  {
5a6b7951b   Benjamin LaHaise   [PATCH] get_empty...
85
  	struct task_struct *tsk;
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
86
  	static int old_max;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
89
90
91
  	struct file * f;
  
  	/*
  	 * Privileged users can go above max_files
  	 */
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
92
93
94
95
96
  	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
  		/*
  		 * percpu_counters are inaccurate.  Do an expensive check before
  		 * we go and fail.
  		 */
52d9f3b40   Peter Zijlstra   lib: percpu_count...
97
  		if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
98
99
  			goto over;
  	}
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
100

4975e45ff   Denis Cheng   fs: use kmem_cach...
101
  	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
102
103
  	if (f == NULL)
  		goto fail;
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
104
  	percpu_counter_inc(&nr_files);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
105
106
  	if (security_file_alloc(f))
  		goto fail_sec;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107

5a6b7951b   Benjamin LaHaise   [PATCH] get_empty...
108
109
  	tsk = current;
  	INIT_LIST_HEAD(&f->f_u.fu_list);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
110
  	atomic_set(&f->f_count, 1);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
111
  	rwlock_init(&f->f_owner.lock);
5a6b7951b   Benjamin LaHaise   [PATCH] get_empty...
112
113
114
  	f->f_uid = tsk->fsuid;
  	f->f_gid = tsk->fsgid;
  	eventpoll_init_file(f);
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
115
  	/* f->f_version: 0 */
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
116
117
118
  	return f;
  
  over:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119
  	/* Ran out of filps - report that */
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
120
  	if (get_nr_files() > old_max) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121
122
  		printk(KERN_INFO "VFS: file-max limit %d reached
  ",
529bf6be5   Dipankar Sarma   [PATCH] fix file ...
123
124
  					get_max_files());
  		old_max = get_nr_files();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
  	}
af4d2ecbf   Kirill Korotaev   [PATCH] Fix of bo...
126
127
128
129
  	goto fail;
  
  fail_sec:
  	file_free(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
130
131
132
133
134
  fail:
  	return NULL;
  }
  
  EXPORT_SYMBOL(get_empty_filp);
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
  /**
   * alloc_file - allocate and initialize a 'struct file'
   * @mnt: the vfsmount on which the file will reside
   * @dentry: the dentry representing the new file
   * @mode: the mode with which the new file will be opened
   * @fop: the 'struct file_operations' for the new file
   *
   * Use this instead of get_empty_filp() to get a new
   * 'struct file'.  Do so because of the same initialization
   * pitfalls listed for init_file().  This is the preferred
   * interface over calling init_file() directly.
   *
   * If all the callers of init_file() are eliminated, its
   * code should be moved into this function.
   *
   * Returns the new file, or NULL if get_empty_filp() could not
   * provide one.
   */
  struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
  		mode_t mode, const struct file_operations *fop)
  {
  	struct file *file;

  	file = get_empty_filp();
  	if (!file)
  		return NULL;

  	/* init_file() as defined in this file always returns 0. */
  	init_file(file, mnt, dentry, mode, fop);
  	return file;
  }
  EXPORT_SYMBOL(alloc_file);
  
  /**
   * init_file - initialize a 'struct file'
   * @file: the already allocated 'struct file' to initialize
   * @mnt: the vfsmount on which the file resides
   * @dentry: the dentry representing this file
   * @mode: the mode the file is opened with
   * @fop: the 'struct file_operations' for this file
   *
   * Fills in f_path (taking a mount reference with mntget()),
   * f_mapping (from the dentry's inode), f_mode and f_op.  Use this
   * rather than assigning the members by hand, so that steps such as
   * the mntget() are not forgotten.  @dentry must already have an
   * inode, since dentry->d_inode is dereferenced.
   *
   * Note: This is a crappy interface.  It is here to make
   * merging with the existing users of get_empty_filp()
   * who have complex failure logic easier.  All users
   * of this should be moving to alloc_file().
   *
   * Always returns 0.
   */
  int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
  	   mode_t mode, const struct file_operations *fop)
  {
  	file->f_path.dentry = dentry;
  	file->f_path.mnt = mntget(mnt);
  	file->f_mapping = dentry->d_inode->i_mapping;
  	file->f_mode = mode;
  	file->f_op = fop;

  	return 0;
  }
  EXPORT_SYMBOL(init_file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194
195
  /*
   * Drop one reference on @file; whoever drops the last one tears the
   * file down through __fput().
   */
  void fastcall fput(struct file *file)
  {
  	if (!atomic_dec_and_test(&file->f_count))
  		return;

  	__fput(file);
  }

  EXPORT_SYMBOL(fput);
  
  /* __fput is called from task context when aio completion releases the
   * last use of a struct file *.  Do not use otherwise.
   *
   * Runs the complete teardown of @file; fput() calls it once the
   * reference count has dropped to zero.  May sleep.
   */
  void fastcall __fput(struct file *file)
  {
  	struct dentry *dentry;
  	struct vfsmount *mnt;
  	struct inode *inode;

  	/* Keep our own copies: they are still needed after file_free(). */
  	dentry = file->f_path.dentry;
  	mnt = file->f_path.mnt;
  	inode = dentry->d_inode;

  	might_sleep();

  	fsnotify_close(file);
  	/*
  	 * The function eventpoll_release() should be the first called
  	 * in the file cleanup chain.
  	 */
  	eventpoll_release(file);
  	locks_remove_flock(file);

  	if (file->f_op && file->f_op->release)
  		file->f_op->release(inode, file);
  	security_file_free(file);
  	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev))
  		cdev_put(inode->i_cdev);
  	fops_put(file->f_op);
  	if (file->f_mode & FMODE_WRITE)
  		put_write_access(inode);
  	put_pid(file->f_owner.pid);
  	file_kill(file);

  	file->f_path.dentry = NULL;
  	file->f_path.mnt = NULL;
  	file_free(file);

  	/* Drop the path references using the copies saved above. */
  	dput(dentry);
  	mntput(mnt);
  }
  
  /*
   * Look up @fd in the current task's file table and take a reference
   * on the file found there.
   *
   * Returns the file with f_count incremented, or NULL if the slot is
   * empty or the count had already reached zero.
   */
  struct file fastcall *fget(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	struct file *file;

  	rcu_read_lock();
  	file = fcheck_files(files, fd);
  	/* File object ref couldn't be taken */
  	if (file && !atomic_inc_not_zero(&file->f_count))
  		file = NULL;
  	rcu_read_unlock();

  	return file;
  }

  EXPORT_SYMBOL(fget);
  
  /*
   * Lightweight file lookup - no refcnt increment if fd table isn't shared.
   * You can use this only if it is guaranteed that the current task already
   * holds a refcnt to that file. That check has to be done at fget() only
   * and a flag is returned to be passed to the corresponding fput_light().
   * There must not be a cloning between an fget_light/fput_light pair.
   *
   * *fput_needed is set to 1 only when a reference was actually taken,
   * and to 0 otherwise.
   */
  struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
  {
  	struct files_struct *files = current->files;
  	struct file *file;

  	*fput_needed = 0;

  	/* fd table has a single user: plain lookup, no reference taken. */
  	if (likely(atomic_read(&files->count) == 1))
  		return fcheck_files(files, fd);

  	rcu_read_lock();
  	file = fcheck_files(files, fd);
  	if (file) {
  		if (atomic_inc_not_zero(&file->f_count))
  			*fput_needed = 1;
  		else
  			/* Didn't get the reference, someone's freed */
  			file = NULL;
  	}
  	rcu_read_unlock();

  	return file;
  }
  
  
  void put_filp(struct file *file)
  {
095975da2   Nick Piggin   [PATCH] rcu file:...
291
  	if (atomic_dec_and_test(&file->f_count)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
293
294
295
296
297
298
299
300
301
302
  		security_file_free(file);
  		file_kill(file);
  		file_free(file);
  	}
  }
  
  void file_move(struct file *file, struct list_head *list)
  {
  	if (!list)
  		return;
  	file_list_lock();
2f5120166   Eric Dumazet   [PATCH] reduce si...
303
  	list_move(&file->f_u.fu_list, list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
304
305
306
307
308
  	file_list_unlock();
  }
  
  void file_kill(struct file *file)
  {
2f5120166   Eric Dumazet   [PATCH] reduce si...
309
  	if (!list_empty(&file->f_u.fu_list)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
310
  		file_list_lock();
2f5120166   Eric Dumazet   [PATCH] reduce si...
311
  		list_del_init(&file->f_u.fu_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
314
315
316
317
  		file_list_unlock();
  	}
  }
  
  int fs_may_remount_ro(struct super_block *sb)
  {
cfdaf9e5f   Matthias Kaehlcke   fs/file_table.c: ...
318
  	struct file *file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
319
320
321
  
  	/* Check that no files are currently opened for writing. */
  	file_list_lock();
cfdaf9e5f   Matthias Kaehlcke   fs/file_table.c: ...
322
  	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
0f7fc9e4d   Josef "Jeff" Sipek   [PATCH] VFS: chan...
323
  		struct inode *inode = file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
  
  		/* File with pending delete? */
  		if (inode->i_nlink == 0)
  			goto too_bad;
  
  		/* Writeable file? */
  		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
  			goto too_bad;
  	}
  	file_list_unlock();
  	return 1; /* Tis' cool bro. */
  too_bad:
  	file_list_unlock();
  	return 0;
  }
  
  /*
   * Boot-time setup: size the file-max limit from @mempages (number of
   * memory pages), then initialize the deferred-free machinery and the
   * global open-file counter.
   */
  void __init files_init(unsigned long mempages)
  {
  	/*
  	 * One file with associated inode and dcache is very roughly 1K.
  	 * Per default don't use more than 10% of our memory for files.
  	 *
  	 * NOTE(review): the unsigned long result is narrowed to int here;
  	 * confirm this cannot overflow on very large memory configurations.
  	 */
  	int n = (mempages * (PAGE_SIZE / 1024)) / 10;

  	files_stat.max_files = n;
  	/* Never go below the compile-time default. */
  	if (files_stat.max_files < NR_FILE)
  		files_stat.max_files = NR_FILE;

  	files_defer_init();
  	percpu_counter_init(&nr_files, 0);
  }