Blame view

fs/file.c 11.8 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *  linux/fs/file.c
   *
   *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
   *
   *  Manage the dynamic fd arrays in the process files_struct.
   */
630d9c472   Paul Gortmaker   fs: reduce the us...
8
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
  #include <linux/fs.h>
  #include <linux/mm.h>
6d4831c28   Andrew Morton   vfs: avoid large ...
11
  #include <linux/mmzone.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
  #include <linux/time.h>
d43c36dc6   Alexey Dobriyan   headers: remove s...
13
  #include <linux/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
17
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/bitops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
19
20
21
22
23
24
25
26
  #include <linux/interrupt.h>
  #include <linux/spinlock.h>
  #include <linux/rcupdate.h>
  #include <linux/workqueue.h>
  
  /*
   * Anchor for a chain of fdtables whose freeing has been handed off to a
   * work item (see free_fdtable_rcu() and free_fdtable_work()).
   */
  struct fdtable_defer {
  	spinlock_t lock;		/* taken around every access to ->next */
  	struct work_struct wq;		/* initialised to run free_fdtable_work() */
  	struct fdtable *next;		/* first fdtable waiting to be freed */
  };
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
29
  int sysctl_nr_open __read_mostly = 1024*1024;
eceea0b3d   Al Viro   [PATCH] avoid mul...
30
31
  int sysctl_nr_open_min = BITS_PER_LONG;
  int sysctl_nr_open_max = 1024 * 1024; /* raised later */
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
32

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
33
34
35
36
37
38
39
  /*
   * Fdtables whose fd array or bitmaps were vmalloc'ed are not freed
   * directly; they are queued on this list and freed later from a work item.
   * Keeping the list per-cpu avoids embedding a work_struct in the fdtable
   * itself, which would grow that per-task structure by 64 bytes (i386).
   */
  static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40

1fd36adcd   David Howells   Replace the fd_se...
41
  /*
   * Allocate @size bytes for an fd array or fd bitmap.
   *
   * Requests no larger than PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER are first
   * tried with kmalloc() (allocation-failure warnings suppressed), because
   * very large physically contiguous allocations can stress page reclaim.
   * Anything bigger, or a failed kmalloc(), falls back to vmalloc().
   *
   * Returns the buffer, or NULL if both allocators failed.  Release it with
   * free_fdmem().
   */
  static void *alloc_fdmem(size_t size)
  {
  	void *mem = NULL;

  	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
  		mem = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

  	if (mem == NULL)
  		mem = vmalloc(size);

  	return mem;
  }
a892e2d7d   Changli Gao   vfs: use kmalloc(...
54
  /*
   * Release a buffer obtained from alloc_fdmem(), picking vfree() or kfree()
   * according to which allocator the address belongs to.
   */
  static void free_fdmem(void *ptr)
  {
  	if (is_vmalloc_addr(ptr))
  		vfree(ptr);
  	else
  		kfree(ptr);
  }
a892e2d7d   Changli Gao   vfs: use kmalloc(...
58
  static void __free_fdtable(struct fdtable *fdt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
59
  {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
60
61
62
  	free_fdmem(fdt->fd);
  	free_fdmem(fdt->open_fds);
  	kfree(fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
63
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64

65f27f384   David Howells   WorkStruct: Pass ...
65
  static void free_fdtable_work(struct work_struct *work)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
66
  {
65f27f384   David Howells   WorkStruct: Pass ...
67
68
  	struct fdtable_defer *f =
  		container_of(work, struct fdtable_defer, wq);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
69
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
70

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
71
72
73
74
75
76
  	spin_lock_bh(&f->lock);
  	fdt = f->next;
  	f->next = NULL;
  	spin_unlock_bh(&f->lock);
  	while(fdt) {
  		struct fdtable *next = fdt->next;
a892e2d7d   Changli Gao   vfs: use kmalloc(...
77
78
  
  		__free_fdtable(fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
79
80
81
  		fdt = next;
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82

4fd45812c   Vadim Lobanov   [PATCH] fdtable: ...
83
  void free_fdtable_rcu(struct rcu_head *rcu)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
84
85
  {
  	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
86
  	struct fdtable_defer *fddef;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
88
  	BUG_ON(!fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
89

4fd45812c   Vadim Lobanov   [PATCH] fdtable: ...
90
  	if (fdt->max_fds <= NR_OPEN_DEFAULT) {
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
91
  		/*
4fd45812c   Vadim Lobanov   [PATCH] fdtable: ...
92
93
  		 * This fdtable is embedded in the files structure and that
  		 * structure itself is getting destroyed.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
94
  		 */
4fd45812c   Vadim Lobanov   [PATCH] fdtable: ...
95
96
  		kmem_cache_free(files_cachep,
  				container_of(fdt, struct files_struct, fdtab));
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
97
98
  		return;
  	}
a892e2d7d   Changli Gao   vfs: use kmalloc(...
99
  	if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
100
  		kfree(fdt->fd);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
101
  		kfree(fdt->open_fds);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
102
  		kfree(fdt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
  	} else {
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
104
105
106
107
  		fddef = &get_cpu_var(fdtable_defer_list);
  		spin_lock(&fddef->lock);
  		fdt->next = fddef->next;
  		fddef->next = fdt;
593be07ae   Tejun Heo   [PATCH] file: kil...
108
109
  		/* vmallocs are handled from the workqueue context */
  		schedule_work(&fddef->wq);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
110
111
  		spin_unlock(&fddef->lock);
  		put_cpu_var(fdtable_defer_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
  	}
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
113
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
114
115
116
117
  /*
   * Expand the fdset in the files_struct.  Called with the files spinlock
   * held for write.
   */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
118
  static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
119
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
120
  	unsigned int cpy, set;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
121

5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
122
  	BUG_ON(nfdt->max_fds < ofdt->max_fds);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
123
124
125
126
127
128
129
130
131
132
133
134
  
  	cpy = ofdt->max_fds * sizeof(struct file *);
  	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
  	memcpy(nfdt->fd, ofdt->fd, cpy);
  	memset((char *)(nfdt->fd) + cpy, 0, set);
  
  	cpy = ofdt->max_fds / BITS_PER_BYTE;
  	set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
  	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
  	memset((char *)(nfdt->open_fds) + cpy, 0, set);
  	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
  	memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
  }
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
136
  static struct fdtable * alloc_fdtable(unsigned int nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
138
  	struct fdtable *fdt;
1fd36adcd   David Howells   Replace the fd_se...
139
  	void *data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
141
  	/*
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
142
143
144
145
146
  	 * Figure out how many fds we actually want to support in this fdtable.
  	 * Allocation steps are keyed to the size of the fdarray, since it
  	 * grows far faster than any of the other dynamic data. We try to fit
  	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
  	 * and growing in powers of two from there on.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
147
  	 */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
148
149
150
  	nr /= (1024 / sizeof(struct file *));
  	nr = roundup_pow_of_two(nr + 1);
  	nr *= (1024 / sizeof(struct file *));
5c598b342   Al Viro   [PATCH] fix sysct...
151
152
153
154
155
156
157
158
159
160
  	/*
  	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
  	 * had been set lower between the check in expand_files() and here.  Deal
  	 * with that in caller, it's cheaper that way.
  	 *
  	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
  	 * bitmaps handling below becomes unpleasant, to put it mildly...
  	 */
  	if (unlikely(nr > sysctl_nr_open))
  		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
161

5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
162
163
  	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
  	if (!fdt)
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
164
  		goto out;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
165
166
167
168
  	fdt->max_fds = nr;
  	data = alloc_fdmem(nr * sizeof(struct file *));
  	if (!data)
  		goto out_fdt;
1fd36adcd   David Howells   Replace the fd_se...
169
170
171
  	fdt->fd = data;
  
  	data = alloc_fdmem(max_t(size_t,
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
172
173
174
  				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
  	if (!data)
  		goto out_arr;
1fd36adcd   David Howells   Replace the fd_se...
175
  	fdt->open_fds = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
176
  	data += nr / BITS_PER_BYTE;
1fd36adcd   David Howells   Replace the fd_se...
177
  	fdt->close_on_exec = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
178
  	fdt->next = NULL;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
179
  	return fdt;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
180
181
  
  out_arr:
a892e2d7d   Changli Gao   vfs: use kmalloc(...
182
  	free_fdmem(fdt->fd);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
183
  out_fdt:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
184
  	kfree(fdt);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
185
  out:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
186
187
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
188

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
189
  /*
   * Grow the descriptor table of @files so that descriptor @nr fits.
   *
   * A new fdtable (fd array plus bitmaps) is allocated with files->file_lock
   * temporarily dropped, so the state is re-checked once the lock is held
   * again.
   *
   * Returns 1 on success (including the case where another task already
   * expanded the table), -ENOMEM if allocation failed, or -EMFILE if the new
   * table turned out too small for @nr.
   * The files->file_lock should be held on entry, and will be held on exit.
   */
  static int expand_fdtable(struct files_struct *files, int nr)
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
  {
  	struct fdtable *fresh, *current_fdt;

  	spin_unlock(&files->file_lock);
  	fresh = alloc_fdtable(nr);
  	spin_lock(&files->file_lock);
  	if (!fresh)
  		return -ENOMEM;

  	/*
  	 * extremely unlikely race - sysctl_nr_open decreased between the check in
  	 * caller and alloc_fdtable().  Cheaper to catch it here...
  	 */
  	if (unlikely(fresh->max_fds <= nr)) {
  		__free_fdtable(fresh);
  		return -EMFILE;
  	}

  	/*
  	 * Check again since another task may have expanded the fd table while
  	 * we dropped the lock
  	 */
  	current_fdt = files_fdtable(files);
  	if (nr < current_fdt->max_fds) {
  		/* Somebody else expanded, so undo our attempt */
  		__free_fdtable(fresh);
  		return 1;
  	}

  	/* Continue as planned */
  	copy_fdtable(fresh, current_fdt);
  	rcu_assign_pointer(files->fdt, fresh);
  	/* A table of NR_OPEN_DEFAULT entries or fewer is not freed here. */
  	if (current_fdt->max_fds > NR_OPEN_DEFAULT)
  		free_fdtable(current_fdt);

  	return 1;
  }
  
  /*
   * Expand files.
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
235
236
237
238
239
   * This function will expand the file structures, if the requested size exceeds
   * the current capacity and there is room for expansion.
   * Return <0 error code on error; 0 when nothing done; 1 when files were
   * expanded and execution may have blocked.
   * The files->file_lock should be held on entry, and will be held on exit.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
241
242
   */
  int expand_files(struct files_struct *files, int nr)
  {
badf16621   Dipankar Sarma   [PATCH] files: br...
243
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244

badf16621   Dipankar Sarma   [PATCH] files: br...
245
  	fdt = files_fdtable(files);
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
246
247
248
249
250
  
  	/*
  	 * N.B. For clone tasks sharing a files structure, this test
  	 * will limit the total number of files that can be opened.
  	 */
d554ed895   Jiri Slaby   fs: use rlimit he...
251
  	if (nr >= rlimit(RLIMIT_NOFILE))
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
252
  		return -EMFILE;
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
253
  	/* Do we need to expand? */
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
254
  	if (nr < fdt->max_fds)
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
255
  		return 0;
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
256

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
257
  	/* Can we expand? */
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
258
  	if (nr >= sysctl_nr_open)
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
259
260
261
262
  		return -EMFILE;
  
  	/* All good, so we try */
  	return expand_fdtable(files, nr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
263
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
264

02afc6267   Al Viro   [PATCH] dup_fd() ...
265
266
267
268
269
270
  static int count_open_files(struct fdtable *fdt)
  {
  	int size = fdt->max_fds;
  	int i;
  
  	/* Find the last open fd */
1fd36adcd   David Howells   Replace the fd_se...
271
272
  	for (i = size / BITS_PER_LONG; i > 0; ) {
  		if (fdt->open_fds[--i])
02afc6267   Al Viro   [PATCH] dup_fd() ...
273
274
  			break;
  	}
1fd36adcd   David Howells   Replace the fd_se...
275
  	i = (i + 1) * BITS_PER_LONG;
02afc6267   Al Viro   [PATCH] dup_fd() ...
276
277
  	return i;
  }
02afc6267   Al Viro   [PATCH] dup_fd() ...
278
279
280
281
282
283
284
285
286
287
288
289
290
  /*
   * Allocate a new files structure and copy contents from the
   * passed in files structure.
   * errorp will be valid only when the returned files_struct is NULL.
   */
  struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
  {
  	struct files_struct *newf;
  	struct file **old_fds, **new_fds;
  	int open_files, size, i;
  	struct fdtable *old_fdt, *new_fdt;
  
  	*errorp = -ENOMEM;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
291
  	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
02afc6267   Al Viro   [PATCH] dup_fd() ...
292
293
  	if (!newf)
  		goto out;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
294
295
296
297
298
299
  	atomic_set(&newf->count, 1);
  
  	spin_lock_init(&newf->file_lock);
  	newf->next_fd = 0;
  	new_fdt = &newf->fdtab;
  	new_fdt->max_fds = NR_OPEN_DEFAULT;
1fd36adcd   David Howells   Replace the fd_se...
300
301
  	new_fdt->close_on_exec = newf->close_on_exec_init;
  	new_fdt->open_fds = newf->open_fds_init;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
302
  	new_fdt->fd = &newf->fd_array[0];
afbec7fff   Al Viro   [PATCH] dup_fd() ...
303
  	new_fdt->next = NULL;
02afc6267   Al Viro   [PATCH] dup_fd() ...
304
305
  	spin_lock(&oldf->file_lock);
  	old_fdt = files_fdtable(oldf);
02afc6267   Al Viro   [PATCH] dup_fd() ...
306
307
308
309
  	open_files = count_open_files(old_fdt);
  
  	/*
  	 * Check whether we need to allocate a larger fd array and fd set.
02afc6267   Al Viro   [PATCH] dup_fd() ...
310
  	 */
adbecb128   Al Viro   [PATCH] dup_fd() ...
311
  	while (unlikely(open_files > new_fdt->max_fds)) {
02afc6267   Al Viro   [PATCH] dup_fd() ...
312
  		spin_unlock(&oldf->file_lock);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
313

a892e2d7d   Changli Gao   vfs: use kmalloc(...
314
315
  		if (new_fdt != &newf->fdtab)
  			__free_fdtable(new_fdt);
adbecb128   Al Viro   [PATCH] dup_fd() ...
316

9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
317
318
319
320
321
322
323
324
  		new_fdt = alloc_fdtable(open_files - 1);
  		if (!new_fdt) {
  			*errorp = -ENOMEM;
  			goto out_release;
  		}
  
  		/* beyond sysctl_nr_open; nothing to do */
  		if (unlikely(new_fdt->max_fds < open_files)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
325
  			__free_fdtable(new_fdt);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
326
  			*errorp = -EMFILE;
02afc6267   Al Viro   [PATCH] dup_fd() ...
327
  			goto out_release;
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
328
  		}
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
329

02afc6267   Al Viro   [PATCH] dup_fd() ...
330
331
332
333
334
335
336
  		/*
  		 * Reacquire the oldf lock and a pointer to its fd table
  		 * who knows it may have a new bigger fd table. We need
  		 * the latest pointer.
  		 */
  		spin_lock(&oldf->file_lock);
  		old_fdt = files_fdtable(oldf);
adbecb128   Al Viro   [PATCH] dup_fd() ...
337
  		open_files = count_open_files(old_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
338
339
340
341
  	}
  
  	old_fds = old_fdt->fd;
  	new_fds = new_fdt->fd;
1fd36adcd   David Howells   Replace the fd_se...
342
343
  	memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
  	memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
02afc6267   Al Viro   [PATCH] dup_fd() ...
344
345
346
347
348
349
350
351
352
353
354
355
  
  	for (i = open_files; i != 0; i--) {
  		struct file *f = *old_fds++;
  		if (f) {
  			get_file(f);
  		} else {
  			/*
  			 * The fd may be claimed in the fd bitmap but not yet
  			 * instantiated in the files array if a sibling thread
  			 * is partway through open().  So make sure that this
  			 * fd is available to the new process.
  			 */
1dce27c5a   David Howells   Wrap accesses to ...
356
  			__clear_open_fd(open_files - i, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
357
358
359
360
361
362
363
364
365
366
367
368
  		}
  		rcu_assign_pointer(*new_fds++, f);
  	}
  	spin_unlock(&oldf->file_lock);
  
  	/* compute the remainder to be cleared */
  	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
  
  	/* This is long word aligned thus could use a optimized version */
  	memset(new_fds, 0, size);
  
  	if (new_fdt->max_fds > open_files) {
1fd36adcd   David Howells   Replace the fd_se...
369
370
  		int left = (new_fdt->max_fds - open_files) / 8;
  		int start = open_files / BITS_PER_LONG;
02afc6267   Al Viro   [PATCH] dup_fd() ...
371

1fd36adcd   David Howells   Replace the fd_se...
372
373
  		memset(&new_fdt->open_fds[start], 0, left);
  		memset(&new_fdt->close_on_exec[start], 0, left);
02afc6267   Al Viro   [PATCH] dup_fd() ...
374
  	}
afbec7fff   Al Viro   [PATCH] dup_fd() ...
375
  	rcu_assign_pointer(newf->fdt, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
376
377
378
379
380
381
382
  	return newf;
  
  out_release:
  	kmem_cache_free(files_cachep, newf);
  out:
  	return NULL;
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
383
384
385
386
  /*
   * Set up the deferred-free list of @cpu: empty chain, initialised lock,
   * and a work item bound to free_fdtable_work().
   */
  static void __devinit fdtable_defer_list_init(int cpu)
  {
  	struct fdtable_defer *defer = &per_cpu(fdtable_defer_list, cpu);

  	defer->next = NULL;
  	spin_lock_init(&defer->lock);
  	INIT_WORK(&defer->wq, free_fdtable_work);
  }
  
  /*
   * Boot-time setup: initialise the deferred-free list of every possible CPU
   * and compute sysctl_nr_open_max.
   *
   * The maximum is the smaller of INT_MAX and the number of pointers that
   * fit in the address space, rounded down to a multiple of BITS_PER_LONG.
   */
  void __init files_defer_init(void)
  {
  	size_t ceiling;
  	int cpu;

  	for_each_possible_cpu(cpu)
  		fdtable_defer_list_init(cpu);

  	ceiling = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *));
  	sysctl_nr_open_max = ceiling & -BITS_PER_LONG;
  }
f52111b15   Al Viro   [PATCH] take init...
399
400
401
402
403
404
405
  
  /*
   * Statically initialised files_struct: one reference, using its own
   * embedded fdtable (NR_OPEN_DEFAULT slots) together with the embedded fd
   * array and bitmaps.
   */
  struct files_struct init_files = {
  	.count		= ATOMIC_INIT(1),
  	.fdt		= &init_files.fdtab,
  	.fdtab		= {
  		.max_fds	= NR_OPEN_DEFAULT,
  		.fd		= &init_files.fd_array[0],
  		.close_on_exec	= init_files.close_on_exec_init,
  		.open_fds	= init_files.open_fds_init,
  	},
  	/* the macro argument only names the lock; name it after this object */
  	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
  };
1027abe88   Al Viro   [PATCH] merge loc...
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
  
  /*
   * allocate a file descriptor, mark it busy.
   */
  int alloc_fd(unsigned start, unsigned flags)
  {
  	struct files_struct *files = current->files;
  	unsigned int fd;
  	int error;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  repeat:
  	fdt = files_fdtable(files);
  	fd = start;
  	if (fd < files->next_fd)
  		fd = files->next_fd;
  
  	if (fd < fdt->max_fds)
1fd36adcd   David Howells   Replace the fd_se...
430
  		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
1027abe88   Al Viro   [PATCH] merge loc...
431
432
433
434
435
436
437
438
439
440
441
442
443
444
  
  	error = expand_files(files, fd);
  	if (error < 0)
  		goto out;
  
  	/*
  	 * If we needed to expand the fs array we
  	 * might have blocked - try again.
  	 */
  	if (error)
  		goto repeat;
  
  	if (start <= files->next_fd)
  		files->next_fd = fd + 1;
1dce27c5a   David Howells   Wrap accesses to ...
445
  	__set_open_fd(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
446
  	if (flags & O_CLOEXEC)
1dce27c5a   David Howells   Wrap accesses to ...
447
  		__set_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
448
  	else
1dce27c5a   David Howells   Wrap accesses to ...
449
  		__clear_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
450
451
452
  	error = fd;
  #if 1
  	/* Sanity check */
7dc521579   Paul E. McKenney   vfs: Apply lockde...
453
  	if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
1027abe88   Al Viro   [PATCH] merge loc...
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
  		printk(KERN_WARNING "alloc_fd: slot %d not NULL!
  ", fd);
  		rcu_assign_pointer(fdt->fd[fd], NULL);
  	}
  #endif
  
  out:
  	spin_unlock(&files->file_lock);
  	return error;
  }
  
  /*
   * Allocate a descriptor for the current task with no lower bound and no
   * flags.  Returns whatever alloc_fd(0, 0) returns: the descriptor number,
   * or a negative error code.
   */
  int get_unused_fd(void)
  {
  	const unsigned lowest = 0, no_flags = 0;

  	return alloc_fd(lowest, no_flags);
  }
  EXPORT_SYMBOL(get_unused_fd);