Blame view

fs/file.c 23.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *  linux/fs/file.c
   *
   *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
   *
   *  Manage the dynamic fd arrays in the process files_struct.
   */
fe17f22d7   Al Viro   take purely descr...
8
  #include <linux/syscalls.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
9
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
  #include <linux/fs.h>
  #include <linux/mm.h>
6d4831c28   Andrew Morton   vfs: avoid large ...
12
  #include <linux/mmzone.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
  #include <linux/time.h>
d43c36dc6   Alexey Dobriyan   headers: remove s...
14
  #include <linux/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
16
17
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
18
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/bitops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
20
21
22
23
24
25
26
27
  #include <linux/interrupt.h>
  #include <linux/spinlock.h>
  #include <linux/rcupdate.h>
  #include <linux/workqueue.h>
  
  struct fdtable_defer {
  	spinlock_t lock;
  	struct work_struct wq;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
28
29
  	struct fdtable *next;
  };
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
30
  int sysctl_nr_open __read_mostly = 1024*1024;
eceea0b3d   Al Viro   [PATCH] avoid mul...
31
32
  int sysctl_nr_open_min = BITS_PER_LONG;
  int sysctl_nr_open_max = 1024 * 1024; /* raised later */
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
33

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
34
35
36
37
38
39
40
  /*
   * We use this list to defer free fdtables that have vmalloced
   * sets/arrays. By keeping a per-cpu list, we avoid having to embed
   * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
   * this per-task structure.
   */
  static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41

1fd36adcd   David Howells   Replace the fd_se...
42
  static void *alloc_fdmem(size_t size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
  {
6d4831c28   Andrew Morton   vfs: avoid large ...
44
45
46
47
48
49
50
51
52
  	/*
  	 * Very large allocations can stress page reclaim, so fall back to
  	 * vmalloc() if the allocation size will be considered "large" by the VM.
  	 */
  	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
  		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
  		if (data != NULL)
  			return data;
  	}
a892e2d7d   Changli Gao   vfs: use kmalloc(...
53
  	return vmalloc(size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
  }
a892e2d7d   Changli Gao   vfs: use kmalloc(...
55
  static void free_fdmem(void *ptr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56
  {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
57
  	is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
58
  }
a892e2d7d   Changli Gao   vfs: use kmalloc(...
59
  static void __free_fdtable(struct fdtable *fdt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60
  {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
61
62
63
  	free_fdmem(fdt->fd);
  	free_fdmem(fdt->open_fds);
  	kfree(fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
64
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
65

65f27f384   David Howells   WorkStruct: Pass ...
66
  static void free_fdtable_work(struct work_struct *work)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
67
  {
65f27f384   David Howells   WorkStruct: Pass ...
68
69
  	struct fdtable_defer *f =
  		container_of(work, struct fdtable_defer, wq);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
70
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
71

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
72
73
74
75
76
77
  	spin_lock_bh(&f->lock);
  	fdt = f->next;
  	f->next = NULL;
  	spin_unlock_bh(&f->lock);
  	while(fdt) {
  		struct fdtable *next = fdt->next;
a892e2d7d   Changli Gao   vfs: use kmalloc(...
78
79
  
  		__free_fdtable(fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
80
81
82
  		fdt = next;
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83

7cf4dc3c8   Al Viro   move files_struct...
84
  static void free_fdtable_rcu(struct rcu_head *rcu)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
85
86
  {
  	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
87
  	struct fdtable_defer *fddef;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
89
  	BUG_ON(!fdt);
1983e781d   Al Viro   trim free_fdtable...
90
  	BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
91

a892e2d7d   Changli Gao   vfs: use kmalloc(...
92
  	if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
93
  		kfree(fdt->fd);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
94
  		kfree(fdt->open_fds);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
95
  		kfree(fdt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
96
  	} else {
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
97
98
99
100
  		fddef = &get_cpu_var(fdtable_defer_list);
  		spin_lock(&fddef->lock);
  		fdt->next = fddef->next;
  		fddef->next = fdt;
593be07ae   Tejun Heo   [PATCH] file: kil...
101
102
  		/* vmallocs are handled from the workqueue context */
  		schedule_work(&fddef->wq);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
103
104
  		spin_unlock(&fddef->lock);
  		put_cpu_var(fdtable_defer_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
105
  	}
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
106
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
107
108
109
110
  /*
   * Copy the fd array and both bitmaps (open_fds, close_on_exec) from the
   * old fdtable into the new, larger one and zero the newly added tail.
   * Called with the files spinlock held.
   */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
111
  static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
112
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
113
  	unsigned int cpy, set;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
114

5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
115
  	BUG_ON(nfdt->max_fds < ofdt->max_fds);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
116
117
118
119
120
121
122
123
124
125
126
127
  
  	cpy = ofdt->max_fds * sizeof(struct file *);
  	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
  	memcpy(nfdt->fd, ofdt->fd, cpy);
  	memset((char *)(nfdt->fd) + cpy, 0, set);
  
  	cpy = ofdt->max_fds / BITS_PER_BYTE;
  	set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
  	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
  	memset((char *)(nfdt->open_fds) + cpy, 0, set);
  	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
  	memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
  }
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
129
  static struct fdtable * alloc_fdtable(unsigned int nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
130
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
131
  	struct fdtable *fdt;
1fd36adcd   David Howells   Replace the fd_se...
132
  	void *data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
133

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
134
  	/*
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
135
136
137
138
139
  	 * Figure out how many fds we actually want to support in this fdtable.
  	 * Allocation steps are keyed to the size of the fdarray, since it
  	 * grows far faster than any of the other dynamic data. We try to fit
  	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
  	 * and growing in powers of two from there on.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
140
  	 */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
141
142
143
  	nr /= (1024 / sizeof(struct file *));
  	nr = roundup_pow_of_two(nr + 1);
  	nr *= (1024 / sizeof(struct file *));
5c598b342   Al Viro   [PATCH] fix sysct...
144
145
146
147
148
149
150
151
152
153
  	/*
  	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
  	 * had been set lower between the check in expand_files() and here.  Deal
  	 * with that in caller, it's cheaper that way.
  	 *
  	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
  	 * bitmaps handling below becomes unpleasant, to put it mildly...
  	 */
  	if (unlikely(nr > sysctl_nr_open))
  		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
154

5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
155
156
  	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
  	if (!fdt)
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
157
  		goto out;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
158
159
160
161
  	fdt->max_fds = nr;
  	data = alloc_fdmem(nr * sizeof(struct file *));
  	if (!data)
  		goto out_fdt;
1fd36adcd   David Howells   Replace the fd_se...
162
163
164
  	fdt->fd = data;
  
  	data = alloc_fdmem(max_t(size_t,
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
165
166
167
  				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
  	if (!data)
  		goto out_arr;
1fd36adcd   David Howells   Replace the fd_se...
168
  	fdt->open_fds = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
169
  	data += nr / BITS_PER_BYTE;
1fd36adcd   David Howells   Replace the fd_se...
170
  	fdt->close_on_exec = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
171
  	fdt->next = NULL;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
172
  	return fdt;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
173
174
  
  out_arr:
a892e2d7d   Changli Gao   vfs: use kmalloc(...
175
  	free_fdmem(fdt->fd);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
176
  out_fdt:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
177
  	kfree(fdt);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
178
  out:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
179
180
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
182
  /*
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
183
184
185
186
187
   * Expand the file descriptor table.
   * This function will allocate a new fdtable and both fd array and fdset, of
   * the given size.
   * Return <0 error code on error; 1 on successful completion.
   * The files->file_lock should be held on entry, and will be held on exit.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
188
189
190
191
192
   */
  static int expand_fdtable(struct files_struct *files, int nr)
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
  {
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
193
  	struct fdtable *new_fdt, *cur_fdt;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
194
195
  
  	spin_unlock(&files->file_lock);
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
196
  	new_fdt = alloc_fdtable(nr);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
197
  	spin_lock(&files->file_lock);
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
198
199
  	if (!new_fdt)
  		return -ENOMEM;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
200
  	/*
5c598b342   Al Viro   [PATCH] fix sysct...
201
202
203
204
  	 * extremely unlikely race - sysctl_nr_open decreased between the check in
  	 * caller and alloc_fdtable().  Cheaper to catch it here...
  	 */
  	if (unlikely(new_fdt->max_fds <= nr)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
205
  		__free_fdtable(new_fdt);
5c598b342   Al Viro   [PATCH] fix sysct...
206
207
208
  		return -EMFILE;
  	}
  	/*
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
209
210
  	 * Check again since another task may have expanded the fd table while
  	 * we dropped the lock
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
211
  	 */
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
212
  	cur_fdt = files_fdtable(files);
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
213
  	if (nr >= cur_fdt->max_fds) {
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
214
215
216
  		/* Continue as planned */
  		copy_fdtable(new_fdt, cur_fdt);
  		rcu_assign_pointer(files->fdt, new_fdt);
4fd45812c   Vadim Lobanov   [PATCH] fdtable: ...
217
  		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
1983e781d   Al Viro   trim free_fdtable...
218
  			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
219
  	} else {
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
220
  		/* Somebody else expanded, so undo our attempt */
a892e2d7d   Changli Gao   vfs: use kmalloc(...
221
  		__free_fdtable(new_fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
222
  	}
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
223
  	return 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
225
226
227
  }
  
  /*
   * Expand files.
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
228
229
230
231
232
   * This function will expand the file structures, if the requested size exceeds
   * the current capacity and there is room for expansion.
   * Return <0 error code on error; 0 when nothing done; 1 when files were
   * expanded and execution may have blocked.
   * The files->file_lock should be held on entry, and will be held on exit.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
   */
ad47bd725   Al Viro   make expand_files...
234
  static int expand_files(struct files_struct *files, int nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235
  {
badf16621   Dipankar Sarma   [PATCH] files: br...
236
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
237

badf16621   Dipankar Sarma   [PATCH] files: br...
238
  	fdt = files_fdtable(files);
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
239

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
240
  	/* Do we need to expand? */
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
241
  	if (nr < fdt->max_fds)
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
242
  		return 0;
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
243

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
244
  	/* Can we expand? */
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
245
  	if (nr >= sysctl_nr_open)
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
246
247
248
249
  		return -EMFILE;
  
  	/* All good, so we try */
  	return expand_fdtable(files, nr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
251

b8318b01a   Al Viro   take __{set,clear...
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
  /* Set bit @fd in the close_on_exec bitmap of @fdt. */
  static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
  {
  	unsigned long *cloexec_map = fdt->close_on_exec;

  	__set_bit(fd, cloexec_map);
  }
  
  /* Clear bit @fd in the close_on_exec bitmap of @fdt. */
  static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
  {
  	unsigned long *cloexec_map = fdt->close_on_exec;

  	__clear_bit(fd, cloexec_map);
  }
  
  /* Set bit @fd in the open_fds bitmap of @fdt. */
  static inline void __set_open_fd(int fd, struct fdtable *fdt)
  {
  	unsigned long *open_map = fdt->open_fds;

  	__set_bit(fd, open_map);
  }
  
  /* Clear bit @fd in the open_fds bitmap of @fdt. */
  static inline void __clear_open_fd(int fd, struct fdtable *fdt)
  {
  	unsigned long *open_map = fdt->open_fds;

  	__clear_bit(fd, open_map);
  }
02afc6267   Al Viro   [PATCH] dup_fd() ...
271
272
273
274
275
276
  static int count_open_files(struct fdtable *fdt)
  {
  	int size = fdt->max_fds;
  	int i;
  
  	/* Find the last open fd */
1fd36adcd   David Howells   Replace the fd_se...
277
278
  	for (i = size / BITS_PER_LONG; i > 0; ) {
  		if (fdt->open_fds[--i])
02afc6267   Al Viro   [PATCH] dup_fd() ...
279
280
  			break;
  	}
1fd36adcd   David Howells   Replace the fd_se...
281
  	i = (i + 1) * BITS_PER_LONG;
02afc6267   Al Viro   [PATCH] dup_fd() ...
282
283
  	return i;
  }
02afc6267   Al Viro   [PATCH] dup_fd() ...
284
285
286
287
288
289
290
291
292
293
294
295
296
  /*
   * Allocate a new files structure and copy contents from the
   * passed in files structure.
   * errorp will be valid only when the returned files_struct is NULL.
   */
  struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
  {
  	struct files_struct *newf;
  	struct file **old_fds, **new_fds;
  	int open_files, size, i;
  	struct fdtable *old_fdt, *new_fdt;
  
  	*errorp = -ENOMEM;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
297
  	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
02afc6267   Al Viro   [PATCH] dup_fd() ...
298
299
  	if (!newf)
  		goto out;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
300
301
302
303
304
305
  	atomic_set(&newf->count, 1);
  
  	spin_lock_init(&newf->file_lock);
  	newf->next_fd = 0;
  	new_fdt = &newf->fdtab;
  	new_fdt->max_fds = NR_OPEN_DEFAULT;
1fd36adcd   David Howells   Replace the fd_se...
306
307
  	new_fdt->close_on_exec = newf->close_on_exec_init;
  	new_fdt->open_fds = newf->open_fds_init;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
308
  	new_fdt->fd = &newf->fd_array[0];
afbec7fff   Al Viro   [PATCH] dup_fd() ...
309
  	new_fdt->next = NULL;
02afc6267   Al Viro   [PATCH] dup_fd() ...
310
311
  	spin_lock(&oldf->file_lock);
  	old_fdt = files_fdtable(oldf);
02afc6267   Al Viro   [PATCH] dup_fd() ...
312
313
314
315
  	open_files = count_open_files(old_fdt);
  
  	/*
  	 * Check whether we need to allocate a larger fd array and fd set.
02afc6267   Al Viro   [PATCH] dup_fd() ...
316
  	 */
adbecb128   Al Viro   [PATCH] dup_fd() ...
317
  	while (unlikely(open_files > new_fdt->max_fds)) {
02afc6267   Al Viro   [PATCH] dup_fd() ...
318
  		spin_unlock(&oldf->file_lock);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
319

a892e2d7d   Changli Gao   vfs: use kmalloc(...
320
321
  		if (new_fdt != &newf->fdtab)
  			__free_fdtable(new_fdt);
adbecb128   Al Viro   [PATCH] dup_fd() ...
322

9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
323
324
325
326
327
328
329
330
  		new_fdt = alloc_fdtable(open_files - 1);
  		if (!new_fdt) {
  			*errorp = -ENOMEM;
  			goto out_release;
  		}
  
  		/* beyond sysctl_nr_open; nothing to do */
  		if (unlikely(new_fdt->max_fds < open_files)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
331
  			__free_fdtable(new_fdt);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
332
  			*errorp = -EMFILE;
02afc6267   Al Viro   [PATCH] dup_fd() ...
333
  			goto out_release;
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
334
  		}
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
335

02afc6267   Al Viro   [PATCH] dup_fd() ...
336
337
338
339
340
341
342
  		/*
  		 * Reacquire the oldf lock and a pointer to its fd table
  		 * who knows it may have a new bigger fd table. We need
  		 * the latest pointer.
  		 */
  		spin_lock(&oldf->file_lock);
  		old_fdt = files_fdtable(oldf);
adbecb128   Al Viro   [PATCH] dup_fd() ...
343
  		open_files = count_open_files(old_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
344
345
346
347
  	}
  
  	old_fds = old_fdt->fd;
  	new_fds = new_fdt->fd;
1fd36adcd   David Howells   Replace the fd_se...
348
349
  	memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
  	memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
02afc6267   Al Viro   [PATCH] dup_fd() ...
350
351
352
353
354
355
356
357
358
359
360
361
  
  	for (i = open_files; i != 0; i--) {
  		struct file *f = *old_fds++;
  		if (f) {
  			get_file(f);
  		} else {
  			/*
  			 * The fd may be claimed in the fd bitmap but not yet
  			 * instantiated in the files array if a sibling thread
  			 * is partway through open().  So make sure that this
  			 * fd is available to the new process.
  			 */
1dce27c5a   David Howells   Wrap accesses to ...
362
  			__clear_open_fd(open_files - i, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
363
364
365
366
367
368
369
370
371
372
373
374
  		}
  		rcu_assign_pointer(*new_fds++, f);
  	}
  	spin_unlock(&oldf->file_lock);
  
  	/* compute the remainder to be cleared */
  	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
  
  	/* This is long word aligned thus could use a optimized version */
  	memset(new_fds, 0, size);
  
  	if (new_fdt->max_fds > open_files) {
1fd36adcd   David Howells   Replace the fd_se...
375
376
  		int left = (new_fdt->max_fds - open_files) / 8;
  		int start = open_files / BITS_PER_LONG;
02afc6267   Al Viro   [PATCH] dup_fd() ...
377

1fd36adcd   David Howells   Replace the fd_se...
378
379
  		memset(&new_fdt->open_fds[start], 0, left);
  		memset(&new_fdt->close_on_exec[start], 0, left);
02afc6267   Al Viro   [PATCH] dup_fd() ...
380
  	}
afbec7fff   Al Viro   [PATCH] dup_fd() ...
381
  	rcu_assign_pointer(newf->fdt, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
382
383
384
385
386
387
388
  	return newf;
  
  out_release:
  	kmem_cache_free(files_cachep, newf);
  out:
  	return NULL;
  }
7cf4dc3c8   Al Viro   move files_struct...
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
  /*
   * Walk the open_fds bitmap of @files one word at a time and, for every
   * set bit, detach the file from the fd array and filp_close() it.
   */
  static void close_files(struct files_struct * files)
  {
  	struct fdtable *fdt;
  	int word, fd;

  	/*
  	 * It is safe to dereference the fd table without RCU or
  	 * ->file_lock because this is the last reference to the
  	 * files structure.  But use RCU to shut RCU-lockdep up.
  	 */
  	rcu_read_lock();
  	fdt = files_fdtable(files);
  	rcu_read_unlock();

  	for (word = 0; ; word++) {
  		unsigned long bits;

  		/* first descriptor covered by this bitmap word */
  		fd = word * BITS_PER_LONG;
  		if (fd >= fdt->max_fds)
  			break;

  		/* shift the word down bit by bit; stop once no bits remain */
  		for (bits = fdt->open_fds[word]; bits; bits >>= 1, fd++) {
  			struct file *victim;

  			if (!(bits & 1))
  				continue;

  			/* the bit may be set while the slot is still empty */
  			victim = xchg(&fdt->fd[fd], NULL);
  			if (!victim)
  				continue;

  			filp_close(victim, files);
  			cond_resched();
  		}
  	}
  }
  
  struct files_struct *get_files_struct(struct task_struct *task)
  {
  	struct files_struct *files;
  
  	task_lock(task);
  	files = task->files;
  	if (files)
  		atomic_inc(&files->count);
  	task_unlock(task);
  
  	return files;
  }
  
  void put_files_struct(struct files_struct *files)
  {
  	struct fdtable *fdt;
  
  	if (atomic_dec_and_test(&files->count)) {
  		close_files(files);
b9e02af0a   Al Viro   don't bother with...
443
  		/* not really needed, since nobody can see us */
7cf4dc3c8   Al Viro   move files_struct...
444
445
  		rcu_read_lock();
  		fdt = files_fdtable(files);
7cf4dc3c8   Al Viro   move files_struct...
446
  		rcu_read_unlock();
b9e02af0a   Al Viro   don't bother with...
447
448
449
450
  		/* free the arrays if they are not embedded */
  		if (fdt != &files->fdtab)
  			__free_fdtable(fdt);
  		kmem_cache_free(files_cachep, files);
7cf4dc3c8   Al Viro   move files_struct...
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
  	}
  }
  
  /*
   * Make @files the files_struct of the current task and drop the
   * reference to the one it replaces.
   */
  void reset_files_struct(struct files_struct *files)
  {
  	struct task_struct *self = current;
  	struct files_struct *prev = self->files;

  	/* the pointer swap itself is done under task_lock() */
  	task_lock(self);
  	self->files = files;
  	task_unlock(self);

  	put_files_struct(prev);
  }
  
  /*
   * Detach @tsk from its files_struct (if it has one) and drop the
   * task's reference to it.
   */
  void exit_files(struct task_struct *tsk)
  {
  	struct files_struct *doomed = tsk->files;

  	if (!doomed)
  		return;

  	/* clear the task's pointer under task_lock() before the put */
  	task_lock(tsk);
  	tsk->files = NULL;
  	task_unlock(tsk);

  	put_files_struct(doomed);
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
477
478
479
480
  static void __devinit fdtable_defer_list_init(int cpu)
  {
  	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
  	spin_lock_init(&fddef->lock);
65f27f384   David Howells   WorkStruct: Pass ...
481
  	INIT_WORK(&fddef->wq, free_fdtable_work);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
482
483
484
485
486
487
  	fddef->next = NULL;
  }
  
  void __init files_defer_init(void)
  {
  	int i;
0a9450227   KAMEZAWA Hiroyuki   [PATCH] for_each_...
488
  	for_each_possible_cpu(i)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
489
  		fdtable_defer_list_init(i);
eceea0b3d   Al Viro   [PATCH] avoid mul...
490
491
  	sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
  			     -BITS_PER_LONG;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
492
  }
f52111b15   Al Viro   [PATCH] take init...
493
494
495
496
497
498
499
  
  struct files_struct init_files = {
  	.count		= ATOMIC_INIT(1),
  	.fdt		= &init_files.fdtab,
  	.fdtab		= {
  		.max_fds	= NR_OPEN_DEFAULT,
  		.fd		= &init_files.fd_array[0],
1fd36adcd   David Howells   Replace the fd_se...
500
501
  		.close_on_exec	= init_files.close_on_exec_init,
  		.open_fds	= init_files.open_fds_init,
f52111b15   Al Viro   [PATCH] take init...
502
503
504
  	},
  	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
  };
1027abe88   Al Viro   [PATCH] merge loc...
505
506
507
508
  
  /*
   * allocate a file descriptor, mark it busy.
   */
dcfadfa4e   Al Viro   new helper: __all...
509
510
  int __alloc_fd(struct files_struct *files,
  	       unsigned start, unsigned end, unsigned flags)
1027abe88   Al Viro   [PATCH] merge loc...
511
  {
1027abe88   Al Viro   [PATCH] merge loc...
512
513
514
515
516
517
518
519
520
521
522
523
  	unsigned int fd;
  	int error;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  repeat:
  	fdt = files_fdtable(files);
  	fd = start;
  	if (fd < files->next_fd)
  		fd = files->next_fd;
  
  	if (fd < fdt->max_fds)
1fd36adcd   David Howells   Replace the fd_se...
524
  		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
1027abe88   Al Viro   [PATCH] merge loc...
525

f33ff9927   Al Viro   take rlimit check...
526
527
528
529
530
531
532
  	/*
  	 * N.B. For clone tasks sharing a files structure, this test
  	 * will limit the total number of files that can be opened.
  	 */
  	error = -EMFILE;
  	if (fd >= end)
  		goto out;
1027abe88   Al Viro   [PATCH] merge loc...
533
534
535
536
537
538
539
540
541
542
543
544
545
  	error = expand_files(files, fd);
  	if (error < 0)
  		goto out;
  
  	/*
  	 * If we needed to expand the fs array we
  	 * might have blocked - try again.
  	 */
  	if (error)
  		goto repeat;
  
  	if (start <= files->next_fd)
  		files->next_fd = fd + 1;
1dce27c5a   David Howells   Wrap accesses to ...
546
  	__set_open_fd(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
547
  	if (flags & O_CLOEXEC)
1dce27c5a   David Howells   Wrap accesses to ...
548
  		__set_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
549
  	else
1dce27c5a   David Howells   Wrap accesses to ...
550
  		__clear_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
551
552
553
  	error = fd;
  #if 1
  	/* Sanity check */
7dc521579   Paul E. McKenney   vfs: Apply lockde...
554
  	if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
1027abe88   Al Viro   [PATCH] merge loc...
555
556
557
558
559
560
561
562
563
564
  		printk(KERN_WARNING "alloc_fd: slot %d not NULL!
  ", fd);
  		rcu_assign_pointer(fdt->fd[fd], NULL);
  	}
  #endif
  
  out:
  	spin_unlock(&files->file_lock);
  	return error;
  }
ad47bd725   Al Viro   make expand_files...
565
  static int alloc_fd(unsigned start, unsigned flags)
dcfadfa4e   Al Viro   new helper: __all...
566
567
568
  {
  	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
  }
1a7bd2265   Al Viro   make get_unused_f...
569
  int get_unused_fd_flags(unsigned flags)
1027abe88   Al Viro   [PATCH] merge loc...
570
  {
dcfadfa4e   Al Viro   new helper: __all...
571
  	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
1027abe88   Al Viro   [PATCH] merge loc...
572
  }
1a7bd2265   Al Viro   make get_unused_f...
573
  EXPORT_SYMBOL(get_unused_fd_flags);
56007cae9   Al Viro   move put_unused_f...
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
  
  /*
   * Mark @fd free in the open_fds bitmap of @files and pull the next_fd
   * hint down to @fd if the hint currently sits above it.  Takes no lock
   * itself.
   */
  static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  {
  	__clear_open_fd(fd, files_fdtable(files));

  	if (files->next_fd > fd)
  		files->next_fd = fd;
  }
  
  void put_unused_fd(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	spin_lock(&files->file_lock);
  	__put_unused_fd(files, fd);
  	spin_unlock(&files->file_lock);
  }
  
  EXPORT_SYMBOL(put_unused_fd);
  
  /*
   * Install a file pointer in the fd array.
   *
   * The VFS is full of places where we drop the files lock between
   * setting the open_fds bitmap and installing the file in the file
   * array.  At any such point, we are vulnerable to a dup2() race
   * installing a file in the array before us.  We need to detect this and
   * fput() the struct file we are about to overwrite in this case.
   *
   * It should never happen - if we allow dup2() do it, _really_ bad things
   * will follow.
f869e8a7f   Al Viro   expose a low-leve...
604
605
606
607
608
609
610
   *
   * NOTE: __fd_install() variant is really, really low-level; don't
   * use it unless you are forced to by truly lousy API shoved down
   * your throat.  'files' *MUST* be either current->files or obtained
   * by get_files_struct(current) done by whoever had given it to you,
   * or really bad things will happen.  Normally you want to use
   * fd_install() instead.
56007cae9   Al Viro   move put_unused_f...
611
   */
f869e8a7f   Al Viro   expose a low-leve...
612
613
  void __fd_install(struct files_struct *files, unsigned int fd,
  		struct file *file)
56007cae9   Al Viro   move put_unused_f...
614
  {
56007cae9   Al Viro   move put_unused_f...
615
616
617
618
619
620
621
  	struct fdtable *fdt;
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	BUG_ON(fdt->fd[fd] != NULL);
  	rcu_assign_pointer(fdt->fd[fd], file);
  	spin_unlock(&files->file_lock);
  }
f869e8a7f   Al Viro   expose a low-leve...
622
623
624
625
  /* Install @file at descriptor @fd in the current task's files_struct. */
  void fd_install(unsigned int fd, struct file *file)
  {
  	struct files_struct *cur_files = current->files;

  	__fd_install(cur_files, fd, file);
  }
56007cae9   Al Viro   move put_unused_f...
626
  EXPORT_SYMBOL(fd_install);
0ee8cdfe6   Al Viro   take fget() and f...
627

483ce1d4b   Al Viro   take descriptor-r...
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
  /*
   * The same warnings as for __alloc_fd()/__fd_install() apply here...
   */
  int __close_fd(struct files_struct *files, unsigned fd)
  {
  	struct fdtable *fdt;
  	struct file *victim = NULL;

  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);

  	/* only look at the slot when @fd lies inside the current table */
  	if (fd < fdt->max_fds)
  		victim = fdt->fd[fd];

  	if (victim) {
  		/* empty the slot, drop its cloexec bit, free the number */
  		rcu_assign_pointer(fdt->fd[fd], NULL);
  		__clear_close_on_exec(fd, fdt);
  		__put_unused_fd(files, fd);
  	}
  	spin_unlock(&files->file_lock);

  	/* out-of-range descriptor or empty slot */
  	if (!victim)
  		return -EBADF;

  	/* filp_close() is called only after the lock has been dropped */
  	return filp_close(victim, files);
  }
6a6d27de3   Al Viro   take close-on-exe...
653
654
655
656
657
658
  void do_close_on_exec(struct files_struct *files)
  {
  	unsigned i;
  	struct fdtable *fdt;
  
  	/* exec unshares first */
6a6d27de3   Al Viro   take close-on-exe...
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
  	spin_lock(&files->file_lock);
  	for (i = 0; ; i++) {
  		unsigned long set;
  		unsigned fd = i * BITS_PER_LONG;
  		fdt = files_fdtable(files);
  		if (fd >= fdt->max_fds)
  			break;
  		set = fdt->close_on_exec[i];
  		if (!set)
  			continue;
  		fdt->close_on_exec[i] = 0;
  		for ( ; set ; fd++, set >>= 1) {
  			struct file *file;
  			if (!(set & 1))
  				continue;
  			file = fdt->fd[fd];
  			if (!file)
  				continue;
  			rcu_assign_pointer(fdt->fd[fd], NULL);
  			__put_unused_fd(files, fd);
  			spin_unlock(&files->file_lock);
  			filp_close(file, files);
  			cond_resched();
  			spin_lock(&files->file_lock);
  		}
  
  	}
  	spin_unlock(&files->file_lock);
  }
0ee8cdfe6   Al Viro   take fget() and f...
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
  /*
   * Look up @fd in current->files and take a reference on the file.
   *
   * The lookup and the f_count increment both happen inside an RCU
   * read-side section.  NULL is returned when fcheck_files() finds
   * nothing, when the file has FMODE_PATH set, or when f_count could not
   * be incremented from a non-zero value.
   */
  struct file *fget(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  
  	rcu_read_lock();
  	file = fcheck_files(files, fd);
  	/* FMODE_PATH files are refused; otherwise try to grab a reference */
  	if (file && ((file->f_mode & FMODE_PATH) ||
  		     !atomic_long_inc_not_zero(&file->f_count)))
  		file = NULL;	/* File object ref couldn't be taken */
  	rcu_read_unlock();
  
  	return file;
  }
  
  EXPORT_SYMBOL(fget);
  
  /*
   * Same lookup as fget(), but without the FMODE_PATH test: any file found
   * by fcheck_files() is returned provided its f_count could be
   * incremented from a non-zero value.  Returns NULL otherwise.
   */
  struct file *fget_raw(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  
  	rcu_read_lock();
  	file = fcheck_files(files, fd);
  	if (file && !atomic_long_inc_not_zero(&file->f_count))
  		file = NULL;	/* File object ref couldn't be taken */
  	rcu_read_unlock();
  
  	return file;
  }
  
  EXPORT_SYMBOL(fget_raw);
  
  /*
   * Lightweight file lookup - the refcount is left alone when the fd table
   * is not shared (files->count == 1).
   *
   * This may replace fget() only if all of the following hold:
   * 1) fput_light() is called before the syscall exits and control goes
   *    back to userspace (i.e. the returned struct file * must not be
   *    remembered across the return to userspace).
   * 2) filp_close() is not called on the returned struct file * between
   *    fget_light() and fput_light().
   * 3) The current task is not cloned between fget_light() and
   *    fput_light().
   *
   * *fput_needed is set to 1 when a reference was taken and to 0
   * otherwise; pass it on to the matching fput_light().
   *
   * Files with FMODE_PATH set are reported as NULL.
   */
  struct file *fget_light(unsigned int fd, int *fput_needed)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  
  	*fput_needed = 0;
  
  	if (atomic_read(&files->count) == 1) {
  		/* unshared table: plain lookup, no reference taken */
  		file = fcheck_files(files, fd);
  		if (file && (file->f_mode & FMODE_PATH))
  			return NULL;
  		return file;
  	}
  
  	rcu_read_lock();
  	file = fcheck_files(files, fd);
  	if (file) {
  		if ((file->f_mode & FMODE_PATH) ||
  		    !atomic_long_inc_not_zero(&file->f_count))
  			/* Didn't get the reference, someone's freed */
  			file = NULL;
  		else
  			*fput_needed = 1;
  	}
  	rcu_read_unlock();
  
  	return file;
  }
  EXPORT_SYMBOL(fget_light);
0ee8cdfe6   Al Viro   take fget() and f...
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
  
  /*
   * fget_light() without the FMODE_PATH test.
   *
   * When files->count is 1 the result of fcheck_files() is returned as is
   * and *fput_needed stays 0.  Otherwise the lookup is done under
   * rcu_read_lock() and *fput_needed is set to 1 once f_count has been
   * incremented; if that increment fails NULL is returned.
   */
  struct file *fget_raw_light(unsigned int fd, int *fput_needed)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  
  	*fput_needed = 0;
  
  	/* unshared table: plain lookup, no reference taken */
  	if (atomic_read(&files->count) == 1)
  		return fcheck_files(files, fd);
  
  	rcu_read_lock();
  	file = fcheck_files(files, fd);
  	if (file) {
  		if (!atomic_long_inc_not_zero(&file->f_count))
  			/* Didn't get the reference, someone's freed */
  			file = NULL;
  		else
  			*fput_needed = 1;
  	}
  	rcu_read_unlock();
  
  	return file;
  }
fe17f22d7   Al Viro   take purely descr...
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
  
  /*
   * Set (non-zero @flag) or clear (zero @flag) the close-on-exec bit of
   * @fd in current->files.  The update is made under files->file_lock.
   * @fd is not range-checked here.
   */
  void set_close_on_exec(unsigned int fd, int flag)
  {
  	struct files_struct *files = current->files;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	if (!flag)
  		__clear_close_on_exec(fd, fdt);
  	else
  		__set_close_on_exec(fd, fdt);
  	spin_unlock(&files->file_lock);
  }
  
  /*
   * Report the close-on-exec state of @fd in current->files, as given by
   * close_on_exec().  The table is read inside an RCU read-side section;
   * @fd is not range-checked here.
   */
  bool get_close_on_exec(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	bool cloexec;
  
  	rcu_read_lock();
  	cloexec = close_on_exec(fd, files_fdtable(files));
  	rcu_read_unlock();
  
  	return cloexec;
  }
8280d1617   Al Viro   new helper: repla...
819
820
  static int do_dup2(struct files_struct *files,
  	struct file *file, unsigned fd, unsigned flags)
fe17f22d7   Al Viro   take purely descr...
821
  {
8280d1617   Al Viro   new helper: repla...
822
  	struct file *tofree;
fe17f22d7   Al Viro   take purely descr...
823
  	struct fdtable *fdt;
fe17f22d7   Al Viro   take purely descr...
824
825
826
827
828
829
830
831
832
833
834
835
836
837
  	/*
  	 * We need to detect attempts to do dup2() over allocated but still
  	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
  	 * extra work in their equivalent of fget() - they insert struct
  	 * file immediately after grabbing descriptor, mark it larval if
  	 * more work (e.g. actual opening) is needed and make sure that
  	 * fget() treats larval files as absent.  Potentially interesting,
  	 * but while extra work in fget() is trivial, locking implications
  	 * and amount of surgery on open()-related paths in VFS are not.
  	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
  	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
  	 * scope of POSIX or SUS, since neither considers shared descriptor
  	 * tables and this condition does not arise without those.
  	 */
fe17f22d7   Al Viro   take purely descr...
838
  	fdt = files_fdtable(files);
8280d1617   Al Viro   new helper: repla...
839
840
841
  	tofree = fdt->fd[fd];
  	if (!tofree && fd_is_open(fd, fdt))
  		goto Ebusy;
fe17f22d7   Al Viro   take purely descr...
842
  	get_file(file);
8280d1617   Al Viro   new helper: repla...
843
844
  	rcu_assign_pointer(fdt->fd[fd], file);
  	__set_open_fd(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
845
  	if (flags & O_CLOEXEC)
8280d1617   Al Viro   new helper: repla...
846
  		__set_close_on_exec(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
847
  	else
8280d1617   Al Viro   new helper: repla...
848
  		__clear_close_on_exec(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
849
850
851
852
  	spin_unlock(&files->file_lock);
  
  	if (tofree)
  		filp_close(tofree, files);
8280d1617   Al Viro   new helper: repla...
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
  	return fd;
  
  Ebusy:
  	spin_unlock(&files->file_lock);
  	return -EBUSY;
  }
  
  int replace_fd(unsigned fd, struct file *file, unsigned flags)
  {
  	int err;
  	struct files_struct *files = current->files;
  
  	if (!file)
  		return __close_fd(files, fd);
  
  	if (fd >= rlimit(RLIMIT_NOFILE))
08f05c497   Al Viro   Return the right ...
869
  		return -EBADF;
8280d1617   Al Viro   new helper: repla...
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
  
  	spin_lock(&files->file_lock);
  	err = expand_files(files, fd);
  	if (unlikely(err < 0))
  		goto out_unlock;
  	return do_dup2(files, file, fd, flags);
  
  out_unlock:
  	spin_unlock(&files->file_lock);
  	return err;
  }
  
  /*
   * dup3(): make @newfd refer to the file behind @oldfd.
   *
   * Returns
   *   -EINVAL  if @flags has any bit other than O_CLOEXEC set, or if
   *            @oldfd == @newfd;
   *   -EBADF   if @newfd is not below RLIMIT_NOFILE, if fcheck() finds no
   *            file for @oldfd, or if expand_files() fails with -EMFILE;
   *   any other negative value from expand_files() unchanged;
   *   otherwise the result of do_dup2().
   */
  SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  	int err;
  
  	if (flags & ~O_CLOEXEC)
  		return -EINVAL;
  
  	if (unlikely(oldfd == newfd))
  		return -EINVAL;
  
  	if (newfd >= rlimit(RLIMIT_NOFILE))
  		return -EBADF;
  
  	spin_lock(&files->file_lock);
  	/* expand first, look @oldfd up second - same order as ever */
  	err = expand_files(files, newfd);
  	file = fcheck(oldfd);
  	if (unlikely(!file)) {
  		/* a missing @oldfd takes precedence over expansion errors */
  		err = -EBADF;
  	} else if (unlikely(err < 0)) {
  		if (err == -EMFILE)
  			err = -EBADF;
  	} else {
  		/* do_dup2() releases files->file_lock for us */
  		return do_dup2(files, file, newfd, flags);
  	}
  	spin_unlock(&files->file_lock);
  	return err;
  }
  
  /*
   * dup2(): forwards to sys_dup3() with no flags, except when both
   * descriptors are equal.  In that case nothing is duplicated: @oldfd is
   * returned if fcheck_files() finds a file for it, -EBADF otherwise.
   */
  SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
  {
  	struct files_struct *files;
  	int retval;
  
  	if (likely(newfd != oldfd))
  		return sys_dup3(oldfd, newfd, 0);
  
  	/* corner case */
  	files = current->files;
  	retval = oldfd;
  
  	rcu_read_lock();
  	if (!fcheck_files(files, oldfd))
  		retval = -EBADF;
  	rcu_read_unlock();
  
  	return retval;
  }
  
  /*
   * dup(): obtain the file behind @fildes with fget_raw() and install it
   * at the descriptor returned by get_unused_fd().
   *
   * Returns the new descriptor, -EBADF when fget_raw() yields nothing, or
   * the negative value from get_unused_fd(); in that last case the
   * reference from fget_raw() is dropped again with fput().
   */
  SYSCALL_DEFINE1(dup, unsigned int, fildes)
  {
  	struct file *file = fget_raw(fildes);
  	int ret;
  
  	if (!file)
  		return -EBADF;
  
  	ret = get_unused_fd();
  	if (ret < 0)
  		fput(file);
  	else
  		fd_install(ret, file);
  
  	return ret;
  }
  
  /*
   * Obtain a descriptor from alloc_fd(@from, @flags) and install @file
   * there, taking an extra reference on @file with get_file().
   *
   * Returns the new descriptor, -EINVAL when @from is not below
   * RLIMIT_NOFILE, or the negative value returned by alloc_fd().
   */
  int f_dupfd(unsigned int from, struct file *file, unsigned flags)
  {
  	int newfd;
  
  	if (from >= rlimit(RLIMIT_NOFILE))
  		return -EINVAL;
  
  	newfd = alloc_fd(from, flags);
  	if (newfd < 0)
  		return newfd;
  
  	get_file(file);
  	fd_install(newfd, file);
  	return newfd;
  }
c3c073f80   Al Viro   new helper: itera...
956
957
958
959
960
961
  
  /*
   * Call @f(@p, file, fd) for every non-empty slot of @files, starting at
   * descriptor @n and going up to the table's max_fds.
   *
   * The walk stops at the first non-zero return from @f, and that value is
   * returned; 0 is returned when @files is NULL or when @f never returned
   * non-zero.  files->file_lock is held for the whole walk, including the
   * calls to @f.
   */
  int iterate_fd(struct files_struct *files, unsigned n,
  		int (*f)(const void *, struct file *, unsigned),
  		const void *p)
  {
  	struct fdtable *fdt;
  	int res = 0;
  
  	if (!files)
  		return 0;
  
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	while (n < fdt->max_fds) {
  		struct file *file;
  
  		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
  		if (file) {
  			res = f(p, file, n);
  			if (res)
  				break;
  		}
  		n++;
  	}
  	spin_unlock(&files->file_lock);
  
  	return res;
  }
  EXPORT_SYMBOL(iterate_fd);