Blame view

fs/file.c 23.5 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
  /*
   *  linux/fs/file.c
   *
   *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
   *
   *  Manage the dynamic fd arrays in the process files_struct.
   */
fe17f22d7   Al Viro   take purely descr...
9
  #include <linux/syscalls.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
10
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
  #include <linux/fs.h>
  #include <linux/mm.h>
3f07c0144   Ingo Molnar   sched/headers: Pr...
13
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
16
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
  #include <linux/bitops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
18
19
  #include <linux/spinlock.h>
  #include <linux/rcupdate.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
20

9b80a184e   Alexey Dobriyan   fs/file: more uns...
21
22
  unsigned int sysctl_nr_open __read_mostly = 1024*1024;
  unsigned int sysctl_nr_open_min = BITS_PER_LONG;
752343be6   Rasmus Villemoes   fs/file.c: __cons...
23
24
  /* our min() is unusable in constant expressions ;-/ */
  #define __const_min(x, y) ((x) < (y) ? (x) : (y))
9b80a184e   Alexey Dobriyan   fs/file: more uns...
25
26
  unsigned int sysctl_nr_open_max =
  	__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
27

a892e2d7d   Changli Gao   vfs: use kmalloc(...
28
  static void __free_fdtable(struct fdtable *fdt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  {
f6c0a1920   Al Viro   fs/file.c: don't ...
30
31
  	kvfree(fdt->fd);
  	kvfree(fdt->open_fds);
a892e2d7d   Changli Gao   vfs: use kmalloc(...
32
  	kfree(fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
33
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34

7cf4dc3c8   Al Viro   move files_struct...
35
  static void free_fdtable_rcu(struct rcu_head *rcu)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
36
  {
ac3e3c5b1   Al Viro   don't bother with...
37
  	__free_fdtable(container_of(rcu, struct fdtable, rcu));
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
38
  }
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
39
40
  #define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
  #define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
41
  /*
ea5c58e70   Eric Biggers   vfs: clear remain...
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
   * Copy 'count' fd bits from the old table to the new table and clear the extra
   * space if any.  This does not copy the file pointers.  Called with the files
   * spinlock held for write.
   */
  static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
  			    unsigned int count)
  {
  	/* Byte lengths for the one-bit-per-fd maps (open_fds, close_on_exec). */
  	unsigned int live_bytes = count / BITS_PER_BYTE;
  	unsigned int tail_bytes = (nfdt->max_fds - count) / BITS_PER_BYTE;
  	/* Byte lengths for full_fds_bits, which is sized via BITBIT_SIZE(). */
  	unsigned int live_full = BITBIT_SIZE(count);
  	unsigned int tail_full = BITBIT_SIZE(nfdt->max_fds) - live_full;
  
  	/* For each bitmap: copy the live prefix, then zero the remainder. */
  	memcpy(nfdt->open_fds, ofdt->open_fds, live_bytes);
  	memset((char *)nfdt->open_fds + live_bytes, 0, tail_bytes);
  
  	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, live_bytes);
  	memset((char *)nfdt->close_on_exec + live_bytes, 0, tail_bytes);
  
  	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, live_full);
  	memset((char *)nfdt->full_fds_bits + live_full, 0, tail_full);
  }
  
  /*
   * Copy all file descriptors from the old table to the new, expanded table and
   * clear the extra space.  Called with the files spinlock held for write.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
67
   */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
68
  static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
69
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
70
  	unsigned int cpy, set;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
71

5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
72
  	BUG_ON(nfdt->max_fds < ofdt->max_fds);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
73
74
75
76
  
  	cpy = ofdt->max_fds * sizeof(struct file *);
  	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
  	memcpy(nfdt->fd, ofdt->fd, cpy);
ea5c58e70   Eric Biggers   vfs: clear remain...
77
  	memset((char *)nfdt->fd + cpy, 0, set);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
78

ea5c58e70   Eric Biggers   vfs: clear remain...
79
  	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
  }
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
81
  static struct fdtable * alloc_fdtable(unsigned int nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
83
  	struct fdtable *fdt;
1fd36adcd   David Howells   Replace the fd_se...
84
  	void *data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
86
  	/*
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
87
88
89
90
91
  	 * Figure out how many fds we actually want to support in this fdtable.
  	 * Allocation steps are keyed to the size of the fdarray, since it
  	 * grows far faster than any of the other dynamic data. We try to fit
  	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
  	 * and growing in powers of two from there on.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
92
  	 */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
93
94
95
  	nr /= (1024 / sizeof(struct file *));
  	nr = roundup_pow_of_two(nr + 1);
  	nr *= (1024 / sizeof(struct file *));
5c598b342   Al Viro   [PATCH] fix sysct...
96
97
98
99
100
101
102
103
104
105
  	/*
  	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
  	 * had been set lower between the check in expand_files() and here.  Deal
  	 * with that in caller, it's cheaper that way.
  	 *
  	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
  	 * bitmaps handling below becomes unpleasant, to put it mildly...
  	 */
  	if (unlikely(nr > sysctl_nr_open))
  		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
106

5d097056c   Vladimir Davydov   kmemcg: account c...
107
  	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
108
  	if (!fdt)
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
109
  		goto out;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
110
  	fdt->max_fds = nr;
c823bd924   Michal Hocko   fs/file.c: replac...
111
  	data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
112
113
  	if (!data)
  		goto out_fdt;
1fd36adcd   David Howells   Replace the fd_se...
114
  	fdt->fd = data;
c823bd924   Michal Hocko   fs/file.c: replac...
115
116
117
  	data = kvmalloc(max_t(size_t,
  				 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
  				 GFP_KERNEL_ACCOUNT);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
118
119
  	if (!data)
  		goto out_arr;
1fd36adcd   David Howells   Replace the fd_se...
120
  	fdt->open_fds = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
121
  	data += nr / BITS_PER_BYTE;
1fd36adcd   David Howells   Replace the fd_se...
122
  	fdt->close_on_exec = data;
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
123
124
  	data += nr / BITS_PER_BYTE;
  	fdt->full_fds_bits = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
125

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
126
  	return fdt;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
127
128
  
  out_arr:
f6c0a1920   Al Viro   fs/file.c: don't ...
129
  	kvfree(fdt->fd);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
130
  out_fdt:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
131
  	kfree(fdt);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
132
  out:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
133
134
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
136
  /*
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
137
138
139
140
141
   * Expand the file descriptor table.
   * This function will allocate a new fdtable and both fd array and fdset, of
   * the given size.
   * Return <0 error code on error; 1 on successful completion.
   * The files->file_lock should be held on entry, and will be held on exit.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
142
   */
9b80a184e   Alexey Dobriyan   fs/file: more uns...
143
  static int expand_fdtable(struct files_struct *files, unsigned int nr)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
144
145
146
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
  {
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
147
  	struct fdtable *new_fdt, *cur_fdt;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
148
149
  
  	spin_unlock(&files->file_lock);
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
150
  	new_fdt = alloc_fdtable(nr);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
151
152
153
154
155
156
  
  	/* make sure all __fd_install() have seen resize_in_progress
  	 * or have finished their rcu_read_lock_sched() section.
  	 */
  	if (atomic_read(&files->count) > 1)
  		synchronize_sched();
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
157
  	spin_lock(&files->file_lock);
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
158
159
  	if (!new_fdt)
  		return -ENOMEM;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
160
  	/*
5c598b342   Al Viro   [PATCH] fix sysct...
161
162
163
164
  	 * extremely unlikely race - sysctl_nr_open decreased between the check in
  	 * caller and alloc_fdtable().  Cheaper to catch it here...
  	 */
  	if (unlikely(new_fdt->max_fds <= nr)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
165
  		__free_fdtable(new_fdt);
5c598b342   Al Viro   [PATCH] fix sysct...
166
167
  		return -EMFILE;
  	}
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
168
  	cur_fdt = files_fdtable(files);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
169
170
171
172
173
174
175
  	BUG_ON(nr < cur_fdt->max_fds);
  	copy_fdtable(new_fdt, cur_fdt);
  	rcu_assign_pointer(files->fdt, new_fdt);
  	if (cur_fdt != &files->fdtab)
  		call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
  	/* coupled with smp_rmb() in __fd_install() */
  	smp_wmb();
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
176
  	return 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
177
178
179
180
  }
  
  /*
   * Expand files.
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
181
182
183
184
185
   * This function will expand the file structures, if the requested size exceeds
   * the current capacity and there is room for expansion.
   * Return <0 error code on error; 0 when nothing done; 1 when files were
   * expanded and execution may have blocked.
   * The files->file_lock should be held on entry, and will be held on exit.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
186
   */
9b80a184e   Alexey Dobriyan   fs/file: more uns...
187
  static int expand_files(struct files_struct *files, unsigned int nr)
8a81252b7   Eric Dumazet   fs/file.c: don't ...
188
189
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
  {
badf16621   Dipankar Sarma   [PATCH] files: br...
191
  	struct fdtable *fdt;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
192
  	int expanded = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
193

8a81252b7   Eric Dumazet   fs/file.c: don't ...
194
  repeat:
badf16621   Dipankar Sarma   [PATCH] files: br...
195
  	fdt = files_fdtable(files);
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
196

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
197
  	/* Do we need to expand? */
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
198
  	if (nr < fdt->max_fds)
8a81252b7   Eric Dumazet   fs/file.c: don't ...
199
  		return expanded;
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
200

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
201
  	/* Can we expand? */
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
202
  	if (nr >= sysctl_nr_open)
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
203
  		return -EMFILE;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
204
205
206
207
208
209
210
  	if (unlikely(files->resize_in_progress)) {
  		spin_unlock(&files->file_lock);
  		expanded = 1;
  		wait_event(files->resize_wait, !files->resize_in_progress);
  		spin_lock(&files->file_lock);
  		goto repeat;
  	}
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
211
  	/* All good, so we try */
8a81252b7   Eric Dumazet   fs/file.c: don't ...
212
213
214
215
216
217
  	files->resize_in_progress = true;
  	expanded = expand_fdtable(files, nr);
  	files->resize_in_progress = false;
  
  	wake_up_all(&files->resize_wait);
  	return expanded;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
218
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
219

9b80a184e   Alexey Dobriyan   fs/file: more uns...
220
  static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
221
222
223
  {
  	__set_bit(fd, fdt->close_on_exec);
  }
9b80a184e   Alexey Dobriyan   fs/file: more uns...
224
  static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
225
  {
fc90888d0   Linus Torvalds   vfs: conditionall...
226
227
  	if (test_bit(fd, fdt->close_on_exec))
  		__clear_bit(fd, fdt->close_on_exec);
b8318b01a   Al Viro   take __{set,clear...
228
  }
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
229
  static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
230
231
  {
  	__set_bit(fd, fdt->open_fds);
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
232
233
234
  	fd /= BITS_PER_LONG;
  	if (!~fdt->open_fds[fd])
  		__set_bit(fd, fdt->full_fds_bits);
b8318b01a   Al Viro   take __{set,clear...
235
  }
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
236
  static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
237
238
  {
  	__clear_bit(fd, fdt->open_fds);
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
239
  	__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
b8318b01a   Al Viro   take __{set,clear...
240
  }
9b80a184e   Alexey Dobriyan   fs/file: more uns...
241
  static unsigned int count_open_files(struct fdtable *fdt)
02afc6267   Al Viro   [PATCH] dup_fd() ...
242
  {
9b80a184e   Alexey Dobriyan   fs/file: more uns...
243
244
  	unsigned int size = fdt->max_fds;
  	unsigned int i;
02afc6267   Al Viro   [PATCH] dup_fd() ...
245
246
  
  	/* Find the last open fd */
1fd36adcd   David Howells   Replace the fd_se...
247
248
  	for (i = size / BITS_PER_LONG; i > 0; ) {
  		if (fdt->open_fds[--i])
02afc6267   Al Viro   [PATCH] dup_fd() ...
249
250
  			break;
  	}
1fd36adcd   David Howells   Replace the fd_se...
251
  	i = (i + 1) * BITS_PER_LONG;
02afc6267   Al Viro   [PATCH] dup_fd() ...
252
253
  	return i;
  }
02afc6267   Al Viro   [PATCH] dup_fd() ...
254
255
256
257
258
259
260
261
262
  /*
   * Allocate a new files structure and copy contents from the
   * passed in files structure.
   * errorp will be valid only when the returned files_struct is NULL.
   */
  struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
  {
  	struct files_struct *newf;
  	struct file **old_fds, **new_fds;
9b80a184e   Alexey Dobriyan   fs/file: more uns...
263
  	unsigned int open_files, i;
02afc6267   Al Viro   [PATCH] dup_fd() ...
264
265
266
  	struct fdtable *old_fdt, *new_fdt;
  
  	*errorp = -ENOMEM;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
267
  	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
02afc6267   Al Viro   [PATCH] dup_fd() ...
268
269
  	if (!newf)
  		goto out;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
270
271
272
  	atomic_set(&newf->count, 1);
  
  	spin_lock_init(&newf->file_lock);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
273
274
  	newf->resize_in_progress = false;
  	init_waitqueue_head(&newf->resize_wait);
afbec7fff   Al Viro   [PATCH] dup_fd() ...
275
276
277
  	newf->next_fd = 0;
  	new_fdt = &newf->fdtab;
  	new_fdt->max_fds = NR_OPEN_DEFAULT;
1fd36adcd   David Howells   Replace the fd_se...
278
279
  	new_fdt->close_on_exec = newf->close_on_exec_init;
  	new_fdt->open_fds = newf->open_fds_init;
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
280
  	new_fdt->full_fds_bits = newf->full_fds_bits_init;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
281
  	new_fdt->fd = &newf->fd_array[0];
afbec7fff   Al Viro   [PATCH] dup_fd() ...
282

02afc6267   Al Viro   [PATCH] dup_fd() ...
283
284
  	spin_lock(&oldf->file_lock);
  	old_fdt = files_fdtable(oldf);
02afc6267   Al Viro   [PATCH] dup_fd() ...
285
286
287
288
  	open_files = count_open_files(old_fdt);
  
  	/*
  	 * Check whether we need to allocate a larger fd array and fd set.
02afc6267   Al Viro   [PATCH] dup_fd() ...
289
  	 */
adbecb128   Al Viro   [PATCH] dup_fd() ...
290
  	while (unlikely(open_files > new_fdt->max_fds)) {
02afc6267   Al Viro   [PATCH] dup_fd() ...
291
  		spin_unlock(&oldf->file_lock);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
292

a892e2d7d   Changli Gao   vfs: use kmalloc(...
293
294
  		if (new_fdt != &newf->fdtab)
  			__free_fdtable(new_fdt);
adbecb128   Al Viro   [PATCH] dup_fd() ...
295

9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
296
297
298
299
300
301
302
303
  		new_fdt = alloc_fdtable(open_files - 1);
  		if (!new_fdt) {
  			*errorp = -ENOMEM;
  			goto out_release;
  		}
  
  		/* beyond sysctl_nr_open; nothing to do */
  		if (unlikely(new_fdt->max_fds < open_files)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
304
  			__free_fdtable(new_fdt);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
305
  			*errorp = -EMFILE;
02afc6267   Al Viro   [PATCH] dup_fd() ...
306
  			goto out_release;
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
307
  		}
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
308

02afc6267   Al Viro   [PATCH] dup_fd() ...
309
310
311
312
313
314
315
  		/*
  		 * Reacquire the oldf lock and a pointer to its fd table
  		 * who knows it may have a new bigger fd table. We need
  		 * the latest pointer.
  		 */
  		spin_lock(&oldf->file_lock);
  		old_fdt = files_fdtable(oldf);
adbecb128   Al Viro   [PATCH] dup_fd() ...
316
  		open_files = count_open_files(old_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
317
  	}
ea5c58e70   Eric Biggers   vfs: clear remain...
318
  	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
02afc6267   Al Viro   [PATCH] dup_fd() ...
319
320
  	old_fds = old_fdt->fd;
  	new_fds = new_fdt->fd;
02afc6267   Al Viro   [PATCH] dup_fd() ...
321
322
323
324
325
326
327
328
329
330
331
  	for (i = open_files; i != 0; i--) {
  		struct file *f = *old_fds++;
  		if (f) {
  			get_file(f);
  		} else {
  			/*
  			 * The fd may be claimed in the fd bitmap but not yet
  			 * instantiated in the files array if a sibling thread
  			 * is partway through open().  So make sure that this
  			 * fd is available to the new process.
  			 */
1dce27c5a   David Howells   Wrap accesses to ...
332
  			__clear_open_fd(open_files - i, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
333
334
335
336
  		}
  		rcu_assign_pointer(*new_fds++, f);
  	}
  	spin_unlock(&oldf->file_lock);
ea5c58e70   Eric Biggers   vfs: clear remain...
337
338
  	/* clear the remainder */
  	memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
02afc6267   Al Viro   [PATCH] dup_fd() ...
339

afbec7fff   Al Viro   [PATCH] dup_fd() ...
340
  	rcu_assign_pointer(newf->fdt, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
341
342
343
344
345
346
347
  	return newf;
  
  out_release:
  	kmem_cache_free(files_cachep, newf);
  out:
  	return NULL;
  }
ce08b62d1   Oleg Nesterov   change close_file...
348
  static struct fdtable *close_files(struct files_struct * files)
7cf4dc3c8   Al Viro   move files_struct...
349
  {
7cf4dc3c8   Al Viro   move files_struct...
350
351
352
  	/*
  	 * It is safe to dereference the fd table without RCU or
  	 * ->file_lock because this is the last reference to the
ce08b62d1   Oleg Nesterov   change close_file...
353
  	 * files structure.
7cf4dc3c8   Al Viro   move files_struct...
354
  	 */
ce08b62d1   Oleg Nesterov   change close_file...
355
  	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
9b80a184e   Alexey Dobriyan   fs/file: more uns...
356
  	unsigned int i, j = 0;
ce08b62d1   Oleg Nesterov   change close_file...
357

7cf4dc3c8   Al Viro   move files_struct...
358
359
360
361
362
363
364
365
366
367
368
  	for (;;) {
  		unsigned long set;
  		i = j * BITS_PER_LONG;
  		if (i >= fdt->max_fds)
  			break;
  		set = fdt->open_fds[j++];
  		while (set) {
  			if (set & 1) {
  				struct file * file = xchg(&fdt->fd[i], NULL);
  				if (file) {
  					filp_close(file, files);
388a4c880   Paul E. McKenney   fs: Eliminate con...
369
  					cond_resched();
7cf4dc3c8   Al Viro   move files_struct...
370
371
372
373
374
375
  				}
  			}
  			i++;
  			set >>= 1;
  		}
  	}
ce08b62d1   Oleg Nesterov   change close_file...
376
377
  
  	return fdt;
7cf4dc3c8   Al Viro   move files_struct...
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
  }
  
  struct files_struct *get_files_struct(struct task_struct *task)
  {
  	struct files_struct *files;
  
  	task_lock(task);
  	files = task->files;
  	if (files)
  		atomic_inc(&files->count);
  	task_unlock(task);
  
  	return files;
  }
  
  void put_files_struct(struct files_struct *files)
  {
7cf4dc3c8   Al Viro   move files_struct...
395
  	if (atomic_dec_and_test(&files->count)) {
ce08b62d1   Oleg Nesterov   change close_file...
396
  		struct fdtable *fdt = close_files(files);
b9e02af0a   Al Viro   don't bother with...
397
398
399
400
  		/* free the arrays if they are not embedded */
  		if (fdt != &files->fdtab)
  			__free_fdtable(fdt);
  		kmem_cache_free(files_cachep, files);
7cf4dc3c8   Al Viro   move files_struct...
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
  	}
  }
  
  /*
   * Make @files the current task's files_struct and drop the reference on
   * the one it replaces.  The pointer swap is done under task_lock().
   */
  void reset_files_struct(struct files_struct *files)
  {
  	struct task_struct *tsk = current;
  	struct files_struct *previous = tsk->files;
  
  	task_lock(tsk);
  	tsk->files = files;
  	task_unlock(tsk);
  
  	put_files_struct(previous);
  }
  
  /*
   * Detach @tsk from its files_struct, if it has one, and drop the task's
   * reference.  tsk->files is set to NULL under task_lock().
   */
  void exit_files(struct task_struct *tsk)
  {
  	struct files_struct *files = tsk->files;
  
  	if (!files)
  		return;
  
  	task_lock(tsk);
  	tsk->files = NULL;
  	task_unlock(tsk);
  
  	put_files_struct(files);
  }
f52111b15   Al Viro   [PATCH] take init...
427
428
429
430
431
432
  struct files_struct init_files = {
  	.count		= ATOMIC_INIT(1),
  	.fdt		= &init_files.fdtab,
  	.fdtab		= {
  		.max_fds	= NR_OPEN_DEFAULT,
  		.fd		= &init_files.fd_array[0],
1fd36adcd   David Howells   Replace the fd_se...
433
434
  		.close_on_exec	= init_files.close_on_exec_init,
  		.open_fds	= init_files.open_fds_init,
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
435
  		.full_fds_bits	= init_files.full_fds_bits_init,
f52111b15   Al Viro   [PATCH] take init...
436
  	},
eece09ec2   Thomas Gleixner   locking: Various ...
437
  	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
d609ecd88   Shuriyc Chu   fs/file.c: initia...
438
  	.resize_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
f52111b15   Al Viro   [PATCH] take init...
439
  };
1027abe88   Al Viro   [PATCH] merge loc...
440

9b80a184e   Alexey Dobriyan   fs/file: more uns...
441
  static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
442
  {
9b80a184e   Alexey Dobriyan   fs/file: more uns...
443
444
445
  	unsigned int maxfd = fdt->max_fds;
  	unsigned int maxbit = maxfd / BITS_PER_LONG;
  	unsigned int bitbit = start / BITS_PER_LONG;
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
446
447
448
449
450
451
452
453
  
  	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
  	if (bitbit > maxfd)
  		return maxfd;
  	if (bitbit > start)
  		start = bitbit;
  	return find_next_zero_bit(fdt->open_fds, maxfd, start);
  }
1027abe88   Al Viro   [PATCH] merge loc...
454
455
456
  /*
   * allocate a file descriptor, mark it busy.
   */
dcfadfa4e   Al Viro   new helper: __all...
457
458
  int __alloc_fd(struct files_struct *files,
  	       unsigned start, unsigned end, unsigned flags)
1027abe88   Al Viro   [PATCH] merge loc...
459
  {
1027abe88   Al Viro   [PATCH] merge loc...
460
461
462
463
464
465
466
467
468
469
470
471
  	unsigned int fd;
  	int error;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  repeat:
  	fdt = files_fdtable(files);
  	fd = start;
  	if (fd < files->next_fd)
  		fd = files->next_fd;
  
  	if (fd < fdt->max_fds)
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
472
  		fd = find_next_fd(fdt, fd);
1027abe88   Al Viro   [PATCH] merge loc...
473

f33ff9927   Al Viro   take rlimit check...
474
475
476
477
478
479
480
  	/*
  	 * N.B. For clone tasks sharing a files structure, this test
  	 * will limit the total number of files that can be opened.
  	 */
  	error = -EMFILE;
  	if (fd >= end)
  		goto out;
1027abe88   Al Viro   [PATCH] merge loc...
481
482
483
484
485
486
487
488
489
490
491
492
493
  	error = expand_files(files, fd);
  	if (error < 0)
  		goto out;
  
  	/*
  	 * If we needed to expand the fs array we
  	 * might have blocked - try again.
  	 */
  	if (error)
  		goto repeat;
  
  	if (start <= files->next_fd)
  		files->next_fd = fd + 1;
1dce27c5a   David Howells   Wrap accesses to ...
494
  	__set_open_fd(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
495
  	if (flags & O_CLOEXEC)
1dce27c5a   David Howells   Wrap accesses to ...
496
  		__set_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
497
  	else
1dce27c5a   David Howells   Wrap accesses to ...
498
  		__clear_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
499
500
501
  	error = fd;
  #if 1
  	/* Sanity check */
add1f0995   Paul E. McKenney   fs: Substitute rc...
502
  	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
1027abe88   Al Viro   [PATCH] merge loc...
503
504
505
506
507
508
509
510
511
512
  		printk(KERN_WARNING "alloc_fd: slot %d not NULL!
  ", fd);
  		rcu_assign_pointer(fdt->fd[fd], NULL);
  	}
  #endif
  
  out:
  	spin_unlock(&files->file_lock);
  	return error;
  }
ad47bd725   Al Viro   make expand_files...
513
  static int alloc_fd(unsigned start, unsigned flags)
dcfadfa4e   Al Viro   new helper: __all...
514
515
516
  {
  	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
  }
1a7bd2265   Al Viro   make get_unused_f...
517
  /*
   * Allocate the lowest available descriptor for the current task, bounded
   * by RLIMIT_NOFILE.  Equivalent to alloc_fd() with a start of 0.
   */
  int get_unused_fd_flags(unsigned flags)
  {
  	return alloc_fd(0, flags);
  }
1a7bd2265   Al Viro   make get_unused_f...
521
  EXPORT_SYMBOL(get_unused_fd_flags);
56007cae9   Al Viro   move put_unused_f...
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  
  /*
   * Release descriptor @fd in @files: clear its open bit and pull the
   * next_fd search hint down if @fd lies below it.  The callers in this
   * file hold files->file_lock.
   */
  static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  {
  	__clear_open_fd(fd, files_fdtable(files));
  
  	if (files->next_fd > fd)
  		files->next_fd = fd;
  }
  
  /*
   * Give back descriptor @fd of the current task; __put_unused_fd() run
   * under files->file_lock.
   */
  void put_unused_fd(unsigned int fd)
  {
  	struct files_struct *cur_files = current->files;
  
  	spin_lock(&cur_files->file_lock);
  	__put_unused_fd(cur_files, fd);
  	spin_unlock(&cur_files->file_lock);
  }
  
  EXPORT_SYMBOL(put_unused_fd);
  
  /*
   * Install a file pointer in the fd array.
   *
   * The VFS is full of places where we drop the files lock between
   * setting the open_fds bitmap and installing the file in the file
   * array.  At any such point, we are vulnerable to a dup2() race
   * installing a file in the array before us.  We need to detect this and
   * fput() the struct file we are about to overwrite in this case.
   *
   * It should never happen - if we allow dup2() do it, _really_ bad things
   * will follow.
f869e8a7f   Al Viro   expose a low-leve...
552
553
554
555
556
557
558
   *
   * NOTE: __fd_install() variant is really, really low-level; don't
   * use it unless you are forced to by truly lousy API shoved down
   * your throat.  'files' *MUST* be either current->files or obtained
   * by get_files_struct(current) done by whoever had given it to you,
   * or really bad things will happen.  Normally you want to use
   * fd_install() instead.
56007cae9   Al Viro   move put_unused_f...
559
   */
f869e8a7f   Al Viro   expose a low-leve...
560
561
  void __fd_install(struct files_struct *files, unsigned int fd,
  		struct file *file)
56007cae9   Al Viro   move put_unused_f...
562
  {
56007cae9   Al Viro   move put_unused_f...
563
  	struct fdtable *fdt;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
564

8a81252b7   Eric Dumazet   fs/file.c: don't ...
565
  	rcu_read_lock_sched();
c02b1a9b4   Mateusz Guzik   vfs: grab the loc...
566
  	if (unlikely(files->resize_in_progress)) {
8a81252b7   Eric Dumazet   fs/file.c: don't ...
567
  		rcu_read_unlock_sched();
c02b1a9b4   Mateusz Guzik   vfs: grab the loc...
568
569
570
571
572
573
  		spin_lock(&files->file_lock);
  		fdt = files_fdtable(files);
  		BUG_ON(fdt->fd[fd] != NULL);
  		rcu_assign_pointer(fdt->fd[fd], file);
  		spin_unlock(&files->file_lock);
  		return;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
574
575
576
577
  	}
  	/* coupled with smp_wmb() in expand_fdtable() */
  	smp_rmb();
  	fdt = rcu_dereference_sched(files->fdt);
56007cae9   Al Viro   move put_unused_f...
578
579
  	BUG_ON(fdt->fd[fd] != NULL);
  	rcu_assign_pointer(fdt->fd[fd], file);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
580
  	rcu_read_unlock_sched();
56007cae9   Al Viro   move put_unused_f...
581
  }
f869e8a7f   Al Viro   expose a low-leve...
582
583
584
585
  /* Install @file at descriptor @fd in the current task's descriptor table. */
  void fd_install(unsigned int fd, struct file *file)
  {
  	struct files_struct *cur_files = current->files;
  
  	__fd_install(cur_files, fd, file);
  }
56007cae9   Al Viro   move put_unused_f...
586
  EXPORT_SYMBOL(fd_install);
0ee8cdfe6   Al Viro   take fget() and f...
587

483ce1d4b   Al Viro   take descriptor-r...
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
  /*
   * The same warnings as for __alloc_fd()/__fd_install() apply here...
   */
  int __close_fd(struct files_struct *files, unsigned fd)
  {
  	struct file *file = NULL;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  
  	/* An out-of-range fd and an empty slot are both reported as -EBADF. */
  	if (fd < fdt->max_fds)
  		file = fdt->fd[fd];
  	if (!file) {
  		spin_unlock(&files->file_lock);
  		return -EBADF;
  	}
  
  	/* Empty the slot and free the descriptor before dropping the lock. */
  	rcu_assign_pointer(fdt->fd[fd], NULL);
  	__put_unused_fd(files, fd);
  	spin_unlock(&files->file_lock);
  
  	/* The close itself runs without file_lock held. */
  	return filp_close(file, files);
  }
2ca2a09d6   Dominik Brodowski   fs: add ksys_clos...
612
  EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
483ce1d4b   Al Viro   take descriptor-r...
613

6a6d27de3   Al Viro   take close-on-exe...
614
615
616
617
618
619
  /*
   * Close every descriptor in @files whose close-on-exec bit is set, clearing
   * the close_on_exec bitmap along the way.
   */
  void do_close_on_exec(struct files_struct *files)
  {
  	unsigned word;
  	struct fdtable *fdt;
  
  	/* exec unshares first */
  	spin_lock(&files->file_lock);
  	for (word = 0; ; word++) {
  		unsigned long pending;
  		unsigned fd = word * BITS_PER_LONG;
  
  		/* Re-read the table each pass; the lock is dropped further down. */
  		fdt = files_fdtable(files);
  		if (fd >= fdt->max_fds)
  			break;
  
  		pending = fdt->close_on_exec[word];
  		if (!pending)
  			continue;
  		fdt->close_on_exec[word] = 0;
  
  		while (pending) {
  			if (pending & 1) {
  				struct file *file = fdt->fd[fd];
  
  				if (file) {
  					rcu_assign_pointer(fdt->fd[fd], NULL);
  					__put_unused_fd(files, fd);
  					/* filp_close() is called unlocked. */
  					spin_unlock(&files->file_lock);
  					filp_close(file, files);
  					cond_resched();
  					spin_lock(&files->file_lock);
  				}
  			}
  			fd++;
  			pending >>= 1;
  		}
  	}
  	spin_unlock(&files->file_lock);
  }
1deb46e25   Oleg Nesterov   fs: factor out co...
649
  /*
   * Look up @fd in the calling task's descriptor table and try to take a
   * reference on the file found there.
   *
   * Returns NULL when the slot is empty or when any bit of @mask is set in
   * the file's f_mode; otherwise returns the file once get_file_rcu() has
   * succeeded.  The whole lookup runs inside an RCU read-side section.
   */
  static struct file *__fget(unsigned int fd, fmode_t mask)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  
  	rcu_read_lock();
  	for (;;) {
  		file = fcheck_files(files, fd);
  		if (!file)
  			break;
  		if (file->f_mode & mask) {
  			file = NULL;
  			break;
  		}
  		if (get_file_rcu(file))
  			break;
  		/*
  		 * File object ref couldn't be taken.
  		 * dup2() atomicity guarantee is the reason
  		 * we loop to catch the new file (or NULL pointer)
  		 */
  	}
  	rcu_read_unlock();
  
  	return file;
  }
1deb46e25   Oleg Nesterov   fs: factor out co...
671
672
673
674
  /*
   * Reference-taking lookup of @fd for the calling task.  Passes FMODE_PATH
   * as the rejection mask to __fget(), so a file with that mode bit set is
   * reported as NULL.
   */
  struct file *fget(unsigned int fd)
  {
  	struct file *file = __fget(fd, FMODE_PATH);
  
  	return file;
  }
0ee8cdfe6   Al Viro   take fget() and f...
675
676
677
678
  EXPORT_SYMBOL(fget);
  
  /*
   * Same lookup as fget(), but with an empty rejection mask: __fget() is
   * called with mask 0, so no f_mode bit causes the file to be refused.
   */
  struct file *fget_raw(unsigned int fd)
  {
  	struct file *file = __fget(fd, 0);
  
  	return file;
  }
0ee8cdfe6   Al Viro   take fget() and f...
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
  EXPORT_SYMBOL(fget_raw);
  
  /*
   * Lightweight file lookup - no refcnt increment if fd table isn't shared.
   *
   * You can use this instead of fget if you satisfy all of the following
   * conditions:
   * 1) You must call fput_light before exiting the syscall and returning control
   *    to userspace (i.e. you cannot remember the returned struct file * after
   *    returning to userspace).
   * 2) You must not call filp_close on the returned struct file * in between
   *    calls to fget_light and fput_light.
   * 3) You must not clone the current task in between the calls to fget_light
   *    and fput_light.
   *
   * The fput_needed flag returned by fget_light should be passed to the
   * corresponding fput_light.
   */
bd2a31d52   Al Viro   get rid of fget_l...
699
  /*
   * Common helper behind __fdget() and __fdget_raw().
   *
   * Returns 0 when @fd has no usable file (empty slot, or f_mode intersects
   * @mask).  Otherwise returns the struct file pointer cast to unsigned long:
   *  - files->count == 1: plain pointer from __fcheck_files(), no flag set;
   *  - otherwise: pointer obtained through __fget(), OR-ed with FDPUT_FPUT.
   */
  static unsigned long __fget_light(unsigned int fd, fmode_t mask)
  {
  	struct files_struct *files = current->files;
  	struct file *file;
  
  	if (atomic_read(&files->count) != 1) {
  		file = __fget(fd, mask);
  		return file ? (FDPUT_FPUT | (unsigned long)file) : 0;
  	}
  
  	file = __fcheck_files(files, fd);
  	if (!file || unlikely(file->f_mode & mask))
  		return 0;
  
  	return (unsigned long)file;
  }
bd2a31d52   Al Viro   get rid of fget_l...
716
  unsigned long __fdget(unsigned int fd)
ad4618344   Oleg Nesterov   fs: factor out co...
717
  {
bd2a31d52   Al Viro   get rid of fget_l...
718
  	return __fget_light(fd, FMODE_PATH);
ad4618344   Oleg Nesterov   fs: factor out co...
719
  }
bd2a31d52   Al Viro   get rid of fget_l...
720
  EXPORT_SYMBOL(__fdget);
0ee8cdfe6   Al Viro   take fget() and f...
721

bd2a31d52   Al Viro   get rid of fget_l...
722
  /*
   * Lightweight lookup of @fd with an empty rejection mask; see
   * __fget_light() for the encoding of the returned word.
   */
  unsigned long __fdget_raw(unsigned int fd)
  {
  	unsigned long v = __fget_light(fd, 0);
  
  	return v;
  }
fe17f22d7   Al Viro   take purely descr...
726

bd2a31d52   Al Viro   get rid of fget_l...
727
728
  unsigned long __fdget_pos(unsigned int fd)
  {
99aea6813   Eric Biggers   vfs: Don't let __...
729
730
  	unsigned long v = __fdget(fd);
  	struct file *file = (struct file *)(v & ~3);
bd2a31d52   Al Viro   get rid of fget_l...
731

99aea6813   Eric Biggers   vfs: Don't let __...
732
  	if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
bd2a31d52   Al Viro   get rid of fget_l...
733
734
735
736
737
  		if (file_count(file) > 1) {
  			v |= FDPUT_POS_UNLOCK;
  			mutex_lock(&file->f_pos_lock);
  		}
  	}
99aea6813   Eric Biggers   vfs: Don't let __...
738
  	return v;
bd2a31d52   Al Viro   get rid of fget_l...
739
  }
63b6df141   Al Viro   give readdir(2)/g...
740
741
742
743
  /* Release the f_pos_lock mutex of @f. */
  void __f_unlock_pos(struct file *f)
  {
  	struct mutex *pos_lock = &f->f_pos_lock;
  
  	mutex_unlock(pos_lock);
  }
bd2a31d52   Al Viro   get rid of fget_l...
744
745
746
747
748
  /*
   * We only lock f_pos if we have threads or if the file might be
   * shared with another process. In both cases we'll have an elevated
   * file count (done either by fdget() or by fork()).
   */
fe17f22d7   Al Viro   take purely descr...
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
  /*
   * Set (@flag non-zero) or clear (@flag zero) the close-on-exec bit of @fd
   * in the calling task's descriptor table, under files->file_lock.
   * No range check on @fd is performed here.
   */
  void set_close_on_exec(unsigned int fd, int flag)
  {
  	struct files_struct *files = current->files;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	if (!flag)
  		__clear_close_on_exec(fd, fdt);
  	else
  		__set_close_on_exec(fd, fdt);
  	spin_unlock(&files->file_lock);
  }
  
  /*
   * Report the close-on-exec state of @fd in the calling task's descriptor
   * table.  The table is read inside an RCU read-side section; no range
   * check on @fd is performed here.
   */
  bool get_close_on_exec(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	bool cloexec;
  
  	rcu_read_lock();
  	cloexec = close_on_exec(fd, files_fdtable(files));
  	rcu_read_unlock();
  
  	return cloexec;
  }
8280d1617   Al Viro   new helper: repla...
773
774
  static int do_dup2(struct files_struct *files,
  	struct file *file, unsigned fd, unsigned flags)
e983094d6   Al Viro   missing annotatio...
775
  __releases(&files->file_lock)
fe17f22d7   Al Viro   take purely descr...
776
  {
8280d1617   Al Viro   new helper: repla...
777
  	struct file *tofree;
fe17f22d7   Al Viro   take purely descr...
778
  	struct fdtable *fdt;
fe17f22d7   Al Viro   take purely descr...
779
780
781
782
783
784
785
786
787
788
789
790
791
792
  	/*
  	 * We need to detect attempts to do dup2() over allocated but still
  	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
  	 * extra work in their equivalent of fget() - they insert struct
  	 * file immediately after grabbing descriptor, mark it larval if
  	 * more work (e.g. actual opening) is needed and make sure that
  	 * fget() treats larval files as absent.  Potentially interesting,
  	 * but while extra work in fget() is trivial, locking implications
  	 * and amount of surgery on open()-related paths in VFS are not.
  	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
  	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
  	 * scope of POSIX or SUS, since neither considers shared descriptor
  	 * tables and this condition does not arise without those.
  	 */
fe17f22d7   Al Viro   take purely descr...
793
  	fdt = files_fdtable(files);
8280d1617   Al Viro   new helper: repla...
794
795
796
  	tofree = fdt->fd[fd];
  	if (!tofree && fd_is_open(fd, fdt))
  		goto Ebusy;
fe17f22d7   Al Viro   take purely descr...
797
  	get_file(file);
8280d1617   Al Viro   new helper: repla...
798
799
  	rcu_assign_pointer(fdt->fd[fd], file);
  	__set_open_fd(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
800
  	if (flags & O_CLOEXEC)
8280d1617   Al Viro   new helper: repla...
801
  		__set_close_on_exec(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
802
  	else
8280d1617   Al Viro   new helper: repla...
803
  		__clear_close_on_exec(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
804
805
806
807
  	spin_unlock(&files->file_lock);
  
  	if (tofree)
  		filp_close(tofree, files);
8280d1617   Al Viro   new helper: repla...
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
  	return fd;
  
  Ebusy:
  	spin_unlock(&files->file_lock);
  	return -EBUSY;
  }
  
  int replace_fd(unsigned fd, struct file *file, unsigned flags)
  {
  	int err;
  	struct files_struct *files = current->files;
  
  	if (!file)
  		return __close_fd(files, fd);
  
  	if (fd >= rlimit(RLIMIT_NOFILE))
08f05c497   Al Viro   Return the right ...
824
  		return -EBADF;
8280d1617   Al Viro   new helper: repla...
825
826
827
828
829
830
831
832
833
834
835
  
  	spin_lock(&files->file_lock);
  	err = expand_files(files, fd);
  	if (unlikely(err < 0))
  		goto out_unlock;
  	return do_dup2(files, file, fd, flags);
  
  out_unlock:
  	spin_unlock(&files->file_lock);
  	return err;
  }
c7248321a   Dominik Brodowski   fs: add ksys_dup{...
836
  static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
8280d1617   Al Viro   new helper: repla...
837
838
839
840
841
842
843
  {
  	int err = -EBADF;
  	struct file *file;
  	struct files_struct *files = current->files;
  
  	if ((flags & ~O_CLOEXEC) != 0)
  		return -EINVAL;
aed976475   Richard W.M. Jones   dup3: Return an e...
844
845
  	if (unlikely(oldfd == newfd))
  		return -EINVAL;
8280d1617   Al Viro   new helper: repla...
846
  	if (newfd >= rlimit(RLIMIT_NOFILE))
08f05c497   Al Viro   Return the right ...
847
  		return -EBADF;
8280d1617   Al Viro   new helper: repla...
848
849
850
851
852
853
854
855
856
857
858
859
  
  	spin_lock(&files->file_lock);
  	err = expand_files(files, newfd);
  	file = fcheck(oldfd);
  	if (unlikely(!file))
  		goto Ebadf;
  	if (unlikely(err < 0)) {
  		if (err == -EMFILE)
  			goto Ebadf;
  		goto out_unlock;
  	}
  	return do_dup2(files, file, newfd, flags);
fe17f22d7   Al Viro   take purely descr...
860
861
862
863
864
865
866
  
  Ebadf:
  	err = -EBADF;
  out_unlock:
  	spin_unlock(&files->file_lock);
  	return err;
  }
c7248321a   Dominik Brodowski   fs: add ksys_dup{...
867
868
869
870
  /* dup3() system call entry point: forwards all arguments to ksys_dup3(). */
  SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
  {
  	int ret = ksys_dup3(oldfd, newfd, flags);
  
  	return ret;
  }
fe17f22d7   Al Viro   take purely descr...
871
872
873
874
875
876
877
878
879
880
881
882
  /*
   * dup2() system call entry point.
   *
   * When @newfd differs from @oldfd this is ksys_dup3() with no flags.
   * When they are equal, nothing is duplicated: the call returns @oldfd if
   * it currently has a file, and -EBADF otherwise.
   */
  SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
  {
  	struct files_struct *files;
  	int retval;
  
  	if (likely(newfd != oldfd))
  		return ksys_dup3(oldfd, newfd, 0);
  
  	/* corner case */
  	files = current->files;
  	retval = oldfd;
  
  	rcu_read_lock();
  	if (!fcheck_files(files, oldfd))
  		retval = -EBADF;
  	rcu_read_unlock();
  
  	return retval;
  }
c7248321a   Dominik Brodowski   fs: add ksys_dup{...
885
  int ksys_dup(unsigned int fildes)
fe17f22d7   Al Viro   take purely descr...
886
887
888
889
890
  {
  	int ret = -EBADF;
  	struct file *file = fget_raw(fildes);
  
  	if (file) {
8d10a0358   Yann Droneaud   fs/file.c: replac...
891
  		ret = get_unused_fd_flags(0);
fe17f22d7   Al Viro   take purely descr...
892
893
894
895
896
897
898
  		if (ret >= 0)
  			fd_install(ret, file);
  		else
  			fput(file);
  	}
  	return ret;
  }
c7248321a   Dominik Brodowski   fs: add ksys_dup{...
899
900
901
902
  /* dup() system call entry point: forwards @fildes to ksys_dup(). */
  SYSCALL_DEFINE1(dup, unsigned int, fildes)
  {
  	int ret = ksys_dup(fildes);
  
  	return ret;
  }
fe17f22d7   Al Viro   take purely descr...
903
904
905
906
907
908
909
910
911
912
913
914
  /*
   * Allocate a descriptor via alloc_fd(@from, @flags) and install @file
   * there, taking a new reference on @file first.
   *
   * Returns the new descriptor, -EINVAL when @from >= RLIMIT_NOFILE, or the
   * negative error from alloc_fd().
   */
  int f_dupfd(unsigned int from, struct file *file, unsigned flags)
  {
  	int newfd;
  
  	if (from >= rlimit(RLIMIT_NOFILE))
  		return -EINVAL;
  
  	newfd = alloc_fd(from, flags);
  	if (newfd < 0)
  		return newfd;
  
  	get_file(file);
  	fd_install(newfd, file);
  	return newfd;
  }
c3c073f80   Al Viro   new helper: itera...
915
916
917
918
919
920
  
  /*
   * Call @f(@p, file, fd) for each descriptor >= @n in @files that has a
   * file installed, in increasing descriptor order.
   *
   * The walk stops at the first non-zero return from @f, and that value is
   * returned.  Returns 0 when @files is NULL or when every callback
   * returned 0.  files->file_lock is held for the whole walk, callbacks
   * included.
   */
  int iterate_fd(struct files_struct *files, unsigned n,
  		int (*f)(const void *, struct file *, unsigned),
  		const void *p)
  {
  	struct fdtable *fdt;
  	int res = 0;
  
  	if (!files)
  		return 0;
  
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	while (n < fdt->max_fds) {
  		struct file *file;
  
  		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
  		if (file) {
  			res = f(p, file, n);
  			if (res)
  				break;
  		}
  		n++;
  	}
  	spin_unlock(&files->file_lock);
  
  	return res;
  }
  EXPORT_SYMBOL(iterate_fd);