Blame view

fs/file.c 28.2 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
  /*
   *  linux/fs/file.c
   *
   *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
   *
   *  Manage the dynamic fd arrays in the process files_struct.
   */
fe17f22d7   Al Viro   take purely descr...
9
  #include <linux/syscalls.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
10
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
  #include <linux/fs.h>
278a5fbae   Christian Brauner   open: add close_r...
12
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
  #include <linux/mm.h>
3f07c0144   Ingo Molnar   sched/headers: Pr...
14
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
17
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/bitops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
19
20
  #include <linux/spinlock.h>
  #include <linux/rcupdate.h>
60997c3d4   Christian Brauner   close_range: add ...
21
  #include <linux/close_range.h>
665906104   Kees Cook   fs: Move __scm_in...
22
  #include <net/sock.h>
0f2122045   Jens Axboe   io_uring: don't r...
23
  #include <linux/io_uring.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
24

9b80a184e   Alexey Dobriyan   fs/file: more uns...
25
26
  /*
   * Upper bound on the size of an fd table: alloc_fdtable() clamps the
   * table size against it and expand_files() refuses any request at or
   * above it.  Defaults to 1024*1024 descriptors.
   */
  unsigned int sysctl_nr_open __read_mostly = 1024*1024;
  /*
   * One bitmap word worth of descriptors.  Presumably the smallest value
   * sysctl_nr_open may be set to - confirm against the sysctl table.
   */
  unsigned int sysctl_nr_open_min = BITS_PER_LONG;
752343be6   Rasmus Villemoes   fs/file.c: __cons...
27
28
  /* our min() is unusable in constant expressions ;-/ */
  #define __const_min(x, y) ((x) < (y) ? (x) : (y))
9b80a184e   Alexey Dobriyan   fs/file: more uns...
29
30
  /*
   * The smaller of INT_MAX and the number of pointer-sized objects a size_t
   * can address, rounded down to a multiple of BITS_PER_LONG by the
   * "& -BITS_PER_LONG" mask.  Presumably the ceiling for sysctl_nr_open -
   * confirm against the sysctl table.
   */
  unsigned int sysctl_nr_open_max =
  	__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
31

a892e2d7d   Changli Gao   vfs: use kmalloc(...
32
  /*
   * Free a heap-allocated fdtable: the fd pointer array, the bitmap
   * allocation, and finally the struct itself.  open_fds is the base of the
   * single allocation that alloc_fdtable() also carves close_on_exec and
   * full_fds_bits out of, so one kvfree() releases all three bitmaps.
   */
  static void __free_fdtable(struct fdtable *fdt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
  {
f6c0a1920   Al Viro   fs/file.c: don't ...
34
35
  	kvfree(fdt->fd);
  	kvfree(fdt->open_fds);
a892e2d7d   Changli Gao   vfs: use kmalloc(...
36
  	kfree(fdt);
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
37
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38

7cf4dc3c8   Al Viro   move files_struct...
39
  /*
   * RCU callback (queued with call_rcu() by expand_fdtable()): recover the
   * fdtable that embeds @rcu and free it.
   */
  static void free_fdtable_rcu(struct rcu_head *rcu)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
40
  {
ac3e3c5b1   Al Viro   don't bother with...
41
  	__free_fdtable(container_of(rcu, struct fdtable, rcu));
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
42
  }
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
43
44
  /*
   * Sizing helpers for full_fds_bits, the second-level bitmap that holds one
   * bit per word of open_fds (see __set_open_fd()).
   * BITBIT_NR(nr):   number of longs needed for that bitmap for nr fds.
   * BITBIT_SIZE(nr): the same, in bytes.
   */
  #define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
  #define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
45
  /*
ea5c58e70   Eric Biggers   vfs: clear remain...
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
   * Copy 'count' fd bits from the old table to the new table and clear the extra
   * space if any.  This does not copy the file pointers.  Called with the files
   * spinlock held for write.
   */
  static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
  			    unsigned int count)
  {
  	/* Byte counts for the two per-fd bitmaps (one bit per descriptor). */
  	unsigned int keep_bytes = count / BITS_PER_BYTE;
  	unsigned int zero_bytes = (nfdt->max_fds - count) / BITS_PER_BYTE;
  	/* Byte counts for the second-level bitmap (one bit per open_fds word). */
  	unsigned int keep_bitbit = BITBIT_SIZE(count);
  	unsigned int zero_bitbit = BITBIT_SIZE(nfdt->max_fds) - keep_bitbit;
  	char *dst;
  
  	/* open_fds: copy the live prefix, zero the rest of the new bitmap. */
  	dst = (char *)nfdt->open_fds;
  	memcpy(dst, ofdt->open_fds, keep_bytes);
  	memset(dst + keep_bytes, 0, zero_bytes);
  
  	/* close_on_exec: same layout and sizes as open_fds. */
  	dst = (char *)nfdt->close_on_exec;
  	memcpy(dst, ofdt->close_on_exec, keep_bytes);
  	memset(dst + keep_bytes, 0, zero_bytes);
  
  	/* full_fds_bits: sized in whole longs via BITBIT_SIZE(). */
  	dst = (char *)nfdt->full_fds_bits;
  	memcpy(dst, ofdt->full_fds_bits, keep_bitbit);
  	memset(dst + keep_bitbit, 0, zero_bitbit);
  }
  
  /*
   * Copy all file descriptors from the old table to the new, expanded table and
   * clear the extra space.  Called with the files spinlock held for write.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
71
   */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
72
  static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
73
  {
4e89b7210   Al Viro   fix multiplicatio...
74
  	size_t cpy, set;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
75

5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
76
  	/* The destination must be at least as large as the source. */
  	BUG_ON(nfdt->max_fds < ofdt->max_fds);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
77
78
79
80
  
  	/*
  	 * File pointers: copy every old slot, then NULL the slots that exist
  	 * only in the new table.  Both byte counts are computed as size_t.
  	 */
  	cpy = ofdt->max_fds * sizeof(struct file *);
  	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
  	memcpy(nfdt->fd, ofdt->fd, cpy);
ea5c58e70   Eric Biggers   vfs: clear remain...
81
  	memset((char *)nfdt->fd + cpy, 0, set);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
82

ea5c58e70   Eric Biggers   vfs: clear remain...
83
  	/* Bitmaps: the whole old table is live, so count == ofdt->max_fds. */
  	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
84
  }
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
85
  /*
   * Allocate an fdtable sized to hold descriptor index @nr.
   *
   * @nr is rounded up so that the fd pointer array is a power-of-two
   * multiple of 1024 bytes, then clamped against sysctl_nr_open; the clamp
   * can leave max_fds no larger than the @nr passed in, which callers check.
   * Returns the new table, or NULL if any of the three allocations fails.
   * Neither the fd array nor the bitmaps are initialised here; the caller
   * fills them (see copy_fdtable() and dup_fd()).
   */
  static struct fdtable * alloc_fdtable(unsigned int nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86
  {
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
87
  	struct fdtable *fdt;
1fd36adcd   David Howells   Replace the fd_se...
88
  	void *data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
90
  	/*
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
91
92
93
94
95
  	 * Figure out how many fds we actually want to support in this fdtable.
  	 * Allocation steps are keyed to the size of the fdarray, since it
  	 * grows far faster than any of the other dynamic data. We try to fit
  	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
  	 * and growing in powers of two from there on.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
96
  	 */
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
97
98
99
  	nr /= (1024 / sizeof(struct file *));
  	nr = roundup_pow_of_two(nr + 1);
  	nr *= (1024 / sizeof(struct file *));
5c598b342   Al Viro   [PATCH] fix sysct...
100
101
102
103
104
105
106
107
108
109
  	/*
  	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
  	 * had been set lower between the check in expand_files() and here.  Deal
  	 * with that in caller, it's cheaper that way.
  	 *
  	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
  	 * bitmaps handling below becomes unpleasant, to put it mildly...
  	 */
  	if (unlikely(nr > sysctl_nr_open))
  		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
110

5d097056c   Vladimir Davydov   kmemcg: account c...
111
  	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
112
  	if (!fdt)
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
113
  		goto out;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
114
  	fdt->max_fds = nr;
c823bd924   Michal Hocko   fs/file.c: replac...
115
  	data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
116
117
  	if (!data)
  		goto out_fdt;
1fd36adcd   David Howells   Replace the fd_se...
118
  	fdt->fd = data;
c823bd924   Michal Hocko   fs/file.c: replac...
119
120
121
  	/*
  	 * One allocation backs all three bitmaps: open_fds and close_on_exec
  	 * (nr / BITS_PER_BYTE bytes each) followed by full_fds_bits
  	 * (BITBIT_SIZE(nr) bytes).  It is never smaller than L1_CACHE_BYTES.
  	 */
  	data = kvmalloc(max_t(size_t,
  				 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
  				 GFP_KERNEL_ACCOUNT);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
122
123
  	if (!data)
  		goto out_arr;
1fd36adcd   David Howells   Replace the fd_se...
124
  	fdt->open_fds = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
125
  	data += nr / BITS_PER_BYTE;
1fd36adcd   David Howells   Replace the fd_se...
126
  	fdt->close_on_exec = data;
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
127
128
  	data += nr / BITS_PER_BYTE;
  	fdt->full_fds_bits = data;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
129

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
130
  	return fdt;
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
131
132
  
  out_arr:
f6c0a1920   Al Viro   fs/file.c: don't ...
133
  	kvfree(fdt->fd);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
134
  out_fdt:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
135
  	kfree(fdt);
5466b456e   Vadim Lobanov   [PATCH] fdtable: ...
136
  out:
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
137
138
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
140
  /*
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
141
142
143
144
145
   * Expand the file descriptor table.
   * This function will allocate a new fdtable and both fd array and fdset, of
   * the given size.
   * Return <0 error code on error; 1 on successful completion.
   * The files->file_lock should be held on entry, and will be held on exit.
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
146
   */
9b80a184e   Alexey Dobriyan   fs/file: more uns...
147
  static int expand_fdtable(struct files_struct *files, unsigned int nr)
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
148
149
150
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
  {
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
151
  	struct fdtable *new_fdt, *cur_fdt;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
152
153
  
  	/*
  	 * file_lock is dropped for the allocation and the optional
  	 * synchronize_rcu() below, and retaken before anything is published.
  	 */
  	spin_unlock(&files->file_lock);
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
154
  	new_fdt = alloc_fdtable(nr);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
155
156
157
158
159
  
  	/* make sure all __fd_install() have seen resize_in_progress
  	 * or have finished their rcu_read_lock_sched() section.
  	 */
  	if (atomic_read(&files->count) > 1)
c93ffc15c   Paul E. McKenney   fs/file: Replace ...
160
  		synchronize_rcu();
8a81252b7   Eric Dumazet   fs/file.c: don't ...
161

ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
162
  	spin_lock(&files->file_lock);
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
163
164
  	/* The allocation result is only checked once the lock is held again. */
  	if (!new_fdt)
  		return -ENOMEM;
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
165
  	/*
5c598b342   Al Viro   [PATCH] fix sysct...
166
167
168
169
  	 * extremely unlikely race - sysctl_nr_open decreased between the check in
  	 * caller and alloc_fdtable().  Cheaper to catch it here...
  	 */
  	if (unlikely(new_fdt->max_fds <= nr)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
170
  		__free_fdtable(new_fdt);
5c598b342   Al Viro   [PATCH] fix sysct...
171
172
  		return -EMFILE;
  	}
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
173
  	cur_fdt = files_fdtable(files);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
174
175
176
177
178
179
180
  	BUG_ON(nr < cur_fdt->max_fds);
  	copy_fdtable(new_fdt, cur_fdt);
  	rcu_assign_pointer(files->fdt, new_fdt);
  	/*
  	 * The table embedded in files_struct is never freed on its own; a
  	 * heap-allocated old table is freed through call_rcu().
  	 */
  	if (cur_fdt != &files->fdtab)
  		call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
  	/* coupled with smp_rmb() in __fd_install() */
  	smp_wmb();
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
181
  	return 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182
183
184
185
  }
  
  /*
   * Expand files.
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
186
187
188
189
190
   * This function will expand the file structures, if the requested size exceeds
   * the current capacity and there is room for expansion.
   * Return <0 error code on error; 0 when nothing done; 1 when files were
   * expanded and execution may have blocked.
   * The files->file_lock should be held on entry, and will be held on exit.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
   */
9b80a184e   Alexey Dobriyan   fs/file: more uns...
192
  static int expand_files(struct files_struct *files, unsigned int nr)
8a81252b7   Eric Dumazet   fs/file.c: don't ...
193
194
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195
  {
badf16621   Dipankar Sarma   [PATCH] files: br...
196
  	struct fdtable *fdt;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
197
  	/* Becomes 1 once file_lock has been dropped at least once. */
  	int expanded = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198

8a81252b7   Eric Dumazet   fs/file.c: don't ...
199
  repeat:
badf16621   Dipankar Sarma   [PATCH] files: br...
200
  	fdt = files_fdtable(files);
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
201

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
202
  	/* Do we need to expand? */
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
203
  	if (nr < fdt->max_fds)
8a81252b7   Eric Dumazet   fs/file.c: don't ...
204
  		return expanded;
4e1e018ec   Al Viro   [PATCH] fix RLIM_...
205

74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
206
  	/* Can we expand? */
9cfe015aa   Eric Dumazet   get rid of NR_OPE...
207
  	if (nr >= sysctl_nr_open)
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
208
  		return -EMFILE;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
209
210
211
212
213
214
215
  	/*
  	 * Another resize is already running: wait for it with the lock
  	 * dropped, then re-evaluate everything from the top.  expanded is
  	 * set so the caller learns that the lock was released.
  	 */
  	if (unlikely(files->resize_in_progress)) {
  		spin_unlock(&files->file_lock);
  		expanded = 1;
  		wait_event(files->resize_wait, !files->resize_in_progress);
  		spin_lock(&files->file_lock);
  		goto repeat;
  	}
74d392aaa   Vadim Lobanov   [PATCH] Clean up ...
216
  	/* All good, so we try */
8a81252b7   Eric Dumazet   fs/file.c: don't ...
217
218
219
220
221
222
  	files->resize_in_progress = true;
  	expanded = expand_fdtable(files, nr);
  	files->resize_in_progress = false;
  
  	/* Release anyone parked in the wait_event() above. */
  	wake_up_all(&files->resize_wait);
  	return expanded;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
  }
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
224

9b80a184e   Alexey Dobriyan   fs/file: more uns...
225
  /* Set the close-on-exec bit for @fd in @fdt (non-atomic __set_bit()). */
  static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
226
227
228
  {
  	__set_bit(fd, fdt->close_on_exec);
  }
9b80a184e   Alexey Dobriyan   fs/file: more uns...
229
  /*
   * Clear the close-on-exec bit for @fd in @fdt.  The bit is tested first,
   * so no store is issued when it is already clear.
   */
  static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
230
  {
fc90888d0   Linus Torvalds   vfs: conditionall...
231
232
  	if (test_bit(fd, fdt->close_on_exec))
  		__clear_bit(fd, fdt->close_on_exec);
b8318b01a   Al Viro   take __{set,clear...
233
  }
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
234
  /*
   * Mark @fd as in use in @fdt->open_fds.  If that makes the containing
   * bitmap word all ones, also set the word's bit in full_fds_bits.
   */
  static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
235
236
  {
  	__set_bit(fd, fdt->open_fds);
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
237
238
239
  	/* From here on fd is the index of the open_fds word, not an fd. */
  	fd /= BITS_PER_LONG;
  	if (!~fdt->open_fds[fd])
  		__set_bit(fd, fdt->full_fds_bits);
b8318b01a   Al Viro   take __{set,clear...
240
  }
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
241
  /*
   * Mark @fd as free in @fdt->open_fds.  The containing word can no longer
   * be full, so its bit in full_fds_bits is cleared unconditionally.
   */
  static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
b8318b01a   Al Viro   take __{set,clear...
242
243
  {
  	__clear_bit(fd, fdt->open_fds);
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
244
  	__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
b8318b01a   Al Viro   take __{set,clear...
245
  }
9b80a184e   Alexey Dobriyan   fs/file: more uns...
246
  /*
   * Return the number of fd slots up to and including the highest non-zero
   * word of @fdt->open_fds.  The result is always a multiple of
   * BITS_PER_LONG and is BITS_PER_LONG even when no bit is set at all.
   */
  static unsigned int count_open_files(struct fdtable *fdt)
02afc6267   Al Viro   [PATCH] dup_fd() ...
247
  {
9b80a184e   Alexey Dobriyan   fs/file: more uns...
248
249
  	unsigned int size = fdt->max_fds;
  	unsigned int i;
02afc6267   Al Viro   [PATCH] dup_fd() ...
250
251
  
  	/* Find the last open fd */
1fd36adcd   David Howells   Replace the fd_se...
252
253
  	for (i = size / BITS_PER_LONG; i > 0; ) {
  		if (fdt->open_fds[--i])
02afc6267   Al Viro   [PATCH] dup_fd() ...
254
255
  			break;
  	}
1fd36adcd   David Howells   Replace the fd_se...
256
  	/* i is a word index here; convert "words in use" to a slot count. */
  	i = (i + 1) * BITS_PER_LONG;
02afc6267   Al Viro   [PATCH] dup_fd() ...
257
258
  	return i;
  }
60997c3d4   Christian Brauner   close_range: add ...
259
260
261
262
263
264
265
266
267
  static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
  {
  	unsigned int count;
  
  	count = count_open_files(fdt);
  	if (max_fds < NR_OPEN_DEFAULT)
  		max_fds = NR_OPEN_DEFAULT;
  	return min(count, max_fds);
  }
02afc6267   Al Viro   [PATCH] dup_fd() ...
268
269
270
271
272
  /*
   * Allocate a new files structure and copy contents from the
   * passed in files structure.
   * errorp will be valid only when the returned files_struct is NULL.
   */
60997c3d4   Christian Brauner   close_range: add ...
273
  struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
02afc6267   Al Viro   [PATCH] dup_fd() ...
274
275
276
  {
  	struct files_struct *newf;
  	struct file **old_fds, **new_fds;
9b80a184e   Alexey Dobriyan   fs/file: more uns...
277
  	unsigned int open_files, i;
02afc6267   Al Viro   [PATCH] dup_fd() ...
278
279
280
  	struct fdtable *old_fdt, *new_fdt;
  
  	*errorp = -ENOMEM;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
281
  	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
02afc6267   Al Viro   [PATCH] dup_fd() ...
282
283
  	if (!newf)
  		goto out;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
284
285
286
  	atomic_set(&newf->count, 1);
  
  	spin_lock_init(&newf->file_lock);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
287
288
  	newf->resize_in_progress = false;
  	init_waitqueue_head(&newf->resize_wait);
afbec7fff   Al Viro   [PATCH] dup_fd() ...
289
290
291
  	newf->next_fd = 0;
  	/* Start out on the table and arrays embedded in the new files_struct. */
  	new_fdt = &newf->fdtab;
  	new_fdt->max_fds = NR_OPEN_DEFAULT;
1fd36adcd   David Howells   Replace the fd_se...
292
293
  	new_fdt->close_on_exec = newf->close_on_exec_init;
  	new_fdt->open_fds = newf->open_fds_init;
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
294
  	new_fdt->full_fds_bits = newf->full_fds_bits_init;
afbec7fff   Al Viro   [PATCH] dup_fd() ...
295
  	new_fdt->fd = &newf->fd_array[0];
afbec7fff   Al Viro   [PATCH] dup_fd() ...
296

02afc6267   Al Viro   [PATCH] dup_fd() ...
297
298
  	spin_lock(&oldf->file_lock);
  	old_fdt = files_fdtable(oldf);
60997c3d4   Christian Brauner   close_range: add ...
299
  	open_files = sane_fdtable_size(old_fdt, max_fds);
02afc6267   Al Viro   [PATCH] dup_fd() ...
300
301
302
  
  	/*
  	 * Check whether we need to allocate a larger fd array and fd set.
02afc6267   Al Viro   [PATCH] dup_fd() ...
303
  	 */
adbecb128   Al Viro   [PATCH] dup_fd() ...
304
  	while (unlikely(open_files > new_fdt->max_fds)) {
02afc6267   Al Viro   [PATCH] dup_fd() ...
305
  		spin_unlock(&oldf->file_lock);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
306

a892e2d7d   Changli Gao   vfs: use kmalloc(...
307
308
  		/* Discard a table from a previous pass that proved too small. */
  		if (new_fdt != &newf->fdtab)
  			__free_fdtable(new_fdt);
adbecb128   Al Viro   [PATCH] dup_fd() ...
309

9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
310
311
312
313
314
315
316
317
  		/*
  		 * alloc_fdtable() takes the highest index to accommodate and
  		 * returns max_fds above it unless clamped by sysctl_nr_open,
  		 * hence open_files - 1.
  		 */
  		new_fdt = alloc_fdtable(open_files - 1);
  		if (!new_fdt) {
  			*errorp = -ENOMEM;
  			goto out_release;
  		}
  
  		/* beyond sysctl_nr_open; nothing to do */
  		if (unlikely(new_fdt->max_fds < open_files)) {
a892e2d7d   Changli Gao   vfs: use kmalloc(...
318
  			__free_fdtable(new_fdt);
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
319
  			*errorp = -EMFILE;
02afc6267   Al Viro   [PATCH] dup_fd() ...
320
  			goto out_release;
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
321
  		}
9dec3c4d3   Al Viro   [PATCH] dup_fd() ...
322

02afc6267   Al Viro   [PATCH] dup_fd() ...
323
324
325
326
327
328
329
  		/*
  		 * Reacquire the oldf lock and a pointer to its fd table
  		 * who knows it may have a new bigger fd table. We need
  		 * the latest pointer.
  		 */
  		spin_lock(&oldf->file_lock);
  		old_fdt = files_fdtable(oldf);
60997c3d4   Christian Brauner   close_range: add ...
330
  		open_files = sane_fdtable_size(old_fdt, max_fds);
02afc6267   Al Viro   [PATCH] dup_fd() ...
331
  	}
ea5c58e70   Eric Biggers   vfs: clear remain...
332
  	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
02afc6267   Al Viro   [PATCH] dup_fd() ...
333
334
  	old_fds = old_fdt->fd;
  	new_fds = new_fdt->fd;
02afc6267   Al Viro   [PATCH] dup_fd() ...
335
336
337
338
339
340
341
342
343
344
345
  	/*
  	 * Copy the first open_files slots, taking a reference on every file
  	 * that is actually installed.  i counts down, so the slot being
  	 * handled is index open_files - i.
  	 */
  	for (i = open_files; i != 0; i--) {
  		struct file *f = *old_fds++;
  		if (f) {
  			get_file(f);
  		} else {
  			/*
  			 * The fd may be claimed in the fd bitmap but not yet
  			 * instantiated in the files array if a sibling thread
  			 * is partway through open().  So make sure that this
  			 * fd is available to the new process.
  			 */
1dce27c5a   David Howells   Wrap accesses to ...
346
  			__clear_open_fd(open_files - i, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
347
348
349
350
  		}
  		rcu_assign_pointer(*new_fds++, f);
  	}
  	spin_unlock(&oldf->file_lock);
ea5c58e70   Eric Biggers   vfs: clear remain...
351
352
  	/* clear the remainder */
  	memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
02afc6267   Al Viro   [PATCH] dup_fd() ...
353

afbec7fff   Al Viro   [PATCH] dup_fd() ...
354
  	rcu_assign_pointer(newf->fdt, new_fdt);
02afc6267   Al Viro   [PATCH] dup_fd() ...
355
356
357
358
359
360
361
  	return newf;
  
  	/* Error exits: *errorp has already been set by the failing step. */
  out_release:
  	kmem_cache_free(files_cachep, newf);
  out:
  	return NULL;
  }
ce08b62d1   Oleg Nesterov   change close_file...
362
  /*
   * Close every file still installed in @files and return its fdtable so
   * the caller can decide whether the table needs freeing.
   */
  static struct fdtable *close_files(struct files_struct * files)
7cf4dc3c8   Al Viro   move files_struct...
363
  {
7cf4dc3c8   Al Viro   move files_struct...
364
365
366
  	/*
  	 * It is safe to dereference the fd table without RCU or
  	 * ->file_lock because this is the last reference to the
ce08b62d1   Oleg Nesterov   change close_file...
367
  	 * files structure.
7cf4dc3c8   Al Viro   move files_struct...
368
  	 */
ce08b62d1   Oleg Nesterov   change close_file...
369
  	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
9b80a184e   Alexey Dobriyan   fs/file: more uns...
370
  	unsigned int i, j = 0;
ce08b62d1   Oleg Nesterov   change close_file...
371

7cf4dc3c8   Al Viro   move files_struct...
372
373
374
375
376
377
378
379
380
381
382
  	/*
  	 * Walk open_fds one word at a time: j is the word index, i the fd
  	 * number of the bit currently at the bottom of 'set'.
  	 */
  	for (;;) {
  		unsigned long set;
  		i = j * BITS_PER_LONG;
  		if (i >= fdt->max_fds)
  			break;
  		set = fdt->open_fds[j++];
  		while (set) {
  			if (set & 1) {
  				/* Empty the slot before closing what was in it. */
  				struct file * file = xchg(&fdt->fd[i], NULL);
  				if (file) {
  					filp_close(file, files);
388a4c880   Paul E. McKenney   fs: Eliminate con...
383
  					cond_resched();
7cf4dc3c8   Al Viro   move files_struct...
384
385
386
387
388
389
  				}
  			}
  			i++;
  			set >>= 1;
  		}
  	}
ce08b62d1   Oleg Nesterov   change close_file...
390
391
  
  	return fdt;
7cf4dc3c8   Al Viro   move files_struct...
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
  }
  
  struct files_struct *get_files_struct(struct task_struct *task)
  {
  	struct files_struct *files;
  
  	task_lock(task);
  	files = task->files;
  	if (files)
  		atomic_inc(&files->count);
  	task_unlock(task);
  
  	return files;
  }
  
  /*
   * Drop one reference on @files.  When the count reaches zero, close all
   * files still installed, free the fdtable if it was heap-allocated, and
   * return the files_struct to files_cachep.
   */
  void put_files_struct(struct files_struct *files)
  {
7cf4dc3c8   Al Viro   move files_struct...
409
  	if (atomic_dec_and_test(&files->count)) {
ce08b62d1   Oleg Nesterov   change close_file...
410
  		struct fdtable *fdt = close_files(files);
b9e02af0a   Al Viro   don't bother with...
411
412
413
414
  		/* free the arrays if they are not embedded */
  		if (fdt != &files->fdtab)
  			__free_fdtable(fdt);
  		kmem_cache_free(files_cachep, files);
7cf4dc3c8   Al Viro   move files_struct...
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
  	}
  }
  
  /*
   * Make @files the current task's files_struct and drop the reference on
   * the one it replaces.  The old pointer is read before task_lock() is
   * taken; only the store to current->files happens under the lock.
   */
  void reset_files_struct(struct files_struct *files)
  {
  	struct task_struct *self = current;
  	struct files_struct *prev = self->files;
  
  	task_lock(self);
  	self->files = files;
  	task_unlock(self);
  
  	put_files_struct(prev);
  }
  
  /*
   * Detach @tsk from its files_struct, if it has one: call
   * io_uring_files_cancel() on it, clear tsk->files under task_lock(), and
   * then drop the task's reference.
   */
  void exit_files(struct task_struct *tsk)
  {
  	struct files_struct * files = tsk->files;
  
  	if (files) {
0f2122045   Jens Axboe   io_uring: don't r...
435
  		io_uring_files_cancel(files);
7cf4dc3c8   Al Viro   move files_struct...
436
437
438
439
440
441
  		task_lock(tsk);
  		tsk->files = NULL;
  		task_unlock(tsk);
  		put_files_struct(files);
  	}
  }
f52111b15   Al Viro   [PATCH] take init...
442
443
444
445
446
447
  /*
   * Statically initialised files_struct holding one reference.  Its fdt
   * pointer refers to its own embedded fdtable, whose fd array and bitmaps
   * are in turn the embedded *_init arrays, sized for NR_OPEN_DEFAULT fds.
   * Presumably the table of the initial task - confirm against its users.
   */
  struct files_struct init_files = {
  	.count		= ATOMIC_INIT(1),
  	.fdt		= &init_files.fdtab,
  	.fdtab		= {
  		.max_fds	= NR_OPEN_DEFAULT,
  		.fd		= &init_files.fd_array[0],
1fd36adcd   David Howells   Replace the fd_se...
448
449
  		.close_on_exec	= init_files.close_on_exec_init,
  		.open_fds	= init_files.open_fds_init,
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
450
  		.full_fds_bits	= init_files.full_fds_bits_init,
f52111b15   Al Viro   [PATCH] take init...
451
  	},
eece09ec2   Thomas Gleixner   locking: Various ...
452
  	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
5704a0681   Shuriyc Chu   fs/file.c: initia...
453
  	.resize_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
f52111b15   Al Viro   [PATCH] take init...
454
  };
1027abe88   Al Viro   [PATCH] merge loc...
455

9b80a184e   Alexey Dobriyan   fs/file: more uns...
456
  /*
   * Find the first clear bit in @fdt->open_fds at or after @start.
   *
   * full_fds_bits is consulted first so that completely full words of
   * open_fds are skipped without being scanned bit by bit.
   *
   * Returns the free fd number, or fdt->max_fds when the table has no free
   * slot at or after @start.
   */
  static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
457
  {
9b80a184e   Alexey Dobriyan   fs/file: more uns...
458
459
460
  	unsigned int maxfd = fdt->max_fds;
  	unsigned int maxbit = maxfd / BITS_PER_LONG;
  	unsigned int bitbit = start / BITS_PER_LONG;
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
461
462
463
464
465
466
467
468
  
  	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
  	/*
  	 * find_next_zero_bit() returns at most maxbit, so bitbit tops out at
  	 * exactly maxfd when every remaining word is full.  Test with >= so
  	 * that case returns here instead of falling into a second scan.
  	 */
  	if (bitbit >= maxfd)
  		return maxfd;
  	/* Skip ahead to the first word known to have a free bit. */
  	if (bitbit > start)
  		start = bitbit;
  	return find_next_zero_bit(fdt->open_fds, maxfd, start);
  }
1027abe88   Al Viro   [PATCH] merge loc...
469
470
471
  /*
   * allocate a file descriptor, mark it busy.
   */
dcfadfa4e   Al Viro   new helper: __all...
472
473
  /*
   * @files: table to allocate from.
   * @start: lowest descriptor number the caller will accept.
   * @end:   exclusive upper limit; a candidate at or above it fails.
   * @flags: O_CLOEXEC sets the close-on-exec bit, otherwise it is cleared.
   *
   * Takes files->file_lock for the duration; expand_files() may drop and
   * retake it, in which case the search restarts.  On success the fd is
   * marked in open_fds but its fd array slot is left NULL.
   * Returns the descriptor number, -EMFILE when none is available below
   * @end, or a negative errno from expand_files().
   */
  int __alloc_fd(struct files_struct *files,
  	       unsigned start, unsigned end, unsigned flags)
1027abe88   Al Viro   [PATCH] merge loc...
474
  {
1027abe88   Al Viro   [PATCH] merge loc...
475
476
477
478
479
480
481
482
483
484
485
486
  	unsigned int fd;
  	int error;
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  repeat:
  	fdt = files_fdtable(files);
  	/* Begin at the caller's floor or the cached hint, whichever is higher. */
  	fd = start;
  	if (fd < files->next_fd)
  		fd = files->next_fd;
  
  	if (fd < fdt->max_fds)
f3f86e33d   Linus Torvalds   vfs: Fix patholog...
487
  		fd = find_next_fd(fdt, fd);
1027abe88   Al Viro   [PATCH] merge loc...
488

f33ff9927   Al Viro   take rlimit check...
489
490
491
492
493
494
495
  	/*
  	 * N.B. For clone tasks sharing a files structure, this test
  	 * will limit the total number of files that can be opened.
  	 */
  	error = -EMFILE;
  	if (fd >= end)
  		goto out;
1027abe88   Al Viro   [PATCH] merge loc...
496
497
498
499
500
501
502
503
504
505
506
507
508
  	error = expand_files(files, fd);
  	if (error < 0)
  		goto out;
  
  	/*
  	 * If we needed to expand the fs array we
  	 * might have blocked - try again.
  	 */
  	if (error)
  		goto repeat;
  
  	/* Only advance the hint when the search began at or below it. */
  	if (start <= files->next_fd)
  		files->next_fd = fd + 1;
1dce27c5a   David Howells   Wrap accesses to ...
509
  	__set_open_fd(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
510
  	if (flags & O_CLOEXEC)
1dce27c5a   David Howells   Wrap accesses to ...
511
  		__set_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
512
  	else
1dce27c5a   David Howells   Wrap accesses to ...
513
  		__clear_close_on_exec(fd, fdt);
1027abe88   Al Viro   [PATCH] merge loc...
514
515
516
  	error = fd;
  #if 1
  	/* Sanity check */
add1f0995   Paul E. McKenney   fs: Substitute rc...
517
  	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
1027abe88   Al Viro   [PATCH] merge loc...
518
519
520
521
522
523
524
525
526
527
  		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
  		rcu_assign_pointer(fdt->fd[fd], NULL);
  	}
  #endif
  
  out:
  	spin_unlock(&files->file_lock);
  	return error;
  }
ad47bd725   Al Viro   make expand_files...
528
  /*
   * Allocate a descriptor numbered at least @start in current->files, using
   * the task's RLIMIT_NOFILE as the exclusive upper limit.  Returns what
   * __alloc_fd() returns.
   */
  static int alloc_fd(unsigned start, unsigned flags)
dcfadfa4e   Al Viro   new helper: __all...
529
530
531
  {
  	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
  }
4022e7af8   Jens Axboe   io_uring: make su...
532
533
534
535
  /*
   * Allocate a free descriptor in current->files, searching from 0 and
   * failing once the candidate reaches @nofile.
   * @flags:  passed through to __alloc_fd() (O_CLOEXEC is honoured there).
   * @nofile: exclusive upper limit on the descriptor number.
   * Returns the descriptor or a negative errno from __alloc_fd().
   */
  int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
  {
  	struct files_struct *files = current->files;
  
  	return __alloc_fd(files, 0, nofile, flags);
  }
1a7bd2265   Al Viro   make get_unused_f...
536
  /*
   * Allocate a free descriptor in current->files, limited by the task's
   * RLIMIT_NOFILE.  Returns what __get_unused_fd_flags() returns.
   */
  int get_unused_fd_flags(unsigned flags)
1027abe88   Al Viro   [PATCH] merge loc...
537
  {
4022e7af8   Jens Axboe   io_uring: make su...
538
  	return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
1027abe88   Al Viro   [PATCH] merge loc...
539
  }
1a7bd2265   Al Viro   make get_unused_f...
540
  EXPORT_SYMBOL(get_unused_fd_flags);
56007cae9   Al Viro   move put_unused_f...
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
  
  /*
   * Mark @fd free in @files' current fdtable.  The fd array slot is not
   * touched.  The callers in this file hold files->file_lock.
   */
  static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  {
  	struct fdtable *table = files_fdtable(files);
  
  	__clear_open_fd(fd, table);
  	/*
  	 * next_fd is where __alloc_fd() starts searching; pull it back so
  	 * that this slot is considered again.
  	 */
  	if (files->next_fd > fd)
  		files->next_fd = fd;
  }
  
  /*
   * Give descriptor number @fd back to current->files: under file_lock,
   * clear its open bit and rewind next_fd if needed.
   */
  void put_unused_fd(unsigned int fd)
  {
  	struct files_struct *cur = current->files;
  
  	spin_lock(&cur->file_lock);
  	__put_unused_fd(cur, fd);
  	spin_unlock(&cur->file_lock);
  }
  
  EXPORT_SYMBOL(put_unused_fd);
  
  /*
   * Install a file pointer in the fd array.
   *
   * The VFS is full of places where we drop the files lock between
   * setting the open_fds bitmap and installing the file in the file
   * array.  At any such point, we are vulnerable to a dup2() race
   * installing a file in the array before us.  We need to detect this and
   * fput() the struct file we are about to overwrite in this case.
   *
   * It should never happen - if we allow dup2() to do it, _really_ bad things
   * will follow.
f869e8a7f   Al Viro   expose a low-leve...
571
572
573
574
575
576
577
   *
   * NOTE: __fd_install() variant is really, really low-level; don't
   * use it unless you are forced to by truly lousy API shoved down
   * your throat.  'files' *MUST* be either current->files or obtained
   * by get_files_struct(current) done by whoever had given it to you,
   * or really bad things will happen.  Normally you want to use
   * fd_install() instead.
56007cae9   Al Viro   move put_unused_f...
578
   */
f869e8a7f   Al Viro   expose a low-leve...
579
580
  void __fd_install(struct files_struct *files, unsigned int fd,
  		struct file *file)
56007cae9   Al Viro   move put_unused_f...
581
  {
56007cae9   Al Viro   move put_unused_f...
582
  	struct fdtable *fdt;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
583

8a81252b7   Eric Dumazet   fs/file.c: don't ...
584
  	rcu_read_lock_sched();
c02b1a9b4   Mateusz Guzik   vfs: grab the loc...
585
  	/*
  	 * Slow path: a resize is in flight.  Leave the read-side section
  	 * and install under file_lock, which expand_fdtable() holds while
  	 * it copies and publishes the new table.
  	 */
  	if (unlikely(files->resize_in_progress)) {
8a81252b7   Eric Dumazet   fs/file.c: don't ...
586
  		rcu_read_unlock_sched();
c02b1a9b4   Mateusz Guzik   vfs: grab the loc...
587
588
589
590
591
592
  		spin_lock(&files->file_lock);
  		fdt = files_fdtable(files);
  		/* The slot must still be empty. */
  		BUG_ON(fdt->fd[fd] != NULL);
  		rcu_assign_pointer(fdt->fd[fd], file);
  		spin_unlock(&files->file_lock);
  		return;
8a81252b7   Eric Dumazet   fs/file.c: don't ...
593
594
595
596
  	}
  	/* coupled with smp_wmb() in expand_fdtable() */
  	smp_rmb();
  	/* Fast path: no resize seen, install without taking file_lock. */
  	fdt = rcu_dereference_sched(files->fdt);
56007cae9   Al Viro   move put_unused_f...
597
598
  	BUG_ON(fdt->fd[fd] != NULL);
  	rcu_assign_pointer(fdt->fd[fd], file);
8a81252b7   Eric Dumazet   fs/file.c: don't ...
599
  	rcu_read_unlock_sched();
56007cae9   Al Viro   move put_unused_f...
600
  }
665906104   Kees Cook   fs: Move __scm_in...
601
602
603
604
  /*
   * This consumes the "file" refcount, so callers should treat it
   * as if they had called fput(file).
   */
f869e8a7f   Al Viro   expose a low-leve...
605
606
607
608
  void fd_install(unsigned int fd, struct file *file)
  {
  	struct files_struct *files = current->files;
  
  	/* Thin wrapper: always targets the calling task's own table. */
  	__fd_install(files, fd, file);
  }
56007cae9   Al Viro   move put_unused_f...
609
  EXPORT_SYMBOL(fd_install);
0ee8cdfe6   Al Viro   take fget() and f...
610

278a5fbae   Christian Brauner   open: add close_r...
611
  static struct file *pick_file(struct files_struct *files, unsigned fd)
483ce1d4b   Al Viro   take descriptor-r...
612
  {
278a5fbae   Christian Brauner   open: add close_r...
613
  	struct file *file = NULL;
483ce1d4b   Al Viro   take descriptor-r...
614
615
616
617
618
619
620
621
622
623
  	struct fdtable *fdt;
  
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	if (fd >= fdt->max_fds)
  		goto out_unlock;
  	file = fdt->fd[fd];
  	if (!file)
  		goto out_unlock;
  	rcu_assign_pointer(fdt->fd[fd], NULL);
483ce1d4b   Al Viro   take descriptor-r...
624
  	__put_unused_fd(files, fd);
483ce1d4b   Al Viro   take descriptor-r...
625
626
627
  
  out_unlock:
  	spin_unlock(&files->file_lock);
278a5fbae   Christian Brauner   open: add close_r...
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
  	return file;
  }
  
  /*
   * Close descriptor @fd in @files.  Returns -EBADF if nothing is installed
   * at @fd, otherwise whatever filp_close() returns.
   *
   * The same warnings as for __alloc_fd()/__fd_install() apply here...
   */
  int __close_fd(struct files_struct *files, unsigned fd)
  {
  	struct file *victim = pick_file(files, fd);

  	return victim ? filp_close(victim, files) : -EBADF;
  }
2ca2a09d6   Dominik Brodowski   fs: add ksys_clos...
644
  EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
483ce1d4b   Al Viro   take descriptor-r...
645

278a5fbae   Christian Brauner   open: add close_r...
646
647
648
649
650
651
652
653
654
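  /**
   * __close_range() - Close all file descriptors in a given range.
   *
   * @fd:     starting file descriptor to close
   * @max_fd: last file descriptor to close
   * @flags:  CLOSE_RANGE_UNSHARE to unshare the file descriptor table before
   *          closing; any other bit is rejected with -EINVAL
   *
   * This closes a range of file descriptors. All file descriptors
   * from @fd up to and including @max_fd are closed.
   *
   * Returns 0 on success, -EINVAL if @fd is greater than @max_fd or @flags
   * contains an unknown bit, or the error returned while unsharing the
   * file descriptor table.
   */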
60997c3d4   Christian Brauner   close_range: add ...
655
  int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
278a5fbae   Christian Brauner   open: add close_r...
656
657
  {
  	unsigned int cur_max;
60997c3d4   Christian Brauner   close_range: add ...
658
659
660
661
662
  	struct task_struct *me = current;
  	struct files_struct *cur_fds = me->files, *fds = NULL;
  
  	if (flags & ~CLOSE_RANGE_UNSHARE)
  		return -EINVAL;
278a5fbae   Christian Brauner   open: add close_r...
663
664
665
666
667
  
  	if (fd > max_fd)
  		return -EINVAL;
  
  	rcu_read_lock();
60997c3d4   Christian Brauner   close_range: add ...
668
  	cur_max = files_fdtable(cur_fds)->max_fds;
278a5fbae   Christian Brauner   open: add close_r...
669
670
671
672
  	rcu_read_unlock();
  
  	/* cap to last valid index into fdtable */
  	cur_max--;
60997c3d4   Christian Brauner   close_range: add ...
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
  	if (flags & CLOSE_RANGE_UNSHARE) {
  		int ret;
  		unsigned int max_unshare_fds = NR_OPEN_MAX;
  
  		/*
  		 * If the requested range is greater than the current maximum,
  		 * we're closing everything so only copy all file descriptors
  		 * beneath the lowest file descriptor.
  		 */
  		if (max_fd >= cur_max)
  			max_unshare_fds = fd;
  
  		ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
  		if (ret)
  			return ret;
  
  		/*
  		 * We used to share our file descriptor table, and have now
  		 * created a private one, make sure we're using it below.
  		 */
  		if (fds)
  			swap(cur_fds, fds);
  	}
278a5fbae   Christian Brauner   open: add close_r...
696
697
698
  	max_fd = min(max_fd, cur_max);
  	while (fd <= max_fd) {
  		struct file *file;
60997c3d4   Christian Brauner   close_range: add ...
699
  		file = pick_file(cur_fds, fd++);
278a5fbae   Christian Brauner   open: add close_r...
700
701
  		if (!file)
  			continue;
60997c3d4   Christian Brauner   close_range: add ...
702
  		filp_close(file, cur_fds);
278a5fbae   Christian Brauner   open: add close_r...
703
704
  		cond_resched();
  	}
60997c3d4   Christian Brauner   close_range: add ...
705
706
707
708
709
710
711
712
713
714
  	if (fds) {
  		/*
  		 * We're done closing the files we were supposed to. Time to install
  		 * the new file descriptor table and drop the old one.
  		 */
  		task_lock(me);
  		me->files = cur_fds;
  		task_unlock(me);
  		put_files_struct(fds);
  	}
278a5fbae   Christian Brauner   open: add close_r...
715
716
  	return 0;
  }
80cd79563   Todd Kjos   binder: fix use-a...
717
  /*
6e802a4ba   Jens Axboe   fs: move filp_clo...
718
719
720
   * Variant of __close_fd() that takes a reference on the file for a later
   * fput().  The caller must ensure that filp_close() is called on the file,
   * followed by an fput().
80cd79563   Todd Kjos   binder: fix use-a...
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
   */
  int __close_fd_get_file(unsigned int fd, struct file **res)
  {
  	/*
  	 * pick_file() performs the same "detach from the table and release
  	 * the descriptor number under file_lock" sequence that used to be
  	 * open-coded here; it returns NULL when @fd is out of range or the
  	 * slot is empty.
  	 */
  	struct file *file = pick_file(current->files, fd);

  	if (!file) {
  		*res = NULL;
  		return -ENOENT;
  	}

  	/* Extra reference for the caller, taken after file_lock is dropped. */
  	get_file(file);
  	*res = file;
  	return 0;
  }
6a6d27de3   Al Viro   take close-on-exe...
747
748
749
750
751
752
  /*
   * Close every descriptor in @files whose close-on-exec bit is set.  The
   * bitmap is walked one word at a time; file_lock is dropped around each
   * filp_close() call and re-taken afterwards.
   */
  void do_close_on_exec(struct files_struct *files)
  {
  	unsigned word;
  	struct fdtable *fdt;

  	/* exec unshares first */
  	spin_lock(&files->file_lock);
  	for (word = 0; ; word++) {
  		unsigned long bits;
  		unsigned fd = word * BITS_PER_LONG;

  		fdt = files_fdtable(files);
  		if (fd >= fdt->max_fds)
  			break;

  		bits = fdt->close_on_exec[word];
  		if (!bits)
  			continue;

  		/* Clear the whole word first, then process its saved copy. */
  		fdt->close_on_exec[word] = 0;
  		for ( ; bits; fd++, bits >>= 1) {
  			struct file *file;

  			if (!(bits & 1))
  				continue;

  			file = fdt->fd[fd];
  			if (file) {
  				rcu_assign_pointer(fdt->fd[fd], NULL);
  				__put_unused_fd(files, fd);
  				spin_unlock(&files->file_lock);
  				filp_close(file, files);
  				cond_resched();
  				spin_lock(&files->file_lock);
  			}
  		}
  	}
  	spin_unlock(&files->file_lock);
  }
5e876fb43   Sargun Dhillon   vfs, fdtable: Add...
782
783
  /*
   * Look up @fd in @files under rcu_read_lock() and take @refs references on
   * the file found.  Returns NULL if the slot is empty or if any bit of @mask
   * is set in the file's f_mode.
   */
  static struct file *__fget_files(struct files_struct *files, unsigned int fd,
  				 fmode_t mask, unsigned int refs)
  {
  	struct file *file;

  	rcu_read_lock();
  	for (;;) {
  		file = fcheck_files(files, fd);
  		if (!file)
  			break;

  		if (file->f_mode & mask) {
  			file = NULL;
  			break;
  		}

  		if (get_file_rcu_many(file, refs))
  			break;

  		/* File object ref couldn't be taken.
  		 * dup2() atomicity guarantee is the reason
  		 * we loop to catch the new file (or NULL pointer)
  		 */
  	}
  	rcu_read_unlock();

  	return file;
  }
5e876fb43   Sargun Dhillon   vfs, fdtable: Add...
804
805
806
807
808
  static inline struct file *__fget(unsigned int fd, fmode_t mask,
  				  unsigned int refs)
  {
  	/* Same as __fget_files(), applied to the calling task's table. */
  	struct files_struct *files = current->files;

  	return __fget_files(files, fd, mask, refs);
  }
091141a42   Jens Axboe   fs: add fget_many...
809
810
811
812
  struct file *fget_many(unsigned int fd, unsigned int refs)
  {
  	/* Files with FMODE_PATH set in f_mode are reported as NULL. */
  	struct file *file = __fget(fd, FMODE_PATH, refs);

  	return file;
  }
1deb46e25   Oleg Nesterov   fs: factor out co...
813
814
  struct file *fget(unsigned int fd)
  {
091141a42   Jens Axboe   fs: add fget_many...
815
  	return __fget(fd, FMODE_PATH, 1);
1deb46e25   Oleg Nesterov   fs: factor out co...
816
  }
0ee8cdfe6   Al Viro   take fget() and f...
817
818
819
820
  EXPORT_SYMBOL(fget);
  
  struct file *fget_raw(unsigned int fd)
  {
  	/* Empty mask: no f_mode bit causes the file to be rejected. */
  	struct file *file = __fget(fd, 0, 1);

  	return file;
  }
0ee8cdfe6   Al Viro   take fget() and f...
823
  EXPORT_SYMBOL(fget_raw);
5e876fb43   Sargun Dhillon   vfs, fdtable: Add...
824
825
826
827
828
829
830
831
832
833
834
  /*
   * Take one reference on the file installed at @fd in @task's descriptor
   * table.  Returns NULL if @task has no files_struct or the lookup fails.
   */
  struct file *fget_task(struct task_struct *task, unsigned int fd)
  {
  	struct files_struct *files;
  	struct file *found;

  	/* task->files is sampled and used while holding task_lock(). */
  	task_lock(task);
  	files = task->files;
  	found = files ? __fget_files(files, fd, 0, 1) : NULL;
  	task_unlock(task);

  	return found;
  }
0ee8cdfe6   Al Viro   take fget() and f...
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
  /*
   * Lightweight file lookup - no refcnt increment if fd table isn't shared.
   *
   * You can use this instead of fget if you satisfy all of the following
   * conditions:
   * 1) You must call fput_light before exiting the syscall and returning control
   *    to userspace (i.e. you cannot remember the returned struct file * after
   *    returning to userspace).
   * 2) You must not call filp_close on the returned struct file * in between
   *    calls to fget_light and fput_light.
   * 3) You must not clone the current task in between the calls to fget_light
   *    and fput_light.
   *
   * The fput_needed flag returned by fget_light should be passed to the
   * corresponding fput_light.
   */
bd2a31d52   Al Viro   get rid of fget_l...
851
  static unsigned long __fget_light(unsigned int fd, fmode_t mask)
  {
  	struct files_struct *files = current->files;
  	struct file *file;

  	if (atomic_read(&files->count) != 1) {
  		/*
  		 * The table has more than one user: take a real reference
  		 * and flag it so that it is dropped again later.
  		 */
  		file = __fget(fd, mask, 1);
  		if (!file)
  			return 0;
  		return FDPUT_FPUT | (unsigned long)file;
  	}

  	/* Sole user of the table: plain lookup, no reference taken. */
  	file = __fcheck_files(files, fd);
  	if (!file || unlikely(file->f_mode & mask))
  		return 0;
  	return (unsigned long)file;
  }
bd2a31d52   Al Viro   get rid of fget_l...
868
  unsigned long __fdget(unsigned int fd)
ad4618344   Oleg Nesterov   fs: factor out co...
869
  {
bd2a31d52   Al Viro   get rid of fget_l...
870
  	return __fget_light(fd, FMODE_PATH);
ad4618344   Oleg Nesterov   fs: factor out co...
871
  }
bd2a31d52   Al Viro   get rid of fget_l...
872
  EXPORT_SYMBOL(__fdget);
0ee8cdfe6   Al Viro   take fget() and f...
873

bd2a31d52   Al Viro   get rid of fget_l...
874
  unsigned long __fdget_raw(unsigned int fd)
  {
  	/* Lightweight lookup with an empty mode mask. */
  	unsigned long v = __fget_light(fd, 0);

  	return v;
  }
fe17f22d7   Al Viro   take purely descr...
878

bd2a31d52   Al Viro   get rid of fget_l...
879
880
  unsigned long __fdget_pos(unsigned int fd)
  {
99aea6813   Eric Biggers   vfs: Don't let __...
881
882
  	unsigned long v = __fdget(fd);
  	struct file *file = (struct file *)(v & ~3);
bd2a31d52   Al Viro   get rid of fget_l...
883

2be7d348f   Linus Torvalds   Revert "vfs: prop...
884
  	if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
bd2a31d52   Al Viro   get rid of fget_l...
885
886
887
888
889
  		if (file_count(file) > 1) {
  			v |= FDPUT_POS_UNLOCK;
  			mutex_lock(&file->f_pos_lock);
  		}
  	}
99aea6813   Eric Biggers   vfs: Don't let __...
890
  	return v;
bd2a31d52   Al Viro   get rid of fget_l...
891
  }
63b6df141   Al Viro   give readdir(2)/g...
892
893
894
895
  void __f_unlock_pos(struct file *f)
  {
  	/* Release the per-file position mutex. */
  	struct mutex *pos_lock = &f->f_pos_lock;

  	mutex_unlock(pos_lock);
  }
bd2a31d52   Al Viro   get rid of fget_l...
896
897
898
899
900
  /*
   * We only lock f_pos if we have threads or if the file might be
   * shared with another process. In both cases we'll have an elevated
   * file count (done either by fdget() or by fork()).
   */
fe17f22d7   Al Viro   take purely descr...
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
  /*
   * Set (@flag non-zero) or clear (@flag zero) the close-on-exec bit of @fd
   * in the calling task's descriptor table, under file_lock.
   */
  void set_close_on_exec(unsigned int fd, int flag)
  {
  	struct files_struct *files = current->files;
  	struct fdtable *table;

  	spin_lock(&files->file_lock);
  	table = files_fdtable(files);
  	if (!flag)
  		__clear_close_on_exec(fd, table);
  	else
  		__set_close_on_exec(fd, table);
  	spin_unlock(&files->file_lock);
  }
  
  /*
   * Report the close-on-exec state of @fd in the calling task's descriptor
   * table.  The table is read under rcu_read_lock() only.
   */
  bool get_close_on_exec(unsigned int fd)
  {
  	struct files_struct *files = current->files;
  	bool cloexec;

  	rcu_read_lock();
  	cloexec = close_on_exec(fd, files_fdtable(files));
  	rcu_read_unlock();

  	return cloexec;
  }
8280d1617   Al Viro   new helper: repla...
925
926
  static int do_dup2(struct files_struct *files,
  	struct file *file, unsigned fd, unsigned flags)
e983094d6   Al Viro   missing annotatio...
927
  __releases(&files->file_lock)
fe17f22d7   Al Viro   take purely descr...
928
  {
8280d1617   Al Viro   new helper: repla...
929
  	struct file *tofree;
fe17f22d7   Al Viro   take purely descr...
930
  	struct fdtable *fdt;
fe17f22d7   Al Viro   take purely descr...
931
932
933
934
935
936
937
938
939
940
941
942
943
944
  	/*
  	 * We need to detect attempts to do dup2() over allocated but still
  	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
  	 * extra work in their equivalent of fget() - they insert struct
  	 * file immediately after grabbing descriptor, mark it larval if
  	 * more work (e.g. actual opening) is needed and make sure that
  	 * fget() treats larval files as absent.  Potentially interesting,
  	 * but while extra work in fget() is trivial, locking implications
  	 * and amount of surgery on open()-related paths in VFS are not.
  	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
  	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
  	 * scope of POSIX or SUS, since neither considers shared descriptor
  	 * tables and this condition does not arise without those.
  	 */
fe17f22d7   Al Viro   take purely descr...
945
  	fdt = files_fdtable(files);
8280d1617   Al Viro   new helper: repla...
946
947
948
  	tofree = fdt->fd[fd];
  	if (!tofree && fd_is_open(fd, fdt))
  		goto Ebusy;
fe17f22d7   Al Viro   take purely descr...
949
  	get_file(file);
8280d1617   Al Viro   new helper: repla...
950
951
  	rcu_assign_pointer(fdt->fd[fd], file);
  	__set_open_fd(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
952
  	if (flags & O_CLOEXEC)
8280d1617   Al Viro   new helper: repla...
953
  		__set_close_on_exec(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
954
  	else
8280d1617   Al Viro   new helper: repla...
955
  		__clear_close_on_exec(fd, fdt);
fe17f22d7   Al Viro   take purely descr...
956
957
958
959
  	spin_unlock(&files->file_lock);
  
  	if (tofree)
  		filp_close(tofree, files);
8280d1617   Al Viro   new helper: repla...
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
  	return fd;
  
  Ebusy:
  	spin_unlock(&files->file_lock);
  	return -EBUSY;
  }
  
  int replace_fd(unsigned fd, struct file *file, unsigned flags)
  {
  	int err;
  	struct files_struct *files = current->files;
  
  	if (!file)
  		return __close_fd(files, fd);
  
  	if (fd >= rlimit(RLIMIT_NOFILE))
08f05c497   Al Viro   Return the right ...
976
  		return -EBADF;
8280d1617   Al Viro   new helper: repla...
977
978
979
980
981
982
983
984
985
986
987
  
  	spin_lock(&files->file_lock);
  	err = expand_files(files, fd);
  	if (unlikely(err < 0))
  		goto out_unlock;
  	return do_dup2(files, file, fd, flags);
  
  out_unlock:
  	spin_unlock(&files->file_lock);
  	return err;
  }
665906104   Kees Cook   fs: Move __scm_in...
988
989
990
  /**
   * __receive_fd() - Install received file into file descriptor table
   *
173817151   Kees Cook   fs: Expand __rece...
991
   * @fd: fd to install into (if negative, a new fd will be allocated)
665906104   Kees Cook   fs: Move __scm_in...
992
993
994
995
996
   * @file: struct file that was received from another process
   * @ufd: __user pointer to write new fd number to
   * @o_flags: the O_* flags to apply to the new fd entry
   *
   * Installs a received file into the file descriptor table, with appropriate
deefa7f35   Kees Cook   fs: Add receive_f...
997
998
   * checks and count updates. Optionally writes the fd number to userspace, if
   * @ufd is non-NULL.
665906104   Kees Cook   fs: Move __scm_in...
999
1000
1001
1002
   *
   * This helper handles its own reference counting of the incoming
   * struct file.
   *
deefa7f35   Kees Cook   fs: Add receive_f...
1003
   * Returns the newly installed fd, or a negative errno on error.
665906104   Kees Cook   fs: Move __scm_in...
1004
   */
173817151   Kees Cook   fs: Expand __rece...
1005
  int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags)
  {
  	int new_fd;
  	int error;

  	error = security_file_receive(file);
  	if (error)
  		return error;

  	/* A negative @fd asks for a fresh descriptor; otherwise reuse @fd. */
  	if (fd < 0) {
  		new_fd = get_unused_fd_flags(o_flags);
  		if (new_fd < 0)
  			return new_fd;
  	} else {
  		new_fd = fd;
  	}

  	if (ufd) {
  		error = put_user(new_fd, ufd);
  		if (error) {
  			/* Only give back a descriptor that was reserved here. */
  			if (fd < 0)
  				put_unused_fd(new_fd);
  			return error;
  		}
  	}

  	if (fd < 0) {
  		fd_install(new_fd, get_file(file));
  	} else {
  		/*
  		 * replace_fd() returns the descriptor number on success, which
  		 * is non-zero for every descriptor except 0.  Only a negative
  		 * value is a failure; treating any non-zero value as an error
  		 * would skip the socket accounting below.
  		 */
  		error = replace_fd(new_fd, file, o_flags);
  		if (error < 0)
  			return error;
  	}

  	/* Bump the sock usage counts, if any. */
  	__receive_sock(file);
  	return new_fd;
  }
c7248321a   Dominik Brodowski   fs: add ksys_dup{...
1040
  static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
8280d1617   Al Viro   new helper: repla...
1041
1042
1043
1044
1045
1046
1047
  {
  	int err = -EBADF;
  	struct file *file;
  	struct files_struct *files = current->files;
  
  	if ((flags & ~O_CLOEXEC) != 0)
  		return -EINVAL;
aed976475   Richard W.M. Jones   dup3: Return an e...
1048
1049
  	if (unlikely(oldfd == newfd))
  		return -EINVAL;
8280d1617   Al Viro   new helper: repla...
1050
  	if (newfd >= rlimit(RLIMIT_NOFILE))
08f05c497   Al Viro   Return the right ...
1051
  		return -EBADF;
8280d1617   Al Viro   new helper: repla...
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
  
  	spin_lock(&files->file_lock);
  	err = expand_files(files, newfd);
  	file = fcheck(oldfd);
  	if (unlikely(!file))
  		goto Ebadf;
  	if (unlikely(err < 0)) {
  		if (err == -EMFILE)
  			goto Ebadf;
  		goto out_unlock;
  	}
  	return do_dup2(files, file, newfd, flags);
fe17f22d7   Al Viro   take purely descr...
1064
1065
1066
1067
1068
1069
1070
  
  Ebadf:
  	err = -EBADF;
  out_unlock:
  	spin_unlock(&files->file_lock);
  	return err;
  }
c7248321a   Dominik Brodowski   fs: add ksys_dup{...
1071
1072
1073
1074
  SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
  {
  	/* Thin syscall entry point; all checks live in ksys_dup3(). */
  	int ret = ksys_dup3(oldfd, newfd, flags);

  	return ret;
  }
fe17f22d7   Al Viro   take purely descr...
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
  SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
  {
  	struct files_struct *files;
  	int retval;

  	if (likely(newfd != oldfd))
  		return ksys_dup3(oldfd, newfd, 0);

  	/*
  	 * Corner case: both descriptors are the same.  Nothing is duplicated;
  	 * return the descriptor if it is open, -EBADF otherwise.
  	 */
  	files = current->files;
  	retval = oldfd;

  	rcu_read_lock();
  	if (!fcheck_files(files, oldfd))
  		retval = -EBADF;
  	rcu_read_unlock();

  	return retval;
  }
bc1cd99a9   Christoph Hellwig   fs: remove ksys_dup
1089
  SYSCALL_DEFINE1(dup, unsigned int, fildes)
  {
  	struct file *file = fget_raw(fildes);
  	int newfd;

  	if (!file)
  		return -EBADF;

  	newfd = get_unused_fd_flags(0);
  	if (newfd < 0) {
  		/* No descriptor available: drop the reference taken above. */
  		fput(file);
  		return newfd;
  	}

  	/* The reference from fget_raw() is handed to the new descriptor. */
  	fd_install(newfd, file);
  	return newfd;
  }
  
  /*
   * Allocate a descriptor via alloc_fd(@from, @flags) and install @file
   * there with an extra reference.  Returns the new descriptor, -EINVAL if
   * @from is at or above RLIMIT_NOFILE, or the error from alloc_fd().
   */
  int f_dupfd(unsigned int from, struct file *file, unsigned flags)
  {
  	int newfd;

  	if (from >= rlimit(RLIMIT_NOFILE))
  		return -EINVAL;

  	newfd = alloc_fd(from, flags);
  	if (newfd < 0)
  		return newfd;

  	get_file(file);
  	fd_install(newfd, file);
  	return newfd;
  }
c3c073f80   Al Viro   new helper: itera...
1116
1117
1118
1119
1120
1121
  
  /*
   * Call @f(@p, file, fd) for every installed file in @files, starting at
   * descriptor @n and holding files->file_lock throughout.  The walk stops
   * at the first non-zero return from @f, which becomes the result; the
   * result is 0 otherwise, including when @files is NULL.
   */
  int iterate_fd(struct files_struct *files, unsigned n,
  		int (*f)(const void *, struct file *, unsigned),
  		const void *p)
  {
  	struct fdtable *fdt;
  	int res = 0;

  	if (!files)
  		return 0;

  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	while (n < fdt->max_fds) {
  		struct file *file;

  		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
  		if (file) {
  			res = f(p, file, n);
  			if (res)
  				break;
  		}
  		n++;
  	}
  	spin_unlock(&files->file_lock);

  	return res;
  }
  EXPORT_SYMBOL(iterate_fd);