Blame view
fs/file.c
23.5 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 |
/* * linux/fs/file.c * * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes * * Manage the dynamic fd arrays in the process files_struct. */ |
fe17f22d7 take purely descr... |
8 |
#include <linux/syscalls.h> |
630d9c472 fs: reduce the us... |
9 |
#include <linux/export.h> |
1da177e4c Linux-2.6.12-rc2 |
10 11 |
#include <linux/fs.h> #include <linux/mm.h> |
6d4831c28 vfs: avoid large ... |
12 |
#include <linux/mmzone.h> |
1da177e4c Linux-2.6.12-rc2 |
13 |
#include <linux/time.h> |
d43c36dc6 headers: remove s... |
14 |
#include <linux/sched.h> |
1da177e4c Linux-2.6.12-rc2 |
15 16 17 |
#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/file.h> |
9f3acc314 [PATCH] split lin... |
18 |
#include <linux/fdtable.h> |
1da177e4c Linux-2.6.12-rc2 |
19 |
#include <linux/bitops.h> |
ab2af1f50 [PATCH] files: fi... |
20 21 22 23 24 25 26 27 |
#include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/workqueue.h> struct fdtable_defer { spinlock_t lock; struct work_struct wq; |
ab2af1f50 [PATCH] files: fi... |
28 29 |
struct fdtable *next; }; |
9cfe015aa get rid of NR_OPE... |
30 |
int sysctl_nr_open __read_mostly = 1024*1024; |
eceea0b3d [PATCH] avoid mul... |
31 32 |
int sysctl_nr_open_min = BITS_PER_LONG; int sysctl_nr_open_max = 1024 * 1024; /* raised later */ |
9cfe015aa get rid of NR_OPE... |
33 |
|
ab2af1f50 [PATCH] files: fi... |
34 35 36 37 38 39 40 |
/* * We use this list to defer free fdtables that have vmalloced * sets/arrays. By keeping a per-cpu list, we avoid having to embed * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in * this per-task structure. */ static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
1da177e4c Linux-2.6.12-rc2 |
41 |
|
1fd36adcd Replace the fd_se... |
42 |
/*
 * Allocate @size bytes for an fd array or an fd bitmap block.
 *
 * Requests that the VM would consider "large" (more than
 * PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) can stress page reclaim, so they
 * skip kmalloc() entirely.  Smaller requests try kmalloc() first and fall
 * back to vmalloc() if that fails.
 *
 * Returns the new memory, or NULL if vmalloc() failed too.  The result
 * must be released with free_fdmem().
 */
static void *alloc_fdmem(size_t size)
{
	void *mem = NULL;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
		mem = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);

	return mem ? mem : vmalloc(size);
}
a892e2d7d vfs: use kmalloc(... |
55 |
/*
 * Release memory obtained from alloc_fdmem().  The address itself tells
 * us which allocator it came from.
 */
static void free_fdmem(void *ptr)
{
	if (is_vmalloc_addr(ptr))
		vfree(ptr);
	else
		kfree(ptr);
}
a892e2d7d vfs: use kmalloc(... |
59 |
static void __free_fdtable(struct fdtable *fdt) |
1da177e4c Linux-2.6.12-rc2 |
60 |
{ |
a892e2d7d vfs: use kmalloc(... |
61 62 63 |
free_fdmem(fdt->fd); free_fdmem(fdt->open_fds); kfree(fdt); |
ab2af1f50 [PATCH] files: fi... |
64 |
} |
1da177e4c Linux-2.6.12-rc2 |
65 |
|
65f27f384 WorkStruct: Pass ... |
66 |
static void free_fdtable_work(struct work_struct *work) |
ab2af1f50 [PATCH] files: fi... |
67 |
{ |
65f27f384 WorkStruct: Pass ... |
68 69 |
struct fdtable_defer *f = container_of(work, struct fdtable_defer, wq); |
ab2af1f50 [PATCH] files: fi... |
70 |
struct fdtable *fdt; |
1da177e4c Linux-2.6.12-rc2 |
71 |
|
ab2af1f50 [PATCH] files: fi... |
72 73 74 75 76 77 |
spin_lock_bh(&f->lock); fdt = f->next; f->next = NULL; spin_unlock_bh(&f->lock); while(fdt) { struct fdtable *next = fdt->next; |
a892e2d7d vfs: use kmalloc(... |
78 79 |
__free_fdtable(fdt); |
ab2af1f50 [PATCH] files: fi... |
80 81 82 |
fdt = next; } } |
1da177e4c Linux-2.6.12-rc2 |
83 |
|
7cf4dc3c8 move files_struct... |
84 |
static void free_fdtable_rcu(struct rcu_head *rcu) |
ab2af1f50 [PATCH] files: fi... |
85 86 |
{ struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); |
ab2af1f50 [PATCH] files: fi... |
87 |
struct fdtable_defer *fddef; |
1da177e4c Linux-2.6.12-rc2 |
88 |
|
ab2af1f50 [PATCH] files: fi... |
89 |
BUG_ON(!fdt); |
1983e781d trim free_fdtable... |
90 |
BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); |
ab2af1f50 [PATCH] files: fi... |
91 |
|
a892e2d7d vfs: use kmalloc(... |
92 |
if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { |
ab2af1f50 [PATCH] files: fi... |
93 |
kfree(fdt->fd); |
5466b456e [PATCH] fdtable: ... |
94 |
kfree(fdt->open_fds); |
ab2af1f50 [PATCH] files: fi... |
95 |
kfree(fdt); |
1da177e4c Linux-2.6.12-rc2 |
96 |
} else { |
ab2af1f50 [PATCH] files: fi... |
97 98 99 100 |
fddef = &get_cpu_var(fdtable_defer_list); spin_lock(&fddef->lock); fdt->next = fddef->next; fddef->next = fdt; |
593be07ae [PATCH] file: kil... |
101 102 |
/* vmallocs are handled from the workqueue context */ schedule_work(&fddef->wq); |
ab2af1f50 [PATCH] files: fi... |
103 104 |
spin_unlock(&fddef->lock); put_cpu_var(fdtable_defer_list); |
1da177e4c Linux-2.6.12-rc2 |
105 |
} |
ab2af1f50 [PATCH] files: fi... |
106 |
} |
ab2af1f50 [PATCH] files: fi... |
107 108 109 110 |
/* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. */ |
5466b456e [PATCH] fdtable: ... |
111 |
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) |
ab2af1f50 [PATCH] files: fi... |
112 |
{ |
5466b456e [PATCH] fdtable: ... |
113 |
unsigned int cpy, set; |
ab2af1f50 [PATCH] files: fi... |
114 |
|
5466b456e [PATCH] fdtable: ... |
115 |
BUG_ON(nfdt->max_fds < ofdt->max_fds); |
5466b456e [PATCH] fdtable: ... |
116 117 118 119 120 121 122 123 124 125 126 127 |
cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)(nfdt->fd) + cpy, 0, set); cpy = ofdt->max_fds / BITS_PER_BYTE; set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE; memcpy(nfdt->open_fds, ofdt->open_fds, cpy); memset((char *)(nfdt->open_fds) + cpy, 0, set); memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); memset((char *)(nfdt->close_on_exec) + cpy, 0, set); |
1da177e4c Linux-2.6.12-rc2 |
128 |
} |
5466b456e [PATCH] fdtable: ... |
129 |
static struct fdtable * alloc_fdtable(unsigned int nr) |
1da177e4c Linux-2.6.12-rc2 |
130 |
{ |
5466b456e [PATCH] fdtable: ... |
131 |
struct fdtable *fdt; |
1fd36adcd Replace the fd_se... |
132 |
void *data; |
1da177e4c Linux-2.6.12-rc2 |
133 |
|
ab2af1f50 [PATCH] files: fi... |
134 |
/* |
5466b456e [PATCH] fdtable: ... |
135 136 137 138 139 |
* Figure out how many fds we actually want to support in this fdtable. * Allocation steps are keyed to the size of the fdarray, since it * grows far faster than any of the other dynamic data. We try to fit * the fdarray into comfortable page-tuned chunks: starting at 1024B * and growing in powers of two from there on. |
ab2af1f50 [PATCH] files: fi... |
140 |
*/ |
5466b456e [PATCH] fdtable: ... |
141 142 143 |
nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); |
5c598b342 [PATCH] fix sysct... |
144 145 146 147 148 149 150 151 152 153 |
/* * Note that this can drive nr *below* what we had passed if sysctl_nr_open * had been set lower between the check in expand_files() and here. Deal * with that in caller, it's cheaper that way. * * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise * bitmaps handling below becomes unpleasant, to put it mildly... */ if (unlikely(nr > sysctl_nr_open)) nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; |
bbea9f696 [PATCH] fdtable: ... |
154 |
|
5466b456e [PATCH] fdtable: ... |
155 156 |
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) |
bbea9f696 [PATCH] fdtable: ... |
157 |
goto out; |
5466b456e [PATCH] fdtable: ... |
158 159 160 161 |
fdt->max_fds = nr; data = alloc_fdmem(nr * sizeof(struct file *)); if (!data) goto out_fdt; |
1fd36adcd Replace the fd_se... |
162 163 164 |
fdt->fd = data; data = alloc_fdmem(max_t(size_t, |
5466b456e [PATCH] fdtable: ... |
165 166 167 |
2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); if (!data) goto out_arr; |
1fd36adcd Replace the fd_se... |
168 |
fdt->open_fds = data; |
5466b456e [PATCH] fdtable: ... |
169 |
data += nr / BITS_PER_BYTE; |
1fd36adcd Replace the fd_se... |
170 |
fdt->close_on_exec = data; |
5466b456e [PATCH] fdtable: ... |
171 |
fdt->next = NULL; |
ab2af1f50 [PATCH] files: fi... |
172 |
return fdt; |
5466b456e [PATCH] fdtable: ... |
173 174 |
out_arr: |
a892e2d7d vfs: use kmalloc(... |
175 |
free_fdmem(fdt->fd); |
5466b456e [PATCH] fdtable: ... |
176 |
out_fdt: |
ab2af1f50 [PATCH] files: fi... |
177 |
kfree(fdt); |
5466b456e [PATCH] fdtable: ... |
178 |
out: |
ab2af1f50 [PATCH] files: fi... |
179 180 |
return NULL; } |
1da177e4c Linux-2.6.12-rc2 |
181 |
|
ab2af1f50 [PATCH] files: fi... |
182 |
/* |
74d392aaa [PATCH] Clean up ... |
183 184 185 186 187 |
* Expand the file descriptor table. * This function will allocate a new fdtable and both fd array and fdset, of * the given size. * Return <0 error code on error; 1 on successful completion. * The files->file_lock should be held on entry, and will be held on exit. |
ab2af1f50 [PATCH] files: fi... |
188 189 190 191 192 |
*/ static int expand_fdtable(struct files_struct *files, int nr) __releases(files->file_lock) __acquires(files->file_lock) { |
74d392aaa [PATCH] Clean up ... |
193 |
struct fdtable *new_fdt, *cur_fdt; |
ab2af1f50 [PATCH] files: fi... |
194 195 |
spin_unlock(&files->file_lock); |
74d392aaa [PATCH] Clean up ... |
196 |
new_fdt = alloc_fdtable(nr); |
ab2af1f50 [PATCH] files: fi... |
197 |
spin_lock(&files->file_lock); |
74d392aaa [PATCH] Clean up ... |
198 199 |
if (!new_fdt) return -ENOMEM; |
ab2af1f50 [PATCH] files: fi... |
200 |
/* |
5c598b342 [PATCH] fix sysct... |
201 202 203 204 |
* extremely unlikely race - sysctl_nr_open decreased between the check in * caller and alloc_fdtable(). Cheaper to catch it here... */ if (unlikely(new_fdt->max_fds <= nr)) { |
a892e2d7d vfs: use kmalloc(... |
205 |
__free_fdtable(new_fdt); |
5c598b342 [PATCH] fix sysct... |
206 207 208 |
return -EMFILE; } /* |
74d392aaa [PATCH] Clean up ... |
209 210 |
* Check again since another task may have expanded the fd table while * we dropped the lock |
ab2af1f50 [PATCH] files: fi... |
211 |
*/ |
74d392aaa [PATCH] Clean up ... |
212 |
cur_fdt = files_fdtable(files); |
bbea9f696 [PATCH] fdtable: ... |
213 |
if (nr >= cur_fdt->max_fds) { |
74d392aaa [PATCH] Clean up ... |
214 215 216 |
/* Continue as planned */ copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); |
4fd45812c [PATCH] fdtable: ... |
217 |
if (cur_fdt->max_fds > NR_OPEN_DEFAULT) |
1983e781d trim free_fdtable... |
218 |
call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
ab2af1f50 [PATCH] files: fi... |
219 |
} else { |
74d392aaa [PATCH] Clean up ... |
220 |
/* Somebody else expanded, so undo our attempt */ |
a892e2d7d vfs: use kmalloc(... |
221 |
__free_fdtable(new_fdt); |
ab2af1f50 [PATCH] files: fi... |
222 |
} |
74d392aaa [PATCH] Clean up ... |
223 |
return 1; |
1da177e4c Linux-2.6.12-rc2 |
224 225 226 227 |
} /* * Expand files. |
74d392aaa [PATCH] Clean up ... |
228 229 230 231 232 |
* This function will expand the file structures, if the requested size exceeds * the current capacity and there is room for expansion. * Return <0 error code on error; 0 when nothing done; 1 when files were * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. |
1da177e4c Linux-2.6.12-rc2 |
233 |
*/ |
ad47bd725 make expand_files... |
234 |
static int expand_files(struct files_struct *files, int nr) |
1da177e4c Linux-2.6.12-rc2 |
235 |
{ |
badf16621 [PATCH] files: br... |
236 |
struct fdtable *fdt; |
1da177e4c Linux-2.6.12-rc2 |
237 |
|
badf16621 [PATCH] files: br... |
238 |
fdt = files_fdtable(files); |
4e1e018ec [PATCH] fix RLIM_... |
239 |
|
74d392aaa [PATCH] Clean up ... |
240 |
/* Do we need to expand? */ |
bbea9f696 [PATCH] fdtable: ... |
241 |
if (nr < fdt->max_fds) |
74d392aaa [PATCH] Clean up ... |
242 |
return 0; |
4e1e018ec [PATCH] fix RLIM_... |
243 |
|
74d392aaa [PATCH] Clean up ... |
244 |
/* Can we expand? */ |
9cfe015aa get rid of NR_OPE... |
245 |
if (nr >= sysctl_nr_open) |
74d392aaa [PATCH] Clean up ... |
246 247 248 249 |
return -EMFILE; /* All good, so we try */ return expand_fdtable(files, nr); |
1da177e4c Linux-2.6.12-rc2 |
250 |
} |
ab2af1f50 [PATCH] files: fi... |
251 |
|
b8318b01a take __{set,clear... |
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 |
/* Set the close-on-exec bit of descriptor @fd in @fdt. */
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
{
	unsigned long *cloexec_map = fdt->close_on_exec;

	__set_bit(fd, cloexec_map);
}

/* Clear the close-on-exec bit of descriptor @fd in @fdt. */
static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
{
	unsigned long *cloexec_map = fdt->close_on_exec;

	__clear_bit(fd, cloexec_map);
}

/* Mark descriptor @fd as in use in @fdt's open_fds bitmap. */
static inline void __set_open_fd(int fd, struct fdtable *fdt)
{
	unsigned long *open_map = fdt->open_fds;

	__set_bit(fd, open_map);
}

/* Mark descriptor @fd as free in @fdt's open_fds bitmap. */
static inline void __clear_open_fd(int fd, struct fdtable *fdt)
{
	unsigned long *open_map = fdt->open_fds;

	__clear_bit(fd, open_map);
}
02afc6267 [PATCH] dup_fd() ... |
271 272 273 274 275 276 |
static int count_open_files(struct fdtable *fdt) { int size = fdt->max_fds; int i; /* Find the last open fd */ |
1fd36adcd Replace the fd_se... |
277 278 |
for (i = size / BITS_PER_LONG; i > 0; ) { if (fdt->open_fds[--i]) |
02afc6267 [PATCH] dup_fd() ... |
279 280 |
break; } |
1fd36adcd Replace the fd_se... |
281 |
i = (i + 1) * BITS_PER_LONG; |
02afc6267 [PATCH] dup_fd() ... |
282 283 |
return i; } |
02afc6267 [PATCH] dup_fd() ... |
284 285 286 287 288 289 290 291 292 293 294 295 296 |
/* * Allocate a new files structure and copy contents from the * passed in files structure. * errorp will be valid only when the returned files_struct is NULL. */ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { struct files_struct *newf; struct file **old_fds, **new_fds; int open_files, size, i; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; |
afbec7fff [PATCH] dup_fd() ... |
297 |
newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); |
02afc6267 [PATCH] dup_fd() ... |
298 299 |
if (!newf) goto out; |
afbec7fff [PATCH] dup_fd() ... |
300 301 302 303 304 305 |
atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; |
1fd36adcd Replace the fd_se... |
306 307 |
new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; |
afbec7fff [PATCH] dup_fd() ... |
308 |
new_fdt->fd = &newf->fd_array[0]; |
afbec7fff [PATCH] dup_fd() ... |
309 |
new_fdt->next = NULL; |
02afc6267 [PATCH] dup_fd() ... |
310 311 |
spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); |
02afc6267 [PATCH] dup_fd() ... |
312 313 314 315 |
open_files = count_open_files(old_fdt); /* * Check whether we need to allocate a larger fd array and fd set. |
02afc6267 [PATCH] dup_fd() ... |
316 |
*/ |
adbecb128 [PATCH] dup_fd() ... |
317 |
while (unlikely(open_files > new_fdt->max_fds)) { |
02afc6267 [PATCH] dup_fd() ... |
318 |
spin_unlock(&oldf->file_lock); |
9dec3c4d3 [PATCH] dup_fd() ... |
319 |
|
a892e2d7d vfs: use kmalloc(... |
320 321 |
if (new_fdt != &newf->fdtab) __free_fdtable(new_fdt); |
adbecb128 [PATCH] dup_fd() ... |
322 |
|
9dec3c4d3 [PATCH] dup_fd() ... |
323 324 325 326 327 328 329 330 |
new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; goto out_release; } /* beyond sysctl_nr_open; nothing to do */ if (unlikely(new_fdt->max_fds < open_files)) { |
a892e2d7d vfs: use kmalloc(... |
331 |
__free_fdtable(new_fdt); |
9dec3c4d3 [PATCH] dup_fd() ... |
332 |
*errorp = -EMFILE; |
02afc6267 [PATCH] dup_fd() ... |
333 |
goto out_release; |
9dec3c4d3 [PATCH] dup_fd() ... |
334 |
} |
9dec3c4d3 [PATCH] dup_fd() ... |
335 |
|
02afc6267 [PATCH] dup_fd() ... |
336 337 338 339 340 341 342 |
/* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need * the latest pointer. */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); |
adbecb128 [PATCH] dup_fd() ... |
343 |
open_files = count_open_files(old_fdt); |
02afc6267 [PATCH] dup_fd() ... |
344 345 346 347 |
} old_fds = old_fdt->fd; new_fds = new_fdt->fd; |
1fd36adcd Replace the fd_se... |
348 349 |
memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); |
02afc6267 [PATCH] dup_fd() ... |
350 351 352 353 354 355 356 357 358 359 360 361 |
for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; if (f) { get_file(f); } else { /* * The fd may be claimed in the fd bitmap but not yet * instantiated in the files array if a sibling thread * is partway through open(). So make sure that this * fd is available to the new process. */ |
1dce27c5a Wrap accesses to ... |
362 |
__clear_open_fd(open_files - i, new_fdt); |
02afc6267 [PATCH] dup_fd() ... |
363 364 365 366 367 368 369 370 371 372 373 374 |
} rcu_assign_pointer(*new_fds++, f); } spin_unlock(&oldf->file_lock); /* compute the remainder to be cleared */ size = (new_fdt->max_fds - open_files) * sizeof(struct file *); /* This is long word aligned thus could use a optimized version */ memset(new_fds, 0, size); if (new_fdt->max_fds > open_files) { |
1fd36adcd Replace the fd_se... |
375 376 |
int left = (new_fdt->max_fds - open_files) / 8; int start = open_files / BITS_PER_LONG; |
02afc6267 [PATCH] dup_fd() ... |
377 |
|
1fd36adcd Replace the fd_se... |
378 379 |
memset(&new_fdt->open_fds[start], 0, left); memset(&new_fdt->close_on_exec[start], 0, left); |
02afc6267 [PATCH] dup_fd() ... |
380 |
} |
afbec7fff [PATCH] dup_fd() ... |
381 |
rcu_assign_pointer(newf->fdt, new_fdt); |
02afc6267 [PATCH] dup_fd() ... |
382 383 384 385 386 387 388 |
return newf; out_release: kmem_cache_free(files_cachep, newf); out: return NULL; } |
7cf4dc3c8 move files_struct... |
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 |
static void close_files(struct files_struct * files) { int i, j; struct fdtable *fdt; j = 0; /* * It is safe to dereference the fd table without RCU or * ->file_lock because this is the last reference to the * files structure. But use RCU to shut RCU-lockdep up. */ rcu_read_lock(); fdt = files_fdtable(files); rcu_read_unlock(); for (;;) { unsigned long set; i = j * BITS_PER_LONG; if (i >= fdt->max_fds) break; set = fdt->open_fds[j++]; while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); cond_resched(); } } i++; set >>= 1; } } } struct files_struct *get_files_struct(struct task_struct *task) { struct files_struct *files; task_lock(task); files = task->files; if (files) atomic_inc(&files->count); task_unlock(task); return files; } void put_files_struct(struct files_struct *files) { struct fdtable *fdt; if (atomic_dec_and_test(&files->count)) { close_files(files); |
b9e02af0a don't bother with... |
443 |
/* not really needed, since nobody can see us */ |
7cf4dc3c8 move files_struct... |
444 445 |
rcu_read_lock(); fdt = files_fdtable(files); |
7cf4dc3c8 move files_struct... |
446 |
rcu_read_unlock(); |
b9e02af0a don't bother with... |
447 448 449 450 |
/* free the arrays if they are not embedded */ if (fdt != &files->fdtab) __free_fdtable(fdt); kmem_cache_free(files_cachep, files); |
7cf4dc3c8 move files_struct... |
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 |
} } void reset_files_struct(struct files_struct *files) { struct task_struct *tsk = current; struct files_struct *old; old = tsk->files; task_lock(tsk); tsk->files = files; task_unlock(tsk); put_files_struct(old); } void exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; if (files) { task_lock(tsk); tsk->files = NULL; task_unlock(tsk); put_files_struct(files); } } |
ab2af1f50 [PATCH] files: fi... |
477 478 479 480 |
/* Set up the deferred-free list of one CPU: empty list, lock, work item. */
static void __devinit fdtable_defer_list_init(int cpu)
{
	struct fdtable_defer *defer = &per_cpu(fdtable_defer_list, cpu);

	defer->next = NULL;
	spin_lock_init(&defer->lock);
	INIT_WORK(&defer->wq, free_fdtable_work);
}

/*
 * Boot-time setup: initialise the deferred-free list of every possible
 * CPU and raise sysctl_nr_open_max to its real value.
 */
void __init files_defer_init(void)
{
	size_t limit;
	int cpu;

	for_each_possible_cpu(cpu)
		fdtable_defer_list_init(cpu);

	/*
	 * The count must fit in an int, the fd array size in bytes must fit
	 * in a size_t, and the result must be a multiple of BITS_PER_LONG.
	 */
	limit = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *));
	sysctl_nr_open_max = limit & -BITS_PER_LONG;
}
f52111b15 [PATCH] take init... |
493 494 495 496 497 498 499 |
/*
 * Statically initialised files_struct.  Its fd table is the embedded
 * fdtab, pointing at the embedded fd_array and *_init bitmaps, so it
 * starts out with room for NR_OPEN_DEFAULT descriptors and needs no
 * allocation.
 */
struct files_struct init_files = {
	.count		= ATOMIC_INIT(1),
	.fdt		= &init_files.fdtab,
	.fdtab		= {
		.max_fds	= NR_OPEN_DEFAULT,
		.fd		= &init_files.fd_array[0],
		.close_on_exec	= init_files.close_on_exec_init,
		.open_fds	= init_files.open_fds_init,
	},
	/*
	 * Name the lock after the object it actually lives in.  It used to
	 * be labelled init_task.file_lock, which is not where this lock is.
	 * NOTE(review): the macro argument is presumably used only as the
	 * lock's debug name - confirm against spinlock_types.h.
	 */
	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
};
1027abe88 [PATCH] merge loc... |
505 506 507 508 |
/* * allocate a file descriptor, mark it busy. */ |
dcfadfa4e new helper: __all... |
509 510 |
int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags) |
1027abe88 [PATCH] merge loc... |
511 |
{ |
1027abe88 [PATCH] merge loc... |
512 513 514 515 516 517 518 519 520 521 522 523 |
unsigned int fd; int error; struct fdtable *fdt; spin_lock(&files->file_lock); repeat: fdt = files_fdtable(files); fd = start; if (fd < files->next_fd) fd = files->next_fd; if (fd < fdt->max_fds) |
1fd36adcd Replace the fd_se... |
524 |
fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); |
1027abe88 [PATCH] merge loc... |
525 |
|
f33ff9927 take rlimit check... |
526 527 528 529 530 531 532 |
/* * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ error = -EMFILE; if (fd >= end) goto out; |
1027abe88 [PATCH] merge loc... |
533 534 535 536 537 538 539 540 541 542 543 544 545 |
error = expand_files(files, fd); if (error < 0) goto out; /* * If we needed to expand the fs array we * might have blocked - try again. */ if (error) goto repeat; if (start <= files->next_fd) files->next_fd = fd + 1; |
1dce27c5a Wrap accesses to ... |
546 |
__set_open_fd(fd, fdt); |
1027abe88 [PATCH] merge loc... |
547 |
if (flags & O_CLOEXEC) |
1dce27c5a Wrap accesses to ... |
548 |
__set_close_on_exec(fd, fdt); |
1027abe88 [PATCH] merge loc... |
549 |
else |
1dce27c5a Wrap accesses to ... |
550 |
__clear_close_on_exec(fd, fdt); |
1027abe88 [PATCH] merge loc... |
551 552 553 |
error = fd; #if 1 /* Sanity check */ |
7dc521579 vfs: Apply lockde... |
554 |
if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { |
1027abe88 [PATCH] merge loc... |
555 556 557 558 559 560 561 562 563 564 |
printk(KERN_WARNING "alloc_fd: slot %d not NULL! ", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } #endif out: spin_unlock(&files->file_lock); return error; } |
ad47bd725 make expand_files... |
565 |
static int alloc_fd(unsigned start, unsigned flags) |
dcfadfa4e new helper: __all... |
566 567 568 |
{ return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } |
1a7bd2265 make get_unused_f... |
569 |
int get_unused_fd_flags(unsigned flags) |
1027abe88 [PATCH] merge loc... |
570 |
{ |
dcfadfa4e new helper: __all... |
571 |
return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); |
1027abe88 [PATCH] merge loc... |
572 |
} |
1a7bd2265 make get_unused_f... |
573 |
EXPORT_SYMBOL(get_unused_fd_flags); |
56007cae9 move put_unused_f... |
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 |
static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; } void put_unused_fd(unsigned int fd) { struct files_struct *files = current->files; spin_lock(&files->file_lock); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); } EXPORT_SYMBOL(put_unused_fd); /* * Install a file pointer in the fd array. * * The VFS is full of places where we drop the files lock between * setting the open_fds bitmap and installing the file in the file * array. At any such point, we are vulnerable to a dup2() race * installing a file in the array before us. We need to detect this and * fput() the struct file we are about to overwrite in this case. * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. |
f869e8a7f expose a low-leve... |
604 605 606 607 608 609 610 |
* * NOTE: __fd_install() variant is really, really low-level; don't * use it unless you are forced to by truly lousy API shoved down * your throat. 'files' *MUST* be either current->files or obtained * by get_files_struct(current) done by whoever had given it to you, * or really bad things will happen. Normally you want to use * fd_install() instead. |
56007cae9 move put_unused_f... |
611 |
*/ |
f869e8a7f expose a low-leve... |
612 613 |
void __fd_install(struct files_struct *files, unsigned int fd, struct file *file) |
56007cae9 move put_unused_f... |
614 |
{ |
56007cae9 move put_unused_f... |
615 616 617 618 619 620 621 |
struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); } |
f869e8a7f expose a low-leve... |
622 623 624 625 |
/* Install @file as descriptor @fd of the current task; see __fd_install(). */
void fd_install(unsigned int fd, struct file *file)
{
	struct files_struct *files = current->files;

	__fd_install(files, fd, file);
}
56007cae9 move put_unused_f... |
626 |
EXPORT_SYMBOL(fd_install); |
0ee8cdfe6 take fget() and f... |
627 |
|
483ce1d4b take descriptor-r... |
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 |
/* * The same warnings as for __alloc_fd()/__fd_install() apply here... */ int __close_fd(struct files_struct *files, unsigned fd) { struct file *file; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); if (fd >= fdt->max_fds) goto out_unlock; file = fdt->fd[fd]; if (!file) goto out_unlock; rcu_assign_pointer(fdt->fd[fd], NULL); __clear_close_on_exec(fd, fdt); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); return filp_close(file, files); out_unlock: spin_unlock(&files->file_lock); return -EBADF; } |
6a6d27de3 take close-on-exe... |
653 654 655 656 657 658 |
void do_close_on_exec(struct files_struct *files) { unsigned i; struct fdtable *fdt; /* exec unshares first */ |
6a6d27de3 take close-on-exe... |
659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 |
spin_lock(&files->file_lock); for (i = 0; ; i++) { unsigned long set; unsigned fd = i * BITS_PER_LONG; fdt = files_fdtable(files); if (fd >= fdt->max_fds) break; set = fdt->close_on_exec[i]; if (!set) continue; fdt->close_on_exec[i] = 0; for ( ; set ; fd++, set >>= 1) { struct file *file; if (!(set & 1)) continue; file = fdt->fd[fd]; if (!file) continue; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); filp_close(file, files); cond_resched(); spin_lock(&files->file_lock); } } spin_unlock(&files->file_lock); } |
0ee8cdfe6 take fget() and f... |
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 |
struct file *fget(unsigned int fd) { struct file *file; struct files_struct *files = current->files; rcu_read_lock(); file = fcheck_files(files, fd); if (file) { /* File object ref couldn't be taken */ if (file->f_mode & FMODE_PATH || !atomic_long_inc_not_zero(&file->f_count)) file = NULL; } rcu_read_unlock(); return file; } EXPORT_SYMBOL(fget); struct file *fget_raw(unsigned int fd) { struct file *file; struct files_struct *files = current->files; rcu_read_lock(); file = fcheck_files(files, fd); if (file) { /* File object ref couldn't be taken */ if (!atomic_long_inc_not_zero(&file->f_count)) file = NULL; } rcu_read_unlock(); return file; } EXPORT_SYMBOL(fget_raw); /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. * * You can use this instead of fget if you satisfy all of the following * conditions: * 1) You must call fput_light before exiting the syscall and returning control * to userspace (i.e. you cannot remember the returned struct file * after * returning to userspace). * 2) You must not call filp_close on the returned struct file * in between * calls to fget_light and fput_light. * 3) You must not clone the current task in between the calls to fget_light * and fput_light. * * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ struct file *fget_light(unsigned int fd, int *fput_needed) { struct file *file; struct files_struct *files = current->files; *fput_needed = 0; if (atomic_read(&files->count) == 1) { file = fcheck_files(files, fd); if (file && (file->f_mode & FMODE_PATH)) file = NULL; } else { rcu_read_lock(); file = fcheck_files(files, fd); if (file) { if (!(file->f_mode & FMODE_PATH) && atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ file = NULL; } rcu_read_unlock(); } return file; } |
4557c669e export fget_light |
769 |
EXPORT_SYMBOL(fget_light); |
0ee8cdfe6 take fget() and f... |
770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 |
/*
 * Variant of fget_light() that does not filter out files with FMODE_PATH
 * set.  A reference is only taken when the fd table is shared;
 * *@fput_needed tells whether one was taken.
 */
struct file *fget_raw_light(unsigned int fd, int *fput_needed)
{
	struct files_struct *files = current->files;
	struct file *file;

	*fput_needed = 0;

	/* Sole user of the table: no RCU and no reference needed. */
	if (atomic_read(&files->count) == 1)
		return fcheck_files(files, fd);

	rcu_read_lock();
	file = fcheck_files(files, fd);
	if (file) {
		if (!atomic_long_inc_not_zero(&file->f_count))
			/* Didn't get the reference, someone's freed */
			file = NULL;
		else
			*fput_needed = 1;
	}
	rcu_read_unlock();

	return file;
}
fe17f22d7 take purely descr... |
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 |
void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); if (flag) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); } bool get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; struct fdtable *fdt; bool res; rcu_read_lock(); fdt = files_fdtable(files); res = close_on_exec(fd, fdt); rcu_read_unlock(); return res; } |
8280d1617 new helper: repla... |
819 820 |
static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) |
fe17f22d7 take purely descr... |
821 |
{ |
8280d1617 new helper: repla... |
822 |
struct file *tofree; |
fe17f22d7 take purely descr... |
823 |
struct fdtable *fdt; |
fe17f22d7 take purely descr... |
824 825 826 827 828 829 830 831 832 833 834 835 836 837 |
/* * We need to detect attempts to do dup2() over allocated but still * not finished descriptor. NB: OpenBSD avoids that at the price of * extra work in their equivalent of fget() - they insert struct * file immediately after grabbing descriptor, mark it larval if * more work (e.g. actual opening) is needed and make sure that * fget() treats larval files as absent. Potentially interesting, * but while extra work in fget() is trivial, locking implications * and amount of surgery on open()-related paths in VFS are not. * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" * deadlocks in rather amusing ways, AFAICS. All of that is out of * scope of POSIX or SUS, since neither considers shared descriptor * tables and this condition does not arise without those. */ |
fe17f22d7 take purely descr... |
838 |
fdt = files_fdtable(files); |
8280d1617 new helper: repla... |
839 840 841 |
tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; |
fe17f22d7 take purely descr... |
842 |
get_file(file); |
8280d1617 new helper: repla... |
843 844 |
rcu_assign_pointer(fdt->fd[fd], file); __set_open_fd(fd, fdt); |
fe17f22d7 take purely descr... |
845 |
if (flags & O_CLOEXEC) |
8280d1617 new helper: repla... |
846 |
__set_close_on_exec(fd, fdt); |
fe17f22d7 take purely descr... |
847 |
else |
8280d1617 new helper: repla... |
848 |
__clear_close_on_exec(fd, fdt); |
fe17f22d7 take purely descr... |
849 850 851 852 |
spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); |
8280d1617 new helper: repla... |
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 |
return fd; Ebusy: spin_unlock(&files->file_lock); return -EBUSY; } int replace_fd(unsigned fd, struct file *file, unsigned flags) { int err; struct files_struct *files = current->files; if (!file) return __close_fd(files, fd); if (fd >= rlimit(RLIMIT_NOFILE)) |
08f05c497 Return the right ... |
869 |
return -EBADF; |
8280d1617 new helper: repla... |
870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 |
spin_lock(&files->file_lock); err = expand_files(files, fd); if (unlikely(err < 0)) goto out_unlock; return do_dup2(files, file, fd, flags); out_unlock: spin_unlock(&files->file_lock); return err; } SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { int err = -EBADF; struct file *file; struct files_struct *files = current->files; if ((flags & ~O_CLOEXEC) != 0) return -EINVAL; |
aed976475 dup3: Return an e... |
890 891 |
if (unlikely(oldfd == newfd)) return -EINVAL; |
8280d1617 new helper: repla... |
892 |
if (newfd >= rlimit(RLIMIT_NOFILE)) |
08f05c497 Return the right ... |
893 |
return -EBADF; |
8280d1617 new helper: repla... |
894 895 896 897 898 899 900 901 902 903 904 905 |
spin_lock(&files->file_lock); err = expand_files(files, newfd); file = fcheck(oldfd); if (unlikely(!file)) goto Ebadf; if (unlikely(err < 0)) { if (err == -EMFILE) goto Ebadf; goto out_unlock; } return do_dup2(files, file, newfd, flags); |
fe17f22d7 take purely descr... |
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 |
Ebadf: err = -EBADF; out_unlock: spin_unlock(&files->file_lock); return err; } SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; int retval = oldfd; rcu_read_lock(); if (!fcheck_files(files, oldfd)) retval = -EBADF; rcu_read_unlock(); return retval; } return sys_dup3(oldfd, newfd, 0); } SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); if (file) { ret = get_unused_fd(); if (ret >= 0) fd_install(ret, file); else fput(file); } return ret; } int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; if (from >= rlimit(RLIMIT_NOFILE)) return -EINVAL; err = alloc_fd(from, flags); if (err >= 0) { get_file(file); fd_install(err, file); } return err; } |
c3c073f80 new helper: itera... |
956 957 958 959 960 961 |
int iterate_fd(struct files_struct *files, unsigned n, int (*f)(const void *, struct file *, unsigned), const void *p) { struct fdtable *fdt; |
c3c073f80 new helper: itera... |
962 963 964 965 |
int res = 0; if (!files) return 0; spin_lock(&files->file_lock); |
a77cfcb42 fix off-by-one in... |
966 967 968 969 970 971 972 973 |
for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { struct file *file; file = rcu_dereference_check_fdtable(files, fdt->fd[n]); if (!file) continue; res = f(p, file, n); if (res) break; |
c3c073f80 new helper: itera... |
974 975 976 977 978 |
} spin_unlock(&files->file_lock); return res; } EXPORT_SYMBOL(iterate_fd); |