Commit 518de9b39e854542de59bfb8b9f61c8f7ecf808b
Committed by
Linus Torvalds
1 parent
571428be55
Exists in
master
and in
4 other branches
fs: allow for more than 2^31 files
Robin Holt tried to boot a 16TB system and found af_unix was overflowing a 32-bit value:

<quote>

We were seeing a failure which prevented boot. The kernel was incapable of creating either a named pipe or unix domain socket. This comes down to a common kernel function called unix_create1() which does:

        atomic_inc(&unix_nr_socks);
        if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

The function get_max_files() is a simple return of files_stat.max_files. files_stat.max_files is a signed integer and is computed in fs/file_table.c's files_init().

        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = n;

In our case, mempages (total_ram_pages) is approx 3,758,096,384 (0xe0000000). That leaves max_files at approximately 1,503,238,553. This causes 2 * get_max_files() to integer overflow.

</quote>

Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long integers, and change af_unix to use an atomic_long_t instead of atomic_t.

get_max_files() is changed to return an unsigned long. get_nr_files() is changed to return a long.

unix_nr_socks is changed from atomic_t to atomic_long_t, while not strictly needed to address Robin's problem.

Before patch (on a 64-bit kernel):

        # echo 2147483648 >/proc/sys/fs/file-max
        # cat /proc/sys/fs/file-max
        -18446744071562067968

After patch:

        # echo 2147483648 >/proc/sys/fs/file-max
        # cat /proc/sys/fs/file-max
        2147483648
        # cat /proc/sys/fs/file-nr
        704 0 2147483648

Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 4 changed files with 21 additions and 24 deletions Side-by-side Diff
fs/file_table.c
... | ... | @@ -60,7 +60,7 @@ |
60 | 60 | /* |
61 | 61 | * Return the total number of open files in the system |
62 | 62 | */ |
63 | -static int get_nr_files(void) | |
63 | +static long get_nr_files(void) | |
64 | 64 | { |
65 | 65 | return percpu_counter_read_positive(&nr_files); |
66 | 66 | } |
... | ... | @@ -68,7 +68,7 @@ |
68 | 68 | /* |
69 | 69 | * Return the maximum number of open files in the system |
70 | 70 | */ |
71 | -int get_max_files(void) | |
71 | +unsigned long get_max_files(void) | |
72 | 72 | { |
73 | 73 | return files_stat.max_files; |
74 | 74 | } |
... | ... | @@ -82,7 +82,7 @@ |
82 | 82 | void __user *buffer, size_t *lenp, loff_t *ppos) |
83 | 83 | { |
84 | 84 | files_stat.nr_files = get_nr_files(); |
85 | - return proc_dointvec(table, write, buffer, lenp, ppos); | |
85 | + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); | |
86 | 86 | } |
87 | 87 | #else |
88 | 88 | int proc_nr_files(ctl_table *table, int write, |
... | ... | @@ -105,7 +105,7 @@ |
105 | 105 | struct file *get_empty_filp(void) |
106 | 106 | { |
107 | 107 | const struct cred *cred = current_cred(); |
108 | - static int old_max; | |
108 | + static long old_max; | |
109 | 109 | struct file * f; |
110 | 110 | |
111 | 111 | /* |
... | ... | @@ -140,8 +140,7 @@ |
140 | 140 | over: |
141 | 141 | /* Ran out of filps - report that */ |
142 | 142 | if (get_nr_files() > old_max) { |
143 | - printk(KERN_INFO "VFS: file-max limit %d reached\n", | |
144 | - get_max_files()); | |
143 | + pr_info("VFS: file-max limit %lu reached\n", get_max_files()); | |
145 | 144 | old_max = get_nr_files(); |
146 | 145 | } |
147 | 146 | goto fail; |
... | ... | @@ -487,7 +486,7 @@ |
487 | 486 | |
488 | 487 | void __init files_init(unsigned long mempages) |
489 | 488 | { |
490 | - int n; | |
489 | + unsigned long n; | |
491 | 490 | |
492 | 491 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, |
493 | 492 | SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
... | ... | @@ -498,9 +497,7 @@ |
498 | 497 | */ |
499 | 498 | |
500 | 499 | n = (mempages * (PAGE_SIZE / 1024)) / 10; |
501 | - files_stat.max_files = n; | |
502 | - if (files_stat.max_files < NR_FILE) | |
503 | - files_stat.max_files = NR_FILE; | |
500 | + files_stat.max_files = max_t(unsigned long, n, NR_FILE); | |
504 | 501 | files_defer_init(); |
505 | 502 | lg_lock_init(files_lglock); |
506 | 503 | percpu_counter_init(&nr_files, 0); |
include/linux/fs.h
... | ... | @@ -34,9 +34,9 @@ |
34 | 34 | |
35 | 35 | /* And dynamically-tunable limits and defaults: */ |
36 | 36 | struct files_stat_struct { |
37 | - int nr_files; /* read only */ | |
38 | - int nr_free_files; /* read only */ | |
39 | - int max_files; /* tunable */ | |
37 | + unsigned long nr_files; /* read only */ | |
38 | + unsigned long nr_free_files; /* read only */ | |
39 | + unsigned long max_files; /* tunable */ | |
40 | 40 | }; |
41 | 41 | |
42 | 42 | struct inodes_stat_t { |
... | ... | @@ -400,7 +400,7 @@ |
400 | 400 | extern void __init files_init(unsigned long); |
401 | 401 | |
402 | 402 | extern struct files_stat_struct files_stat; |
403 | -extern int get_max_files(void); | |
403 | +extern unsigned long get_max_files(void); | |
404 | 404 | extern int sysctl_nr_open; |
405 | 405 | extern struct inodes_stat_t inodes_stat; |
406 | 406 | extern int leases_enable, lease_break_time; |
kernel/sysctl.c
... | ... | @@ -1352,16 +1352,16 @@ |
1352 | 1352 | { |
1353 | 1353 | .procname = "file-nr", |
1354 | 1354 | .data = &files_stat, |
1355 | - .maxlen = 3*sizeof(int), | |
1355 | + .maxlen = sizeof(files_stat), | |
1356 | 1356 | .mode = 0444, |
1357 | 1357 | .proc_handler = proc_nr_files, |
1358 | 1358 | }, |
1359 | 1359 | { |
1360 | 1360 | .procname = "file-max", |
1361 | 1361 | .data = &files_stat.max_files, |
1362 | - .maxlen = sizeof(int), | |
1362 | + .maxlen = sizeof(files_stat.max_files), | |
1363 | 1363 | .mode = 0644, |
1364 | - .proc_handler = proc_dointvec, | |
1364 | + .proc_handler = proc_doulongvec_minmax, | |
1365 | 1365 | }, |
1366 | 1366 | { |
1367 | 1367 | .procname = "nr_open", |
net/unix/af_unix.c
... | ... | @@ -117,7 +117,7 @@ |
117 | 117 | |
118 | 118 | static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; |
119 | 119 | static DEFINE_SPINLOCK(unix_table_lock); |
120 | -static atomic_t unix_nr_socks = ATOMIC_INIT(0); | |
120 | +static atomic_long_t unix_nr_socks; | |
121 | 121 | |
122 | 122 | #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) |
123 | 123 | |
124 | 124 | |
... | ... | @@ -360,13 +360,13 @@ |
360 | 360 | if (u->addr) |
361 | 361 | unix_release_addr(u->addr); |
362 | 362 | |
363 | - atomic_dec(&unix_nr_socks); | |
363 | + atomic_long_dec(&unix_nr_socks); | |
364 | 364 | local_bh_disable(); |
365 | 365 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
366 | 366 | local_bh_enable(); |
367 | 367 | #ifdef UNIX_REFCNT_DEBUG |
368 | - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, | |
369 | - atomic_read(&unix_nr_socks)); | |
368 | + printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, | |
369 | + atomic_long_read(&unix_nr_socks)); | |
370 | 370 | #endif |
371 | 371 | } |
372 | 372 | |
... | ... | @@ -606,8 +606,8 @@ |
606 | 606 | struct sock *sk = NULL; |
607 | 607 | struct unix_sock *u; |
608 | 608 | |
609 | - atomic_inc(&unix_nr_socks); | |
610 | - if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) | |
609 | + atomic_long_inc(&unix_nr_socks); | |
610 | + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) | |
611 | 611 | goto out; |
612 | 612 | |
613 | 613 | sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); |
... | ... | @@ -632,7 +632,7 @@ |
632 | 632 | unix_insert_socket(unix_sockets_unbound, sk); |
633 | 633 | out: |
634 | 634 | if (sk == NULL) |
635 | - atomic_dec(&unix_nr_socks); | |
635 | + atomic_long_dec(&unix_nr_socks); | |
636 | 636 | else { |
637 | 637 | local_bh_disable(); |
638 | 638 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |