Commit 518de9b39e854542de59bfb8b9f61c8f7ecf808b

Authored by Eric Dumazet
Committed by Linus Torvalds
1 parent 571428be55

fs: allow for more than 2^31 files

Robin Holt tried to boot a 16TB system and found that af_unix was
overflowing a 32-bit value:

<quote>

We were seeing a failure which prevented boot.  The kernel was incapable
of creating either a named pipe or unix domain socket.  This comes down
to a common kernel function called unix_create1() which does:

        atomic_inc(&unix_nr_socks);
        if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

The function get_max_files() is a simple return of files_stat.max_files.
files_stat.max_files is a signed integer and is computed in
fs/file_table.c's files_init().

        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = n;

In our case, mempages (total_ram_pages) is approx 3,758,096,384
(0xe0000000).  That leaves max_files at approximately 1,503,238,553.
This causes 2 * get_max_files() to integer overflow.

</quote>

The fix is to let /proc/sys/fs/file-nr and /proc/sys/fs/file-max use long
integers, and to change af_unix to use an atomic_long_t instead of an atomic_t.

get_max_files() is changed to return an unsigned long.  get_nr_files() is
changed to return a long.

unix_nr_socks is changed from atomic_t to atomic_long_t, although this is
not strictly needed to address Robin's problem.

Before patch (on a 64-bit kernel):
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
-18446744071562067968

After patch:
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
2147483648
# cat /proc/sys/fs/file-nr
704     0       2147483648

Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 21 additions and 24 deletions Side-by-side Diff

... ... @@ -60,7 +60,7 @@
60 60 /*
61 61 * Return the total number of open files in the system
62 62 */
63   -static int get_nr_files(void)
  63 +static long get_nr_files(void)
64 64 {
65 65 return percpu_counter_read_positive(&nr_files);
66 66 }
... ... @@ -68,7 +68,7 @@
68 68 /*
69 69 * Return the maximum number of open files in the system
70 70 */
71   -int get_max_files(void)
  71 +unsigned long get_max_files(void)
72 72 {
73 73 return files_stat.max_files;
74 74 }
... ... @@ -82,7 +82,7 @@
82 82 void __user *buffer, size_t *lenp, loff_t *ppos)
83 83 {
84 84 files_stat.nr_files = get_nr_files();
85   - return proc_dointvec(table, write, buffer, lenp, ppos);
  85 + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
86 86 }
87 87 #else
88 88 int proc_nr_files(ctl_table *table, int write,
... ... @@ -105,7 +105,7 @@
105 105 struct file *get_empty_filp(void)
106 106 {
107 107 const struct cred *cred = current_cred();
108   - static int old_max;
  108 + static long old_max;
109 109 struct file * f;
110 110  
111 111 /*
... ... @@ -140,8 +140,7 @@
140 140 over:
141 141 /* Ran out of filps - report that */
142 142 if (get_nr_files() > old_max) {
143   - printk(KERN_INFO "VFS: file-max limit %d reached\n",
144   - get_max_files());
  143 + pr_info("VFS: file-max limit %lu reached\n", get_max_files());
145 144 old_max = get_nr_files();
146 145 }
147 146 goto fail;
... ... @@ -487,7 +486,7 @@
487 486  
488 487 void __init files_init(unsigned long mempages)
489 488 {
490   - int n;
  489 + unsigned long n;
491 490  
492 491 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
493 492 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
... ... @@ -498,9 +497,7 @@
498 497 */
499 498  
500 499 n = (mempages * (PAGE_SIZE / 1024)) / 10;
501   - files_stat.max_files = n;
502   - if (files_stat.max_files < NR_FILE)
503   - files_stat.max_files = NR_FILE;
  500 + files_stat.max_files = max_t(unsigned long, n, NR_FILE);
504 501 files_defer_init();
505 502 lg_lock_init(files_lglock);
506 503 percpu_counter_init(&nr_files, 0);
... ... @@ -34,9 +34,9 @@
34 34  
35 35 /* And dynamically-tunable limits and defaults: */
36 36 struct files_stat_struct {
37   - int nr_files; /* read only */
38   - int nr_free_files; /* read only */
39   - int max_files; /* tunable */
  37 + unsigned long nr_files; /* read only */
  38 + unsigned long nr_free_files; /* read only */
  39 + unsigned long max_files; /* tunable */
40 40 };
41 41  
42 42 struct inodes_stat_t {
... ... @@ -400,7 +400,7 @@
400 400 extern void __init files_init(unsigned long);
401 401  
402 402 extern struct files_stat_struct files_stat;
403   -extern int get_max_files(void);
  403 +extern unsigned long get_max_files(void);
404 404 extern int sysctl_nr_open;
405 405 extern struct inodes_stat_t inodes_stat;
406 406 extern int leases_enable, lease_break_time;
... ... @@ -1352,16 +1352,16 @@
1352 1352 {
1353 1353 .procname = "file-nr",
1354 1354 .data = &files_stat,
1355   - .maxlen = 3*sizeof(int),
  1355 + .maxlen = sizeof(files_stat),
1356 1356 .mode = 0444,
1357 1357 .proc_handler = proc_nr_files,
1358 1358 },
1359 1359 {
1360 1360 .procname = "file-max",
1361 1361 .data = &files_stat.max_files,
1362   - .maxlen = sizeof(int),
  1362 + .maxlen = sizeof(files_stat.max_files),
1363 1363 .mode = 0644,
1364   - .proc_handler = proc_dointvec,
  1364 + .proc_handler = proc_doulongvec_minmax,
1365 1365 },
1366 1366 {
1367 1367 .procname = "nr_open",
... ... @@ -117,7 +117,7 @@
117 117  
118 118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 119 static DEFINE_SPINLOCK(unix_table_lock);
120   -static atomic_t unix_nr_socks = ATOMIC_INIT(0);
  120 +static atomic_long_t unix_nr_socks;
121 121  
122 122 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
123 123  
124 124  
... ... @@ -360,13 +360,13 @@
360 360 if (u->addr)
361 361 unix_release_addr(u->addr);
362 362  
363   - atomic_dec(&unix_nr_socks);
  363 + atomic_long_dec(&unix_nr_socks);
364 364 local_bh_disable();
365 365 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 366 local_bh_enable();
367 367 #ifdef UNIX_REFCNT_DEBUG
368   - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369   - atomic_read(&unix_nr_socks));
  368 + printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
  369 + atomic_long_read(&unix_nr_socks));
370 370 #endif
371 371 }
372 372  
... ... @@ -606,8 +606,8 @@
606 606 struct sock *sk = NULL;
607 607 struct unix_sock *u;
608 608  
609   - atomic_inc(&unix_nr_socks);
610   - if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
  609 + atomic_long_inc(&unix_nr_socks);
  610 + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
611 611 goto out;
612 612  
613 613 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
... ... @@ -632,7 +632,7 @@
632 632 unix_insert_socket(unix_sockets_unbound, sk);
633 633 out:
634 634 if (sk == NULL)
635   - atomic_dec(&unix_nr_socks);
  635 + atomic_long_dec(&unix_nr_socks);
636 636 else {
637 637 local_bh_disable();
638 638 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);