Commit d1d9fd3308fdef6b4bf564fa3d6cfe35b68b50bc

Authored by Ben Blum
Committed by Linus Torvalds
1 parent 72a8cb30d1

cgroups: use vmalloc for large cgroups pidlist allocations

Routes all pidlist allocation requests through a single helper that
decides, based on the requested size, whether the array must be vmalloc()ed
or can be obtained from kmalloc(); freeing likewise picks kfree() or vfree().

Signed-off-by: Ben Blum <bblum@google.com>
Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 42 additions and 5 deletions Side-by-side Diff

... ... @@ -50,6 +50,7 @@
50 50 #include <linux/smp_lock.h>
51 51 #include <linux/pid_namespace.h>
52 52 #include <linux/idr.h>
  53 +#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
53 54  
54 55 #include <asm/atomic.h>
55 56  
... ... @@ -2351,6 +2352,42 @@
2351 2352 */
2352 2353  
2353 2354 /*
  2355 + * The following helpers "fix" the issue where there are more pids
  2356 + * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
  2357 + * TODO: replace with a kernel-wide solution to this problem
  2358 + */
  2359 +#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
  2360 +static void *pidlist_allocate(int count)
  2361 +{
  2362 + if (PIDLIST_TOO_LARGE(count))
  2363 + return vmalloc(count * sizeof(pid_t));
  2364 + else
  2365 + return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
  2366 +}
  2367 +static void pidlist_free(void *p)
  2368 +{
  2369 + if (is_vmalloc_addr(p))
  2370 + vfree(p);
  2371 + else
  2372 + kfree(p);
  2373 +}
  2374 +static void *pidlist_resize(void *p, int newcount)
  2375 +{
  2376 + void *newlist;
  2377 + /* note: if new alloc fails, old p will still be valid either way */
  2378 + if (is_vmalloc_addr(p)) {
  2379 + newlist = vmalloc(newcount * sizeof(pid_t));
  2380 + if (!newlist)
  2381 + return NULL;
  2382 + memcpy(newlist, p, newcount * sizeof(pid_t));
  2383 + vfree(p);
  2384 + } else {
  2385 + newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
  2386 + }
  2387 + return newlist;
  2388 +}
  2389 +
  2390 +/*
2354 2391 * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
2355 2392 * If the new stripped list is sufficiently smaller and there's enough memory
2356 2393 * to allocate a new buffer, will let go of the unneeded memory. Returns the
... ... @@ -2389,7 +2426,7 @@
2389 2426 * we'll just stay with what we've got.
2390 2427 */
2391 2428 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
2392   - newlist = krealloc(list, dest * sizeof(pid_t), GFP_KERNEL);
  2429 + newlist = pidlist_resize(list, dest);
2393 2430 if (newlist)
2394 2431 *p = newlist;
2395 2432 }
... ... @@ -2470,7 +2507,7 @@
2470 2507 * show up until sometime later on.
2471 2508 */
2472 2509 length = cgroup_task_count(cgrp);
2473   - array = kmalloc(length * sizeof(pid_t), GFP_KERNEL);
  2510 + array = pidlist_allocate(length);
2474 2511 if (!array)
2475 2512 return -ENOMEM;
2476 2513 /* now, populate the array */
2477 2514  
... ... @@ -2494,11 +2531,11 @@
2494 2531 length = pidlist_uniq(&array, length);
2495 2532 l = cgroup_pidlist_find(cgrp, type);
2496 2533 if (!l) {
2497   - kfree(array);
  2534 + pidlist_free(array);
2498 2535 return -ENOMEM;
2499 2536 }
2500 2537 /* store array, freeing old if necessary - lock already held */
2501   - kfree(l->list);
  2538 + pidlist_free(l->list);
2502 2539 l->list = array;
2503 2540 l->length = length;
2504 2541 l->use_count++;
... ... @@ -2659,7 +2696,7 @@
2659 2696 /* we're the last user if refcount is 0; remove and free */
2660 2697 list_del(&l->links);
2661 2698 mutex_unlock(&l->owner->pidlist_mutex);
2662   - kfree(l->list);
  2699 + pidlist_free(l->list);
2663 2700 put_pid_ns(l->key.ns);
2664 2701 up_write(&l->mutex);
2665 2702 kfree(l);