Commit d846687d7f84e45f23ecf3846dbb43312a1206dd
Committed by: Linus Torvalds
Parent: 74a1166dfe
cgroups: use flex_array in attach_proc
Convert cgroup_attach_proc to use flex_array.

The cgroup_attach_proc implementation requires a pre-allocated array to
store task pointers to atomically move a thread-group, but asking for a
monolithic array with kmalloc() may be unreliable for very large groups.
Using flex_array provides the same functionality with less risk of
failure.

This is a post-patch for cgroup-procs-write.patch.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
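Background for readers unfamiliar with flex_array: unlike kmalloc(), which must find one physically contiguous block for the whole array, flex_array stores its elements in page-sized parts, so a very large thread-group never forces a high-order allocation. The sketch below condenses the lifecycle this patch adopts (alloc, prealloc, put, get, free). It is illustrative only: snapshot_threads() and its arguments are hypothetical, not part of the patch, though each flex_array call mirrors one in the diff, including the then-current flex_array_prealloc() signature that took an inclusive end index (hence group_size - 1).

#include <linux/flex_array.h>
#include <linux/kernel.h>
#include <linux/sched.h>

/* Hypothetical helper, not patch code: the flex_array lifecycle in one place. */
static int snapshot_threads(struct task_struct *leader, int group_size)
{
	struct flex_array *group;
	struct task_struct *tsk;
	int i, retval;

	/* allocate the descriptor; element storage lives in page-sized parts */
	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
				 GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	/* reserve every part now, while we are still allowed to sleep */
	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
	if (retval)
		goto out_free;

	/* after prealloc, a put cannot need new memory, so GFP_ATOMIC is safe */
	for (i = 0; i < group_size; i++) {
		retval = flex_array_put_ptr(group, i, leader, GFP_ATOMIC);
		BUG_ON(retval != 0);
	}

	/* read the stored pointers back out */
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);
		WARN_ON(tsk != leader);
	}
	retval = 0;

out_free:
	flex_array_free(group);	/* frees the parts and the descriptor */
	return retval;
}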
Showing 1 changed file with 24 additions and 9 deletions
kernel/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
+#include <linux/flex_array.h> /* used in cgroup_attach_proc */
 
 #include <asm/atomic.h>
 
@@ -1995,7 +1996,7 @@
 	struct cgroupfs_root *root = cgrp->root;
 	/* threadgroup list cursor and array */
 	struct task_struct *tsk;
-	struct task_struct **group;
+	struct flex_array *group;
 	/*
 	 * we need to make sure we have css_sets for all the tasks we're
 	 * going to move -before- we actually start moving them, so that in
@@ -2012,9 +2013,15 @@
 	 * and if threads exit, this will just be an over-estimate.
 	 */
 	group_size = get_nr_threads(leader);
-	group = kmalloc(group_size * sizeof(*group), GFP_KERNEL);
+	/* flex_array supports very large thread-groups better than kmalloc. */
+	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
+				 GFP_KERNEL);
 	if (!group)
 		return -ENOMEM;
+	/* pre-allocate to guarantee space while iterating in rcu read-side. */
+	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+	if (retval)
+		goto out_free_group_list;
 
 	/* prevent changes to the threadgroup list while we take a snapshot. */
 	rcu_read_lock();
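A note on ordering in the hunk above: flex_array_prealloc() is called with GFP_KERNEL, which may sleep, so it must run before rcu_read_lock(); sleeping inside an RCU read-side critical section is a bug, and the puts performed there rely on the space reserved here. A minimal sketch of that discipline follows (fill_under_rcu() is a hypothetical helper, not patch code):

#include <linux/flex_array.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>

static int fill_under_rcu(struct flex_array *group, int n, void *item)
{
	int i, err;

	/* may sleep: must happen before entering the RCU read-side section */
	err = flex_array_prealloc(group, 0, n - 1, GFP_KERNEL);
	if (err)
		return err;

	rcu_read_lock();
	for (i = 0; i < n; i++) {
		/* cannot sleep here; prealloc guarantees this put succeeds */
		err = flex_array_put_ptr(group, i, item, GFP_ATOMIC);
		BUG_ON(err != 0);
	}
	rcu_read_unlock();
	return 0;
}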
@@ -2037,7 +2044,12 @@
 		/* as per above, nr_threads may decrease, but not increase. */
 		BUG_ON(i >= group_size);
 		get_task_struct(tsk);
-		group[i] = tsk;
+		/*
+		 * saying GFP_ATOMIC has no effect here because we did prealloc
+		 * earlier, but it's good form to communicate our expectations.
+		 */
+		retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
+		BUG_ON(retval != 0);
 		i++;
 	} while_each_thread(leader, tsk);
 	/* remember the number of threads in the array for later. */
@@ -2059,7 +2071,8 @@
 		if (ss->can_attach_task) {
 			/* run on each task in the threadgroup. */
 			for (i = 0; i < group_size; i++) {
-				retval = ss->can_attach_task(cgrp, group[i]);
+				tsk = flex_array_get_ptr(group, i);
+				retval = ss->can_attach_task(cgrp, tsk);
 				if (retval) {
 					failed_ss = ss;
 					cancel_failed_ss = true;
@@ -2075,7 +2088,7 @@
 	 */
 	INIT_LIST_HEAD(&newcg_list);
 	for (i = 0; i < group_size; i++) {
-		tsk = group[i];
+		tsk = flex_array_get_ptr(group, i);
 		/* nothing to do if this task is already in the cgroup */
 		oldcgrp = task_cgroup_from_root(tsk, root);
 		if (cgrp == oldcgrp)
@@ -2114,7 +2127,7 @@
 			ss->pre_attach(cgrp);
 	}
 	for (i = 0; i < group_size; i++) {
-		tsk = group[i];
+		tsk = flex_array_get_ptr(group, i);
 		/* leave current thread as it is if it's already there */
 		oldcgrp = task_cgroup_from_root(tsk, root);
 		if (cgrp == oldcgrp)
@@ -2167,10 +2180,12 @@
 		}
 	}
 	/* clean up the array of referenced threads in the group. */
-	for (i = 0; i < group_size; i++)
-		put_task_struct(group[i]);
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		put_task_struct(tsk);
+	}
 out_free_group_list:
-	kfree(group);
+	flex_array_free(group);
 	return retval;
 }
 