Commit 4e649152cbaa1aedd01821d200ab9d597fe469e4

Authored by KAMEZAWA Hiroyuki
Committed by Linus Torvalds
1 parent 3dece8347d

memcg: some modification to softlimit under hierarchical memory reclaim.

This patch cleans up and fixes memcg's uncharge soft limit path.

Problems:
  Currently, res_counter_charge()/uncharge() handle soft limit information
  at charge/uncharge time, and the soft limit check is done when the event
  counter per memcg goes over the limit. However, the event counter per
  memcg is updated only when memory usage is over the soft limit. With
  hierarchical memcg management, ancestors should be taken care of as well.

  Currently, ancestors (the hierarchy) are handled in charge() but not in
  uncharge(). This is not good.
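
  For context, this is roughly the per-level soft limit bookkeeping that
  used to sit inside the charge loop and that this patch removes; it is a
  condensed excerpt of the kernel/res_counter.c hunk shown below, not new
  code:

	for (c = counter; c != NULL; c = c->parent) {
		spin_lock(&c->lock);
		ret = res_counter_charge_locked(c, val);
		/*
		 * With soft limits, we return the highest ancestor
		 * that exceeds its soft limit
		 */
		if (soft_limit_fail_at &&
		    !res_counter_soft_limit_check_locked(c))
			*soft_limit_fail_at = c;
		spin_unlock(&c->lock);
		/* error handling for ret < 0 elided */
	}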

  Problems:
  1. memcg's event counter is incremented only when the soft limit is
     exceeded. That's bad: it makes the event counter hard to reuse for
     other purposes.

  2. At uncharge, only the lowest level res_counter is handled. This is a
     bug: because the ancestors' event counters are not incremented, the
     children have to take care of them.

  3. res_counter_uncharge()'s 3rd argument is NULL in most cases.
     Operations under res_counter->lock should be small; having no "if"
     statement there is better (the before/after prototypes are quoted
     after this list).
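
  For reference, the res_counter API change amounts to dropping the
  soft-limit out-parameters; the prototypes below are quoted from the
  include/linux/res_counter.h diff further down:

	/* before */
	int __must_check res_counter_charge(struct res_counter *counter,
				unsigned long val,
				struct res_counter **limit_fail_at,
				struct res_counter **soft_limit_at);
	void res_counter_uncharge(struct res_counter *counter, unsigned long val,
				bool *was_soft_limit_excess);

	/* after */
	int __must_check res_counter_charge(struct res_counter *counter,
				unsigned long val,
				struct res_counter **limit_fail_at);
	void res_counter_uncharge(struct res_counter *counter, unsigned long val);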

Fixes:
  * Removed the soft_limit_xx pointers and checks in charge and uncharge.
    The check-only-when-necessary scheme works well enough without them.

  * Make the memcg's event counter be incremented at every charge/uncharge
    (the per-cpu area will be accessed soon anyway).

  * All ancestors are checked at the soft-limit check. This is necessary
    because an ancestor's event counter may never be modified on its own;
    therefore they have to be checked at the same time (see the condensed
    loop sketched after this list).
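
  A condensed sketch of the resulting ancestor walk in
  mem_cgroup_update_tree(), taken from the mm/memcontrol.c hunk below
  (locking and setup are as in the real code; only the comments are
  shortened):

	for (; mem; mem = parent_mem_cgroup(mem)) {
		mz = mem_cgroup_zoneinfo(mem, nid, zid);
		new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
		/* update the tree if mz is on the RB-tree or mem is over its soft limit */
		if (new_usage_in_excess || mz->on_tree) {
			spin_lock(&mctz->lock);
			if (mz->on_tree)
				__mem_cgroup_remove_exceeded(mem, mz, mctz);
			if (new_usage_in_excess)
				__mem_cgroup_insert_exceeded(mem, mz, mctz);
			else
				mz->usage_in_excess = 0;
			spin_unlock(&mctz->lock);
		}
	}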

Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 54 additions and 83 deletions

include/linux/res_counter.h
... ... @@ -114,8 +114,7 @@
114 114 int __must_check res_counter_charge_locked(struct res_counter *counter,
115 115 unsigned long val);
116 116 int __must_check res_counter_charge(struct res_counter *counter,
117   - unsigned long val, struct res_counter **limit_fail_at,
118   - struct res_counter **soft_limit_at);
  117 + unsigned long val, struct res_counter **limit_fail_at);
119 118  
120 119 /*
121 120 * uncharge - tell that some portion of the resource is released
... ... @@ -128,8 +127,7 @@
128 127 */
129 128  
130 129 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
131   -void res_counter_uncharge(struct res_counter *counter, unsigned long val,
132   - bool *was_soft_limit_excess);
  130 +void res_counter_uncharge(struct res_counter *counter, unsigned long val);
133 131  
134 132 static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
135 133 {
kernel/res_counter.c
... ... @@ -37,27 +37,17 @@
37 37 }
38 38  
39 39 int res_counter_charge(struct res_counter *counter, unsigned long val,
40   - struct res_counter **limit_fail_at,
41   - struct res_counter **soft_limit_fail_at)
  40 + struct res_counter **limit_fail_at)
42 41 {
43 42 int ret;
44 43 unsigned long flags;
45 44 struct res_counter *c, *u;
46 45  
47 46 *limit_fail_at = NULL;
48   - if (soft_limit_fail_at)
49   - *soft_limit_fail_at = NULL;
50 47 local_irq_save(flags);
51 48 for (c = counter; c != NULL; c = c->parent) {
52 49 spin_lock(&c->lock);
53 50 ret = res_counter_charge_locked(c, val);
54   - /*
55   - * With soft limits, we return the highest ancestor
56   - * that exceeds its soft limit
57   - */
58   - if (soft_limit_fail_at &&
59   - !res_counter_soft_limit_check_locked(c))
60   - *soft_limit_fail_at = c;
61 51 spin_unlock(&c->lock);
62 52 if (ret < 0) {
63 53 *limit_fail_at = c;
... ... @@ -85,8 +75,7 @@
85 75 counter->usage -= val;
86 76 }
87 77  
88   -void res_counter_uncharge(struct res_counter *counter, unsigned long val,
89   - bool *was_soft_limit_excess)
  78 +void res_counter_uncharge(struct res_counter *counter, unsigned long val)
90 79 {
91 80 unsigned long flags;
92 81 struct res_counter *c;
... ... @@ -94,9 +83,6 @@
94 83 local_irq_save(flags);
95 84 for (c = counter; c != NULL; c = c->parent) {
96 85 spin_lock(&c->lock);
97   - if (was_soft_limit_excess)
98   - *was_soft_limit_excess =
99   - !res_counter_soft_limit_check_locked(c);
100 86 res_counter_uncharge_locked(c, val);
101 87 spin_unlock(&c->lock);
102 88 }
mm/memcontrol.c
... ... @@ -353,16 +353,6 @@
353 353 }
354 354  
355 355 static void
356   -mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
357   - struct mem_cgroup_per_zone *mz,
358   - struct mem_cgroup_tree_per_zone *mctz)
359   -{
360   - spin_lock(&mctz->lock);
361   - __mem_cgroup_insert_exceeded(mem, mz, mctz);
362   - spin_unlock(&mctz->lock);
363   -}
364   -
365   -static void
366 356 mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
367 357 struct mem_cgroup_per_zone *mz,
368 358 struct mem_cgroup_tree_per_zone *mctz)
... ... @@ -392,35 +382,41 @@
392 382  
393 383 static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
394 384 {
395   - unsigned long long prev_usage_in_excess, new_usage_in_excess;
396   - bool updated_tree = false;
  385 + unsigned long long new_usage_in_excess;
397 386 struct mem_cgroup_per_zone *mz;
398 387 struct mem_cgroup_tree_per_zone *mctz;
399   -
400   - mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
  388 + int nid = page_to_nid(page);
  389 + int zid = page_zonenum(page);
401 390 mctz = soft_limit_tree_from_page(page);
402 391  
403 392 /*
404   - * We do updates in lazy mode, mem's are removed
405   - * lazily from the per-zone, per-node rb tree
  393 + * Necessary to update all ancestors when hierarchy is used.
  394 + * because their event counter is not touched.
406 395 */
407   - prev_usage_in_excess = mz->usage_in_excess;
408   -
409   - new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
410   - if (prev_usage_in_excess) {
411   - mem_cgroup_remove_exceeded(mem, mz, mctz);
412   - updated_tree = true;
  396 + for (; mem; mem = parent_mem_cgroup(mem)) {
  397 + mz = mem_cgroup_zoneinfo(mem, nid, zid);
  398 + new_usage_in_excess =
  399 + res_counter_soft_limit_excess(&mem->res);
  400 + /*
  401 + * We have to update the tree if mz is on RB-tree or
  402 + * mem is over its softlimit.
  403 + */
  404 + if (new_usage_in_excess || mz->on_tree) {
  405 + spin_lock(&mctz->lock);
  406 + /* if on-tree, remove it */
  407 + if (mz->on_tree)
  408 + __mem_cgroup_remove_exceeded(mem, mz, mctz);
  409 + /*
  410 + * if over soft limit, insert again. mz->usage_in_excess
  411 + * will be updated properly.
  412 + */
  413 + if (new_usage_in_excess)
  414 + __mem_cgroup_insert_exceeded(mem, mz, mctz);
  415 + else
  416 + mz->usage_in_excess = 0;
  417 + spin_unlock(&mctz->lock);
  418 + }
413 419 }
414   - if (!new_usage_in_excess)
415   - goto done;
416   - mem_cgroup_insert_exceeded(mem, mz, mctz);
417   -
418   -done:
419   - if (updated_tree) {
420   - spin_lock(&mctz->lock);
421   - mz->usage_in_excess = new_usage_in_excess;
422   - spin_unlock(&mctz->lock);
423   - }
424 420 }
425 421  
426 422 static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
... ... @@ -1271,9 +1267,9 @@
1271 1267 gfp_t gfp_mask, struct mem_cgroup **memcg,
1272 1268 bool oom, struct page *page)
1273 1269 {
1274   - struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
  1270 + struct mem_cgroup *mem, *mem_over_limit;
1275 1271 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
1276   - struct res_counter *fail_res, *soft_fail_res = NULL;
  1272 + struct res_counter *fail_res;
1277 1273  
1278 1274 if (unlikely(test_thread_flag(TIF_MEMDIE))) {
1279 1275 /* Don't account this! */
... ... @@ -1305,17 +1301,16 @@
1305 1301  
1306 1302 if (mem_cgroup_is_root(mem))
1307 1303 goto done;
1308   - ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
1309   - &soft_fail_res);
  1304 + ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
1310 1305 if (likely(!ret)) {
1311 1306 if (!do_swap_account)
1312 1307 break;
1313 1308 ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
1314   - &fail_res, NULL);
  1309 + &fail_res);
1315 1310 if (likely(!ret))
1316 1311 break;
1317 1312 /* mem+swap counter fails */
1318   - res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
  1313 + res_counter_uncharge(&mem->res, PAGE_SIZE);
1319 1314 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
1320 1315 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
1321 1316 memsw);
... ... @@ -1354,16 +1349,11 @@
1354 1349 }
1355 1350 }
1356 1351 /*
1357   - * Insert just the ancestor, we should trickle down to the correct
1358   - * cgroup for reclaim, since the other nodes will be below their
1359   - * soft limit
  1352 + * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
  1353 + * if they exceeds softlimit.
1360 1354 */
1361   - if (soft_fail_res) {
1362   - mem_over_soft_limit =
1363   - mem_cgroup_from_res_counter(soft_fail_res, res);
1364   - if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
1365   - mem_cgroup_update_tree(mem_over_soft_limit, page);
1366   - }
  1355 + if (mem_cgroup_soft_limit_check(mem))
  1356 + mem_cgroup_update_tree(mem, page);
1367 1357 done:
1368 1358 return 0;
1369 1359 nomem:
... ... @@ -1438,10 +1428,9 @@
1438 1428 if (unlikely(PageCgroupUsed(pc))) {
1439 1429 unlock_page_cgroup(pc);
1440 1430 if (!mem_cgroup_is_root(mem)) {
1441   - res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
  1431 + res_counter_uncharge(&mem->res, PAGE_SIZE);
1442 1432 if (do_swap_account)
1443   - res_counter_uncharge(&mem->memsw, PAGE_SIZE,
1444   - NULL);
  1433 + res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1445 1434 }
1446 1435 css_put(&mem->css);
1447 1436 return;
... ... @@ -1520,7 +1509,7 @@
1520 1509 goto out;
1521 1510  
1522 1511 if (!mem_cgroup_is_root(from))
1523   - res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
  1512 + res_counter_uncharge(&from->res, PAGE_SIZE);
1524 1513 mem_cgroup_charge_statistics(from, pc, false);
1525 1514  
1526 1515 page = pc->page;
... ... @@ -1540,7 +1529,7 @@
1540 1529 }
1541 1530  
1542 1531 if (do_swap_account && !mem_cgroup_is_root(from))
1543   - res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
  1532 + res_counter_uncharge(&from->memsw, PAGE_SIZE);
1544 1533 css_put(&from->css);
1545 1534  
1546 1535 css_get(&to->css);
... ... @@ -1611,9 +1600,9 @@
1611 1600 css_put(&parent->css);
1612 1601 /* uncharge if move fails */
1613 1602 if (!mem_cgroup_is_root(parent)) {
1614   - res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
  1603 + res_counter_uncharge(&parent->res, PAGE_SIZE);
1615 1604 if (do_swap_account)
1616   - res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
  1605 + res_counter_uncharge(&parent->memsw, PAGE_SIZE);
1617 1606 }
1618 1607 return ret;
1619 1608 }
... ... @@ -1804,8 +1793,7 @@
1804 1793 * calling css_tryget
1805 1794 */
1806 1795 if (!mem_cgroup_is_root(memcg))
1807   - res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
1808   - NULL);
  1796 + res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1809 1797 mem_cgroup_swap_statistics(memcg, false);
1810 1798 mem_cgroup_put(memcg);
1811 1799 }
... ... @@ -1832,9 +1820,9 @@
1832 1820 if (!mem)
1833 1821 return;
1834 1822 if (!mem_cgroup_is_root(mem)) {
1835   - res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
  1823 + res_counter_uncharge(&mem->res, PAGE_SIZE);
1836 1824 if (do_swap_account)
1837   - res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
  1825 + res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1838 1826 }
1839 1827 css_put(&mem->css);
1840 1828 }
... ... @@ -1849,7 +1837,6 @@
1849 1837 struct page_cgroup *pc;
1850 1838 struct mem_cgroup *mem = NULL;
1851 1839 struct mem_cgroup_per_zone *mz;
1852   - bool soft_limit_excess = false;
1853 1840  
1854 1841 if (mem_cgroup_disabled())
1855 1842 return NULL;
... ... @@ -1889,10 +1876,10 @@
1889 1876 }
1890 1877  
1891 1878 if (!mem_cgroup_is_root(mem)) {
1892   - res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
  1879 + res_counter_uncharge(&mem->res, PAGE_SIZE);
1893 1880 if (do_swap_account &&
1894 1881 (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
1895   - res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
  1882 + res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1896 1883 }
1897 1884 if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1898 1885 mem_cgroup_swap_statistics(mem, true);
... ... @@ -1909,7 +1896,7 @@
1909 1896 mz = page_cgroup_zoneinfo(pc);
1910 1897 unlock_page_cgroup(pc);
1911 1898  
1912   - if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
  1899 + if (mem_cgroup_soft_limit_check(mem))
1913 1900 mem_cgroup_update_tree(mem, page);
1914 1901 /* at swapout, this memcg will be accessed to record to swap */
1915 1902 if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
... ... @@ -1987,7 +1974,7 @@
1987 1974 * This memcg can be obsolete one. We avoid calling css_tryget
1988 1975 */
1989 1976 if (!mem_cgroup_is_root(memcg))
1990   - res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
  1977 + res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1991 1978 mem_cgroup_swap_statistics(memcg, false);
1992 1979 mem_cgroup_put(memcg);
1993 1980 }