Commit 2bd9bb206b338888b226e70139a25a67d10007f0
Committed by: Linus Torvalds
Parent: 4b53433468
Exists in: master and 4 other branches

memcg: clean up waiting move acct
Now, to check whether a memcg is under task-account-moving, we do css_tryget() against mc.to and mc.from. But this just complicates things. This patch makes the check simpler.

This patch adds a spinlock to move_charge_struct and guards modification of mc.to and mc.from with it. With that, we don't have to think about complicated races around this not-critical path.

[balbir@linux.vnet.ibm.com: don't crash on a null memcg being passed]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 file changed, 29 insertions(+), 22 deletions(-)
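The heart of the patch is one pattern: every read or write of mc.from and mc.to now happens inside a single spinlock, so a reader can take a consistent snapshot without the css_tryget()/css_put() refcounting dance. Below is a minimal user-space sketch of that pattern, assuming pthread spinlocks as a stand-in for the kernel's spinlock_t; all names here (move_state, set_move, under_move) are invented for illustration and do not appear in memcontrol.c.

#include <pthread.h>
#include <stdio.h>

struct mem_cgroup { int id; };		/* stand-in for the kernel type */

/* analogue of move_charge_struct: shared fields guarded by one lock */
static struct move_state {
	pthread_spinlock_t lock;	/* guards from and to */
	struct mem_cgroup *from;
	struct mem_cgroup *to;
} ms;

/* writer side: publish or clear both ends of a move atomically */
static void set_move(struct mem_cgroup *from, struct mem_cgroup *to)
{
	pthread_spin_lock(&ms.lock);
	ms.from = from;
	ms.to = to;
	pthread_spin_unlock(&ms.lock);
}

/* reader side: no tryget/put needed; the fields cannot change while
 * the lock is held, so the snapshot is consistent */
static int under_move(const struct mem_cgroup *mem)
{
	int ret;

	pthread_spin_lock(&ms.lock);
	ret = ms.from && (ms.from == mem || ms.to == mem);
	pthread_spin_unlock(&ms.lock);
	return ret;
}

int main(void)
{
	struct mem_cgroup a = { 1 }, b = { 2 };

	pthread_spin_init(&ms.lock, PTHREAD_PROCESS_PRIVATE);
	set_move(&a, &b);
	printf("a under move: %d\n", under_move(&a));	/* prints 1 */
	set_move(NULL, NULL);
	printf("a under move: %d\n", under_move(&a));	/* prints 0 */
	pthread_spin_destroy(&ms.lock);
	return 0;
}

Build with cc -pthread. The kernel version additionally walks the hierarchy with css_is_ancestor(), but the locking shape is the same.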
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -268,6 +268,7 @@
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
+	spinlock_t lock; /* for from, to, moving_task */
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long precharge;
@@ -276,6 +277,7 @@
 	struct task_struct *moving_task;	/* a task moving charges */
 	wait_queue_head_t waitq;		/* a waitq for other context */
 } mc = {
+	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
 	.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
 };
 
@@ -1051,26 +1053,24 @@
 
 static bool mem_cgroup_under_move(struct mem_cgroup *mem)
 {
-	struct mem_cgroup *from = mc.from;
-	struct mem_cgroup *to = mc.to;
+	struct mem_cgroup *from;
+	struct mem_cgroup *to;
 	bool ret = false;
-
-	if (from == mem || to == mem)
-		return true;
-
-	if (!from || !to || !mem->use_hierarchy)
-		return false;
-
-	rcu_read_lock();
-	if (css_tryget(&from->css)) {
-		ret = css_is_ancestor(&from->css, &mem->css);
-		css_put(&from->css);
-	}
-	if (!ret && css_tryget(&to->css)) {
-		ret = css_is_ancestor(&to->css, &mem->css);
-		css_put(&to->css);
-	}
-	rcu_read_unlock();
+	/*
+	 * Unlike task_move routines, we access mc.to, mc.from not under
+	 * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
+	 */
+	spin_lock(&mc.lock);
+	from = mc.from;
+	to = mc.to;
+	if (!from)
+		goto unlock;
+	if (from == mem || to == mem
+	    || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
+	    || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
+		ret = true;
+unlock:
+	spin_unlock(&mc.lock);
 	return ret;
 }
 
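Worth noting what the spinlock buys in this function: while mc.lock is held, mem_cgroup_clear_mc() cannot clear mc.from and mc.to, so the css_is_ancestor() calls are safe without css_tryget(). Callers that do find themselves under a move sleep on mc.waitq and are woken by the wake_up_all() in mem_cgroup_clear_mc(). A user-space analogue of that wait/wake-all handshake, with a condition variable standing in for the kernel wait queue (wait_acct_move and finish_move are illustrative names here, not the kernel functions):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t mv_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t mv_waitq = PTHREAD_COND_INITIALIZER;	/* ~ mc.waitq */
static bool moving;				/* ~ mc.moving_task != NULL */

/* analogue of a charger waiting for an in-flight move to finish */
static void wait_acct_move(void)
{
	pthread_mutex_lock(&mv_lock);
	while (moving)
		pthread_cond_wait(&mv_waitq, &mv_lock);
	pthread_mutex_unlock(&mv_lock);
}

/* analogue of the end of mem_cgroup_clear_mc(): clear the state,
 * then wake every waiter, like wake_up_all(&mc.waitq) */
static void finish_move(void)
{
	pthread_mutex_lock(&mv_lock);
	moving = false;
	pthread_mutex_unlock(&mv_lock);
	pthread_cond_broadcast(&mv_waitq);
}

static void *waiter(void *arg)
{
	(void)arg;
	wait_acct_move();
	puts("move finished, charger proceeds");
	return NULL;
}

int main(void)
{
	pthread_t t;

	moving = true;			/* a move is in flight */
	pthread_create(&t, NULL, waiter, NULL);
	sleep(1);			/* the move takes a while */
	finish_move();
	pthread_join(&t, NULL);
	return 0;
}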
@@ -1406,7 +1406,7 @@
 
 static void memcg_oom_recover(struct mem_cgroup *mem)
 {
-	if (atomic_read(&mem->oom_lock))
+	if (mem && atomic_read(&mem->oom_lock))
 		memcg_wakeup_oom(mem);
 }
 
@@ -4441,11 +4441,13 @@
 
 static void mem_cgroup_clear_mc(void)
 {
+	struct mem_cgroup *from = mc.from;
+	struct mem_cgroup *to = mc.to;
+
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
 		__mem_cgroup_cancel_charge(mc.to, mc.precharge);
 		mc.precharge = 0;
-		memcg_oom_recover(mc.to);
 	}
 	/*
 	 * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
@@ -4454,7 +4456,6 @@
 	if (mc.moved_charge) {
 		__mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
 		mc.moved_charge = 0;
-		memcg_oom_recover(mc.from);
 	}
 	/* we must fixup refcnts and charges */
 	if (mc.moved_swap) {
@@ -4479,9 +4480,13 @@
 
 		mc.moved_swap = 0;
 	}
+	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
 	mc.moving_task = NULL;
+	spin_unlock(&mc.lock);
+	memcg_oom_recover(from);
+	memcg_oom_recover(to);
 	wake_up_all(&mc.waitq);
 }
 
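The reordering in this hunk is the subtle part of the patch: from and to are snapshotted at the top of mem_cgroup_clear_mc(), the shared fields are cleared inside mc.lock, and memcg_oom_recover() then runs on the snapshots only after the lock is dropped, so no wakeup work happens while holding the spinlock. That is also why the two per-branch memcg_oom_recover() calls could be deleted above, and it is where the NULL check added to memcg_oom_recover() matters: a snapshot can be NULL on some paths, which the bracketed fix in the changelog addresses. The same shape, continued from the user-space sketch near the top (recover() is a hypothetical NULL-tolerant helper mirroring memcg_oom_recover()):

/* hypothetical helper: NULL-tolerant, like memcg_oom_recover()
 * after this patch */
static void recover(struct mem_cgroup *mem)
{
	if (mem)
		printf("recover cgroup %d\n", mem->id);
}

/* analogue of mem_cgroup_clear_mc(): snapshot, clear under the lock,
 * then act on the snapshots with the lock already dropped */
static void clear_move(void)
{
	struct mem_cgroup *from, *to;

	pthread_spin_lock(&ms.lock);
	from = ms.from;
	to = ms.to;
	ms.from = NULL;
	ms.to = NULL;
	pthread_spin_unlock(&ms.lock);

	recover(from);
	recover(to);
}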
@@ -4510,12 +4515,14 @@
 	VM_BUG_ON(mc.moved_charge);
 	VM_BUG_ON(mc.moved_swap);
 	VM_BUG_ON(mc.moving_task);
+	spin_lock(&mc.lock);
 	mc.from = from;
 	mc.to = mem;
 	mc.precharge = 0;
 	mc.moved_charge = 0;
 	mc.moved_swap = 0;
 	mc.moving_task = current;
+	spin_unlock(&mc.lock);
 
 	ret = mem_cgroup_precharge_mc(mm);
 	if (ret)