Commit 2bd9bb206b338888b226e70139a25a67d10007f0

Authored by KAMEZAWA Hiroyuki
Committed by Linus Torvalds
1 parent 4b53433468

memcg: clean up waiting move acct

Currently, to check whether a memcg is under task-account-moving, we do
css_tryget() against mc.to and mc.from.  But this just complicates
things.  This patch makes the check simpler.

This patch adds a spinlock to move_charge_struct to guard modification of
mc.to and mc.from.  With this, we no longer have to reason about
complicated races around this non-critical path.
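
For illustration, a minimal user-space sketch of the pattern this patch
adopts; a pthread mutex stands in for the kernel spinlock, and all names
here are hypothetical rather than kernel API.  Writers publish or clear
the from/to pair only under the lock, so a reader can take a consistent
snapshot with no css_tryget()/css_put() dance:

	/* sketch: lock-guarded shared from/to pair (illustrative only) */
	#include <pthread.h>
	#include <stdbool.h>

	struct group;			/* stand-in for struct mem_cgroup */

	static struct {
		pthread_mutex_t lock;	/* guards from, to */
		struct group *from;
		struct group *to;
	} mc = { .lock = PTHREAD_MUTEX_INITIALIZER };

	/* writer side: update the pair only while holding the lock */
	static void set_move(struct group *from, struct group *to)
	{
		pthread_mutex_lock(&mc.lock);
		mc.from = from;
		mc.to = to;
		pthread_mutex_unlock(&mc.lock);
	}

	/* reader side: snapshot under the lock; no tryget/put needed */
	static bool under_move(struct group *g)
	{
		bool ret = false;

		pthread_mutex_lock(&mc.lock);
		if (mc.from && (mc.from == g || mc.to == g))
			ret = true;
		pthread_mutex_unlock(&mc.lock);
		return ret;
	}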

[balbir@linux.vnet.ibm.com: don't crash on a null memcg being passed]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 29 additions and 22 deletions

--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -268,6 +268,7 @@
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
+	spinlock_t lock; /* for from, to, moving_task */
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long precharge;
@@ -276,6 +277,7 @@
 	struct task_struct *moving_task; /* a task moving charges */
 	wait_queue_head_t waitq; /* a waitq for other context */
 } mc = {
+	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
 	.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
 };
 
@@ -1051,26 +1053,24 @@
 
 static bool mem_cgroup_under_move(struct mem_cgroup *mem)
 {
-	struct mem_cgroup *from = mc.from;
-	struct mem_cgroup *to = mc.to;
+	struct mem_cgroup *from;
+	struct mem_cgroup *to;
 	bool ret = false;
-
-	if (from == mem || to == mem)
-		return true;
-
-	if (!from || !to || !mem->use_hierarchy)
-		return false;
-
-	rcu_read_lock();
-	if (css_tryget(&from->css)) {
-		ret = css_is_ancestor(&from->css, &mem->css);
-		css_put(&from->css);
-	}
-	if (!ret && css_tryget(&to->css)) {
-		ret = css_is_ancestor(&to->css, &mem->css);
-		css_put(&to->css);
-	}
-	rcu_read_unlock();
+	/*
+	 * Unlike task_move routines, we access mc.to, mc.from not under
+	 * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
+	 */
+	spin_lock(&mc.lock);
+	from = mc.from;
+	to = mc.to;
+	if (!from)
+		goto unlock;
+	if (from == mem || to == mem
+	    || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
+	    || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
+		ret = true;
+unlock:
+	spin_unlock(&mc.lock);
 	return ret;
 }
 
@@ -1406,7 +1406,7 @@
 
 static void memcg_oom_recover(struct mem_cgroup *mem)
 {
-	if (atomic_read(&mem->oom_lock))
+	if (mem && atomic_read(&mem->oom_lock))
 		memcg_wakeup_oom(mem);
 }
 
@@ -4441,11 +4441,13 @@
 
 static void mem_cgroup_clear_mc(void)
 {
+	struct mem_cgroup *from = mc.from;
+	struct mem_cgroup *to = mc.to;
+
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
 		__mem_cgroup_cancel_charge(mc.to, mc.precharge);
 		mc.precharge = 0;
-		memcg_oom_recover(mc.to);
 	}
 	/*
 	 * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
@@ -4454,7 +4456,6 @@
 	if (mc.moved_charge) {
 		__mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
 		mc.moved_charge = 0;
-		memcg_oom_recover(mc.from);
 	}
 	/* we must fixup refcnts and charges */
 	if (mc.moved_swap) {
@@ -4479,9 +4480,13 @@
 
 		mc.moved_swap = 0;
 	}
+	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
 	mc.moving_task = NULL;
+	spin_unlock(&mc.lock);
+	memcg_oom_recover(from);
+	memcg_oom_recover(to);
 	wake_up_all(&mc.waitq);
 }
 
@@ -4510,12 +4515,14 @@
 			VM_BUG_ON(mc.moved_charge);
 			VM_BUG_ON(mc.moved_swap);
 			VM_BUG_ON(mc.moving_task);
+			spin_lock(&mc.lock);
 			mc.from = from;
 			mc.to = mem;
 			mc.precharge = 0;
 			mc.moved_charge = 0;
 			mc.moved_swap = 0;
 			mc.moving_task = current;
+			spin_unlock(&mc.lock);
 
 			ret = mem_cgroup_precharge_mc(mm);
 			if (ret)
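
Note the ordering in mem_cgroup_clear_mc() above: the two memcg_oom_recover()
calls move out of the individual uncharge branches and run once, on from/to
values snapshotted before clearing, after mc.lock is dropped.  Since either
pointer may be NULL at that point, memcg_oom_recover() gains the "mem &&"
check.  A compact, self-contained sketch of that snapshot/clear/notify idiom
(user-space C again; names are illustrative only, not kernel API):

	#include <pthread.h>
	#include <stdio.h>

	struct group { const char *name; };

	static struct {
		pthread_mutex_t lock;	/* guards from, to */
		struct group *from;
		struct group *to;
	} mc = { .lock = PTHREAD_MUTEX_INITIALIZER };

	/* NULL-tolerant, mirroring the new "mem &&" test above */
	static void oom_recover(struct group *g)
	{
		if (g)
			printf("waking waiters of %s\n", g->name);
	}

	static void clear_mc(void)
	{
		/* 1. snapshot before the shared pointers are cleared */
		struct group *from = mc.from;
		struct group *to = mc.to;

		/* 2. clear under the lock so concurrent readers stay consistent */
		pthread_mutex_lock(&mc.lock);
		mc.from = NULL;
		mc.to = NULL;
		pthread_mutex_unlock(&mc.lock);

		/* 3. notify after the lock is dropped, using the snapshot */
		oom_recover(from);
		oom_recover(to);
	}

	int main(void)
	{
		struct group a = { "A" }, b = { "B" };

		mc.from = &a;
		mc.to = &b;
		clear_mc();	/* notifies both, then both pointers are NULL */
		clear_mc();	/* no-op: oom_recover() tolerates NULL */
		return 0;
	}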