Commit 7f4d454dee2e0bdd21bafd413d1c53e443a26540
Committed by
Linus Torvalds
1 parent
a5e924f5f8
Exists in
master
and in
4 other branches
memcg: avoid deadlock caused by race between oom and cpuset_attach
mpol_rebind_mm(), which can be called from cpuset_attach(), does down_write(mm->mmap_sem). This means down_write(mm->mmap_sem) can be called under cgroup_mutex. On the other hand, the page fault path does down_read(mm->mmap_sem) and calls mem_cgroup_try_charge_xxx(), which may eventually call mem_cgroup_out_of_memory(). And mem_cgroup_out_of_memory() calls cgroup_lock(). This means cgroup_lock() can be called under down_read(mm->mmap_sem). If those two paths race, a deadlock can happen. This patch avoids the deadlock by: - removing cgroup_lock() from mem_cgroup_out_of_memory(). - defining a new mutex (memcg_tasklist) and using it to serialize mem_cgroup_move_task() (the ->attach handler of the memory cgroup) and mem_cgroup_out_of_memory(). Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 5 additions and 2 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -51,6 +51,7 @@ |
51 | 51 | #define do_swap_account (0) |
52 | 52 | #endif |
53 | 53 | |
54 | +static DEFINE_MUTEX(memcg_tasklist); /* can be held under cgroup_mutex */ | |
54 | 55 | |
55 | 56 | /* |
56 | 57 | * Statistics for memory cgroup. |
57 | 58 | |
... | ... | @@ -827,7 +828,9 @@ |
827 | 828 | |
828 | 829 | if (!nr_retries--) { |
829 | 830 | if (oom) { |
831 | + mutex_lock(&memcg_tasklist); | |
830 | 832 | mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); |
833 | + mutex_unlock(&memcg_tasklist); | |
831 | 834 | mem_over_limit->last_oom_jiffies = jiffies; |
832 | 835 | } |
833 | 836 | goto nomem; |
834 | 837 | |
... | ... | @@ -2211,10 +2214,12 @@ |
2211 | 2214 | struct cgroup *old_cont, |
2212 | 2215 | struct task_struct *p) |
2213 | 2216 | { |
2217 | + mutex_lock(&memcg_tasklist); | |
2214 | 2218 | /* |
2215 | 2219 | * FIXME: It's better to move charges of this process from old |
2216 | 2220 | * memcg to new memcg. But it's just on TODO-List now. |
2217 | 2221 | */ |
2222 | + mutex_unlock(&memcg_tasklist); | |
2218 | 2223 | } |
2219 | 2224 | |
2220 | 2225 | struct cgroup_subsys mem_cgroup_subsys = { |
mm/oom_kill.c
... | ... | @@ -429,7 +429,6 @@ |
429 | 429 | unsigned long points = 0; |
430 | 430 | struct task_struct *p; |
431 | 431 | |
432 | - cgroup_lock(); | |
433 | 432 | read_lock(&tasklist_lock); |
434 | 433 | retry: |
435 | 434 | p = select_bad_process(&points, mem); |
... | ... | @@ -444,7 +443,6 @@ |
444 | 443 | goto retry; |
445 | 444 | out: |
446 | 445 | read_unlock(&tasklist_lock); |
447 | - cgroup_unlock(); | |
448 | 446 | } |
449 | 447 | #endif |
450 | 448 |