Commit c9f01245b6a7d77d17deaa71af10f6aca14fa24e
Committed by
Linus Torvalds
1 parent
7b0d44fa49
Exists in
master
and in
6 other branches
oom: remove oom_disable_count
This removes mm->oom_disable_count entirely, since it is unnecessary and currently buggy. The counter was intended to be per-process, but it is currently decremented in the exit path for each thread that exits, causing it to underflow. The count was originally intended to prevent oom-killing threads that share memory with threads that cannot be killed, since killing them does not lead to future memory freeing. The counter could be fixed to represent all threads sharing the same mm, but it is better to remove the count entirely, because: (1) it is possible that the OOM_DISABLE thread sharing memory with the victim is waiting on that thread to exit and will actually cause future memory freeing, and (2) there is no guarantee that a thread is disabled from oom killing just because another thread sharing its mm is oom disabled. Signed-off-by: David Rientjes <rientjes@google.com> Reported-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Ying Han <yinghan@google.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 6 changed files with 6 additions and 49 deletions. Side-by-side Diff
fs/exec.c
... | ... | @@ -841,10 +841,6 @@ |
841 | 841 | tsk->mm = mm; |
842 | 842 | tsk->active_mm = mm; |
843 | 843 | activate_mm(active_mm, mm); |
844 | - if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | |
845 | - atomic_dec(&old_mm->oom_disable_count); | |
846 | - atomic_inc(&tsk->mm->oom_disable_count); | |
847 | - } | |
848 | 844 | task_unlock(tsk); |
849 | 845 | arch_pick_mmap_layout(mm); |
850 | 846 | if (old_mm) { |
fs/proc/base.c
... | ... | @@ -1107,13 +1107,6 @@ |
1107 | 1107 | goto err_sighand; |
1108 | 1108 | } |
1109 | 1109 | |
1110 | - if (oom_adjust != task->signal->oom_adj) { | |
1111 | - if (oom_adjust == OOM_DISABLE) | |
1112 | - atomic_inc(&task->mm->oom_disable_count); | |
1113 | - if (task->signal->oom_adj == OOM_DISABLE) | |
1114 | - atomic_dec(&task->mm->oom_disable_count); | |
1115 | - } | |
1116 | - | |
1117 | 1110 | /* |
1118 | 1111 | * Warn that /proc/pid/oom_adj is deprecated, see |
1119 | 1112 | * Documentation/feature-removal-schedule.txt. |
... | ... | @@ -1215,12 +1208,6 @@ |
1215 | 1208 | goto err_sighand; |
1216 | 1209 | } |
1217 | 1210 | |
1218 | - if (oom_score_adj != task->signal->oom_score_adj) { | |
1219 | - if (oom_score_adj == OOM_SCORE_ADJ_MIN) | |
1220 | - atomic_inc(&task->mm->oom_disable_count); | |
1221 | - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | |
1222 | - atomic_dec(&task->mm->oom_disable_count); | |
1223 | - } | |
1224 | 1211 | task->signal->oom_score_adj = oom_score_adj; |
1225 | 1212 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
1226 | 1213 | task->signal->oom_score_adj_min = oom_score_adj; |
include/linux/mm_types.h
... | ... | @@ -336,9 +336,6 @@ |
336 | 336 | unsigned int token_priority; |
337 | 337 | unsigned int last_interval; |
338 | 338 | |
339 | - /* How many tasks sharing this mm are OOM_DISABLE */ | |
340 | - atomic_t oom_disable_count; | |
341 | - | |
342 | 339 | unsigned long flags; /* Must use atomic bitops to access the bits */ |
343 | 340 | |
344 | 341 | struct core_state *core_state; /* coredumping support */ |
kernel/exit.c
... | ... | @@ -681,8 +681,6 @@ |
681 | 681 | enter_lazy_tlb(mm, current); |
682 | 682 | /* We don't want this task to be frozen prematurely */ |
683 | 683 | clear_freeze_flag(tsk); |
684 | - if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | |
685 | - atomic_dec(&mm->oom_disable_count); | |
686 | 684 | task_unlock(tsk); |
687 | 685 | mm_update_next_owner(mm); |
688 | 686 | mmput(mm); |
kernel/fork.c
... | ... | @@ -501,7 +501,6 @@ |
501 | 501 | mm->cached_hole_size = ~0UL; |
502 | 502 | mm_init_aio(mm); |
503 | 503 | mm_init_owner(mm, p); |
504 | - atomic_set(&mm->oom_disable_count, 0); | |
505 | 504 | |
506 | 505 | if (likely(!mm_alloc_pgd(mm))) { |
507 | 506 | mm->def_flags = 0; |
... | ... | @@ -816,8 +815,6 @@ |
816 | 815 | /* Initializing for Swap token stuff */ |
817 | 816 | mm->token_priority = 0; |
818 | 817 | mm->last_interval = 0; |
819 | - if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | |
820 | - atomic_inc(&mm->oom_disable_count); | |
821 | 818 | |
822 | 819 | tsk->mm = mm; |
823 | 820 | tsk->active_mm = mm; |
824 | 821 | |
... | ... | @@ -1391,13 +1388,8 @@ |
1391 | 1388 | bad_fork_cleanup_namespaces: |
1392 | 1389 | exit_task_namespaces(p); |
1393 | 1390 | bad_fork_cleanup_mm: |
1394 | - if (p->mm) { | |
1395 | - task_lock(p); | |
1396 | - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | |
1397 | - atomic_dec(&p->mm->oom_disable_count); | |
1398 | - task_unlock(p); | |
1391 | + if (p->mm) | |
1399 | 1392 | mmput(p->mm); |
1400 | - } | |
1401 | 1393 | bad_fork_cleanup_signal: |
1402 | 1394 | if (!(clone_flags & CLONE_THREAD)) |
1403 | 1395 | free_signal_struct(p->signal); |
mm/oom_kill.c
... | ... | @@ -54,13 +54,7 @@ |
54 | 54 | |
55 | 55 | spin_lock_irq(&sighand->siglock); |
56 | 56 | old_val = current->signal->oom_score_adj; |
57 | - if (new_val != old_val) { | |
58 | - if (new_val == OOM_SCORE_ADJ_MIN) | |
59 | - atomic_inc(¤t->mm->oom_disable_count); | |
60 | - else if (old_val == OOM_SCORE_ADJ_MIN) | |
61 | - atomic_dec(¤t->mm->oom_disable_count); | |
62 | - current->signal->oom_score_adj = new_val; | |
63 | - } | |
57 | + current->signal->oom_score_adj = new_val; | |
64 | 58 | spin_unlock_irq(&sighand->siglock); |
65 | 59 | |
66 | 60 | return old_val; |
... | ... | @@ -173,16 +167,6 @@ |
173 | 167 | return 0; |
174 | 168 | |
175 | 169 | /* |
176 | - * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN | |
177 | - * so the entire heuristic doesn't need to be executed for something | |
178 | - * that cannot be killed. | |
179 | - */ | |
180 | - if (atomic_read(&p->mm->oom_disable_count)) { | |
181 | - task_unlock(p); | |
182 | - return 0; | |
183 | - } | |
184 | - | |
185 | - /* | |
186 | 170 | * The memory controller may have a limit of 0 bytes, so avoid a divide |
187 | 171 | * by zero, if necessary. |
188 | 172 | */ |
... | ... | @@ -451,6 +435,9 @@ |
451 | 435 | for_each_process(q) |
452 | 436 | if (q->mm == mm && !same_thread_group(q, p) && |
453 | 437 | !(q->flags & PF_KTHREAD)) { |
438 | + if (q->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | |
439 | + continue; | |
440 | + | |
454 | 441 | task_lock(q); /* Protect ->comm from prctl() */ |
455 | 442 | pr_err("Kill process %d (%s) sharing same memory\n", |
456 | 443 | task_pid_nr(q), q->comm); |
... | ... | @@ -727,7 +714,7 @@ |
727 | 714 | read_lock(&tasklist_lock); |
728 | 715 | if (sysctl_oom_kill_allocating_task && |
729 | 716 | !oom_unkillable_task(current, NULL, nodemask) && |
730 | - current->mm && !atomic_read(¤t->mm->oom_disable_count)) { | |
717 | + current->mm) { | |
731 | 718 | /* |
732 | 719 | * oom_kill_process() needs tasklist_lock held. If it returns |
733 | 720 | * non-zero, current could not be killed so we must fallback to |