Commit c9f01245b6a7d77d17deaa71af10f6aca14fa24e

Authored by David Rientjes
Committed by Linus Torvalds
1 parent 7b0d44fa49

oom: remove oom_disable_count

This removes mm->oom_disable_count entirely since it's unnecessary and
currently buggy.  The counter was intended to be per-process but it's
currently decremented in the exit path for each thread that exits, causing
it to underflow.

The count was originally intended to prevent oom killing threads that
share memory with threads that cannot be killed since it doesn't lead to
future memory freeing.  The counter could be fixed to represent all
threads sharing the same mm, but it's better to remove the count since:

 - it is possible that the OOM_DISABLE thread sharing memory with the
   victim is waiting on that thread to exit and will actually cause
   future memory freeing, and

 - there is no guarantee that a thread is disabled from oom killing just
   because another thread sharing its mm is oom disabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 6 changed files with 6 additions and 49 deletions Side-by-side Diff

... ... @@ -841,10 +841,6 @@
841 841 tsk->mm = mm;
842 842 tsk->active_mm = mm;
843 843 activate_mm(active_mm, mm);
844   - if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
845   - atomic_dec(&old_mm->oom_disable_count);
846   - atomic_inc(&tsk->mm->oom_disable_count);
847   - }
848 844 task_unlock(tsk);
849 845 arch_pick_mmap_layout(mm);
850 846 if (old_mm) {
... ... @@ -1107,13 +1107,6 @@
1107 1107 goto err_sighand;
1108 1108 }
1109 1109  
1110   - if (oom_adjust != task->signal->oom_adj) {
1111   - if (oom_adjust == OOM_DISABLE)
1112   - atomic_inc(&task->mm->oom_disable_count);
1113   - if (task->signal->oom_adj == OOM_DISABLE)
1114   - atomic_dec(&task->mm->oom_disable_count);
1115   - }
1116   -
1117 1110 /*
1118 1111 * Warn that /proc/pid/oom_adj is deprecated, see
1119 1112 * Documentation/feature-removal-schedule.txt.
... ... @@ -1215,12 +1208,6 @@
1215 1208 goto err_sighand;
1216 1209 }
1217 1210  
1218   - if (oom_score_adj != task->signal->oom_score_adj) {
1219   - if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1220   - atomic_inc(&task->mm->oom_disable_count);
1221   - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1222   - atomic_dec(&task->mm->oom_disable_count);
1223   - }
1224 1211 task->signal->oom_score_adj = oom_score_adj;
1225 1212 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1226 1213 task->signal->oom_score_adj_min = oom_score_adj;
include/linux/mm_types.h
... ... @@ -336,9 +336,6 @@
336 336 unsigned int token_priority;
337 337 unsigned int last_interval;
338 338  
339   - /* How many tasks sharing this mm are OOM_DISABLE */
340   - atomic_t oom_disable_count;
341   -
342 339 unsigned long flags; /* Must use atomic bitops to access the bits */
343 340  
344 341 struct core_state *core_state; /* coredumping support */
... ... @@ -681,8 +681,6 @@
681 681 enter_lazy_tlb(mm, current);
682 682 /* We don't want this task to be frozen prematurely */
683 683 clear_freeze_flag(tsk);
684   - if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
685   - atomic_dec(&mm->oom_disable_count);
686 684 task_unlock(tsk);
687 685 mm_update_next_owner(mm);
688 686 mmput(mm);
... ... @@ -501,7 +501,6 @@
501 501 mm->cached_hole_size = ~0UL;
502 502 mm_init_aio(mm);
503 503 mm_init_owner(mm, p);
504   - atomic_set(&mm->oom_disable_count, 0);
505 504  
506 505 if (likely(!mm_alloc_pgd(mm))) {
507 506 mm->def_flags = 0;
... ... @@ -816,8 +815,6 @@
816 815 /* Initializing for Swap token stuff */
817 816 mm->token_priority = 0;
818 817 mm->last_interval = 0;
819   - if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
820   - atomic_inc(&mm->oom_disable_count);
821 818  
822 819 tsk->mm = mm;
823 820 tsk->active_mm = mm;
824 821  
... ... @@ -1391,13 +1388,8 @@
1391 1388 bad_fork_cleanup_namespaces:
1392 1389 exit_task_namespaces(p);
1393 1390 bad_fork_cleanup_mm:
1394   - if (p->mm) {
1395   - task_lock(p);
1396   - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1397   - atomic_dec(&p->mm->oom_disable_count);
1398   - task_unlock(p);
  1391 + if (p->mm)
1399 1392 mmput(p->mm);
1400   - }
1401 1393 bad_fork_cleanup_signal:
1402 1394 if (!(clone_flags & CLONE_THREAD))
1403 1395 free_signal_struct(p->signal);
... ... @@ -54,13 +54,7 @@
54 54  
55 55 spin_lock_irq(&sighand->siglock);
56 56 old_val = current->signal->oom_score_adj;
57   - if (new_val != old_val) {
58   - if (new_val == OOM_SCORE_ADJ_MIN)
59   - atomic_inc(&current->mm->oom_disable_count);
60   - else if (old_val == OOM_SCORE_ADJ_MIN)
61   - atomic_dec(&current->mm->oom_disable_count);
62   - current->signal->oom_score_adj = new_val;
63   - }
  57 + current->signal->oom_score_adj = new_val;
64 58 spin_unlock_irq(&sighand->siglock);
65 59  
66 60 return old_val;
... ... @@ -173,16 +167,6 @@
173 167 return 0;
174 168  
175 169 /*
176   - * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
177   - * so the entire heuristic doesn't need to be executed for something
178   - * that cannot be killed.
179   - */
180   - if (atomic_read(&p->mm->oom_disable_count)) {
181   - task_unlock(p);
182   - return 0;
183   - }
184   -
185   - /*
186 170 * The memory controller may have a limit of 0 bytes, so avoid a divide
187 171 * by zero, if necessary.
188 172 */
... ... @@ -451,6 +435,9 @@
451 435 for_each_process(q)
452 436 if (q->mm == mm && !same_thread_group(q, p) &&
453 437 !(q->flags & PF_KTHREAD)) {
  438 + if (q->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
  439 + continue;
  440 +
454 441 task_lock(q); /* Protect ->comm from prctl() */
455 442 pr_err("Kill process %d (%s) sharing same memory\n",
456 443 task_pid_nr(q), q->comm);
... ... @@ -727,7 +714,7 @@
727 714 read_lock(&tasklist_lock);
728 715 if (sysctl_oom_kill_allocating_task &&
729 716 !oom_unkillable_task(current, NULL, nodemask) &&
730   - current->mm && !atomic_read(&current->mm->oom_disable_count)) {
  717 + current->mm) {
731 718 /*
732 719 * oom_kill_process() needs tasklist_lock held. If it returns
733 720 * non-zero, current could not be killed so we must fallback to