mm/oom_kill.c
  /*
   *  linux/mm/oom_kill.c
   * 
   *  Copyright (C)  1998,2000  Rik van Riel
   *	Thanks go out to Claus Fischer for some serious inspiration and
   *	for goading me into coding this file...
   *  Copyright (C)  2010  Google, Inc.
   *	Rewritten by David Rientjes
   *
   *  The routines in this file are used to kill a process when
   *  we're seriously out of memory. This gets called from __alloc_pages()
   *  in mm/page_alloc.c when we really run out of memory.
   *
   *  Since we won't call these routines often (on a well-configured
   *  machine) this file will double as a 'coding guide' and a signpost
   *  for newbie kernel hackers. It features several pointers to major
   *  kernel subsystems and hints as to where to find out what things do.
   */
  #include <linux/oom.h>
  #include <linux/mm.h>
  #include <linux/err.h>
  #include <linux/gfp.h>
  #include <linux/sched.h>
  #include <linux/sched/mm.h>
  #include <linux/sched/coredump.h>
  #include <linux/sched/task.h>
  #include <linux/swap.h>
  #include <linux/timex.h>
  #include <linux/jiffies.h>
  #include <linux/cpuset.h>
  #include <linux/export.h>
  #include <linux/notifier.h>
  #include <linux/memcontrol.h>
  #include <linux/mempolicy.h>
  #include <linux/security.h>
  #include <linux/ptrace.h>
  #include <linux/freezer.h>
  #include <linux/ftrace.h>
  #include <linux/ratelimit.h>
  #include <linux/kthread.h>
  #include <linux/init.h>
  #include <linux/mmu_notifier.h>
  
  #include <asm/tlb.h>
  #include "internal.h"
  #include "slab.h"
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/oom.h>

  int sysctl_panic_on_oom;
  int sysctl_oom_kill_allocating_task;
  int sysctl_oom_dump_tasks = 1;

  /*
   * Serializes oom killer invocations (out_of_memory()) from all contexts to
   * prevent over-eager oom killing (e.g. when the oom killer is invoked
   * from different domains).
   *
   * oom_killer_disable() relies on this lock to stabilize oom_killer_disabled
   * and mark_oom_victim().
   */
  DEFINE_MUTEX(oom_lock);

  #ifdef CONFIG_NUMA
  /**
   * has_intersects_mems_allowed() - check task eligibility for kill
   * @start: task struct of which task to consider
   * @mask: nodemask passed to page allocator for mempolicy ooms
   *
   * Task eligibility is determined by whether or not a candidate task, @start,
   * shares the same mempolicy nodes as current if it is bound by such a policy
   * and whether or not it has the same set of allowed cpuset nodes.
   */
  static bool has_intersects_mems_allowed(struct task_struct *start,
  					const nodemask_t *mask)
  {
  	struct task_struct *tsk;
  	bool ret = false;

  	rcu_read_lock();
  	for_each_thread(start, tsk) {
  		if (mask) {
  			/*
  			 * If this is a mempolicy constrained oom, tsk's
  			 * cpuset is irrelevant.  Only return true if its
  			 * mempolicy intersects current, otherwise it may be
  			 * needlessly killed.
  			 */
  			ret = mempolicy_nodemask_intersects(tsk, mask);
  		} else {
  			/*
  			 * This is not a mempolicy constrained oom, so only
  			 * check the mems of tsk's cpuset.
  			 */
  			ret = cpuset_mems_allowed_intersects(current, tsk);
  		}
  		if (ret)
  			break;
  	}
  	rcu_read_unlock();

  	return ret;
  }
  #else
  static bool has_intersects_mems_allowed(struct task_struct *tsk,
  					const nodemask_t *mask)
  {
  	return true;
  }
  #endif /* CONFIG_NUMA */

  /*
   * The process p may have detached its own ->mm while exiting or through
   * use_mm(), but one or more of its subthreads may still have a valid
   * pointer.  Return p, or any of its subthreads with a valid ->mm, with
   * task_lock() held.
   */
  struct task_struct *find_lock_task_mm(struct task_struct *p)
  {
  	struct task_struct *t;

  	rcu_read_lock();
  	for_each_thread(p, t) {
  		task_lock(t);
  		if (likely(t->mm))
  			goto found;
  		task_unlock(t);
  	}
  	t = NULL;
  found:
  	rcu_read_unlock();

  	return t;
  }
  /*
   * order == -1 means the oom kill is required by sysrq, otherwise only
   * for display purposes.
   */
  static inline bool is_sysrq_oom(struct oom_control *oc)
  {
  	return oc->order == -1;
  }
  static inline bool is_memcg_oom(struct oom_control *oc)
  {
  	return oc->memcg != NULL;
  }
  /* return true if the task is not adequate as candidate victim task. */
  static bool oom_unkillable_task(struct task_struct *p,
  		struct mem_cgroup *memcg, const nodemask_t *nodemask)
  {
  	if (is_global_init(p))
  		return true;
  	if (p->flags & PF_KTHREAD)
  		return true;
  
  	/* When mem_cgroup_out_of_memory() is called and p is not a member of the group */
  	if (memcg && !task_in_mem_cgroup(p, memcg))
  		return true;
  
  	/* p may not have freeable memory in nodemask */
  	if (!has_intersects_mems_allowed(p, nodemask))
  		return true;
  
  	return false;
  }
  /*
   * Print out unreclaimable slab info when the amount of unreclaimable slab
   * memory is greater than all user memory (LRU pages).
   */
  static bool is_dump_unreclaim_slabs(void)
  {
  	unsigned long nr_lru;
  
  	nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
  		 global_node_page_state(NR_INACTIVE_ANON) +
  		 global_node_page_state(NR_ACTIVE_FILE) +
  		 global_node_page_state(NR_INACTIVE_FILE) +
  		 global_node_page_state(NR_ISOLATED_ANON) +
  		 global_node_page_state(NR_ISOLATED_FILE) +
  		 global_node_page_state(NR_UNEVICTABLE);
  
  	return (global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru);
  }
  /**
   * oom_badness - heuristic function to determine which candidate task to kill
   * @p: task struct of the task whose badness should be calculated
   * @totalpages: total present RAM allowed for page allocation
   * @memcg: task's memory controller, if constrained
   * @nodemask: nodemask passed to page allocator for mempolicy ooms
   *
   * The heuristic for determining which task to kill is made to be as simple and
   * predictable as possible.  The goal is to return the highest value for the
   * task consuming the most memory to avoid subsequent oom failures.
   */
  unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
  			  const nodemask_t *nodemask, unsigned long totalpages)
  {
  	long points;
  	long adj;

  	if (oom_unkillable_task(p, memcg, nodemask))
  		return 0;

  	p = find_lock_task_mm(p);
  	if (!p)
  		return 0;
  	/*
  	 * Do not even consider tasks which are explicitly marked oom
  	 * unkillable, have already been oom reaped, or are in
  	 * the middle of vfork.
  	 */
  	adj = (long)p->signal->oom_score_adj;
  	if (adj == OOM_SCORE_ADJ_MIN ||
  			test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
  			in_vfork(p)) {
  		task_unlock(p);
  		return 0;
  	}
  	/*
  	 * The baseline for the badness score is the proportion of RAM that each
  	 * task's rss, pagetable and swap space use.
  	 */
  	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
  		mm_pgtables_bytes(p->mm) / PAGE_SIZE;
  	task_unlock(p);

  	/* Normalize to oom_score_adj units */
  	adj *= totalpages / 1000;
  	points += adj;

  	/*
  	 * Never return 0 for an eligible task regardless of the root bonus and
  	 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
  	 */
  	return points > 0 ? points : 1;
  }
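
  /*
   * Worked example (added for illustration; the numbers are not from the
   * original source): with totalpages = 4,000,000 pages, a task whose
   * rss + pagetables + swap add up to 1,000,000 pages starts with
   * points = 1,000,000.  Each oom_score_adj unit is worth
   * totalpages / 1000 = 4,000 points, so oom_score_adj = 500 adds
   * 2,000,000 points and -500 subtracts the same; a result at or below
   * zero is reported as 1 so that an eligible task never scores 0.
   * select_bad_process() later divides the winning score by the same
   * totalpages / 1000 factor to report it on the familiar 0..1000 scale.
   */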
  enum oom_constraint {
  	CONSTRAINT_NONE,
  	CONSTRAINT_CPUSET,
  	CONSTRAINT_MEMORY_POLICY,
  	CONSTRAINT_MEMCG,
  };
  /*
   * Determine the type of allocation constraint.
   */
  static enum oom_constraint constrained_alloc(struct oom_control *oc)
  {
  	struct zone *zone;
  	struct zoneref *z;
  	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
  	bool cpuset_limited = false;
  	int nid;

  	if (is_memcg_oom(oc)) {
  		oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
  		return CONSTRAINT_MEMCG;
  	}
  	/* Default to all available memory */
  	oc->totalpages = totalram_pages + total_swap_pages;
  
  	if (!IS_ENABLED(CONFIG_NUMA))
  		return CONSTRAINT_NONE;

  	if (!oc->zonelist)
  		return CONSTRAINT_NONE;
  	/*
  	 * Reach here only when __GFP_NOFAIL is used, so we should avoid
  	 * killing current; a random task would have to be killed instead.
  	 * Hopefully CONSTRAINT_THISNODE would cover this, but there is no
  	 * way to handle it for now.
  	 */
  	if (oc->gfp_mask & __GFP_THISNODE)
  		return CONSTRAINT_NONE;

  	/*
  	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
  	 * the page allocator means a mempolicy is in effect.  Cpuset policy
  	 * is enforced in get_page_from_freelist().
  	 */
  	if (oc->nodemask &&
  	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
  		oc->totalpages = total_swap_pages;
  		for_each_node_mask(nid, *oc->nodemask)
  			oc->totalpages += node_spanned_pages(nid);
  		return CONSTRAINT_MEMORY_POLICY;
  	}
  
  	/* Check whether this allocation failure is caused by cpuset's wall function */
  	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
  			high_zoneidx, oc->nodemask)
  		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
  			cpuset_limited = true;

  	if (cpuset_limited) {
  		oc->totalpages = total_swap_pages;
  		for_each_node_mask(nid, cpuset_current_mems_allowed)
  			oc->totalpages += node_spanned_pages(nid);
  		return CONSTRAINT_CPUSET;
  	}
  	return CONSTRAINT_NONE;
  }
  static int oom_evaluate_task(struct task_struct *task, void *arg)
  {
  	struct oom_control *oc = arg;
  	unsigned long points;
  	if (oom_unkillable_task(task, NULL, oc->nodemask))
  		goto next;
  
  	/*
  	 * This task already has access to memory reserves and is being killed.
  	 * Don't allow any other task to have access to the reserves unless
  	 * the task has MMF_OOM_SKIP because chances that it would release
  	 * any memory is quite low.
  	 */
  	if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
  		if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
  			goto next;
  		goto abort;
  	}

  	/*
  	 * If task is allocating a lot of memory and has been marked to be
  	 * killed first if it triggers an oom, then select it.
  	 */
  	if (oom_task_origin(task)) {
  		points = ULONG_MAX;
  		goto select;
  	}

  	points = oom_badness(task, NULL, oc->nodemask, oc->totalpages);
  	if (!points || points < oc->chosen_points)
  		goto next;
  
  	/* Prefer thread group leaders for display purposes */
  	if (points == oc->chosen_points && thread_group_leader(oc->chosen))
  		goto next;
  select:
  	if (oc->chosen)
  		put_task_struct(oc->chosen);
  	get_task_struct(task);
  	oc->chosen = task;
  	oc->chosen_points = points;
  next:
  	return 0;
  abort:
  	if (oc->chosen)
  		put_task_struct(oc->chosen);
  	oc->chosen = (void *)-1UL;
  	return 1;
  }
  /*
   * Simple selection loop. We choose the process with the highest number of
   * 'points'. In case scan was aborted, oc->chosen is set to -1.
   */
  static void select_bad_process(struct oom_control *oc)
  {
  	if (is_memcg_oom(oc))
  		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
  	else {
  		struct task_struct *p;

  		rcu_read_lock();
  		for_each_process(p)
  			if (oom_evaluate_task(p, oc))
  				break;
  		rcu_read_unlock();
  	}

  	oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
  }
  
  /**
   * dump_tasks - dump current memory state of all system tasks
   * @memcg: current's memory controller, if constrained
   * @nodemask: nodemask passed to page allocator for mempolicy ooms
   *
   * Dumps the current memory state of all eligible tasks.  Tasks not in the same
   * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
   * are not shown.
   * State information includes task's pid, uid, tgid, vm size, rss,
   * pgtables_bytes, swapents, oom_score_adj value, and name.
   */
  static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
  {
  	struct task_struct *p;
  	struct task_struct *task;

  	pr_info("Tasks state (memory values in pages):\n");
  	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
  	rcu_read_lock();
  	for_each_process(p) {
  		if (oom_unkillable_task(p, memcg, nodemask))
  			continue;

  		task = find_lock_task_mm(p);
  		if (!task) {
  			/*
  			 * This is a kthread or all of p's threads have already
  			 * detached their mm's.  There's no need to report
  			 * them; they can't be oom killed anyway.
  			 */
  			continue;
  		}

  		pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
  			task->pid, from_kuid(&init_user_ns, task_uid(task)),
  			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
  			mm_pgtables_bytes(task->mm),
  			get_mm_counter(task->mm, MM_SWAPENTS),
  			task->signal->oom_score_adj, task->comm);
  		task_unlock(task);
  	}
  	rcu_read_unlock();
  }
  static void dump_header(struct oom_control *oc, struct task_struct *p)
  {
  	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n",
  		current->comm, oc->gfp_mask, &oc->gfp_mask,
  		nodemask_pr_args(oc->nodemask), oc->order,
  		current->signal->oom_score_adj);
  	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
  		pr_warn("COMPACTION is disabled!!!\n");

  	cpuset_print_current_mems_allowed();
  	dump_stack();
  	if (is_memcg_oom(oc))
  		mem_cgroup_print_oom_info(oc->memcg, p);
  	else {
  		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
  		if (is_dump_unreclaim_slabs())
  			dump_unreclaimable_slab();
  	}
  	if (sysctl_oom_dump_tasks)
  		dump_tasks(oc->memcg, oc->nodemask);
  }
  /*
   * Number of OOM victims in flight
   */
  static atomic_t oom_victims = ATOMIC_INIT(0);
  static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);

  static bool oom_killer_disabled __read_mostly;

  #define K(x) ((x) << (PAGE_SHIFT-10))
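
  /*
   * Added note: K() converts a page count to kilobytes.  With 4 KiB pages
   * (PAGE_SHIFT == 12) it is x << 2, so K(25) == 100, which is the unit
   * used by the "anon-rss:%lukB"-style messages below.
   */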
  /*
   * task->mm can be NULL if the task is the exited group leader.  So to
   * determine whether the task is using a particular mm, we examine all the
   * task's threads: if one of those is using this mm then this task was also
   * using it.
   */
  bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
  {
  	struct task_struct *t;
  
  	for_each_thread(p, t) {
  		struct mm_struct *t_mm = READ_ONCE(t->mm);
  		if (t_mm)
  			return t_mm == mm;
  	}
  	return false;
  }
  #ifdef CONFIG_MMU
  /*
   * OOM Reaper kernel thread which tries to reap the memory used by the OOM
   * victim (if that is possible) to help the OOM killer to move on.
   */
  static struct task_struct *oom_reaper_th;
  static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
  static struct task_struct *oom_reaper_list;
  static DEFINE_SPINLOCK(oom_reaper_lock);
  bool __oom_reap_task_mm(struct mm_struct *mm)
  {
  	struct vm_area_struct *vma;
  	bool ret = true;
  
  	/*
  	 * Tell all users of get_user/copy_from_user etc... that the content
  	 * is no longer stable. No barriers really needed because unmapping
  	 * should imply barriers already and the reader would hit a page fault
  	 * if it stumbled over a reaped memory.
  	 */
  	set_bit(MMF_UNSTABLE, &mm->flags);
  
  	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
  		if (!can_madv_dontneed_vma(vma))
  			continue;
  
  		/*
  		 * Only anonymous pages have a good chance to be dropped
  		 * without additional steps which we cannot afford as we
  		 * are OOM already.
  		 *
  		 * We do not even care about fs backed pages because all
  		 * which are reclaimable have already been reclaimed and
  		 * we do not want to block exit_mmap by keeping mm ref
  		 * count elevated without a good reason.
  		 */
  		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
  			const unsigned long start = vma->vm_start;
  			const unsigned long end = vma->vm_end;
  			struct mmu_gather tlb;
  
  			tlb_gather_mmu(&tlb, mm, start, end);
  			if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
  				tlb_finish_mmu(&tlb, start, end);
  				ret = false;
  				continue;
  			}
  			unmap_page_range(&tlb, vma, start, end, NULL);
  			mmu_notifier_invalidate_range_end(mm, start, end);
  			tlb_finish_mmu(&tlb, start, end);
  		}
  	}
  
  	return ret;
  }
  /*
   * Reaps the address space of the given task.
   *
   * Returns true on success and false if none or part of the address space
   * has been reclaimed and the caller should retry later.
   */
  static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
  {
  	bool ret = true;
  	if (!down_read_trylock(&mm->mmap_sem)) {
  		trace_skip_task_reaping(tsk->pid);
  		return false;
  	}
  
  	/*
  	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
  	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
  	 * under mmap_sem for reading because it serializes against the
  	 * down_write();up_write() cycle in exit_mmap().
  	 */
  	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
  		trace_skip_task_reaping(tsk->pid);
  		goto out_unlock;
  	}
  	trace_start_task_reaping(tsk->pid);
  	/* failed to reap part of the address space. Try again later */
  	ret = __oom_reap_task_mm(mm);
  	if (!ret)
  		goto out_finish;

  	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
  			task_pid_nr(tsk), tsk->comm,
  			K(get_mm_counter(mm, MM_ANONPAGES)),
  			K(get_mm_counter(mm, MM_FILEPAGES)),
  			K(get_mm_counter(mm, MM_SHMEMPAGES)));
  out_finish:
  	trace_finish_task_reaping(tsk->pid);
  out_unlock:
  	up_read(&mm->mmap_sem);

  	return ret;
  }
  #define MAX_OOM_REAP_RETRIES 10
  static void oom_reap_task(struct task_struct *tsk)
  {
  	int attempts = 0;
  	struct mm_struct *mm = tsk->signal->oom_mm;
  
  	/* Retry the down_read_trylock(mmap_sem) a few times */
  	while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
  		schedule_timeout_idle(HZ/10);
  	if (attempts <= MAX_OOM_REAP_RETRIES ||
  	    test_bit(MMF_OOM_SKIP, &mm->flags))
  		goto done;

  	pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
  		task_pid_nr(tsk), tsk->comm);
  	debug_show_all_locks();

  done:
  	tsk->oom_reaper_list = NULL;

  	/*
  	 * Hide this mm from OOM killer because it has been either reaped or
  	 * somebody can't call up_write(mmap_sem).
  	 */
  	set_bit(MMF_OOM_SKIP, &mm->flags);

  	/* Drop a reference taken by wake_oom_reaper */
  	put_task_struct(tsk);
  }
  
  static int oom_reaper(void *unused)
  {
  	while (true) {
  		struct task_struct *tsk = NULL;

  		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
  		spin_lock(&oom_reaper_lock);
  		if (oom_reaper_list != NULL) {
  			tsk = oom_reaper_list;
  			oom_reaper_list = tsk->oom_reaper_list;
  		}
  		spin_unlock(&oom_reaper_lock);
  
  		if (tsk)
  			oom_reap_task(tsk);
  	}
  
  	return 0;
  }
  static void wake_oom_reaper(struct task_struct *tsk)
  {
  	/* mm is already queued? */
  	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
  		return;
  	get_task_struct(tsk);

  	spin_lock(&oom_reaper_lock);
  	tsk->oom_reaper_list = oom_reaper_list;
  	oom_reaper_list = tsk;
  	spin_unlock(&oom_reaper_lock);
  	trace_wake_reaper(tsk->pid);
  	wake_up(&oom_reaper_wait);
  }
  
  static int __init oom_init(void)
  {
  	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
  	return 0;
  }
  subsys_initcall(oom_init)
  #else
  static inline void wake_oom_reaper(struct task_struct *tsk)
  {
  }
  #endif /* CONFIG_MMU */

  /**
   * mark_oom_victim - mark the given task as OOM victim
   * @tsk: task to mark
   *
   * Has to be called with oom_lock held and never after
   * oom has been disabled already.
   *
   * tsk->mm has to be non-NULL and the caller has to guarantee it is stable
   * (either under task_lock or by operating on current).
   */
  static void mark_oom_victim(struct task_struct *tsk)
  {
  	struct mm_struct *mm = tsk->mm;
  	WARN_ON(oom_killer_disabled);
  	/* OOM killer might race with memcg OOM */
  	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
  		return;

  	/* oom_mm is bound to the signal struct lifetime. */
  	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
  		mmgrab(tsk->signal->oom_mm);
  		set_bit(MMF_OOM_VICTIM, &mm->flags);
  	}

  	/*
  	 * Make sure that the task is woken up from uninterruptible sleep
  	 * if it is frozen, because the OOM killer wouldn't be able to free
  	 * any memory and would livelock.  freezing_slow_path will tell the
  	 * freezer that TIF_MEMDIE tasks should be ignored.
  	 */
  	__thaw_task(tsk);
  	atomic_inc(&oom_victims);
  	trace_mark_victim(tsk->pid);
  }
  
  /**
   * exit_oom_victim - note the exit of an OOM victim
   */
  void exit_oom_victim(void)
  {
  	clear_thread_flag(TIF_MEMDIE);

  	if (!atomic_dec_return(&oom_victims))
  		wake_up_all(&oom_victims_wait);
  }
  
  /**
   * oom_killer_enable - enable OOM killer
   */
  void oom_killer_enable(void)
  {
  	oom_killer_disabled = false;
  	pr_info("OOM killer enabled.\n");
  }
  
  /**
   * oom_killer_disable - disable OOM killer
   * @timeout: maximum timeout to wait for oom victims in jiffies
   *
   * Forces all page allocations to fail rather than trigger OOM killer.
   * Will block and wait until all OOM victims are killed or the given
   * timeout expires.
   *
   * The function cannot be called when there are runnable user tasks because
   * the userspace would see unexpected allocation failures as a result. Any
   * new usage of this function should be discussed with MM people.
   *
   * Returns true if successful and false if the OOM killer cannot be
   * disabled.
   */
  bool oom_killer_disable(signed long timeout)
  {
  	signed long ret;
  	/*
  	 * Make sure to not race with an ongoing OOM killer. Check that the
  	 * current is not killed (possibly due to sharing the victim's memory).
  	 */
  	if (mutex_lock_killable(&oom_lock))
  		return false;
  	oom_killer_disabled = true;
  	mutex_unlock(&oom_lock);

  	ret = wait_event_interruptible_timeout(oom_victims_wait,
  			!atomic_read(&oom_victims), timeout);
  	if (ret <= 0) {
  		oom_killer_enable();
  		return false;
  	}
  	pr_info("OOM killer disabled.\n");
  
  	return true;
  }
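
  /*
   * Illustrative caller (lives in kernel/power/process.c, not in this
   * file; shown here for context): the suspend path disables the OOM
   * killer once userspace is frozen, roughly
   *
   *	if (!oom_killer_disable(msecs_to_jiffies(freeze_timeout_msecs)))
   *		error = -EBUSY;
   *
   * and calls oom_killer_enable() again when tasks are thawed.
   */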
  static inline bool __task_will_free_mem(struct task_struct *task)
  {
  	struct signal_struct *sig = task->signal;
  
  	/*
  	 * A coredumping process may sleep for an extended period in exit_mm(),
  	 * so the oom killer cannot assume that the process will promptly exit
  	 * and release memory.
  	 */
  	if (sig->flags & SIGNAL_GROUP_COREDUMP)
  		return false;
  
  	if (sig->flags & SIGNAL_GROUP_EXIT)
  		return true;
  
  	if (thread_group_empty(task) && (task->flags & PF_EXITING))
  		return true;
  
  	return false;
  }
  
  /*
   * Checks whether the given task is dying or exiting and likely to
   * release its address space. This means that all threads and processes
   * sharing the same mm have to be killed or exiting.
   * Caller has to make sure that task->mm is stable (hold task_lock or
   * it operates on the current).
   */
  static bool task_will_free_mem(struct task_struct *task)
  {
  	struct mm_struct *mm = task->mm;
  	struct task_struct *p;
  	bool ret = true;

  	/*
  	 * Skip tasks without mm because it might have passed its exit_mm and
  	 * exit_oom_victim. oom_reaper could have rescued that but do not rely
  	 * on that for now. We can consider find_lock_task_mm in future.
  	 */
  	if (!mm)
  		return false;
  	if (!__task_will_free_mem(task))
  		return false;
  
  	/*
  	 * This task has already been drained by the oom reaper so there are
  	 * only small chances it will free some more
  	 */
  	if (test_bit(MMF_OOM_SKIP, &mm->flags))
  		return false;

  	if (atomic_read(&mm->mm_users) <= 1)
  		return true;
  
  	/*
  	 * Make sure that all tasks which share the mm with the given task
  	 * are dying as well to make sure that a) nobody pins its mm and
  	 * b) the task is also reapable by the oom reaper.
  	 */
  	rcu_read_lock();
  	for_each_process(p) {
  		if (!process_shares_mm(p, mm))
  			continue;
  		if (same_thread_group(task, p))
  			continue;
  		ret = __task_will_free_mem(p);
  		if (!ret)
  			break;
  	}
  	rcu_read_unlock();
  
  	return ret;
  }
  static void __oom_kill_process(struct task_struct *victim)
  {
  	struct task_struct *p;
  	struct mm_struct *mm;
  	bool can_oom_reap = true;

  	p = find_lock_task_mm(victim);
  	if (!p) {
  		put_task_struct(victim);
  		return;
  	} else if (victim != p) {
  		get_task_struct(p);
  		put_task_struct(victim);
  		victim = p;
  	}

  	/* Get a reference to safely compare mm after task_unlock(victim) */
  	mm = victim->mm;
  	mmgrab(mm);
  
  	/* Raise event before sending signal: task reaper must see this */
  	count_vm_event(OOM_KILL);
  	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);

  	/*
  	 * We should send SIGKILL before granting access to memory reserves
  	 * in order to prevent the OOM victim from depleting the memory
  	 * reserves from the user space under its control.
  	 */
  	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
  	mark_oom_victim(victim);
  	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
  		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
  		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
  		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
  		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
  	task_unlock(victim);
  
  	/*
  	 * Kill all user processes sharing victim->mm in other thread groups, if
  	 * any.  They don't get access to memory reserves, though, to avoid
  	 * depletion of all memory.  This prevents mm->mmap_sem livelock when an
  	 * oom killed thread cannot exit because it requires the semaphore and
  	 * it's contended by another thread trying to allocate memory itself.
  	 * That thread will now get access to memory reserves since it has a
  	 * pending fatal signal.
  	 */
  	rcu_read_lock();
  	for_each_process(p) {
  		if (!process_shares_mm(p, mm))
  			continue;
  		if (same_thread_group(p, victim))
  			continue;
  		if (is_global_init(p)) {
  			can_oom_reap = false;
  			set_bit(MMF_OOM_SKIP, &mm->flags);
  			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
  					task_pid_nr(victim), victim->comm,
  					task_pid_nr(p), p->comm);
  			continue;
  		}
  		/*
  		 * No use_mm() user needs to read from the userspace so we are
  		 * ok to reap it.
  		 */
  		if (unlikely(p->flags & PF_KTHREAD))
  			continue;
  		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
  	}
  	rcu_read_unlock();

  	if (can_oom_reap)
  		wake_oom_reaper(victim);

  	mmdrop(mm);
  	put_task_struct(victim);
  }
  #undef K

  /*
   * Kill provided task unless it's secured by setting
   * oom_score_adj to OOM_SCORE_ADJ_MIN.
   */
  static int oom_kill_memcg_member(struct task_struct *task, void *unused)
  {
  	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
  	    !is_global_init(task)) {
  		get_task_struct(task);
  		__oom_kill_process(task);
  	}
  	return 0;
  }
  static void oom_kill_process(struct oom_control *oc, const char *message)
  {
  	struct task_struct *p = oc->chosen;
  	unsigned int points = oc->chosen_points;
  	struct task_struct *victim = p;
  	struct task_struct *child;
  	struct task_struct *t;
  	struct mem_cgroup *oom_group;
  	unsigned int victim_points = 0;
  	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
  					      DEFAULT_RATELIMIT_BURST);
  
  	/*
  	 * If the task is already exiting, don't alarm the sysadmin or kill
  	 * its children or threads, just give it access to memory reserves
  	 * so it can die quickly
  	 */
  	task_lock(p);
  	if (task_will_free_mem(p)) {
  		mark_oom_victim(p);
  		wake_oom_reaper(p);
  		task_unlock(p);
  		put_task_struct(p);
  		return;
  	}
  	task_unlock(p);
  
  	if (__ratelimit(&oom_rs))
  		dump_header(oc, p);
  
  	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
  		message, task_pid_nr(p), p->comm, points);
  
  	/*
  	 * If any of p's children has a different mm and is eligible for kill,
  	 * the one with the highest oom_badness() score is sacrificed for its
  	 * parent.  This attempts to lose the minimal amount of work done while
  	 * still freeing memory.
  	 */
  	read_lock(&tasklist_lock);
  
  	/*
  	 * The task 'p' might have already exited before reaching here. The
  	 * put_task_struct() will free task_struct 'p' while the loop still tries
  	 * to access the fields of 'p', so get an extra reference.
  	 */
  	get_task_struct(p);
  	for_each_thread(p, t) {
  		list_for_each_entry(child, &t->children, sibling) {
  			unsigned int child_points;
  
  			if (process_shares_mm(child, p->mm))
  				continue;
  			/*
  			 * oom_badness() returns 0 if the thread is unkillable
  			 */
  			child_points = oom_badness(child,
  				oc->memcg, oc->nodemask, oc->totalpages);
  			if (child_points > victim_points) {
  				put_task_struct(victim);
  				victim = child;
  				victim_points = child_points;
  				get_task_struct(victim);
  			}
  		}
  	}
  	put_task_struct(p);
  	read_unlock(&tasklist_lock);
  	/*
  	 * Do we need to kill the entire memory cgroup?
  	 * Or even one of the ancestor memory cgroups?
  	 * Check this out before killing the victim task.
  	 */
  	oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
  	__oom_kill_process(victim);
  
  	/*
  	 * If necessary, kill all tasks in the selected memory cgroup.
  	 */
  	if (oom_group) {
  		mem_cgroup_print_oom_group(oom_group);
  		mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
  		mem_cgroup_put(oom_group);
  	}
  }
  /*
   * Determines whether the kernel must panic because of the panic_on_oom sysctl.
   */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
992
993
  static void check_panic_on_oom(struct oom_control *oc,
  			       enum oom_constraint constraint)
309ed8825   David Rientjes   oom: extract pani...
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
  {
  	if (likely(!sysctl_panic_on_oom))
  		return;
  	if (sysctl_panic_on_oom != 2) {
  		/*
		 * panic_on_oom == 1 only affects CONSTRAINT_NONE; the kernel
  		 * does not panic for cpuset, mempolicy, or memcg allocation
  		 * failures.
  		 */
  		if (constraint != CONSTRAINT_NONE)
  			return;
  	}
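	/*
	 * From here on we panic regardless of the constraint: either
	 * panic_on_oom == 2, or panic_on_oom == 1 with a system-wide
	 * (CONSTRAINT_NONE) OOM - unless this kill was triggered by sysrq,
	 * which is checked next.
	 */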
071a4befe   David Rientjes   mm, oom: do not p...
1006
  	/* Do not panic for oom kills triggered by sysrq */
db2a0dd7a   Yaowei Bai   mm/oom_kill.c: in...
1007
  	if (is_sysrq_oom(oc))
071a4befe   David Rientjes   mm, oom: do not p...
1008
  		return;
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1009
  	dump_header(oc, NULL);
309ed8825   David Rientjes   oom: extract pani...
1010
1011
1012
1013
  	panic("Out of memory: %s panic_on_oom is enabled
  ",
  		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
  }
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
  static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
  
  int register_oom_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_register(&oom_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(register_oom_notifier);
  
  int unregister_oom_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(unregister_oom_notifier);
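/*
 * Usage sketch (hypothetical module code, not part of this file): a
 * callback on this chain may try to release memory and reports the number
 * of pages it freed through the pointer passed as the third argument;
 * my_driver_shrink_caches() is an assumed helper, not a real API.
 *
 *	static int my_oom_notify(struct notifier_block *nb,
 *				 unsigned long unused, void *parm)
 *	{
 *		unsigned long *freed = parm;
 *
 *		*freed += my_driver_shrink_caches();
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_oom_nb = {
 *		.notifier_call	= my_oom_notify,
 *	};
 *
 *	register_oom_notifier(&my_oom_nb);
 */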
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1027
  /**
6e0fc46dc   David Rientjes   mm, oom: organize...
1028
1029
   * out_of_memory - kill the "best" process when we run out of memory
   * @oc: pointer to struct oom_control
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1030
1031
1032
1033
1034
1035
   *
 * If we run out of memory, we have the choice between killing a
 * random task (bad), letting the system crash (worse), or trying
 * to be smart about which process to kill. Note that we
   * don't have to be perfect here, we just have to be good.
   */
6e0fc46dc   David Rientjes   mm, oom: organize...
1036
  bool out_of_memory(struct oom_control *oc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1037
  {
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
1038
  	unsigned long freed = 0;
e36589323   David Rientjes   oom: remove speci...
1039
  	enum oom_constraint constraint = CONSTRAINT_NONE;
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
1040

dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1041
1042
  	if (oom_killer_disabled)
  		return false;
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1043
1044
1045
1046
1047
1048
  	if (!is_memcg_oom(oc)) {
  		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
  		if (freed > 0)
  			/* Got some memory back in the last second. */
  			return true;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1049

7b98c2e40   David Rientjes   oom: give current...
1050
  	/*
9ff4868e3   David Rientjes   mm, oom: allow ex...
1051
1052
1053
  	 * If current has a pending SIGKILL or is exiting, then automatically
  	 * select it.  The goal is to allow it to allocate so that it may
  	 * quickly exit and free its memory.
7b98c2e40   David Rientjes   oom: give current...
1054
  	 */
091f362c5   Michal Hocko   mm, oom: tighten ...
1055
  	if (task_will_free_mem(current)) {
16e951966   Johannes Weiner   mm: oom_kill: cle...
1056
  		mark_oom_victim(current);
1af8bb432   Michal Hocko   mm, oom: fortify ...
1057
  		wake_oom_reaper(current);
75e8f8b24   David Rientjes   mm, oom: remove u...
1058
  		return true;
7b98c2e40   David Rientjes   oom: give current...
1059
  	}
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
1060
  	/*
3da88fb3b   Michal Hocko   mm, oom: move GFP...
1061
1062
1063
1064
1065
  	 * The OOM killer does not compensate for IO-less reclaim.
	 * pagefault_out_of_memory lost its gfp context, so we have to make
	 * sure to exclude the 0 mask - all other users should have at least
	 * ___GFP_DIRECT_RECLAIM to get here.
  	 */
06ad276ac   Michal Hocko   mm, oom: do not e...
1066
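	/*
	 * Note: returning true below still counts as "progress" to the page
	 * allocator, so the caller retries instead of failing the allocation,
	 * even though no task is killed for a !__GFP_FS request.
	 */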
  	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
3da88fb3b   Michal Hocko   mm, oom: move GFP...
1067
1068
1069
  		return true;
  
  	/*
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
1070
  	 * Check if there were limitations on the allocation (only relevant for
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1071
  	 * NUMA and memcg) that may require different handling.
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
1072
  	 */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1073
  	constraint = constrained_alloc(oc);
6e0fc46dc   David Rientjes   mm, oom: organize...
1074
1075
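	/*
	 * The nodemask only restricts victim selection for mempolicy-
	 * constrained OOMs; for the other constraints it would not tell us
	 * which tasks actually pin the overcommitted memory.
	 */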
  	if (constraint != CONSTRAINT_MEMORY_POLICY)
  		oc->nodemask = NULL;
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1076
  	check_panic_on_oom(oc, constraint);
0aad4b312   David Rientjes   oom: fold __out_o...
1077

7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1078
1079
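	/*
	 * vm.oom_kill_allocating_task: kill the task that triggered the OOM
	 * (when it is eligible) instead of scanning the whole tasklist, which
	 * can be expensive on very large systems.
	 */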
  	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
  	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
121d1ba0a   David Rientjes   mm, oom: fix pote...
1080
  	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
6b0c81b3b   David Rientjes   mm, oom: reduce d...
1081
  		get_task_struct(current);
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1082
1083
  		oc->chosen = current;
  		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
75e8f8b24   David Rientjes   mm, oom: remove u...
1084
  		return true;
0aad4b312   David Rientjes   oom: fold __out_o...
1085
  	}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1086
  	select_bad_process(oc);
3100dab2a   Johannes Weiner   mm: memcontrol: p...
1087
1088
  	/* Found nothing?!?! */
  	if (!oc->chosen) {
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1089
  		dump_header(oc, NULL);
3100dab2a   Johannes Weiner   mm: memcontrol: p...
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
  		pr_warn("Out of memory and no killable processes...
  ");
  		/*
  		 * If we got here due to an actual allocation at the
  		 * system level, we cannot survive this and will enter
  		 * an endless loop in the allocator. Bail out now.
  		 */
  		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
  			panic("System is deadlocked on memory
  ");
0aad4b312   David Rientjes   oom: fold __out_o...
1100
  	}
9bfe5ded0   Michal Hocko   mm, oom: remove s...
1101
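	/*
	 * Note: oc->chosen == (void *)-1UL means select_bad_process() aborted
	 * because an earlier victim is still exiting (and has not yet been
	 * reaped), so no new kill is needed.
	 */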
  	if (oc->chosen && oc->chosen != (void *)-1UL)
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1102
1103
  		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
  				 "Memory cgroup out of memory");
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1104
  	return !!oc->chosen;
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
1105
  }
e36589323   David Rientjes   oom: remove speci...
1106
1107
  /*
   * The pagefault handler calls here because it is out of memory, so kill a
798fd7569   Vladimir Davydov   mm: zap ZONE_OOM_...
1108
1109
   * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
 * killing is already in progress, so do nothing.
e36589323   David Rientjes   oom: remove speci...
1110
1111
1112
   */
  void pagefault_out_of_memory(void)
  {
6e0fc46dc   David Rientjes   mm, oom: organize...
1113
1114
1115
  	struct oom_control oc = {
  		.zonelist = NULL,
  		.nodemask = NULL,
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1116
  		.memcg = NULL,
6e0fc46dc   David Rientjes   mm, oom: organize...
1117
1118
  		.gfp_mask = 0,
  		.order = 0,
6e0fc46dc   David Rientjes   mm, oom: organize...
1119
  	};
494264208   Johannes Weiner   mm: memcg: handle...
1120
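	/*
	 * A memcg OOM raised during the fault was stashed for completion
	 * here, at the end of the page fault, where it is safe to sleep;
	 * returning true means the memcg charge path handled it.
	 */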
  	if (mem_cgroup_oom_synchronize(true))
dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1121
  		return;
3812c8c8f   Johannes Weiner   mm: memcg: do not...
1122

dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1123
1124
  	if (!mutex_trylock(&oom_lock))
  		return;
a104808e2   Tetsuo Handa   mm: don't emit wa...
1125
  	out_of_memory(&oc);
dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1126
  	mutex_unlock(&oom_lock);
e36589323   David Rientjes   oom: remove speci...
1127
  }