Blame view

mm/oom_kill.c 29.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
  /*
   *  linux/mm/oom_kill.c
   * 
   *  Copyright (C)  1998,2000  Rik van Riel
   *	Thanks go out to Claus Fischer for some serious inspiration and
   *	for goading me into coding this file...
a63d83f42   David Rientjes   oom: badness heur...
7
8
   *  Copyright (C)  2010  Google, Inc.
   *	Rewritten by David Rientjes
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
   *
   *  The routines in this file are used to kill a process when
a49335cce   Paul Jackson   [PATCH] cpusets: ...
11
12
   *  we're seriously out of memory. This gets called from __alloc_pages()
   *  in mm/page_alloc.c when we really run out of memory.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
17
18
   *
   *  Since we won't call these routines often (on a well-configured
   *  machine) this file will double as a 'coding guide' and a signpost
   *  for newbie kernel hackers. It features several pointers to major
   *  kernel subsystems and hints as to where to find out what things do.
   */
8ac773b4f   Alexey Dobriyan   [PATCH] OOM kille...
19
  #include <linux/oom.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/mm.h>
4e950f6f0   Alexey Dobriyan   Remove fs.h from ...
21
  #include <linux/err.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
22
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
  #include <linux/sched.h>
6e84f3152   Ingo Molnar   sched/headers: Pr...
24
  #include <linux/sched/mm.h>
f7ccbae45   Ingo Molnar   sched/headers: Pr...
25
  #include <linux/sched/coredump.h>
299300258   Ingo Molnar   sched/headers: Pr...
26
  #include <linux/sched/task.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
  #include <linux/swap.h>
  #include <linux/timex.h>
  #include <linux/jiffies.h>
ef08e3b49   Paul Jackson   [PATCH] cpusets: ...
30
  #include <linux/cpuset.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
31
  #include <linux/export.h>
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
32
  #include <linux/notifier.h>
c7ba5c9e8   Pavel Emelianov   Memory controller...
33
  #include <linux/memcontrol.h>
6f48d0ebd   David Rientjes   oom: select task ...
34
  #include <linux/mempolicy.h>
5cd9c58fb   David Howells   security: Fix set...
35
  #include <linux/security.h>
edd45544c   David Rientjes   oom: avoid deferr...
36
  #include <linux/ptrace.h>
f660daac4   David Rientjes   oom: thaw threads...
37
  #include <linux/freezer.h>
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
38
  #include <linux/ftrace.h>
dc3f21ead   David Rientjes   mm, oom: introduc...
39
  #include <linux/ratelimit.h>
aac453635   Michal Hocko   mm, oom: introduc...
40
41
  #include <linux/kthread.h>
  #include <linux/init.h>
4d4bbd852   Michal Hocko   mm, oom_reaper: s...
42
  #include <linux/mmu_notifier.h>
aac453635   Michal Hocko   mm, oom: introduc...
43
44
45
  
  #include <asm/tlb.h>
  #include "internal.h"
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
46
47
48
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/oom.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49

fadd8fbd1   KAMEZAWA Hiroyuki   [PATCH] support f...
50
/* sysctl vm.panic_on_oom: panic instead of killing a task on OOM */
int sysctl_panic_on_oom;
/* sysctl vm.oom_kill_allocating_task: kill current instead of scanning */
int sysctl_oom_kill_allocating_task;
/* sysctl vm.oom_dump_tasks: dump eligible task list when OOM-killing */
int sysctl_oom_dump_tasks = 1;

/* Serializes oom killer invocations and oom reaping against each other */
DEFINE_MUTEX(oom_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55

6f48d0ebd   David Rientjes   oom: select task ...
56
57
58
#ifdef CONFIG_NUMA
/**
 * has_intersects_mems_allowed() - check task eligibility for kill
 * @start: task struct of which task to consider
 * @mask: nodemask passed to page allocator for mempolicy ooms
 *
 * Task eligibility is determined by whether or not a candidate task, @tsk,
 * shares the same mempolicy nodes as current if it is bound by such a policy
 * and whether or not it has the same set of allowed cpuset nodes.
 *
 * Iterates every thread of @start under RCU; a single intersecting thread
 * makes the whole task eligible.  When @mask is NULL the oom is not
 * mempolicy-constrained and only cpuset mems are compared.
 */
static bool has_intersects_mems_allowed(struct task_struct *start,
					const nodemask_t *mask)
{
	struct task_struct *tsk;
	bool ret = false;

	rcu_read_lock();
	for_each_thread(start, tsk) {
		if (mask) {
			/*
			 * If this is a mempolicy constrained oom, tsk's
			 * cpuset is irrelevant.  Only return true if its
			 * mempolicy intersects current, otherwise it may be
			 * needlessly killed.
			 */
			ret = mempolicy_nodemask_intersects(tsk, mask);
		} else {
			/*
			 * This is not a mempolicy constrained oom, so only
			 * check the mems of tsk's cpuset.
			 */
			ret = cpuset_mems_allowed_intersects(current, tsk);
		}
		if (ret)
			break;
	}
	rcu_read_unlock();

	return ret;
}
#else
/* !CONFIG_NUMA: a single node, so every task trivially intersects. */
static bool has_intersects_mems_allowed(struct task_struct *tsk,
					const nodemask_t *mask)
{
	return true;
}
#endif /* CONFIG_NUMA */
495789a51   KOSAKI Motohiro   oom: make oom_sco...
103

6f48d0ebd   David Rientjes   oom: select task ...
104
105
106
107
108
109
/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer.  Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 *
 * On success the returned thread's task_lock() is held and the caller
 * must drop it with task_unlock(); returns NULL when no thread of @p
 * has an mm.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
	struct task_struct *t;

	rcu_read_lock();
	for_each_thread(p, t) {
		task_lock(t);
		if (likely(t->mm))
			goto found;	/* leave with task_lock(t) held */
		task_unlock(t);
	}
	t = NULL;
found:
	rcu_read_unlock();

	return t;
}
db2a0dd7a   Yaowei Bai   mm/oom_kill.c: in...
127
128
129
130
131
132
133
134
/*
 * order == -1 means the oom kill is required by sysrq, otherwise only
 * for display purposes.
 */
static inline bool is_sysrq_oom(struct oom_control *oc)
{
	return oc->order == -1;
}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
135
136
137
138
/* true when this oom was raised by a memcg limit rather than global memory */
static inline bool is_memcg_oom(struct oom_control *oc)
{
	return oc->memcg != NULL;
}
ab290adba   KOSAKI Motohiro   oom: make oom_unk...
139
/*
 * return true if the task is not adequate as candidate victim task:
 * global init, kernel threads, tasks outside the oom memcg, and tasks
 * with no memory on the allowed nodes are all exempt.
 */
static bool oom_unkillable_task(struct task_struct *p,
		struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	if (is_global_init(p))
		return true;
	if (p->flags & PF_KTHREAD)
		return true;

	/* When mem_cgroup_out_of_memory() and p is not member of the group */
	if (memcg && !task_in_mem_cgroup(p, memcg))
		return true;

	/* p may not have freeable memory in nodemask */
	if (!has_intersects_mems_allowed(p, nodemask))
		return true;

	return false;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of which task we should calculate
 * @memcg: memory controller the oom is constrained to, or NULL
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible.  The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 *
 * Returns 0 for unkillable/exempt tasks, otherwise a strictly positive
 * score (rss + page tables + swap, adjusted by oom_score_adj).
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
			  const nodemask_t *nodemask, unsigned long totalpages)
{
	long points;
	long adj;

	if (oom_unkillable_task(p, memcg, nodemask))
		return 0;

	/* returns with task_lock() held on success */
	p = find_lock_task_mm(p);
	if (!p)
		return 0;

	/*
	 * Do not even consider tasks which are explicitly marked oom
	 * unkillable or have been already oom reaped or they are in
	 * the middle of vfork
	 */
	adj = (long)p->signal->oom_score_adj;
	if (adj == OOM_SCORE_ADJ_MIN ||
			test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
			in_vfork(p)) {
		task_unlock(p);
		return 0;
	}

	/*
	 * The baseline for the badness score is the proportion of RAM that each
	 * task's rss, pagetable and swap space use.
	 */
	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
		atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm);
	task_unlock(p);

	/*
	 * Root processes get 3% bonus, just like the __vm_enough_memory()
	 * implementation used by LSMs.
	 */
	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
		points -= (points * 3) / 100;

	/* Normalize to oom_score_adj units */
	adj *= totalpages / 1000;
	points += adj;

	/*
	 * Never return 0 for an eligible task regardless of the root bonus and
	 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
	 */
	return points > 0 ? points : 1;
}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
216
217
218
219
220
221
  enum oom_constraint {
  	CONSTRAINT_NONE,
  	CONSTRAINT_CPUSET,
  	CONSTRAINT_MEMORY_POLICY,
  	CONSTRAINT_MEMCG,
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
222
/*
 * Determine the type of allocation constraint.
 *
 * Side effect: sets oc->totalpages to the amount of memory relevant to the
 * detected constraint (memcg limit, policy/cpuset nodes + swap, or all of
 * RAM + swap), which later normalizes badness scores.
 */
static enum oom_constraint constrained_alloc(struct oom_control *oc)
{
	struct zone *zone;
	struct zoneref *z;
	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
	bool cpuset_limited = false;
	int nid;

	if (is_memcg_oom(oc)) {
		/* ?: guards against a 0 limit so later divisions are safe */
		oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1;
		return CONSTRAINT_MEMCG;
	}

	/* Default to all available memory */
	oc->totalpages = totalram_pages + total_swap_pages;

	if (!IS_ENABLED(CONFIG_NUMA))
		return CONSTRAINT_NONE;

	if (!oc->zonelist)
		return CONSTRAINT_NONE;
	/*
	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
	 * killing current.  We have to fall back to a random task kill in
	 * this case.  Hopefully CONSTRAINT_THISNODE...but there is no way
	 * to handle it, now.
	 */
	if (oc->gfp_mask & __GFP_THISNODE)
		return CONSTRAINT_NONE;

	/*
	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
	 * the page allocator means a mempolicy is in effect.  Cpuset policy
	 * is enforced in get_page_from_freelist().
	 */
	if (oc->nodemask &&
	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
		oc->totalpages = total_swap_pages;
		for_each_node_mask(nid, *oc->nodemask)
			oc->totalpages += node_spanned_pages(nid);
		return CONSTRAINT_MEMORY_POLICY;
	}

	/* Check this allocation failure is caused by cpuset's wall function */
	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
			high_zoneidx, oc->nodemask)
		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
			cpuset_limited = true;

	if (cpuset_limited) {
		oc->totalpages = total_swap_pages;
		for_each_node_mask(nid, cpuset_current_mems_allowed)
			oc->totalpages += node_spanned_pages(nid);
		return CONSTRAINT_CPUSET;
	}
	return CONSTRAINT_NONE;
}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
280
/*
 * Evaluate one task as an oom-kill candidate and record the best one so
 * far in oc->chosen / oc->chosen_points (with a task reference held).
 *
 * Returns 0 to continue the scan, 1 to abort it; on abort oc->chosen is
 * set to -1 to signal that an existing victim is still exiting.
 */
static int oom_evaluate_task(struct task_struct *task, void *arg)
{
	struct oom_control *oc = arg;
	unsigned long points;

	if (oom_unkillable_task(task, NULL, oc->nodemask))
		goto next;

	/*
	 * This task already has access to memory reserves and is being killed.
	 * Don't allow any other task to have access to the reserves unless
	 * the task has MMF_OOM_SKIP because chances that it would release
	 * any memory is quite low.
	 */
	if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
		if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
			goto next;
		goto abort;
	}

	/*
	 * If task is allocating a lot of memory and has been marked to be
	 * killed first if it triggers an oom, then select it.
	 */
	if (oom_task_origin(task)) {
		points = ULONG_MAX;
		goto select;
	}

	points = oom_badness(task, NULL, oc->nodemask, oc->totalpages);
	if (!points || points < oc->chosen_points)
		goto next;

	/* Prefer thread group leaders for display purposes */
	if (points == oc->chosen_points && thread_group_leader(oc->chosen))
		goto next;
select:
	/* drop the reference on the previous choice before taking a new one */
	if (oc->chosen)
		put_task_struct(oc->chosen);
	get_task_struct(task);
	oc->chosen = task;
	oc->chosen_points = points;
next:
	return 0;
abort:
	if (oc->chosen)
		put_task_struct(oc->chosen);
	oc->chosen = (void *)-1UL;	/* sentinel: scan aborted */
	return 1;
}
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
329
/*
 * Simple selection loop. We choose the process with the highest number of
 * 'points'. In case scan was aborted, oc->chosen is set to -1.
 *
 * For memcg ooms the memcg's task iterator is used; otherwise every
 * process in the system is evaluated under RCU.  chosen_points is
 * normalized to per-mille of oc->totalpages on exit.
 */
static void select_bad_process(struct oom_control *oc)
{
	if (is_memcg_oom(oc))
		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
	else {
		struct task_struct *p;

		rcu_read_lock();
		for_each_process(p)
			if (oom_evaluate_task(p, oc))
				break;
		rcu_read_unlock();
	}

	oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
}
  
/**
 * dump_tasks - dump current memory state of all system tasks
 * @memcg: current's memory controller, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 *
 * Dumps the current memory state of all eligible tasks.  Tasks not in the same
 * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
 * are not shown.
 * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
 * swapents, oom_score_adj value, and name.
 */
static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	struct task_struct *p;
	struct task_struct *task;

	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds swapents oom_score_adj name\n");
	rcu_read_lock();
	for_each_process(p) {
		if (oom_unkillable_task(p, memcg, nodemask))
			continue;

		task = find_lock_task_mm(p);
		if (!task) {
			/*
			 * This is a kthread or all of p's threads have already
			 * detached their mm's.  There's no need to report
			 * them; they can't be oom killed anyway.
			 */
			continue;
		}

		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %8lu         %5hd %s\n",
			task->pid, from_kuid(&init_user_ns, task_uid(task)),
			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
			atomic_long_read(&task->mm->nr_ptes),
			mm_nr_pmds(task->mm),
			get_mm_counter(task->mm, MM_SWAPENTS),
			task->signal->oom_score_adj, task->comm);
		task_unlock(task);	/* taken by find_lock_task_mm() */
	}
	rcu_read_unlock();
}
2a966b77a   Vladimir Davydov   mm: oom: add memc...
395
/*
 * Log the context of an oom invocation: the allocation that failed
 * (gfp mask, nodemask, order), current's mems_allowed, a stack trace,
 * memcg or global memory state, and optionally the eligible task list.
 * @p is the prospective victim, used only for memcg oom reporting.
 */
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=",
		current->comm, oc->gfp_mask, &oc->gfp_mask);
	if (oc->nodemask)
		pr_cont("%*pbl", nodemask_pr_args(oc->nodemask));
	else
		pr_cont("(null)");
	pr_cont(",  order=%d, oom_score_adj=%hd\n",
		oc->order, current->signal->oom_score_adj);
	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
		pr_warn("COMPACTION is disabled!!!\n");

	cpuset_print_current_mems_allowed();
	dump_stack();
	if (oc->memcg)
		mem_cgroup_print_oom_info(oc->memcg, p);
	else
		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
	if (sysctl_oom_dump_tasks)
		dump_tasks(oc->memcg, oc->nodemask);
}
5695be142   Michal Hocko   OOM, PM: OOM kill...
419
/*
 * Number of OOM victims in flight
 */
static atomic_t oom_victims = ATOMIC_INIT(0);
/* woken when oom_victims drops to zero */
static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);

/* set while the oom killer is disabled (e.g. for suspend/hibernation) */
static bool oom_killer_disabled __read_mostly;

/* convert a page count to kilobytes for log output */
#define K(x) ((x) << (PAGE_SHIFT-10))
3ef22dfff   Michal Hocko   oom, oom_reaper: ...
428
429
430
431
432
433
  /*
   * task->mm can be NULL if the task is the exited group leader.  So to
   * determine whether the task is using a particular mm, we examine all the
   * task's threads: if one of those is using this mm then this task was also
   * using it.
   */
44a70adec   Michal Hocko   mm, oom_adj: make...
434
  bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
3ef22dfff   Michal Hocko   oom, oom_reaper: ...
435
436
437
438
439
440
441
442
443
444
  {
  	struct task_struct *t;
  
  	for_each_thread(p, t) {
  		struct mm_struct *t_mm = READ_ONCE(t->mm);
  		if (t_mm)
  			return t_mm == mm;
  	}
  	return false;
  }
aac453635   Michal Hocko   mm, oom: introduc...
445
446
447
448
449
450
#ifdef CONFIG_MMU
/*
 * OOM Reaper kernel thread which tries to reap the memory used by the OOM
 * victim (if that is possible) to help the OOM killer to move on.
 */
static struct task_struct *oom_reaper_th;
/* the reaper thread sleeps here until a victim is queued */
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
/* singly-linked list of victims awaiting reaping */
static struct task_struct *oom_reaper_list;
/* protects oom_reaper_list */
static DEFINE_SPINLOCK(oom_reaper_lock);
2270dfcc4   David Rientjes   mm, oom: fix conc...
454
/*
 * Unmap the private anonymous memory of an oom victim's @mm to release
 * memory without waiting for the victim to exit.  Caller is expected to
 * hold mm->mmap_sem for reading (NOTE(review): inferred from the callers
 * visible in this file — confirm against exit_mmap()'s use).
 */
void __oom_reap_task_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	/*
	 * Tell all users of get_user/copy_from_user etc... that the content
	 * is no longer stable. No barriers really needed because unmapping
	 * should imply barriers already and the reader would hit a page fault
	 * if it stumbled over a reaped memory.
	 */
	set_bit(MMF_UNSTABLE, &mm->flags);

	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
		if (!can_madv_dontneed_vma(vma))
			continue;

		/*
		 * Only anonymous pages have a good chance to be dropped
		 * without additional steps which we cannot afford as we
		 * are OOM already.
		 *
		 * We do not even care about fs backed pages because all
		 * which are reclaimable have already been reclaimed and
		 * we do not want to block exit_mmap by keeping mm ref
		 * count elevated without a good reason.
		 */
		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
			struct mmu_gather tlb;

			tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
					 NULL);
			tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
		}
	}
}
  
/*
 * Try to reap @mm on behalf of victim @tsk.  Returns true when reaping
 * succeeded or can be skipped permanently; false when mmap_sem could not
 * be taken and the caller should retry.
 */
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
	bool ret = true;

	/*
	 * We have to make sure to not race with the victim exit path
	 * and cause premature new oom victim selection:
	 * oom_reap_task_mm		exit_mm
	 *   mmget_not_zero
	 *				  mmput
	 *				    atomic_dec_and_test
	 *				  exit_oom_victim
	 *				[...]
	 *				out_of_memory
	 *				  select_bad_process
	 *				    # no TIF_MEMDIE task selects new victim
	 *  unmap_page_range # frees some memory
	 */
	mutex_lock(&oom_lock);

	/* trylock: the victim may hold mmap_sem for write while exiting */
	if (!down_read_trylock(&mm->mmap_sem)) {
		ret = false;
		trace_skip_task_reaping(tsk->pid);
		goto unlock_oom;
	}

	/*
	 * If the mm has notifiers then we would need to invalidate them around
	 * unmap_page_range and that is risky because notifiers can sleep and
	 * what they do is basically undeterministic.  So let's have a short
	 * sleep to give the oom victim some more time.
	 * TODO: we really want to get rid of this ugly hack and make sure that
	 * notifiers cannot block for unbounded amount of time and add
	 * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
	 */
	if (mm_has_notifiers(mm)) {
		up_read(&mm->mmap_sem);
		schedule_timeout_idle(HZ);
		goto unlock_oom;
	}

	/*
	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
	 * under mmap_sem for reading because it serializes against the
	 * down_write();up_write() cycle in exit_mmap().
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
		up_read(&mm->mmap_sem);
		trace_skip_task_reaping(tsk->pid);
		goto unlock_oom;
	}

	trace_start_task_reaping(tsk->pid);

	__oom_reap_task_mm(mm);

	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
			task_pid_nr(tsk), tsk->comm,
			K(get_mm_counter(mm, MM_ANONPAGES)),
			K(get_mm_counter(mm, MM_FILEPAGES)),
			K(get_mm_counter(mm, MM_SHMEMPAGES)));
	up_read(&mm->mmap_sem);

	trace_finish_task_reaping(tsk->pid);
unlock_oom:
	mutex_unlock(&oom_lock);
	return ret;
}
bc448e897   Michal Hocko   mm, oom_reaper: r...
557
  #define MAX_OOM_REAP_RETRIES 10
36324a990   Michal Hocko   oom: clear TIF_ME...
558
  static void oom_reap_task(struct task_struct *tsk)
aac453635   Michal Hocko   mm, oom: introduc...
559
560
  {
  	int attempts = 0;
26db62f17   Michal Hocko   oom: keep mm of t...
561
  	struct mm_struct *mm = tsk->signal->oom_mm;
aac453635   Michal Hocko   mm, oom: introduc...
562
563
  
  	/* Retry the down_read_trylock(mmap_sem) a few times */
2270dfcc4   David Rientjes   mm, oom: fix conc...
564
  	while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
aac453635   Michal Hocko   mm, oom: introduc...
565
  		schedule_timeout_idle(HZ/10);
7ebffa455   Tetsuo Handa   mm,oom_reaper: re...
566
567
  	if (attempts <= MAX_OOM_REAP_RETRIES)
  		goto done;
11a410d51   Michal Hocko   mm, oom_reaper: d...
568

7ebffa455   Tetsuo Handa   mm,oom_reaper: re...
569
570
571
  	pr_info("oom_reaper: unable to reap pid:%d (%s)
  ",
  		task_pid_nr(tsk), tsk->comm);
7ebffa455   Tetsuo Handa   mm,oom_reaper: re...
572
  	debug_show_all_locks();
bc448e897   Michal Hocko   mm, oom_reaper: r...
573

7ebffa455   Tetsuo Handa   mm,oom_reaper: re...
574
  done:
449d777d7   Michal Hocko   mm, oom_reaper: c...
575
  	tsk->oom_reaper_list = NULL;
449d777d7   Michal Hocko   mm, oom_reaper: c...
576

26db62f17   Michal Hocko   oom: keep mm of t...
577
578
579
580
  	/*
  	 * Hide this mm from OOM killer because it has been either reaped or
  	 * somebody can't call up_write(mmap_sem).
  	 */
862e3073b   Michal Hocko   mm, oom: get rid ...
581
  	set_bit(MMF_OOM_SKIP, &mm->flags);
26db62f17   Michal Hocko   oom: keep mm of t...
582

aac453635   Michal Hocko   mm, oom: introduc...
583
  	/* Drop a reference taken by wake_oom_reaper */
36324a990   Michal Hocko   oom: clear TIF_ME...
584
  	put_task_struct(tsk);
aac453635   Michal Hocko   mm, oom: introduc...
585
586
587
588
589
  }
  
  static int oom_reaper(void *unused)
  {
  	while (true) {
03049269d   Michal Hocko   mm, oom_reaper: i...
590
  		struct task_struct *tsk = NULL;
aac453635   Michal Hocko   mm, oom: introduc...
591

29c696e1c   Vladimir Davydov   oom: make oom_rea...
592
  		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
03049269d   Michal Hocko   mm, oom_reaper: i...
593
  		spin_lock(&oom_reaper_lock);
29c696e1c   Vladimir Davydov   oom: make oom_rea...
594
595
596
  		if (oom_reaper_list != NULL) {
  			tsk = oom_reaper_list;
  			oom_reaper_list = tsk->oom_reaper_list;
03049269d   Michal Hocko   mm, oom_reaper: i...
597
598
599
600
601
  		}
  		spin_unlock(&oom_reaper_lock);
  
  		if (tsk)
  			oom_reap_task(tsk);
aac453635   Michal Hocko   mm, oom: introduc...
602
603
604
605
  	}
  
  	return 0;
  }
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
606
  static void wake_oom_reaper(struct task_struct *tsk)
aac453635   Michal Hocko   mm, oom: introduc...
607
  {
af8e15cc8   Michal Hocko   oom, oom_reaper: ...
608
609
  	if (!oom_reaper_th)
  		return;
731785481   Tetsuo Handa   oom, oom_reaper: ...
610
611
  	/* mm is already queued? */
  	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
aac453635   Michal Hocko   mm, oom: introduc...
612
  		return;
36324a990   Michal Hocko   oom: clear TIF_ME...
613
  	get_task_struct(tsk);
aac453635   Michal Hocko   mm, oom: introduc...
614

03049269d   Michal Hocko   mm, oom_reaper: i...
615
  	spin_lock(&oom_reaper_lock);
29c696e1c   Vladimir Davydov   oom: make oom_rea...
616
617
  	tsk->oom_reaper_list = oom_reaper_list;
  	oom_reaper_list = tsk;
03049269d   Michal Hocko   mm, oom_reaper: i...
618
  	spin_unlock(&oom_reaper_lock);
422580c3c   Roman Gushchin   mm/oom_kill.c: ad...
619
  	trace_wake_reaper(tsk->pid);
03049269d   Michal Hocko   mm, oom_reaper: i...
620
  	wake_up(&oom_reaper_wait);
aac453635   Michal Hocko   mm, oom: introduc...
621
622
623
624
625
626
627
628
629
630
631
632
633
634
  }
  
  static int __init oom_init(void)
  {
  	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
  	if (IS_ERR(oom_reaper_th)) {
  		pr_err("Unable to start OOM reaper %ld. Continuing regardless
  ",
  				PTR_ERR(oom_reaper_th));
  		oom_reaper_th = NULL;
  	}
  	return 0;
  }
  subsys_initcall(oom_init)
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
635
636
637
638
639
  #else
  static inline void wake_oom_reaper(struct task_struct *tsk)
  {
  }
  #endif /* CONFIG_MMU */
aac453635   Michal Hocko   mm, oom: introduc...
640

49550b605   Michal Hocko   oom: add helpers ...
641
  /**
16e951966   Johannes Weiner   mm: oom_kill: cle...
642
   * mark_oom_victim - mark the given task as OOM victim
49550b605   Michal Hocko   oom: add helpers ...
643
   * @tsk: task to mark
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
644
   *
dc56401fc   Johannes Weiner   mm: oom_kill: sim...
645
   * Has to be called with oom_lock held and never after
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
646
   * oom has been disabled already.
26db62f17   Michal Hocko   oom: keep mm of t...
647
648
649
   *
   * tsk->mm has to be non NULL and caller has to guarantee it is stable (either
   * under task_lock or operate on the current).
49550b605   Michal Hocko   oom: add helpers ...
650
   */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
651
  static void mark_oom_victim(struct task_struct *tsk)
49550b605   Michal Hocko   oom: add helpers ...
652
  {
26db62f17   Michal Hocko   oom: keep mm of t...
653
  	struct mm_struct *mm = tsk->mm;
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
654
655
656
657
  	WARN_ON(oom_killer_disabled);
  	/* OOM killer might race with memcg OOM */
  	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
  		return;
26db62f17   Michal Hocko   oom: keep mm of t...
658

26db62f17   Michal Hocko   oom: keep mm of t...
659
  	/* oom_mm is bound to the signal struct life time. */
55fe4698d   Michal Hocko   mm, oom_reaper: f...
660
  	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
f1f100764   Vegard Nossum   mm: add new mmgra...
661
  		mmgrab(tsk->signal->oom_mm);
55fe4698d   Michal Hocko   mm, oom_reaper: f...
662
663
  		set_bit(MMF_OOM_VICTIM, &mm->flags);
  	}
26db62f17   Michal Hocko   oom: keep mm of t...
664

63a8ca9b2   Michal Hocko   oom: thaw the OOM...
665
666
667
668
669
670
671
  	/*
  	 * Make sure that the task is woken up from uninterruptible sleep
  	 * if it is frozen because OOM killer wouldn't be able to free
  	 * any memory and livelock. freezing_slow_path will tell the freezer
  	 * that TIF_MEMDIE tasks should be ignored.
  	 */
  	__thaw_task(tsk);
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
672
  	atomic_inc(&oom_victims);
422580c3c   Roman Gushchin   mm/oom_kill.c: ad...
673
  	trace_mark_victim(tsk->pid);
49550b605   Michal Hocko   oom: add helpers ...
674
675
676
  }
  
  /**
16e951966   Johannes Weiner   mm: oom_kill: cle...
677
   * exit_oom_victim - note the exit of an OOM victim
49550b605   Michal Hocko   oom: add helpers ...
678
   */
38531201c   Tetsuo Handa   mm, oom: enforce ...
679
  void exit_oom_victim(void)
49550b605   Michal Hocko   oom: add helpers ...
680
  {
38531201c   Tetsuo Handa   mm, oom: enforce ...
681
  	clear_thread_flag(TIF_MEMDIE);
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
682

c38f1025f   Johannes Weiner   mm: oom_kill: gen...
683
  	if (!atomic_dec_return(&oom_victims))
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
684
  		wake_up_all(&oom_victims_wait);
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
685
686
687
  }
  
  /**
7d2e7a22c   Michal Hocko   oom, suspend: fix...
688
689
690
691
692
   * oom_killer_enable - enable OOM killer
   */
  void oom_killer_enable(void)
  {
  	oom_killer_disabled = false;
d75da004c   Michal Hocko   oom: improve oom ...
693
694
  	pr_info("OOM killer enabled.
  ");
7d2e7a22c   Michal Hocko   oom, suspend: fix...
695
696
697
  }
  
  /**
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
698
   * oom_killer_disable - disable OOM killer
7d2e7a22c   Michal Hocko   oom, suspend: fix...
699
   * @timeout: maximum timeout to wait for oom victims in jiffies
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
700
701
   *
   * Forces all page allocations to fail rather than trigger OOM killer.
7d2e7a22c   Michal Hocko   oom, suspend: fix...
702
703
   * Will block and wait until all OOM victims are killed or the given
   * timeout expires.
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
704
705
706
707
708
709
710
711
   *
   * The function cannot be called when there are runnable user tasks because
   * the userspace would see unexpected allocation failures as a result. Any
   * new usage of this function should be consulted with MM people.
   *
   * Returns true if successful and false if the OOM killer cannot be
   * disabled.
   */
7d2e7a22c   Michal Hocko   oom, suspend: fix...
712
  bool oom_killer_disable(signed long timeout)
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
713
  {
7d2e7a22c   Michal Hocko   oom, suspend: fix...
714
  	signed long ret;
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
715
  	/*
6afcf2895   Tetsuo Handa   mm,oom: make oom_...
716
717
  	 * Make sure to not race with an ongoing OOM killer. Check that the
  	 * current is not killed (possibly due to sharing the victim's memory).
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
718
  	 */
6afcf2895   Tetsuo Handa   mm,oom: make oom_...
719
  	if (mutex_lock_killable(&oom_lock))
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
720
  		return false;
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
721
  	oom_killer_disabled = true;
dc56401fc   Johannes Weiner   mm: oom_kill: sim...
722
  	mutex_unlock(&oom_lock);
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
723

7d2e7a22c   Michal Hocko   oom, suspend: fix...
724
725
726
727
728
729
  	ret = wait_event_interruptible_timeout(oom_victims_wait,
  			!atomic_read(&oom_victims), timeout);
  	if (ret <= 0) {
  		oom_killer_enable();
  		return false;
  	}
d75da004c   Michal Hocko   oom: improve oom ...
730
731
  	pr_info("OOM killer disabled.
  ");
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
732
733
734
  
  	return true;
  }
1af8bb432   Michal Hocko   mm, oom: fortify ...
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
/*
 * Lockless check whether @task looks like it will exit soon and release
 * its memory.  NOTE(review): presumably the caller pins @task (task_lock,
 * RCU, or operating on current) so task->signal stays valid — confirm.
 */
static inline bool __task_will_free_mem(struct task_struct *task)
{
	struct signal_struct *sig = task->signal;

	/*
	 * A coredumping process may sleep for an extended period in exit_mm(),
	 * so the oom killer cannot assume that the process will promptly exit
	 * and release memory.
	 */
	if (sig->flags & SIGNAL_GROUP_COREDUMP)
		return false;

	/* the whole thread group is already exiting */
	if (sig->flags & SIGNAL_GROUP_EXIT)
		return true;

	/* a single-threaded task that has started exiting */
	if (thread_group_empty(task) && (task->flags & PF_EXITING))
		return true;

	return false;
}
  
  /*
   * Checks whether the given task is dying or exiting and likely to
   * release its address space. This means that all threads and processes
   * sharing the same mm have to be killed or exiting.
091f362c5   Michal Hocko   mm, oom: tighten ...
760
761
   * Caller has to make sure that task->mm is stable (hold task_lock or
   * it operates on the current).
1af8bb432   Michal Hocko   mm, oom: fortify ...
762
   */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
763
  static bool task_will_free_mem(struct task_struct *task)
1af8bb432   Michal Hocko   mm, oom: fortify ...
764
  {
091f362c5   Michal Hocko   mm, oom: tighten ...
765
  	struct mm_struct *mm = task->mm;
1af8bb432   Michal Hocko   mm, oom: fortify ...
766
  	struct task_struct *p;
f33e6f067   Geert Uytterhoeven   mm, oom: fix unin...
767
  	bool ret = true;
1af8bb432   Michal Hocko   mm, oom: fortify ...
768

1af8bb432   Michal Hocko   mm, oom: fortify ...
769
  	/*
091f362c5   Michal Hocko   mm, oom: tighten ...
770
771
772
  	 * Skip tasks without mm because it might have passed its exit_mm and
  	 * exit_oom_victim. oom_reaper could have rescued that but do not rely
  	 * on that for now. We can consider find_lock_task_mm in future.
1af8bb432   Michal Hocko   mm, oom: fortify ...
773
  	 */
091f362c5   Michal Hocko   mm, oom: tighten ...
774
  	if (!mm)
1af8bb432   Michal Hocko   mm, oom: fortify ...
775
  		return false;
091f362c5   Michal Hocko   mm, oom: tighten ...
776
777
  	if (!__task_will_free_mem(task))
  		return false;
696453e66   Michal Hocko   mm, oom: task_wil...
778
779
780
781
782
  
  	/*
  	 * This task has already been drained by the oom reaper so there are
  	 * only small chances it will free some more
  	 */
862e3073b   Michal Hocko   mm, oom: get rid ...
783
  	if (test_bit(MMF_OOM_SKIP, &mm->flags))
696453e66   Michal Hocko   mm, oom: task_wil...
784
  		return false;
696453e66   Michal Hocko   mm, oom: task_wil...
785

091f362c5   Michal Hocko   mm, oom: tighten ...
786
  	if (atomic_read(&mm->mm_users) <= 1)
1af8bb432   Michal Hocko   mm, oom: fortify ...
787
  		return true;
1af8bb432   Michal Hocko   mm, oom: fortify ...
788
789
  
  	/*
5870c2e1d   Michal Hocko   mm/oom_kill.c: fi...
790
791
792
  	 * Make sure that all tasks which share the mm with the given tasks
  	 * are dying as well to make sure that a) nobody pins its mm and
  	 * b) the task is also reapable by the oom reaper.
1af8bb432   Michal Hocko   mm, oom: fortify ...
793
794
795
796
797
798
799
800
801
802
803
804
  	 */
  	rcu_read_lock();
  	for_each_process(p) {
  		if (!process_shares_mm(p, mm))
  			continue;
  		if (same_thread_group(task, p))
  			continue;
  		ret = __task_will_free_mem(p);
  		if (!ret)
  			break;
  	}
  	rcu_read_unlock();
1af8bb432   Michal Hocko   mm, oom: fortify ...
805
806
807
  
  	return ret;
  }
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
808
  static void oom_kill_process(struct oom_control *oc, const char *message)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
809
  {
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
810
811
  	struct task_struct *p = oc->chosen;
  	unsigned int points = oc->chosen_points;
52d3c0367   Linus Torvalds   Revert "oom: oom_...
812
  	struct task_struct *victim = p;
5e9d834a0   David Rientjes   oom: sacrifice ch...
813
  	struct task_struct *child;
1da4db0cd   Oleg Nesterov   oom_kill: change ...
814
  	struct task_struct *t;
647f2bdf4   David Rientjes   mm, oom: fold oom...
815
  	struct mm_struct *mm;
52d3c0367   Linus Torvalds   Revert "oom: oom_...
816
  	unsigned int victim_points = 0;
dc3f21ead   David Rientjes   mm, oom: introduc...
817
818
  	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
  					      DEFAULT_RATELIMIT_BURST);
bb29902a7   Tetsuo Handa   oom, oom_reaper: ...
819
  	bool can_oom_reap = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
820

50ec3bbff   Nick Piggin   [PATCH] oom: hand...
821
822
  	/*
  	 * If the task is already exiting, don't alarm the sysadmin or kill
cd04ae1e2   Michal Hocko   mm, oom: do not r...
823
824
  	 * its children or threads, just give it access to memory reserves
  	 * so it can die quickly
50ec3bbff   Nick Piggin   [PATCH] oom: hand...
825
  	 */
091f362c5   Michal Hocko   mm, oom: tighten ...
826
  	task_lock(p);
1af8bb432   Michal Hocko   mm, oom: fortify ...
827
  	if (task_will_free_mem(p)) {
16e951966   Johannes Weiner   mm: oom_kill: cle...
828
  		mark_oom_victim(p);
1af8bb432   Michal Hocko   mm, oom: fortify ...
829
  		wake_oom_reaper(p);
091f362c5   Michal Hocko   mm, oom: tighten ...
830
  		task_unlock(p);
6b0c81b3b   David Rientjes   mm, oom: reduce d...
831
  		put_task_struct(p);
2a1c9b1fc   David Rientjes   mm, oom: avoid lo...
832
  		return;
50ec3bbff   Nick Piggin   [PATCH] oom: hand...
833
  	}
091f362c5   Michal Hocko   mm, oom: tighten ...
834
  	task_unlock(p);
50ec3bbff   Nick Piggin   [PATCH] oom: hand...
835

dc3f21ead   David Rientjes   mm, oom: introduc...
836
  	if (__ratelimit(&oom_rs))
2a966b77a   Vladimir Davydov   mm: oom: add memc...
837
  		dump_header(oc, p);
8447d950e   David Rientjes   mm, oom: do not e...
838

f0d6647e8   Wang Long   mm/oom_kill.c: pr...
839
840
  	pr_err("%s: Kill process %d (%s) score %u or sacrifice child
  ",
5e9d834a0   David Rientjes   oom: sacrifice ch...
841
  		message, task_pid_nr(p), p->comm, points);
f3af38d30   Nick Piggin   [PATCH] oom: clea...
842

5e9d834a0   David Rientjes   oom: sacrifice ch...
843
844
  	/*
  	 * If any of p's children has a different mm and is eligible for kill,
11239836c   David Rientjes   oom: remove refer...
845
  	 * the one with the highest oom_badness() score is sacrificed for its
5e9d834a0   David Rientjes   oom: sacrifice ch...
846
847
848
  	 * parent.  This attempts to lose the minimal amount of work done while
  	 * still freeing memory.
  	 */
6b0c81b3b   David Rientjes   mm, oom: reduce d...
849
  	read_lock(&tasklist_lock);
43f7e8bea   Shakeel Butt   mm, oom: fix use-...
850
851
852
853
854
855
856
  
  	/*
  	 * The task 'p' might have already exited before reaching here. The
  	 * put_task_struct() will free task_struct 'p' while the loop still try
  	 * to access the field of 'p', so, get an extra reference.
  	 */
  	get_task_struct(p);
1da4db0cd   Oleg Nesterov   oom_kill: change ...
857
  	for_each_thread(p, t) {
5e9d834a0   David Rientjes   oom: sacrifice ch...
858
  		list_for_each_entry(child, &t->children, sibling) {
a63d83f42   David Rientjes   oom: badness heur...
859
  			unsigned int child_points;
5e9d834a0   David Rientjes   oom: sacrifice ch...
860

4d7b3394f   Oleg Nesterov   mm/oom_kill: fix ...
861
  			if (process_shares_mm(child, p->mm))
edd45544c   David Rientjes   oom: avoid deferr...
862
  				continue;
a63d83f42   David Rientjes   oom: badness heur...
863
864
865
  			/*
  			 * oom_badness() returns 0 if the thread is unkillable
  			 */
2a966b77a   Vladimir Davydov   mm: oom: add memc...
866
  			child_points = oom_badness(child,
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
867
  				oc->memcg, oc->nodemask, oc->totalpages);
5e9d834a0   David Rientjes   oom: sacrifice ch...
868
  			if (child_points > victim_points) {
6b0c81b3b   David Rientjes   mm, oom: reduce d...
869
  				put_task_struct(victim);
5e9d834a0   David Rientjes   oom: sacrifice ch...
870
871
  				victim = child;
  				victim_points = child_points;
6b0c81b3b   David Rientjes   mm, oom: reduce d...
872
  				get_task_struct(victim);
5e9d834a0   David Rientjes   oom: sacrifice ch...
873
  			}
dd8e8f405   Oleg Nesterov   oom: introduce fi...
874
  		}
1da4db0cd   Oleg Nesterov   oom_kill: change ...
875
  	}
43f7e8bea   Shakeel Butt   mm, oom: fix use-...
876
  	put_task_struct(p);
6b0c81b3b   David Rientjes   mm, oom: reduce d...
877
  	read_unlock(&tasklist_lock);
dd8e8f405   Oleg Nesterov   oom: introduce fi...
878

6b0c81b3b   David Rientjes   mm, oom: reduce d...
879
880
  	p = find_lock_task_mm(victim);
  	if (!p) {
6b0c81b3b   David Rientjes   mm, oom: reduce d...
881
  		put_task_struct(victim);
647f2bdf4   David Rientjes   mm, oom: fold oom...
882
  		return;
6b0c81b3b   David Rientjes   mm, oom: reduce d...
883
884
885
886
887
  	} else if (victim != p) {
  		get_task_struct(p);
  		put_task_struct(victim);
  		victim = p;
  	}
647f2bdf4   David Rientjes   mm, oom: fold oom...
888

880b76893   Tetsuo Handa   mm/oom_kill.c: fi...
889
  	/* Get a reference to safely compare mm after task_unlock(victim) */
647f2bdf4   David Rientjes   mm, oom: fold oom...
890
  	mm = victim->mm;
f1f100764   Vegard Nossum   mm: add new mmgra...
891
  	mmgrab(mm);
8e675f7af   Konstantin Khlebnikov   mm/oom_kill: coun...
892
893
894
895
  
  	/* Raise event before sending signal: task reaper must see this */
  	count_vm_event(OOM_KILL);
  	count_memcg_event_mm(mm, OOM_KILL);
426fb5e72   Tetsuo Handa   mm/oom_kill.c: re...
896
  	/*
cd04ae1e2   Michal Hocko   mm, oom: do not r...
897
898
899
  	 * We should send SIGKILL before granting access to memory reserves
  	 * in order to prevent the OOM victim from depleting the memory
  	 * reserves from the user space under its control.
426fb5e72   Tetsuo Handa   mm/oom_kill.c: re...
900
901
  	 */
  	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
16e951966   Johannes Weiner   mm: oom_kill: cle...
902
  	mark_oom_victim(victim);
eca56ff90   Jerome Marchand   mm, shmem: add in...
903
904
  	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB
  ",
647f2bdf4   David Rientjes   mm, oom: fold oom...
905
906
  		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
  		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
eca56ff90   Jerome Marchand   mm, shmem: add in...
907
908
  		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
  		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
647f2bdf4   David Rientjes   mm, oom: fold oom...
909
910
911
912
913
914
915
916
917
918
919
  	task_unlock(victim);
  
  	/*
  	 * Kill all user processes sharing victim->mm in other thread groups, if
  	 * any.  They don't get access to memory reserves, though, to avoid
  	 * depletion of all memory.  This prevents mm->mmap_sem livelock when an
  	 * oom killed thread cannot exit because it requires the semaphore and
  	 * its contended by another thread trying to allocate memory itself.
  	 * That thread will now get access to memory reserves since it has a
  	 * pending fatal signal.
  	 */
4d4048be8   Oleg Nesterov   oom_kill: add rcu...
920
  	rcu_read_lock();
c319025a6   Oleg Nesterov   mm/oom_kill: clea...
921
  	for_each_process(p) {
4d7b3394f   Oleg Nesterov   mm/oom_kill: fix ...
922
  		if (!process_shares_mm(p, mm))
c319025a6   Oleg Nesterov   mm/oom_kill: clea...
923
924
925
  			continue;
  		if (same_thread_group(p, victim))
  			continue;
1b51e65ea   Michal Hocko   oom, oom_reaper: ...
926
  		if (is_global_init(p)) {
aac453635   Michal Hocko   mm, oom: introduc...
927
  			can_oom_reap = false;
862e3073b   Michal Hocko   mm, oom: get rid ...
928
  			set_bit(MMF_OOM_SKIP, &mm->flags);
a373966d1   Michal Hocko   mm, oom: hide mm ...
929
930
931
932
  			pr_info("oom killer %d (%s) has mm pinned by %d (%s)
  ",
  					task_pid_nr(victim), victim->comm,
  					task_pid_nr(p), p->comm);
c319025a6   Oleg Nesterov   mm/oom_kill: clea...
933
  			continue;
aac453635   Michal Hocko   mm, oom: introduc...
934
  		}
1b51e65ea   Michal Hocko   oom, oom_reaper: ...
935
936
937
938
939
940
  		/*
  		 * No use_mm() user needs to read from the userspace so we are
  		 * ok to reap it.
  		 */
  		if (unlikely(p->flags & PF_KTHREAD))
  			continue;
c319025a6   Oleg Nesterov   mm/oom_kill: clea...
941
942
  		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
  	}
6b0c81b3b   David Rientjes   mm, oom: reduce d...
943
  	rcu_read_unlock();
647f2bdf4   David Rientjes   mm, oom: fold oom...
944

aac453635   Michal Hocko   mm, oom: introduc...
945
  	if (can_oom_reap)
36324a990   Michal Hocko   oom: clear TIF_ME...
946
  		wake_oom_reaper(victim);
aac453635   Michal Hocko   mm, oom: introduc...
947

880b76893   Tetsuo Handa   mm/oom_kill.c: fi...
948
  	mmdrop(mm);
6b0c81b3b   David Rientjes   mm, oom: reduce d...
949
  	put_task_struct(victim);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
950
  }
647f2bdf4   David Rientjes   mm, oom: fold oom...
951
  #undef K
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
952

309ed8825   David Rientjes   oom: extract pani...
953
954
955
  /*
   * Determines whether the kernel must panic because of the panic_on_oom sysctl.
   */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
956
957
  static void check_panic_on_oom(struct oom_control *oc,
  			       enum oom_constraint constraint)
309ed8825   David Rientjes   oom: extract pani...
958
959
960
961
962
963
964
965
966
967
968
969
  {
  	if (likely(!sysctl_panic_on_oom))
  		return;
  	if (sysctl_panic_on_oom != 2) {
  		/*
  		 * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
  		 * does not panic for cpuset, mempolicy, or memcg allocation
  		 * failures.
  		 */
  		if (constraint != CONSTRAINT_NONE)
  			return;
  	}
071a4befe   David Rientjes   mm, oom: do not p...
970
  	/* Do not panic for oom kills triggered by sysrq */
db2a0dd7a   Yaowei Bai   mm/oom_kill.c: in...
971
  	if (is_sysrq_oom(oc))
071a4befe   David Rientjes   mm, oom: do not p...
972
  		return;
2a966b77a   Vladimir Davydov   mm: oom: add memc...
973
  	dump_header(oc, NULL);
309ed8825   David Rientjes   oom: extract pani...
974
975
976
977
  	panic("Out of memory: %s panic_on_oom is enabled
  ",
  		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
  }
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
978
979
980
981
982
983
984
985
986
987
988
989
990
/*
 * Notifier chain invoked from out_of_memory() before selecting a victim;
 * callbacks report how much memory they freed via the chain argument.
 */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

/* Add @nb to the OOM notifier chain. */
int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

/* Remove @nb from the OOM notifier chain. */
int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
991
  /**
6e0fc46dc   David Rientjes   mm, oom: organize...
992
993
   * out_of_memory - kill the "best" process when we run out of memory
   * @oc: pointer to struct oom_control
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
994
995
996
997
998
999
   *
   * If we run out of memory, we have the choice between either
   * killing a random task (bad), letting the system crash (worse)
   * OR try to be smart about which process to kill. Note that we
   * don't have to be perfect here, we just have to be good.
   */
6e0fc46dc   David Rientjes   mm, oom: organize...
1000
  bool out_of_memory(struct oom_control *oc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1001
  {
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
1002
  	unsigned long freed = 0;
e36589323   David Rientjes   oom: remove speci...
1003
  	enum oom_constraint constraint = CONSTRAINT_NONE;
8bc719d3c   Martin Schwidefsky   [PATCH] out of me...
1004

dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1005
1006
  	if (oom_killer_disabled)
  		return false;
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1007
1008
1009
1010
1011
1012
  	if (!is_memcg_oom(oc)) {
  		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
  		if (freed > 0)
  			/* Got some memory back in the last second. */
  			return true;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1013

7b98c2e40   David Rientjes   oom: give current...
1014
  	/*
9ff4868e3   David Rientjes   mm, oom: allow ex...
1015
1016
1017
  	 * If current has a pending SIGKILL or is exiting, then automatically
  	 * select it.  The goal is to allow it to allocate so that it may
  	 * quickly exit and free its memory.
7b98c2e40   David Rientjes   oom: give current...
1018
  	 */
091f362c5   Michal Hocko   mm, oom: tighten ...
1019
  	if (task_will_free_mem(current)) {
16e951966   Johannes Weiner   mm: oom_kill: cle...
1020
  		mark_oom_victim(current);
1af8bb432   Michal Hocko   mm, oom: fortify ...
1021
  		wake_oom_reaper(current);
75e8f8b24   David Rientjes   mm, oom: remove u...
1022
  		return true;
7b98c2e40   David Rientjes   oom: give current...
1023
  	}
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
1024
  	/*
3da88fb3b   Michal Hocko   mm, oom: move GFP...
1025
1026
1027
1028
1029
  	 * The OOM killer does not compensate for IO-less reclaim.
  	 * pagefault_out_of_memory lost its gfp context so we have to
  	 * make sure exclude 0 mask - all other users should have at least
  	 * ___GFP_DIRECT_RECLAIM to get here.
  	 */
06ad276ac   Michal Hocko   mm, oom: do not e...
1030
  	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
3da88fb3b   Michal Hocko   mm, oom: move GFP...
1031
1032
1033
  		return true;
  
  	/*
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
1034
  	 * Check if there were limitations on the allocation (only relevant for
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1035
  	 * NUMA and memcg) that may require different handling.
9b0f8b040   Christoph Lameter   [PATCH] Terminate...
1036
  	 */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1037
  	constraint = constrained_alloc(oc);
6e0fc46dc   David Rientjes   mm, oom: organize...
1038
1039
  	if (constraint != CONSTRAINT_MEMORY_POLICY)
  		oc->nodemask = NULL;
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1040
  	check_panic_on_oom(oc, constraint);
0aad4b312   David Rientjes   oom: fold __out_o...
1041

7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1042
1043
  	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
  	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
121d1ba0a   David Rientjes   mm, oom: fix pote...
1044
  	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
6b0c81b3b   David Rientjes   mm, oom: reduce d...
1045
  		get_task_struct(current);
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1046
1047
  		oc->chosen = current;
  		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
75e8f8b24   David Rientjes   mm, oom: remove u...
1048
  		return true;
0aad4b312   David Rientjes   oom: fold __out_o...
1049
  	}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1050
  	select_bad_process(oc);
0aad4b312   David Rientjes   oom: fold __out_o...
1051
  	/* Found nothing?!?! Either we hang forever, or we panic. */
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1052
  	if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1053
  		dump_header(oc, NULL);
0aad4b312   David Rientjes   oom: fold __out_o...
1054
1055
1056
  		panic("Out of memory and no killable processes...
  ");
  	}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1057
1058
1059
  	if (oc->chosen && oc->chosen != (void *)-1UL) {
  		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
  				 "Memory cgroup out of memory");
75e8f8b24   David Rientjes   mm, oom: remove u...
1060
1061
1062
1063
  		/*
  		 * Give the killed process a good chance to exit before trying
  		 * to allocate memory again.
  		 */
4f774b912   David Rientjes   mm, oom: do not s...
1064
  		schedule_timeout_killable(1);
75e8f8b24   David Rientjes   mm, oom: remove u...
1065
  	}
7c5f64f84   Vladimir Davydov   mm: oom: deduplic...
1066
  	return !!oc->chosen;
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
1067
  }
e36589323   David Rientjes   oom: remove speci...
1068
1069
  /*
   * The pagefault handler calls here because it is out of memory, so kill a
798fd7569   Vladimir Davydov   mm: zap ZONE_OOM_...
1070
1071
   * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
   * killing is already in progress so do nothing.
e36589323   David Rientjes   oom: remove speci...
1072
1073
1074
   */
  void pagefault_out_of_memory(void)
  {
6e0fc46dc   David Rientjes   mm, oom: organize...
1075
1076
1077
  	struct oom_control oc = {
  		.zonelist = NULL,
  		.nodemask = NULL,
2a966b77a   Vladimir Davydov   mm: oom: add memc...
1078
  		.memcg = NULL,
6e0fc46dc   David Rientjes   mm, oom: organize...
1079
1080
  		.gfp_mask = 0,
  		.order = 0,
6e0fc46dc   David Rientjes   mm, oom: organize...
1081
  	};
494264208   Johannes Weiner   mm: memcg: handle...
1082
  	if (mem_cgroup_oom_synchronize(true))
dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1083
  		return;
3812c8c8f   Johannes Weiner   mm: memcg: do not...
1084

dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1085
1086
  	if (!mutex_trylock(&oom_lock))
  		return;
a104808e2   Tetsuo Handa   mm: don't emit wa...
1087
  	out_of_memory(&oc);
dc56401fc   Johannes Weiner   mm: oom_kill: sim...
1088
  	mutex_unlock(&oom_lock);
e36589323   David Rientjes   oom: remove speci...
1089
  }