Blame view
mm/oom_kill.c
19.8 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 |
/* * linux/mm/oom_kill.c * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... |
a63d83f42 oom: badness heur... |
7 8 |
* Copyright (C) 2010 Google, Inc. * Rewritten by David Rientjes |
1da177e4c Linux-2.6.12-rc2 |
9 10 |
* * The routines in this file are used to kill a process when |
a49335cce [PATCH] cpusets: ... |
11 12 |
* we're seriously out of memory. This gets called from __alloc_pages() * in mm/page_alloc.c when we really run out of memory. |
1da177e4c Linux-2.6.12-rc2 |
13 14 15 16 17 18 |
* * Since we won't call these routines often (on a well-configured * machine) this file will double as a 'coding guide' and a signpost * for newbie kernel hackers. It features several pointers to major * kernel subsystems and hints as to where to find out what things do. */ |
8ac773b4f [PATCH] OOM kille... |
19 |
#include <linux/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
#include <linux/mm.h> |
4e950f6f0 Remove fs.h from ... |
21 |
#include <linux/err.h> |
5a0e3ad6a include cleanup: ... |
22 |
#include <linux/gfp.h> |
1da177e4c Linux-2.6.12-rc2 |
23 24 25 26 |
#include <linux/sched.h> #include <linux/swap.h> #include <linux/timex.h> #include <linux/jiffies.h> |
ef08e3b49 [PATCH] cpusets: ... |
27 |
#include <linux/cpuset.h> |
b95f1b31b mm: Map most file... |
28 |
#include <linux/export.h> |
8bc719d3c [PATCH] out of me... |
29 |
#include <linux/notifier.h> |
c7ba5c9e8 Memory controller... |
30 |
#include <linux/memcontrol.h> |
6f48d0ebd oom: select task ... |
31 |
#include <linux/mempolicy.h> |
5cd9c58fb security: Fix set... |
32 |
#include <linux/security.h> |
edd45544c oom: avoid deferr... |
33 |
#include <linux/ptrace.h> |
f660daac4 oom: thaw threads... |
34 |
#include <linux/freezer.h> |
43d2b1132 tracepoint: add t... |
35 |
#include <linux/ftrace.h> |
dc3f21ead mm, oom: introduc... |
36 |
#include <linux/ratelimit.h> |
43d2b1132 tracepoint: add t... |
37 38 39 |
#define CREATE_TRACE_POINTS #include <trace/events/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
40 |
|
/* Panic instead of killing a task when the system runs out of memory. */
int sysctl_panic_on_oom;
/* Kill the task that triggered the OOM instead of scanning for a victim. */
int sysctl_oom_kill_allocating_task;
/* Dump the memory state of all eligible tasks when the OOM killer fires. */
int sysctl_oom_dump_tasks = 1;
/* Serializes OOM-killer ownership of zones (ZONE_OOM_LOCKED). */
static DEFINE_SPINLOCK(zone_scan_lock);
1da177e4c Linux-2.6.12-rc2 |
45 |
|
6f48d0ebd oom: select task ... |
46 47 48 |
#ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill |
ad9624417 oom_kill: has_int... |
49 |
* @start: task struct of which task to consider |
6f48d0ebd oom: select task ... |
50 51 52 53 54 |
* @mask: nodemask passed to page allocator for mempolicy ooms * * Task eligibility is determined by whether or not a candidate task, @tsk, * shares the same mempolicy nodes as current if it is bound by such a policy * and whether or not it has the same set of allowed cpuset nodes. |
495789a51 oom: make oom_sco... |
55 |
*/ |
ad9624417 oom_kill: has_int... |
56 |
static bool has_intersects_mems_allowed(struct task_struct *start, |
6f48d0ebd oom: select task ... |
57 |
const nodemask_t *mask) |
495789a51 oom: make oom_sco... |
58 |
{ |
ad9624417 oom_kill: has_int... |
59 60 |
struct task_struct *tsk; bool ret = false; |
495789a51 oom: make oom_sco... |
61 |
|
ad9624417 oom_kill: has_int... |
62 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
63 |
for_each_thread(start, tsk) { |
6f48d0ebd oom: select task ... |
64 65 66 67 68 69 70 |
if (mask) { /* * If this is a mempolicy constrained oom, tsk's * cpuset is irrelevant. Only return true if its * mempolicy intersects current, otherwise it may be * needlessly killed. */ |
ad9624417 oom_kill: has_int... |
71 |
ret = mempolicy_nodemask_intersects(tsk, mask); |
6f48d0ebd oom: select task ... |
72 73 74 75 76 |
} else { /* * This is not a mempolicy constrained oom, so only * check the mems of tsk's cpuset. */ |
ad9624417 oom_kill: has_int... |
77 |
ret = cpuset_mems_allowed_intersects(current, tsk); |
6f48d0ebd oom: select task ... |
78 |
} |
ad9624417 oom_kill: has_int... |
79 80 |
if (ret) break; |
1da4db0cd oom_kill: change ... |
81 |
} |
ad9624417 oom_kill: has_int... |
82 |
rcu_read_unlock(); |
df1090a8d oom: cleanup has_... |
83 |
|
ad9624417 oom_kill: has_int... |
84 |
return ret; |
6f48d0ebd oom: select task ... |
85 86 87 88 89 90 |
} #else static bool has_intersects_mems_allowed(struct task_struct *tsk, const nodemask_t *mask) { return true; |
495789a51 oom: make oom_sco... |
91 |
} |
6f48d0ebd oom: select task ... |
92 |
#endif /* CONFIG_NUMA */ |
495789a51 oom: make oom_sco... |
93 |
|
6f48d0ebd oom: select task ... |
94 95 96 97 98 99 |
/* * The process p may have detached its own ->mm while exiting or through * use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ |
158e0a2d1 memcg: use find_l... |
100 |
struct task_struct *find_lock_task_mm(struct task_struct *p) |
dd8e8f405 oom: introduce fi... |
101 |
{ |
1da4db0cd oom_kill: change ... |
102 |
struct task_struct *t; |
dd8e8f405 oom: introduce fi... |
103 |
|
4d4048be8 oom_kill: add rcu... |
104 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
105 |
for_each_thread(p, t) { |
dd8e8f405 oom: introduce fi... |
106 107 |
task_lock(t); if (likely(t->mm)) |
4d4048be8 oom_kill: add rcu... |
108 |
goto found; |
dd8e8f405 oom: introduce fi... |
109 |
task_unlock(t); |
1da4db0cd oom_kill: change ... |
110 |
} |
4d4048be8 oom_kill: add rcu... |
111 112 113 |
t = NULL; found: rcu_read_unlock(); |
dd8e8f405 oom: introduce fi... |
114 |
|
4d4048be8 oom_kill: add rcu... |
115 |
return t; |
dd8e8f405 oom: introduce fi... |
116 |
} |
ab290adba oom: make oom_unk... |
117 |
/* return true if the task is not adequate as candidate victim task. */ |
e85bfd3aa oom: filter unkil... |
118 |
static bool oom_unkillable_task(struct task_struct *p, |
72835c86c mm: unify remaini... |
119 |
const struct mem_cgroup *memcg, const nodemask_t *nodemask) |
ab290adba oom: make oom_unk... |
120 121 122 123 124 125 126 |
{ if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ |
72835c86c mm: unify remaini... |
127 |
if (memcg && !task_in_mem_cgroup(p, memcg)) |
ab290adba oom: make oom_unk... |
128 129 130 131 132 133 134 135 |
return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; } |
1da177e4c Linux-2.6.12-rc2 |
136 |
/** |
a63d83f42 oom: badness heur... |
137 |
* oom_badness - heuristic function to determine which candidate task to kill |
1da177e4c Linux-2.6.12-rc2 |
138 |
* @p: task struct of which task we should calculate |
a63d83f42 oom: badness heur... |
139 |
* @totalpages: total present RAM allowed for page allocation |
1da177e4c Linux-2.6.12-rc2 |
140 |
* |
a63d83f42 oom: badness heur... |
141 142 143 |
* The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. |
1da177e4c Linux-2.6.12-rc2 |
144 |
*/ |
a7f638f99 mm, oom: normaliz... |
145 146 |
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) |
1da177e4c Linux-2.6.12-rc2 |
147 |
{ |
1e11ad8dc mm, oom: fix badn... |
148 |
long points; |
61eafb00d mm, oom: fix and ... |
149 |
long adj; |
28b83c519 oom: move oom_adj... |
150 |
|
72835c86c mm: unify remaini... |
151 |
if (oom_unkillable_task(p, memcg, nodemask)) |
26ebc9849 oom: /proc/<pid>/... |
152 |
return 0; |
1da177e4c Linux-2.6.12-rc2 |
153 |
|
dd8e8f405 oom: introduce fi... |
154 155 |
p = find_lock_task_mm(p); if (!p) |
1da177e4c Linux-2.6.12-rc2 |
156 |
return 0; |
a9c58b907 mm, oom: change t... |
157 |
adj = (long)p->signal->oom_score_adj; |
61eafb00d mm, oom: fix and ... |
158 |
if (adj == OOM_SCORE_ADJ_MIN) { |
5aecc85ab oom: do not kill ... |
159 160 161 |
task_unlock(p); return 0; } |
1da177e4c Linux-2.6.12-rc2 |
162 |
/* |
a63d83f42 oom: badness heur... |
163 |
* The baseline for the badness score is the proportion of RAM that each |
f755a042d oom: use pte page... |
164 |
* task's rss, pagetable and swap space use. |
1da177e4c Linux-2.6.12-rc2 |
165 |
*/ |
e1f56c89b mm: convert mm->n... |
166 |
points = get_mm_rss(p->mm) + atomic_long_read(&p->mm->nr_ptes) + |
a7f638f99 mm, oom: normaliz... |
167 |
get_mm_counter(p->mm, MM_SWAPENTS); |
a63d83f42 oom: badness heur... |
168 |
task_unlock(p); |
1da177e4c Linux-2.6.12-rc2 |
169 170 |
/* |
a63d83f42 oom: badness heur... |
171 172 |
* Root processes get 3% bonus, just like the __vm_enough_memory() * implementation used by LSMs. |
1da177e4c Linux-2.6.12-rc2 |
173 |
*/ |
a63d83f42 oom: badness heur... |
174 |
if (has_capability_noaudit(p, CAP_SYS_ADMIN)) |
778c14aff mm, oom: base roo... |
175 |
points -= (points * 3) / 100; |
1da177e4c Linux-2.6.12-rc2 |
176 |
|
61eafb00d mm, oom: fix and ... |
177 178 179 |
/* Normalize to oom_score_adj units */ adj *= totalpages / 1000; points += adj; |
1da177e4c Linux-2.6.12-rc2 |
180 |
|
f19e8aa11 oom: always retur... |
181 |
/* |
a7f638f99 mm, oom: normaliz... |
182 183 |
* Never return 0 for an eligible task regardless of the root bonus and * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). |
f19e8aa11 oom: always retur... |
184 |
*/ |
1e11ad8dc mm, oom: fix badn... |
185 |
return points > 0 ? points : 1; |
1da177e4c Linux-2.6.12-rc2 |
186 187 188 |
} /* |
9b0f8b040 [PATCH] Terminate... |
189 190 |
* Determine the type of allocation constraint. */ |
9b0f8b040 [PATCH] Terminate... |
191 |
#ifdef CONFIG_NUMA |
4365a5676 oom-kill: fix NUM... |
192 |
static enum oom_constraint constrained_alloc(struct zonelist *zonelist, |
a63d83f42 oom: badness heur... |
193 194 |
gfp_t gfp_mask, nodemask_t *nodemask, unsigned long *totalpages) |
4365a5676 oom-kill: fix NUM... |
195 |
{ |
54a6eb5c4 mm: use two zonel... |
196 |
struct zone *zone; |
dd1a239f6 mm: have zonelist... |
197 |
struct zoneref *z; |
54a6eb5c4 mm: use two zonel... |
198 |
enum zone_type high_zoneidx = gfp_zone(gfp_mask); |
a63d83f42 oom: badness heur... |
199 200 |
bool cpuset_limited = false; int nid; |
9b0f8b040 [PATCH] Terminate... |
201 |
|
a63d83f42 oom: badness heur... |
202 203 204 205 206 |
/* Default to all available memory */ *totalpages = totalram_pages + total_swap_pages; if (!zonelist) return CONSTRAINT_NONE; |
4365a5676 oom-kill: fix NUM... |
207 208 209 210 211 212 213 |
/* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ if (gfp_mask & __GFP_THISNODE) return CONSTRAINT_NONE; |
9b0f8b040 [PATCH] Terminate... |
214 |
|
4365a5676 oom-kill: fix NUM... |
215 |
/* |
a63d83f42 oom: badness heur... |
216 217 218 |
* This is not a __GFP_THISNODE allocation, so a truncated nodemask in * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). |
4365a5676 oom-kill: fix NUM... |
219 |
*/ |
bd3a66c1c oom: use N_MEMORY... |
220 |
if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) { |
a63d83f42 oom: badness heur... |
221 222 223 |
*totalpages = total_swap_pages; for_each_node_mask(nid, *nodemask) *totalpages += node_spanned_pages(nid); |
9b0f8b040 [PATCH] Terminate... |
224 |
return CONSTRAINT_MEMORY_POLICY; |
a63d83f42 oom: badness heur... |
225 |
} |
4365a5676 oom-kill: fix NUM... |
226 227 228 229 230 |
/* Check this allocation failure is caused by cpuset's wall function */ for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, nodemask) if (!cpuset_zone_allowed_softwall(zone, gfp_mask)) |
a63d83f42 oom: badness heur... |
231 |
cpuset_limited = true; |
9b0f8b040 [PATCH] Terminate... |
232 |
|
a63d83f42 oom: badness heur... |
233 234 235 236 237 238 |
if (cpuset_limited) { *totalpages = total_swap_pages; for_each_node_mask(nid, cpuset_current_mems_allowed) *totalpages += node_spanned_pages(nid); return CONSTRAINT_CPUSET; } |
9b0f8b040 [PATCH] Terminate... |
239 240 |
return CONSTRAINT_NONE; } |
4365a5676 oom-kill: fix NUM... |
241 242 |
#else static enum oom_constraint constrained_alloc(struct zonelist *zonelist, |
a63d83f42 oom: badness heur... |
243 244 |
gfp_t gfp_mask, nodemask_t *nodemask, unsigned long *totalpages) |
4365a5676 oom-kill: fix NUM... |
245 |
{ |
a63d83f42 oom: badness heur... |
246 |
*totalpages = totalram_pages + total_swap_pages; |
4365a5676 oom-kill: fix NUM... |
247 248 249 |
return CONSTRAINT_NONE; } #endif |
9b0f8b040 [PATCH] Terminate... |
250 |
|
9cbb78bb3 mm, memcg: introd... |
251 252 253 |
enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill) |
462607ecc mm, oom: introduc... |
254 255 256 |
{ if (task->exit_state) return OOM_SCAN_CONTINUE; |
9cbb78bb3 mm, memcg: introd... |
257 |
if (oom_unkillable_task(task, NULL, nodemask)) |
462607ecc mm, oom: introduc... |
258 259 260 261 262 263 264 265 266 267 268 269 270 271 |
return OOM_SCAN_CONTINUE; /* * This task already has access to memory reserves and is being killed. * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { if (unlikely(frozen(task))) __thaw_task(task); if (!force_kill) return OOM_SCAN_ABORT; } if (!task->mm) return OOM_SCAN_CONTINUE; |
e1e12d2f3 mm, oom: fix race... |
272 273 274 275 276 277 |
/* * If task is allocating a lot of memory and has been marked to be * killed first if it triggers an oom, then select it. */ if (oom_task_origin(task)) return OOM_SCAN_SELECT; |
9ff4868e3 mm, oom: allow ex... |
278 |
if (task->flags & PF_EXITING && !force_kill) { |
462607ecc mm, oom: introduc... |
279 |
/* |
9ff4868e3 mm, oom: allow ex... |
280 281 |
* If this task is not being ptraced on exit, then wait for it * to finish before killing some other task unnecessarily. |
462607ecc mm, oom: introduc... |
282 |
*/ |
9ff4868e3 mm, oom: allow ex... |
283 284 |
if (!(task->group_leader->ptrace & PT_TRACE_EXIT)) return OOM_SCAN_ABORT; |
462607ecc mm, oom: introduc... |
285 286 287 |
} return OOM_SCAN_OK; } |
9b0f8b040 [PATCH] Terminate... |
288 |
/* |
1da177e4c Linux-2.6.12-rc2 |
289 |
* Simple selection loop. We chose the process with the highest |
6b4f2b56a mm/oom_kill: remo... |
290 |
* number of 'points'. Returns -1 on scan abort. |
1da177e4c Linux-2.6.12-rc2 |
291 292 293 |
* * (not docbooked, we don't want this one cluttering up the manual) */ |
a63d83f42 oom: badness heur... |
294 |
static struct task_struct *select_bad_process(unsigned int *ppoints, |
9cbb78bb3 mm, memcg: introd... |
295 296 |
unsigned long totalpages, const nodemask_t *nodemask, bool force_kill) |
1da177e4c Linux-2.6.12-rc2 |
297 |
{ |
3a5dda7a1 oom: prevent unne... |
298 |
struct task_struct *g, *p; |
1da177e4c Linux-2.6.12-rc2 |
299 |
struct task_struct *chosen = NULL; |
a7f638f99 mm, oom: normaliz... |
300 |
unsigned long chosen_points = 0; |
1da177e4c Linux-2.6.12-rc2 |
301 |
|
6b0c81b3b mm, oom: reduce d... |
302 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
303 |
for_each_process_thread(g, p) { |
a63d83f42 oom: badness heur... |
304 |
unsigned int points; |
a49335cce [PATCH] cpusets: ... |
305 |
|
9cbb78bb3 mm, memcg: introd... |
306 |
switch (oom_scan_process_thread(p, totalpages, nodemask, |
462607ecc mm, oom: introduc... |
307 308 309 310 311 312 |
force_kill)) { case OOM_SCAN_SELECT: chosen = p; chosen_points = ULONG_MAX; /* fall through */ case OOM_SCAN_CONTINUE: |
c027a474a oom: task->mm == ... |
313 |
continue; |
462607ecc mm, oom: introduc... |
314 |
case OOM_SCAN_ABORT: |
6b0c81b3b mm, oom: reduce d... |
315 |
rcu_read_unlock(); |
6b4f2b56a mm/oom_kill: remo... |
316 |
return (struct task_struct *)(-1UL); |
462607ecc mm, oom: introduc... |
317 318 319 |
case OOM_SCAN_OK: break; }; |
9cbb78bb3 mm, memcg: introd... |
320 |
points = oom_badness(p, NULL, nodemask, totalpages); |
d49ad9355 mm, oom: prefer t... |
321 322 323 324 325 326 327 328 |
if (!points || points < chosen_points) continue; /* Prefer thread group leaders for display purposes */ if (points == chosen_points && thread_group_leader(chosen)) continue; chosen = p; chosen_points = points; |
1da4db0cd oom_kill: change ... |
329 |
} |
6b0c81b3b mm, oom: reduce d... |
330 331 332 |
if (chosen) get_task_struct(chosen); rcu_read_unlock(); |
972c4ea59 [PATCH] select_ba... |
333 |
|
a7f638f99 mm, oom: normaliz... |
334 |
*ppoints = chosen_points * 1000 / totalpages; |
1da177e4c Linux-2.6.12-rc2 |
335 336 337 338 |
return chosen; } /** |
1b578df02 mm/oom_kill: fix ... |
339 |
* dump_tasks - dump current memory state of all system tasks |
dad7557eb mm: fix kernel-do... |
340 |
* @memcg: current's memory controller, if constrained |
e85bfd3aa oom: filter unkil... |
341 |
* @nodemask: nodemask passed to page allocator for mempolicy ooms |
1b578df02 mm/oom_kill: fix ... |
342 |
* |
e85bfd3aa oom: filter unkil... |
343 344 345 |
* Dumps the current memory state of all eligible tasks. Tasks not in the same * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes * are not shown. |
de34d965a mm, oom: replace ... |
346 347 |
* State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, * swapents, oom_score_adj value, and name. |
fef1bdd68 oom: add sysctl t... |
348 |
*/ |
72835c86c mm: unify remaini... |
349 |
static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) |
fef1bdd68 oom: add sysctl t... |
350 |
{ |
c55db9578 oom: dump_tasks u... |
351 352 |
struct task_struct *p; struct task_struct *task; |
fef1bdd68 oom: add sysctl t... |
353 |
|
de34d965a mm, oom: replace ... |
354 355 |
pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name "); |
6b0c81b3b mm, oom: reduce d... |
356 |
rcu_read_lock(); |
c55db9578 oom: dump_tasks u... |
357 |
for_each_process(p) { |
72835c86c mm: unify remaini... |
358 |
if (oom_unkillable_task(p, memcg, nodemask)) |
b4416d2be oom: do not dump ... |
359 |
continue; |
fef1bdd68 oom: add sysctl t... |
360 |
|
c55db9578 oom: dump_tasks u... |
361 362 |
task = find_lock_task_mm(p); if (!task) { |
6d2661ede oom: fix possible... |
363 |
/* |
74ab7f1d3 oom: improve comm... |
364 365 |
* This is a kthread or all of p's threads have already * detached their mm's. There's no need to report |
c55db9578 oom: dump_tasks u... |
366 |
* them; they can't be oom killed anyway. |
6d2661ede oom: fix possible... |
367 |
*/ |
6d2661ede oom: fix possible... |
368 369 |
continue; } |
c55db9578 oom: dump_tasks u... |
370 |
|
e1f56c89b mm: convert mm->n... |
371 372 |
pr_info("[%5d] %5d %5d %8lu %8lu %7ld %8lu %5hd %s ", |
078de5f70 userns: Store uid... |
373 374 |
task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), |
e1f56c89b mm: convert mm->n... |
375 |
atomic_long_read(&task->mm->nr_ptes), |
de34d965a mm, oom: replace ... |
376 |
get_mm_counter(task->mm, MM_SWAPENTS), |
a63d83f42 oom: badness heur... |
377 |
task->signal->oom_score_adj, task->comm); |
c55db9578 oom: dump_tasks u... |
378 379 |
task_unlock(task); } |
6b0c81b3b mm, oom: reduce d... |
380 |
rcu_read_unlock(); |
fef1bdd68 oom: add sysctl t... |
381 |
} |
d31f56dbf memcg: avoid oom-... |
382 |
static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, |
72835c86c mm: unify remaini... |
383 |
struct mem_cgroup *memcg, const nodemask_t *nodemask) |
1b604d75b oom: dump stack a... |
384 |
{ |
5e9d834a0 oom: sacrifice ch... |
385 |
task_lock(current); |
1b604d75b oom: dump stack a... |
386 |
pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " |
a9c58b907 mm, oom: change t... |
387 388 |
"oom_score_adj=%hd ", |
01dc52ebd oom: remove depre... |
389 |
current->comm, gfp_mask, order, |
a63d83f42 oom: badness heur... |
390 |
current->signal->oom_score_adj); |
1b604d75b oom: dump stack a... |
391 392 393 |
cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); |
58cf188ed memcg, oom: provi... |
394 395 396 397 |
if (memcg) mem_cgroup_print_oom_info(memcg, p); else show_mem(SHOW_MEM_FILTER_NODES); |
1b604d75b oom: dump stack a... |
398 |
if (sysctl_oom_dump_tasks) |
72835c86c mm: unify remaini... |
399 |
dump_tasks(memcg, nodemask); |
1b604d75b oom: dump stack a... |
400 |
} |
3b4798cbc oom-kill: show vi... |
401 |
#define K(x) ((x) << (PAGE_SHIFT-10)) |
6b0c81b3b mm, oom: reduce d... |
402 403 404 405 |
/* * Must be called while holding a reference to p, which will be released upon * returning. */ |
9cbb78bb3 mm, memcg: introd... |
406 407 408 409 |
void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message) |
1da177e4c Linux-2.6.12-rc2 |
410 |
{ |
52d3c0367 Revert "oom: oom_... |
411 |
struct task_struct *victim = p; |
5e9d834a0 oom: sacrifice ch... |
412 |
struct task_struct *child; |
1da4db0cd oom_kill: change ... |
413 |
struct task_struct *t; |
647f2bdf4 mm, oom: fold oom... |
414 |
struct mm_struct *mm; |
52d3c0367 Revert "oom: oom_... |
415 |
unsigned int victim_points = 0; |
dc3f21ead mm, oom: introduc... |
416 417 |
static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); |
1da177e4c Linux-2.6.12-rc2 |
418 |
|
50ec3bbff [PATCH] oom: hand... |
419 420 421 422 |
/* * If the task is already exiting, don't alarm the sysadmin or kill * its children or threads, just set TIF_MEMDIE so it can die quickly */ |
0753ba01e mm: revert "oom: ... |
423 |
if (p->flags & PF_EXITING) { |
4358997ae oom: avoid sendin... |
424 |
set_tsk_thread_flag(p, TIF_MEMDIE); |
6b0c81b3b mm, oom: reduce d... |
425 |
put_task_struct(p); |
2a1c9b1fc mm, oom: avoid lo... |
426 |
return; |
50ec3bbff [PATCH] oom: hand... |
427 |
} |
dc3f21ead mm, oom: introduc... |
428 |
if (__ratelimit(&oom_rs)) |
8447d950e mm, oom: do not e... |
429 |
dump_header(p, gfp_mask, order, memcg, nodemask); |
5e9d834a0 oom: sacrifice ch... |
430 |
task_lock(p); |
a63d83f42 oom: badness heur... |
431 432 |
pr_err("%s: Kill process %d (%s) score %d or sacrifice child ", |
5e9d834a0 oom: sacrifice ch... |
433 434 |
message, task_pid_nr(p), p->comm, points); task_unlock(p); |
f3af38d30 [PATCH] oom: clea... |
435 |
|
5e9d834a0 oom: sacrifice ch... |
436 437 |
/* * If any of p's children has a different mm and is eligible for kill, |
11239836c oom: remove refer... |
438 |
* the one with the highest oom_badness() score is sacrificed for its |
5e9d834a0 oom: sacrifice ch... |
439 440 441 |
* parent. This attempts to lose the minimal amount of work done while * still freeing memory. */ |
6b0c81b3b mm, oom: reduce d... |
442 |
read_lock(&tasklist_lock); |
1da4db0cd oom_kill: change ... |
443 |
for_each_thread(p, t) { |
5e9d834a0 oom: sacrifice ch... |
444 |
list_for_each_entry(child, &t->children, sibling) { |
a63d83f42 oom: badness heur... |
445 |
unsigned int child_points; |
5e9d834a0 oom: sacrifice ch... |
446 |
|
edd45544c oom: avoid deferr... |
447 448 |
if (child->mm == p->mm) continue; |
a63d83f42 oom: badness heur... |
449 450 451 |
/* * oom_badness() returns 0 if the thread is unkillable */ |
72835c86c mm: unify remaini... |
452 |
child_points = oom_badness(child, memcg, nodemask, |
a63d83f42 oom: badness heur... |
453 |
totalpages); |
5e9d834a0 oom: sacrifice ch... |
454 |
if (child_points > victim_points) { |
6b0c81b3b mm, oom: reduce d... |
455 |
put_task_struct(victim); |
5e9d834a0 oom: sacrifice ch... |
456 457 |
victim = child; victim_points = child_points; |
6b0c81b3b mm, oom: reduce d... |
458 |
get_task_struct(victim); |
5e9d834a0 oom: sacrifice ch... |
459 |
} |
dd8e8f405 oom: introduce fi... |
460 |
} |
1da4db0cd oom_kill: change ... |
461 |
} |
6b0c81b3b mm, oom: reduce d... |
462 |
read_unlock(&tasklist_lock); |
dd8e8f405 oom: introduce fi... |
463 |
|
6b0c81b3b mm, oom: reduce d... |
464 465 |
p = find_lock_task_mm(victim); if (!p) { |
6b0c81b3b mm, oom: reduce d... |
466 |
put_task_struct(victim); |
647f2bdf4 mm, oom: fold oom... |
467 |
return; |
6b0c81b3b mm, oom: reduce d... |
468 469 470 471 472 |
} else if (victim != p) { get_task_struct(p); put_task_struct(victim); victim = p; } |
647f2bdf4 mm, oom: fold oom... |
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 |
/* mm cannot safely be dereferenced after task_unlock(victim) */ mm = victim->mm; pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB ", task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), K(get_mm_counter(victim->mm, MM_ANONPAGES)), K(get_mm_counter(victim->mm, MM_FILEPAGES))); task_unlock(victim); /* * Kill all user processes sharing victim->mm in other thread groups, if * any. They don't get access to memory reserves, though, to avoid * depletion of all memory. This prevents mm->mmap_sem livelock when an * oom killed thread cannot exit because it requires the semaphore and * its contended by another thread trying to allocate memory itself. * That thread will now get access to memory reserves since it has a * pending fatal signal. */ |
4d4048be8 oom_kill: add rcu... |
492 |
rcu_read_lock(); |
647f2bdf4 mm, oom: fold oom... |
493 494 495 496 497 498 499 500 501 502 503 |
for_each_process(p) if (p->mm == mm && !same_thread_group(p, victim) && !(p->flags & PF_KTHREAD)) { if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) continue; task_lock(p); /* Protect ->comm from prctl() */ pr_err("Kill process %d (%s) sharing same memory ", task_pid_nr(p), p->comm); task_unlock(p); |
d2d393099 signal: oom_kill_... |
504 |
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); |
647f2bdf4 mm, oom: fold oom... |
505 |
} |
6b0c81b3b mm, oom: reduce d... |
506 |
rcu_read_unlock(); |
647f2bdf4 mm, oom: fold oom... |
507 508 |
set_tsk_thread_flag(victim, TIF_MEMDIE); |
d2d393099 signal: oom_kill_... |
509 |
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); |
6b0c81b3b mm, oom: reduce d... |
510 |
put_task_struct(victim); |
1da177e4c Linux-2.6.12-rc2 |
511 |
} |
647f2bdf4 mm, oom: fold oom... |
512 |
#undef K |
1da177e4c Linux-2.6.12-rc2 |
513 |
|
309ed8825 oom: extract pani... |
514 515 516 |
/* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ |
876aafbfd mm, memcg: move a... |
517 518 |
void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, int order, const nodemask_t *nodemask) |
309ed8825 oom: extract pani... |
519 520 521 522 523 524 525 526 527 528 529 530 |
{ if (likely(!sysctl_panic_on_oom)) return; if (sysctl_panic_on_oom != 2) { /* * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ if (constraint != CONSTRAINT_NONE) return; } |
e85bfd3aa oom: filter unkil... |
531 |
dump_header(NULL, gfp_mask, order, NULL, nodemask); |
309ed8825 oom: extract pani... |
532 533 534 535 |
panic("Out of memory: %s panic_on_oom is enabled ", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } |
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
098d7f128 oom: add per-zone... |
549 550 551 552 553 |
/* * Try to acquire the OOM killer lock for the zones in zonelist. Returns zero * if a parallel OOM killing is already taking place that includes a zone in * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. */ |
ff321feac mm: rename try_se... |
554 |
int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) |
098d7f128 oom: add per-zone... |
555 |
{ |
dd1a239f6 mm: have zonelist... |
556 557 |
struct zoneref *z; struct zone *zone; |
098d7f128 oom: add per-zone... |
558 |
int ret = 1; |
c7d4caeb1 oom: fix zone_sca... |
559 |
spin_lock(&zone_scan_lock); |
dd1a239f6 mm: have zonelist... |
560 561 |
for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { if (zone_is_oom_locked(zone)) { |
098d7f128 oom: add per-zone... |
562 563 564 |
ret = 0; goto out; } |
dd1a239f6 mm: have zonelist... |
565 566 567 568 |
} for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { /* |
c7d4caeb1 oom: fix zone_sca... |
569 |
* Lock each zone in the zonelist under zone_scan_lock so a |
ff321feac mm: rename try_se... |
570 |
* parallel invocation of try_set_zonelist_oom() doesn't succeed |
dd1a239f6 mm: have zonelist... |
571 572 573 574 |
* when it shouldn't. */ zone_set_flag(zone, ZONE_OOM_LOCKED); } |
098d7f128 oom: add per-zone... |
575 |
|
098d7f128 oom: add per-zone... |
576 |
out: |
c7d4caeb1 oom: fix zone_sca... |
577 |
spin_unlock(&zone_scan_lock); |
098d7f128 oom: add per-zone... |
578 579 580 581 582 583 584 585 |
return ret; } /* * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed * allocation attempts with zonelists containing them may now recall the OOM * killer, if necessary. */ |
dd1a239f6 mm: have zonelist... |
586 |
void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) |
098d7f128 oom: add per-zone... |
587 |
{ |
dd1a239f6 mm: have zonelist... |
588 589 |
struct zoneref *z; struct zone *zone; |
098d7f128 oom: add per-zone... |
590 |
|
c7d4caeb1 oom: fix zone_sca... |
591 |
spin_lock(&zone_scan_lock); |
dd1a239f6 mm: have zonelist... |
592 593 594 |
for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { zone_clear_flag(zone, ZONE_OOM_LOCKED); } |
c7d4caeb1 oom: fix zone_sca... |
595 |
spin_unlock(&zone_scan_lock); |
098d7f128 oom: add per-zone... |
596 |
} |
1da177e4c Linux-2.6.12-rc2 |
597 |
/** |
6937a25cf [PATCH] mm: fix t... |
598 |
* out_of_memory - kill the "best" process when we run out of memory |
1b578df02 mm/oom_kill: fix ... |
599 600 601 |
* @zonelist: zonelist pointer * @gfp_mask: memory allocation flags * @order: amount of memory being requested as a power of 2 |
6f48d0ebd oom: select task ... |
602 |
* @nodemask: nodemask passed to page allocator |
08ab9b10d mm, oom: force oo... |
603 |
* @force_kill: true if a task must be killed, even if others are exiting |
1da177e4c Linux-2.6.12-rc2 |
604 605 606 607 608 609 |
* * If we run out of memory, we have the choice between either * killing a random task (bad), letting the system crash (worse) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ |
4365a5676 oom-kill: fix NUM... |
610 |
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, |
08ab9b10d mm, oom: force oo... |
611 |
int order, nodemask_t *nodemask, bool force_kill) |
1da177e4c Linux-2.6.12-rc2 |
612 |
{ |
e85bfd3aa oom: filter unkil... |
613 |
const nodemask_t *mpol_mask; |
0aad4b312 oom: fold __out_o... |
614 |
struct task_struct *p; |
a63d83f42 oom: badness heur... |
615 |
unsigned long totalpages; |
8bc719d3c [PATCH] out of me... |
616 |
unsigned long freed = 0; |
9cbb78bb3 mm, memcg: introd... |
617 |
unsigned int uninitialized_var(points); |
e36589323 oom: remove speci... |
618 |
enum oom_constraint constraint = CONSTRAINT_NONE; |
b52723c56 oom: fix tasklist... |
619 |
int killed = 0; |
8bc719d3c [PATCH] out of me... |
620 621 622 623 624 |
blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) /* Got some memory back in the last second. */ return; |
1da177e4c Linux-2.6.12-rc2 |
625 |
|
7b98c2e40 oom: give current... |
626 |
/* |
9ff4868e3 mm, oom: allow ex... |
627 628 629 |
* If current has a pending SIGKILL or is exiting, then automatically * select it. The goal is to allow it to allocate so that it may * quickly exit and free its memory. |
7b98c2e40 oom: give current... |
630 |
*/ |
9ff4868e3 mm, oom: allow ex... |
631 |
if (fatal_signal_pending(current) || current->flags & PF_EXITING) { |
7b98c2e40 oom: give current... |
632 633 634 |
set_thread_flag(TIF_MEMDIE); return; } |
9b0f8b040 [PATCH] Terminate... |
635 636 637 638 |
/* * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ |
a63d83f42 oom: badness heur... |
639 640 |
constraint = constrained_alloc(zonelist, gfp_mask, nodemask, &totalpages); |
e85bfd3aa oom: filter unkil... |
641 642 |
mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); |
0aad4b312 oom: fold __out_o... |
643 |
|
121d1ba0a mm, oom: fix pote... |
644 |
if (sysctl_oom_kill_allocating_task && current->mm && |
a96cfd6e9 oom: move OOM_DIS... |
645 |
!oom_unkillable_task(current, NULL, nodemask) && |
121d1ba0a mm, oom: fix pote... |
646 |
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { |
6b0c81b3b mm, oom: reduce d... |
647 |
get_task_struct(current); |
2a1c9b1fc mm, oom: avoid lo... |
648 649 650 651 |
oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, nodemask, "Out of memory (oom_kill_allocating_task)"); goto out; |
0aad4b312 oom: fold __out_o... |
652 |
} |
9cbb78bb3 mm, memcg: introd... |
653 |
p = select_bad_process(&points, totalpages, mpol_mask, force_kill); |
0aad4b312 oom: fold __out_o... |
654 655 |
/* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { |
e85bfd3aa oom: filter unkil... |
656 |
dump_header(NULL, gfp_mask, order, NULL, mpol_mask); |
0aad4b312 oom: fold __out_o... |
657 658 659 |
panic("Out of memory and no killable processes... "); } |
6b4f2b56a mm/oom_kill: remo... |
660 |
if (p != (void *)-1UL) { |
2a1c9b1fc mm, oom: avoid lo... |
661 662 663 664 |
oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, nodemask, "Out of memory"); killed = 1; } |
b52723c56 oom: fix tasklist... |
665 |
out: |
1da177e4c Linux-2.6.12-rc2 |
666 |
/* |
4f774b912 mm, oom: do not s... |
667 668 |
* Give the killed threads a good chance of exiting before trying to * allocate memory again. |
1da177e4c Linux-2.6.12-rc2 |
669 |
*/ |
4f774b912 mm, oom: do not s... |
670 671 |
if (killed) schedule_timeout_killable(1); |
1da177e4c Linux-2.6.12-rc2 |
672 |
} |
e36589323 oom: remove speci... |
673 674 675 |
/* * The pagefault handler calls here because it is out of memory, so kill a |
efacd02e4 mm, oom: cleanup ... |
676 677 |
* memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a * parallel oom killing is already in progress so do nothing. |
e36589323 oom: remove speci... |
678 679 680 |
*/ void pagefault_out_of_memory(void) { |
3812c8c8f mm: memcg: do not... |
681 |
struct zonelist *zonelist; |
efacd02e4 mm, oom: cleanup ... |
682 |
|
494264208 mm: memcg: handle... |
683 |
if (mem_cgroup_oom_synchronize(true)) |
3812c8c8f mm: memcg: do not... |
684 685 686 |
return; zonelist = node_zonelist(first_online_node, GFP_KERNEL); |
efacd02e4 mm, oom: cleanup ... |
687 |
if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { |
08ab9b10d mm, oom: force oo... |
688 |
out_of_memory(NULL, 0, 0, NULL, false); |
efacd02e4 mm, oom: cleanup ... |
689 |
clear_zonelist_oom(zonelist, GFP_KERNEL); |
e36589323 oom: remove speci... |
690 |
} |
e36589323 oom: remove speci... |
691 |
} |