Blame view
mm/oom_kill.c
19.5 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 |
/* * linux/mm/oom_kill.c * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... |
a63d83f42 oom: badness heur... |
7 8 |
* Copyright (C) 2010 Google, Inc. * Rewritten by David Rientjes |
1da177e4c Linux-2.6.12-rc2 |
9 10 |
* * The routines in this file are used to kill a process when |
a49335cce [PATCH] cpusets: ... |
11 12 |
* we're seriously out of memory. This gets called from __alloc_pages() * in mm/page_alloc.c when we really run out of memory. |
1da177e4c Linux-2.6.12-rc2 |
13 14 15 16 17 18 |
* * Since we won't call these routines often (on a well-configured * machine) this file will double as a 'coding guide' and a signpost * for newbie kernel hackers. It features several pointers to major * kernel subsystems and hints as to where to find out what things do. */ |
8ac773b4f [PATCH] OOM kille... |
19 |
#include <linux/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
#include <linux/mm.h> |
4e950f6f0 Remove fs.h from ... |
21 |
#include <linux/err.h> |
5a0e3ad6a include cleanup: ... |
22 |
#include <linux/gfp.h> |
1da177e4c Linux-2.6.12-rc2 |
23 24 25 26 |
#include <linux/sched.h> #include <linux/swap.h> #include <linux/timex.h> #include <linux/jiffies.h> |
ef08e3b49 [PATCH] cpusets: ... |
27 |
#include <linux/cpuset.h> |
b95f1b31b mm: Map most file... |
28 |
#include <linux/export.h> |
8bc719d3c [PATCH] out of me... |
29 |
#include <linux/notifier.h> |
c7ba5c9e8 Memory controller... |
30 |
#include <linux/memcontrol.h> |
6f48d0ebd oom: select task ... |
31 |
#include <linux/mempolicy.h> |
5cd9c58fb security: Fix set... |
32 |
#include <linux/security.h> |
edd45544c oom: avoid deferr... |
33 |
#include <linux/ptrace.h> |
f660daac4 oom: thaw threads... |
34 |
#include <linux/freezer.h> |
43d2b1132 tracepoint: add t... |
35 |
#include <linux/ftrace.h> |
dc3f21ead mm, oom: introduc... |
36 |
#include <linux/ratelimit.h> |
43d2b1132 tracepoint: add t... |
37 38 39 |
#define CREATE_TRACE_POINTS #include <trace/events/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
40 |
|
fadd8fbd1 [PATCH] support f... |
41 |
/* vm.panic_on_oom: if nonzero, panic instead of killing a task (2 = always). */
int sysctl_panic_on_oom;
/* vm.oom_kill_allocating_task: if set, kill current rather than scan for a victim. */
int sysctl_oom_kill_allocating_task;
/* vm.oom_dump_tasks: if set (default), dump the eligible-task table on OOM. */
int sysctl_oom_dump_tasks = 1;
/* Serializes ZONE_OOM_LOCKED test/set/clear across parallel OOM invocations. */
static DEFINE_SPINLOCK(zone_scan_lock);
1da177e4c Linux-2.6.12-rc2 |
45 |
|
6f48d0ebd oom: select task ... |
46 47 48 49 50 51 52 53 54 |
#ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill * @tsk: task struct of which task to consider * @mask: nodemask passed to page allocator for mempolicy ooms * * Task eligibility is determined by whether or not a candidate task, @tsk, * shares the same mempolicy nodes as current if it is bound by such a policy * and whether or not it has the same set of allowed cpuset nodes. |
495789a51 oom: make oom_sco... |
55 |
*/ |
6f48d0ebd oom: select task ... |
56 57 |
static bool has_intersects_mems_allowed(struct task_struct *tsk, const nodemask_t *mask) |
495789a51 oom: make oom_sco... |
58 |
{ |
6f48d0ebd oom: select task ... |
59 |
struct task_struct *start = tsk; |
495789a51 oom: make oom_sco... |
60 |
|
495789a51 oom: make oom_sco... |
61 |
do { |
6f48d0ebd oom: select task ... |
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
if (mask) { /* * If this is a mempolicy constrained oom, tsk's * cpuset is irrelevant. Only return true if its * mempolicy intersects current, otherwise it may be * needlessly killed. */ if (mempolicy_nodemask_intersects(tsk, mask)) return true; } else { /* * This is not a mempolicy constrained oom, so only * check the mems of tsk's cpuset. */ if (cpuset_mems_allowed_intersects(current, tsk)) return true; } |
df1090a8d oom: cleanup has_... |
79 |
} while_each_thread(start, tsk); |
6f48d0ebd oom: select task ... |
80 81 82 83 84 85 86 |
return false; } #else static bool has_intersects_mems_allowed(struct task_struct *tsk, const nodemask_t *mask) { return true; |
495789a51 oom: make oom_sco... |
87 |
} |
6f48d0ebd oom: select task ... |
88 |
#endif /* CONFIG_NUMA */ |
495789a51 oom: make oom_sco... |
89 |
|
6f48d0ebd oom: select task ... |
90 91 92 93 94 95 |
/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer.  Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 *
 * NOTE: on success the returned thread's task_lock() is held and the caller
 * must drop it; no lock is held when NULL is returned.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
	struct task_struct *t = p;

	do {
		task_lock(t);
		if (likely(t->mm))
			return t;	/* returns with t's task_lock held */
		task_unlock(t);
	} while_each_thread(p, t);

	return NULL;
}
ab290adba oom: make oom_unk... |
109 |
/* return true if the task is not adequate as candidate victim task. */ |
e85bfd3aa oom: filter unkil... |
110 |
static bool oom_unkillable_task(struct task_struct *p, |
72835c86c mm: unify remaini... |
111 |
const struct mem_cgroup *memcg, const nodemask_t *nodemask) |
ab290adba oom: make oom_unk... |
112 113 114 115 116 117 118 |
{ if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ |
72835c86c mm: unify remaini... |
119 |
if (memcg && !task_in_mem_cgroup(p, memcg)) |
ab290adba oom: make oom_unk... |
120 121 122 123 124 125 126 127 |
return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; } |
1da177e4c Linux-2.6.12-rc2 |
128 |
/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of which task we should calculate
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible.  The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 *
 * Returns 0 for tasks that must not be killed, otherwise a strictly positive
 * score derived from the task's rss, page-table and swap usage, shifted by
 * oom_score_adj in thousandths of @totalpages.
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
			  const nodemask_t *nodemask, unsigned long totalpages)
{
	long points;
	long adj;

	if (oom_unkillable_task(p, memcg, nodemask))
		return 0;

	/* Find a thread with a valid ->mm; returns with its task_lock held. */
	p = find_lock_task_mm(p);
	if (!p)
		return 0;

	adj = (long)p->signal->oom_score_adj;
	if (adj == OOM_SCORE_ADJ_MIN) {
		/* OOM_SCORE_ADJ_MIN means "never kill": report a zero score. */
		task_unlock(p);
		return 0;
	}

	/*
	 * The baseline for the badness score is the proportion of RAM that each
	 * task's rss, pagetable and swap space use.
	 */
	points = get_mm_rss(p->mm) + p->mm->nr_ptes +
		 get_mm_counter(p->mm, MM_SWAPENTS);
	task_unlock(p);

	/*
	 * Root processes get 3% bonus, just like the __vm_enough_memory()
	 * implementation used by LSMs.  (30/1000 of totalpages below.)
	 */
	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
		adj -= 30;

	/* Normalize to oom_score_adj units */
	adj *= totalpages / 1000;
	points += adj;

	/*
	 * Never return 0 for an eligible task regardless of the root bonus and
	 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
	 */
	return points > 0 ? points : 1;
}

/*
 * Determine the type of allocation constraint.
 */
9b0f8b040 [PATCH] Terminate... |
183 |
#ifdef CONFIG_NUMA
/*
 * Classify the failed allocation (unconstrained, mempolicy- or
 * cpuset-constrained) and compute *totalpages, the amount of memory
 * considered freeable by killing, used to normalize oom_badness() scores.
 */
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
				gfp_t gfp_mask, nodemask_t *nodemask,
				unsigned long *totalpages)
{
	struct zone *zone;
	struct zoneref *z;
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	bool cpuset_limited = false;
	int nid;

	/* Default to all available memory */
	*totalpages = totalram_pages + total_swap_pages;

	if (!zonelist)
		return CONSTRAINT_NONE;
	/*
	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
	 * to kill current.We have to random task kill in this case.
	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
	 */
	if (gfp_mask & __GFP_THISNODE)
		return CONSTRAINT_NONE;

	/*
	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
	 * the page allocator means a mempolicy is in effect.  Cpuset policy
	 * is enforced in get_page_from_freelist().
	 */
	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
		/* Only memory on the mempolicy's nodes (plus swap) is freeable. */
		*totalpages = total_swap_pages;
		for_each_node_mask(nid, *nodemask)
			*totalpages += node_spanned_pages(nid);
		return CONSTRAINT_MEMORY_POLICY;
	}

	/* Check this allocation failure is caused by cpuset's wall function */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
			high_zoneidx, nodemask)
		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
			cpuset_limited = true;

	if (cpuset_limited) {
		/* Only memory on the cpuset's allowed nodes (plus swap) counts. */
		*totalpages = total_swap_pages;
		for_each_node_mask(nid, cpuset_current_mems_allowed)
			*totalpages += node_spanned_pages(nid);
		return CONSTRAINT_CPUSET;
	}
	return CONSTRAINT_NONE;
}
#else
/* !NUMA: the allocation can never be constrained by placement. */
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
				gfp_t gfp_mask, nodemask_t *nodemask,
				unsigned long *totalpages)
{
	*totalpages = totalram_pages + total_swap_pages;
	return CONSTRAINT_NONE;
}
#endif
9b0f8b040 [PATCH] Terminate... |
242 |
|
9cbb78bb3 mm, memcg: introd... |
243 244 245 |
/*
 * Classify @task for the victim-selection scan: skip it (OOM_SCAN_CONTINUE),
 * abort the whole scan (OOM_SCAN_ABORT), select it outright
 * (OOM_SCAN_SELECT) or score it normally (OOM_SCAN_OK).
 */
enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	if (task->exit_state)
		return OOM_SCAN_CONTINUE;
	if (oom_unkillable_task(task, NULL, nodemask))
		return OOM_SCAN_CONTINUE;

	/*
	 * This task already has access to memory reserves and is being killed.
	 * Don't allow any other task to have access to the reserves.
	 */
	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
		/* A frozen task can't die; thaw it so it can exit. */
		if (unlikely(frozen(task)))
			__thaw_task(task);
		if (!force_kill)
			return OOM_SCAN_ABORT;
	}
	if (!task->mm)
		return OOM_SCAN_CONTINUE;

	/*
	 * If task is allocating a lot of memory and has been marked to be
	 * killed first if it triggers an oom, then select it.
	 */
	if (oom_task_origin(task))
		return OOM_SCAN_SELECT;

	if (task->flags & PF_EXITING && !force_kill) {
		/*
		 * If this task is not being ptraced on exit, then wait for it
		 * to finish before killing some other task unnecessarily.
		 */
		if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
			return OOM_SCAN_ABORT;
	}
	return OOM_SCAN_OK;
}
9b0f8b040 [PATCH] Terminate... |
280 |
/*
 * Simple selection loop. We chose the process with the highest
 * number of 'points'.
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 *
 * Returns the chosen task with an elevated reference count, NULL if no
 * eligible task exists, or ERR_PTR(-1UL) if the scan must be aborted
 * because a kill is already in progress.
 */
static struct task_struct *select_bad_process(unsigned int *ppoints,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	struct task_struct *g, *p;
	struct task_struct *chosen = NULL;
	unsigned long chosen_points = 0;

	rcu_read_lock();
	do_each_thread(g, p) {
		unsigned int points;

		switch (oom_scan_process_thread(p, totalpages, nodemask,
						force_kill)) {
		case OOM_SCAN_SELECT:
			/* Marked oom_task_origin: take it unconditionally. */
			chosen = p;
			chosen_points = ULONG_MAX;
			/* fall through */
		case OOM_SCAN_CONTINUE:
			continue;
		case OOM_SCAN_ABORT:
			rcu_read_unlock();
			return ERR_PTR(-1UL);
		case OOM_SCAN_OK:
			break;
		};
		points = oom_badness(p, NULL, nodemask, totalpages);
		if (points > chosen_points) {
			chosen = p;
			chosen_points = points;
		}
	} while_each_thread(g, p);
	/* Pin the victim before leaving the RCU read-side critical section. */
	if (chosen)
		get_task_struct(chosen);
	rcu_read_unlock();

	*ppoints = chosen_points * 1000 / totalpages;
	return chosen;
}

/**
1b578df02 mm/oom_kill: fix ... |
327 |
 * dump_tasks - dump current memory state of all system tasks
 * @memcg: current's memory controller, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 *
 * Dumps the current memory state of all eligible tasks.  Tasks not in the same
 * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
 * are not shown.
 * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
 * swapents, oom_score_adj value, and name.
 */
static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	struct task_struct *p;
	struct task_struct *task;

	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes swapents oom_score_adj name\n");
	rcu_read_lock();
	for_each_process(p) {
		if (oom_unkillable_task(p, memcg, nodemask))
			continue;

		task = find_lock_task_mm(p);
		if (!task) {
			/*
			 * This is a kthread or all of p's threads have already
			 * detached their mm's.  There's no need to report
			 * them; they can't be oom killed anyway.
			 */
			continue;
		}

		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu         %5hd %s\n",
			task->pid, from_kuid(&init_user_ns, task_uid(task)),
			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
			task->mm->nr_ptes,
			get_mm_counter(task->mm, MM_SWAPENTS),
			task->signal->oom_score_adj, task->comm);
		/* find_lock_task_mm() returned with task_lock held. */
		task_unlock(task);
	}
	rcu_read_unlock();
}
d31f56dbf memcg: avoid oom-... |
370 |
/*
 * Print the standard OOM report: the invoking task and allocation context,
 * a stack trace, the memory state (memcg or system-wide) and, if enabled
 * via sysctl, the per-task table from dump_tasks().
 */
static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
			struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	/* task_lock protects ->comm and the cpuset printout for current. */
	task_lock(current);
	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
		"oom_score_adj=%hd\n",
		current->comm, gfp_mask, order,
		current->signal->oom_score_adj);
	cpuset_print_task_mems_allowed(current);
	task_unlock(current);
	dump_stack();
	if (memcg)
		mem_cgroup_print_oom_info(memcg, p);
	else
		show_mem(SHOW_MEM_FILTER_NODES);
	if (sysctl_oom_dump_tasks)
		dump_tasks(memcg, nodemask);
}
3b4798cbc oom-kill: show vi... |
389 |
/* Convert a page count to kilobytes for the kill report. */
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
 * Must be called while holding a reference to p, which will be released upon
 * returning.
 */
void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
		      unsigned int points, unsigned long totalpages,
		      struct mem_cgroup *memcg, nodemask_t *nodemask,
		      const char *message)
{
	struct task_struct *victim = p;
	struct task_struct *child;
	struct task_struct *t = p;
	struct mm_struct *mm;
	unsigned int victim_points = 0;
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
					      DEFAULT_RATELIMIT_BURST);

	/*
	 * If the task is already exiting, don't alarm the sysadmin or kill
	 * its children or threads, just set TIF_MEMDIE so it can die quickly
	 */
	if (p->flags & PF_EXITING) {
		set_tsk_thread_flag(p, TIF_MEMDIE);
		put_task_struct(p);
		return;
	}

	/* Rate-limit the verbose header to avoid log flooding under OOM storms. */
	if (__ratelimit(&oom_rs))
		dump_header(p, gfp_mask, order, memcg, nodemask);

	task_lock(p);	/* protects ->comm */
	pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
		message, task_pid_nr(p), p->comm, points);
	task_unlock(p);

	/*
	 * If any of p's children has a different mm and is eligible for kill,
	 * the one with the highest oom_badness() score is sacrificed for its
	 * parent.  This attempts to lose the minimal amount of work done while
	 * still freeing memory.
	 */
	read_lock(&tasklist_lock);
	do {
		list_for_each_entry(child, &t->children, sibling) {
			unsigned int child_points;

			/* Skip children that share the parent's mm. */
			if (child->mm == p->mm)
				continue;
			/*
			 * oom_badness() returns 0 if the thread is unkillable
			 */
			child_points = oom_badness(child, memcg, nodemask,
								totalpages);
			if (child_points > victim_points) {
				/* Move our pinning reference to the new victim. */
				put_task_struct(victim);
				victim = child;
				victim_points = child_points;
				get_task_struct(victim);
			}
		}
	} while_each_thread(p, t);
	read_unlock(&tasklist_lock);

	rcu_read_lock();
	p = find_lock_task_mm(victim);
	if (!p) {
		/* All of victim's threads already lost their mm: nothing to do. */
		rcu_read_unlock();
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		/* A subthread holds the mm; retarget the kill (and reference). */
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* mm cannot safely be dereferenced after task_unlock(victim) */
	mm = victim->mm;
	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
		K(get_mm_counter(victim->mm, MM_FILEPAGES)));
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups, if
	 * any.  They don't get access to memory reserves, though, to avoid
	 * depletion of all memory.  This prevents mm->mmap_sem livelock when an
	 * oom killed thread cannot exit because it requires the semaphore and
	 * its contended by another thread trying to allocate memory itself.
	 * That thread will now get access to memory reserves since it has a
	 * pending fatal signal.
	 */
	for_each_process(p)
		if (p->mm == mm && !same_thread_group(p, victim) &&
		    !(p->flags & PF_KTHREAD)) {
			if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
				continue;

			task_lock(p);	/* Protect ->comm from prctl() */
			pr_err("Kill process %d (%s) sharing same memory\n",
				task_pid_nr(p), p->comm);
			task_unlock(p);
			do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
		}
	rcu_read_unlock();

	/* TIF_MEMDIE grants the victim access to memory reserves to exit fast. */
	set_tsk_thread_flag(victim, TIF_MEMDIE);
	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
	put_task_struct(victim);
}
#undef K
1da177e4c Linux-2.6.12-rc2 |
502 |
|
309ed8825 oom: extract pani... |
503 504 505 |
/* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ |
876aafbfd mm, memcg: move a... |
506 507 |
void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, int order, const nodemask_t *nodemask) |
309ed8825 oom: extract pani... |
508 509 510 511 512 513 514 515 516 517 518 519 |
{ if (likely(!sysctl_panic_on_oom)) return; if (sysctl_panic_on_oom != 2) { /* * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ if (constraint != CONSTRAINT_NONE) return; } |
e85bfd3aa oom: filter unkil... |
520 |
dump_header(NULL, gfp_mask, order, NULL, nodemask); |
309ed8825 oom: extract pani... |
521 522 523 524 |
panic("Out of memory: %s panic_on_oom is enabled ", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } |
8bc719d3c [PATCH] out of me... |
525 526 527 528 529 530 531 532 533 534 535 536 537 |
/*
 * Notifier chain invoked from out_of_memory() before any task is killed;
 * callbacks may free memory and report the amount freed, which can veto
 * the kill entirely.
 */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
098d7f128 oom: add per-zone... |
538 539 540 541 542 |
/*
 * Try to acquire the OOM killer lock for the zones in zonelist.  Returns zero
 * if a parallel OOM killing is already taking place that includes a zone in
 * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
 */
int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
	struct zoneref *z;
	struct zone *zone;
	int ret = 1;

	spin_lock(&zone_scan_lock);
	/* First pass: bail out if any zone is already OOM-locked. */
	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
		if (zone_is_oom_locked(zone)) {
			ret = 0;
			goto out;
		}
	}

	/* Second pass: no conflict found, take the lock on every zone. */
	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
		/*
		 * Lock each zone in the zonelist under zone_scan_lock so a
		 * parallel invocation of try_set_zonelist_oom() doesn't succeed
		 * when it shouldn't.
		 */
		zone_set_flag(zone, ZONE_OOM_LOCKED);
	}

out:
	spin_unlock(&zone_scan_lock);
	return ret;
}

/*
 * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed
 * allocation attempts with zonelists containing them may now recall the OOM
 * killer, if necessary.
 */
dd1a239f6 mm: have zonelist... |
575 |
void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
	struct zone *iter;
	struct zoneref *zref;

	/* Same lock as try_set_zonelist_oom() to serialize flag updates. */
	spin_lock(&zone_scan_lock);
	for_each_zone_zonelist(iter, zref, zonelist, gfp_zone(gfp_mask))
		zone_clear_flag(iter, ZONE_OOM_LOCKED);
	spin_unlock(&zone_scan_lock);
}
1da177e4c Linux-2.6.12-rc2 |
586 |
/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @zonelist: zonelist pointer
 * @gfp_mask: memory allocation flags
 * @order: amount of memory being requested as a power of 2
 * @nodemask: nodemask passed to page allocator
 * @force_kill: true if a task must be killed, even if others are exiting
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 */
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
		int order, nodemask_t *nodemask, bool force_kill)
{
	const nodemask_t *mpol_mask;
	struct task_struct *p;
	unsigned long totalpages;
	unsigned long freed = 0;
	unsigned int uninitialized_var(points);
	enum oom_constraint constraint = CONSTRAINT_NONE;
	int killed = 0;

	/* Give OOM notifiers a chance to free memory and avert the kill. */
	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
	if (freed > 0)
		/* Got some memory back in the last second. */
		return;

	/*
	 * If current has a pending SIGKILL or is exiting, then automatically
	 * select it.  The goal is to allow it to allocate so that it may
	 * quickly exit and free its memory.
	 */
	if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
		set_thread_flag(TIF_MEMDIE);
		return;
	}

	/*
	 * Check if there were limitations on the allocation (only relevant for
	 * NUMA) that may require different handling.
	 */
	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
						&totalpages);
	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
	/* May panic instead of returning, per the panic_on_oom sysctl. */
	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);

	if (sysctl_oom_kill_allocating_task && current->mm &&
	    !oom_unkillable_task(current, NULL, nodemask) &&
	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
		/* oom_kill_process() consumes this reference. */
		get_task_struct(current);
		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
				 nodemask,
				 "Out of memory (oom_kill_allocating_task)");
		goto out;
	}

	p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
	/* Found nothing?!?! Either we hang forever, or we panic. */
	if (!p) {
		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
		panic("Out of memory and no killable processes...\n");
	}
	/* ERR_PTR(-1UL) means a kill is already in progress: just wait. */
	if (PTR_ERR(p) != -1UL) {
		oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
				 nodemask, "Out of memory");
		killed = 1;
	}
out:
	/*
	 * Give the killed threads a good chance of exiting before trying to
	 * allocate memory again.
	 */
	if (killed)
		schedule_timeout_killable(1);
}
e36589323 oom: remove speci... |
662 663 664 |
/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task.  If any populated zone has ZONE_OOM_LOCKED set, a
 * parallel oom killing is already in progress so do nothing.
 */
void pagefault_out_of_memory(void)
{
	/* No allocation context here: use the first online node's zonelist. */
	struct zonelist *zonelist = node_zonelist(first_online_node,
						  GFP_KERNEL);

	if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
		out_of_memory(NULL, 0, 0, NULL, false);
		clear_zonelist_oom(zonelist, GFP_KERNEL);
	}
}