Blame view
mm/oom_kill.c
30.9 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 |
/* * linux/mm/oom_kill.c * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... |
a63d83f42 oom: badness heur... |
7 8 |
* Copyright (C) 2010 Google, Inc. * Rewritten by David Rientjes |
1da177e4c Linux-2.6.12-rc2 |
9 10 |
* * The routines in this file are used to kill a process when |
a49335cce [PATCH] cpusets: ... |
11 12 |
* we're seriously out of memory. This gets called from __alloc_pages() * in mm/page_alloc.c when we really run out of memory. |
1da177e4c Linux-2.6.12-rc2 |
13 14 15 16 17 18 |
* * Since we won't call these routines often (on a well-configured * machine) this file will double as a 'coding guide' and a signpost * for newbie kernel hackers. It features several pointers to major * kernel subsystems and hints as to where to find out what things do. */ |
8ac773b4f [PATCH] OOM kille... |
19 |
#include <linux/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
#include <linux/mm.h> |
4e950f6f0 Remove fs.h from ... |
21 |
#include <linux/err.h> |
5a0e3ad6a include cleanup: ... |
22 |
#include <linux/gfp.h> |
1da177e4c Linux-2.6.12-rc2 |
23 |
#include <linux/sched.h> |
6e84f3152 sched/headers: Pr... |
24 |
#include <linux/sched/mm.h> |
f7ccbae45 sched/headers: Pr... |
25 |
#include <linux/sched/coredump.h> |
299300258 sched/headers: Pr... |
26 |
#include <linux/sched/task.h> |
1da177e4c Linux-2.6.12-rc2 |
27 28 29 |
#include <linux/swap.h> #include <linux/timex.h> #include <linux/jiffies.h> |
ef08e3b49 [PATCH] cpusets: ... |
30 |
#include <linux/cpuset.h> |
b95f1b31b mm: Map most file... |
31 |
#include <linux/export.h> |
8bc719d3c [PATCH] out of me... |
32 |
#include <linux/notifier.h> |
c7ba5c9e8 Memory controller... |
33 |
#include <linux/memcontrol.h> |
6f48d0ebd oom: select task ... |
34 |
#include <linux/mempolicy.h> |
5cd9c58fb security: Fix set... |
35 |
#include <linux/security.h> |
edd45544c oom: avoid deferr... |
36 |
#include <linux/ptrace.h> |
f660daac4 oom: thaw threads... |
37 |
#include <linux/freezer.h> |
43d2b1132 tracepoint: add t... |
38 |
#include <linux/ftrace.h> |
dc3f21ead mm, oom: introduc... |
39 |
#include <linux/ratelimit.h> |
aac453635 mm, oom: introduc... |
40 41 |
#include <linux/kthread.h> #include <linux/init.h> |
4d4bbd852 mm, oom_reaper: s... |
42 |
#include <linux/mmu_notifier.h> |
aac453635 mm, oom: introduc... |
43 44 45 |
#include <asm/tlb.h> #include "internal.h" |
852d8be0a mm: oom: show unr... |
46 |
#include "slab.h" |
43d2b1132 tracepoint: add t... |
47 48 49 |
#define CREATE_TRACE_POINTS #include <trace/events/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
50 |
|
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;

/*
 * Serializes oom killer invocations (out_of_memory()) from all contexts to
 * prevent from over eager oom killing (e.g. when the oom killer is invoked
 * from different domains).
 *
 * oom_killer_disable() relies on this lock to stabilize oom_killer_disabled
 * and mark_oom_victim
 */
DEFINE_MUTEX(oom_lock);
1da177e4c Linux-2.6.12-rc2 |
64 |
|
6f48d0ebd oom: select task ... |
65 66 67 |
#ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill |
ad9624417 oom_kill: has_int... |
68 |
* @start: task struct of which task to consider |
6f48d0ebd oom: select task ... |
69 70 71 72 73 |
* @mask: nodemask passed to page allocator for mempolicy ooms * * Task eligibility is determined by whether or not a candidate task, @tsk, * shares the same mempolicy nodes as current if it is bound by such a policy * and whether or not it has the same set of allowed cpuset nodes. |
495789a51 oom: make oom_sco... |
74 |
*/ |
ad9624417 oom_kill: has_int... |
75 |
static bool has_intersects_mems_allowed(struct task_struct *start, |
6f48d0ebd oom: select task ... |
76 |
const nodemask_t *mask) |
495789a51 oom: make oom_sco... |
77 |
{ |
ad9624417 oom_kill: has_int... |
78 79 |
struct task_struct *tsk; bool ret = false; |
495789a51 oom: make oom_sco... |
80 |
|
ad9624417 oom_kill: has_int... |
81 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
82 |
for_each_thread(start, tsk) { |
6f48d0ebd oom: select task ... |
83 84 85 86 87 88 89 |
if (mask) { /* * If this is a mempolicy constrained oom, tsk's * cpuset is irrelevant. Only return true if its * mempolicy intersects current, otherwise it may be * needlessly killed. */ |
ad9624417 oom_kill: has_int... |
90 |
ret = mempolicy_nodemask_intersects(tsk, mask); |
6f48d0ebd oom: select task ... |
91 92 93 94 95 |
} else { /* * This is not a mempolicy constrained oom, so only * check the mems of tsk's cpuset. */ |
ad9624417 oom_kill: has_int... |
96 |
ret = cpuset_mems_allowed_intersects(current, tsk); |
6f48d0ebd oom: select task ... |
97 |
} |
ad9624417 oom_kill: has_int... |
98 99 |
if (ret) break; |
1da4db0cd oom_kill: change ... |
100 |
} |
ad9624417 oom_kill: has_int... |
101 |
rcu_read_unlock(); |
df1090a8d oom: cleanup has_... |
102 |
|
ad9624417 oom_kill: has_int... |
103 |
return ret; |
6f48d0ebd oom: select task ... |
104 105 106 107 108 109 |
} #else static bool has_intersects_mems_allowed(struct task_struct *tsk, const nodemask_t *mask) { return true; |
495789a51 oom: make oom_sco... |
110 |
} |
6f48d0ebd oom: select task ... |
111 |
#endif /* CONFIG_NUMA */ |
495789a51 oom: make oom_sco... |
112 |
|
6f48d0ebd oom: select task ... |
113 114 115 116 117 118 |
/* * The process p may have detached its own ->mm while exiting or through * use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ |
158e0a2d1 memcg: use find_l... |
119 |
struct task_struct *find_lock_task_mm(struct task_struct *p) |
dd8e8f405 oom: introduce fi... |
120 |
{ |
1da4db0cd oom_kill: change ... |
121 |
struct task_struct *t; |
dd8e8f405 oom: introduce fi... |
122 |
|
4d4048be8 oom_kill: add rcu... |
123 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
124 |
for_each_thread(p, t) { |
dd8e8f405 oom: introduce fi... |
125 126 |
task_lock(t); if (likely(t->mm)) |
4d4048be8 oom_kill: add rcu... |
127 |
goto found; |
dd8e8f405 oom: introduce fi... |
128 |
task_unlock(t); |
1da4db0cd oom_kill: change ... |
129 |
} |
4d4048be8 oom_kill: add rcu... |
130 131 132 |
t = NULL; found: rcu_read_unlock(); |
dd8e8f405 oom: introduce fi... |
133 |
|
4d4048be8 oom_kill: add rcu... |
134 |
return t; |
dd8e8f405 oom: introduce fi... |
135 |
} |
db2a0dd7a mm/oom_kill.c: in... |
136 137 138 139 140 141 142 143 |
/* * order == -1 means the oom kill is required by sysrq, otherwise only * for display purposes. */ static inline bool is_sysrq_oom(struct oom_control *oc) { return oc->order == -1; } |
7c5f64f84 mm: oom: deduplic... |
144 145 146 147 |
static inline bool is_memcg_oom(struct oom_control *oc) { return oc->memcg != NULL; } |
ab290adba oom: make oom_unk... |
148 |
/* return true if the task is not adequate as candidate victim task. */ |
e85bfd3aa oom: filter unkil... |
149 |
static bool oom_unkillable_task(struct task_struct *p, |
2314b42db mm: memcontrol: d... |
150 |
struct mem_cgroup *memcg, const nodemask_t *nodemask) |
ab290adba oom: make oom_unk... |
151 152 153 154 155 156 157 |
{ if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ |
72835c86c mm: unify remaini... |
158 |
if (memcg && !task_in_mem_cgroup(p, memcg)) |
ab290adba oom: make oom_unk... |
159 160 161 162 163 164 165 166 |
return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; } |
852d8be0a mm: oom: show unr... |
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
/* * Print out unreclaimble slabs info when unreclaimable slabs amount is greater * than all user memory (LRU pages) */ static bool is_dump_unreclaim_slabs(void) { unsigned long nr_lru; nr_lru = global_node_page_state(NR_ACTIVE_ANON) + global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) + global_node_page_state(NR_INACTIVE_FILE) + global_node_page_state(NR_ISOLATED_ANON) + global_node_page_state(NR_ISOLATED_FILE) + global_node_page_state(NR_UNEVICTABLE); return (global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru); } |
1da177e4c Linux-2.6.12-rc2 |
185 |
/** |
a63d83f42 oom: badness heur... |
186 |
* oom_badness - heuristic function to determine which candidate task to kill |
1da177e4c Linux-2.6.12-rc2 |
187 |
* @p: task struct of which task we should calculate |
a63d83f42 oom: badness heur... |
188 |
* @totalpages: total present RAM allowed for page allocation |
e8b098fc5 mm: kernel-doc: a... |
189 190 |
* @memcg: task's memory controller, if constrained * @nodemask: nodemask passed to page allocator for mempolicy ooms |
1da177e4c Linux-2.6.12-rc2 |
191 |
* |
a63d83f42 oom: badness heur... |
192 193 194 |
* The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. |
1da177e4c Linux-2.6.12-rc2 |
195 |
*/ |
a7f638f99 mm, oom: normaliz... |
196 197 |
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) |
1da177e4c Linux-2.6.12-rc2 |
198 |
{ |
1e11ad8dc mm, oom: fix badn... |
199 |
long points; |
61eafb00d mm, oom: fix and ... |
200 |
long adj; |
28b83c519 oom: move oom_adj... |
201 |
|
72835c86c mm: unify remaini... |
202 |
if (oom_unkillable_task(p, memcg, nodemask)) |
26ebc9849 oom: /proc/<pid>/... |
203 |
return 0; |
1da177e4c Linux-2.6.12-rc2 |
204 |
|
dd8e8f405 oom: introduce fi... |
205 206 |
p = find_lock_task_mm(p); if (!p) |
1da177e4c Linux-2.6.12-rc2 |
207 |
return 0; |
bb8a4b7fd mm, oom_reaper: h... |
208 209 |
/* * Do not even consider tasks which are explicitly marked oom |
b18dc5f29 mm, oom: skip vfo... |
210 211 |
* unkillable or have been already oom reaped or the are in * the middle of vfork |
bb8a4b7fd mm, oom_reaper: h... |
212 |
*/ |
a9c58b907 mm, oom: change t... |
213 |
adj = (long)p->signal->oom_score_adj; |
bb8a4b7fd mm, oom_reaper: h... |
214 |
if (adj == OOM_SCORE_ADJ_MIN || |
862e3073b mm, oom: get rid ... |
215 |
test_bit(MMF_OOM_SKIP, &p->mm->flags) || |
b18dc5f29 mm, oom: skip vfo... |
216 |
in_vfork(p)) { |
5aecc85ab oom: do not kill ... |
217 218 219 |
task_unlock(p); return 0; } |
1da177e4c Linux-2.6.12-rc2 |
220 |
/* |
a63d83f42 oom: badness heur... |
221 |
* The baseline for the badness score is the proportion of RAM that each |
f755a042d oom: use pte page... |
222 |
* task's rss, pagetable and swap space use. |
1da177e4c Linux-2.6.12-rc2 |
223 |
*/ |
dc6c9a35b mm: account pmd p... |
224 |
points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + |
af5b0f6a0 mm: consolidate p... |
225 |
mm_pgtables_bytes(p->mm) / PAGE_SIZE; |
a63d83f42 oom: badness heur... |
226 |
task_unlock(p); |
1da177e4c Linux-2.6.12-rc2 |
227 |
|
61eafb00d mm, oom: fix and ... |
228 229 230 |
/* Normalize to oom_score_adj units */ adj *= totalpages / 1000; points += adj; |
1da177e4c Linux-2.6.12-rc2 |
231 |
|
f19e8aa11 oom: always retur... |
232 |
/* |
a7f638f99 mm, oom: normaliz... |
233 234 |
* Never return 0 for an eligible task regardless of the root bonus and * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). |
f19e8aa11 oom: always retur... |
235 |
*/ |
1e11ad8dc mm, oom: fix badn... |
236 |
return points > 0 ? points : 1; |
1da177e4c Linux-2.6.12-rc2 |
237 |
} |
7c5f64f84 mm: oom: deduplic... |
238 239 240 241 242 243 |
enum oom_constraint { CONSTRAINT_NONE, CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG, }; |
1da177e4c Linux-2.6.12-rc2 |
244 |
/* |
9b0f8b040 [PATCH] Terminate... |
245 246 |
* Determine the type of allocation constraint. */ |
7c5f64f84 mm: oom: deduplic... |
247 |
static enum oom_constraint constrained_alloc(struct oom_control *oc) |
4365a5676 oom-kill: fix NUM... |
248 |
{ |
54a6eb5c4 mm: use two zonel... |
249 |
struct zone *zone; |
dd1a239f6 mm: have zonelist... |
250 |
struct zoneref *z; |
6e0fc46dc mm, oom: organize... |
251 |
enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask); |
a63d83f42 oom: badness heur... |
252 253 |
bool cpuset_limited = false; int nid; |
9b0f8b040 [PATCH] Terminate... |
254 |
|
7c5f64f84 mm: oom: deduplic... |
255 |
if (is_memcg_oom(oc)) { |
bbec2e151 mm: rename page_c... |
256 |
oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1; |
7c5f64f84 mm: oom: deduplic... |
257 258 |
return CONSTRAINT_MEMCG; } |
a63d83f42 oom: badness heur... |
259 |
/* Default to all available memory */ |
7c5f64f84 mm: oom: deduplic... |
260 261 262 263 |
oc->totalpages = totalram_pages + total_swap_pages; if (!IS_ENABLED(CONFIG_NUMA)) return CONSTRAINT_NONE; |
a63d83f42 oom: badness heur... |
264 |
|
6e0fc46dc mm, oom: organize... |
265 |
if (!oc->zonelist) |
a63d83f42 oom: badness heur... |
266 |
return CONSTRAINT_NONE; |
4365a5676 oom-kill: fix NUM... |
267 268 269 270 271 |
/* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ |
6e0fc46dc mm, oom: organize... |
272 |
if (oc->gfp_mask & __GFP_THISNODE) |
4365a5676 oom-kill: fix NUM... |
273 |
return CONSTRAINT_NONE; |
9b0f8b040 [PATCH] Terminate... |
274 |
|
4365a5676 oom-kill: fix NUM... |
275 |
/* |
a63d83f42 oom: badness heur... |
276 277 278 |
* This is not a __GFP_THISNODE allocation, so a truncated nodemask in * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). |
4365a5676 oom-kill: fix NUM... |
279 |
*/ |
6e0fc46dc mm, oom: organize... |
280 281 |
if (oc->nodemask && !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { |
7c5f64f84 mm: oom: deduplic... |
282 |
oc->totalpages = total_swap_pages; |
6e0fc46dc mm, oom: organize... |
283 |
for_each_node_mask(nid, *oc->nodemask) |
7c5f64f84 mm: oom: deduplic... |
284 |
oc->totalpages += node_spanned_pages(nid); |
9b0f8b040 [PATCH] Terminate... |
285 |
return CONSTRAINT_MEMORY_POLICY; |
a63d83f42 oom: badness heur... |
286 |
} |
4365a5676 oom-kill: fix NUM... |
287 288 |
/* Check this allocation failure is caused by cpuset's wall function */ |
6e0fc46dc mm, oom: organize... |
289 290 291 |
for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, high_zoneidx, oc->nodemask) if (!cpuset_zone_allowed(zone, oc->gfp_mask)) |
a63d83f42 oom: badness heur... |
292 |
cpuset_limited = true; |
9b0f8b040 [PATCH] Terminate... |
293 |
|
a63d83f42 oom: badness heur... |
294 |
if (cpuset_limited) { |
7c5f64f84 mm: oom: deduplic... |
295 |
oc->totalpages = total_swap_pages; |
a63d83f42 oom: badness heur... |
296 |
for_each_node_mask(nid, cpuset_current_mems_allowed) |
7c5f64f84 mm: oom: deduplic... |
297 |
oc->totalpages += node_spanned_pages(nid); |
a63d83f42 oom: badness heur... |
298 299 |
return CONSTRAINT_CPUSET; } |
9b0f8b040 [PATCH] Terminate... |
300 301 |
return CONSTRAINT_NONE; } |
7c5f64f84 mm: oom: deduplic... |
302 |
static int oom_evaluate_task(struct task_struct *task, void *arg) |
462607ecc mm, oom: introduc... |
303 |
{ |
7c5f64f84 mm: oom: deduplic... |
304 305 |
struct oom_control *oc = arg; unsigned long points; |
6e0fc46dc mm, oom: organize... |
306 |
if (oom_unkillable_task(task, NULL, oc->nodemask)) |
7c5f64f84 mm: oom: deduplic... |
307 |
goto next; |
462607ecc mm, oom: introduc... |
308 309 310 |
/* * This task already has access to memory reserves and is being killed. |
a373966d1 mm, oom: hide mm ... |
311 |
* Don't allow any other task to have access to the reserves unless |
862e3073b mm, oom: get rid ... |
312 |
* the task has MMF_OOM_SKIP because chances that it would release |
a373966d1 mm, oom: hide mm ... |
313 |
* any memory is quite low. |
462607ecc mm, oom: introduc... |
314 |
*/ |
862e3073b mm, oom: get rid ... |
315 316 |
if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) { if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) |
7c5f64f84 mm: oom: deduplic... |
317 318 |
goto next; goto abort; |
a373966d1 mm, oom: hide mm ... |
319 |
} |
462607ecc mm, oom: introduc... |
320 |
|
e1e12d2f3 mm, oom: fix race... |
321 322 323 324 |
/* * If task is allocating a lot of memory and has been marked to be * killed first if it triggers an oom, then select it. */ |
7c5f64f84 mm: oom: deduplic... |
325 326 327 328 |
if (oom_task_origin(task)) { points = ULONG_MAX; goto select; } |
e1e12d2f3 mm, oom: fix race... |
329 |
|
7c5f64f84 mm: oom: deduplic... |
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 |
points = oom_badness(task, NULL, oc->nodemask, oc->totalpages); if (!points || points < oc->chosen_points) goto next; /* Prefer thread group leaders for display purposes */ if (points == oc->chosen_points && thread_group_leader(oc->chosen)) goto next; select: if (oc->chosen) put_task_struct(oc->chosen); get_task_struct(task); oc->chosen = task; oc->chosen_points = points; next: return 0; abort: if (oc->chosen) put_task_struct(oc->chosen); oc->chosen = (void *)-1UL; return 1; |
462607ecc mm, oom: introduc... |
350 |
} |
9b0f8b040 [PATCH] Terminate... |
351 |
/* |
7c5f64f84 mm: oom: deduplic... |
352 353 |
* Simple selection loop. We choose the process with the highest number of * 'points'. In case scan was aborted, oc->chosen is set to -1. |
1da177e4c Linux-2.6.12-rc2 |
354 |
*/ |
7c5f64f84 mm: oom: deduplic... |
355 |
static void select_bad_process(struct oom_control *oc) |
1da177e4c Linux-2.6.12-rc2 |
356 |
{ |
7c5f64f84 mm: oom: deduplic... |
357 358 359 360 |
if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); else { struct task_struct *p; |
d49ad9355 mm, oom: prefer t... |
361 |
|
7c5f64f84 mm: oom: deduplic... |
362 363 364 365 366 |
rcu_read_lock(); for_each_process(p) if (oom_evaluate_task(p, oc)) break; rcu_read_unlock(); |
1da4db0cd oom_kill: change ... |
367 |
} |
972c4ea59 [PATCH] select_ba... |
368 |
|
7c5f64f84 mm: oom: deduplic... |
369 |
oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages; |
1da177e4c Linux-2.6.12-rc2 |
370 371 372 |
} /** |
1b578df02 mm/oom_kill: fix ... |
373 |
* dump_tasks - dump current memory state of all system tasks |
dad7557eb mm: fix kernel-do... |
374 |
* @memcg: current's memory controller, if constrained |
e85bfd3aa oom: filter unkil... |
375 |
* @nodemask: nodemask passed to page allocator for mempolicy ooms |
1b578df02 mm/oom_kill: fix ... |
376 |
* |
e85bfd3aa oom: filter unkil... |
377 378 379 |
* Dumps the current memory state of all eligible tasks. Tasks not in the same * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes * are not shown. |
af5b0f6a0 mm: consolidate p... |
380 381 |
* State information includes task's pid, uid, tgid, vm size, rss, * pgtables_bytes, swapents, oom_score_adj value, and name. |
fef1bdd68 oom: add sysctl t... |
382 |
*/ |
2314b42db mm: memcontrol: d... |
383 |
static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) |
fef1bdd68 oom: add sysctl t... |
384 |
{ |
c55db9578 oom: dump_tasks u... |
385 386 |
struct task_struct *p; struct task_struct *task; |
fef1bdd68 oom: add sysctl t... |
387 |
|
c3b78b11e mm, oom: describe... |
388 389 390 391 |
pr_info("Tasks state (memory values in pages): "); pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name "); |
6b0c81b3b mm, oom: reduce d... |
392 |
rcu_read_lock(); |
c55db9578 oom: dump_tasks u... |
393 |
for_each_process(p) { |
72835c86c mm: unify remaini... |
394 |
if (oom_unkillable_task(p, memcg, nodemask)) |
b4416d2be oom: do not dump ... |
395 |
continue; |
fef1bdd68 oom: add sysctl t... |
396 |
|
c55db9578 oom: dump_tasks u... |
397 398 |
task = find_lock_task_mm(p); if (!task) { |
6d2661ede oom: fix possible... |
399 |
/* |
74ab7f1d3 oom: improve comm... |
400 401 |
* This is a kthread or all of p's threads have already * detached their mm's. There's no need to report |
c55db9578 oom: dump_tasks u... |
402 |
* them; they can't be oom killed anyway. |
6d2661ede oom: fix possible... |
403 |
*/ |
6d2661ede oom: fix possible... |
404 405 |
continue; } |
c55db9578 oom: dump_tasks u... |
406 |
|
c3b78b11e mm, oom: describe... |
407 408 |
pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s ", |
078de5f70 userns: Store uid... |
409 410 |
task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), |
af5b0f6a0 mm: consolidate p... |
411 |
mm_pgtables_bytes(task->mm), |
de34d965a mm, oom: replace ... |
412 |
get_mm_counter(task->mm, MM_SWAPENTS), |
a63d83f42 oom: badness heur... |
413 |
task->signal->oom_score_adj, task->comm); |
c55db9578 oom: dump_tasks u... |
414 415 |
task_unlock(task); } |
6b0c81b3b mm, oom: reduce d... |
416 |
rcu_read_unlock(); |
fef1bdd68 oom: add sysctl t... |
417 |
} |
2a966b77a mm: oom: add memc... |
418 |
static void dump_header(struct oom_control *oc, struct task_struct *p) |
1b604d75b oom: dump stack a... |
419 |
{ |
0205f7557 mm: simplify node... |
420 421 422 423 424 |
pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd ", current->comm, oc->gfp_mask, &oc->gfp_mask, nodemask_pr_args(oc->nodemask), oc->order, current->signal->oom_score_adj); |
9254990fb oom: warn if we g... |
425 426 427 |
if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) pr_warn("COMPACTION is disabled!!! "); |
a0795cd41 mm, oom: print sy... |
428 |
|
da39da3a5 mm, oom: remove t... |
429 |
cpuset_print_current_mems_allowed(); |
1b604d75b oom: dump stack a... |
430 |
dump_stack(); |
852d8be0a mm: oom: show unr... |
431 |
if (is_memcg_oom(oc)) |
2a966b77a mm: oom: add memc... |
432 |
mem_cgroup_print_oom_info(oc->memcg, p); |
852d8be0a mm: oom: show unr... |
433 |
else { |
299c517ad mm, oom: header n... |
434 |
show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask); |
852d8be0a mm: oom: show unr... |
435 436 437 |
if (is_dump_unreclaim_slabs()) dump_unreclaimable_slab(); } |
1b604d75b oom: dump stack a... |
438 |
if (sysctl_oom_dump_tasks) |
2a966b77a mm: oom: add memc... |
439 |
dump_tasks(oc->memcg, oc->nodemask); |
1b604d75b oom: dump stack a... |
440 |
} |
5695be142 OOM, PM: OOM kill... |
441 |
/* |
c32b3cbe0 oom, PM: make OOM... |
442 |
* Number of OOM victims in flight |
5695be142 OOM, PM: OOM kill... |
443 |
*/ |
c32b3cbe0 oom, PM: make OOM... |
444 445 |
static atomic_t oom_victims = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); |
5695be142 OOM, PM: OOM kill... |
446 |
|
7c5f64f84 mm: oom: deduplic... |
447 |
static bool oom_killer_disabled __read_mostly; |
5695be142 OOM, PM: OOM kill... |
448 |
|
bc448e897 mm, oom_reaper: r... |
449 |
#define K(x) ((x) << (PAGE_SHIFT-10)) |
3ef22dfff oom, oom_reaper: ... |
450 451 452 453 454 455 |
/* * task->mm can be NULL if the task is the exited group leader. So to * determine whether the task is using a particular mm, we examine all the * task's threads: if one of those is using this mm then this task was also * using it. */ |
44a70adec mm, oom_adj: make... |
456 |
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) |
3ef22dfff oom, oom_reaper: ... |
457 458 459 460 461 462 463 464 465 466 |
{ struct task_struct *t; for_each_thread(p, t) { struct mm_struct *t_mm = READ_ONCE(t->mm); if (t_mm) return t_mm == mm; } return false; } |
#ifdef CONFIG_MMU
/*
 * OOM Reaper kernel thread which tries to reap the memory used by the OOM
 * victim (if that is possible) to help the OOM killer to move on.
 */
static struct task_struct *oom_reaper_th;
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);

bool __oom_reap_task_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	bool ret = true;

	/*
	 * Tell all users of get_user/copy_from_user etc... that the content
	 * is no longer stable. No barriers really needed because unmapping
	 * should imply barriers already and the reader would hit a page fault
	 * if it stumbled over a reaped memory.
	 */
	set_bit(MMF_UNSTABLE, &mm->flags);

	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
		if (!can_madv_dontneed_vma(vma))
			continue;

		/*
		 * Only anonymous pages have a good chance to be dropped
		 * without additional steps which we cannot afford as we
		 * are OOM already.
		 *
		 * We do not even care about fs backed pages because all
		 * which are reclaimable have already been reclaimed and
		 * we do not want to block exit_mmap by keeping mm ref
		 * count elevated without a good reason.
		 */
		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
			const unsigned long start = vma->vm_start;
			const unsigned long end = vma->vm_end;
			struct mmu_gather tlb;

			tlb_gather_mmu(&tlb, mm, start, end);
			if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
				tlb_finish_mmu(&tlb, start, end);
				ret = false;
				continue;
			}
			unmap_page_range(&tlb, vma, start, end, NULL);
			mmu_notifier_invalidate_range_end(mm, start, end);
			tlb_finish_mmu(&tlb, start, end);
		}
	}

	return ret;
}
431f42fdf mm/oom_kill.c: cl... |
522 523 524 525 526 527 |
/* * Reaps the address space of the give task. * * Returns true on success and false if none or part of the address space * has been reclaimed and the caller should retry later. */ |
27ae357fa mm, oom: fix conc... |
528 529 |
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) { |
aac453635 mm, oom: introduc... |
530 |
bool ret = true; |
aac453635 mm, oom: introduc... |
531 |
if (!down_read_trylock(&mm->mmap_sem)) { |
422580c3c mm/oom_kill.c: ad... |
532 |
trace_skip_task_reaping(tsk->pid); |
af5679fbc mm, oom: remove o... |
533 |
return false; |
4d4bbd852 mm, oom_reaper: s... |
534 535 536 |
} /* |
212925802 mm: oom: let oom_... |
537 538 539 540 |
* MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't * work on the mm anymore. The check for MMF_OOM_SKIP must run * under mmap_sem for reading because it serializes against the * down_write();up_write() cycle in exit_mmap(). |
e5e3f4c4f mm, oom_reaper: m... |
541 |
*/ |
212925802 mm: oom: let oom_... |
542 |
if (test_bit(MMF_OOM_SKIP, &mm->flags)) { |
422580c3c mm/oom_kill.c: ad... |
543 |
trace_skip_task_reaping(tsk->pid); |
431f42fdf mm/oom_kill.c: cl... |
544 |
goto out_unlock; |
aac453635 mm, oom: introduc... |
545 |
} |
422580c3c mm/oom_kill.c: ad... |
546 |
trace_start_task_reaping(tsk->pid); |
93065ac75 mm, oom: distingu... |
547 |
/* failed to reap part of the address space. Try again later */ |
431f42fdf mm/oom_kill.c: cl... |
548 549 550 |
ret = __oom_reap_task_mm(mm); if (!ret) goto out_finish; |
aac453635 mm, oom: introduc... |
551 |
|
bc448e897 mm, oom_reaper: r... |
552 553 554 555 556 557 |
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB ", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES))); |
431f42fdf mm/oom_kill.c: cl... |
558 559 560 |
out_finish: trace_finish_task_reaping(tsk->pid); out_unlock: |
aac453635 mm, oom: introduc... |
561 |
up_read(&mm->mmap_sem); |
36324a990 oom: clear TIF_ME... |
562 |
|
aac453635 mm, oom: introduc... |
563 564 |
return ret; } |
bc448e897 mm, oom_reaper: r... |
565 |
#define MAX_OOM_REAP_RETRIES 10 |
36324a990 oom: clear TIF_ME... |
566 |
static void oom_reap_task(struct task_struct *tsk) |
aac453635 mm, oom: introduc... |
567 568 |
{ int attempts = 0; |
26db62f17 oom: keep mm of t... |
569 |
struct mm_struct *mm = tsk->signal->oom_mm; |
aac453635 mm, oom: introduc... |
570 571 |
/* Retry the down_read_trylock(mmap_sem) a few times */ |
27ae357fa mm, oom: fix conc... |
572 |
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) |
aac453635 mm, oom: introduc... |
573 |
schedule_timeout_idle(HZ/10); |
97b1255cb mm,oom_reaper: ch... |
574 575 |
if (attempts <= MAX_OOM_REAP_RETRIES || test_bit(MMF_OOM_SKIP, &mm->flags)) |
7ebffa455 mm,oom_reaper: re... |
576 |
goto done; |
11a410d51 mm, oom_reaper: d... |
577 |
|
7ebffa455 mm,oom_reaper: re... |
578 579 580 |
pr_info("oom_reaper: unable to reap pid:%d (%s) ", task_pid_nr(tsk), tsk->comm); |
7ebffa455 mm,oom_reaper: re... |
581 |
debug_show_all_locks(); |
bc448e897 mm, oom_reaper: r... |
582 |
|
7ebffa455 mm,oom_reaper: re... |
583 |
done: |
449d777d7 mm, oom_reaper: c... |
584 |
tsk->oom_reaper_list = NULL; |
449d777d7 mm, oom_reaper: c... |
585 |
|
26db62f17 oom: keep mm of t... |
586 587 588 589 |
/* * Hide this mm from OOM killer because it has been either reaped or * somebody can't call up_write(mmap_sem). */ |
862e3073b mm, oom: get rid ... |
590 |
set_bit(MMF_OOM_SKIP, &mm->flags); |
26db62f17 oom: keep mm of t... |
591 |
|
aac453635 mm, oom: introduc... |
592 |
/* Drop a reference taken by wake_oom_reaper */ |
36324a990 oom: clear TIF_ME... |
593 |
put_task_struct(tsk); |
aac453635 mm, oom: introduc... |
594 595 596 597 598 |
} static int oom_reaper(void *unused) { while (true) { |
03049269d mm, oom_reaper: i... |
599 |
struct task_struct *tsk = NULL; |
aac453635 mm, oom: introduc... |
600 |
|
29c696e1c oom: make oom_rea... |
601 |
wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); |
03049269d mm, oom_reaper: i... |
602 |
spin_lock(&oom_reaper_lock); |
29c696e1c oom: make oom_rea... |
603 604 605 |
if (oom_reaper_list != NULL) { tsk = oom_reaper_list; oom_reaper_list = tsk->oom_reaper_list; |
03049269d mm, oom_reaper: i... |
606 607 608 609 610 |
} spin_unlock(&oom_reaper_lock); if (tsk) oom_reap_task(tsk); |
aac453635 mm, oom: introduc... |
611 612 613 614 |
} return 0; } |
7c5f64f84 mm: oom: deduplic... |
615 |
/*
 * Queue @tsk for asynchronous address-space reaping by the oom_reaper
 * kthread and wake it up.  Takes a task reference here; the matching
 * put_task_struct() is done by the reaper once it is finished with the
 * task (see the "Drop a reference taken by wake_oom_reaper" comment in
 * oom_reap_task above).
 */
static void wake_oom_reaper(struct task_struct *tsk)
{
	/* mm is already queued? The QUEUED bit makes queueing one-shot per mm. */
	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
		return;

	get_task_struct(tsk);

	/* Push onto the singly-linked reaper list under the lock. */
	spin_lock(&oom_reaper_lock);
	tsk->oom_reaper_list = oom_reaper_list;
	oom_reaper_list = tsk;
	spin_unlock(&oom_reaper_lock);
	trace_wake_reaper(tsk->pid);
	wake_up(&oom_reaper_wait);
}

/* Spawn the oom_reaper kthread at boot. */
static int __init oom_init(void)
{
	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
	return 0;
}
subsys_initcall(oom_init)
#else
/* Without an MMU there is nothing to reap; provide a no-op stub. */
static inline void wake_oom_reaper(struct task_struct *tsk)
{
}
#endif /* CONFIG_MMU */
aac453635 mm, oom: introduc... |
641 |
|
49550b605 oom: add helpers ... |
642 |
/**
 * mark_oom_victim - mark the given task as OOM victim
 * @tsk: task to mark
 *
 * Has to be called with oom_lock held and never after
 * oom has been disabled already.
 *
 * tsk->mm has to be non NULL and caller has to guarantee it is stable (either
 * under task_lock or operate on the current).
 */
static void mark_oom_victim(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	WARN_ON(oom_killer_disabled);
	/* OOM killer might race with memcg OOM; only the first marker proceeds */
	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
		return;

	/*
	 * oom_mm is bound to the signal struct life time.
	 * The first thread of the group to get here publishes the mm and
	 * pins it with mmgrab(); the reference is dropped outside this
	 * function when the signal struct is torn down.
	 */
	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
		mmgrab(tsk->signal->oom_mm);
		set_bit(MMF_OOM_VICTIM, &mm->flags);
	}

	/*
	 * Make sure that the task is woken up from uninterruptible sleep
	 * if it is frozen because OOM killer wouldn't be able to free
	 * any memory and livelock. freezing_slow_path will tell the freezer
	 * that TIF_MEMDIE tasks should be ignored.
	 */
	__thaw_task(tsk);
	atomic_inc(&oom_victims);
	trace_mark_victim(tsk->pid);
}

/**
 * exit_oom_victim - note the exit of an OOM victim
 */
void exit_oom_victim(void)
{
	clear_thread_flag(TIF_MEMDIE);

	/* Last victim gone: wake oom_killer_disable() waiters, if any. */
	if (!atomic_dec_return(&oom_victims))
		wake_up_all(&oom_victims_wait);
}

/**
 * oom_killer_enable - enable OOM killer
 */
void oom_killer_enable(void)
{
	oom_killer_disabled = false;
	pr_info("OOM killer enabled.\n");
}

/**
 * oom_killer_disable - disable OOM killer
 * @timeout: maximum timeout to wait for oom victims in jiffies
 *
 * Forces all page allocations to fail rather than trigger OOM killer.
 * Will block and wait until all OOM victims are killed or the given
 * timeout expires.
 *
 * The function cannot be called when there are runnable user tasks because
 * the userspace would see unexpected allocation failures as a result. Any
 * new usage of this function should be consulted with MM people.
 *
 * Returns true if successful and false if the OOM killer cannot be
 * disabled.
 */
bool oom_killer_disable(signed long timeout)
{
	signed long ret;

	/*
	 * Make sure to not race with an ongoing OOM killer. Check that the
	 * current is not killed (possibly due to sharing the victim's memory).
	 */
	if (mutex_lock_killable(&oom_lock))
		return false;
	oom_killer_disabled = true;
	mutex_unlock(&oom_lock);

	/* Wait for all marked victims to drain before declaring success. */
	ret = wait_event_interruptible_timeout(oom_victims_wait,
			!atomic_read(&oom_victims), timeout);
	if (ret <= 0) {
		/* Timed out or interrupted: roll the disable back. */
		oom_killer_enable();
		return false;
	}
	pr_info("OOM killer disabled.\n");

	return true;
}
1af8bb432 mm, oom: fortify ... |
736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 |
/*
 * Per-task check: is this task (or its thread group) already on its way
 * out, so that killing it again would be pointless?
 */
static inline bool __task_will_free_mem(struct task_struct *task)
{
	struct signal_struct *sig = task->signal;

	/*
	 * A coredumping process may sleep for an extended period in exit_mm(),
	 * so the oom killer cannot assume that the process will promptly exit
	 * and release memory.
	 */
	if (sig->flags & SIGNAL_GROUP_COREDUMP)
		return false;

	if (sig->flags & SIGNAL_GROUP_EXIT)
		return true;

	if (thread_group_empty(task) && (task->flags & PF_EXITING))
		return true;

	return false;
}

/*
 * Checks whether the given task is dying or exiting and likely to
 * release its address space. This means that all threads and processes
 * sharing the same mm have to be killed or exiting.
 * Caller has to make sure that task->mm is stable (hold task_lock or
 * it operates on the current).
 */
static bool task_will_free_mem(struct task_struct *task)
{
	struct mm_struct *mm = task->mm;
	struct task_struct *p;
	bool ret = true;

	/*
	 * Skip tasks without mm because it might have passed its exit_mm and
	 * exit_oom_victim. oom_reaper could have rescued that but do not rely
	 * on that for now. We can consider find_lock_task_mm in future.
	 */
	if (!mm)
		return false;

	if (!__task_will_free_mem(task))
		return false;

	/*
	 * This task has already been drained by the oom reaper so there are
	 * only small chances it will free some more
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags))
		return false;

	/* Sole user of the mm: no other task can pin it. */
	if (atomic_read(&mm->mm_users) <= 1)
		return true;

	/*
	 * Make sure that all tasks which share the mm with the given tasks
	 * are dying as well to make sure that a) nobody pins its mm and
	 * b) the task is also reapable by the oom reaper.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(task, p))
			continue;
		ret = __task_will_free_mem(p);
		if (!ret)
			break;
	}
	rcu_read_unlock();

	return ret;
}
5989ad7b5 mm, oom: refactor... |
809 |
/*
 * Deliver SIGKILL to @victim (and to every other user process sharing its
 * mm), mark it as an OOM victim and hand it to the oom_reaper if the mm is
 * safe to reap.  Consumes the caller's reference on @victim.
 */
static void __oom_kill_process(struct task_struct *victim)
{
	struct task_struct *p;
	struct mm_struct *mm;
	bool can_oom_reap = true;

	/* Find a live thread of the group that still owns an mm. */
	p = find_lock_task_mm(victim);
	if (!p) {
		/* Whole group already lost its mm: nothing left to kill. */
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		/* Retarget to the thread holding the mm; swap references. */
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* Get a reference to safely compare mm after task_unlock(victim) */
	mm = victim->mm;
	mmgrab(mm);

	/* Raise event before sending signal: task reaper must see this */
	count_vm_event(OOM_KILL);
	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);

	/*
	 * We should send SIGKILL before granting access to memory reserves
	 * in order to prevent the OOM victim from depleting the memory
	 * reserves from the user space under its control.
	 */
	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
	mark_oom_victim(victim);
	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups, if
	 * any.  They don't get access to memory reserves, though, to avoid
	 * depletion of all memory.  This prevents mm->mmap_sem livelock when an
	 * oom killed thread cannot exit because it requires the semaphore and
	 * its contended by another thread trying to allocate memory itself.
	 * That thread will now get access to memory reserves since it has a
	 * pending fatal signal.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(p, victim))
			continue;
		if (is_global_init(p)) {
			/* Never kill init: the mm stays pinned, so don't reap it. */
			can_oom_reap = false;
			set_bit(MMF_OOM_SKIP, &mm->flags);
			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
					task_pid_nr(victim), victim->comm,
					task_pid_nr(p), p->comm);
			continue;
		}
		/*
		 * No use_mm() user needs to read from the userspace so we are
		 * ok to reap it.
		 */
		if (unlikely(p->flags & PF_KTHREAD))
			continue;
		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
	}
	rcu_read_unlock();

	if (can_oom_reap)
		wake_oom_reaper(victim);

	/* Drop the references taken above. */
	mmdrop(mm);
	put_task_struct(victim);
}
#undef K
1da177e4c Linux-2.6.12-rc2 |
889 |
|
309ed8825 oom: extract pani... |
890 |
/* |
3d8b38eb8 mm, oom: introduc... |
891 892 893 894 895 |
* Kill provided task unless it's secured by setting * oom_score_adj to OOM_SCORE_ADJ_MIN. */ static int oom_kill_memcg_member(struct task_struct *task, void *unused) { |
eed3ca0a6 mm,oom: don't kil... |
896 897 |
if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN && !is_global_init(task)) { |
3d8b38eb8 mm, oom: introduc... |
898 899 900 901 902 |
get_task_struct(task); __oom_kill_process(task); } return 0; } |
5989ad7b5 mm, oom: refactor... |
903 904 905 906 907 908 909 |
/*
 * Kill the victim chosen in @oc (oc->chosen), preferring to sacrifice one
 * of its children with a higher oom_badness() score, and expand the kill to
 * the whole memory cgroup when group-oom is configured.  Consumes the
 * caller's reference on oc->chosen.
 */
static void oom_kill_process(struct oom_control *oc, const char *message)
{
	struct task_struct *p = oc->chosen;
	unsigned int points = oc->chosen_points;
	struct task_struct *victim = p;
	struct task_struct *child;
	struct task_struct *t;
	struct mem_cgroup *oom_group;
	unsigned int victim_points = 0;
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
					      DEFAULT_RATELIMIT_BURST);

	/*
	 * If the task is already exiting, don't alarm the sysadmin or kill
	 * its children or threads, just give it access to memory reserves
	 * so it can die quickly
	 */
	task_lock(p);
	if (task_will_free_mem(p)) {
		mark_oom_victim(p);
		wake_oom_reaper(p);
		task_unlock(p);
		put_task_struct(p);
		return;
	}
	task_unlock(p);

	/* Rate-limit the (expensive, noisy) OOM report. */
	if (__ratelimit(&oom_rs))
		dump_header(oc, p);

	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
		message, task_pid_nr(p), p->comm, points);

	/*
	 * If any of p's children has a different mm and is eligible for kill,
	 * the one with the highest oom_badness() score is sacrificed for its
	 * parent.  This attempts to lose the minimal amount of work done while
	 * still freeing memory.
	 */
	read_lock(&tasklist_lock);

	/*
	 * The task 'p' might have already exited before reaching here. The
	 * put_task_struct() will free task_struct 'p' while the loop still try
	 * to access the field of 'p', so, get an extra reference.
	 */
	get_task_struct(p);
	for_each_thread(p, t) {
		list_for_each_entry(child, &t->children, sibling) {
			unsigned int child_points;

			/* Children sharing p's mm would free nothing extra. */
			if (process_shares_mm(child, p->mm))
				continue;
			/*
			 * oom_badness() returns 0 if the thread is unkillable
			 */
			child_points = oom_badness(child,
				oc->memcg, oc->nodemask, oc->totalpages);
			if (child_points > victim_points) {
				/* Move our reference to the better victim. */
				put_task_struct(victim);
				victim = child;
				victim_points = child_points;
				get_task_struct(victim);
			}
		}
	}
	put_task_struct(p);
	read_unlock(&tasklist_lock);

	/*
	 * Do we need to kill the entire memory cgroup?
	 * Or even one of the ancestor memory cgroups?
	 * Check this out before killing the victim task.
	 */
	oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);

	__oom_kill_process(victim);

	/*
	 * If necessary, kill all tasks in the selected memory cgroup.
	 */
	if (oom_group) {
		mem_cgroup_print_oom_group(oom_group);
		mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
		mem_cgroup_put(oom_group);
	}
}
309ed8825 oom: extract pani... |
989 990 991 |
/* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ |
7c5f64f84 mm: oom: deduplic... |
992 993 |
static void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint) |
309ed8825 oom: extract pani... |
994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 |
{ if (likely(!sysctl_panic_on_oom)) return; if (sysctl_panic_on_oom != 2) { /* * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ if (constraint != CONSTRAINT_NONE) return; } |
071a4befe mm, oom: do not p... |
1006 |
/* Do not panic for oom kills triggered by sysrq */ |
db2a0dd7a mm/oom_kill.c: in... |
1007 |
if (is_sysrq_oom(oc)) |
071a4befe mm, oom: do not p... |
1008 |
return; |
2a966b77a mm: oom: add memc... |
1009 |
dump_header(oc, NULL); |
309ed8825 oom: extract pani... |
1010 1011 1012 1013 |
panic("Out of memory: %s panic_on_oom is enabled ", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } |
8bc719d3c [PATCH] out of me... |
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 |
/*
 * Notifier chain invoked from out_of_memory() before selecting a victim;
 * callbacks may reclaim memory and report the amount freed, which can
 * abort the OOM kill entirely.
 */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
1da177e4c Linux-2.6.12-rc2 |
1027 |
/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @oc: pointer to struct oom_control
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 *
 * Returns true if a victim was found (or the OOM was resolved another
 * way), false if the OOM killer is disabled or no victim exists.
 */
bool out_of_memory(struct oom_control *oc)
{
	unsigned long freed = 0;
	enum oom_constraint constraint = CONSTRAINT_NONE;

	if (oom_killer_disabled)
		return false;

	/* Give OOM notifiers a chance to free memory for global OOMs only. */
	if (!is_memcg_oom(oc)) {
		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
		if (freed > 0)
			/* Got some memory back in the last second. */
			return true;
	}

	/*
	 * If current has a pending SIGKILL or is exiting, then automatically
	 * select it. The goal is to allow it to allocate so that it may
	 * quickly exit and free its memory.
	 */
	if (task_will_free_mem(current)) {
		mark_oom_victim(current);
		wake_oom_reaper(current);
		return true;
	}

	/*
	 * The OOM killer does not compensate for IO-less reclaim.
	 * pagefault_out_of_memory lost its gfp context so we have to
	 * make sure exclude 0 mask - all other users should have at least
	 * ___GFP_DIRECT_RECLAIM to get here.
	 */
	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
		return true;

	/*
	 * Check if there were limitations on the allocation (only relevant for
	 * NUMA and memcg) that may require different handling.
	 */
	constraint = constrained_alloc(oc);
	if (constraint != CONSTRAINT_MEMORY_POLICY)
		oc->nodemask = NULL;
	check_panic_on_oom(oc, constraint);

	/* Honor oom_kill_allocating_task for global OOMs when current is killable. */
	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
		get_task_struct(current);
		oc->chosen = current;
		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
		return true;
	}

	select_bad_process(oc);
	/* Found nothing?!?! */
	if (!oc->chosen) {
		dump_header(oc, NULL);
		pr_warn("Out of memory and no killable processes...\n");
		/*
		 * If we got here due to an actual allocation at the
		 * system level, we cannot survive this and will enter
		 * an endless loop in the allocator. Bail out now.
		 */
		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
			panic("System is deadlocked on memory\n");
	}
	/* (void *)-1UL is the "abort, victim already dying" sentinel. */
	if (oc->chosen && oc->chosen != (void *)-1UL)
		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
				 "Memory cgroup out of memory");
	return !!oc->chosen;
}
e36589323 oom: remove speci... |
1106 1107 |
/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
 * killing is already in progress so do nothing.
 */
void pagefault_out_of_memory(void)
{
	/* No allocation context here: everything is zero/NULL by design. */
	struct oom_control oc = {
		.zonelist = NULL,
		.nodemask = NULL,
		.memcg = NULL,
		.gfp_mask = 0,
		.order = 0,
	};

	/* A memcg OOM may be resolved by the charge path; done if so. */
	if (mem_cgroup_oom_synchronize(true))
		return;

	/* Somebody else is already OOM-killing; don't pile on. */
	if (!mutex_trylock(&oom_lock))
		return;
	out_of_memory(&oc);
	mutex_unlock(&oom_lock);
}