Blame view
mm/oom_kill.c
29.7 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 |
/* * linux/mm/oom_kill.c * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... |
a63d83f42 oom: badness heur... |
7 8 |
* Copyright (C) 2010 Google, Inc. * Rewritten by David Rientjes |
1da177e4c Linux-2.6.12-rc2 |
9 10 |
* * The routines in this file are used to kill a process when |
a49335cce [PATCH] cpusets: ... |
11 12 |
* we're seriously out of memory. This gets called from __alloc_pages() * in mm/page_alloc.c when we really run out of memory. |
1da177e4c Linux-2.6.12-rc2 |
13 14 15 16 17 18 |
* * Since we won't call these routines often (on a well-configured * machine) this file will double as a 'coding guide' and a signpost * for newbie kernel hackers. It features several pointers to major * kernel subsystems and hints as to where to find out what things do. */ |
8ac773b4f [PATCH] OOM kille... |
19 |
#include <linux/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
#include <linux/mm.h> |
4e950f6f0 Remove fs.h from ... |
21 |
#include <linux/err.h> |
5a0e3ad6a include cleanup: ... |
22 |
#include <linux/gfp.h> |
1da177e4c Linux-2.6.12-rc2 |
23 |
#include <linux/sched.h> |
6e84f3152 sched/headers: Pr... |
24 |
#include <linux/sched/mm.h> |
f7ccbae45 sched/headers: Pr... |
25 |
#include <linux/sched/coredump.h> |
299300258 sched/headers: Pr... |
26 |
#include <linux/sched/task.h> |
1da177e4c Linux-2.6.12-rc2 |
27 28 29 |
#include <linux/swap.h> #include <linux/timex.h> #include <linux/jiffies.h> |
ef08e3b49 [PATCH] cpusets: ... |
30 |
#include <linux/cpuset.h> |
b95f1b31b mm: Map most file... |
31 |
#include <linux/export.h> |
8bc719d3c [PATCH] out of me... |
32 |
#include <linux/notifier.h> |
c7ba5c9e8 Memory controller... |
33 |
#include <linux/memcontrol.h> |
6f48d0ebd oom: select task ... |
34 |
#include <linux/mempolicy.h> |
5cd9c58fb security: Fix set... |
35 |
#include <linux/security.h> |
edd45544c oom: avoid deferr... |
36 |
#include <linux/ptrace.h> |
f660daac4 oom: thaw threads... |
37 |
#include <linux/freezer.h> |
43d2b1132 tracepoint: add t... |
38 |
#include <linux/ftrace.h> |
dc3f21ead mm, oom: introduc... |
39 |
#include <linux/ratelimit.h> |
aac453635 mm, oom: introduc... |
40 41 |
#include <linux/kthread.h> #include <linux/init.h> |
4d4bbd852 mm, oom_reaper: s... |
42 |
#include <linux/mmu_notifier.h> |
aac453635 mm, oom: introduc... |
43 44 45 |
#include <asm/tlb.h> #include "internal.h" |
43d2b1132 tracepoint: add t... |
46 47 48 |
#define CREATE_TRACE_POINTS #include <trace/events/oom.h> |
1da177e4c Linux-2.6.12-rc2 |
49 |
|
fadd8fbd1 [PATCH] support f... |
50 |
int sysctl_panic_on_oom; |
fe071d7e8 oom: add oom_kill... |
51 |
int sysctl_oom_kill_allocating_task; |
ad915c432 oom: enable oom t... |
52 |
int sysctl_oom_dump_tasks = 1; |
dc56401fc mm: oom_kill: sim... |
53 54 |
DEFINE_MUTEX(oom_lock); |
1da177e4c Linux-2.6.12-rc2 |
55 |
|
6f48d0ebd oom: select task ... |
56 57 58 |
#ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill |
ad9624417 oom_kill: has_int... |
59 |
* @start: task struct of which task to consider |
6f48d0ebd oom: select task ... |
60 61 62 63 64 |
* @mask: nodemask passed to page allocator for mempolicy ooms * * Task eligibility is determined by whether or not a candidate task, @tsk, * shares the same mempolicy nodes as current if it is bound by such a policy * and whether or not it has the same set of allowed cpuset nodes. |
495789a51 oom: make oom_sco... |
65 |
*/ |
ad9624417 oom_kill: has_int... |
66 |
static bool has_intersects_mems_allowed(struct task_struct *start, |
6f48d0ebd oom: select task ... |
67 |
const nodemask_t *mask) |
495789a51 oom: make oom_sco... |
68 |
{ |
ad9624417 oom_kill: has_int... |
69 70 |
struct task_struct *tsk; bool ret = false; |
495789a51 oom: make oom_sco... |
71 |
|
ad9624417 oom_kill: has_int... |
72 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
73 |
for_each_thread(start, tsk) { |
6f48d0ebd oom: select task ... |
74 75 76 77 78 79 80 |
if (mask) { /* * If this is a mempolicy constrained oom, tsk's * cpuset is irrelevant. Only return true if its * mempolicy intersects current, otherwise it may be * needlessly killed. */ |
ad9624417 oom_kill: has_int... |
81 |
ret = mempolicy_nodemask_intersects(tsk, mask); |
6f48d0ebd oom: select task ... |
82 83 84 85 86 |
} else { /* * This is not a mempolicy constrained oom, so only * check the mems of tsk's cpuset. */ |
ad9624417 oom_kill: has_int... |
87 |
ret = cpuset_mems_allowed_intersects(current, tsk); |
6f48d0ebd oom: select task ... |
88 |
} |
ad9624417 oom_kill: has_int... |
89 90 |
if (ret) break; |
1da4db0cd oom_kill: change ... |
91 |
} |
ad9624417 oom_kill: has_int... |
92 |
rcu_read_unlock(); |
df1090a8d oom: cleanup has_... |
93 |
|
ad9624417 oom_kill: has_int... |
94 |
return ret; |
6f48d0ebd oom: select task ... |
95 96 97 98 99 100 |
} #else static bool has_intersects_mems_allowed(struct task_struct *tsk, const nodemask_t *mask) { return true; |
495789a51 oom: make oom_sco... |
101 |
} |
6f48d0ebd oom: select task ... |
102 |
#endif /* CONFIG_NUMA */ |
495789a51 oom: make oom_sco... |
103 |
|
6f48d0ebd oom: select task ... |
104 105 106 107 108 109 |
/* * The process p may have detached its own ->mm while exiting or through * use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ |
158e0a2d1 memcg: use find_l... |
110 |
struct task_struct *find_lock_task_mm(struct task_struct *p) |
dd8e8f405 oom: introduce fi... |
111 |
{ |
1da4db0cd oom_kill: change ... |
112 |
struct task_struct *t; |
dd8e8f405 oom: introduce fi... |
113 |
|
4d4048be8 oom_kill: add rcu... |
114 |
rcu_read_lock(); |
1da4db0cd oom_kill: change ... |
115 |
for_each_thread(p, t) { |
dd8e8f405 oom: introduce fi... |
116 117 |
task_lock(t); if (likely(t->mm)) |
4d4048be8 oom_kill: add rcu... |
118 |
goto found; |
dd8e8f405 oom: introduce fi... |
119 |
task_unlock(t); |
1da4db0cd oom_kill: change ... |
120 |
} |
4d4048be8 oom_kill: add rcu... |
121 122 123 |
t = NULL; found: rcu_read_unlock(); |
dd8e8f405 oom: introduce fi... |
124 |
|
4d4048be8 oom_kill: add rcu... |
125 |
return t; |
dd8e8f405 oom: introduce fi... |
126 |
} |
db2a0dd7a mm/oom_kill.c: in... |
127 128 129 130 131 132 133 134 |
/* * order == -1 means the oom kill is required by sysrq, otherwise only * for display purposes. */ static inline bool is_sysrq_oom(struct oom_control *oc) { return oc->order == -1; } |
7c5f64f84 mm: oom: deduplic... |
135 136 137 138 |
static inline bool is_memcg_oom(struct oom_control *oc) { return oc->memcg != NULL; } |
ab290adba oom: make oom_unk... |
139 |
/* return true if the task is not adequate as candidate victim task. */ |
e85bfd3aa oom: filter unkil... |
140 |
static bool oom_unkillable_task(struct task_struct *p, |
2314b42db mm: memcontrol: d... |
141 |
struct mem_cgroup *memcg, const nodemask_t *nodemask) |
ab290adba oom: make oom_unk... |
142 143 144 145 146 147 148 |
{ if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ |
72835c86c mm: unify remaini... |
149 |
if (memcg && !task_in_mem_cgroup(p, memcg)) |
ab290adba oom: make oom_unk... |
150 151 152 153 154 155 156 157 |
return true; /* p may not have freeable memory in nodemask */ if (!has_intersects_mems_allowed(p, nodemask)) return true; return false; } |
1da177e4c Linux-2.6.12-rc2 |
158 |
/** |
a63d83f42 oom: badness heur... |
159 |
* oom_badness - heuristic function to determine which candidate task to kill |
1da177e4c Linux-2.6.12-rc2 |
160 |
* @p: task struct of which task we should calculate |
a63d83f42 oom: badness heur... |
161 |
* @totalpages: total present RAM allowed for page allocation |
1da177e4c Linux-2.6.12-rc2 |
162 |
* |
a63d83f42 oom: badness heur... |
163 164 165 |
* The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. |
1da177e4c Linux-2.6.12-rc2 |
166 |
*/ |
a7f638f99 mm, oom: normaliz... |
167 168 |
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) |
1da177e4c Linux-2.6.12-rc2 |
169 |
{ |
1e11ad8dc mm, oom: fix badn... |
170 |
long points; |
61eafb00d mm, oom: fix and ... |
171 |
long adj; |
28b83c519 oom: move oom_adj... |
172 |
|
72835c86c mm: unify remaini... |
173 |
if (oom_unkillable_task(p, memcg, nodemask)) |
26ebc9849 oom: /proc/<pid>/... |
174 |
return 0; |
1da177e4c Linux-2.6.12-rc2 |
175 |
|
dd8e8f405 oom: introduce fi... |
176 177 |
p = find_lock_task_mm(p); if (!p) |
1da177e4c Linux-2.6.12-rc2 |
178 |
return 0; |
bb8a4b7fd mm, oom_reaper: h... |
179 180 |
/* * Do not even consider tasks which are explicitly marked oom |
b18dc5f29 mm, oom: skip vfo... |
181 182 |
* unkillable or have been already oom reaped or the are in * the middle of vfork |
bb8a4b7fd mm, oom_reaper: h... |
183 |
*/ |
a9c58b907 mm, oom: change t... |
184 |
adj = (long)p->signal->oom_score_adj; |
bb8a4b7fd mm, oom_reaper: h... |
185 |
if (adj == OOM_SCORE_ADJ_MIN || |
862e3073b mm, oom: get rid ... |
186 |
test_bit(MMF_OOM_SKIP, &p->mm->flags) || |
b18dc5f29 mm, oom: skip vfo... |
187 |
in_vfork(p)) { |
5aecc85ab oom: do not kill ... |
188 189 190 |
task_unlock(p); return 0; } |
1da177e4c Linux-2.6.12-rc2 |
191 |
/* |
a63d83f42 oom: badness heur... |
192 |
* The baseline for the badness score is the proportion of RAM that each |
f755a042d oom: use pte page... |
193 |
* task's rss, pagetable and swap space use. |
1da177e4c Linux-2.6.12-rc2 |
194 |
*/ |
dc6c9a35b mm: account pmd p... |
195 196 |
points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm); |
a63d83f42 oom: badness heur... |
197 |
task_unlock(p); |
1da177e4c Linux-2.6.12-rc2 |
198 199 |
/* |
a63d83f42 oom: badness heur... |
200 201 |
* Root processes get 3% bonus, just like the __vm_enough_memory() * implementation used by LSMs. |
1da177e4c Linux-2.6.12-rc2 |
202 |
*/ |
a63d83f42 oom: badness heur... |
203 |
if (has_capability_noaudit(p, CAP_SYS_ADMIN)) |
778c14aff mm, oom: base roo... |
204 |
points -= (points * 3) / 100; |
1da177e4c Linux-2.6.12-rc2 |
205 |
|
61eafb00d mm, oom: fix and ... |
206 207 208 |
/* Normalize to oom_score_adj units */ adj *= totalpages / 1000; points += adj; |
1da177e4c Linux-2.6.12-rc2 |
209 |
|
f19e8aa11 oom: always retur... |
210 |
/* |
a7f638f99 mm, oom: normaliz... |
211 212 |
* Never return 0 for an eligible task regardless of the root bonus and * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). |
f19e8aa11 oom: always retur... |
213 |
*/ |
1e11ad8dc mm, oom: fix badn... |
214 |
return points > 0 ? points : 1; |
1da177e4c Linux-2.6.12-rc2 |
215 |
} |
7c5f64f84 mm: oom: deduplic... |
216 217 218 219 220 221 |
enum oom_constraint { CONSTRAINT_NONE, CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG, }; |
1da177e4c Linux-2.6.12-rc2 |
222 |
/* |
9b0f8b040 [PATCH] Terminate... |
223 224 |
* Determine the type of allocation constraint. */ |
7c5f64f84 mm: oom: deduplic... |
225 |
static enum oom_constraint constrained_alloc(struct oom_control *oc) |
4365a5676 oom-kill: fix NUM... |
226 |
{ |
54a6eb5c4 mm: use two zonel... |
227 |
struct zone *zone; |
dd1a239f6 mm: have zonelist... |
228 |
struct zoneref *z; |
6e0fc46dc mm, oom: organize... |
229 |
enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask); |
a63d83f42 oom: badness heur... |
230 231 |
bool cpuset_limited = false; int nid; |
9b0f8b040 [PATCH] Terminate... |
232 |
|
7c5f64f84 mm: oom: deduplic... |
233 234 235 236 |
if (is_memcg_oom(oc)) { oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1; return CONSTRAINT_MEMCG; } |
a63d83f42 oom: badness heur... |
237 |
/* Default to all available memory */ |
7c5f64f84 mm: oom: deduplic... |
238 239 240 241 |
oc->totalpages = totalram_pages + total_swap_pages; if (!IS_ENABLED(CONFIG_NUMA)) return CONSTRAINT_NONE; |
a63d83f42 oom: badness heur... |
242 |
|
6e0fc46dc mm, oom: organize... |
243 |
if (!oc->zonelist) |
a63d83f42 oom: badness heur... |
244 |
return CONSTRAINT_NONE; |
4365a5676 oom-kill: fix NUM... |
245 246 247 248 249 |
/* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ |
6e0fc46dc mm, oom: organize... |
250 |
if (oc->gfp_mask & __GFP_THISNODE) |
4365a5676 oom-kill: fix NUM... |
251 |
return CONSTRAINT_NONE; |
9b0f8b040 [PATCH] Terminate... |
252 |
|
4365a5676 oom-kill: fix NUM... |
253 |
/* |
a63d83f42 oom: badness heur... |
254 255 256 |
* This is not a __GFP_THISNODE allocation, so a truncated nodemask in * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). |
4365a5676 oom-kill: fix NUM... |
257 |
*/ |
6e0fc46dc mm, oom: organize... |
258 259 |
if (oc->nodemask && !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { |
7c5f64f84 mm: oom: deduplic... |
260 |
oc->totalpages = total_swap_pages; |
6e0fc46dc mm, oom: organize... |
261 |
for_each_node_mask(nid, *oc->nodemask) |
7c5f64f84 mm: oom: deduplic... |
262 |
oc->totalpages += node_spanned_pages(nid); |
9b0f8b040 [PATCH] Terminate... |
263 |
return CONSTRAINT_MEMORY_POLICY; |
a63d83f42 oom: badness heur... |
264 |
} |
4365a5676 oom-kill: fix NUM... |
265 266 |
/* Check this allocation failure is caused by cpuset's wall function */ |
6e0fc46dc mm, oom: organize... |
267 268 269 |
for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, high_zoneidx, oc->nodemask) if (!cpuset_zone_allowed(zone, oc->gfp_mask)) |
a63d83f42 oom: badness heur... |
270 |
cpuset_limited = true; |
9b0f8b040 [PATCH] Terminate... |
271 |
|
a63d83f42 oom: badness heur... |
272 |
if (cpuset_limited) { |
7c5f64f84 mm: oom: deduplic... |
273 |
oc->totalpages = total_swap_pages; |
a63d83f42 oom: badness heur... |
274 |
for_each_node_mask(nid, cpuset_current_mems_allowed) |
7c5f64f84 mm: oom: deduplic... |
275 |
oc->totalpages += node_spanned_pages(nid); |
a63d83f42 oom: badness heur... |
276 277 |
return CONSTRAINT_CPUSET; } |
9b0f8b040 [PATCH] Terminate... |
278 279 |
return CONSTRAINT_NONE; } |
7c5f64f84 mm: oom: deduplic... |
280 |
static int oom_evaluate_task(struct task_struct *task, void *arg) |
462607ecc mm, oom: introduc... |
281 |
{ |
7c5f64f84 mm: oom: deduplic... |
282 283 |
struct oom_control *oc = arg; unsigned long points; |
6e0fc46dc mm, oom: organize... |
284 |
if (oom_unkillable_task(task, NULL, oc->nodemask)) |
7c5f64f84 mm: oom: deduplic... |
285 |
goto next; |
462607ecc mm, oom: introduc... |
286 287 288 |
/* * This task already has access to memory reserves and is being killed. |
a373966d1 mm, oom: hide mm ... |
289 |
* Don't allow any other task to have access to the reserves unless |
862e3073b mm, oom: get rid ... |
290 |
* the task has MMF_OOM_SKIP because chances that it would release |
a373966d1 mm, oom: hide mm ... |
291 |
* any memory is quite low. |
462607ecc mm, oom: introduc... |
292 |
*/ |
862e3073b mm, oom: get rid ... |
293 294 |
if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) { if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) |
7c5f64f84 mm: oom: deduplic... |
295 296 |
goto next; goto abort; |
a373966d1 mm, oom: hide mm ... |
297 |
} |
462607ecc mm, oom: introduc... |
298 |
|
e1e12d2f3 mm, oom: fix race... |
299 300 301 302 |
/* * If task is allocating a lot of memory and has been marked to be * killed first if it triggers an oom, then select it. */ |
7c5f64f84 mm: oom: deduplic... |
303 304 305 306 |
if (oom_task_origin(task)) { points = ULONG_MAX; goto select; } |
e1e12d2f3 mm, oom: fix race... |
307 |
|
7c5f64f84 mm: oom: deduplic... |
308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
points = oom_badness(task, NULL, oc->nodemask, oc->totalpages); if (!points || points < oc->chosen_points) goto next; /* Prefer thread group leaders for display purposes */ if (points == oc->chosen_points && thread_group_leader(oc->chosen)) goto next; select: if (oc->chosen) put_task_struct(oc->chosen); get_task_struct(task); oc->chosen = task; oc->chosen_points = points; next: return 0; abort: if (oc->chosen) put_task_struct(oc->chosen); oc->chosen = (void *)-1UL; return 1; |
462607ecc mm, oom: introduc... |
328 |
} |
9b0f8b040 [PATCH] Terminate... |
329 |
/* |
7c5f64f84 mm: oom: deduplic... |
330 331 |
* Simple selection loop. We choose the process with the highest number of * 'points'. In case scan was aborted, oc->chosen is set to -1. |
1da177e4c Linux-2.6.12-rc2 |
332 |
*/ |
7c5f64f84 mm: oom: deduplic... |
333 |
static void select_bad_process(struct oom_control *oc) |
1da177e4c Linux-2.6.12-rc2 |
334 |
{ |
7c5f64f84 mm: oom: deduplic... |
335 336 337 338 |
if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); else { struct task_struct *p; |
d49ad9355 mm, oom: prefer t... |
339 |
|
7c5f64f84 mm: oom: deduplic... |
340 341 342 343 344 |
rcu_read_lock(); for_each_process(p) if (oom_evaluate_task(p, oc)) break; rcu_read_unlock(); |
1da4db0cd oom_kill: change ... |
345 |
} |
972c4ea59 [PATCH] select_ba... |
346 |
|
7c5f64f84 mm: oom: deduplic... |
347 |
oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages; |
1da177e4c Linux-2.6.12-rc2 |
348 349 350 |
} /** |
1b578df02 mm/oom_kill: fix ... |
351 |
* dump_tasks - dump current memory state of all system tasks |
dad7557eb mm: fix kernel-do... |
352 |
* @memcg: current's memory controller, if constrained |
e85bfd3aa oom: filter unkil... |
353 |
* @nodemask: nodemask passed to page allocator for mempolicy ooms |
1b578df02 mm/oom_kill: fix ... |
354 |
* |
e85bfd3aa oom: filter unkil... |
355 356 357 |
* Dumps the current memory state of all eligible tasks. Tasks not in the same * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes * are not shown. |
de34d965a mm, oom: replace ... |
358 359 |
* State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, * swapents, oom_score_adj value, and name. |
fef1bdd68 oom: add sysctl t... |
360 |
*/ |
2314b42db mm: memcontrol: d... |
361 |
static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) |
fef1bdd68 oom: add sysctl t... |
362 |
{ |
c55db9578 oom: dump_tasks u... |
363 364 |
struct task_struct *p; struct task_struct *task; |
fef1bdd68 oom: add sysctl t... |
365 |
|
dc6c9a35b mm: account pmd p... |
366 367 |
pr_info("[ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents oom_score_adj name "); |
6b0c81b3b mm, oom: reduce d... |
368 |
rcu_read_lock(); |
c55db9578 oom: dump_tasks u... |
369 |
for_each_process(p) { |
72835c86c mm: unify remaini... |
370 |
if (oom_unkillable_task(p, memcg, nodemask)) |
b4416d2be oom: do not dump ... |
371 |
continue; |
fef1bdd68 oom: add sysctl t... |
372 |
|
c55db9578 oom: dump_tasks u... |
373 374 |
task = find_lock_task_mm(p); if (!task) { |
6d2661ede oom: fix possible... |
375 |
/* |
74ab7f1d3 oom: improve comm... |
376 377 |
* This is a kthread or all of p's threads have already * detached their mm's. There's no need to report |
c55db9578 oom: dump_tasks u... |
378 |
* them; they can't be oom killed anyway. |
6d2661ede oom: fix possible... |
379 |
*/ |
6d2661ede oom: fix possible... |
380 381 |
continue; } |
c55db9578 oom: dump_tasks u... |
382 |
|
dc6c9a35b mm: account pmd p... |
383 384 |
pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %8lu %5hd %s ", |
078de5f70 userns: Store uid... |
385 386 |
task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), |
e1f56c89b mm: convert mm->n... |
387 |
atomic_long_read(&task->mm->nr_ptes), |
dc6c9a35b mm: account pmd p... |
388 |
mm_nr_pmds(task->mm), |
de34d965a mm, oom: replace ... |
389 |
get_mm_counter(task->mm, MM_SWAPENTS), |
a63d83f42 oom: badness heur... |
390 |
task->signal->oom_score_adj, task->comm); |
c55db9578 oom: dump_tasks u... |
391 392 |
task_unlock(task); } |
6b0c81b3b mm, oom: reduce d... |
393 |
rcu_read_unlock(); |
fef1bdd68 oom: add sysctl t... |
394 |
} |
2a966b77a mm: oom: add memc... |
395 |
static void dump_header(struct oom_control *oc, struct task_struct *p) |
1b604d75b oom: dump stack a... |
396 |
{ |
299c517ad mm, oom: header n... |
397 398 399 400 401 402 403 404 405 |
pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=", current->comm, oc->gfp_mask, &oc->gfp_mask); if (oc->nodemask) pr_cont("%*pbl", nodemask_pr_args(oc->nodemask)); else pr_cont("(null)"); pr_cont(", order=%d, oom_score_adj=%hd ", oc->order, current->signal->oom_score_adj); |
9254990fb oom: warn if we g... |
406 407 408 |
if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) pr_warn("COMPACTION is disabled!!! "); |
a0795cd41 mm, oom: print sy... |
409 |
|
da39da3a5 mm, oom: remove t... |
410 |
cpuset_print_current_mems_allowed(); |
1b604d75b oom: dump stack a... |
411 |
dump_stack(); |
2a966b77a mm: oom: add memc... |
412 413 |
if (oc->memcg) mem_cgroup_print_oom_info(oc->memcg, p); |
58cf188ed memcg, oom: provi... |
414 |
else |
299c517ad mm, oom: header n... |
415 |
show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask); |
1b604d75b oom: dump stack a... |
416 |
if (sysctl_oom_dump_tasks) |
2a966b77a mm: oom: add memc... |
417 |
dump_tasks(oc->memcg, oc->nodemask); |
1b604d75b oom: dump stack a... |
418 |
} |
5695be142 OOM, PM: OOM kill... |
419 |
/* |
c32b3cbe0 oom, PM: make OOM... |
420 |
* Number of OOM victims in flight |
5695be142 OOM, PM: OOM kill... |
421 |
*/ |
c32b3cbe0 oom, PM: make OOM... |
422 423 |
static atomic_t oom_victims = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); |
5695be142 OOM, PM: OOM kill... |
424 |
|
7c5f64f84 mm: oom: deduplic... |
425 |
static bool oom_killer_disabled __read_mostly; |
5695be142 OOM, PM: OOM kill... |
426 |
|
bc448e897 mm, oom_reaper: r... |
427 |
#define K(x) ((x) << (PAGE_SHIFT-10)) |
3ef22dfff oom, oom_reaper: ... |
428 429 430 431 432 433 |
/* * task->mm can be NULL if the task is the exited group leader. So to * determine whether the task is using a particular mm, we examine all the * task's threads: if one of those is using this mm then this task was also * using it. */ |
44a70adec mm, oom_adj: make... |
434 |
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) |
3ef22dfff oom, oom_reaper: ... |
435 436 437 438 439 440 441 442 443 444 |
{ struct task_struct *t; for_each_thread(p, t) { struct mm_struct *t_mm = READ_ONCE(t->mm); if (t_mm) return t_mm == mm; } return false; } |
aac453635 mm, oom: introduc... |
445 446 447 448 449 450 |
#ifdef CONFIG_MMU /* * OOM Reaper kernel thread which tries to reap the memory used by the OOM * victim (if that is possible) to help the OOM killer to move on. */ static struct task_struct *oom_reaper_th; |
aac453635 mm, oom: introduc... |
451 |
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); |
29c696e1c oom: make oom_rea... |
452 |
static struct task_struct *oom_reaper_list; |
03049269d mm, oom_reaper: i... |
453 |
static DEFINE_SPINLOCK(oom_reaper_lock); |
2270dfcc4 mm, oom: fix conc... |
454 |
void __oom_reap_task_mm(struct mm_struct *mm) |
aac453635 mm, oom: introduc... |
455 |
{ |
aac453635 mm, oom: introduc... |
456 |
struct vm_area_struct *vma; |
2270dfcc4 mm, oom: fix conc... |
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 |
/* * Tell all users of get_user/copy_from_user etc... that the content * is no longer stable. No barriers really needed because unmapping * should imply barriers already and the reader would hit a page fault * if it stumbled over a reaped memory. */ set_bit(MMF_UNSTABLE, &mm->flags); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (!can_madv_dontneed_vma(vma)) continue; /* * Only anonymous pages have a good chance to be dropped * without additional steps which we cannot afford as we * are OOM already. * * We do not even care about fs backed pages because all * which are reclaimable have already been reclaimed and * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { struct mmu_gather tlb; tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, NULL); tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); } } } static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) { |
aac453635 mm, oom: introduc... |
493 |
bool ret = true; |
36324a990 oom: clear TIF_ME... |
494 |
/* |
e2fe14564 oom_reaper: close... |
495 496 |
* We have to make sure to not race with the victim exit path * and cause premature new oom victim selection: |
2270dfcc4 mm, oom: fix conc... |
497 |
* oom_reap_task_mm exit_mm |
e5e3f4c4f mm, oom_reaper: m... |
498 |
* mmget_not_zero |
e2fe14564 oom_reaper: close... |
499 500 501 502 503 504 505 506 507 508 |
* mmput * atomic_dec_and_test * exit_oom_victim * [...] * out_of_memory * select_bad_process * # no TIF_MEMDIE task selects new victim * unmap_page_range # frees some memory */ mutex_lock(&oom_lock); |
aac453635 mm, oom: introduc... |
509 510 |
if (!down_read_trylock(&mm->mmap_sem)) { ret = false; |
422580c3c mm/oom_kill.c: ad... |
511 |
trace_skip_task_reaping(tsk->pid); |
7ebffa455 mm,oom_reaper: re... |
512 |
goto unlock_oom; |
e5e3f4c4f mm, oom_reaper: m... |
513 514 515 |
} /* |
4d4bbd852 mm, oom_reaper: s... |
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 |
* If the mm has notifiers then we would need to invalidate them around * unmap_page_range and that is risky because notifiers can sleep and * what they do is basically undeterministic. So let's have a short * sleep to give the oom victim some more time. * TODO: we really want to get rid of this ugly hack and make sure that * notifiers cannot block for unbounded amount of time and add * mmu_notifier_invalidate_range_{start,end} around unmap_page_range */ if (mm_has_notifiers(mm)) { up_read(&mm->mmap_sem); schedule_timeout_idle(HZ); goto unlock_oom; } /* |
212925802 mm: oom: let oom_... |
531 532 533 534 |
* MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't * work on the mm anymore. The check for MMF_OOM_SKIP must run * under mmap_sem for reading because it serializes against the * down_write();up_write() cycle in exit_mmap(). |
e5e3f4c4f mm, oom_reaper: m... |
535 |
*/ |
212925802 mm: oom: let oom_... |
536 |
if (test_bit(MMF_OOM_SKIP, &mm->flags)) { |
e5e3f4c4f mm, oom_reaper: m... |
537 |
up_read(&mm->mmap_sem); |
422580c3c mm/oom_kill.c: ad... |
538 |
trace_skip_task_reaping(tsk->pid); |
7ebffa455 mm,oom_reaper: re... |
539 |
goto unlock_oom; |
aac453635 mm, oom: introduc... |
540 |
} |
422580c3c mm/oom_kill.c: ad... |
541 |
trace_start_task_reaping(tsk->pid); |
2270dfcc4 mm, oom: fix conc... |
542 |
__oom_reap_task_mm(mm); |
aac453635 mm, oom: introduc... |
543 |
|
bc448e897 mm, oom_reaper: r... |
544 545 546 547 548 549 |
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB ", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES))); |
aac453635 mm, oom: introduc... |
550 |
up_read(&mm->mmap_sem); |
36324a990 oom: clear TIF_ME... |
551 |
|
422580c3c mm/oom_kill.c: ad... |
552 |
trace_finish_task_reaping(tsk->pid); |
e5e3f4c4f mm, oom_reaper: m... |
553 554 |
unlock_oom: mutex_unlock(&oom_lock); |
aac453635 mm, oom: introduc... |
555 556 |
return ret; } |
bc448e897 mm, oom_reaper: r... |
557 |
#define MAX_OOM_REAP_RETRIES 10 |
36324a990 oom: clear TIF_ME... |
558 |
static void oom_reap_task(struct task_struct *tsk) |
aac453635 mm, oom: introduc... |
559 560 |
{ int attempts = 0; |
26db62f17 oom: keep mm of t... |
561 |
struct mm_struct *mm = tsk->signal->oom_mm; |
aac453635 mm, oom: introduc... |
562 563 |
/* Retry the down_read_trylock(mmap_sem) a few times */ |
2270dfcc4 mm, oom: fix conc... |
564 |
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) |
aac453635 mm, oom: introduc... |
565 |
schedule_timeout_idle(HZ/10); |
7ebffa455 mm,oom_reaper: re... |
566 567 |
if (attempts <= MAX_OOM_REAP_RETRIES) goto done; |
11a410d51 mm, oom_reaper: d... |
568 |
|
7ebffa455 mm,oom_reaper: re... |
569 570 571 |
pr_info("oom_reaper: unable to reap pid:%d (%s) ", task_pid_nr(tsk), tsk->comm); |
7ebffa455 mm,oom_reaper: re... |
572 |
debug_show_all_locks(); |
bc448e897 mm, oom_reaper: r... |
573 |
|
7ebffa455 mm,oom_reaper: re... |
574 |
done: |
449d777d7 mm, oom_reaper: c... |
575 |
tsk->oom_reaper_list = NULL; |
449d777d7 mm, oom_reaper: c... |
576 |
|
26db62f17 oom: keep mm of t... |
577 578 579 580 |
/* * Hide this mm from OOM killer because it has been either reaped or * somebody can't call up_write(mmap_sem). */ |
862e3073b mm, oom: get rid ... |
581 |
set_bit(MMF_OOM_SKIP, &mm->flags); |
26db62f17 oom: keep mm of t... |
582 |
|
aac453635 mm, oom: introduc... |
583 |
/* Drop a reference taken by wake_oom_reaper */ |
36324a990 oom: clear TIF_ME... |
584 |
put_task_struct(tsk); |
aac453635 mm, oom: introduc... |
585 586 587 588 589 |
} static int oom_reaper(void *unused) { while (true) { |
03049269d mm, oom_reaper: i... |
590 |
struct task_struct *tsk = NULL; |
aac453635 mm, oom: introduc... |
591 |
|
29c696e1c oom: make oom_rea... |
592 |
wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); |
03049269d mm, oom_reaper: i... |
593 |
spin_lock(&oom_reaper_lock); |
29c696e1c oom: make oom_rea... |
594 595 596 |
if (oom_reaper_list != NULL) { tsk = oom_reaper_list; oom_reaper_list = tsk->oom_reaper_list; |
03049269d mm, oom_reaper: i... |
597 598 599 600 601 |
} spin_unlock(&oom_reaper_lock); if (tsk) oom_reap_task(tsk); |
aac453635 mm, oom: introduc... |
602 603 604 605 |
} return 0; } |
7c5f64f84 mm: oom: deduplic... |
606 |
static void wake_oom_reaper(struct task_struct *tsk) |
aac453635 mm, oom: introduc... |
607 |
{ |
af8e15cc8 oom, oom_reaper: ... |
608 609 |
if (!oom_reaper_th) return; |
731785481 oom, oom_reaper: ... |
610 611 |
/* mm is already queued? */ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) |
aac453635 mm, oom: introduc... |
612 |
return; |
36324a990 oom: clear TIF_ME... |
613 |
get_task_struct(tsk); |
aac453635 mm, oom: introduc... |
614 |
|
03049269d mm, oom_reaper: i... |
615 |
spin_lock(&oom_reaper_lock); |
29c696e1c oom: make oom_rea... |
616 617 |
tsk->oom_reaper_list = oom_reaper_list; oom_reaper_list = tsk; |
03049269d mm, oom_reaper: i... |
618 |
spin_unlock(&oom_reaper_lock); |
422580c3c mm/oom_kill.c: ad... |
619 |
trace_wake_reaper(tsk->pid); |
03049269d mm, oom_reaper: i... |
620 |
wake_up(&oom_reaper_wait); |
aac453635 mm, oom: introduc... |
621 622 623 624 625 626 627 628 629 630 631 632 633 634 |
} static int __init oom_init(void) { oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); if (IS_ERR(oom_reaper_th)) { pr_err("Unable to start OOM reaper %ld. Continuing regardless ", PTR_ERR(oom_reaper_th)); oom_reaper_th = NULL; } return 0; } subsys_initcall(oom_init) |
7c5f64f84 mm: oom: deduplic... |
635 636 637 638 639 |
#else static inline void wake_oom_reaper(struct task_struct *tsk) { } #endif /* CONFIG_MMU */ |
aac453635 mm, oom: introduc... |
640 |
|
/**
 * mark_oom_victim - mark the given task as OOM victim
 * @tsk: task to mark
 *
 * Has to be called with oom_lock held and never after
 * oom has been disabled already.
 *
 * tsk->mm has to be non NULL and caller has to guarantee it is stable (either
 * under task_lock or operate on the current).
 */
static void mark_oom_victim(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	WARN_ON(oom_killer_disabled);
	/* OOM killer might race with memcg OOM */
	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
		return;

	/* oom_mm is bound to the signal struct life time. */
	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
		/*
		 * Only the first caller to win the cmpxchg pins the mm; the
		 * reference is dropped when the signal struct is freed.
		 */
		mmgrab(tsk->signal->oom_mm);
		set_bit(MMF_OOM_VICTIM, &mm->flags);
	}

	/*
	 * Make sure that the task is woken up from uninterruptible sleep
	 * if it is frozen because OOM killer wouldn't be able to free
	 * any memory and livelock. freezing_slow_path will tell the freezer
	 * that TIF_MEMDIE tasks should be ignored.
	 */
	__thaw_task(tsk);
	atomic_inc(&oom_victims);
	trace_mark_victim(tsk->pid);
}

/**
 * exit_oom_victim - note the exit of an OOM victim
 *
 * Clears TIF_MEMDIE on current and, when the last victim is gone, wakes
 * any waiter in oom_killer_disable().
 */
void exit_oom_victim(void)
{
	clear_thread_flag(TIF_MEMDIE);

	/* The last exiting victim releases oom_killer_disable() waiters. */
	if (!atomic_dec_return(&oom_victims))
		wake_up_all(&oom_victims_wait);
}

/**
 * oom_killer_enable - enable OOM killer
 *
 * Re-arms the OOM killer after a previous oom_killer_disable() (e.g. when
 * resuming from suspend).
 */
void oom_killer_enable(void)
{
	oom_killer_disabled = false;
	pr_info("OOM killer enabled.\n");
}

/**
 * oom_killer_disable - disable OOM killer
 * @timeout: maximum timeout to wait for oom victims in jiffies
 *
 * Forces all page allocations to fail rather than trigger OOM killer.
 * Will block and wait until all OOM victims are killed or the given
 * timeout expires.
 *
 * The function cannot be called when there are runnable user tasks because
 * the userspace would see unexpected allocation failures as a result. Any
 * new usage of this function should be consulted with MM people.
 *
 * Returns true if successful and false if the OOM killer cannot be
 * disabled.
 */
bool oom_killer_disable(signed long timeout)
{
	signed long ret;

	/*
	 * Make sure to not race with an ongoing OOM killer. Check that the
	 * current is not killed (possibly due to sharing the victim's memory).
	 */
	if (mutex_lock_killable(&oom_lock))
		return false;
	oom_killer_disabled = true;
	mutex_unlock(&oom_lock);

	/* Wait (up to @timeout) for all in-flight victims to exit. */
	ret = wait_event_interruptible_timeout(oom_victims_wait,
			!atomic_read(&oom_victims), timeout);
	if (ret <= 0) {
		/* Timed out or interrupted: roll back the disable. */
		oom_killer_enable();
		return false;
	}
	pr_info("OOM killer disabled.\n");

	return true;
}
1af8bb432 mm, oom: fortify ... |
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 |
static inline bool __task_will_free_mem(struct task_struct *task) { struct signal_struct *sig = task->signal; /* * A coredumping process may sleep for an extended period in exit_mm(), * so the oom killer cannot assume that the process will promptly exit * and release memory. */ if (sig->flags & SIGNAL_GROUP_COREDUMP) return false; if (sig->flags & SIGNAL_GROUP_EXIT) return true; if (thread_group_empty(task) && (task->flags & PF_EXITING)) return true; return false; } /* * Checks whether the given task is dying or exiting and likely to * release its address space. This means that all threads and processes * sharing the same mm have to be killed or exiting. |
 * Caller has to make sure that task->mm is stable (hold task_lock or
 * it operates on the current).
 */
static bool task_will_free_mem(struct task_struct *task)
{
	struct mm_struct *mm = task->mm;
	struct task_struct *p;
	bool ret = true;

	/*
	 * Skip tasks without mm because it might have passed its exit_mm and
	 * exit_oom_victim. oom_reaper could have rescued that but do not rely
	 * on that for now. We can consider find_lock_task_mm in future.
	 */
	if (!mm)
		return false;

	if (!__task_will_free_mem(task))
		return false;

	/*
	 * This task has already been drained by the oom reaper so there are
	 * only small chances it will free some more
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags))
		return false;

	/* Single mm user: nothing else can pin the address space. */
	if (atomic_read(&mm->mm_users) <= 1)
		return true;

	/*
	 * Make sure that all tasks which share the mm with the given tasks
	 * are dying as well to make sure that a) nobody pins its mm and
	 * b) the task is also reapable by the oom reaper.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(task, p))
			continue;
		ret = __task_will_free_mem(p);
		if (!ret)
			break;
	}
	rcu_read_unlock();

	return ret;
}
/*
 * Kill the chosen victim in @oc (or one of its children when that frees the
 * same memory at a lower cost), give it access to memory reserves, and kill
 * every other process that shares its mm.  @message prefixes the log line.
 * Consumes the task reference held on oc->chosen.
 */
static void oom_kill_process(struct oom_control *oc, const char *message)
{
	struct task_struct *p = oc->chosen;
	unsigned int points = oc->chosen_points;
	struct task_struct *victim = p;
	struct task_struct *child;
	struct task_struct *t;
	struct mm_struct *mm;
	unsigned int victim_points = 0;
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
					      DEFAULT_RATELIMIT_BURST);
	bool can_oom_reap = true;

	/*
	 * If the task is already exiting, don't alarm the sysadmin or kill
	 * its children or threads, just give it access to memory reserves
	 * so it can die quickly
	 */
	task_lock(p);
	if (task_will_free_mem(p)) {
		mark_oom_victim(p);
		wake_oom_reaper(p);
		task_unlock(p);
		put_task_struct(p);
		return;
	}
	task_unlock(p);

	/* dump_header() is expensive; rate-limit it under OOM storms. */
	if (__ratelimit(&oom_rs))
		dump_header(oc, p);

	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
		message, task_pid_nr(p), p->comm, points);

	/*
	 * If any of p's children has a different mm and is eligible for kill,
	 * the one with the highest oom_badness() score is sacrificed for its
	 * parent. This attempts to lose the minimal amount of work done while
	 * still freeing memory.
	 */
	read_lock(&tasklist_lock);

	/*
	 * The task 'p' might have already exited before reaching here. The
	 * put_task_struct() will free task_struct 'p' while the loop still try
	 * to access the field of 'p', so, get an extra reference.
	 */
	get_task_struct(p);
	for_each_thread(p, t) {
		list_for_each_entry(child, &t->children, sibling) {
			unsigned int child_points;

			/* Children sharing p's mm would free nothing extra. */
			if (process_shares_mm(child, p->mm))
				continue;
			/*
			 * oom_badness() returns 0 if the thread is unkillable
			 */
			child_points = oom_badness(child,
				oc->memcg, oc->nodemask, oc->totalpages);
			if (child_points > victim_points) {
				put_task_struct(victim);
				victim = child;
				victim_points = child_points;
				get_task_struct(victim);
			}
		}
	}
	put_task_struct(p);
	read_unlock(&tasklist_lock);

	p = find_lock_task_mm(victim);
	if (!p) {
		/* Victim already lost its mm; nothing left to kill. */
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		/* Switch to the thread that still holds the mm. */
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* Get a reference to safely compare mm after task_unlock(victim) */
	mm = victim->mm;
	mmgrab(mm);

	/* Raise event before sending signal: task reaper must see this */
	count_vm_event(OOM_KILL);
	count_memcg_event_mm(mm, OOM_KILL);

	/*
	 * We should send SIGKILL before granting access to memory reserves
	 * in order to prevent the OOM victim from depleting the memory
	 * reserves from the user space under its control.
	 */
	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
	mark_oom_victim(victim);
	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups, if
	 * any. They don't get access to memory reserves, though, to avoid
	 * depletion of all memory. This prevents mm->mmap_sem livelock when an
	 * oom killed thread cannot exit because it requires the semaphore and
	 * its contended by another thread trying to allocate memory itself.
	 * That thread will now get access to memory reserves since it has a
	 * pending fatal signal.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(p, victim))
			continue;
		if (is_global_init(p)) {
			/* Never kill init; its mm pin blocks oom reaping. */
			can_oom_reap = false;
			set_bit(MMF_OOM_SKIP, &mm->flags);
			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
					task_pid_nr(victim), victim->comm,
					task_pid_nr(p), p->comm);
			continue;
		}
		/*
		 * No use_mm() user needs to read from the userspace so we are
		 * ok to reap it.
		 */
		if (unlikely(p->flags & PF_KTHREAD))
			continue;
		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
	}
	rcu_read_unlock();

	if (can_oom_reap)
		wake_oom_reaper(victim);

	mmdrop(mm);
	put_task_struct(victim);
}
#undef K
1da177e4c Linux-2.6.12-rc2 |
952 |
|
309ed8825 oom: extract pani... |
953 954 955 |
/* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ |
7c5f64f84 mm: oom: deduplic... |
956 957 |
static void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint) |
309ed8825 oom: extract pani... |
958 959 960 961 962 963 964 965 966 967 968 969 |
{ if (likely(!sysctl_panic_on_oom)) return; if (sysctl_panic_on_oom != 2) { /* * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ if (constraint != CONSTRAINT_NONE) return; } |
071a4befe mm, oom: do not p... |
970 |
/* Do not panic for oom kills triggered by sysrq */ |
db2a0dd7a mm/oom_kill.c: in... |
971 |
if (is_sysrq_oom(oc)) |
071a4befe mm, oom: do not p... |
972 |
return; |
2a966b77a mm: oom: add memc... |
973 |
dump_header(oc, NULL); |
309ed8825 oom: extract pani... |
974 975 976 977 |
panic("Out of memory: %s panic_on_oom is enabled ", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } |
/* Chain notified from out_of_memory() so subsystems may release memory. */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

/* Add @nb to the OOM notifier chain; it runs before a victim is chosen. */
int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

/* Remove @nb from the OOM notifier chain. */
int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @oc: pointer to struct oom_control
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 *
 * Returns true when a victim was found (or current was allowed to die),
 * false when the OOM killer is disabled or could not make progress.
 */
bool out_of_memory(struct oom_control *oc)
{
	unsigned long freed = 0;
	enum oom_constraint constraint = CONSTRAINT_NONE;

	if (oom_killer_disabled)
		return false;

	/* Give OOM notifiers a chance to free memory before killing. */
	if (!is_memcg_oom(oc)) {
		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
		if (freed > 0)
			/* Got some memory back in the last second. */
			return true;
	}

	/*
	 * If current has a pending SIGKILL or is exiting, then automatically
	 * select it. The goal is to allow it to allocate so that it may
	 * quickly exit and free its memory.
	 */
	if (task_will_free_mem(current)) {
		mark_oom_victim(current);
		wake_oom_reaper(current);
		return true;
	}

	/*
	 * The OOM killer does not compensate for IO-less reclaim.
	 * pagefault_out_of_memory lost its gfp context so we have to
	 * make sure exclude 0 mask - all other users should have at least
	 * ___GFP_DIRECT_RECLAIM to get here.
	 */
	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
		return true;

	/*
	 * Check if there were limitations on the allocation (only relevant for
	 * NUMA and memcg) that may require different handling.
	 */
	constraint = constrained_alloc(oc);
	if (constraint != CONSTRAINT_MEMORY_POLICY)
		oc->nodemask = NULL;
	check_panic_on_oom(oc, constraint);

	/* Honor oom_kill_allocating_task for global (non-memcg) OOMs. */
	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
		get_task_struct(current);
		oc->chosen = current;
		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
		return true;
	}

	select_bad_process(oc);
	/* Found nothing?!?! Either we hang forever, or we panic. */
	if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
		dump_header(oc, NULL);
		panic("Out of memory and no killable processes...\n");
	}
	/* (void *)-1UL marks an abort from select_bad_process(). */
	if (oc->chosen && oc->chosen != (void *)-1UL) {
		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
				 "Memory cgroup out of memory");
		/*
		 * Give the killed process a good chance to exit before trying
		 * to allocate memory again.
		 */
		schedule_timeout_killable(1);
	}
	return !!oc->chosen;
}
/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
 * killing is already in progress so do nothing.
 */
void pagefault_out_of_memory(void)
{
	/* No gfp/order context survives to the fault path: all-zero control. */
	struct oom_control oc = {
		.zonelist = NULL,
		.nodemask = NULL,
		.memcg = NULL,
		.gfp_mask = 0,
		.order = 0,
	};

	/* A memcg OOM may be handled by the charging task instead. */
	if (mem_cgroup_oom_synchronize(true))
		return;

	/* Somebody else is already OOM-killing; let them finish. */
	if (!mutex_trylock(&oom_lock))
		return;
	out_of_memory(&oc);
	mutex_unlock(&oom_lock);
}