Commit dc98df5a1b7be402a0e1c71f1b89ccf249ac15ee
Committed by
Linus Torvalds
1 parent
595f4b694c
Exists in
master
and in
4 other branches
memcg: oom wakeup filter
memcg's oom waitqueue is a system-wide wait_queue (for handling hierarchy), so it's better to add a custom wake function and do the filtering in the wake-up path. This patch adds a filtering feature for waking up oom-waiters. Hierarchy is properly handled.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 46 additions and 17 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -1293,14 +1293,56 @@ |
1293 | 1293 | static DEFINE_MUTEX(memcg_oom_mutex); |
1294 | 1294 | static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); |
1295 | 1295 | |
1296 | +struct oom_wait_info { | |
1297 | + struct mem_cgroup *mem; | |
1298 | + wait_queue_t wait; | |
1299 | +}; | |
1300 | + | |
1301 | +static int memcg_oom_wake_function(wait_queue_t *wait, | |
1302 | + unsigned mode, int sync, void *arg) | |
1303 | +{ | |
1304 | + struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg; | |
1305 | + struct oom_wait_info *oom_wait_info; | |
1306 | + | |
1307 | + oom_wait_info = container_of(wait, struct oom_wait_info, wait); | |
1308 | + | |
1309 | + if (oom_wait_info->mem == wake_mem) | |
1310 | + goto wakeup; | |
1311 | + /* if no hierarchy, no match */ | |
1312 | + if (!oom_wait_info->mem->use_hierarchy || !wake_mem->use_hierarchy) | |
1313 | + return 0; | |
1314 | + /* | |
1315 | + * Both of oom_wait_info->mem and wake_mem are stable under us. | |
1316 | + * Then we can use css_is_ancestor without taking care of RCU. | |
1317 | + */ | |
1318 | + if (!css_is_ancestor(&oom_wait_info->mem->css, &wake_mem->css) && | |
1319 | + !css_is_ancestor(&wake_mem->css, &oom_wait_info->mem->css)) | |
1320 | + return 0; | |
1321 | + | |
1322 | +wakeup: | |
1323 | + return autoremove_wake_function(wait, mode, sync, arg); | |
1324 | +} | |
1325 | + | |
1326 | +static void memcg_wakeup_oom(struct mem_cgroup *mem) | |
1327 | +{ | |
1328 | + /* for filtering, pass "mem" as argument. */ | |
1329 | + __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem); | |
1330 | +} | |
1331 | + | |
1296 | 1332 | /* |
1297 | 1333 | * try to call OOM killer. returns false if we should exit memory-reclaim loop. |
1298 | 1334 | */ |
1299 | 1335 | bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) |
1300 | 1336 | { |
1301 | - DEFINE_WAIT(wait); | |
1337 | + struct oom_wait_info owait; | |
1302 | 1338 | bool locked; |
1303 | 1339 | |
1340 | + owait.mem = mem; | |
1341 | + owait.wait.flags = 0; | |
1342 | + owait.wait.func = memcg_oom_wake_function; | |
1343 | + owait.wait.private = current; | |
1344 | + INIT_LIST_HEAD(&owait.wait.task_list); | |
1345 | + | |
1304 | 1346 | /* At first, try to OOM lock hierarchy under mem.*/ |
1305 | 1347 | mutex_lock(&memcg_oom_mutex); |
1306 | 1348 | locked = mem_cgroup_oom_lock(mem); |
1307 | 1349 | |
1308 | 1350 | |
... | ... | @@ -1310,31 +1352,18 @@ |
1310 | 1352 | * under OOM is always welcomed, use TASK_KILLABLE here. |
1311 | 1353 | */ |
1312 | 1354 | if (!locked) |
1313 | - prepare_to_wait(&memcg_oom_waitq, &wait, TASK_KILLABLE); | |
1355 | + prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); | |
1314 | 1356 | mutex_unlock(&memcg_oom_mutex); |
1315 | 1357 | |
1316 | 1358 | if (locked) |
1317 | 1359 | mem_cgroup_out_of_memory(mem, mask); |
1318 | 1360 | else { |
1319 | 1361 | schedule(); |
1320 | - finish_wait(&memcg_oom_waitq, &wait); | |
1362 | + finish_wait(&memcg_oom_waitq, &owait.wait); | |
1321 | 1363 | } |
1322 | 1364 | mutex_lock(&memcg_oom_mutex); |
1323 | 1365 | mem_cgroup_oom_unlock(mem); |
1324 | - /* | |
1325 | - * Here, we use global waitq .....more fine grained waitq ? | |
1326 | - * Assume following hierarchy. | |
1327 | - * A/ | |
1328 | - * 01 | |
1329 | - * 02 | |
1330 | - * assume OOM happens both in A and 01 at the same time. Tthey are | |
1331 | - * mutually exclusive by lock. (kill in 01 helps A.) | |
1332 | - * When we use per memcg waitq, we have to wake up waiters on A and 02 | |
1333 | - * in addtion to waiters on 01. We use global waitq for avoiding mess. | |
1334 | - * It will not be a big problem. | |
1335 | - * (And a task may be moved to other groups while it's waiting for OOM.) | |
1336 | - */ | |
1337 | - wake_up_all(&memcg_oom_waitq); | |
1366 | + memcg_wakeup_oom(mem); | |
1338 | 1367 | mutex_unlock(&memcg_oom_mutex); |
1339 | 1368 | |
1340 | 1369 | if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) |