Commit 7d74b06f240f1bd1b4b68dd6fe84164d8bf4e315
Committed by
Linus Torvalds
1 parent
32047e2a85
Exists in
master
and in
4 other branches
memcg: use for_each_mem_cgroup
In memory cgroup management, we sometimes have to walk through the subhierarchy of a cgroup to gather information, or lock something, etc. Now, to do that, the mem_cgroup_walk_tree() function is provided. It calls a given callback function per cgroup found. But the bad thing is that it has to pass a fixed-style function and argument, "void*", and it adds much type casting to memcontrol.c. To make the code clean, this patch replaces walk_tree() with for_each_mem_cgroup_tree(iter, root), an iterator-style call. The good point is that the iterator call doesn't have to assume what kind of function is called under it. A bad point is that it may cause a reference-count leak if a caller uses "break" from the loop by mistake. I think the benefit is larger. The modified code seems straightforward and easy to read because we don't have mysterious callbacks and pointer casts. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 83 additions and 87 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -660,41 +660,58 @@ |
660 | 660 | return mem; |
661 | 661 | } |
662 | 662 | |
663 | -/* | |
664 | - * Call callback function against all cgroup under hierarchy tree. | |
665 | - */ | |
666 | -static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, | |
667 | - int (*func)(struct mem_cgroup *, void *)) | |
663 | +/* The caller has to guarantee "mem" exists before calling this */ | |
664 | +static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) | |
668 | 665 | { |
669 | - int found, ret, nextid; | |
666 | + if (mem && css_tryget(&mem->css)) | |
667 | + return mem; | |
668 | + return NULL; | |
669 | +} | |
670 | + | |
671 | +static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, | |
672 | + struct mem_cgroup *root, | |
673 | + bool cond) | |
674 | +{ | |
675 | + int nextid = css_id(&iter->css) + 1; | |
676 | + int found; | |
677 | + int hierarchy_used; | |
670 | 678 | struct cgroup_subsys_state *css; |
671 | - struct mem_cgroup *mem; | |
672 | 679 | |
673 | - if (!root->use_hierarchy) | |
674 | - return (*func)(root, data); | |
680 | + hierarchy_used = iter->use_hierarchy; | |
675 | 681 | |
676 | - nextid = 1; | |
677 | - do { | |
678 | - ret = 0; | |
679 | - mem = NULL; | |
682 | + css_put(&iter->css); | |
683 | + if (!cond || !hierarchy_used) | |
684 | + return NULL; | |
680 | 685 | |
686 | + do { | |
687 | + iter = NULL; | |
681 | 688 | rcu_read_lock(); |
682 | - css = css_get_next(&mem_cgroup_subsys, nextid, &root->css, | |
683 | - &found); | |
689 | + | |
690 | + css = css_get_next(&mem_cgroup_subsys, nextid, | |
691 | + &root->css, &found); | |
684 | 692 | if (css && css_tryget(css)) |
685 | - mem = container_of(css, struct mem_cgroup, css); | |
693 | + iter = container_of(css, struct mem_cgroup, css); | |
686 | 694 | rcu_read_unlock(); |
687 | - | |
688 | - if (mem) { | |
689 | - ret = (*func)(mem, data); | |
690 | - css_put(&mem->css); | |
691 | - } | |
695 | + /* If css is NULL, no more cgroups will be found */ | |
692 | 696 | nextid = found + 1; |
693 | - } while (!ret && css); | |
697 | + } while (css && !iter); | |
694 | 698 | |
695 | - return ret; | |
699 | + return iter; | |
696 | 700 | } |
701 | +/* | |
702 | + * for_each_mem_cgroup_tree() for visiting all cgroups under the tree. Please | |
703 | + * be careful that "break" loop is not allowed. We have reference count. | |
704 | + * Instead of that, modify "cond" to be false and "continue" to exit the loop. | |
705 | + */ | |
706 | +#define for_each_mem_cgroup_tree_cond(iter, root, cond) \ | |
707 | + for (iter = mem_cgroup_start_loop(root);\ | |
708 | + iter != NULL;\ | |
709 | + iter = mem_cgroup_get_next(iter, root, cond)) | |
697 | 710 | |
711 | +#define for_each_mem_cgroup_tree(iter, root) \ | |
712 | + for_each_mem_cgroup_tree_cond(iter, root, true) | |
713 | + | |
714 | + | |
698 | 715 | static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) |
699 | 716 | { |
700 | 717 | return (mem == root_mem_cgroup); |
... | ... | @@ -1132,13 +1149,6 @@ |
1132 | 1149 | return false; |
1133 | 1150 | } |
1134 | 1151 | |
1135 | -static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data) | |
1136 | -{ | |
1137 | - int *val = data; | |
1138 | - (*val)++; | |
1139 | - return 0; | |
1140 | -} | |
1141 | - | |
1142 | 1152 | /** |
1143 | 1153 | * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. |
1144 | 1154 | * @memcg: The memory cgroup that went over limit |
... | ... | @@ -1213,7 +1223,10 @@ |
1213 | 1223 | static int mem_cgroup_count_children(struct mem_cgroup *mem) |
1214 | 1224 | { |
1215 | 1225 | int num = 0; |
1216 | - mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb); | |
1226 | + struct mem_cgroup *iter; | |
1227 | + | |
1228 | + for_each_mem_cgroup_tree(iter, mem) | |
1229 | + num++; | |
1217 | 1230 | return num; |
1218 | 1231 | } |
1219 | 1232 | |
1220 | 1233 | |
1221 | 1234 | |
1222 | 1235 | |
1223 | 1236 | |
1224 | 1237 | |
1225 | 1238 | |
... | ... | @@ -1362,49 +1375,39 @@ |
1362 | 1375 | return total; |
1363 | 1376 | } |
1364 | 1377 | |
1365 | -static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data) | |
1366 | -{ | |
1367 | - int *val = (int *)data; | |
1368 | - int x; | |
1369 | - /* | |
1370 | - * Logically, we can stop scanning immediately when we find | |
1371 | - * a memcg is already locked. But condidering unlock ops and | |
1372 | - * creation/removal of memcg, scan-all is simple operation. | |
1373 | - */ | |
1374 | - x = atomic_inc_return(&mem->oom_lock); | |
1375 | - *val = max(x, *val); | |
1376 | - return 0; | |
1377 | -} | |
1378 | 1378 | /* |
1379 | 1379 | * Check OOM-Killer is already running under our hierarchy. |
1380 | 1380 | * If someone is running, return false. |
1381 | 1381 | */ |
1382 | 1382 | static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) |
1383 | 1383 | { |
1384 | - int lock_count = 0; | |
1384 | + int x, lock_count = 0; | |
1385 | + struct mem_cgroup *iter; | |
1385 | 1386 | |
1386 | - mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb); | |
1387 | + for_each_mem_cgroup_tree(iter, mem) { | |
1388 | + x = atomic_inc_return(&iter->oom_lock); | |
1389 | + lock_count = max(x, lock_count); | |
1390 | + } | |
1387 | 1391 | |
1388 | 1392 | if (lock_count == 1) |
1389 | 1393 | return true; |
1390 | 1394 | return false; |
1391 | 1395 | } |
1392 | 1396 | |
1393 | -static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data) | |
1397 | +static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) | |
1394 | 1398 | { |
1399 | + struct mem_cgroup *iter; | |
1400 | + | |
1395 | 1401 | /* |
1396 | 1402 | * When a new child is created while the hierarchy is under oom, |
1397 | 1403 | * mem_cgroup_oom_lock() may not be called. We have to use |
1398 | 1404 | * atomic_add_unless() here. |
1399 | 1405 | */ |
1400 | - atomic_add_unless(&mem->oom_lock, -1, 0); | |
1406 | + for_each_mem_cgroup_tree(iter, mem) | |
1407 | + atomic_add_unless(&iter->oom_lock, -1, 0); | |
1401 | 1408 | return 0; |
1402 | 1409 | } |
1403 | 1410 | |
1404 | -static void mem_cgroup_oom_unlock(struct mem_cgroup *mem) | |
1405 | -{ | |
1406 | - mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb); | |
1407 | -} | |
1408 | 1411 | |
1409 | 1412 | static DEFINE_MUTEX(memcg_oom_mutex); |
1410 | 1413 | static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); |
1411 | 1414 | |
1412 | 1415 | |
1413 | 1416 | |
1414 | 1417 | |
... | ... | @@ -3207,33 +3210,25 @@ |
3207 | 3210 | return retval; |
3208 | 3211 | } |
3209 | 3212 | |
3210 | -struct mem_cgroup_idx_data { | |
3211 | - s64 val; | |
3212 | - enum mem_cgroup_stat_index idx; | |
3213 | -}; | |
3214 | 3213 | |
3215 | -static int | |
3216 | -mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) | |
3214 | +static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, | |
3215 | + enum mem_cgroup_stat_index idx) | |
3217 | 3216 | { |
3218 | - struct mem_cgroup_idx_data *d = data; | |
3219 | - d->val += mem_cgroup_read_stat(mem, d->idx); | |
3220 | - return 0; | |
3221 | -} | |
3217 | + struct mem_cgroup *iter; | |
3218 | + s64 val = 0; | |
3222 | 3219 | |
3223 | -static void | |
3224 | -mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, | |
3225 | - enum mem_cgroup_stat_index idx, s64 *val) | |
3226 | -{ | |
3227 | - struct mem_cgroup_idx_data d; | |
3228 | - d.idx = idx; | |
3229 | - d.val = 0; | |
3230 | - mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); | |
3231 | - *val = d.val; | |
3220 | + /* each per-cpu value can be negative; hence, use s64 */ | |
3221 | + for_each_mem_cgroup_tree(iter, mem) | |
3222 | + val += mem_cgroup_read_stat(iter, idx); | |
3223 | + | |
3224 | + if (val < 0) /* race ? */ | |
3225 | + val = 0; | |
3226 | + return val; | |
3232 | 3227 | } |
3233 | 3228 | |
3234 | 3229 | static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) |
3235 | 3230 | { |
3236 | - u64 idx_val, val; | |
3231 | + u64 val; | |
3237 | 3232 | |
3238 | 3233 | if (!mem_cgroup_is_root(mem)) { |
3239 | 3234 | if (!swap) |
3240 | 3235 | |
... | ... | @@ -3242,16 +3237,12 @@ |
3242 | 3237 | return res_counter_read_u64(&mem->memsw, RES_USAGE); |
3243 | 3238 | } |
3244 | 3239 | |
3245 | - mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val); | |
3246 | - val = idx_val; | |
3247 | - mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val); | |
3248 | - val += idx_val; | |
3240 | + val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE); | |
3241 | + val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS); | |
3249 | 3242 | |
3250 | - if (swap) { | |
3251 | - mem_cgroup_get_recursive_idx_stat(mem, | |
3252 | - MEM_CGROUP_STAT_SWAPOUT, &idx_val); | |
3253 | - val += idx_val; | |
3254 | - } | |
3243 | + if (swap) | |
3244 | + val += mem_cgroup_get_recursive_idx_stat(mem, | |
3245 | + MEM_CGROUP_STAT_SWAPOUT); | |
3255 | 3246 | |
3256 | 3247 | return val << PAGE_SHIFT; |
3257 | 3248 | } |
3258 | 3249 | |
... | ... | @@ -3459,9 +3450,9 @@ |
3459 | 3450 | }; |
3460 | 3451 | |
3461 | 3452 | |
3462 | -static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | |
3453 | +static void | |
3454 | +mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) | |
3463 | 3455 | { |
3464 | - struct mcs_total_stat *s = data; | |
3465 | 3456 | s64 val; |
3466 | 3457 | |
3467 | 3458 | /* per cpu stat */ |
3468 | 3459 | |
... | ... | @@ -3491,13 +3482,15 @@ |
3491 | 3482 | s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; |
3492 | 3483 | val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); |
3493 | 3484 | s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; |
3494 | - return 0; | |
3495 | 3485 | } |
3496 | 3486 | |
3497 | 3487 | static void |
3498 | 3488 | mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) |
3499 | 3489 | { |
3500 | - mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat); | |
3490 | + struct mem_cgroup *iter; | |
3491 | + | |
3492 | + for_each_mem_cgroup_tree(iter, mem) | |
3493 | + mem_cgroup_get_local_stat(iter, s); | |
3501 | 3494 | } |
3502 | 3495 | |
3503 | 3496 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, |
... | ... | @@ -3674,7 +3667,7 @@ |
3674 | 3667 | return _a->threshold - _b->threshold; |
3675 | 3668 | } |
3676 | 3669 | |
3677 | -static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data) | |
3670 | +static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) | |
3678 | 3671 | { |
3679 | 3672 | struct mem_cgroup_eventfd_list *ev; |
3680 | 3673 | |
... | ... | @@ -3685,7 +3678,10 @@ |
3685 | 3678 | |
3686 | 3679 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem) |
3687 | 3680 | { |
3688 | - mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb); | |
3681 | + struct mem_cgroup *iter; | |
3682 | + | |
3683 | + for_each_mem_cgroup_tree(iter, mem) | |
3684 | + mem_cgroup_oom_notify_cb(iter); | |
3689 | 3685 | } |
3690 | 3686 | |
3691 | 3687 | static int mem_cgroup_usage_register_event(struct cgroup *cgrp, |