Commit c268e9946d7dc30ac4e55cdc3f43c8af1ae8153c
Committed by
Linus Torvalds
1 parent
40d58138f8
Exists in
master
and in
4 other branches
memcg: fix hierarchical reclaim
If root_mem has no children, last_scanned_child is set to root_mem itself. But after some children are added to root_mem, mem_cgroup_get_next_node can mem_cgroup_put the root_mem even though no mem_cgroup_get was ever done on root_mem.

This patch fixes this behavior by:

- Set last_scanned_child to NULL if root_mem has no children or the DFS search has returned to root_mem itself (root_mem is not a "child" of root_mem). Make mem_cgroup_get_first_node return root_mem in this case. There is no mem_cgroup_get/put for root_mem.
- Rename mem_cgroup_get_next_node to __mem_cgroup_get_next_node, and mem_cgroup_get_first_node to mem_cgroup_get_next_node. Make mem_cgroup_hierarchical_reclaim call only the new mem_cgroup_get_next_node.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 36 additions and 32 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -633,7 +633,7 @@ |
633 | 633 | * called with hierarchy_mutex held |
634 | 634 | */ |
635 | 635 | static struct mem_cgroup * |
636 | -mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | |
636 | +__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | |
637 | 637 | { |
638 | 638 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; |
639 | 639 | |
640 | 640 | |
641 | 641 | |
... | ... | @@ -644,19 +644,16 @@ |
644 | 644 | /* |
645 | 645 | * Walk down to children |
646 | 646 | */ |
647 | - mem_cgroup_put(curr); | |
648 | 647 | cgroup = list_entry(curr_cgroup->children.next, |
649 | 648 | struct cgroup, sibling); |
650 | 649 | curr = mem_cgroup_from_cont(cgroup); |
651 | - mem_cgroup_get(curr); | |
652 | 650 | goto done; |
653 | 651 | } |
654 | 652 | |
655 | 653 | visit_parent: |
656 | 654 | if (curr_cgroup == root_cgroup) { |
657 | - mem_cgroup_put(curr); | |
658 | - curr = root_mem; | |
659 | - mem_cgroup_get(curr); | |
655 | + /* caller handles NULL case */ | |
656 | + curr = NULL; | |
660 | 657 | goto done; |
661 | 658 | } |
662 | 659 | |
663 | 660 | |
... | ... | @@ -664,11 +661,9 @@ |
664 | 661 | * Goto next sibling |
665 | 662 | */ |
666 | 663 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { |
667 | - mem_cgroup_put(curr); | |
668 | 664 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, |
669 | 665 | sibling); |
670 | 666 | curr = mem_cgroup_from_cont(cgroup); |
671 | - mem_cgroup_get(curr); | |
672 | 667 | goto done; |
673 | 668 | } |
674 | 669 | |
... | ... | @@ -679,7 +674,6 @@ |
679 | 674 | goto visit_parent; |
680 | 675 | |
681 | 676 | done: |
682 | - root_mem->last_scanned_child = curr; | |
683 | 677 | return curr; |
684 | 678 | } |
685 | 679 | |
686 | 680 | |
687 | 681 | |
688 | 682 | |
689 | 683 | |
690 | 684 | |
691 | 685 | |
692 | 686 | |
693 | 687 | |
694 | 688 | |
... | ... | @@ -689,40 +683,46 @@ |
689 | 683 | * that to reclaim free pages from. |
690 | 684 | */ |
691 | 685 | static struct mem_cgroup * |
692 | -mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | |
686 | +mem_cgroup_get_next_node(struct mem_cgroup *root_mem) | |
693 | 687 | { |
694 | 688 | struct cgroup *cgroup; |
695 | - struct mem_cgroup *ret; | |
689 | + struct mem_cgroup *orig, *next; | |
696 | 690 | bool obsolete; |
697 | 691 | |
698 | - obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); | |
699 | - | |
700 | 692 | /* |
701 | 693 | * Scan all children under the mem_cgroup mem |
702 | 694 | */ |
703 | 695 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); |
696 | + | |
697 | + orig = root_mem->last_scanned_child; | |
698 | + obsolete = mem_cgroup_is_obsolete(orig); | |
699 | + | |
704 | 700 | if (list_empty(&root_mem->css.cgroup->children)) { |
705 | - ret = root_mem; | |
701 | + /* | |
702 | + * root_mem might have children before and last_scanned_child | |
703 | + * may point to one of them. We put it later. | |
704 | + */ | |
705 | + if (orig) | |
706 | + VM_BUG_ON(!obsolete); | |
707 | + next = NULL; | |
706 | 708 | goto done; |
707 | 709 | } |
708 | 710 | |
709 | - if (!root_mem->last_scanned_child || obsolete) { | |
710 | - | |
711 | - if (obsolete && root_mem->last_scanned_child) | |
712 | - mem_cgroup_put(root_mem->last_scanned_child); | |
713 | - | |
711 | + if (!orig || obsolete) { | |
714 | 712 | cgroup = list_first_entry(&root_mem->css.cgroup->children, |
715 | 713 | struct cgroup, sibling); |
716 | - ret = mem_cgroup_from_cont(cgroup); | |
717 | - mem_cgroup_get(ret); | |
714 | + next = mem_cgroup_from_cont(cgroup); | |
718 | 715 | } else |
719 | - ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, | |
720 | - root_mem); | |
716 | + next = __mem_cgroup_get_next_node(orig, root_mem); | |
721 | 717 | |
722 | 718 | done: |
723 | - root_mem->last_scanned_child = ret; | |
719 | + if (next) | |
720 | + mem_cgroup_get(next); | |
721 | + root_mem->last_scanned_child = next; | |
722 | + if (orig) | |
723 | + mem_cgroup_put(orig); | |
724 | 724 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); |
725 | - return ret; | |
725 | + return (next) ? next : root_mem; | |
726 | 726 | } |
727 | 727 | |
728 | 728 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) |
729 | 729 | |
730 | 730 | |
... | ... | @@ -780,21 +780,18 @@ |
780 | 780 | if (!root_mem->use_hierarchy) |
781 | 781 | return ret; |
782 | 782 | |
783 | - next_mem = mem_cgroup_get_first_node(root_mem); | |
783 | + next_mem = mem_cgroup_get_next_node(root_mem); | |
784 | 784 | |
785 | 785 | while (next_mem != root_mem) { |
786 | 786 | if (mem_cgroup_is_obsolete(next_mem)) { |
787 | - mem_cgroup_put(next_mem); | |
788 | - next_mem = mem_cgroup_get_first_node(root_mem); | |
787 | + next_mem = mem_cgroup_get_next_node(root_mem); | |
789 | 788 | continue; |
790 | 789 | } |
791 | 790 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
792 | 791 | get_swappiness(next_mem)); |
793 | 792 | if (mem_cgroup_check_under_limit(root_mem)) |
794 | 793 | return 0; |
795 | - mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | |
796 | - next_mem = mem_cgroup_get_next_node(next_mem, root_mem); | |
797 | - mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | |
794 | + next_mem = mem_cgroup_get_next_node(root_mem); | |
798 | 795 | } |
799 | 796 | return ret; |
800 | 797 | } |
... | ... | @@ -2254,7 +2251,14 @@ |
2254 | 2251 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
2255 | 2252 | struct cgroup *cont) |
2256 | 2253 | { |
2257 | - mem_cgroup_put(mem_cgroup_from_cont(cont)); | |
2254 | + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | |
2255 | + struct mem_cgroup *last_scanned_child = mem->last_scanned_child; | |
2256 | + | |
2257 | + if (last_scanned_child) { | |
2258 | + VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); | |
2259 | + mem_cgroup_put(last_scanned_child); | |
2260 | + } | |
2261 | + mem_cgroup_put(mem); | |
2258 | 2262 | } |
2259 | 2263 | |
2260 | 2264 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |