Commit 072c56c13e1302fcdc39961dc64e76485731ad67
Committed by
Linus Torvalds
1 parent
1ecaab2bd2
Exists in
master
and in
4 other branches
per-zone and reclaim enhancements for memory controller: per-zone-lock for cgroup
Now, lru is per-zone. Then, lru_lock can be (should be) per-zone, too. This patch implements per-zone lru lock. lru_lock is placed into mem_cgroup_per_zone struct. lock can be accessed by mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone); &mz->lru_lock or mz = page_cgroup_zoneinfo(page_cgroup); &mz->lru_lock Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: David Rientjes <rientjes@google.com> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Kirill Korotaev <dev@sw.ru> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Paul Menage <menage@google.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 44 additions and 27 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -89,6 +89,10 @@ |
89 | 89 | }; |
90 | 90 | |
91 | 91 | struct mem_cgroup_per_zone { |
92 | + /* | |
93 | + * spin_lock to protect the per cgroup LRU | |
94 | + */ | |
95 | + spinlock_t lru_lock; | |
92 | 96 | struct list_head active_list; |
93 | 97 | struct list_head inactive_list; |
94 | 98 | unsigned long count[NR_MEM_CGROUP_ZSTAT]; |
... | ... | @@ -126,10 +130,7 @@ |
126 | 130 | * per zone LRU lists. |
127 | 131 | */ |
128 | 132 | struct mem_cgroup_lru_info info; |
129 | - /* | |
130 | - * spin_lock to protect the per cgroup LRU | |
131 | - */ | |
132 | - spinlock_t lru_lock; | |
133 | + | |
133 | 134 | unsigned long control_type; /* control RSS or RSS+Pagecache */ |
134 | 135 | int prev_priority; /* for recording reclaim priority */ |
135 | 136 | /* |
136 | 137 | |
137 | 138 | |
... | ... | @@ -409,15 +410,16 @@ |
409 | 410 | */ |
410 | 411 | void mem_cgroup_move_lists(struct page_cgroup *pc, bool active) |
411 | 412 | { |
412 | - struct mem_cgroup *mem; | |
413 | + struct mem_cgroup_per_zone *mz; | |
414 | + unsigned long flags; | |
415 | + | |
413 | 416 | if (!pc) |
414 | 417 | return; |
415 | 418 | |
416 | - mem = pc->mem_cgroup; | |
417 | - | |
418 | - spin_lock(&mem->lru_lock); | |
419 | + mz = page_cgroup_zoneinfo(pc); | |
420 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
419 | 421 | __mem_cgroup_move_lists(pc, active); |
420 | - spin_unlock(&mem->lru_lock); | |
422 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
421 | 423 | } |
422 | 424 | |
423 | 425 | /* |
... | ... | @@ -527,7 +529,7 @@ |
527 | 529 | src = &mz->inactive_list; |
528 | 530 | |
529 | 531 | |
530 | - spin_lock(&mem_cont->lru_lock); | |
532 | + spin_lock(&mz->lru_lock); | |
531 | 533 | scan = 0; |
532 | 534 | list_for_each_entry_safe_reverse(pc, tmp, src, lru) { |
533 | 535 | if (scan >= nr_to_scan) |
... | ... | @@ -557,7 +559,7 @@ |
557 | 559 | } |
558 | 560 | |
559 | 561 | list_splice(&pc_list, src); |
560 | - spin_unlock(&mem_cont->lru_lock); | |
562 | + spin_unlock(&mz->lru_lock); | |
561 | 563 | |
562 | 564 | *scanned = scan; |
563 | 565 | return nr_taken; |
... | ... | @@ -576,6 +578,7 @@ |
576 | 578 | struct page_cgroup *pc; |
577 | 579 | unsigned long flags; |
578 | 580 | unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
581 | + struct mem_cgroup_per_zone *mz; | |
579 | 582 | |
580 | 583 | /* |
581 | 584 | * Should page_cgroup's go to their own slab? |
582 | 585 | |
... | ... | @@ -677,10 +680,11 @@ |
677 | 680 | goto retry; |
678 | 681 | } |
679 | 682 | |
680 | - spin_lock_irqsave(&mem->lru_lock, flags); | |
683 | + mz = page_cgroup_zoneinfo(pc); | |
684 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
681 | 685 | /* Update statistics vector */ |
682 | 686 | __mem_cgroup_add_list(pc); |
683 | - spin_unlock_irqrestore(&mem->lru_lock, flags); | |
687 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
684 | 688 | |
685 | 689 | done: |
686 | 690 | return 0; |
... | ... | @@ -727,6 +731,7 @@ |
727 | 731 | void mem_cgroup_uncharge(struct page_cgroup *pc) |
728 | 732 | { |
729 | 733 | struct mem_cgroup *mem; |
734 | + struct mem_cgroup_per_zone *mz; | |
730 | 735 | struct page *page; |
731 | 736 | unsigned long flags; |
732 | 737 | |
... | ... | @@ -739,6 +744,7 @@ |
739 | 744 | |
740 | 745 | if (atomic_dec_and_test(&pc->ref_cnt)) { |
741 | 746 | page = pc->page; |
747 | + mz = page_cgroup_zoneinfo(pc); | |
742 | 748 | /* |
743 | 749 | * get page->cgroup and clear it under lock. |
744 | 750 | * force_empty can drop page->cgroup without checking refcnt. |
745 | 751 | |
... | ... | @@ -747,9 +753,9 @@ |
747 | 753 | mem = pc->mem_cgroup; |
748 | 754 | css_put(&mem->css); |
749 | 755 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
750 | - spin_lock_irqsave(&mem->lru_lock, flags); | |
756 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
751 | 757 | __mem_cgroup_remove_list(pc); |
752 | - spin_unlock_irqrestore(&mem->lru_lock, flags); | |
758 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
753 | 759 | kfree(pc); |
754 | 760 | } |
755 | 761 | } |
756 | 762 | |
757 | 763 | |
758 | 764 | |
759 | 765 | |
760 | 766 | |
761 | 767 | |
... | ... | @@ -788,24 +794,29 @@ |
788 | 794 | struct page_cgroup *pc; |
789 | 795 | struct mem_cgroup *mem; |
790 | 796 | unsigned long flags; |
797 | + struct mem_cgroup_per_zone *mz; | |
791 | 798 | retry: |
792 | 799 | pc = page_get_page_cgroup(page); |
793 | 800 | if (!pc) |
794 | 801 | return; |
795 | 802 | mem = pc->mem_cgroup; |
803 | + mz = page_cgroup_zoneinfo(pc); | |
796 | 804 | if (clear_page_cgroup(page, pc) != pc) |
797 | 805 | goto retry; |
806 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
798 | 807 | |
799 | - spin_lock_irqsave(&mem->lru_lock, flags); | |
800 | - | |
801 | 808 | __mem_cgroup_remove_list(pc); |
809 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
810 | + | |
802 | 811 | pc->page = newpage; |
803 | 812 | lock_page_cgroup(newpage); |
804 | 813 | page_assign_page_cgroup(newpage, pc); |
805 | 814 | unlock_page_cgroup(newpage); |
806 | - __mem_cgroup_add_list(pc); | |
807 | 815 | |
808 | - spin_unlock_irqrestore(&mem->lru_lock, flags); | |
816 | + mz = page_cgroup_zoneinfo(pc); | |
817 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
818 | + __mem_cgroup_add_list(pc); | |
819 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
809 | 820 | return; |
810 | 821 | } |
811 | 822 | |
812 | 823 | |
813 | 824 | |
814 | 825 | |
... | ... | @@ -816,18 +827,26 @@ |
816 | 827 | */ |
817 | 828 | #define FORCE_UNCHARGE_BATCH (128) |
818 | 829 | static void |
819 | -mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list) | |
830 | +mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |
831 | + struct mem_cgroup_per_zone *mz, | |
832 | + int active) | |
820 | 833 | { |
821 | 834 | struct page_cgroup *pc; |
822 | 835 | struct page *page; |
823 | 836 | int count; |
824 | 837 | unsigned long flags; |
838 | + struct list_head *list; | |
825 | 839 | |
840 | + if (active) | |
841 | + list = &mz->active_list; | |
842 | + else | |
843 | + list = &mz->inactive_list; | |
844 | + | |
826 | 845 | if (list_empty(list)) |
827 | 846 | return; |
828 | 847 | retry: |
829 | 848 | count = FORCE_UNCHARGE_BATCH; |
830 | - spin_lock_irqsave(&mem->lru_lock, flags); | |
849 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
831 | 850 | |
832 | 851 | while (--count && !list_empty(list)) { |
833 | 852 | pc = list_entry(list->prev, struct page_cgroup, lru); |
... | ... | @@ -842,7 +861,7 @@ |
842 | 861 | } else /* being uncharged ? ...do relax */ |
843 | 862 | break; |
844 | 863 | } |
845 | - spin_unlock_irqrestore(&mem->lru_lock, flags); | |
864 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
846 | 865 | if (!list_empty(list)) { |
847 | 866 | cond_resched(); |
848 | 867 | goto retry; |
849 | 868 | |
... | ... | @@ -873,11 +892,9 @@ |
873 | 892 | struct mem_cgroup_per_zone *mz; |
874 | 893 | mz = mem_cgroup_zoneinfo(mem, node, zid); |
875 | 894 | /* drop all page_cgroup in active_list */ |
876 | - mem_cgroup_force_empty_list(mem, | |
877 | - &mz->active_list); | |
895 | + mem_cgroup_force_empty_list(mem, mz, 1); | |
878 | 896 | /* drop all page_cgroup in inactive_list */ |
879 | - mem_cgroup_force_empty_list(mem, | |
880 | - &mz->inactive_list); | |
897 | + mem_cgroup_force_empty_list(mem, mz, 0); | |
881 | 898 | } |
882 | 899 | } |
883 | 900 | ret = 0; |
... | ... | @@ -1114,6 +1131,7 @@ |
1114 | 1131 | mz = &pn->zoneinfo[zone]; |
1115 | 1132 | INIT_LIST_HEAD(&mz->active_list); |
1116 | 1133 | INIT_LIST_HEAD(&mz->inactive_list); |
1134 | + spin_lock_init(&mz->lru_lock); | |
1117 | 1135 | } |
1118 | 1136 | return 0; |
1119 | 1137 | } |
... | ... | @@ -1143,7 +1161,6 @@ |
1143 | 1161 | |
1144 | 1162 | res_counter_init(&mem->res); |
1145 | 1163 | |
1146 | - spin_lock_init(&mem->lru_lock); | |
1147 | 1164 | mem->control_type = MEM_CGROUP_TYPE_ALL; |
1148 | 1165 | memset(&mem->info, 0, sizeof(mem->info)); |
1149 | 1166 |