Commit b9c565d5a29a795f970b4a1340393d8fc6722fb9
Committed by
Linus Torvalds
1 parent
d5b69e38f8
Exists in
master
and in
4 other branches
memcg: remove clear_page_cgroup and atomics
Remove clear_page_cgroup: it's an unhelpful helper, see for example how mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it (serious races from that? I'm not sure). Once that's gone, you can see it's pointless for page_cgroup's ref_cnt to be atomic: it's always manipulated under lock_page_cgroup, except where force_empty unilaterally reset it to 0 (and how does uncharge's atomic_dec_and_test protect against that?). Simplify this page_cgroup locking: if you've got the lock and the pc is attached, then the ref_cnt must be positive: VM_BUG_ONs to check that, and to check that pc->page matches page (we're on the way to finding why sometimes it doesn't, but this patch doesn't fix that). Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: David Rientjes <rientjes@google.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Paul Menage <menage@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 43 additions and 63 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -161,8 +161,7 @@ |
161 | 161 | struct list_head lru; /* per cgroup LRU list */ |
162 | 162 | struct page *page; |
163 | 163 | struct mem_cgroup *mem_cgroup; |
164 | - atomic_t ref_cnt; /* Helpful when pages move b/w */ | |
165 | - /* mapped and cached states */ | |
164 | + int ref_cnt; /* cached, mapped, migrating */ | |
166 | 165 | int flags; |
167 | 166 | }; |
168 | 167 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
... | ... | @@ -283,27 +282,6 @@ |
283 | 282 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
284 | 283 | } |
285 | 284 | |
286 | -/* | |
287 | - * Clear page->page_cgroup member under lock_page_cgroup(). | |
288 | - * If given "pc" value is different from one page->page_cgroup, | |
289 | - * page->cgroup is not cleared. | |
290 | - * Returns a value of page->page_cgroup at lock taken. | |
291 | - * A can can detect failure of clearing by following | |
292 | - * clear_page_cgroup(page, pc) == pc | |
293 | - */ | |
294 | -static struct page_cgroup *clear_page_cgroup(struct page *page, | |
295 | - struct page_cgroup *pc) | |
296 | -{ | |
297 | - struct page_cgroup *ret; | |
298 | - /* lock and clear */ | |
299 | - lock_page_cgroup(page); | |
300 | - ret = page_get_page_cgroup(page); | |
301 | - if (likely(ret == pc)) | |
302 | - page_assign_page_cgroup(page, NULL); | |
303 | - unlock_page_cgroup(page); | |
304 | - return ret; | |
305 | -} | |
306 | - | |
307 | 285 | static void __mem_cgroup_remove_list(struct page_cgroup *pc) |
308 | 286 | { |
309 | 287 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
... | ... | @@ -555,15 +533,12 @@ |
555 | 533 | * the page has already been accounted. |
556 | 534 | */ |
557 | 535 | if (pc) { |
558 | - if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) { | |
559 | - /* this page is under being uncharged ? */ | |
560 | - unlock_page_cgroup(page); | |
561 | - cpu_relax(); | |
562 | - goto retry; | |
563 | - } else { | |
564 | - unlock_page_cgroup(page); | |
565 | - goto done; | |
566 | - } | |
536 | + VM_BUG_ON(pc->page != page); | |
537 | + VM_BUG_ON(pc->ref_cnt <= 0); | |
538 | + | |
539 | + pc->ref_cnt++; | |
540 | + unlock_page_cgroup(page); | |
541 | + goto done; | |
567 | 542 | } |
568 | 543 | unlock_page_cgroup(page); |
569 | 544 | |
... | ... | @@ -612,7 +587,7 @@ |
612 | 587 | congestion_wait(WRITE, HZ/10); |
613 | 588 | } |
614 | 589 | |
615 | - atomic_set(&pc->ref_cnt, 1); | |
590 | + pc->ref_cnt = 1; | |
616 | 591 | pc->mem_cgroup = mem; |
617 | 592 | pc->page = page; |
618 | 593 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; |
619 | 594 | |
... | ... | @@ -683,24 +658,24 @@ |
683 | 658 | if (!pc) |
684 | 659 | goto unlock; |
685 | 660 | |
686 | - if (atomic_dec_and_test(&pc->ref_cnt)) { | |
687 | - page = pc->page; | |
688 | - mz = page_cgroup_zoneinfo(pc); | |
689 | - /* | |
690 | - * get page->cgroup and clear it under lock. | |
691 | - * force_empty can drop page->cgroup without checking refcnt. | |
692 | - */ | |
661 | + VM_BUG_ON(pc->page != page); | |
662 | + VM_BUG_ON(pc->ref_cnt <= 0); | |
663 | + | |
664 | + if (--(pc->ref_cnt) == 0) { | |
665 | + page_assign_page_cgroup(page, NULL); | |
693 | 666 | unlock_page_cgroup(page); |
694 | - if (clear_page_cgroup(page, pc) == pc) { | |
695 | - mem = pc->mem_cgroup; | |
696 | - css_put(&mem->css); | |
697 | - res_counter_uncharge(&mem->res, PAGE_SIZE); | |
698 | - spin_lock_irqsave(&mz->lru_lock, flags); | |
699 | - __mem_cgroup_remove_list(pc); | |
700 | - spin_unlock_irqrestore(&mz->lru_lock, flags); | |
701 | - kfree(pc); | |
702 | - } | |
703 | - lock_page_cgroup(page); | |
667 | + | |
668 | + mem = pc->mem_cgroup; | |
669 | + css_put(&mem->css); | |
670 | + res_counter_uncharge(&mem->res, PAGE_SIZE); | |
671 | + | |
672 | + mz = page_cgroup_zoneinfo(pc); | |
673 | + spin_lock_irqsave(&mz->lru_lock, flags); | |
674 | + __mem_cgroup_remove_list(pc); | |
675 | + spin_unlock_irqrestore(&mz->lru_lock, flags); | |
676 | + | |
677 | + kfree(pc); | |
678 | + return; | |
704 | 679 | } |
705 | 680 | |
706 | 681 | unlock: |
707 | 682 | |
708 | 683 | |
... | ... | @@ -714,14 +689,13 @@ |
714 | 689 | int mem_cgroup_prepare_migration(struct page *page) |
715 | 690 | { |
716 | 691 | struct page_cgroup *pc; |
717 | - int ret = 0; | |
718 | 692 | |
719 | 693 | lock_page_cgroup(page); |
720 | 694 | pc = page_get_page_cgroup(page); |
721 | - if (pc && atomic_inc_not_zero(&pc->ref_cnt)) | |
722 | - ret = 1; | |
695 | + if (pc) | |
696 | + pc->ref_cnt++; | |
723 | 697 | unlock_page_cgroup(page); |
724 | - return ret; | |
698 | + return pc != NULL; | |
725 | 699 | } |
726 | 700 | |
727 | 701 | void mem_cgroup_end_migration(struct page *page) |
728 | 702 | |
729 | 703 | |
730 | 704 | |
731 | 705 | |
... | ... | @@ -740,15 +714,17 @@ |
740 | 714 | struct mem_cgroup_per_zone *mz; |
741 | 715 | unsigned long flags; |
742 | 716 | |
743 | -retry: | |
717 | + lock_page_cgroup(page); | |
744 | 718 | pc = page_get_page_cgroup(page); |
745 | - if (!pc) | |
719 | + if (!pc) { | |
720 | + unlock_page_cgroup(page); | |
746 | 721 | return; |
722 | + } | |
747 | 723 | |
748 | - mz = page_cgroup_zoneinfo(pc); | |
749 | - if (clear_page_cgroup(page, pc) != pc) | |
750 | - goto retry; | |
724 | + page_assign_page_cgroup(page, NULL); | |
725 | + unlock_page_cgroup(page); | |
751 | 726 | |
727 | + mz = page_cgroup_zoneinfo(pc); | |
752 | 728 | spin_lock_irqsave(&mz->lru_lock, flags); |
753 | 729 | __mem_cgroup_remove_list(pc); |
754 | 730 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
755 | 731 | |
756 | 732 | |
... | ... | @@ -794,15 +770,19 @@ |
794 | 770 | while (--count && !list_empty(list)) { |
795 | 771 | pc = list_entry(list->prev, struct page_cgroup, lru); |
796 | 772 | page = pc->page; |
797 | - /* Avoid race with charge */ | |
798 | - atomic_set(&pc->ref_cnt, 0); | |
799 | - if (clear_page_cgroup(page, pc) == pc) { | |
773 | + lock_page_cgroup(page); | |
774 | + if (page_get_page_cgroup(page) == pc) { | |
775 | + page_assign_page_cgroup(page, NULL); | |
776 | + unlock_page_cgroup(page); | |
800 | 777 | css_put(&mem->css); |
801 | 778 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
802 | 779 | __mem_cgroup_remove_list(pc); |
803 | 780 | kfree(pc); |
804 | - } else /* being uncharged ? ...do relax */ | |
781 | + } else { | |
782 | + /* racing uncharge: let page go then retry */ | |
783 | + unlock_page_cgroup(page); | |
805 | 784 | break; |
785 | + } | |
806 | 786 | } |
807 | 787 | |
808 | 788 | spin_unlock_irqrestore(&mz->lru_lock, flags); |