Commit b9c565d5a29a795f970b4a1340393d8fc6722fb9

Authored by Hugh Dickins
Committed by Linus Torvalds
1 parent d5b69e38f8

memcg: remove clear_page_cgroup and atomics

Remove clear_page_cgroup: it's an unhelpful helper; see, for example, how
mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it
(serious races from that?  I'm not sure).
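
For illustration only (a condensed restatement of the old mem_cgroup_uncharge_page
code in the @@ -683 hunk below, using only identifiers that appear there): the
helper forced its caller to drop a lock it already held, just so clear_page_cgroup
could retake that same lock internally, and then to take it a third time afterwards.

    /* old uncharge path, condensed from the @@ -683 hunk below */
    if (atomic_dec_and_test(&pc->ref_cnt)) {
        unlock_page_cgroup(page);               /* drop the lock we hold... */
        if (clear_page_cgroup(page, pc) == pc) {
            /* ...because clear_page_cgroup takes it again internally */
            res_counter_uncharge(&mem->res, PAGE_SIZE);
            kfree(pc);
        }
        lock_page_cgroup(page);                 /* ...then take it yet again */
    }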

Once that's gone, you can see it's pointless for page_cgroup's ref_cnt to be
atomic: it's always manipulated under lock_page_cgroup, except where
force_empty unilaterally reset it to 0 (and how does uncharge's
atomic_dec_and_test protect against that?).
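
The replacement for that unilateral reset is in the force_empty hunk (@@ -794) at
the bottom; condensed here only as an illustration built from the identifiers in
that hunk, the race with uncharge is now decided by re-checking the page_cgroup
pointer while holding the lock, instead of zeroing the count behind uncharge's back.

    /* new force_empty shape, condensed from the @@ -794 hunk below */
    lock_page_cgroup(page);
    if (page_get_page_cgroup(page) == pc) {
        page_assign_page_cgroup(page, NULL);    /* we won the race: detach */
        unlock_page_cgroup(page);
        res_counter_uncharge(&mem->res, PAGE_SIZE);
        kfree(pc);
    } else {
        /* a racing uncharge already detached it; leave it alone */
        unlock_page_cgroup(page);
    }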

Simplify this page_cgroup locking: if you've got the lock and the pc is
attached, then the ref_cnt must be positive: VM_BUG_ONs to check that, and to
check that pc->page matches page (we're on the way to finding why sometimes it
doesn't, but this patch doesn't fix that).
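
Condensed from the charge-path hunk (@@ -555) below, and included only to show the
invariant in one place, the fast path becomes a plain locked increment guarded by
those VM_BUG_ONs:

    /* new charge fast path, condensed from the @@ -555 hunk below */
    lock_page_cgroup(page);
    pc = page_get_page_cgroup(page);
    if (pc) {
        VM_BUG_ON(pc->page != page);    /* pc must point back at this page */
        VM_BUG_ON(pc->ref_cnt <= 0);    /* attached implies a live reference */
        pc->ref_cnt++;                  /* plain int: the lock serializes it */
        unlock_page_cgroup(page);
        goto done;                      /* page already charged */
    }
    unlock_page_cgroup(page);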

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 43 additions and 63 deletions

... ... @@ -161,8 +161,7 @@
161 161 struct list_head lru; /* per cgroup LRU list */
162 162 struct page *page;
163 163 struct mem_cgroup *mem_cgroup;
164   - atomic_t ref_cnt; /* Helpful when pages move b/w */
165   - /* mapped and cached states */
  164 + int ref_cnt; /* cached, mapped, migrating */
166 165 int flags;
167 166 };
168 167 #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
... ... @@ -283,27 +282,6 @@
283 282 bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
284 283 }
285 284  
286   -/*
287   - * Clear page->page_cgroup member under lock_page_cgroup().
288   - * If given "pc" value is different from one page->page_cgroup,
289   - * page->cgroup is not cleared.
290   - * Returns a value of page->page_cgroup at lock taken.
291   - * A can can detect failure of clearing by following
292   - * clear_page_cgroup(page, pc) == pc
293   - */
294   -static struct page_cgroup *clear_page_cgroup(struct page *page,
295   - struct page_cgroup *pc)
296   -{
297   - struct page_cgroup *ret;
298   - /* lock and clear */
299   - lock_page_cgroup(page);
300   - ret = page_get_page_cgroup(page);
301   - if (likely(ret == pc))
302   - page_assign_page_cgroup(page, NULL);
303   - unlock_page_cgroup(page);
304   - return ret;
305   -}
306   -
307 285 static void __mem_cgroup_remove_list(struct page_cgroup *pc)
308 286 {
309 287 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
... ... @@ -555,15 +533,12 @@
555 533 * the page has already been accounted.
556 534 */
557 535 if (pc) {
558   - if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
559   - /* this page is under being uncharged ? */
560   - unlock_page_cgroup(page);
561   - cpu_relax();
562   - goto retry;
563   - } else {
564   - unlock_page_cgroup(page);
565   - goto done;
566   - }
  536 + VM_BUG_ON(pc->page != page);
  537 + VM_BUG_ON(pc->ref_cnt <= 0);
  538 +
  539 + pc->ref_cnt++;
  540 + unlock_page_cgroup(page);
  541 + goto done;
567 542 }
568 543 unlock_page_cgroup(page);
569 544  
... ... @@ -612,7 +587,7 @@
612 587 congestion_wait(WRITE, HZ/10);
613 588 }
614 589  
615   - atomic_set(&pc->ref_cnt, 1);
  590 + pc->ref_cnt = 1;
616 591 pc->mem_cgroup = mem;
617 592 pc->page = page;
618 593 pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
619 594  
... ... @@ -683,24 +658,24 @@
683 658 if (!pc)
684 659 goto unlock;
685 660  
686   - if (atomic_dec_and_test(&pc->ref_cnt)) {
687   - page = pc->page;
688   - mz = page_cgroup_zoneinfo(pc);
689   - /*
690   - * get page->cgroup and clear it under lock.
691   - * force_empty can drop page->cgroup without checking refcnt.
692   - */
  661 + VM_BUG_ON(pc->page != page);
  662 + VM_BUG_ON(pc->ref_cnt <= 0);
  663 +
  664 + if (--(pc->ref_cnt) == 0) {
  665 + page_assign_page_cgroup(page, NULL);
693 666 unlock_page_cgroup(page);
694   - if (clear_page_cgroup(page, pc) == pc) {
695   - mem = pc->mem_cgroup;
696   - css_put(&mem->css);
697   - res_counter_uncharge(&mem->res, PAGE_SIZE);
698   - spin_lock_irqsave(&mz->lru_lock, flags);
699   - __mem_cgroup_remove_list(pc);
700   - spin_unlock_irqrestore(&mz->lru_lock, flags);
701   - kfree(pc);
702   - }
703   - lock_page_cgroup(page);
  667 +
  668 + mem = pc->mem_cgroup;
  669 + css_put(&mem->css);
  670 + res_counter_uncharge(&mem->res, PAGE_SIZE);
  671 +
  672 + mz = page_cgroup_zoneinfo(pc);
  673 + spin_lock_irqsave(&mz->lru_lock, flags);
  674 + __mem_cgroup_remove_list(pc);
  675 + spin_unlock_irqrestore(&mz->lru_lock, flags);
  676 +
  677 + kfree(pc);
  678 + return;
704 679 }
705 680  
706 681 unlock:
... ... @@ -714,14 +689,13 @@
714 689 int mem_cgroup_prepare_migration(struct page *page)
715 690 {
716 691 struct page_cgroup *pc;
717   - int ret = 0;
718 692  
719 693 lock_page_cgroup(page);
720 694 pc = page_get_page_cgroup(page);
721   - if (pc && atomic_inc_not_zero(&pc->ref_cnt))
722   - ret = 1;
  695 + if (pc)
  696 + pc->ref_cnt++;
723 697 unlock_page_cgroup(page);
724   - return ret;
  698 + return pc != NULL;
725 699 }
726 700  
727 701 void mem_cgroup_end_migration(struct page *page)
... ... @@ -740,15 +714,17 @@
740 714 struct mem_cgroup_per_zone *mz;
741 715 unsigned long flags;
742 716  
743   -retry:
  717 + lock_page_cgroup(page);
744 718 pc = page_get_page_cgroup(page);
745   - if (!pc)
  719 + if (!pc) {
  720 + unlock_page_cgroup(page);
746 721 return;
  722 + }
747 723  
748   - mz = page_cgroup_zoneinfo(pc);
749   - if (clear_page_cgroup(page, pc) != pc)
750   - goto retry;
  724 + page_assign_page_cgroup(page, NULL);
  725 + unlock_page_cgroup(page);
751 726  
  727 + mz = page_cgroup_zoneinfo(pc);
752 728 spin_lock_irqsave(&mz->lru_lock, flags);
753 729 __mem_cgroup_remove_list(pc);
754 730 spin_unlock_irqrestore(&mz->lru_lock, flags);
... ... @@ -794,15 +770,19 @@
794 770 while (--count && !list_empty(list)) {
795 771 pc = list_entry(list->prev, struct page_cgroup, lru);
796 772 page = pc->page;
797   - /* Avoid race with charge */
798   - atomic_set(&pc->ref_cnt, 0);
799   - if (clear_page_cgroup(page, pc) == pc) {
  773 + lock_page_cgroup(page);
  774 + if (page_get_page_cgroup(page) == pc) {
  775 + page_assign_page_cgroup(page, NULL);
  776 + unlock_page_cgroup(page);
800 777 css_put(&mem->css);
801 778 res_counter_uncharge(&mem->res, PAGE_SIZE);
802 779 __mem_cgroup_remove_list(pc);
803 780 kfree(pc);
804   - } else /* being uncharged ? ...do relax */
  781 + } else {
  782 + /* racing uncharge: let page go then retry */
  783 + unlock_page_cgroup(page);
805 784 break;
  785 + }
806 786 }
807 787  
808 788 spin_unlock_irqrestore(&mz->lru_lock, flags);