Commit ca3e021417eed30ec2b64ce88eb0acf64aa9bc29

Authored by KAMEZAWA Hiroyuki
Committed by Linus Torvalds
1 parent e401f1761c

memcg: fix USED bit handling at uncharge in THP

Currently, under THP:

at charge:
  - the PageCgroupUsed bit is set on every page_cgroup of a hugepage,
    i.e. on all 512 subpages.
at uncharge:
  - the PageCgroupUsed bit is cleared only on the head page.

So the tail pages are left with a stale "Used" bit.

This patch fixes that by setting the Used bit only on the head page;
Used bits for tail pages are set at split time if necessary.
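
A toy userspace model of the leak (illustrative only, not kernel code;
the array stands in for the 512 page_cgroups of one 2MB hugepage built
from 4KB pages):

    #include <stdio.h>

    #define HPAGE_NR 512    /* subpages in one 2MB hugepage */

    int main(void)
    {
        int used[HPAGE_NR] = { 0 };
        int i, stale = 0;

        for (i = 0; i < HPAGE_NR; i++)  /* old charge: every subpage */
            used[i] = 1;
        used[0] = 0;                    /* uncharge: head page only */

        for (i = 0; i < HPAGE_NR; i++)
            stale += used[i];
        printf("stale Used bits: %d\n", stale); /* prints 511 */
        return 0;
    }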

This patch adds the following lock order:
   compound_lock() -> page_cgroup_move_lock().
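
A minimal userspace analogue of that ordering, assuming plain pthread
mutexes stand in for compound_lock() and the page_cgroup move lock;
taking the two locks in the same fixed order on every path is what
rules out ABBA deadlock:

    #include <pthread.h>

    static pthread_mutex_t compound = PTHREAD_MUTEX_INITIALIZER; /* outer */
    static pthread_mutex_t pcg_move = PTHREAD_MUTEX_INITIALIZER; /* inner */

    static void split_fixup_path(void)
    {
        pthread_mutex_lock(&compound);   /* 1st: compound_lock() */
        pthread_mutex_lock(&pcg_move);   /* 2nd: page_cgroup move lock */
        /* ... copy ownership to the tail page_cgroup ... */
        pthread_mutex_unlock(&pcg_move);
        pthread_mutex_unlock(&compound);
    }

    int main(void)
    {
        split_fixup_path();
        return 0;
    }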

[akpm@linux-foundation.org: fix warning]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

3 changed files with 62 additions and 40 deletions

include/linux/memcontrol.h
@@ -146,6 +146,10 @@
                                             gfp_t gfp_mask);
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+#endif
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -333,6 +337,11 @@
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem)
 {
     return 0;
+}
+
+static inline void mem_cgroup_split_huge_fixup(struct page *head,
+                                               struct page *tail)
+{
 }
 
 #endif /* CONFIG_CGROUP_MEM_CONT */
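
The empty static inline stub is the usual kernel idiom for compiled-out
configs; a minimal sketch of the same pattern with hypothetical names
(CONFIG_FEATURE_X and feature_x_fixup are invented for illustration):

    #ifdef CONFIG_FEATURE_X
    void feature_x_fixup(int head, int tail);   /* real version elsewhere */
    #else
    /* compiles away entirely, so call sites need no #ifdef of their own */
    static inline void feature_x_fixup(int head, int tail) { }
    #endif

    int main(void)
    {
        feature_x_fixup(0, 1);  /* legal whether or not the feature is on */
        return 0;
    }

Note that in the patch itself the real declaration is additionally
guarded by CONFIG_TRANSPARENT_HUGEPAGE, since its only caller lives in
THP-only code.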

mm/huge_memory.c

@@ -1203,6 +1203,8 @@
         BUG_ON(!PageDirty(page_tail));
         BUG_ON(!PageSwapBacked(page_tail));
 
+        mem_cgroup_split_huge_fixup(page, page_tail);
+
         lru_add_page_tail(zone, page, page_tail);
     }

mm/memcontrol.c

@@ -1614,7 +1614,7 @@
     if (unlikely(!mem || !PageCgroupUsed(pc)))
         goto out;
     /* pc->mem_cgroup is unstable ? */
-    if (unlikely(mem_cgroup_stealed(mem))) {
+    if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
         /* take a lock against to access pc->mem_cgroup */
         move_lock_page_cgroup(pc, &flags);
         need_unlock = true;
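
This hunk makes page-statistics updates take the move lock for any huge
page, since a concurrent split can now rewrite pc->mem_cgroup for tail
pages. A userspace analogue of the reader side (assumption: a pthread
mutex models move_lock_page_cgroup()):

    #include <pthread.h>

    struct mem_cgroup { long file_mapped; };

    struct page_cgroup {
        pthread_mutex_t move_lock;
        struct mem_cgroup *memcg;   /* may be retargeted by a mover */
    };

    static void update_file_mapped(struct page_cgroup *pc, long delta)
    {
        pthread_mutex_lock(&pc->move_lock);
        pc->memcg->file_mapped += delta;    /* pointer is stable here */
        pthread_mutex_unlock(&pc->move_lock);
    }

    int main(void)
    {
        struct mem_cgroup mc = { 0 };
        struct page_cgroup pc = { PTHREAD_MUTEX_INITIALIZER, &mc };

        update_file_mapped(&pc, 1);
        return 0;
    }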

@@ -2083,14 +2083,27 @@
     return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-                                         struct page_cgroup *pc,
-                                         enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+                                       struct page_cgroup *pc,
+                                       enum charge_type ctype,
+                                       int page_size)
 {
+    int nr_pages = page_size >> PAGE_SHIFT;
+
+    /* try_charge() can return NULL to *memcg, taking care of it. */
+    if (!mem)
+        return;
+
+    lock_page_cgroup(pc);
+    if (unlikely(PageCgroupUsed(pc))) {
+        unlock_page_cgroup(pc);
+        mem_cgroup_cancel_charge(mem, page_size);
+        return;
+    }
+    /*
+     * we don't need page_cgroup_lock about tail pages, because they are
+     * not accessed by any other context at this point.
+     */
     pc->mem_cgroup = mem;
     /*
      * We access a page_cgroup asynchronously without lock_page_cgroup().
 
@@ -2114,35 +2127,7 @@
         break;
     }
 
-    mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), 1);
-}
-
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-                                       struct page_cgroup *pc,
-                                       enum charge_type ctype,
-                                       int page_size)
-{
-    int i;
-    int count = page_size >> PAGE_SHIFT;
-
-    /* try_charge() can return NULL to *memcg, taking care of it. */
-    if (!mem)
-        return;
-
-    lock_page_cgroup(pc);
-    if (unlikely(PageCgroupUsed(pc))) {
-        unlock_page_cgroup(pc);
-        mem_cgroup_cancel_charge(mem, page_size);
-        return;
-    }
-
-    /*
-     * we don't need page_cgroup_lock about tail pages, becase they are not
-     * accessed by any other context at this point.
-     */
-    for (i = 0; i < count; i++)
-        ____mem_cgroup_commit_charge(mem, pc + i, ctype);
-
+    mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
     unlock_page_cgroup(pc);
     /*
      * "charge_statistics" updated event counter. Then, check it.
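
The refactor above folds the old per-subpage loop into a single
accounting call; the arithmetic it relies on, as a runnable toy:

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* 4KB base pages */

    int main(void)
    {
        long page_size = 2L * 1024 * 1024;      /* one 2MB hugepage */
        long nr_pages = page_size >> PAGE_SHIFT;

        /* one charge_statistics() call with nr_pages now covers what
         * used to be nr_pages separate calls with +1 each */
        printf("nr_pages = %ld\n", nr_pages);   /* 512 */
        return 0;
    }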

@@ -2152,6 +2137,34 @@
     memcg_check_events(mem, pc->page);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+                              (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compound_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+    struct page_cgroup *head_pc = lookup_page_cgroup(head);
+    struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+    unsigned long flags;
+
+    /*
+     * We have no races with charge/uncharge but will have races with
+     * page state accounting.
+     */
+    move_lock_page_cgroup(head_pc, &flags);
+
+    tail_pc->mem_cgroup = head_pc->mem_cgroup;
+    smp_wmb(); /* see __commit_charge() */
+    /* we don't need to copy all flags... */
+    tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+    move_unlock_page_cgroup(head_pc, &flags);
+}
+#endif
+
 /**
  * __mem_cgroup_move_account - move account of the page
  * @pc: page_cgroup of the page.
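
The PCGF_NOCOPY_AT_SPLIT mask keeps per-page state (the two lock bits,
LRU accounting, migration) from leaking from head to tail; a toy model
of the masked copy (flag names borrowed from page_cgroup, bit values
invented):

    #include <stdio.h>

    enum { PCG_LOCK, PCG_MOVE_LOCK, PCG_ACCT_LRU, PCG_MIGRATION, PCG_CACHE };

    #define NOCOPY_AT_SPLIT ((1UL << PCG_LOCK) | (1UL << PCG_MOVE_LOCK) | \
                             (1UL << PCG_ACCT_LRU) | (1UL << PCG_MIGRATION))

    int main(void)
    {
        unsigned long head = (1UL << PCG_CACHE) | (1UL << PCG_ACCT_LRU);
        unsigned long tail = head & ~NOCOPY_AT_SPLIT;

        /* tail keeps PCG_CACHE but not the head's LRU-accounting bit */
        printf("head=%#lx tail=%#lx\n", head, tail);
        return 0;
    }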

@@ -2545,7 +2558,6 @@
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-    int i;
     int count;
     struct page_cgroup *pc;
     struct mem_cgroup *mem = NULL;
 
@@ -2595,8 +2607,7 @@
         break;
     }
 
-    for (i = 0; i < count; i++)
-        mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -1);
+    mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
     ClearPageCgroupUsed(pc);
     /*