Commit ec1685109f1314a30919489ef2800ed626a38c1e

Authored by Andrea Arcangeli
Committed by Linus Torvalds
1 parent 500d65d471

thp: memcg compound

Teach memcg to charge/uncharge compound pages.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 53 additions and 30 deletions
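The change threads a page_size argument through the charge, commit and uncharge paths: the charging sites derive it from compound_order(), the per-cpu stock (consume_stock/refill_stock) and the batched uncharge path are only used for base pages, and cancel/uncharge scale by page_size >> PAGE_SHIFT pages. Below is a minimal userspace sketch of that size arithmetic; the 4 KiB base page, the order-9 THP and the CHARGE_SIZE value are illustrative assumptions, not taken from the patch.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)	/* assumed 4 KiB base page */
#define CHARGE_SIZE	(32UL * PAGE_SIZE)	/* stand-in for the stock batch size */

/* Mirrors "page_size <<= compound_order(page)" for a compound page. */
static unsigned long charge_size(int order)
{
	unsigned long page_size = PAGE_SIZE;

	if (order)
		page_size <<= order;
	return page_size;
}

int main(void)
{
	int orders[] = { 0, 9 };	/* a base page and a hypothetical order-9 THP */

	for (int i = 0; i < 2; i++) {
		unsigned long page_size = charge_size(orders[i]);
		/* csize = max(CHARGE_SIZE, page_size), as in __mem_cgroup_try_charge() */
		unsigned long csize = CHARGE_SIZE > page_size ? CHARGE_SIZE : page_size;

		printf("order %d: charge %lu bytes (%lu pages), refill_stock %lu bytes\n",
		       orders[i], page_size, page_size >> PAGE_SHIFT,
		       csize - page_size);
	}
	return 0;
}

Because csize is never smaller than page_size, a huge-page charge is taken in a single res_counter hit and leaves nothing over to refill the stock, which is why the stock fast path is simply skipped when page_size != PAGE_SIZE.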

... ... @@ -1027,7 +1027,11 @@
1027 1027 {
1028 1028 struct page_cgroup *pc;
1029 1029 struct mem_cgroup_per_zone *mz;
  1030 + int page_size = PAGE_SIZE;
1030 1031  
  1032 + if (PageTransHuge(page))
  1033 + page_size <<= compound_order(page);
  1034 +
1031 1035 if (mem_cgroup_disabled())
1032 1036 return NULL;
1033 1037  
1034 1038  
... ... @@ -1887,12 +1891,14 @@
1887 1891 * oom-killer can be invoked.
1888 1892 */
1889 1893 static int __mem_cgroup_try_charge(struct mm_struct *mm,
1890   - gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom)
  1894 + gfp_t gfp_mask,
  1895 + struct mem_cgroup **memcg, bool oom,
  1896 + int page_size)
1891 1897 {
1892 1898 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
1893 1899 struct mem_cgroup *mem = NULL;
1894 1900 int ret;
1895   - int csize = CHARGE_SIZE;
  1901 + int csize = max(CHARGE_SIZE, (unsigned long) page_size);
1896 1902  
1897 1903 /*
1898 1904 * Unlike gloval-vm's OOM-kill, we're not in memory shortage
... ... @@ -1917,7 +1923,7 @@
1917 1923 VM_BUG_ON(css_is_removed(&mem->css));
1918 1924 if (mem_cgroup_is_root(mem))
1919 1925 goto done;
1920   - if (consume_stock(mem))
  1926 + if (page_size == PAGE_SIZE && consume_stock(mem))
1921 1927 goto done;
1922 1928 css_get(&mem->css);
1923 1929 } else {
... ... @@ -1940,7 +1946,7 @@
1940 1946 rcu_read_unlock();
1941 1947 goto done;
1942 1948 }
1943   - if (consume_stock(mem)) {
  1949 + if (page_size == PAGE_SIZE && consume_stock(mem)) {
1944 1950 /*
1945 1951 * It seems dagerous to access memcg without css_get().
1946 1952 * But considering how consume_stok works, it's not
... ... @@ -1981,7 +1987,7 @@
1981 1987 case CHARGE_OK:
1982 1988 break;
1983 1989 case CHARGE_RETRY: /* not in OOM situation but retry */
1984   - csize = PAGE_SIZE;
  1990 + csize = page_size;
1985 1991 css_put(&mem->css);
1986 1992 mem = NULL;
1987 1993 goto again;
... ... @@ -2002,8 +2008,8 @@
2002 2008 }
2003 2009 } while (ret != CHARGE_OK);
2004 2010  
2005   - if (csize > PAGE_SIZE)
2006   - refill_stock(mem, csize - PAGE_SIZE);
  2011 + if (csize > page_size)
  2012 + refill_stock(mem, csize - page_size);
2007 2013 css_put(&mem->css);
2008 2014 done:
2009 2015 *memcg = mem;
2010 2016  
... ... @@ -2031,9 +2037,10 @@
2031 2037 }
2032 2038 }
2033 2039  
2034   -static void mem_cgroup_cancel_charge(struct mem_cgroup *mem)
  2040 +static void mem_cgroup_cancel_charge(struct mem_cgroup *mem,
  2041 + int page_size)
2035 2042 {
2036   - __mem_cgroup_cancel_charge(mem, 1);
  2043 + __mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT);
2037 2044 }
2038 2045  
2039 2046 /*
... ... @@ -2089,8 +2096,9 @@
2089 2096 */
2090 2097  
2091 2098 static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2092   - struct page_cgroup *pc,
2093   - enum charge_type ctype)
  2099 + struct page_cgroup *pc,
  2100 + enum charge_type ctype,
  2101 + int page_size)
2094 2102 {
2095 2103 /* try_charge() can return NULL to *memcg, taking care of it. */
2096 2104 if (!mem)
... ... @@ -2099,7 +2107,7 @@
2099 2107 lock_page_cgroup(pc);
2100 2108 if (unlikely(PageCgroupUsed(pc))) {
2101 2109 unlock_page_cgroup(pc);
2102   - mem_cgroup_cancel_charge(mem);
  2110 + mem_cgroup_cancel_charge(mem, page_size);
2103 2111 return;
2104 2112 }
2105 2113  
... ... @@ -2173,7 +2181,7 @@
2173 2181 mem_cgroup_charge_statistics(from, pc, false);
2174 2182 if (uncharge)
2175 2183 /* This is not "cancel", but cancel_charge does all we need. */
2176   - mem_cgroup_cancel_charge(from);
  2184 + mem_cgroup_cancel_charge(from, PAGE_SIZE);
2177 2185  
2178 2186 /* caller should have done css_get */
2179 2187 pc->mem_cgroup = to;
2180 2188  
... ... @@ -2234,13 +2242,14 @@
2234 2242 goto put;
2235 2243  
2236 2244 parent = mem_cgroup_from_cont(pcg);
2237   - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
  2245 + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
  2246 + PAGE_SIZE);
2238 2247 if (ret || !parent)
2239 2248 goto put_back;
2240 2249  
2241 2250 ret = mem_cgroup_move_account(pc, child, parent, true);
2242 2251 if (ret)
2243   - mem_cgroup_cancel_charge(parent);
  2252 + mem_cgroup_cancel_charge(parent, PAGE_SIZE);
2244 2253 put_back:
2245 2254 putback_lru_page(page);
2246 2255 put:
2247 2256  
2248 2257  
2249 2258  
... ... @@ -2261,18 +2270,22 @@
2261 2270 struct mem_cgroup *mem = NULL;
2262 2271 struct page_cgroup *pc;
2263 2272 int ret;
  2273 + int page_size = PAGE_SIZE;
2264 2274  
  2275 + if (PageTransHuge(page))
  2276 + page_size <<= compound_order(page);
  2277 +
2265 2278 pc = lookup_page_cgroup(page);
2266 2279 /* can happen at boot */
2267 2280 if (unlikely(!pc))
2268 2281 return 0;
2269 2282 prefetchw(pc);
2270 2283  
2271   - ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
  2284 + ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
2272 2285 if (ret || !mem)
2273 2286 return ret;
2274 2287  
2275   - __mem_cgroup_commit_charge(mem, pc, ctype);
  2288 + __mem_cgroup_commit_charge(mem, pc, ctype, page_size);
2276 2289 return 0;
2277 2290 }
2278 2291  
... ... @@ -2281,8 +2294,6 @@
2281 2294 {
2282 2295 if (mem_cgroup_disabled())
2283 2296 return 0;
2284   - if (PageCompound(page))
2285   - return 0;
2286 2297 /*
2287 2298 * If already mapped, we don't have to account.
2288 2299 * If page cache, page->mapping has address_space.
2289 2300  
... ... @@ -2388,13 +2399,13 @@
2388 2399 if (!mem)
2389 2400 goto charge_cur_mm;
2390 2401 *ptr = mem;
2391   - ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
  2402 + ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, PAGE_SIZE);
2392 2403 css_put(&mem->css);
2393 2404 return ret;
2394 2405 charge_cur_mm:
2395 2406 if (unlikely(!mm))
2396 2407 mm = &init_mm;
2397   - return __mem_cgroup_try_charge(mm, mask, ptr, true);
  2408 + return __mem_cgroup_try_charge(mm, mask, ptr, true, PAGE_SIZE);
2398 2409 }
2399 2410  
2400 2411 static void
... ... @@ -2410,7 +2421,7 @@
2410 2421 cgroup_exclude_rmdir(&ptr->css);
2411 2422 pc = lookup_page_cgroup(page);
2412 2423 mem_cgroup_lru_del_before_commit_swapcache(page);
2413   - __mem_cgroup_commit_charge(ptr, pc, ctype);
  2424 + __mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE);
2414 2425 mem_cgroup_lru_add_after_commit_swapcache(page);
2415 2426 /*
2416 2427 * Now swap is on-memory. This means this page may be
2417 2428  
... ... @@ -2459,11 +2470,12 @@
2459 2470 return;
2460 2471 if (!mem)
2461 2472 return;
2462   - mem_cgroup_cancel_charge(mem);
  2473 + mem_cgroup_cancel_charge(mem, PAGE_SIZE);
2463 2474 }
2464 2475  
2465 2476 static void
2466   -__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
  2477 +__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype,
  2478 + int page_size)
2467 2479 {
2468 2480 struct memcg_batch_info *batch = NULL;
2469 2481 bool uncharge_memsw = true;
... ... @@ -2490,6 +2502,9 @@
2490 2502 if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
2491 2503 goto direct_uncharge;
2492 2504  
  2505 + if (page_size != PAGE_SIZE)
  2506 + goto direct_uncharge;
  2507 +
2493 2508 /*
2494 2509 * In typical case, batch->memcg == mem. This means we can
2495 2510 * merge a series of uncharges to an uncharge of res_counter.
2496 2511  
... ... @@ -2503,9 +2518,9 @@
2503 2518 batch->memsw_bytes += PAGE_SIZE;
2504 2519 return;
2505 2520 direct_uncharge:
2506   - res_counter_uncharge(&mem->res, PAGE_SIZE);
  2521 + res_counter_uncharge(&mem->res, page_size);
2507 2522 if (uncharge_memsw)
2508   - res_counter_uncharge(&mem->memsw, PAGE_SIZE);
  2523 + res_counter_uncharge(&mem->memsw, page_size);
2509 2524 if (unlikely(batch->memcg != mem))
2510 2525 memcg_oom_recover(mem);
2511 2526 return;
... ... @@ -2519,6 +2534,7 @@
2519 2534 {
2520 2535 struct page_cgroup *pc;
2521 2536 struct mem_cgroup *mem = NULL;
  2537 + int page_size = PAGE_SIZE;
2522 2538  
2523 2539 if (mem_cgroup_disabled())
2524 2540 return NULL;
... ... @@ -2526,6 +2542,9 @@
2526 2542 if (PageSwapCache(page))
2527 2543 return NULL;
2528 2544  
  2545 + if (PageTransHuge(page))
  2546 + page_size <<= compound_order(page);
  2547 +
2529 2548 /*
2530 2549 * Check if our page_cgroup is valid
2531 2550 */
... ... @@ -2579,7 +2598,7 @@
2579 2598 mem_cgroup_get(mem);
2580 2599 }
2581 2600 if (!mem_cgroup_is_root(mem))
2582   - __do_uncharge(mem, ctype);
  2601 + __do_uncharge(mem, ctype, page_size);
2583 2602  
2584 2603 return mem;
2585 2604  
... ... @@ -2774,6 +2793,7 @@
2774 2793 enum charge_type ctype;
2775 2794 int ret = 0;
2776 2795  
  2796 + VM_BUG_ON(PageTransHuge(page));
2777 2797 if (mem_cgroup_disabled())
2778 2798 return 0;
2779 2799  
... ... @@ -2823,7 +2843,7 @@
2823 2843 return 0;
2824 2844  
2825 2845 *ptr = mem;
2826   - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
  2846 + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false, PAGE_SIZE);
2827 2847 css_put(&mem->css);/* drop extra refcnt */
2828 2848 if (ret || *ptr == NULL) {
2829 2849 if (PageAnon(page)) {
... ... @@ -2850,7 +2870,7 @@
2850 2870 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
2851 2871 else
2852 2872 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
2853   - __mem_cgroup_commit_charge(mem, pc, ctype);
  2873 + __mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE);
2854 2874 return ret;
2855 2875 }
2856 2876  
... ... @@ -4461,7 +4481,8 @@
4461 4481 batch_count = PRECHARGE_COUNT_AT_ONCE;
4462 4482 cond_resched();
4463 4483 }
4464   - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
  4484 + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
  4485 + PAGE_SIZE);
4465 4486 if (ret || !mem)
4466 4487 /* mem_cgroup_clear_mc() will do uncharge later */
4467 4488 return -ENOMEM;
... ... @@ -4623,6 +4644,7 @@
4623 4644 pte_t *pte;
4624 4645 spinlock_t *ptl;
4625 4646  
  4647 + VM_BUG_ON(pmd_trans_huge(*pmd));
4626 4648 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
4627 4649 for (; addr != end; pte++, addr += PAGE_SIZE)
4628 4650 if (is_target_pte_for_mc(vma, addr, *pte, NULL))
... ... @@ -4789,6 +4811,7 @@
4789 4811 spinlock_t *ptl;
4790 4812  
4791 4813 retry:
  4814 + VM_BUG_ON(pmd_trans_huge(*pmd));
4792 4815 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
4793 4816 for (; addr != end; addr += PAGE_SIZE) {
4794 4817 pte_t ptent = *(pte++);