Commit ec1685109f1314a30919489ef2800ed626a38c1e
Committed by: Linus Torvalds
1 parent: 500d65d471
Exists in: master and 4 other branches
thp: memcg compound
Teach memcg to charge/uncharge compound pages.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
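The change boils down to computing the charge size from the page order instead of assuming a single base page, and threading that size through the try_charge/commit/uncharge paths. Below is a standalone userspace sketch of that size derivation only, not kernel code: the 4 KiB base page and order-9 (2 MiB) huge page are assumed x86_64-style values, and compound_order() here is a simplified stand-in for the kernel helper of the same name.

#include <stdio.h>

#define PAGE_SIZE   4096UL  /* assumed base page size (typical x86_64) */
#define HPAGE_ORDER 9       /* assumed THP order: 512 base pages = 2 MiB */

/* Simplified stand-in for the kernel's compound_order():
 * 0 for a normal page, HPAGE_ORDER for a transparent huge page. */
static unsigned int compound_order(int is_huge)
{
        return is_huge ? HPAGE_ORDER : 0;
}

int main(void)
{
        unsigned long page_size;

        /* Same derivation the patch adds to the memcg charge/uncharge
         * paths: start from PAGE_SIZE and scale by the compound order. */
        page_size = PAGE_SIZE << compound_order(0);
        printf("normal page charge: %lu bytes\n", page_size);  /* 4096 */

        page_size = PAGE_SIZE << compound_order(1);
        printf("huge page charge:   %lu bytes\n", page_size);  /* 2097152 */

        return 0;
}

Because the per-CPU charge stock and the uncharge batching only track PAGE_SIZE units, the patch bypasses both for compound pages: consume_stock() is only consulted when page_size == PAGE_SIZE, and __do_uncharge() jumps straight to direct_uncharge when page_size != PAGE_SIZE.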
Showing 1 changed file with 53 additions and 30 deletions.
mm/memcontrol.c
@@ -1027,7 +1027,11 @@
 {
         struct page_cgroup *pc;
         struct mem_cgroup_per_zone *mz;
+        int page_size = PAGE_SIZE;

+        if (PageTransHuge(page))
+                page_size <<= compound_order(page);
+
         if (mem_cgroup_disabled())
                 return NULL;

@@ -1887,12 +1891,14 @@
  * oom-killer can be invoked.
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
-                gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom)
+                gfp_t gfp_mask,
+                struct mem_cgroup **memcg, bool oom,
+                int page_size)
 {
         int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
         struct mem_cgroup *mem = NULL;
         int ret;
-        int csize = CHARGE_SIZE;
+        int csize = max(CHARGE_SIZE, (unsigned long) page_size);

         /*
          * Unlike gloval-vm's OOM-kill, we're not in memory shortage
@@ -1917,7 +1923,7 @@
                 VM_BUG_ON(css_is_removed(&mem->css));
                 if (mem_cgroup_is_root(mem))
                         goto done;
-                if (consume_stock(mem))
+                if (page_size == PAGE_SIZE && consume_stock(mem))
                         goto done;
                 css_get(&mem->css);
         } else {
@@ -1940,7 +1946,7 @@
                         rcu_read_unlock();
                         goto done;
                 }
-                if (consume_stock(mem)) {
+                if (page_size == PAGE_SIZE && consume_stock(mem)) {
                         /*
                          * It seems dagerous to access memcg without css_get().
                          * But considering how consume_stok works, it's not
@@ -1981,7 +1987,7 @@
                 case CHARGE_OK:
                         break;
                 case CHARGE_RETRY: /* not in OOM situation but retry */
-                        csize = PAGE_SIZE;
+                        csize = page_size;
                         css_put(&mem->css);
                         mem = NULL;
                         goto again;
@@ -2002,8 +2008,8 @@
                 }
         } while (ret != CHARGE_OK);

-        if (csize > PAGE_SIZE)
-                refill_stock(mem, csize - PAGE_SIZE);
+        if (csize > page_size)
+                refill_stock(mem, csize - page_size);
         css_put(&mem->css);
 done:
         *memcg = mem;

@@ -2031,9 +2037,10 @@
         }
 }

-static void mem_cgroup_cancel_charge(struct mem_cgroup *mem)
+static void mem_cgroup_cancel_charge(struct mem_cgroup *mem,
+                                     int page_size)
 {
-        __mem_cgroup_cancel_charge(mem, 1);
+        __mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT);
 }

 /*
@@ -2089,8 +2096,9 @@
  */

 static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-                                       struct page_cgroup *pc,
-                                       enum charge_type ctype)
+                                       struct page_cgroup *pc,
+                                       enum charge_type ctype,
+                                       int page_size)
 {
         /* try_charge() can return NULL to *memcg, taking care of it. */
         if (!mem)
@@ -2099,7 +2107,7 @@
         lock_page_cgroup(pc);
         if (unlikely(PageCgroupUsed(pc))) {
                 unlock_page_cgroup(pc);
-                mem_cgroup_cancel_charge(mem);
+                mem_cgroup_cancel_charge(mem, page_size);
                 return;
         }

@@ -2173,7 +2181,7 @@
         mem_cgroup_charge_statistics(from, pc, false);
         if (uncharge)
                 /* This is not "cancel", but cancel_charge does all we need. */
-                mem_cgroup_cancel_charge(from);
+                mem_cgroup_cancel_charge(from, PAGE_SIZE);

         /* caller should have done css_get */
         pc->mem_cgroup = to;

@@ -2234,13 +2242,14 @@
                 goto put;

         parent = mem_cgroup_from_cont(pcg);
-        ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+        ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
+                                      PAGE_SIZE);
         if (ret || !parent)
                 goto put_back;

         ret = mem_cgroup_move_account(pc, child, parent, true);
         if (ret)
-                mem_cgroup_cancel_charge(parent);
+                mem_cgroup_cancel_charge(parent, PAGE_SIZE);
 put_back:
         putback_lru_page(page);
 put:

@@ -2261,18 +2270,22 @@
         struct mem_cgroup *mem = NULL;
         struct page_cgroup *pc;
         int ret;
+        int page_size = PAGE_SIZE;

+        if (PageTransHuge(page))
+                page_size <<= compound_order(page);
+
         pc = lookup_page_cgroup(page);
         /* can happen at boot */
         if (unlikely(!pc))
                 return 0;
         prefetchw(pc);

-        ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+        ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
         if (ret || !mem)
                 return ret;

-        __mem_cgroup_commit_charge(mem, pc, ctype);
+        __mem_cgroup_commit_charge(mem, pc, ctype, page_size);
         return 0;
 }

@@ -2281,8 +2294,6 @@
 {
         if (mem_cgroup_disabled())
                 return 0;
-        if (PageCompound(page))
-                return 0;
         /*
          * If already mapped, we don't have to account.
          * If page cache, page->mapping has address_space.

@@ -2388,13 +2399,13 @@
         if (!mem)
                 goto charge_cur_mm;
         *ptr = mem;
-        ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+        ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, PAGE_SIZE);
         css_put(&mem->css);
         return ret;
 charge_cur_mm:
         if (unlikely(!mm))
                 mm = &init_mm;
-        return __mem_cgroup_try_charge(mm, mask, ptr, true);
+        return __mem_cgroup_try_charge(mm, mask, ptr, true, PAGE_SIZE);
 }

 static void
@@ -2410,7 +2421,7 @@
         cgroup_exclude_rmdir(&ptr->css);
         pc = lookup_page_cgroup(page);
         mem_cgroup_lru_del_before_commit_swapcache(page);
-        __mem_cgroup_commit_charge(ptr, pc, ctype);
+        __mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE);
         mem_cgroup_lru_add_after_commit_swapcache(page);
         /*
          * Now swap is on-memory. This means this page may be

@@ -2459,11 +2470,12 @@
                 return;
         if (!mem)
                 return;
-        mem_cgroup_cancel_charge(mem);
+        mem_cgroup_cancel_charge(mem, PAGE_SIZE);
 }

 static void
-__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
+__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype,
+              int page_size)
 {
         struct memcg_batch_info *batch = NULL;
         bool uncharge_memsw = true;
@@ -2490,6 +2502,9 @@
         if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
                 goto direct_uncharge;

+        if (page_size != PAGE_SIZE)
+                goto direct_uncharge;
+
         /*
          * In typical case, batch->memcg == mem. This means we can
          * merge a series of uncharges to an uncharge of res_counter.

@@ -2503,9 +2518,9 @@
                 batch->memsw_bytes += PAGE_SIZE;
         return;
 direct_uncharge:
-        res_counter_uncharge(&mem->res, PAGE_SIZE);
+        res_counter_uncharge(&mem->res, page_size);
         if (uncharge_memsw)
-                res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+                res_counter_uncharge(&mem->memsw, page_size);
         if (unlikely(batch->memcg != mem))
                 memcg_oom_recover(mem);
         return;
@@ -2519,6 +2534,7 @@
 {
         struct page_cgroup *pc;
         struct mem_cgroup *mem = NULL;
+        int page_size = PAGE_SIZE;

         if (mem_cgroup_disabled())
                 return NULL;
@@ -2526,6 +2542,9 @@
         if (PageSwapCache(page))
                 return NULL;

+        if (PageTransHuge(page))
+                page_size <<= compound_order(page);
+
         /*
          * Check if our page_cgroup is valid
          */
@@ -2579,7 +2598,7 @@
                         mem_cgroup_get(mem);
         }
         if (!mem_cgroup_is_root(mem))
-                __do_uncharge(mem, ctype);
+                __do_uncharge(mem, ctype, page_size);

         return mem;

@@ -2774,6 +2793,7 @@
         enum charge_type ctype;
         int ret = 0;

+        VM_BUG_ON(PageTransHuge(page));
         if (mem_cgroup_disabled())
                 return 0;

@@ -2823,7 +2843,7 @@
                 return 0;

         *ptr = mem;
-        ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
+        ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false, PAGE_SIZE);
         css_put(&mem->css);/* drop extra refcnt */
         if (ret || *ptr == NULL) {
                 if (PageAnon(page)) {
@@ -2850,7 +2870,7 @@
                 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
         else
                 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-        __mem_cgroup_commit_charge(mem, pc, ctype);
+        __mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE);
         return ret;
 }

@@ -4461,7 +4481,8 @@
                         batch_count = PRECHARGE_COUNT_AT_ONCE;
                         cond_resched();
                 }
-                ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+                ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
+                                              PAGE_SIZE);
                 if (ret || !mem)
                         /* mem_cgroup_clear_mc() will do uncharge later */
                         return -ENOMEM;
@@ -4623,6 +4644,7 @@
         pte_t *pte;
         spinlock_t *ptl;

+        VM_BUG_ON(pmd_trans_huge(*pmd));
         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         for (; addr != end; pte++, addr += PAGE_SIZE)
                 if (is_target_pte_for_mc(vma, addr, *pte, NULL))
@@ -4789,6 +4811,7 @@
         spinlock_t *ptl;

 retry:
+        VM_BUG_ON(pmd_trans_huge(*pmd));
         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         for (; addr != end; addr += PAGE_SIZE) {
                 pte_t ptent = *(pte++);