Commit 0ae5e89c60c9eb87da36a2614836bc434b0ec2ad
Committed by: Linus Torvalds
Parent: f042e707ee
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans the per-zone LRU and reclaims pages regardless of the cgroup. It breaks memory isolation, since one cgroup can end up reclaiming pages from another cgroup. Instead we should rely on memcg-aware target reclaim, including per-memcg kswapd and soft_limit hierarchical reclaim, under memory pressure.

In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. This patch is the first step toward skipping shrink_zone() if soft_limit reclaim does enough work.

This is part of the effort to reduce reclaiming pages from the global LRU in memcg. The per-memcg background reclaim patchset further enhances per-cgroup targeted reclaim; V4 of that series will be posted shortly.

Try running multiple memory-intensive workloads within separate memcgs and watch the soft_steal counters in memory.stat:

  $ cat /dev/cgroup/A/memory.stat | grep 'soft'
  soft_steal 240000
  soft_scan 240000
  total_soft_steal 240000
  total_soft_scan 240000

This patch:

In the global background reclaim, we do soft reclaim before scanning the per-zone LRU, but the return value is ignored. We would like to skip shrink_zone() if soft_limit reclaim does enough work. We also need to balance the memory pressure across per-memcg zones, like the logic in the core VM. This patch is the first step: it counts the nr_scanned and nr_reclaimed from soft_limit reclaim into the global scan_control.

Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 4 changed files with 39 additions and 15 deletions
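Taken together, the hunks below turn the soft-limit entry point from a fire-and-forget call into one whose results are accumulated by the caller. As a rough sketch of the new calling convention (abbreviated from the balance_pgdat() hunk in mm/vmscan.c below; not a complete excerpt):

        /* inside kswapd's per-zone loop (abbreviated sketch) */
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned = 0;

        /* soft-limit reclaim now reports both pages reclaimed and pages scanned */
        nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, order,
                                        sc.gfp_mask, &nr_soft_scanned);
        sc.nr_reclaimed += nr_soft_reclaimed;   /* counts toward the reclaim progress */
        total_scanned += nr_soft_scanned;       /* counts toward scan-based throttling */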
include/linux/memcontrol.h
@@ -144,7 +144,8 @@
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-						gfp_t gfp_mask);
+						gfp_t gfp_mask,
+						unsigned long *total_scanned);
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -338,7 +339,8 @@
 
 static inline
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask)
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
 	return 0;
 }
include/linux/swap.h
@@ -257,7 +257,8 @@
 extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						  gfp_t gfp_mask, bool noswap,
 						  unsigned int swappiness,
-						  struct zone *zone);
+						  struct zone *zone,
+						  unsigned long *nr_scanned);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
mm/memcontrol.c
@@ -1433,7 +1433,8 @@
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 						struct zone *zone,
 						gfp_t gfp_mask,
-						unsigned long reclaim_options)
+						unsigned long reclaim_options,
+						unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim;
 	int ret, total = 0;
@@ -1442,6 +1443,7 @@
 	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
 	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
+	unsigned long nr_scanned;
 
 	excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
 
@@ -1484,10 +1486,12 @@
 			continue;
 		}
 		/* we use swappiness of local cgroup */
-		if (check_soft)
+		if (check_soft) {
 			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, get_swappiness(victim), zone);
-		else
+				noswap, get_swappiness(victim), zone,
+				&nr_scanned);
+			*total_scanned += nr_scanned;
+		} else
 			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
 				noswap, get_swappiness(victim));
 		css_put(&victim->css);
@@ -1928,7 +1932,7 @@
 		return CHARGE_WOULDBLOCK;
 
 	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags);
+					      gfp_mask, flags, NULL);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3211,7 +3215,8 @@
 			break;
 
 		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK);
+						MEM_CGROUP_RECLAIM_SHRINK,
+						NULL);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3271,7 +3276,8 @@
 
 		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
 						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK);
+						MEM_CGROUP_RECLAIM_SHRINK,
+						NULL);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3285,7 +3291,8 @@
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask)
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
 	unsigned long nr_reclaimed = 0;
 	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
@@ -3293,6 +3300,7 @@
 	int loop = 0;
 	struct mem_cgroup_tree_per_zone *mctz;
 	unsigned long long excess;
+	unsigned long nr_scanned;
 
 	if (order > 0)
 		return 0;
@@ -3311,10 +3319,13 @@
 		if (!mz)
 			break;
 
+		nr_scanned = 0;
 		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
 						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT);
+						MEM_CGROUP_RECLAIM_SOFT,
+						&nr_scanned);
 		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
 
 		/*
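A note on the NULL callers above: *total_scanned is dereferenced only in the check_soft branch, and the call sites that pass NULL (the charge path and the limit-resize paths) use the SHRINK/NOSWAP options rather than MEM_CGROUP_RECLAIM_SOFT, so they are not expected to reach that dereference; only mem_cgroup_soft_limit_reclaim() requests soft reclaim, and it always passes &nr_scanned. A condensed sketch of the scan-count plumbing this file introduces (signatures abbreviated, bodies elided; see the hunks above and the mm/vmscan.c hunks below):

        /* mm/vmscan.c: fills nr_scanned from its private scan_control */
        ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, noswap,
                        get_swappiness(victim), zone, &nr_scanned);

        /* mm/memcontrol.c, check_soft branch of hierarchical reclaim */
        *total_scanned += nr_scanned;

        /* mm/memcontrol.c, mem_cgroup_soft_limit_reclaim() loop:
         * forwards the per-iteration count back to balance_pgdat() */
        *total_scanned += nr_scanned;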
mm/vmscan.c
@@ -2171,9 +2171,11 @@
 unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						unsigned int swappiness,
-						struct zone *zone)
+						struct zone *zone,
+						unsigned long *nr_scanned)
 {
 	struct scan_control sc = {
+		.nr_scanned = 0,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
@@ -2182,6 +2184,7 @@
 		.order = 0,
 		.mem_cgroup = mem,
 	};
+
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
@@ -2200,6 +2203,7 @@
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
+	*nr_scanned = sc.nr_scanned;
 	return sc.nr_reclaimed;
 }
 
@@ -2347,6 +2351,8 @@
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
@@ -2439,11 +2445,15 @@
 
 			sc.nr_scanned = 0;
 
+			nr_soft_scanned = 0;
 			/*
 			 * Call soft limit reclaim before calling shrink_zone.
-			 * For now we ignore the return value
 			 */
-			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+							order, sc.gfp_mask,
+							&nr_soft_scanned);
+			sc.nr_reclaimed += nr_soft_reclaimed;
+			total_scanned += nr_soft_scanned;
 
 			/*
 			 * We put equal pressure on every zone, unless
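Note that this commit only accumulates the counters; nothing is skipped yet. The follow-up direction stated in the commit message (skip shrink_zone() when soft_limit reclaim does enough work) might look roughly like the hypothetical check below. This is purely illustrative, not part of this or any posted patch, and the threshold chosen here is an assumption:

			/* hypothetical follow-up sketch, not in this patch */
			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
							order, sc.gfp_mask,
							&nr_soft_scanned);
			sc.nr_reclaimed += nr_soft_reclaimed;
			total_scanned += nr_soft_scanned;

			/* illustrative threshold: soft reclaim already met the
			 * per-zone target, so skip the global LRU scan */
			if (nr_soft_reclaimed >= sc.nr_to_reclaim)
				continue;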