Commit 0ae5e89c60c9eb87da36a2614836bc434b0ec2ad

Authored by Ying Han
Committed by Linus Torvalds
1 parent f042e707ee

memcg: count the soft_limit reclaim in global background reclaim

The global kswapd scans the per-zone LRU and reclaims pages regardless of
cgroup. This breaks memory isolation, since one cgroup can end up reclaiming
pages that belong to another cgroup. Instead we should rely on memcg-aware
targeted reclaim, including per-memcg kswapd and soft_limit hierarchical
reclaim, under memory pressure.

In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored. This patch is the
first step toward skipping shrink_zone() when soft_limit reclaim does
enough work.

This is part of an effort to reduce reclaim from the global LRU when memcg
is in use. The per-memcg background reclaim patchset further enhances
per-cgroup targeted reclaim; I should have V4 of it posted shortly.

Try running multiple memory-intensive workloads within separate memcgs and
watch the soft_steal counters in memory.stat:

  $ cat /dev/cgroup/A/memory.stat | grep 'soft'
  soft_steal 240000
  soft_scan 240000
  total_soft_steal 240000
  total_soft_scan 240000
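
For reference, a minimal way to set up such a run might look like the
following sketch. It assumes the memory cgroup hierarchy is mounted at
/dev/cgroup as in the sample above; 'memhog' is a placeholder for any
memory-intensive program, and the soft limit value is illustrative:

  $ mkdir /dev/cgroup/A
  $ echo 256M > /dev/cgroup/A/memory.soft_limit_in_bytes
  $ echo $$ > /dev/cgroup/A/tasks        # move this shell into memcg A
  $ memhog 1g &                          # any memory-intensive workload

Once the group exceeds its soft limit and global memory pressure wakes
kswapd, the soft_steal/soft_scan counters above should start climbing.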

This patch:

In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU.  However, the return value is ignored.

We would like to skip shrink_zone() if soft_limit reclaim does enough
work.  Also, we need to balance memory pressure across per-memcg zones,
as the core VM logic does.  This patch is the first step: we start by
counting the nr_scanned and nr_reclaimed from soft_limit reclaim into the
global scan_control.
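
As a reference for reading the diff below, here is a condensed sketch of
the resulting flow in balance_pgdat() (abridged from this patch's change
to mm/vmscan.c; the surrounding per-zone loop and other declarations are
elided, so this is not a standalone compilable unit):

  /* Inside balance_pgdat()'s per-zone scan loop, before shrink_zone() */
  unsigned long nr_soft_reclaimed;
  unsigned long nr_soft_scanned = 0;

  /*
   * Soft limit reclaim runs first; its results are no longer thrown
   * away but credited to the global scan_control, so later steps can
   * tell how much work soft reclaim already did for this zone.
   */
  nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, order,
                                                    sc.gfp_mask,
                                                    &nr_soft_scanned);
  sc.nr_reclaimed += nr_soft_reclaimed;
  total_scanned += nr_soft_scanned;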

Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 39 additions and 15 deletions

include/linux/memcontrol.h
... ... @@ -144,7 +144,8 @@
144 144 }
145 145  
146 146 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
147   - gfp_t gfp_mask);
  147 + gfp_t gfp_mask,
  148 + unsigned long *total_scanned);
148 149 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
149 150  
150 151 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
... ... @@ -338,7 +339,8 @@
338 339  
339 340 static inline
340 341 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
341   - gfp_t gfp_mask)
  342 + gfp_t gfp_mask,
  343 + unsigned long *total_scanned)
342 344 {
343 345 return 0;
344 346 }
include/linux/swap.h
... ... @@ -257,7 +257,8 @@
257 257 extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
258 258 gfp_t gfp_mask, bool noswap,
259 259 unsigned int swappiness,
260   - struct zone *zone);
  260 + struct zone *zone,
  261 + unsigned long *nr_scanned);
261 262 extern int __isolate_lru_page(struct page *page, int mode, int file);
262 263 extern unsigned long shrink_all_memory(unsigned long nr_pages);
263 264 extern int vm_swappiness;
mm/memcontrol.c
... ... @@ -1433,7 +1433,8 @@
1433 1433 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1434 1434 struct zone *zone,
1435 1435 gfp_t gfp_mask,
1436   - unsigned long reclaim_options)
  1436 + unsigned long reclaim_options,
  1437 + unsigned long *total_scanned)
1437 1438 {
1438 1439 struct mem_cgroup *victim;
1439 1440 int ret, total = 0;
... ... @@ -1442,6 +1443,7 @@
1442 1443 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1443 1444 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1444 1445 unsigned long excess;
  1446 + unsigned long nr_scanned;
1445 1447  
1446 1448 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
1447 1449  
1448 1450  
... ... @@ -1484,10 +1486,12 @@
1484 1486 continue;
1485 1487 }
1486 1488 /* we use swappiness of local cgroup */
1487   - if (check_soft)
  1489 + if (check_soft) {
1488 1490 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1489   - noswap, get_swappiness(victim), zone);
1490   - else
  1491 + noswap, get_swappiness(victim), zone,
  1492 + &nr_scanned);
  1493 + *total_scanned += nr_scanned;
  1494 + } else
1491 1495 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1492 1496 noswap, get_swappiness(victim));
1493 1497 css_put(&victim->css);
... ... @@ -1928,7 +1932,7 @@
1928 1932 return CHARGE_WOULDBLOCK;
1929 1933  
1930 1934 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
1931   - gfp_mask, flags);
  1935 + gfp_mask, flags, NULL);
1932 1936 if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
1933 1937 return CHARGE_RETRY;
1934 1938 /*
... ... @@ -3211,7 +3215,8 @@
3211 3215 break;
3212 3216  
3213 3217 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
3214   - MEM_CGROUP_RECLAIM_SHRINK);
  3218 + MEM_CGROUP_RECLAIM_SHRINK,
  3219 + NULL);
3215 3220 curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
3216 3221 /* Usage is reduced ? */
3217 3222 if (curusage >= oldusage)
... ... @@ -3271,7 +3276,8 @@
3271 3276  
3272 3277 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
3273 3278 MEM_CGROUP_RECLAIM_NOSWAP |
3274   - MEM_CGROUP_RECLAIM_SHRINK);
  3279 + MEM_CGROUP_RECLAIM_SHRINK,
  3280 + NULL);
3275 3281 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
3276 3282 /* Usage is reduced ? */
3277 3283 if (curusage >= oldusage)
... ... @@ -3285,7 +3291,8 @@
3285 3291 }
3286 3292  
3287 3293 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3288   - gfp_t gfp_mask)
  3294 + gfp_t gfp_mask,
  3295 + unsigned long *total_scanned)
3289 3296 {
3290 3297 unsigned long nr_reclaimed = 0;
3291 3298 struct mem_cgroup_per_zone *mz, *next_mz = NULL;
... ... @@ -3293,6 +3300,7 @@
3293 3300 int loop = 0;
3294 3301 struct mem_cgroup_tree_per_zone *mctz;
3295 3302 unsigned long long excess;
  3303 + unsigned long nr_scanned;
3296 3304  
3297 3305 if (order > 0)
3298 3306 return 0;
3299 3307  
3300 3308  
... ... @@ -3311,10 +3319,13 @@
3311 3319 if (!mz)
3312 3320 break;
3313 3321  
  3322 + nr_scanned = 0;
3314 3323 reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
3315 3324 gfp_mask,
3316   - MEM_CGROUP_RECLAIM_SOFT);
  3325 + MEM_CGROUP_RECLAIM_SOFT,
  3326 + &nr_scanned);
3317 3327 nr_reclaimed += reclaimed;
  3328 + *total_scanned += nr_scanned;
3318 3329 spin_lock(&mctz->lock);
3319 3330  
3320 3331 /*
mm/vmscan.c
... ... @@ -2171,9 +2171,11 @@
2171 2171 unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2172 2172 gfp_t gfp_mask, bool noswap,
2173 2173 unsigned int swappiness,
2174   - struct zone *zone)
  2174 + struct zone *zone,
  2175 + unsigned long *nr_scanned)
2175 2176 {
2176 2177 struct scan_control sc = {
  2178 + .nr_scanned = 0,
2177 2179 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2178 2180 .may_writepage = !laptop_mode,
2179 2181 .may_unmap = 1,
... ... @@ -2182,6 +2184,7 @@
2182 2184 .order = 0,
2183 2185 .mem_cgroup = mem,
2184 2186 };
  2187 +
2185 2188 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2186 2189 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
2187 2190  
... ... @@ -2200,6 +2203,7 @@
2200 2203  
2201 2204 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2202 2205  
  2206 + *nr_scanned = sc.nr_scanned;
2203 2207 return sc.nr_reclaimed;
2204 2208 }
2205 2209  
... ... @@ -2347,6 +2351,8 @@
2347 2351 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
2348 2352 unsigned long total_scanned;
2349 2353 struct reclaim_state *reclaim_state = current->reclaim_state;
  2354 + unsigned long nr_soft_reclaimed;
  2355 + unsigned long nr_soft_scanned;
2350 2356 struct scan_control sc = {
2351 2357 .gfp_mask = GFP_KERNEL,
2352 2358 .may_unmap = 1,
2353 2359  
2354 2360  
... ... @@ -2439,11 +2445,15 @@
2439 2445  
2440 2446 sc.nr_scanned = 0;
2441 2447  
  2448 + nr_soft_scanned = 0;
2442 2449 /*
2443 2450 * Call soft limit reclaim before calling shrink_zone.
2444   - * For now we ignore the return value
2445 2451 */
2446   - mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
  2452 + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
  2453 + order, sc.gfp_mask,
  2454 + &nr_soft_scanned);
  2455 + sc.nr_reclaimed += nr_soft_reclaimed;
  2456 + total_scanned += nr_soft_scanned;
2447 2457  
2448 2458 /*
2449 2459 * We put equal pressure on every zone, unless