Commit 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9

Authored by KOSAKI Motohiro
Committed by Linus Torvalds
1 parent 549927620b

memcg: add inactive_anon_is_low()

inactive_anon_is_low() is a key component of active/inactive anon
balancing during reclaim.  However, the current inactive_anon_is_low()
only considers global reclaim.

Therefore, we currently need the following ugly scan_global_lru()
condition:

	if (lru == LRU_ACTIVE_ANON &&
	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
		shrink_active_list(nr_to_scan, zone, sc, priority, file);
		return 0;
	}

This condition causes memcg reclaim to always deactivate pages whenever
shrink_list() is called.  Add mem_cgroup_inactive_anon_is_low() so that
memcg reclaim can do proper active/inactive anon balancing as well.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Pekka Enberg" <penberg@cs.helsinki.fi>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
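
For reference, a condensed before/after of the shrink_list() condition this
patch changes (both snippets are taken from the mm/vmscan.c hunks below):

	/* Before: memcg reclaim (!scan_global_lru) always takes the
	 * deactivation branch, regardless of the anon balance. */
	if (lru == LRU_ACTIVE_ANON &&
	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
		shrink_active_list(nr_to_scan, zone, sc, priority, file);
		return 0;
	}

	/* After: global and memcg reclaim consult the same predicate, which
	 * falls back to a per-memcg ratio check for memcg reclaim. */
	if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
		shrink_active_list(nr_to_scan, zone, sc, priority, file);
		return 0;
	}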

Showing 3 changed files with 77 additions and 15 deletions

include/linux/memcontrol.h
@@ -100,6 +100,8 @@
 
 extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
+					struct zone *zone);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
@@ -251,6 +253,13 @@
 {
 	return false;
 }
+
+static inline int
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+{
+	return 1;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
mm/memcontrol.c
@@ -156,6 +156,9 @@
 	unsigned long last_oom_jiffies;
 	int obsolete;
 	atomic_t refcnt;
+
+	unsigned int inactive_ratio;
+
 	/*
 	 * statistics. This must be placed at the end of memcg.
 	 */
@@ -431,6 +434,20 @@
 	return (nr_pages >> priority);
 }
 
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+{
+	unsigned long active;
+	unsigned long inactive;
+
+	inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON);
+	active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON);
+
+	if (inactive * memcg->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
+
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -1360,6 +1377,29 @@
 	return 0;
 }
 
+/*
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * This calculation is a straightforward port of
+ * page_alloc.c::setup_per_zone_inactive_ratio(), which describes it in
+ * more detail.
+ */
+static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg)
+{
+	unsigned int gb, ratio;
+
+	gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30;
+	if (gb)
+		ratio = int_sqrt(10 * gb);
+	else
+		ratio = 1;
+
+	memcg->inactive_ratio = ratio;
+
+}
+
 static DEFINE_MUTEX(set_limit_mutex);
 
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
@@ -1398,6 +1438,10 @@
 						GFP_KERNEL, false);
 		if (!progress)		retry_count--;
 	}
+
+	if (!ret)
+		mem_cgroup_set_inactive_ratio(memcg);
+
 	return ret;
 }
 
@@ -1982,7 +2026,7 @@
 		res_counter_init(&mem->res, NULL);
 		res_counter_init(&mem->memsw, NULL);
 	}
-
+	mem_cgroup_set_inactive_ratio(mem);
 	mem->last_scanned_child = NULL;
 
 	return &mem->css;
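
To illustrate how the inactive_ratio set by mem_cgroup_set_inactive_ratio()
above scales with the memcg limit, here is a minimal user-space sketch (not
part of the patch; int_sqrt() is approximated with floor(sqrt()), and the
example limits are made up):

	#include <math.h>
	#include <stdio.h>

	/* Stand-in for the kernel's int_sqrt(): floor of the square root. */
	static unsigned int int_sqrt_approx(unsigned long long x)
	{
		return (unsigned int)sqrt((double)x);
	}

	/* Mirrors mem_cgroup_set_inactive_ratio():
	 * ratio = int_sqrt(10 * limit_in_GB), or 1 when the limit is below 1GB. */
	static unsigned int inactive_ratio(unsigned long long limit_bytes)
	{
		unsigned long long gb = limit_bytes >> 30;

		return gb ? int_sqrt_approx(10 * gb) : 1;
	}

	int main(void)
	{
		unsigned long long limits[] = { 1ULL << 30, 10ULL << 30, 100ULL << 30 };

		/* Prints: 1GB -> 3, 10GB -> 10, 100GB -> 31 */
		for (int i = 0; i < 3; i++)
			printf("%3lluGB limit -> inactive_ratio %u\n",
			       limits[i] >> 30, inactive_ratio(limits[i]));
		return 0;
	}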

mm/vmscan.c
@@ -1310,14 +1310,7 @@
 	pagevec_release(&pvec);
 }
 
-/**
- * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- *
- * Returns true if the zone does not have enough inactive anon pages,
- * meaning some active anon pages need to be deactivated.
- */
-static int inactive_anon_is_low(struct zone *zone)
+static int inactive_anon_is_low_global(struct zone *zone)
 {
 	unsigned long active, inactive;
 
@@ -1330,6 +1323,25 @@
 	return 0;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ * @sc: scan control of this context
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scan_global_lru(sc))
+		low = inactive_anon_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
@@ -1340,8 +1352,7 @@
 		return 0;
 	}
 
-	if (lru == LRU_ACTIVE_ANON &&
-	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+	if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
 
@@ -1509,10 +1520,8 @@
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+	if (inactive_anon_is_low(zone, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
-	else if (!scan_global_lru(sc))
-		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
 }
@@ -1807,7 +1816,7 @@
 		 * Do some background aging of the anon list, to give
 		 * pages a chance to be referenced before reclaiming.
 		 */
-		if (inactive_anon_is_low(zone))
+		if (inactive_anon_is_low(zone, &sc))
 			shrink_active_list(SWAP_CLUSTER_MAX, zone,
 						&sc, priority, 0);
 
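
A worked example of the new per-memcg check (mem_cgroup_inactive_anon_is_low()
reports "low" when inactive * inactive_ratio < active); the numbers below are
illustrative, not taken from the patch:

	/*
	 * limit = 10GB            -> inactive_ratio = int_sqrt(10 * 10) = 10
	 * active anon   = 1536MB
	 * inactive anon =  100MB  -> 100MB * 10 = 1000MB < 1536MB: low, so
	 *                            shrink_list() deactivates active anon
	 * inactive anon =  200MB  -> 200MB * 10 = 2000MB >= 1536MB: not low,
	 *                            the inactive list is shrunk as usual
	 */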