Commit 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9
Committed by
Linus Torvalds
1 parent
549927620b
Exists in
master
and in
4 other branches
memcg: add inactive_anon_is_low()
inactive_anon_is_low() is a key component of active/inactive anon balancing during reclaim. However, the current inactive_anon_is_low() function only considers global reclaim. Therefore, we need the following ugly scan_global_lru() condition: if (lru == LRU_ACTIVE_ANON && (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; } This causes memcg reclaim to always deactivate pages whenever shrink_list() is called. Add mem_cgroup_inactive_anon_is_low() to improve active/inactive anon balancing for memcgroups. Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: "Pekka Enberg" <penberg@cs.helsinki.fi> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Hugh Dickins <hugh@veritas.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 77 additions and 15 deletions Side-by-side Diff
include/linux/memcontrol.h
... | ... | @@ -100,6 +100,8 @@ |
100 | 100 | |
101 | 101 | extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, |
102 | 102 | int priority, enum lru_list lru); |
103 | +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, | |
104 | + struct zone *zone); | |
103 | 105 | |
104 | 106 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
105 | 107 | extern int do_swap_account; |
... | ... | @@ -251,6 +253,13 @@ |
251 | 253 | { |
252 | 254 | return false; |
253 | 255 | } |
256 | + | |
257 | +static inline int | |
258 | +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | |
259 | +{ | |
260 | + return 1; | |
261 | +} | |
262 | + | |
254 | 263 | #endif /* CONFIG_CGROUP_MEM_CONT */ |
255 | 264 | |
256 | 265 | #endif /* _LINUX_MEMCONTROL_H */ |
mm/memcontrol.c
... | ... | @@ -156,6 +156,9 @@ |
156 | 156 | unsigned long last_oom_jiffies; |
157 | 157 | int obsolete; |
158 | 158 | atomic_t refcnt; |
159 | + | |
160 | + unsigned int inactive_ratio; | |
161 | + | |
159 | 162 | /* |
160 | 163 | * statistics. This must be placed at the end of memcg. |
161 | 164 | */ |
... | ... | @@ -431,6 +434,20 @@ |
431 | 434 | return (nr_pages >> priority); |
432 | 435 | } |
433 | 436 | |
437 | +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | |
438 | +{ | |
439 | + unsigned long active; | |
440 | + unsigned long inactive; | |
441 | + | |
442 | + inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON); | |
443 | + active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON); | |
444 | + | |
445 | + if (inactive * memcg->inactive_ratio < active) | |
446 | + return 1; | |
447 | + | |
448 | + return 0; | |
449 | +} | |
450 | + | |
434 | 451 | unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, |
435 | 452 | struct list_head *dst, |
436 | 453 | unsigned long *scanned, int order, |
... | ... | @@ -1360,6 +1377,29 @@ |
1360 | 1377 | return 0; |
1361 | 1378 | } |
1362 | 1379 | |
1380 | +/* | |
1381 | + * The inactive anon list should be small enough that the VM never has to | |
1382 | + * do too much work, but large enough that each inactive page has a chance | |
1383 | + * to be referenced again before it is swapped out. | |
1384 | + * | |
1385 | + * this calculation is straightforward porting from | |
1386 | + * page_alloc.c::setup_per_zone_inactive_ratio(). | |
1387 | + * it describe more detail. | |
1388 | + */ | |
1389 | +static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg) | |
1390 | +{ | |
1391 | + unsigned int gb, ratio; | |
1392 | + | |
1393 | + gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30; | |
1394 | + if (gb) | |
1395 | + ratio = int_sqrt(10 * gb); | |
1396 | + else | |
1397 | + ratio = 1; | |
1398 | + | |
1399 | + memcg->inactive_ratio = ratio; | |
1400 | + | |
1401 | +} | |
1402 | + | |
1363 | 1403 | static DEFINE_MUTEX(set_limit_mutex); |
1364 | 1404 | |
1365 | 1405 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, |
... | ... | @@ -1398,6 +1438,10 @@ |
1398 | 1438 | GFP_KERNEL, false); |
1399 | 1439 | if (!progress) retry_count--; |
1400 | 1440 | } |
1441 | + | |
1442 | + if (!ret) | |
1443 | + mem_cgroup_set_inactive_ratio(memcg); | |
1444 | + | |
1401 | 1445 | return ret; |
1402 | 1446 | } |
1403 | 1447 | |
... | ... | @@ -1982,7 +2026,7 @@ |
1982 | 2026 | res_counter_init(&mem->res, NULL); |
1983 | 2027 | res_counter_init(&mem->memsw, NULL); |
1984 | 2028 | } |
1985 | - | |
2029 | + mem_cgroup_set_inactive_ratio(mem); | |
1986 | 2030 | mem->last_scanned_child = NULL; |
1987 | 2031 | |
1988 | 2032 | return &mem->css; |
mm/vmscan.c
... | ... | @@ -1310,14 +1310,7 @@ |
1310 | 1310 | pagevec_release(&pvec); |
1311 | 1311 | } |
1312 | 1312 | |
1313 | -/** | |
1314 | - * inactive_anon_is_low - check if anonymous pages need to be deactivated | |
1315 | - * @zone: zone to check | |
1316 | - * | |
1317 | - * Returns true if the zone does not have enough inactive anon pages, | |
1318 | - * meaning some active anon pages need to be deactivated. | |
1319 | - */ | |
1320 | -static int inactive_anon_is_low(struct zone *zone) | |
1313 | +static int inactive_anon_is_low_global(struct zone *zone) | |
1321 | 1314 | { |
1322 | 1315 | unsigned long active, inactive; |
1323 | 1316 | |
... | ... | @@ -1330,6 +1323,25 @@ |
1330 | 1323 | return 0; |
1331 | 1324 | } |
1332 | 1325 | |
1326 | +/** | |
1327 | + * inactive_anon_is_low - check if anonymous pages need to be deactivated | |
1328 | + * @zone: zone to check | |
1329 | + * @sc: scan control of this context | |
1330 | + * | |
1331 | + * Returns true if the zone does not have enough inactive anon pages, | |
1332 | + * meaning some active anon pages need to be deactivated. | |
1333 | + */ | |
1334 | +static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) | |
1335 | +{ | |
1336 | + int low; | |
1337 | + | |
1338 | + if (scan_global_lru(sc)) | |
1339 | + low = inactive_anon_is_low_global(zone); | |
1340 | + else | |
1341 | + low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); | |
1342 | + return low; | |
1343 | +} | |
1344 | + | |
1333 | 1345 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, |
1334 | 1346 | struct zone *zone, struct scan_control *sc, int priority) |
1335 | 1347 | { |
... | ... | @@ -1340,8 +1352,7 @@ |
1340 | 1352 | return 0; |
1341 | 1353 | } |
1342 | 1354 | |
1343 | - if (lru == LRU_ACTIVE_ANON && | |
1344 | - (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { | |
1355 | + if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) { | |
1345 | 1356 | shrink_active_list(nr_to_scan, zone, sc, priority, file); |
1346 | 1357 | return 0; |
1347 | 1358 | } |
1348 | 1359 | |
... | ... | @@ -1509,10 +1520,8 @@ |
1509 | 1520 | * Even if we did not try to evict anon pages at all, we want to |
1510 | 1521 | * rebalance the anon lru active/inactive ratio. |
1511 | 1522 | */ |
1512 | - if (!scan_global_lru(sc) || inactive_anon_is_low(zone)) | |
1523 | + if (inactive_anon_is_low(zone, sc)) | |
1513 | 1524 | shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); |
1514 | - else if (!scan_global_lru(sc)) | |
1515 | - shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); | |
1516 | 1525 | |
1517 | 1526 | throttle_vm_writeout(sc->gfp_mask); |
1518 | 1527 | } |
... | ... | @@ -1807,7 +1816,7 @@ |
1807 | 1816 | * Do some background aging of the anon list, to give |
1808 | 1817 | * pages a chance to be referenced before reclaiming. |
1809 | 1818 | */ |
1810 | - if (inactive_anon_is_low(zone)) | |
1819 | + if (inactive_anon_is_low(zone, &sc)) | |
1811 | 1820 | shrink_active_list(SWAP_CLUSTER_MAX, zone, |
1812 | 1821 | &sc, priority, 0); |
1813 | 1822 |