Commit c772be939e078afd2505ede7d596a30f8f61de95
Committed by
Linus Torvalds
1 parent
a7885eb8ad
Exists in
master
and in
4 other branches
memcg: fix calculation of active_ratio
Currently, the inactive_ratio of a memcg is calculated when the limit is set, because page_alloc.c does so and the current implementation is a straightforward port of it. However, memcg recently introduced the hierarchy feature. Under hierarchical restriction, the effective memory limit is decided not only by the memory.limit_in_bytes of the current cgroup, but also by the parent's limit and the siblings' memory usage. As a result, the optimal inactive_ratio changes frequently, so it is better to calculate it every time it is needed. Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 34 additions and 37 deletions Side-by-side Diff
include/linux/memcontrol.h
... | ... | @@ -97,8 +97,7 @@ |
97 | 97 | int priority); |
98 | 98 | extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, |
99 | 99 | int priority); |
100 | -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, | |
101 | - struct zone *zone); | |
100 | +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); | |
102 | 101 | unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, |
103 | 102 | struct zone *zone, |
104 | 103 | enum lru_list lru); |
... | ... | @@ -252,7 +251,7 @@ |
252 | 251 | } |
253 | 252 | |
254 | 253 | static inline int |
255 | -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | |
254 | +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) | |
256 | 255 | { |
257 | 256 | return 1; |
258 | 257 | } |
mm/memcontrol.c
... | ... | @@ -166,9 +166,6 @@ |
166 | 166 | |
167 | 167 | unsigned int swappiness; |
168 | 168 | |
169 | - | |
170 | - unsigned int inactive_ratio; | |
171 | - | |
172 | 169 | /* |
173 | 170 | * statistics. This must be placed at the end of memcg. |
174 | 171 | */ |
175 | 172 | |
176 | 173 | |
... | ... | @@ -432,15 +429,43 @@ |
432 | 429 | spin_unlock(&mem->reclaim_param_lock); |
433 | 430 | } |
434 | 431 | |
435 | -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | |
432 | +static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages) | |
436 | 433 | { |
437 | 434 | unsigned long active; |
438 | 435 | unsigned long inactive; |
436 | + unsigned long gb; | |
437 | + unsigned long inactive_ratio; | |
439 | 438 | |
440 | 439 | inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON); |
441 | 440 | active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON); |
442 | 441 | |
443 | - if (inactive * memcg->inactive_ratio < active) | |
442 | + gb = (inactive + active) >> (30 - PAGE_SHIFT); | |
443 | + if (gb) | |
444 | + inactive_ratio = int_sqrt(10 * gb); | |
445 | + else | |
446 | + inactive_ratio = 1; | |
447 | + | |
448 | + if (present_pages) { | |
449 | + present_pages[0] = inactive; | |
450 | + present_pages[1] = active; | |
451 | + } | |
452 | + | |
453 | + return inactive_ratio; | |
454 | +} | |
455 | + | |
456 | +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) | |
457 | +{ | |
458 | + unsigned long active; | |
459 | + unsigned long inactive; | |
460 | + unsigned long present_pages[2]; | |
461 | + unsigned long inactive_ratio; | |
462 | + | |
463 | + inactive_ratio = calc_inactive_ratio(memcg, present_pages); | |
464 | + | |
465 | + inactive = present_pages[0]; | |
466 | + active = present_pages[1]; | |
467 | + | |
468 | + if (inactive * inactive_ratio < active) | |
444 | 469 | return 1; |
445 | 470 | |
446 | 471 | return 0; |
... | ... | @@ -1432,29 +1457,6 @@ |
1432 | 1457 | return 0; |
1433 | 1458 | } |
1434 | 1459 | |
1435 | -/* | |
1436 | - * The inactive anon list should be small enough that the VM never has to | |
1437 | - * do too much work, but large enough that each inactive page has a chance | |
1438 | - * to be referenced again before it is swapped out. | |
1439 | - * | |
1440 | - * this calculation is straightforward porting from | |
1441 | - * page_alloc.c::setup_per_zone_inactive_ratio(). | |
1442 | - * it describe more detail. | |
1443 | - */ | |
1444 | -static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg) | |
1445 | -{ | |
1446 | - unsigned int gb, ratio; | |
1447 | - | |
1448 | - gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30; | |
1449 | - if (gb) | |
1450 | - ratio = int_sqrt(10 * gb); | |
1451 | - else | |
1452 | - ratio = 1; | |
1453 | - | |
1454 | - memcg->inactive_ratio = ratio; | |
1455 | - | |
1456 | -} | |
1457 | - | |
1458 | 1460 | static DEFINE_MUTEX(set_limit_mutex); |
1459 | 1461 | |
1460 | 1462 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, |
... | ... | @@ -1496,9 +1498,6 @@ |
1496 | 1498 | if (!progress) retry_count--; |
1497 | 1499 | } |
1498 | 1500 | |
1499 | - if (!ret) | |
1500 | - mem_cgroup_set_inactive_ratio(memcg); | |
1501 | - | |
1502 | 1501 | return ret; |
1503 | 1502 | } |
1504 | 1503 | |
... | ... | @@ -1858,7 +1857,7 @@ |
1858 | 1857 | } |
1859 | 1858 | |
1860 | 1859 | #ifdef CONFIG_DEBUG_VM |
1861 | - cb->fill(cb, "inactive_ratio", mem_cont->inactive_ratio); | |
1860 | + cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); | |
1862 | 1861 | |
1863 | 1862 | { |
1864 | 1863 | int nid, zid; |
... | ... | @@ -2150,7 +2149,6 @@ |
2150 | 2149 | res_counter_init(&mem->res, NULL); |
2151 | 2150 | res_counter_init(&mem->memsw, NULL); |
2152 | 2151 | } |
2153 | - mem_cgroup_set_inactive_ratio(mem); | |
2154 | 2152 | mem->last_scanned_child = NULL; |
2155 | 2153 | spin_lock_init(&mem->reclaim_param_lock); |
2156 | 2154 |
mm/vmscan.c
... | ... | @@ -1340,7 +1340,7 @@ |
1340 | 1340 | if (scanning_global_lru(sc)) |
1341 | 1341 | low = inactive_anon_is_low_global(zone); |
1342 | 1342 | else |
1343 | - low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); | |
1343 | + low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup); | |
1344 | 1344 | return low; |
1345 | 1345 | } |
1346 | 1346 |