Commit f86296317434b21585e229f6c49a33cb9ebab4d3
Committed by: Linus Torvalds
Parent: 1a8670a29b
Exists in: master and 39 other branches
mm: do batched scans for mem_cgroup
For mem_cgroup, shrink_zone() may call shrink_list() with nr_to_scan=1, in which case shrink_list() _still_ calls isolate_pages() with the much larger SWAP_CLUSTER_MAX. It effectively scales up the inactive list scan rate by up to 32 times.

For example, with 16k inactive pages and DEF_PRIORITY=12, (16k >> 12) = 4. So when shrink_zone() expects to scan 4 pages in the active/inactive list, the active list will be scanned 4 pages, while the inactive list will in effect be (over) scanned SWAP_CLUSTER_MAX=32 pages. That could break the balance between the two lists.

It can further impact the scan of the anon active list, due to the anon active/inactive ratio rebalance logic in balance_pgdat()/shrink_zone():

    inactive anon list over scanned => inactive_anon_is_low() == TRUE
                                    => shrink_active_list()
                                    => active anon list over scanned

So the end result may be:

- anon inactive => over scanned
- anon active   => over scanned (maybe not as much)
- file inactive => over scanned
- file active   => under scanned (relatively)

The accesses to nr_saved_scan are not lock protected and so not 100% accurate; however, we can tolerate small errors and the resulting small imbalance in scan rates between zones.

Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
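For context, the batching this commit extends to mem_cgroup is implemented by nr_scan_try_batch(): small per-priority scan requests are accumulated in nr_saved_scan and released only once a full swap_cluster_max batch has built up. A sketch of the helper as it appears in mm/vmscan.c around this kernel version (quoted from memory, so details may differ slightly):

    static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
                                           unsigned long *nr_saved_scan,
                                           unsigned long swap_cluster_max)
    {
            unsigned long nr;

            /* accumulate this round's request on top of any leftovers */
            *nr_saved_scan += nr_to_scan;
            nr = *nr_saved_scan;

            if (nr >= swap_cluster_max)
                    *nr_saved_scan = 0;     /* release a full batch */
            else
                    nr = 0;                 /* keep saving; scan nothing yet */

            return nr;
    }

Moving nr_saved_scan into struct zone_reclaim_stat gives each memcg its own accumulator, so a cgroup asking shrink_zone() for a handful of pages no longer forces a 32-page isolate_pages() on every call.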
Showing 3 changed files with 17 additions and 11 deletions
include/linux/mmzone.h
@@ -273,6 +273,11 @@
 	 */
 	unsigned long		recent_rotated[2];
 	unsigned long		recent_scanned[2];
+
+	/*
+	 * accumulated for batching
+	 */
+	unsigned long		nr_saved_scan[NR_LRU_LISTS];
 };
 
 struct zone {
@@ -327,7 +332,6 @@
 	spinlock_t		lru_lock;
 	struct zone_lru {
 		struct list_head list;
-		unsigned long nr_saved_scan;	/* accumulated for batching */
 	} lru[NR_LRU_LISTS];
 
 	struct zone_reclaim_stat reclaim_stat;
mm/page_alloc.c
@@ -3809,7 +3809,7 @@
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->lru[l].nr_saved_scan = 0;
+			zone->reclaim_stat.nr_saved_scan[l] = 0;
 		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
mm/vmscan.c
@@ -1586,6 +1586,7 @@
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	int noswap = 0;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
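The get_reclaim_stat() helper used above is what makes the relocated field per-cgroup: under global reclaim it returns the zone's own stats, under memcg reclaim the cgroup's per-zone stats. Roughly, as it looks in mm/vmscan.c of this era (again from memory; the exact body may differ):

    static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
                                                      struct scan_control *sc)
    {
            if (scanning_global_lru(sc))
                    return &zone->reclaim_stat;

            /* memcg reclaim: per-cgroup, per-zone statistics */
            return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
    }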
@@ -1605,12 +1606,9 @@
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc))
-			nr[l] = nr_scan_try_batch(scan,
-						  &zone->lru[l].nr_saved_scan,
-						  swap_cluster_max);
-		else
-			nr[l] = scan;
+		nr[l] = nr_scan_try_batch(scan,
+					  &reclaim_stat->nr_saved_scan[l],
+					  swap_cluster_max);
 	}
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2220,6 +2218,7 @@
 {
 	struct zone *zone;
 	unsigned long nr_reclaimed = 0;
+	struct zone_reclaim_stat *reclaim_stat;
 
 	for_each_populated_zone(zone) {
 		enum lru_list l;
 
@@ -2236,11 +2235,14 @@
 				    l == LRU_ACTIVE_FILE))
 					continue;
 
-				zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
-				if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
+				reclaim_stat = get_reclaim_stat(zone, sc);
+				reclaim_stat->nr_saved_scan[l] +=
+							(lru_pages >> prio) + 1;
+				if (reclaim_stat->nr_saved_scan[l]
+						>= nr_pages || pass > 3) {
 					unsigned long nr_to_scan;
 
-					zone->lru[l].nr_saved_scan = 0;
+					reclaim_stat->nr_saved_scan[l] = 0;
 					nr_to_scan = min(nr_pages, lru_pages);
 					nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								    sc, prio);