Commit 744ed1442757767ffede5008bb13e0805085902e
Committed by
Linus Torvalds
1 parent
d8505dee1a
Exists in
master
and in
4 other branches
mm: batch activate_page() to reduce lock contention
The zone->lru_lock is heavily contended in workloads where activate_page() is frequently used. We could do batch activate_page() to reduce the lock contention. The batched pages will be added into zone list when the pool is full or page reclaim is trying to drain them. For example, in a 4 socket 64 CPU system, create a sparse file and 64 processes, processes shared map to the file. Each process read-accesses the whole file and then exits. The process exit will do unmap_vmas() and cause a lot of activate_page() calls. In such workload, we saw about 58% total time reduction with below patch. Other workloads with a lot of activate_page also benefit a lot too. I tested some microbenchmarks: case-anon-cow-rand-mt 0.58% case-anon-cow-rand -3.30% case-anon-cow-seq-mt -0.51% case-anon-cow-seq -5.68% case-anon-r-rand-mt 0.23% case-anon-r-rand 0.81% case-anon-r-seq-mt -0.71% case-anon-r-seq -1.99% case-anon-rx-rand-mt 2.11% case-anon-rx-seq-mt 3.46% case-anon-w-rand-mt -0.03% case-anon-w-rand -0.50% case-anon-w-seq-mt -1.08% case-anon-w-seq -0.12% case-anon-wx-rand-mt -5.02% case-anon-wx-seq-mt -1.43% case-fork 1.65% case-fork-sleep -0.07% case-fork-withmem 1.39% case-hugetlb -0.59% case-lru-file-mmap-read-mt -0.54% case-lru-file-mmap-read 0.61% case-lru-file-mmap-read-rand -2.24% case-lru-file-readonce -0.64% case-lru-file-readtwice -11.69% case-lru-memcg -1.35% case-mmap-pread-rand-mt 1.88% case-mmap-pread-rand -15.26% case-mmap-pread-seq-mt 0.89% case-mmap-pread-seq -69.72% case-mmap-xread-rand-mt 0.71% case-mmap-xread-seq-mt 0.38% The most significant are: case-lru-file-readtwice -11.69% case-mmap-pread-rand -15.26% case-mmap-pread-seq -69.72% which use activate_page a lot. Others are basically variations because each run has slight differences. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Shaohua Li <shaohua.li@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 92 additions and 13 deletions Side-by-side Diff
mm/internal.h
... | ... | @@ -39,6 +39,15 @@ |
39 | 39 | |
40 | 40 | extern unsigned long highest_memmap_pfn; |
41 | 41 | |
42 | +#ifdef CONFIG_SMP | |
43 | +extern int putback_active_lru_page(struct zone *zone, struct page *page); | |
44 | +#else | |
45 | +static inline int putback_active_lru_page(struct zone *zone, struct page *page) | |
46 | +{ | |
47 | + return 0; | |
48 | +} | |
49 | +#endif | |
50 | + | |
42 | 51 | /* |
43 | 52 | * in mm/vmscan.c: |
44 | 53 | */ |
mm/swap.c
... | ... | @@ -271,27 +271,94 @@ |
271 | 271 | } |
272 | 272 | |
273 | 273 | /* |
274 | - * FIXME: speed this up? | |
274 | + * A page will go to active list either by activate_page or putback_lru_page. | |
275 | + * In the activate_page case, the page hasn't active bit set. The page might | |
276 | + * not in LRU list because it's isolated before it gets a chance to be moved to | |
277 | + * active list. The window is small because pagevec just stores several pages. | |
278 | + * For such case, we do nothing for such page. | |
279 | + * In the putback_lru_page case, the page isn't in lru list but has active | |
280 | + * bit set | |
275 | 281 | */ |
276 | -void activate_page(struct page *page) | |
282 | +static void __activate_page(struct page *page, void *arg) | |
277 | 283 | { |
278 | 284 | struct zone *zone = page_zone(page); |
285 | + int file = page_is_file_cache(page); | |
286 | + int lru = page_lru_base_type(page); | |
287 | + bool putback = !PageLRU(page); | |
279 | 288 | |
280 | - spin_lock_irq(&zone->lru_lock); | |
281 | - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
282 | - int file = page_is_file_cache(page); | |
283 | - int lru = page_lru_base_type(page); | |
289 | + /* The page is isolated before it's moved to active list */ | |
290 | + if (!PageLRU(page) && !PageActive(page)) | |
291 | + return; | |
292 | + if ((PageLRU(page) && PageActive(page)) || PageUnevictable(page)) | |
293 | + return; | |
294 | + | |
295 | + if (!putback) | |
284 | 296 | del_page_from_lru_list(zone, page, lru); |
297 | + else | |
298 | + SetPageLRU(page); | |
285 | 299 | |
286 | - SetPageActive(page); | |
287 | - lru += LRU_ACTIVE; | |
288 | - add_page_to_lru_list(zone, page, lru); | |
289 | - __count_vm_event(PGACTIVATE); | |
300 | + SetPageActive(page); | |
301 | + lru += LRU_ACTIVE; | |
302 | + add_page_to_lru_list(zone, page, lru); | |
290 | 303 | |
291 | - update_page_reclaim_stat(zone, page, file, 1); | |
304 | + if (putback) | |
305 | + return; | |
306 | + __count_vm_event(PGACTIVATE); | |
307 | + update_page_reclaim_stat(zone, page, file, 1); | |
308 | +} | |
309 | + | |
310 | +#ifdef CONFIG_SMP | |
311 | +static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); | |
312 | + | |
313 | +static void activate_page_drain(int cpu) | |
314 | +{ | |
315 | + struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu); | |
316 | + | |
317 | + if (pagevec_count(pvec)) | |
318 | + pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
319 | +} | |
320 | + | |
321 | +void activate_page(struct page *page) | |
322 | +{ | |
323 | + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
324 | + struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
325 | + | |
326 | + page_cache_get(page); | |
327 | + if (!pagevec_add(pvec, page)) | |
328 | + pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
329 | + put_cpu_var(activate_page_pvecs); | |
292 | 330 | } |
331 | +} | |
332 | + | |
333 | +/* Caller should hold zone->lru_lock */ | |
334 | +int putback_active_lru_page(struct zone *zone, struct page *page) | |
335 | +{ | |
336 | + struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
337 | + | |
338 | + if (!pagevec_add(pvec, page)) { | |
339 | + spin_unlock_irq(&zone->lru_lock); | |
340 | + pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
341 | + spin_lock_irq(&zone->lru_lock); | |
342 | + } | |
343 | + put_cpu_var(activate_page_pvecs); | |
344 | + return 1; | |
345 | +} | |
346 | + | |
347 | +#else | |
348 | +static inline void activate_page_drain(int cpu) | |
349 | +{ | |
350 | +} | |
351 | + | |
352 | +void activate_page(struct page *page) | |
353 | +{ | |
354 | + struct zone *zone = page_zone(page); | |
355 | + | |
356 | + spin_lock_irq(&zone->lru_lock); | |
357 | + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) | |
358 | + __activate_page(page, NULL); | |
293 | 359 | spin_unlock_irq(&zone->lru_lock); |
294 | 360 | } |
361 | +#endif | |
295 | 362 | |
296 | 363 | /* |
297 | 364 | * Mark a page as having seen activity. |
... | ... | @@ -390,6 +457,7 @@ |
390 | 457 | pagevec_move_tail(pvec); |
391 | 458 | local_irq_restore(flags); |
392 | 459 | } |
460 | + activate_page_drain(cpu); | |
393 | 461 | } |
394 | 462 | |
395 | 463 | void lru_add_drain(void) |
mm/vmscan.c
... | ... | @@ -1271,14 +1271,16 @@ |
1271 | 1271 | spin_lock_irq(&zone->lru_lock); |
1272 | 1272 | continue; |
1273 | 1273 | } |
1274 | - SetPageLRU(page); | |
1275 | 1274 | lru = page_lru(page); |
1276 | - add_page_to_lru_list(zone, page, lru); | |
1277 | 1275 | if (is_active_lru(lru)) { |
1278 | 1276 | int file = is_file_lru(lru); |
1279 | 1277 | int numpages = hpage_nr_pages(page); |
1280 | 1278 | reclaim_stat->recent_rotated[file] += numpages; |
1279 | + if (putback_active_lru_page(zone, page)) | |
1280 | + continue; | |
1281 | 1281 | } |
1282 | + SetPageLRU(page); | |
1283 | + add_page_to_lru_list(zone, page, lru); | |
1282 | 1284 | if (!pagevec_add(&pvec, page)) { |
1283 | 1285 | spin_unlock_irq(&zone->lru_lock); |
1284 | 1286 | __pagevec_release(&pvec); |