Commit 744ed1442757767ffede5008bb13e0805085902e

Authored by Shaohua Li
Committed by Linus Torvalds
1 parent d8505dee1a

mm: batch activate_page() to reduce lock contention

The zone->lru_lock is heavily contended in workloads where activate_page()
is used frequently.  We can batch activate_page() calls to reduce the lock
contention.  The batched pages are added to the zone's active list when the
per-CPU pool (a pagevec) is full or when page reclaim drains it.
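As a rough illustration of why this helps, here is a minimal user-space model
of the batching pattern.  It is only a sketch: the pthread mutex, the BATCH
size of 14 (the size of a kernel pagevec at the time), and the
activate_one()/drain() names are stand-ins; the real implementation is in the
diff below.

#include <pthread.h>

#define BATCH 14	/* roughly the size of a kernel pagevec */

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread void *batch[BATCH];	/* per-thread stand-in for the per-CPU pagevec */
static __thread int nr;

static void drain(void)
{
	/* One lock round trip moves a whole batch, instead of one per page. */
	pthread_mutex_lock(&lru_lock);
	for (int i = 0; i < nr; i++)
		(void)batch[i];	/* move batch[i] to the active list here */
	nr = 0;
	pthread_mutex_unlock(&lru_lock);
}

static void activate_one(void *page)
{
	batch[nr++] = page;	/* fast path: no lock taken */
	if (nr == BATCH)
		drain();
}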

For example, on a 4-socket, 64-CPU system, create a sparse file and 64
processes that map it with a shared mapping.  Each process read-accesses the
whole file and then exits.  Process exit does unmap_vmas(), which causes a
lot of activate_page() calls.  In such a workload, we saw about a 58%
reduction in total time with the patch below.  Other workloads with a lot of
activate_page() calls benefit a lot too.
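The workload above looks roughly like the following sketch.  The file path,
the 1GB size, and the 4096-byte page stride are placeholders; the actual test
program was not posted with this changelog.

#include <fcntl.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	const size_t size = 1UL << 30;	/* sparse 1GB file: placeholder size */
	int fd = open("/tmp/sparse", O_RDWR | O_CREAT | O_TRUNC, 0600);
	volatile char sum = 0;

	ftruncate(fd, size);		/* sparse: no blocks actually written */
	for (int i = 0; i < 64; i++) {
		if (fork() == 0) {
			char *p = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);

			for (size_t off = 0; off < size; off += 4096)
				sum += p[off];	/* read-access the whole file */
			/* exit: unmap_vmas() triggers the activate_page() storm */
			_exit(0);
		}
	}
	while (wait(NULL) > 0)		/* parent reaps all 64 children */
		;
	close(fd);
	return 0;
}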

I tested some microbenchmarks:
case-anon-cow-rand-mt		0.58%
case-anon-cow-rand		-3.30%
case-anon-cow-seq-mt		-0.51%
case-anon-cow-seq		-5.68%
case-anon-r-rand-mt		0.23%
case-anon-r-rand		0.81%
case-anon-r-seq-mt		-0.71%
case-anon-r-seq			-1.99%
case-anon-rx-rand-mt		2.11%
case-anon-rx-seq-mt		3.46%
case-anon-w-rand-mt		-0.03%
case-anon-w-rand		-0.50%
case-anon-w-seq-mt		-1.08%
case-anon-w-seq			-0.12%
case-anon-wx-rand-mt		-5.02%
case-anon-wx-seq-mt		-1.43%
case-fork			1.65%
case-fork-sleep			-0.07%
case-fork-withmem		1.39%
case-hugetlb			-0.59%
case-lru-file-mmap-read-mt	-0.54%
case-lru-file-mmap-read		0.61%
case-lru-file-mmap-read-rand	-2.24%
case-lru-file-readonce		-0.64%
case-lru-file-readtwice		-11.69%
case-lru-memcg			-1.35%
case-mmap-pread-rand-mt		1.88%
case-mmap-pread-rand		-15.26%
case-mmap-pread-seq-mt		0.89%
case-mmap-pread-seq		-69.72%
case-mmap-xread-rand-mt		0.71%
case-mmap-xread-seq-mt		0.38%

The most significant are:
case-lru-file-readtwice		-11.69%
case-mmap-pread-rand		-15.26%
case-mmap-pread-seq		-69.72%

which use activate_page() a lot.  The others are basically run-to-run
variation, since each run differs slightly.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 92 additions and 13 deletions

@@ -39,6 +39,15 @@
 
 extern unsigned long highest_memmap_pfn;
 
+#ifdef CONFIG_SMP
+extern int putback_active_lru_page(struct zone *zone, struct page *page);
+#else
+static inline int putback_active_lru_page(struct zone *zone, struct page *page)
+{
+	return 0;
+}
+#endif
+
 /*
  * in mm/vmscan.c:
  */
@@ -271,27 +271,94 @@
 }
 
 /*
- * FIXME: speed this up?
+ * A page will go to active list either by activate_page or putback_lru_page.
+ * In the activate_page case, the page hasn't active bit set. The page might
+ * not in LRU list because it's isolated before it gets a chance to be moved to
+ * active list. The window is small because pagevec just stores several pages.
+ * For such case, we do nothing for such page.
+ * In the putback_lru_page case, the page isn't in lru list but has active
+ * bit set
  */
-void activate_page(struct page *page)
+static void __activate_page(struct page *page, void *arg)
 {
 	struct zone *zone = page_zone(page);
+	int file = page_is_file_cache(page);
+	int lru = page_lru_base_type(page);
+	bool putback = !PageLRU(page);
 
-	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		int file = page_is_file_cache(page);
-		int lru = page_lru_base_type(page);
+	/* The page is isolated before it's moved to active list */
+	if (!PageLRU(page) && !PageActive(page))
+		return;
+	if ((PageLRU(page) && PageActive(page)) || PageUnevictable(page))
+		return;
+
+	if (!putback)
 		del_page_from_lru_list(zone, page, lru);
+	else
+		SetPageLRU(page);
 
-		SetPageActive(page);
-		lru += LRU_ACTIVE;
-		add_page_to_lru_list(zone, page, lru);
-		__count_vm_event(PGACTIVATE);
+	SetPageActive(page);
+	lru += LRU_ACTIVE;
+	add_page_to_lru_list(zone, page, lru);
 
-		update_page_reclaim_stat(zone, page, file, 1);
+	if (putback)
+		return;
+	__count_vm_event(PGACTIVATE);
+	update_page_reclaim_stat(zone, page, file, 1);
+}
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+
+static void activate_page_drain(int cpu)
+{
+	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+
+	if (pagevec_count(pvec))
+		pagevec_lru_move_fn(pvec, __activate_page, NULL);
+}
+
+void activate_page(struct page *page)
+{
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+		page_cache_get(page);
+		if (!pagevec_add(pvec, page))
+			pagevec_lru_move_fn(pvec, __activate_page, NULL);
+		put_cpu_var(activate_page_pvecs);
 	}
+}
+
+/* Caller should hold zone->lru_lock */
+int putback_active_lru_page(struct zone *zone, struct page *page)
+{
+	struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+	if (!pagevec_add(pvec, page)) {
+		spin_unlock_irq(&zone->lru_lock);
+		pagevec_lru_move_fn(pvec, __activate_page, NULL);
+		spin_lock_irq(&zone->lru_lock);
+	}
+	put_cpu_var(activate_page_pvecs);
+	return 1;
+}
+
+#else
+static inline void activate_page_drain(int cpu)
+{
+}
+
+void activate_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page))
+		__activate_page(page, NULL);
 	spin_unlock_irq(&zone->lru_lock);
 }
+#endif
 
 /*
  * Mark a page as having seen activity.
@@ -390,6 +457,7 @@
 		pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
+	activate_page_drain(cpu);
 }
 
 void lru_add_drain(void)
@@ -1271,14 +1271,16 @@
 			spin_lock_irq(&zone->lru_lock);
 			continue;
 		}
-		SetPageLRU(page);
 		lru = page_lru(page);
-		add_page_to_lru_list(zone, page, lru);
 		if (is_active_lru(lru)) {
 			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
+			if (putback_active_lru_page(zone, page))
+				continue;
 		}
+		SetPageLRU(page);
+		add_page_to_lru_list(zone, page, lru);
 		if (!pagevec_add(&pvec, page)) {
 			spin_unlock_irq(&zone->lru_lock);
 			__pagevec_release(&pvec);