Commit 7db8889ab05b57200158432755af318fb68854a2

Authored by Rik van Riel
Committed by Linus Torvalds
1 parent ab21588487

mm: have order > 0 compaction start off where it left

Order > 0 compaction stops when enough free pages of the correct page
order have been coalesced.  When doing subsequent higher order
allocations, it is possible for compaction to be invoked many times.

However, the compaction code always starts scanning for pages to migrate
at the start of the zone, and for free pages to migrate them to at the
end of the zone.

This can cause quadratic behaviour, with isolate_freepages starting at the
end of the zone each time, even though previous invocations of the
compaction code already filled up all free memory on that end of the zone.

This can cause isolate_freepages to take enormous amounts of CPU with
certain workloads on larger memory systems.

The obvious solution is to have isolate_freepages remember where it left
off last time, and continue at that point the next time it gets invoked
for an order > 0 compaction.  This could cause compaction to fail if
cc->free_pfn and cc->migrate_pfn are close together initially; in that
case we restart from the end of the zone and try once more.

Forced full (order == -1) compactions are left alone.
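
As a reading aid for the diff below, here is the new termination logic of
compact_finished() condensed into a standalone sketch.  The types and names
are simplified stand-ins, not the kernel code (which operates on struct
compact_control and struct zone; see the mm/compaction.c hunks):

#include <stdbool.h>

enum outcome { KEEP_GOING, DONE };

struct scanners {
	unsigned long free_pfn;		/* free scanner, walks down */
	unsigned long migrate_pfn;	/* migrate scanner, walks up */
	unsigned long start_free_pfn;	/* where this run's free scan began */
	int order;			/* > 0 for incremental compaction */
	bool wrapped;			/* already restarted from the end? */
};

static enum outcome check_finished(struct scanners *cc, unsigned long zone_end)
{
	if (cc->free_pfn <= cc->migrate_pfn) {
		if (cc->order > 0 && !cc->wrapped) {
			/* Started partway through: wrap to the end once. */
			cc->free_pfn = zone_end;
			cc->wrapped = true;
			return KEEP_GOING;
		}
		/* Full run, or the scanners met for the second time. */
		return DONE;
	}
	/* Wrapped around and returned to the original start: give up. */
	if (cc->wrapped && cc->free_pfn <= cc->start_free_pfn)
		return DONE;
	return KEEP_GOING;
}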

[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: s/laste/last/, use 80 cols]
Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Jim Schutt <jaschut@sandia.gov>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Cc: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 73 additions and 5 deletions

include/linux/mmzone.h
@@ -368,6 +368,10 @@
 	 */
 	spinlock_t		lock;
 	int			all_unreclaimable; /* All pages pinned */
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* pfn where the last incremental compaction isolated free pages */
+	unsigned long		compact_cached_free_pfn;
+#endif
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;

mm/compaction.c
@@ -422,6 +422,17 @@
 					pfn -= pageblock_nr_pages) {
 		unsigned long isolated;
 
+		/*
+		 * Skip ahead if another thread is compacting in the area
+		 * simultaneously. If we wrapped around, we can only skip
+		 * ahead if zone->compact_cached_free_pfn also wrapped to
+		 * above our starting point.
+		 */
+		if (cc->order > 0 && (!cc->wrapped ||
+				      zone->compact_cached_free_pfn >
+				      cc->start_free_pfn))
+			pfn = min(pfn, zone->compact_cached_free_pfn);
+
 		if (!pfn_valid(pfn))
 			continue;
 
@@ -461,8 +472,11 @@
 		 * looking for free pages, the search will restart here as
 		 * page migration may have returned some pages to the allocator
 		 */
-		if (isolated)
+		if (isolated) {
 			high_pfn = max(high_pfn, pfn);
+			if (cc->order > 0)
+				zone->compact_cached_free_pfn = high_pfn;
+		}
 	}
 
 	/* split_free_page does not map the pages */
@@ -556,6 +570,20 @@
 	return ISOLATE_SUCCESS;
 }
 
+/*
+ * Returns the start pfn of the last page block in a zone. This is the starting
+ * point for full compaction of a zone. Compaction searches for free pages from
+ * the end of each zone, while isolate_freepages_block scans forward inside each
+ * page block.
+ */
+static unsigned long start_free_pfn(struct zone *zone)
+{
+	unsigned long free_pfn;
+	free_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	free_pfn &= ~(pageblock_nr_pages-1);
+	return free_pfn;
+}
+
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
@@ -565,10 +593,28 @@
 	if (fatal_signal_pending(current))
 		return COMPACT_PARTIAL;
 
-	/* Compaction run completes if the migrate and free scanner meet */
-	if (cc->free_pfn <= cc->migrate_pfn)
+	/*
+	 * A full (order == -1) compaction run starts at the beginning and
+	 * end of a zone; it completes when the migrate and free scanner meet.
+	 * A partial (order > 0) compaction can start with the free scanner
+	 * at a random point in the zone, and may have to restart.
+	 */
+	if (cc->free_pfn <= cc->migrate_pfn) {
+		if (cc->order > 0 && !cc->wrapped) {
+			/* We started partway through; restart at the end. */
+			unsigned long free_pfn = start_free_pfn(zone);
+			zone->compact_cached_free_pfn = free_pfn;
+			cc->free_pfn = free_pfn;
+			cc->wrapped = 1;
+			return COMPACT_CONTINUE;
+		}
 		return COMPACT_COMPLETE;
+	}
 
+	/* We wrapped around and ended up where we started. */
+	if (cc->wrapped && cc->free_pfn <= cc->start_free_pfn)
+		return COMPACT_COMPLETE;
+
 	/*
 	 * order == -1 is expected when compacting via
 	 * /proc/sys/vm/compact_memory
@@ -664,8 +710,15 @@
 
 	/* Setup to move all movable pages to the end of the zone */
 	cc->migrate_pfn = zone->zone_start_pfn;
-	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
-	cc->free_pfn &= ~(pageblock_nr_pages-1);
+
+	if (cc->order > 0) {
+		/* Incremental compaction. Start where the last one stopped. */
+		cc->free_pfn = zone->compact_cached_free_pfn;
+		cc->start_free_pfn = cc->free_pfn;
+	} else {
+		/* Order == -1 starts at the end of the zone. */
+		cc->free_pfn = start_free_pfn(zone);
+	}
 
 	migrate_prep_local();
 
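Taken together, the mm/compaction.c changes give an order > 0 compaction
the following shape.  A hypothetical trace on a zone spanning pfns
0x0-0x40000 (all numbers made up for illustration):

  run 1: free_pfn = compact_cached_free_pfn = 0x40000 (freshly booted);
         the free scanner walks down and compaction finishes at 0x30000
         once enough high-order pages are free; the cache keeps 0x30000.
  run 2: free_pfn starts at the cached 0x30000 instead of rescanning
         0x30000-0x40000; if the scanners meet before enough pages are
         freed, wrapped is set, the free scanner restarts at 0x40000,
         and the run completes for good when free_pfn drops back to its
         starting point of 0x30000.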

mm/internal.h
@@ -118,8 +118,14 @@
 	unsigned long nr_freepages;	/* Number of isolated free pages */
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
+	unsigned long start_free_pfn;	/* where we started the search */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
 	bool sync;			/* Synchronous migration */
+	bool wrapped;			/* Order > 0 compactions are
+					   incremental, once free_pfn
+					   and migrate_pfn meet, we restart
+					   from the top of the zone;
+					   remember we wrapped around. */
 
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */

mm/page_alloc.c
@@ -4397,6 +4397,11 @@
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+		zone->compact_cached_free_pfn = zone->zone_start_pfn +
+						zone->spanned_pages;
+		zone->compact_cached_free_pfn &= ~(pageblock_nr_pages-1);
+#endif
 #ifdef CONFIG_NUMA
 		zone->node = nid;
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
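
A note on the mask used here and in start_free_pfn(): it rounds the pfn
down to a pageblock boundary at or below the end of the zone, so the free
scanner always begins at the start of a whole pageblock.  A worked example,
assuming pageblock_nr_pages == 512 (the x86 value for 4K base pages and 2M
huge pages; the zone numbers are made up):

	pfn  = zone_start_pfn + spanned_pages;	/* 0x1000 + 0x23456 = 0x24456 */
	pfn &= ~(512UL - 1);			/* mask 0x1ff: 0x24456 -> 0x24400 */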