Commit 56de7263fcf3eb10c8dcdf8d59a9cec831795f3f

Authored by Mel Gorman
Committed by Linus Torvalds
1 parent ed4a6d7f06

mm: compaction: direct compact when a high-order allocation fails

Ordinarily when a high-order allocation fails, direct reclaim is entered
to free pages to satisfy the allocation.  With this patch, it is
determined whether an allocation failed due to external fragmentation
rather than low memory and, if so, the calling process compacts memory
until a suitable page is freed.  Compaction by moving pages in memory is
considerably cheaper than paging out to disk and works where pages are
locked or no swap is available.  If compaction fails to free a page of a
suitable size, reclaim will still occur.

Direct compaction returns as soon as possible.  As each pageblock is
compacted, a check is made for whether a suitable page has been freed
and, if so, compaction returns.
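
To illustrate the early return with a minimal sketch (the full version
is compact_finished() in the mm/compaction.c hunk below; cc->order and
cc->migratetype describe what the direct compactor needs):

	/* After each pageblock: stop as soon as a suitable page is free */
	for (order = cc->order; order < MAX_ORDER; order++)
		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
			return COMPACT_PARTIAL;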

[akpm@linux-foundation.org: Fix build errors]
[aarcange@redhat.com: fix count_vm_event preempt in memory compaction direct reclaim]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 215 additions and 6 deletions

include/linux/compaction.h
1 1 #ifndef _LINUX_COMPACTION_H
2 2 #define _LINUX_COMPACTION_H
3 3  
4   -/* Return values for compact_zone() */
5   -#define COMPACT_CONTINUE 0
6   -#define COMPACT_PARTIAL 1
7   -#define COMPACT_COMPLETE 2
  4 +/* Return values for compact_zone() and try_to_compact_pages() */
  5 +/* compaction didn't start as it was not possible or direct reclaim was more suitable */
  6 +#define COMPACT_SKIPPED 0
  7 +/* compaction should continue to another pageblock */
  8 +#define COMPACT_CONTINUE 1
  9 +/* direct compaction partially compacted a zone and there are suitable pages */
  10 +#define COMPACT_PARTIAL 2
  11 +/* The full zone was compacted */
  12 +#define COMPACT_COMPLETE 3
8 13  
9 14 #ifdef CONFIG_COMPACTION
10 15 extern int sysctl_compact_memory;
11 16 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
12 17 void __user *buffer, size_t *length, loff_t *ppos);
  18 +
  19 +extern int fragmentation_index(struct zone *zone, unsigned int order);
  20 +extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
  21 + int order, gfp_t gfp_mask, nodemask_t *mask);
  22 +#else
  23 +static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
  24 + int order, gfp_t gfp_mask, nodemask_t *nodemask)
  25 +{
  26 + return COMPACT_CONTINUE;
  27 +}
  28 +
13 29 #endif /* CONFIG_COMPACTION */
14 30  
15 31 #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
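
A minimal sketch of how a caller is expected to interpret the new
return values (the variables are as in the allocator slow path; the
real allocator-side handling appears in the mm/page_alloc.c hunk below):

	unsigned long rc;

	rc = try_to_compact_pages(zonelist, order, gfp_mask, nodemask);
	if (rc == COMPACT_SKIPPED) {
		/* Compaction was not attempted; fall back to direct reclaim */
	} else {
		/* Some progress was made; recheck the free lists for a page */
	}
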
include/linux/vmstat.h
... ... @@ -45,6 +45,7 @@
45 45 PAGEOUTRUN, ALLOCSTALL, PGROTATED,
46 46 #ifdef CONFIG_COMPACTION
47 47 COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
  48 + COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
48 49 #endif
49 50 #ifdef CONFIG_HUGETLB_PAGE
50 51 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
mm/compaction.c
... ... @@ -35,6 +35,8 @@
35 35 unsigned long nr_anon;
36 36 unsigned long nr_file;
37 37  
  38 + unsigned int order; /* order a direct compactor needs */
  39 + int migratetype; /* MOVABLE, RECLAIMABLE etc */
38 40 struct zone *zone;
39 41 };
40 42  
... ... @@ -341,6 +343,9 @@
341 343 static int compact_finished(struct zone *zone,
342 344 struct compact_control *cc)
343 345 {
  346 + unsigned int order;
  347 + unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
  348 +
344 349 if (fatal_signal_pending(current))
345 350 return COMPACT_PARTIAL;
346 351  
... ... @@ -348,6 +353,24 @@
348 353 if (cc->free_pfn <= cc->migrate_pfn)
349 354 return COMPACT_COMPLETE;
350 355  
  356 + /* Compaction run is not finished if the watermark is not met */
  357 + if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
  358 + return COMPACT_CONTINUE;
  359 +
  360 + if (cc->order == -1)
  361 + return COMPACT_CONTINUE;
  362 +
  363 + /* Direct compactor: Is a suitable page free? */
  364 + for (order = cc->order; order < MAX_ORDER; order++) {
  365 + /* Job done if page is free of the right migratetype */
  366 + if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
  367 + return COMPACT_PARTIAL;
  368 +
  369 + /* Job done if allocation would set block type */
  370 + if (order >= pageblock_order && zone->free_area[order].nr_free)
  371 + return COMPACT_PARTIAL;
  372 + }
  373 +
351 374 return COMPACT_CONTINUE;
352 375 }
353 376  
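As a worked example with illustrative numbers: for an order-3 request,
compact_finished() treats the run as unfinished until the zone holds at
least low_wmark_pages(zone) + 8 free pages (1 << 3); only then does it
scan the free lists from order 3 upwards, returning COMPACT_PARTIAL as
soon as a page of the right migratetype, or any page large enough to set
the pageblock type, is found.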
... ... @@ -394,6 +417,99 @@
394 417 return ret;
395 418 }
396 419  
  420 +static unsigned long compact_zone_order(struct zone *zone,
  421 + int order, gfp_t gfp_mask)
  422 +{
  423 + struct compact_control cc = {
  424 + .nr_freepages = 0,
  425 + .nr_migratepages = 0,
  426 + .order = order,
  427 + .migratetype = allocflags_to_migratetype(gfp_mask),
  428 + .zone = zone,
  429 + };
  430 + INIT_LIST_HEAD(&cc.freepages);
  431 + INIT_LIST_HEAD(&cc.migratepages);
  432 +
  433 + return compact_zone(zone, &cc);
  434 +}
  435 +
  436 +/**
  437 + * try_to_compact_pages - Direct compact to satisfy a high-order allocation
  438 + * @zonelist: The zonelist used for the current allocation
  439 + * @order: The order of the current allocation
  440 + * @gfp_mask: The GFP mask of the current allocation
  441 + * @nodemask: The allowed nodes to allocate from
  442 + *
  443 + * This is the main entry point for direct page compaction.
  444 + */
  445 +unsigned long try_to_compact_pages(struct zonelist *zonelist,
  446 + int order, gfp_t gfp_mask, nodemask_t *nodemask)
  447 +{
  448 + enum zone_type high_zoneidx = gfp_zone(gfp_mask);
  449 + int may_enter_fs = gfp_mask & __GFP_FS;
  450 + int may_perform_io = gfp_mask & __GFP_IO;
  451 + unsigned long watermark;
  452 + struct zoneref *z;
  453 + struct zone *zone;
  454 + int rc = COMPACT_SKIPPED;
  455 +
  456 + /*
  457 + * Check whether it is worth even starting compaction. The order check is
  458 + * made because an assumption is made that the page allocator can satisfy
  459 + * the "cheaper" orders without taking special steps
  460 + */
  461 + if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
  462 + return rc;
  463 +
  464 + count_vm_event(COMPACTSTALL);
  465 +
  466 + /* Compact each zone in the list */
  467 + for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
  468 + nodemask) {
  469 + int fragindex;
  470 + int status;
  471 +
  472 + /*
  473 + * Watermarks for order-0 must be met for compaction. Note
  474 + * the 2UL. This is because during migration, copies of
  475 + * pages need to be allocated and for a short time, the
  476 + * footprint is higher
  477 + */
  478 + watermark = low_wmark_pages(zone) + (2UL << order);
  479 + if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
  480 + continue;
  481 +
  482 + /*
  483 + * fragmentation index determines if allocation failures are
  484 + * due to low memory or external fragmentation
  485 + *
  486 + * index of -1 implies allocations might succeed depending
  487 + * on watermarks
  488 + * index towards 0 implies failure is due to lack of memory
  489 + * index towards 1000 implies failure is due to fragmentation
  490 + *
  491 + * Only compact if a failure would be due to fragmentation.
  492 + */
  493 + fragindex = fragmentation_index(zone, order);
  494 + if (fragindex >= 0 && fragindex <= 500)
  495 + continue;
  496 +
  497 + if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
  498 + rc = COMPACT_PARTIAL;
  499 + break;
  500 + }
  501 +
  502 + status = compact_zone_order(zone, order, gfp_mask);
  503 + rc = max(status, rc);
  504 +
  505 + if (zone_watermark_ok(zone, order, watermark, 0, 0))
  506 + break;
  507 + }
  508 +
  509 + return rc;
  510 +}
  511 +
  512 +
397 513 /* Compact all zones within a node */
398 514 static int compact_node(int nid)
399 515 {
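Two details of try_to_compact_pages() above are worth making concrete
with illustrative numbers.  First, the order-0 watermark uses 2UL << order
(32 pages for an order-4 request, twice the request size) because, while
pages are being migrated, source and destination pages exist at the same
time.  Second, the fragmentation index gates the work: an index of, say,
800 means failure would be mostly due to external fragmentation, so the
zone is compacted; an index of 200 means it would be mostly due to lack
of memory, so the zone is skipped in favour of reclaim; and an index of
-1 with the order watermark already met means the allocation should
simply succeed, so COMPACT_PARTIAL is returned without compacting.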
... ... @@ -412,6 +528,7 @@
412 528 struct compact_control cc = {
413 529 .nr_freepages = 0,
414 530 .nr_migratepages = 0,
  531 + .order = -1,
415 532 };
416 533  
417 534 zone = &pgdat->node_zones[zoneid];
mm/page_alloc.c
... ... @@ -49,6 +49,7 @@
49 49 #include <linux/debugobjects.h>
50 50 #include <linux/kmemleak.h>
51 51 #include <linux/memory.h>
  52 +#include <linux/compaction.h>
52 53 #include <trace/events/kmem.h>
53 54 #include <linux/ftrace_event.h>
54 55  
... ... @@ -1758,6 +1759,59 @@
1758 1759 return page;
1759 1760 }
1760 1761  
  1762 +#ifdef CONFIG_COMPACTION
  1763 +/* Try memory compaction for high-order allocations before reclaim */
  1764 +static struct page *
  1765 +__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
  1766 + struct zonelist *zonelist, enum zone_type high_zoneidx,
  1767 + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
  1768 + int migratetype, unsigned long *did_some_progress)
  1769 +{
  1770 + struct page *page;
  1771 +
  1772 + if (!order)
  1773 + return NULL;
  1774 +
  1775 + *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
  1776 + nodemask);
  1777 + if (*did_some_progress != COMPACT_SKIPPED) {
  1778 +
  1779 + /* Page migration frees to the PCP lists but we want merging */
  1780 + drain_pages(get_cpu());
  1781 + put_cpu();
  1782 +
  1783 + page = get_page_from_freelist(gfp_mask, nodemask,
  1784 + order, zonelist, high_zoneidx,
  1785 + alloc_flags, preferred_zone,
  1786 + migratetype);
  1787 + if (page) {
  1788 + count_vm_event(COMPACTSUCCESS);
  1789 + return page;
  1790 + }
  1791 +
  1792 + /*
  1793 + * It's bad if compaction run occurs and fails.
  1794 + * The most likely reason is that pages exist,
  1795 + * but not enough to satisfy watermarks.
  1796 + */
  1797 + count_vm_event(COMPACTFAIL);
  1798 +
  1799 + cond_resched();
  1800 + }
  1801 +
  1802 + return NULL;
  1803 +}
  1804 +#else
  1805 +static inline struct page *
  1806 +__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
  1807 + struct zonelist *zonelist, enum zone_type high_zoneidx,
  1808 + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
  1809 + int migratetype, unsigned long *did_some_progress)
  1810 +{
  1811 + return NULL;
  1812 +}
  1813 +#endif /* CONFIG_COMPACTION */
  1814 +
1761 1815 /* The really slow allocator path where we enter direct reclaim */
1762 1816 static inline struct page *
1763 1817 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
... ... @@ -1943,6 +1997,15 @@
1943 1997 /* Avoid allocations with no watermarks from looping endlessly */
1944 1998 if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
1945 1999 goto nopage;
  2000 +
  2001 + /* Try direct compaction */
  2002 + page = __alloc_pages_direct_compact(gfp_mask, order,
  2003 + zonelist, high_zoneidx,
  2004 + nodemask,
  2005 + alloc_flags, preferred_zone,
  2006 + migratetype, &did_some_progress);
  2007 + if (page)
  2008 + goto got_pg;
1946 2009  
1947 2010 /* Try direct reclaim and then allocating */
1948 2011 page = __alloc_pages_direct_reclaim(gfp_mask, order,
mm/vmstat.c
... ... @@ -429,7 +429,7 @@
429 429 * The value can be used to determine if page reclaim or compaction
430 430 * should be used
431 431 */
432   -int fragmentation_index(unsigned int order, struct contig_page_info *info)
  432 +static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
433 433 {
434 434 unsigned long requested = 1UL << order;
435 435  
... ... @@ -448,6 +448,15 @@
448 448 */
449 449 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
450 450 }
  451 +
  452 +/* Same as __fragmentation_index but allocs contig_page_info on stack */
  453 +int fragmentation_index(struct zone *zone, unsigned int order)
  454 +{
  455 + struct contig_page_info info;
  456 +
  457 + fill_contig_page_info(zone, order, &info);
  458 + return __fragmentation_index(order, &info);
  459 +}
451 460 #endif
452 461  
453 462 #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
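As a worked example of the formula above, with illustrative numbers: for
an order-4 request (requested = 16), a zone whose 16 free pages sit in 16
isolated order-0 blocks gives

	index = 1000 - (1000 + 16 * 1000 / 16) / 16
	      = 1000 - 2000 / 16
	      = 875

i.e. close to 1000, so the failure would be overwhelmingly due to
external fragmentation rather than lack of memory.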
... ... @@ -771,6 +780,9 @@
771 780 "compact_blocks_moved",
772 781 "compact_pages_moved",
773 782 "compact_pagemigrate_failed",
  783 + "compact_stall",
  784 + "compact_fail",
  785 + "compact_success",
774 786 #endif
775 787  
776 788 #ifdef CONFIG_HUGETLB_PAGE
... ... @@ -1136,7 +1148,7 @@
1136 1148 zone->name);
1137 1149 for (order = 0; order < MAX_ORDER; ++order) {
1138 1150 fill_contig_page_info(zone, order, &info);
1139   - index = fragmentation_index(order, &info);
  1151 + index = __fragmentation_index(order, &info);
1140 1152 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1141 1153 }
1142 1154