Commit 5a03b051ed87e72b959f32a86054e1142ac4cf55
Committed by: Linus Torvalds
Parent: 878aee7d6b
thp: use compaction in kswapd for GFP_ATOMIC order > 0

This takes advantage of memory compaction to properly generate pages of
order > 0 if regular page reclaim fails, the priority level becomes more
severe, and we still don't reach the proper watermarks.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
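In short, the patch threads a compact_mode argument through compact_zone_order() so that compact_finished() can choose its target watermark per caller: direct compaction stops at the low watermark, kswapd compaction at the high one. The following stand-alone C sketch models just that selection; struct zone_stub and its fields are hypothetical stand-ins for the kernel's zone accounting, not the real API.

#include <stdbool.h>
#include <stdio.h>

#define COMPACT_MODE_DIRECT_RECLAIM	0
#define COMPACT_MODE_KSWAPD		1

struct zone_stub {
	unsigned long low_wmark;   /* stand-in for low_wmark_pages(zone) */
	unsigned long high_wmark;  /* stand-in for high_wmark_pages(zone) */
	unsigned long free_pages;
};

/*
 * Mirrors the watermark selection this patch adds to compact_finished():
 * a direct compactor may stop at the low watermark, but kswapd keeps
 * compacting until the high watermark so a pool of high-order pages
 * remains for GFP_ATOMIC allocations.
 */
static bool compaction_done(const struct zone_stub *z, int order, int mode)
{
	unsigned long watermark;

	if (mode != COMPACT_MODE_KSWAPD)
		watermark = z->low_wmark;
	else
		watermark = z->high_wmark;
	watermark += 1UL << order;

	return z->free_pages >= watermark;
}

int main(void)
{
	struct zone_stub z = { .low_wmark = 128, .high_wmark = 512, .free_pages = 300 };

	/* 300 free pages: enough for direct reclaim (128 + 4), not for kswapd (512 + 4). */
	printf("direct done: %d\n", compaction_done(&z, 2, COMPACT_MODE_DIRECT_RECLAIM));
	printf("kswapd done: %d\n", compaction_done(&z, 2, COMPACT_MODE_KSWAPD));
	return 0;
}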
3 files changed, 54 insertions(+), 18 deletions(-)
include/linux/compaction.h
@@ -11,6 +11,9 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
+#define COMPACT_MODE_DIRECT_RECLAIM	0
+#define COMPACT_MODE_KSWAPD		1
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -25,7 +28,8 @@
 			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-			gfp_t gfp_mask, bool sync);
+			gfp_t gfp_mask, bool sync,
+			int compact_mode);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
 
@@ -70,9 +74,10 @@
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-			gfp_t gfp_mask, bool sync)
+			gfp_t gfp_mask, bool sync,
+			int compact_mode)
 {
-	return 0;
+	return COMPACT_CONTINUE;
 }
 
 static inline void defer_compaction(struct zone *zone)
mm/compaction.c
@@ -42,6 +42,8 @@
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+
+	int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
 
@@ -382,10 +384,10 @@
 }
 
 static int compact_finished(struct zone *zone,
-			    struct compact_control *cc)
+						struct compact_control *cc)
 {
 	unsigned int order;
-	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
 		return COMPACT_PARTIAL;
 
@@ -395,12 +397,27 @@
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
+	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+		watermark = low_wmark_pages(zone);
+	else
+		watermark = high_wmark_pages(zone);
+	watermark += (1 << cc->order);
+
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
+	/*
+	 * Generating only one page of the right order is not enough
+	 * for kswapd, we must continue until we're above the high
+	 * watermark as a pool for high order GFP_ATOMIC allocations
+	 * too.
+	 */
+	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+		return COMPACT_CONTINUE;
+
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
@@ -514,8 +531,9 @@
 }
 
 unsigned long compact_zone_order(struct zone *zone,
-				 int order, gfp_t gfp_mask,
-				 bool sync)
+				 int order, gfp_t gfp_mask,
+				 bool sync,
+				 int compact_mode)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -524,6 +542,7 @@
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
+		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -569,7 +588,8 @@
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync);
+		status = compact_zone_order(zone, order, gfp_mask, sync,
+					    COMPACT_MODE_DIRECT_RECLAIM);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -600,6 +620,7 @@
 		.nr_freepages = 0,
 		.nr_migratepages = 0,
 		.order = -1,
+		.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 	};
 
 	zone = &pgdat->node_zones[zoneid];
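Note that compact_finished() consults the mode a second time after the watermark check: a direct compactor is done as soon as one suitable page of the requested order exists, while kswapd deliberately keeps going. A stand-alone sketch of that early-exit decision follows; keep_compacting() is an illustrative helper, not a kernel function.

#include <stdbool.h>
#include <stdio.h>

#define COMPACT_MODE_DIRECT_RECLAIM	0
#define COMPACT_MODE_KSWAPD		1

/*
 * Models the decision compact_finished() makes once the watermark
 * check has passed: direct compaction stops when a suitable page of
 * the requested order is free, while kswapd keeps compacting to
 * refill the pool for high-order GFP_ATOMIC callers.
 */
static bool keep_compacting(int mode, bool suitable_page_free)
{
	if (mode == COMPACT_MODE_KSWAPD)
		return true;
	return !suitable_page_free;
}

int main(void)
{
	printf("direct, page free -> continue? %d\n",
	       keep_compacting(COMPACT_MODE_DIRECT_RECLAIM, true));
	printf("kswapd, page free -> continue? %d\n",
	       keep_compacting(COMPACT_MODE_KSWAPD, true));
	return 0;
}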
mm/vmscan.c
@@ -41,6 +41,7 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
+#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2382,6 +2383,7 @@
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
+			int compaction;
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
 
 
@@ -2411,9 +2413,26 @@
 							lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
+
+			compaction = 0;
+			if (order &&
+			    zone_watermark_ok(zone, 0,
+					      high_wmark_pages(zone),
+					      end_zone, 0) &&
+			    !zone_watermark_ok(zone, order,
+					       high_wmark_pages(zone),
+					       end_zone, 0)) {
+				compact_zone_order(zone,
+						   order,
+						   sc.gfp_mask, false,
+						   COMPACT_MODE_KSWAPD);
+				compaction = 1;
+			}
+
 			if (zone->all_unreclaimable)
 				continue;
-			if (nr_slab == 0 && !zone_reclaimable(zone))
+			if (!compaction && nr_slab == 0 &&
+			    !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
@@ -2423,15 +2442,6 @@
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
-
-			/*
-			 * Compact the zone for higher orders to reduce
-			 * latencies for higher-order allocations that
-			 * would ordinarily call try_to_compact_pages()
-			 */
-			if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
-				compact_zone_order(zone, sc.order, sc.gfp_mask,
-						   false);
 
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
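The condition added above is worth spelling out: kswapd only invokes compaction when the zone already has enough free memory overall (order 0 clears the high watermark) yet cannot satisfy the requested order, i.e. the free memory is fragmented and more reclaim would not help. Here is a minimal stand-alone sketch of that predicate, with zone_watermark_ok() replaced by a hypothetical helper over plain numbers.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for zone_watermark_ok(zone, order, mark, ...). */
static bool watermark_ok(unsigned long free_pages, unsigned long mark)
{
	return free_pages >= mark;
}

/*
 * Models the trigger added to balance_pgdat(): compact only when the
 * zone has enough memory overall (order 0 clears the high watermark)
 * but cannot satisfy the requested order -- fragmentation that
 * reclaim alone cannot fix.
 */
static bool kswapd_should_compact(int order, unsigned long free_order0,
				  unsigned long free_at_order,
				  unsigned long high_wmark)
{
	return order > 0 &&
	       watermark_ok(free_order0, high_wmark) &&
	       !watermark_ok(free_at_order, high_wmark);
}

int main(void)
{
	/* Plenty of order-0 pages, none of the requested order: compact. */
	printf("%d\n", kswapd_should_compact(3, 1024, 0, 512));
	return 0;
}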