Commit 5a03b051ed87e72b959f32a86054e1142ac4cf55

Authored by Andrea Arcangeli
Committed by Linus Torvalds
1 parent 878aee7d6b

thp: use compaction in kswapd for GFP_ATOMIC order > 0

This takes advantage of memory compaction in kswapd to generate pages of
order > 0 when regular page reclaim fails, the reclaim priority becomes
more severe, and the zone still does not reach the proper watermarks.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 54 additions and 18 deletions
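
The gist of the change: balance_pgdat() now invokes compaction directly when a zone meets its high watermark at order 0 but fails it at the requested order, passing the new COMPACT_MODE_KSWAPD mode. The standalone C sketch below models only that trigger decision; struct zone_sim, watermark_ok() and kswapd_should_compact() are simplified illustrative stand-ins for the kernel's struct zone, zone_watermark_ok() and the new check in balance_pgdat(), not actual kernel interfaces.

/*
 * Userspace sketch of the kswapd-side decision added by this commit:
 * compact only when the zone is fine at order 0 but cannot satisfy the
 * requested order.  All types and values are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

struct zone_sim {
        unsigned long free_pages;          /* total free pages in the zone */
        unsigned long high_wmark;          /* high watermark, in pages */
        unsigned long largest_free_block;  /* largest contiguous free block, in pages */
};

/* Crude stand-in for the kernel's zone_watermark_ok(zone, order, mark, ...). */
static bool watermark_ok(const struct zone_sim *z, int order, unsigned long mark)
{
        if (z->free_pages <= mark)
                return false;
        /* For order > 0 a large enough contiguous block must also exist. */
        return order == 0 || z->largest_free_block >= (1UL << order);
}

/*
 * Mirrors the new check in balance_pgdat(): compact only when the zone
 * is healthy at order 0 but fails the high watermark at the requested order.
 */
static bool kswapd_should_compact(const struct zone_sim *z, int order)
{
        return order &&
               watermark_ok(z, 0, z->high_wmark) &&
               !watermark_ok(z, order, z->high_wmark);
}

int main(void)
{
        /* Plenty of order-0 pages, but fragmented: largest free block is order 3. */
        struct zone_sim z = {
                .free_pages = 4096,
                .high_wmark = 1024,
                .largest_free_block = 8,
        };

        printf("compact for order 4? %s\n",
               kswapd_should_compact(&z, 4) ? "yes" : "no"); /* yes */
        printf("compact for order 0? %s\n",
               kswapd_should_compact(&z, 0) ? "yes" : "no"); /* no */
        return 0;
}

With the fragmented zone above, the order-4 request triggers compaction while the order-0 request does not, which is exactly the condition the patch makes kswapd act on.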

include/linux/compaction.h
... ... @@ -11,6 +11,9 @@
11 11 /* The full zone was compacted */
12 12 #define COMPACT_COMPLETE 3
13 13  
  14 +#define COMPACT_MODE_DIRECT_RECLAIM 0
  15 +#define COMPACT_MODE_KSWAPD 1
  16 +
14 17 #ifdef CONFIG_COMPACTION
15 18 extern int sysctl_compact_memory;
16 19 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
... ... @@ -25,7 +28,8 @@
25 28 bool sync);
26 29 extern unsigned long compaction_suitable(struct zone *zone, int order);
27 30 extern unsigned long compact_zone_order(struct zone *zone, int order,
28   - gfp_t gfp_mask, bool sync);
  31 + gfp_t gfp_mask, bool sync,
  32 + int compact_mode);
29 33  
30 34 /* Do not skip compaction more than 64 times */
31 35 #define COMPACT_MAX_DEFER_SHIFT 6
32 36  
... ... @@ -70,9 +74,10 @@
70 74 }
71 75  
72 76 static inline unsigned long compact_zone_order(struct zone *zone, int order,
73   - gfp_t gfp_mask, bool sync)
  77 + gfp_t gfp_mask, bool sync,
  78 + int compact_mode)
74 79 {
75   - return 0;
  80 + return COMPACT_CONTINUE;
76 81 }
77 82  
78 83 static inline void defer_compaction(struct zone *zone)

mm/compaction.c
... ... @@ -42,6 +42,8 @@
42 42 unsigned int order; /* order a direct compactor needs */
43 43 int migratetype; /* MOVABLE, RECLAIMABLE etc */
44 44 struct zone *zone;
  45 +
  46 + int compact_mode;
45 47 };
46 48  
47 49 static unsigned long release_freepages(struct list_head *freelist)
48 50  
... ... @@ -382,10 +384,10 @@
382 384 }
383 385  
384 386 static int compact_finished(struct zone *zone,
385   - struct compact_control *cc)
  387 + struct compact_control *cc)
386 388 {
387 389 unsigned int order;
388   - unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
  390 + unsigned long watermark;
389 391  
390 392 if (fatal_signal_pending(current))
391 393 return COMPACT_PARTIAL;
392 394  
... ... @@ -395,12 +397,27 @@
395 397 return COMPACT_COMPLETE;
396 398  
397 399 /* Compaction run is not finished if the watermark is not met */
  400 + if (cc->compact_mode != COMPACT_MODE_KSWAPD)
  401 + watermark = low_wmark_pages(zone);
  402 + else
  403 + watermark = high_wmark_pages(zone);
  404 + watermark += (1 << cc->order);
  405 +
398 406 if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
399 407 return COMPACT_CONTINUE;
400 408  
401 409 if (cc->order == -1)
402 410 return COMPACT_CONTINUE;
403 411  
  412 + /*
  413 + * Generating only one page of the right order is not enough
  414 + * for kswapd, we must continue until we're above the high
  415 + * watermark as a pool for high order GFP_ATOMIC allocations
  416 + * too.
  417 + */
  418 + if (cc->compact_mode == COMPACT_MODE_KSWAPD)
  419 + return COMPACT_CONTINUE;
  420 +
404 421 /* Direct compactor: Is a suitable page free? */
405 422 for (order = cc->order; order < MAX_ORDER; order++) {
406 423 /* Job done if page is free of the right migratetype */
... ... @@ -514,8 +531,9 @@
514 531 }
515 532  
516 533 unsigned long compact_zone_order(struct zone *zone,
517   - int order, gfp_t gfp_mask,
518   - bool sync)
  534 + int order, gfp_t gfp_mask,
  535 + bool sync,
  536 + int compact_mode)
519 537 {
520 538 struct compact_control cc = {
521 539 .nr_freepages = 0,
... ... @@ -524,6 +542,7 @@
524 542 .migratetype = allocflags_to_migratetype(gfp_mask),
525 543 .zone = zone,
526 544 .sync = sync,
  545 + .compact_mode = compact_mode,
527 546 };
528 547 INIT_LIST_HEAD(&cc.freepages);
529 548 INIT_LIST_HEAD(&cc.migratepages);
... ... @@ -569,7 +588,8 @@
569 588 nodemask) {
570 589 int status;
571 590  
572   - status = compact_zone_order(zone, order, gfp_mask, sync);
  591 + status = compact_zone_order(zone, order, gfp_mask, sync,
  592 + COMPACT_MODE_DIRECT_RECLAIM);
573 593 rc = max(status, rc);
574 594  
575 595 /* If a normal allocation would succeed, stop compacting */
... ... @@ -600,6 +620,7 @@
600 620 .nr_freepages = 0,
601 621 .nr_migratepages = 0,
602 622 .order = -1,
  623 + .compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
603 624 };
604 625  
605 626 zone = &pgdat->node_zones[zoneid];

mm/vmscan.c
... ... @@ -41,6 +41,7 @@
41 41 #include <linux/memcontrol.h>
42 42 #include <linux/delayacct.h>
43 43 #include <linux/sysctl.h>
  44 +#include <linux/compaction.h>
44 45  
45 46 #include <asm/tlbflush.h>
46 47 #include <asm/div64.h>
... ... @@ -2382,6 +2383,7 @@
2382 2383 * cause too much scanning of the lower zones.
2383 2384 */
2384 2385 for (i = 0; i <= end_zone; i++) {
  2386 + int compaction;
2385 2387 struct zone *zone = pgdat->node_zones + i;
2386 2388 int nr_slab;
2387 2389  
2388 2390  
... ... @@ -2411,9 +2413,26 @@
2411 2413 lru_pages);
2412 2414 sc.nr_reclaimed += reclaim_state->reclaimed_slab;
2413 2415 total_scanned += sc.nr_scanned;
  2416 +
  2417 + compaction = 0;
  2418 + if (order &&
  2419 + zone_watermark_ok(zone, 0,
  2420 + high_wmark_pages(zone),
  2421 + end_zone, 0) &&
  2422 + !zone_watermark_ok(zone, order,
  2423 + high_wmark_pages(zone),
  2424 + end_zone, 0)) {
  2425 + compact_zone_order(zone,
  2426 + order,
  2427 + sc.gfp_mask, false,
  2428 + COMPACT_MODE_KSWAPD);
  2429 + compaction = 1;
  2430 + }
  2431 +
2414 2432 if (zone->all_unreclaimable)
2415 2433 continue;
2416   - if (nr_slab == 0 && !zone_reclaimable(zone))
  2434 + if (!compaction && nr_slab == 0 &&
  2435 + !zone_reclaimable(zone))
2417 2436 zone->all_unreclaimable = 1;
2418 2437 /*
2419 2438 * If we've done a decent amount of scanning and
... ... @@ -2423,15 +2442,6 @@
2423 2442 if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
2424 2443 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
2425 2444 sc.may_writepage = 1;
2426   -
2427   - /*
2428   - * Compact the zone for higher orders to reduce
2429   - * latencies for higher-order allocations that
2430   - * would ordinarily call try_to_compact_pages()
2431   - */
2432   - if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
2433   - compact_zone_order(zone, sc.order, sc.gfp_mask,
2434   - false);
2435 2445  
2436 2446 if (!zone_watermark_ok_safe(zone, order,
2437 2447 high_wmark_pages(zone), end_zone, 0)) {
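
For completeness, the compact_finished() changes in mm/compaction.c above can be condensed into the same kind of userspace sketch: in kswapd mode the target is the high watermark rather than the low one, and compaction continues even after a page of the right order exists, so the zone keeps a pool for high-order GFP_ATOMIC allocations. The COMPACT_MODE_* values match the new defines in the header; COMPACT_CONTINUE and COMPACT_PARTIAL are defined locally just so the sketch compiles, and struct zone_sim, compact_finished_sim() and the suitable_page_free flag are illustrative simplifications, not the kernel's data structures.

/*
 * Userspace sketch of how compact_mode changes the termination check
 * in compact_finished().  The per-order free-list scan of the direct
 * compactor is reduced to a single flag.
 */
#include <stdbool.h>
#include <stdio.h>

#define COMPACT_CONTINUE 1
#define COMPACT_PARTIAL  2

#define COMPACT_MODE_DIRECT_RECLAIM 0
#define COMPACT_MODE_KSWAPD         1

struct zone_sim {
        unsigned long free_pages;
        unsigned long low_wmark;
        unsigned long high_wmark;
        bool suitable_page_free;   /* stand-in for the per-order free-list scan */
};

static int compact_finished_sim(const struct zone_sim *z, int order, int mode)
{
        unsigned long watermark;

        /* kswapd mode targets the high watermark, direct compaction the low one. */
        watermark = (mode == COMPACT_MODE_KSWAPD ? z->high_wmark : z->low_wmark)
                    + (1UL << order);
        if (z->free_pages < watermark)
                return COMPACT_CONTINUE;

        /*
         * One page of the right order is not enough for kswapd: keep
         * compacting above the high watermark so a pool remains for
         * high-order GFP_ATOMIC allocations.
         */
        if (mode == COMPACT_MODE_KSWAPD)
                return COMPACT_CONTINUE;

        /* Direct compactor: stop as soon as a suitable page is free. */
        return z->suitable_page_free ? COMPACT_PARTIAL : COMPACT_CONTINUE;
}

int main(void)
{
        struct zone_sim z = {
                .free_pages = 2048,
                .low_wmark = 512,
                .high_wmark = 1024,
                .suitable_page_free = true,
        };

        printf("direct reclaim mode: %d (COMPACT_PARTIAL, stop)\n",
               compact_finished_sim(&z, 4, COMPACT_MODE_DIRECT_RECLAIM));
        printf("kswapd mode:         %d (COMPACT_CONTINUE, keep going)\n",
               compact_finished_sim(&z, 4, COMPACT_MODE_KSWAPD));
        return 0;
}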