Commit 5a03b051ed87e72b959f32a86054e1142ac4cf55
Committed by: Linus Torvalds
Parent: 878aee7d6b
thp: use compaction in kswapd for GFP_ATOMIC order > 0

This takes advantage of memory compaction to properly generate pages of
order > 0 if regular page reclaim fails, the priority level becomes more
severe, and we still don't reach the proper watermarks.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
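In short, the patch threads a compact_mode argument through compact_zone_order() so that compact_finished() can choose its target watermark per caller: direct compaction stops at the low watermark, kswapd compaction at the high one. The following stand-alone C sketch models just that selection; struct zone_stub and its fields are hypothetical stand-ins for the kernel's zone accounting, not the real API.

#include <stdbool.h>
#include <stdio.h>

#define COMPACT_MODE_DIRECT_RECLAIM	0
#define COMPACT_MODE_KSWAPD		1

struct zone_stub {
	unsigned long low_wmark;   /* stand-in for low_wmark_pages(zone) */
	unsigned long high_wmark;  /* stand-in for high_wmark_pages(zone) */
	unsigned long free_pages;
};

/*
 * Mirrors the watermark selection this patch adds to compact_finished():
 * a direct compactor may stop at the low watermark, but kswapd keeps
 * compacting until the high watermark so a pool of high-order pages
 * remains for GFP_ATOMIC allocations.
 */
static bool compaction_done(const struct zone_stub *z, int order, int mode)
{
	unsigned long watermark;

	if (mode != COMPACT_MODE_KSWAPD)
		watermark = z->low_wmark;
	else
		watermark = z->high_wmark;
	watermark += 1UL << order;

	return z->free_pages >= watermark;
}

int main(void)
{
	struct zone_stub z = { .low_wmark = 128, .high_wmark = 512, .free_pages = 300 };

	/* 300 free pages: enough for direct reclaim (128 + 4), not for kswapd (512 + 4). */
	printf("direct done: %d\n", compaction_done(&z, 2, COMPACT_MODE_DIRECT_RECLAIM));
	printf("kswapd done: %d\n", compaction_done(&z, 2, COMPACT_MODE_KSWAPD));
	return 0;
}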
3 files changed, 54 insertions(+), 18 deletions(-)
include/linux/compaction.h
@@ -11,6 +11,9 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
+#define COMPACT_MODE_DIRECT_RECLAIM	0
+#define COMPACT_MODE_KSWAPD		1
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -25,7 +28,8 @@
 			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-			gfp_t gfp_mask, bool sync);
+			gfp_t gfp_mask, bool sync,
+			int compact_mode);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
 
@@ -70,9 +74,10 @@
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-			gfp_t gfp_mask, bool sync)
+			gfp_t gfp_mask, bool sync,
+			int compact_mode)
 {
-	return 0;
+	return COMPACT_CONTINUE;
 }
 
 static inline void defer_compaction(struct zone *zone)
mm/compaction.c
@@ -42,6 +42,8 @@
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+
+	int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
 
@@ -382,10 +384,10 @@
 }
 
 static int compact_finished(struct zone *zone,
-			    struct compact_control *cc)
+						struct compact_control *cc)
 {
 	unsigned int order;
-	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
 		return COMPACT_PARTIAL;
 
@@ -395,12 +397,27 @@
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
+	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+		watermark = low_wmark_pages(zone);
+	else
+		watermark = high_wmark_pages(zone);
+	watermark += (1 << cc->order);
+
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
+	/*
+	 * Generating only one page of the right order is not enough
+	 * for kswapd, we must continue until we're above the high
+	 * watermark as a pool for high order GFP_ATOMIC allocations
+	 * too.
+	 */
+	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+		return COMPACT_CONTINUE;
+
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
@@ -514,8 +531,9 @@
 }
 
 unsigned long compact_zone_order(struct zone *zone,
-				 int order, gfp_t gfp_mask,
-				 bool sync)
+				 int order, gfp_t gfp_mask,
+				 bool sync,
+				 int compact_mode)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -524,6 +542,7 @@
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
+		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -569,7 +588,8 @@
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync);
+		status = compact_zone_order(zone, order, gfp_mask, sync,
+					    COMPACT_MODE_DIRECT_RECLAIM);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -600,6 +620,7 @@
 		.nr_freepages = 0,
 		.nr_migratepages = 0,
 		.order = -1,
+		.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 	};
 
 	zone = &pgdat->node_zones[zoneid];
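Note that compact_finished() consults the mode a second time after the watermark check: a direct compactor is done as soon as one suitable page of the requested order exists, while kswapd deliberately keeps going. A stand-alone sketch of that early-exit decision follows; keep_compacting() is an illustrative helper, not a kernel function.

#include <stdbool.h>
#include <stdio.h>

#define COMPACT_MODE_DIRECT_RECLAIM	0
#define COMPACT_MODE_KSWAPD		1

/*
 * Models the decision compact_finished() makes once the watermark
 * check has passed: direct compaction stops when a suitable page of
 * the requested order is free, while kswapd keeps compacting to
 * refill the pool for high-order GFP_ATOMIC callers.
 */
static bool keep_compacting(int mode, bool suitable_page_free)
{
	if (mode == COMPACT_MODE_KSWAPD)
		return true;
	return !suitable_page_free;
}

int main(void)
{
	printf("direct, page free -> continue? %d\n",
	       keep_compacting(COMPACT_MODE_DIRECT_RECLAIM, true));
	printf("kswapd, page free -> continue? %d\n",
	       keep_compacting(COMPACT_MODE_KSWAPD, true));
	return 0;
}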
mm/vmscan.c
@@ -41,6 +41,7 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
+#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2382,6 +2383,7 @@
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
+			int compaction;
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
 
 
@@ -2411,9 +2413,26 @@
 							lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
+
+			compaction = 0;
+			if (order &&
+			    zone_watermark_ok(zone, 0,
+					      high_wmark_pages(zone),
+					      end_zone, 0) &&
+			    !zone_watermark_ok(zone, order,
+					       high_wmark_pages(zone),
+					       end_zone, 0)) {
+				compact_zone_order(zone,
+						   order,
+						   sc.gfp_mask, false,
+						   COMPACT_MODE_KSWAPD);
+				compaction = 1;
+			}
+
 			if (zone->all_unreclaimable)
 				continue;
-			if (nr_slab == 0 && !zone_reclaimable(zone))
+			if (!compaction && nr_slab == 0 &&
+			    !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
@@ -2423,15 +2442,6 @@
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
-
-			/*
-			 * Compact the zone for higher orders to reduce
-			 * latencies for higher-order allocations that
-			 * would ordinarily call try_to_compact_pages()
-			 */
-			if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
-				compact_zone_order(zone, sc.order, sc.gfp_mask,
-						   false);
 
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
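The condition added above is worth spelling out: kswapd only invokes compaction when the zone already has enough free memory overall (order 0 clears the high watermark) yet cannot satisfy the requested order, i.e. the free memory is fragmented and more reclaim would not help. Here is a minimal stand-alone sketch of that predicate, with zone_watermark_ok() replaced by a hypothetical helper over plain numbers.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for zone_watermark_ok(zone, order, mark, ...). */
static bool watermark_ok(unsigned long free_pages, unsigned long mark)
{
	return free_pages >= mark;
}

/*
 * Models the trigger added to balance_pgdat(): compact only when the
 * zone has enough memory overall (order 0 clears the high watermark)
 * but cannot satisfy the requested order -- fragmentation that
 * reclaim alone cannot fix.
 */
static bool kswapd_should_compact(int order, unsigned long free_order0,
				  unsigned long free_at_order,
				  unsigned long high_wmark)
{
	return order > 0 &&
	       watermark_ok(free_order0, high_wmark) &&
	       !watermark_ok(free_at_order, high_wmark);
}

int main(void)
{
	/* Plenty of order-0 pages, none of the requested order: compact. */
	printf("%d\n", kswapd_should_compact(3, 1024, 0, 512));
	return 0;
}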