Commit 9d8cebd4bcd7c3878462fdfda34bbcdeb4df7ef4
mm: fix mbind vma merge problem
Strangely, the current mbind() doesn't merge a vma with its neighboring vmas even when a merge is possible. Unfortunately, having many vmas can reduce performance... This patch fixes it.

Reproducer program
----------------------------------------------------------------
#include <numaif.h>
#include <numa.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

static unsigned long pagesize;

int main(int argc, char** argv)
{
	void* addr;
	int ch;
	int node;
	struct bitmask *nmask = numa_allocate_nodemask();
	int err;
	int node_set = 0;
	char buf[128];

	while ((ch = getopt(argc, argv, "n:")) != -1){
		switch (ch){
		case 'n':
			node = strtol(optarg, NULL, 0);
			numa_bitmask_setbit(nmask, node);
			node_set = 1;
			break;
		default:
			;
		}
	}
	argc -= optind;
	argv += optind;

	if (!node_set)
		numa_bitmask_setbit(nmask, 0);

	pagesize = getpagesize();

	addr = mmap(NULL, pagesize*3, PROT_READ|PROT_WRITE,
		    MAP_ANON|MAP_PRIVATE, 0, 0);
	if (addr == MAP_FAILED)
		perror("mmap "), exit(1);

	fprintf(stderr, "pid = %d \n" "addr = %p\n", getpid(), addr);

	/* make page populate */
	memset(addr, 0, pagesize*3);

	/* first mbind */
	err = mbind(addr+pagesize, pagesize, MPOL_BIND, nmask->maskp,
		    nmask->size, MPOL_MF_MOVE_ALL);
	if (err)
		error("mbind1 ");

	/* second mbind */
	err = mbind(addr, pagesize*3, MPOL_DEFAULT, NULL, 0, 0);
	if (err)
		error("mbind2 ");

	sprintf(buf, "cat /proc/%d/maps", getpid());
	system(buf);

	return 0;
}
----------------------------------------------------------------

Result without this patch:

	addr = 0x7fe26ef09000
	[snip]
	7fe26ef09000-7fe26ef0a000 rw-p 00000000 00:00 0
	7fe26ef0a000-7fe26ef0b000 rw-p 00000000 00:00 0
	7fe26ef0b000-7fe26ef0c000 rw-p 00000000 00:00 0
	7fe26ef0c000-7fe26ef0d000 rw-p 00000000 00:00 0

	=> 0x7fe26ef09000-0x7fe26ef0c000 has three vmas.

Result with this patch:

	addr = 0x7fc9ebc76000
	[snip]
	7fc9ebc76000-7fc9ebc7a000 rw-p 00000000 00:00 0
	7fffbe690000-7fffbe6a5000 rw-p 00000000 00:00 0 [stack]

	=> 0x7fc9ebc76000-0x7fc9ebc7a000 has only one vma.
[minchan.kim@gmail.com: fix file offset passed to vma_merge()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 39 additions and 13 deletions Side-by-side Diff
... | ... | @@ -563,24 +563,50 @@ |
563 | 563 | } |
564 | 564 | |
565 | 565 | /* Step 2: apply policy to a range and do splits. */ |
566 | -static int mbind_range(struct vm_area_struct *vma, unsigned long start, | |
567 | - unsigned long end, struct mempolicy *new) | |
566 | +static int mbind_range(struct mm_struct *mm, unsigned long start, | |
567 | + unsigned long end, struct mempolicy *new_pol) | |
568 | 568 | { |
569 | 569 | struct vm_area_struct *next; |
570 | - int err; | |
570 | + struct vm_area_struct *prev; | |
571 | + struct vm_area_struct *vma; | |
572 | + int err = 0; | |
573 | + pgoff_t pgoff; | |
574 | + unsigned long vmstart; | |
575 | + unsigned long vmend; | |
571 | 576 | |
572 | - err = 0; | |
573 | - for (; vma && vma->vm_start < end; vma = next) { | |
577 | + vma = find_vma_prev(mm, start, &prev); | |
578 | + if (!vma || vma->vm_start > start) | |
579 | + return -EFAULT; | |
580 | + | |
581 | + for (; vma && vma->vm_start < end; prev = vma, vma = next) { | |
574 | 582 | next = vma->vm_next; |
575 | - if (vma->vm_start < start) | |
576 | - err = split_vma(vma->vm_mm, vma, start, 1); | |
577 | - if (!err && vma->vm_end > end) | |
578 | - err = split_vma(vma->vm_mm, vma, end, 0); | |
579 | - if (!err) | |
580 | - err = policy_vma(vma, new); | |
583 | + vmstart = max(start, vma->vm_start); | |
584 | + vmend = min(end, vma->vm_end); | |
585 | + | |
586 | + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); | |
587 | + prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, | |
588 | + vma->anon_vma, vma->vm_file, pgoff, new_pol); | |
589 | + if (prev) { | |
590 | + vma = prev; | |
591 | + next = vma->vm_next; | |
592 | + continue; | |
593 | + } | |
594 | + if (vma->vm_start != vmstart) { | |
595 | + err = split_vma(vma->vm_mm, vma, vmstart, 1); | |
596 | + if (err) | |
597 | + goto out; | |
598 | + } | |
599 | + if (vma->vm_end != vmend) { | |
600 | + err = split_vma(vma->vm_mm, vma, vmend, 0); | |
601 | + if (err) | |
602 | + goto out; | |
603 | + } | |
604 | + err = policy_vma(vma, new_pol); | |
581 | 605 | if (err) |
582 | - break; | |
606 | + goto out; | |
583 | 607 | } |
608 | + | |
609 | + out: | |
584 | 610 | return err; |
585 | 611 | } |
586 | 612 | |
... | ... | @@ -1047,7 +1073,7 @@ |
1047 | 1073 | if (!IS_ERR(vma)) { |
1048 | 1074 | int nr_failed = 0; |
1049 | 1075 | |
1050 | - err = mbind_range(vma, start, end, new); | |
1076 | + err = mbind_range(mm, start, end, new); | |
1051 | 1077 | |
1052 | 1078 | if (!list_empty(&pagelist)) |
1053 | 1079 | nr_failed = migrate_pages(&pagelist, new_vma_page, |
-
mentioned in commit 3eef6e
-
mentioned in commit 3eef6e
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit bc4dd9
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c
-
mentioned in commit d05f0c