Commit 0a27a14a62921b438bb6f33772690d345a089be6

Authored by Nick Piggin
Committed by Linus Torvalds
1 parent b4169525bc

mm: madvise avoid exclusive mmap_sem

Avoid down_write of the mmap_sem in madvise when we can help it.

Acked-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 29 additions and 4 deletions Side-by-side Diff

... ... @@ -12,6 +12,24 @@
12 12 #include <linux/hugetlb.h>
13 13  
14 14 /*
  15 + * Any behaviour which results in changes to the vma->vm_flags needs to
  16 + * take mmap_sem for writing. Others, which simply traverse vmas, need
  17 + * only take it for reading. Returns 1 if mmap_sem must be taken for writing for this behavior, 0 if taking it for reading is enough.
  18 + */
  19 +static int madvise_need_mmap_write(int behavior)
  20 +{
  21 + switch (behavior) {
  22 + case MADV_REMOVE:
  23 + case MADV_WILLNEED:
  24 + case MADV_DONTNEED:
  25 + return 0;
  26 + default:
  27 + /* Be safe: default to 1 (write lock). Behaviours that only need the read lock must be listed explicitly above. */
  28 + return 1;
  29 + }
  30 +}
  31 +
  32 +/*
15 33 * We can potentially split a vm area into separate
16 34 * areas, each area with its own behavior.
17 35 */
18 36  
... ... @@ -183,9 +201,9 @@
183 201 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
184 202  
185 203 /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
186   - up_write(&current->mm->mmap_sem);
  204 + up_read(&current->mm->mmap_sem);
187 205 error = vmtruncate_range(mapping->host, offset, endoff);
188   - down_write(&current->mm->mmap_sem);
  206 + down_read(&current->mm->mmap_sem);
189 207 return error;
190 208 }
191 209  
... ... @@ -270,7 +288,10 @@
270 288 int error = -EINVAL;
271 289 size_t len;
272 290  
273   - down_write(&current->mm->mmap_sem);
  291 + if (madvise_need_mmap_write(behavior))
  292 + down_write(&current->mm->mmap_sem);
  293 + else
  294 + down_read(&current->mm->mmap_sem);
274 295  
275 296 if (start & ~PAGE_MASK)
276 297 goto out;
... ... @@ -332,7 +353,11 @@
332 353 vma = find_vma(current->mm, start);
333 354 }
334 355 out:
335   - up_write(&current->mm->mmap_sem);
  356 + if (madvise_need_mmap_write(behavior))
  357 + up_write(&current->mm->mmap_sem);
  358 + else
  359 + up_read(&current->mm->mmap_sem);
  360 +
336 361 return error;
337 362 }