Commit 95042f9eb78a8d9a17455e2ef263f2f310ecef15

Authored by Linus Torvalds
1 parent be85bccaa5

vm: fix mlock() on stack guard page

Commit 53a7706d5ed8 ("mlock: do not hold mmap_sem for extended periods
of time") changed mlock() to care about the exact number of pages that
__get_user_pages() had brought it.  Before, it would only care about
errors.

And that doesn't work, because we also handled one page specially in
__mlock_vma_pages_range(), namely the stack guard page.  So when that
case was handled, the number of pages that the function returned was off
by one.  In particular, it could be zero, and then the caller would end
up not making any progress at all.
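
To make the failure concrete, here is a minimal userspace model of the old control flow. The names echo the kernel functions but none of this is kernel code; it only reproduces the arithmetic: the mlock path skipped the guard page before calling the fault loop, so the count it handed back was one lower than what the caller asked for, and the caller advances by exactly that count.

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Stand-in for __get_user_pages(): pretend every requested page is pinned. */
static long toy_get_user_pages(unsigned long addr, unsigned long nr_pages)
{
	(void)addr;	/* a real implementation would fault pages in at addr */
	return nr_pages;
}

/* Model of the OLD __mlock_vma_pages_range(): skip the guard page up front. */
static long toy_mlock_vma_pages_range(unsigned long addr, unsigned long nr_pages,
				      int starts_at_guard_page)
{
	if (starts_at_guard_page) {
		addr += PAGE_SIZE;	/* don't touch the guard page... */
		nr_pages--;		/* ...so the count is now one short */
	}
	return toy_get_user_pages(addr, nr_pages);
}

int main(void)
{
	unsigned long nstart = 0x1000, end = 0x2000;	/* one page, at the guard page */
	int spins = 0;

	while (nstart < end) {
		long ret = toy_mlock_vma_pages_range(nstart,
					(end - nstart) / PAGE_SIZE, 1);
		if (ret < 0)
			break;
		nstart += ret * PAGE_SIZE;	/* ret == 0: no progress, ever */
		if (++spins > 3) {
			printf("stuck: ret is 0, nstart never advances\n");
			break;
		}
	}
	return 0;
}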

Rather than try to fix up that off-by-one error for the mlock case
specially, this just moves the logic to handle the stack guard page
into __get_user_pages() itself, thus making all the counts come out
right automatically.
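
A sketch of the fixed accounting, again as a standalone model rather than the kernel code itself: the skip now happens inside the per-page loop and funnels through the same next_page bookkeeping as a pinned page, so every page in the request is counted whether it was pinned or skipped.

#include <stdio.h>

#define PAGE_SIZE 4096UL

/*
 * Model of the FIXED flow: the guard-page check lives inside the main
 * loop of __get_user_pages() and jumps to the shared next_page
 * bookkeeping, so the skipped page is still counted in the return value.
 */
static long toy_get_user_pages(unsigned long start, unsigned long nr_pages,
			       int want_pages, int starts_at_guard_page)
{
	long i = 0;

	do {
		if (!want_pages && starts_at_guard_page && i == 0)
			goto next_page;	/* skip the guard page itself */
		/* normal case: fault in and optionally record the page here */
next_page:
		i++;			/* skipped or pinned, it counts */
		start += PAGE_SIZE;
		nr_pages--;
	} while (nr_pages);

	return i;
}

int main(void)
{
	/* One page requested, starting at the guard page, no pages[] wanted. */
	printf("returned count: %ld\n", toy_get_user_pages(0x1000, 1, 0, 1));
	return 0;
}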

Reported-by: Robert Święcki <robert@swiecki.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 18 additions and 21 deletions

mm/memory.c

... ... @@ -1410,6 +1410,13 @@
1410 1410 	return page;
1411 1411 }
1412 1412 
  1413 +static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
  1414 +{
  1415 +	return (vma->vm_flags & VM_GROWSDOWN) &&
  1416 +		(vma->vm_start == addr) &&
  1417 +		!vma_stack_continue(vma->vm_prev, addr);
  1418 +}
  1419 +
1413 1420 /**
1414 1421  * __get_user_pages() - pin user pages in memory
1415 1422  * @tsk:	task_struct of target task
... ... @@ -1488,7 +1495,6 @@
1488 1495 		vma = find_extend_vma(mm, start);
1489 1496 		if (!vma && in_gate_area(mm, start)) {
1490 1497 			unsigned long pg = start & PAGE_MASK;
1491   -			struct vm_area_struct *gate_vma = get_gate_vma(mm);
1492 1498 			pgd_t *pgd;
1493 1499 			pud_t *pud;
1494 1500 			pmd_t *pmd;
1495 1501  
... ... @@ -1513,10 +1519,11 @@
1513 1519 				pte_unmap(pte);
1514 1520 				return i ? : -EFAULT;
1515 1521 			}
  1522 +			vma = get_gate_vma(mm);
1516 1523 			if (pages) {
1517 1524 				struct page *page;
1518 1525 
1519   -				page = vm_normal_page(gate_vma, start, *pte);
  1526 +				page = vm_normal_page(vma, start, *pte);
1520 1527 				if (!page) {
1521 1528 					if (!(gup_flags & FOLL_DUMP) &&
1522 1529 					    is_zero_pfn(pte_pfn(*pte)))
... ... @@ -1530,12 +1537,7 @@
1530 1537 				get_page(page);
1531 1538 			}
1532 1539 			pte_unmap(pte);
1533   -			if (vmas)
1534   -				vmas[i] = gate_vma;
1535   -			i++;
1536   -			start += PAGE_SIZE;
1537   -			nr_pages--;
1538   -			continue;
  1540 +			goto next_page;
1539 1541 		}
1540 1542 
1541 1543 		if (!vma ||
... ... @@ -1549,6 +1551,13 @@
1549 1551 			continue;
1550 1552 		}
1551 1553 
  1554 +		/*
  1555 +		 * If we don't actually want the page itself,
  1556 +		 * and it's the stack guard page, just skip it.
  1557 +		 */
  1558 +		if (!pages && stack_guard_page(vma, start))
  1559 +			goto next_page;
  1560 +
1552 1561 		do {
1553 1562 			struct page *page;
1554 1563 			unsigned int foll_flags = gup_flags;
... ... @@ -1631,6 +1640,7 @@
1631 1640 				flush_anon_page(vma, page, start);
1632 1641 				flush_dcache_page(page);
1633 1642 			}
  1643 +next_page:
1634 1644 			if (vmas)
1635 1645 				vmas[i] = vma;
1636 1646 			i++;

mm/mlock.c

... ... @@ -135,13 +135,6 @@
135 135 	}
136 136 }
137 137 
138   -static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
139   -{
140   -	return (vma->vm_flags & VM_GROWSDOWN) &&
141   -		(vma->vm_start == addr) &&
142   -		!vma_stack_continue(vma->vm_prev, addr);
143   -}
144   -
145 138 /**
146 139  * __mlock_vma_pages_range() - mlock a range of pages in the vma.
147 140  * @vma: target vma
... ... @@ -187,12 +180,6 @@
187 180  
188 181 	if (vma->vm_flags & VM_LOCKED)
189 182 		gup_flags |= FOLL_MLOCK;
190   -
191   -	/* We don't try to access the guard page of a stack vma */
192   -	if (stack_guard_page(vma, start)) {
193   -		addr += PAGE_SIZE;
194   -		nr_pages--;
195   -	}
196 183 
197 184 	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
198 185 				NULL, NULL, nonblocking);
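
For reference, a rough userspace sketch of the reported failure mode. It assumes the main thread's stack is a VM_GROWSDOWN vma whose lowest page is the guard page, and it finds that page by parsing /proc/self/maps; treat it as a hypothetical reproducer, not a verified test case. On a kernel without this fix, the one-page mlock() below hits the zero-count case and the kernel's retry loop makes no forward progress.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	char line[256];
	unsigned long lo = 0;
	long page = sysconf(_SC_PAGESIZE);

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "[stack]")) {
			sscanf(line, "%lx", &lo);	/* start of the stack vma */
			break;
		}
	}
	fclose(f);
	if (!lo)
		return 1;

	/*
	 * The page at 'lo' is the stack guard page.  Before this fix,
	 * __mlock_vma_pages_range() skipped it up front, handed
	 * __get_user_pages() a zero-page request, and the mlock loop
	 * then retried forever without advancing.
	 */
	if (mlock((void *)lo, page) != 0)
		perror("mlock");
	return 0;
}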