Commit 95042f9eb78a8d9a17455e2ef263f2f310ecef15

Authored by Linus Torvalds
1 parent be85bccaa5

vm: fix mlock() on stack guard page

Commit 53a7706d5ed8 ("mlock: do not hold mmap_sem for extended periods
of time") changed mlock() to care about the exact number of pages that
__get_user_pages() had brought it.  Before, it would only care about
errors.

And that doesn't work, because we also handled one page specially in
__mlock_vma_pages_range(), namely the stack guard page.  So when that
case was handled, the number of pages that the function returned was off
by one.  In particular, it could be zero, and then the caller would end
up not making any progress at all.
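
To make the failure concrete, here is a minimal userspace model of the old control flow. The names echo the kernel functions but none of this is kernel code; it only reproduces the arithmetic: the mlock path skipped the guard page before calling the fault loop, so the count it handed back was one lower than what the caller asked for, and the caller advances by exactly that count.

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Stand-in for __get_user_pages(): pretend every requested page is pinned. */
static long toy_get_user_pages(unsigned long addr, unsigned long nr_pages)
{
	(void)addr;	/* a real implementation would fault pages in at addr */
	return nr_pages;
}

/* Model of the OLD __mlock_vma_pages_range(): skip the guard page up front. */
static long toy_mlock_vma_pages_range(unsigned long addr, unsigned long nr_pages,
				      int starts_at_guard_page)
{
	if (starts_at_guard_page) {
		addr += PAGE_SIZE;	/* don't touch the guard page... */
		nr_pages--;		/* ...so the count is now one short */
	}
	return toy_get_user_pages(addr, nr_pages);
}

int main(void)
{
	unsigned long nstart = 0x1000, end = 0x2000;	/* one page, at the guard page */
	int spins = 0;

	while (nstart < end) {
		long ret = toy_mlock_vma_pages_range(nstart,
					(end - nstart) / PAGE_SIZE, 1);
		if (ret < 0)
			break;
		nstart += ret * PAGE_SIZE;	/* ret == 0: no progress, ever */
		if (++spins > 3) {
			printf("stuck: ret is 0, nstart never advances\n");
			break;
		}
	}
	return 0;
}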

Rather than try to fix up that off-by-one error for the mlock case
specially, this just moves the logic to handle the stack guard page
into __get_user_pages() itself, thus making all the counts come out
right automatically.
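
A sketch of the fixed accounting, again as a standalone model rather than the kernel code itself: the skip now happens inside the per-page loop and funnels through the same next_page bookkeeping as a pinned page, so every page in the request is counted whether it was pinned or skipped.

#include <stdio.h>

#define PAGE_SIZE 4096UL

/*
 * Model of the FIXED flow: the guard-page check lives inside the main
 * loop of __get_user_pages() and jumps to the shared next_page
 * bookkeeping, so the skipped page is still counted in the return value.
 */
static long toy_get_user_pages(unsigned long start, unsigned long nr_pages,
			       int want_pages, int starts_at_guard_page)
{
	long i = 0;

	do {
		if (!want_pages && starts_at_guard_page && i == 0)
			goto next_page;	/* skip the guard page itself */
		/* normal case: fault in and optionally record the page here */
next_page:
		i++;			/* skipped or pinned, it counts */
		start += PAGE_SIZE;
		nr_pages--;
	} while (nr_pages);

	return i;
}

int main(void)
{
	/* One page requested, starting at the guard page, no pages[] wanted. */
	printf("returned count: %ld\n", toy_get_user_pages(0x1000, 1, 0, 1));
	return 0;
}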

Reported-by: Robert Święcki <robert@swiecki.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 18 additions and 21 deletions

mm/memory.c

... ... @@ -1410,6 +1410,13 @@
1410 1410 	return page;
1411 1411 }
1412 1412 
  1413 +static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
  1414 +{
  1415 +	return (vma->vm_flags & VM_GROWSDOWN) &&
  1416 +		(vma->vm_start == addr) &&
  1417 +		!vma_stack_continue(vma->vm_prev, addr);
  1418 +}
  1419 +
1413 1420 /**
1414 1421  * __get_user_pages() - pin user pages in memory
1415 1422  * @tsk:	task_struct of target task
... ... @@ -1488,7 +1495,6 @@
1488 1495 		vma = find_extend_vma(mm, start);
1489 1496 		if (!vma && in_gate_area(mm, start)) {
1490 1497 			unsigned long pg = start & PAGE_MASK;
1491   -			struct vm_area_struct *gate_vma = get_gate_vma(mm);
1492 1498 			pgd_t *pgd;
1493 1499 			pud_t *pud;
1494 1500 			pmd_t *pmd;
1495 1501  
... ... @@ -1513,10 +1519,11 @@
1513 1519 				pte_unmap(pte);
1514 1520 				return i ? : -EFAULT;
1515 1521 			}
  1522 +			vma = get_gate_vma(mm);
1516 1523 			if (pages) {
1517 1524 				struct page *page;
1518 1525 
1519   -				page = vm_normal_page(gate_vma, start, *pte);
  1526 +				page = vm_normal_page(vma, start, *pte);
1520 1527 				if (!page) {
1521 1528 					if (!(gup_flags & FOLL_DUMP) &&
1522 1529 					    is_zero_pfn(pte_pfn(*pte)))
... ... @@ -1530,12 +1537,7 @@
1530 1537 				get_page(page);
1531 1538 			}
1532 1539 			pte_unmap(pte);
1533   -			if (vmas)
1534   -				vmas[i] = gate_vma;
1535   -			i++;
1536   -			start += PAGE_SIZE;
1537   -			nr_pages--;
1538   -			continue;
  1540 +			goto next_page;
1539 1541 		}
1540 1542 
1541 1543 		if (!vma ||
... ... @@ -1549,6 +1551,13 @@
1549 1551 			continue;
1550 1552 		}
1551 1553 
  1554 +		/*
  1555 +		 * If we don't actually want the page itself,
  1556 +		 * and it's the stack guard page, just skip it.
  1557 +		 */
  1558 +		if (!pages && stack_guard_page(vma, start))
  1559 +			goto next_page;
  1560 +
1552 1561 		do {
1553 1562 			struct page *page;
1554 1563 			unsigned int foll_flags = gup_flags;
... ... @@ -1631,6 +1640,7 @@
1631 1640 				flush_anon_page(vma, page, start);
1632 1641 				flush_dcache_page(page);
1633 1642 			}
  1643 +next_page:
1634 1644 			if (vmas)
1635 1645 				vmas[i] = vma;
1636 1646 			i++;

mm/mlock.c

... ... @@ -135,13 +135,6 @@
135 135 	}
136 136 }
137 137 
138   -static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
139   -{
140   -	return (vma->vm_flags & VM_GROWSDOWN) &&
141   -		(vma->vm_start == addr) &&
142   -		!vma_stack_continue(vma->vm_prev, addr);
143   -}
144   -
145 138 /**
146 139  * __mlock_vma_pages_range() - mlock a range of pages in the vma.
147 140  * @vma: target vma
... ... @@ -187,12 +180,6 @@
187 180  
188 181 	if (vma->vm_flags & VM_LOCKED)
189 182 		gup_flags |= FOLL_MLOCK;
190   -
191   -	/* We don't try to access the guard page of a stack vma */
192   -	if (stack_guard_page(vma, start)) {
193   -		addr += PAGE_SIZE;
194   -		nr_pages--;
195   -	}
196 183 
197 184 	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
198 185 				NULL, NULL, nonblocking);
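
For reference, a rough userspace sketch of the reported failure mode. It assumes the main thread's stack is a VM_GROWSDOWN vma whose lowest page is the guard page, and it finds that page by parsing /proc/self/maps; treat it as a hypothetical reproducer, not a verified test case. On a kernel without this fix, the one-page mlock() below hits the zero-count case and the kernel's retry loop makes no forward progress.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	char line[256];
	unsigned long lo = 0;
	long page = sysconf(_SC_PAGESIZE);

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "[stack]")) {
			sscanf(line, "%lx", &lo);	/* start of the stack vma */
			break;
		}
	}
	fclose(f);
	if (!lo)
		return 1;

	/*
	 * The page at 'lo' is the stack guard page.  Before this fix,
	 * __mlock_vma_pages_range() skipped it up front, handed
	 * __get_user_pages() a zero-page request, and the mlock loop
	 * then retried forever without advancing.
	 */
	if (mlock((void *)lo, page) != 0)
		perror("mlock");
	return 0;
}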