Blame view
mm/mremap.c
19.4 KB
b24413180 License cleanup: ... |
1 |
// SPDX-License-Identifier: GPL-2.0 |
1da177e4c Linux-2.6.12-rc2 |
2 3 4 5 6 |
/* * mm/mremap.c * * (C) Copyright 1996 Linus Torvalds * |
046c68842 mm: update my add... |
7 |
* Address space accounting code <alan@lxorguk.ukuu.org.uk> |
1da177e4c Linux-2.6.12-rc2 |
8 9 10 11 12 |
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved */ #include <linux/mm.h> #include <linux/hugetlb.h> |
1da177e4c Linux-2.6.12-rc2 |
13 |
#include <linux/shm.h> |
1ff829957 ksm: prevent mrem... |
14 |
#include <linux/ksm.h> |
1da177e4c Linux-2.6.12-rc2 |
15 16 |
#include <linux/mman.h> #include <linux/swap.h> |
c59ede7b7 [PATCH] move capa... |
17 |
#include <linux/capability.h> |
1da177e4c Linux-2.6.12-rc2 |
18 |
#include <linux/fs.h> |
6dec97dc9 mm: move_ptes -- ... |
19 |
#include <linux/swapops.h> |
1da177e4c Linux-2.6.12-rc2 |
20 21 22 |
#include <linux/highmem.h> #include <linux/security.h> #include <linux/syscalls.h> |
cddb8a5c1 mmu-notifiers: core |
23 |
#include <linux/mmu_notifier.h> |
2581d2023 mm/mremap.c: use ... |
24 |
#include <linux/uaccess.h> |
4abad2ca4 mm: new arch_rema... |
25 |
#include <linux/mm-arch-hooks.h> |
72f87654c userfaultfd: non-... |
26 |
#include <linux/userfaultfd_k.h> |
1da177e4c Linux-2.6.12-rc2 |
27 |
|
1da177e4c Linux-2.6.12-rc2 |
28 29 |
#include <asm/cacheflush.h> #include <asm/tlbflush.h> |
ba470de43 mmap: handle mloc... |
30 |
#include "internal.h" |
7be7a5469 [PATCH] mm: move_... |
31 |
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) |
1da177e4c Linux-2.6.12-rc2 |
32 33 |
{ pgd_t *pgd; |
c2febafc6 mm: convert gener... |
34 |
p4d_t *p4d; |
1da177e4c Linux-2.6.12-rc2 |
35 36 37 38 39 40 |
pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); if (pgd_none_or_clear_bad(pgd)) return NULL; |
c2febafc6 mm: convert gener... |
41 42 43 44 45 |
p4d = p4d_offset(pgd, addr); if (p4d_none_or_clear_bad(p4d)) return NULL; pud = pud_offset(p4d, addr); |
1da177e4c Linux-2.6.12-rc2 |
46 47 48 49 |
if (pud_none_or_clear_bad(pud)) return NULL; pmd = pmd_offset(pud, addr); |
37a1c49a9 thp: mremap suppo... |
50 |
if (pmd_none(*pmd)) |
1da177e4c Linux-2.6.12-rc2 |
51 |
return NULL; |
7be7a5469 [PATCH] mm: move_... |
52 |
return pmd; |
1da177e4c Linux-2.6.12-rc2 |
53 |
} |
8ac1f8320 thp: pte alloc tr... |
54 55 |
static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) |
1da177e4c Linux-2.6.12-rc2 |
56 57 |
{ pgd_t *pgd; |
c2febafc6 mm: convert gener... |
58 |
p4d_t *p4d; |
1da177e4c Linux-2.6.12-rc2 |
59 |
pud_t *pud; |
c74df32c7 [PATCH] mm: ptd_a... |
60 |
pmd_t *pmd; |
1da177e4c Linux-2.6.12-rc2 |
61 62 |
pgd = pgd_offset(mm, addr); |
c2febafc6 mm: convert gener... |
63 64 65 66 |
p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return NULL; pud = pud_alloc(mm, p4d, addr); |
1da177e4c Linux-2.6.12-rc2 |
67 |
if (!pud) |
c74df32c7 [PATCH] mm: ptd_a... |
68 |
return NULL; |
7be7a5469 [PATCH] mm: move_... |
69 |
|
1da177e4c Linux-2.6.12-rc2 |
70 |
pmd = pmd_alloc(mm, pud, addr); |
57a8f0cdb mm: revert mremap... |
71 |
if (!pmd) |
c74df32c7 [PATCH] mm: ptd_a... |
72 |
return NULL; |
7be7a5469 [PATCH] mm: move_... |
73 |
|
8ac1f8320 thp: pte alloc tr... |
74 |
VM_BUG_ON(pmd_trans_huge(*pmd)); |
c74df32c7 [PATCH] mm: ptd_a... |
75 |
|
7be7a5469 [PATCH] mm: move_... |
76 |
return pmd; |
1da177e4c Linux-2.6.12-rc2 |
77 |
} |
/*
 * Take the rmap locks for @vma: the file mapping's i_mmap_rwsem (write)
 * and the anon_vma lock (write).  Taken in this order; drop_rmap_locks()
 * releases them in the reverse order.
 */
static void take_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);
}

/* Release the locks taken by take_rmap_locks(), in reverse order. */
static void drop_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);
}
/*
 * Mark a moved pte soft-dirty (present or swap form) so userspace
 * soft-dirty tracking notices that the pte was relocated.  A no-op
 * when CONFIG_MEM_SOFT_DIRTY is not enabled.
 */
static pte_t move_soft_dirty_pte(pte_t pte)
{
	/*
	 * Set soft dirty bit so we can notice
	 * in userspace the ptes were moved.
	 */
#ifdef CONFIG_MEM_SOFT_DIRTY
	if (pte_present(pte))
		pte = pte_mksoft_dirty(pte);
	else if (is_swap_pte(pte))
		pte = pte_swp_mksoft_dirty(pte);
#endif
	return pte;
}
/*
 * Move the ptes in [old_addr, old_end) under *old_pmd to the range
 * starting at @new_addr under *new_pmd.  Both pte page-table pages must
 * already exist.  Clears each old pte and installs it at the new slot,
 * flushing the TLB for any present ptes before the page-table locks are
 * dropped.
 */
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr, bool need_rmap_locks)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;
	spinlock_t *old_ptl, *new_ptl;
	bool force_flush = false;
	unsigned long len = old_end - old_addr;

	/*
	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
	 * locks to ensure that rmap will always observe either the old or the
	 * new ptes. This is the easiest way to avoid races with
	 * truncate_pagecache(), page migration, etc...
	 *
	 * When need_rmap_locks is false, we use other ways to avoid
	 * such races:
	 *
	 * - During exec() shift_arg_pages(), we use a specially tagged vma
	 *   which rmap call sites look for using is_vma_temporary_stack().
	 *
	 * - During mremap(), new_vma is often known to be placed after vma
	 *   in rmap traversal order. This ensures rmap will always observe
	 *   either the old pte, or the new pte, or both (the page table locks
	 *   serialize access to individual ptes, but only rmap traversal
	 *   order guarantees that we won't miss both the old and new ptes).
	 */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	/*
	 * We don't have to worry about the ordering of src and dst
	 * pte locks because exclusive mmap_sem prevents deadlock.
	 */
	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
	new_pte = pte_offset_map(new_pmd, new_addr);
	new_ptl = pte_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	/* Finish any deferred batched TLB flushes before moving ptes. */
	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/*
		 * If we are remapping a valid PTE, make sure
		 * to flush TLB before we drop the PTL for the
		 * PTE.
		 *
		 * NOTE! Both old and new PTL matter: the old one
		 * for racing with page_mkclean(), the new one to
		 * make sure the physical page stays valid until
		 * the TLB entry for the old mapping has been
		 * flushed.
		 */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	arch_leave_lazy_mmu_mode();
	/* Flush the whole moved old range while both PTLs are still held. */
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);
	if (need_rmap_locks)
		drop_rmap_locks(vma);
}
#ifdef CONFIG_HAVE_MOVE_PMD
/*
 * Fast path: move an entire pmd entry (one page-table page worth of
 * mappings) from *old_pmd to *new_pmd, instead of moving the ptes one
 * by one.  Returns false when the addresses/length are not PMD-aligned
 * or the destination pmd is unexpectedly populated, in which case the
 * caller falls back to move_ptes().
 */
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pmd_t pmd;

	/* Both ends must be PMD-aligned and cover a full PMD_SIZE extent. */
	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
	    || old_end - old_addr < PMD_SIZE)
		return false;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have release it.
	 */
	if (WARN_ON(!pmd_none(*new_pmd)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pmd */
	pmd = *old_pmd;
	pmd_clear(old_pmd);

	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Set the new pmd */
	set_pmd_at(mm, new_addr, new_pmd, pmd);
	/* Flush the old range while both pmd locks are still held. */
	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#endif
/*
 * Move @len bytes of page tables from @old_addr in @vma to @new_addr in
 * @new_vma, one pmd extent at a time.  Handles THP pmds (moving them
 * whole when possible, splitting otherwise) and, when available, the
 * whole-pmd fast path.  Returns how many bytes were actually moved,
 * which may be less than @len if allocating a destination pmd failed.
 */
unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks)
{
	unsigned long extent, next, old_end;
	struct mmu_notifier_range range;
	pmd_t *old_pmd, *new_pmd;

	old_end = old_addr + len;
	flush_cache_range(vma, old_addr, old_end);

	/* Notify secondary MMUs that the old range is going away. */
	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
				old_addr, old_end);
	mmu_notifier_invalidate_range_start(&range);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		next = (old_addr + PMD_SIZE) & PMD_MASK;
		/* even if next overflowed, extent below will be ok */
		extent = next - old_addr;
		if (extent > old_end - old_addr)
			extent = old_end - old_addr;
		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
		if (!old_pmd)
			continue;
		new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
		if (!new_pmd)
			break;
		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) {
			if (extent == HPAGE_PMD_SIZE) {
				bool moved;
				/* See comment in move_ptes() */
				if (need_rmap_locks)
					take_rmap_locks(vma);
				moved = move_huge_pmd(vma, old_addr, new_addr,
						      old_end, old_pmd, new_pmd);
				if (need_rmap_locks)
					drop_rmap_locks(vma);
				if (moved)
					continue;
			}
			/* Couldn't move the huge pmd whole: split and retry as ptes. */
			split_huge_pmd(vma, old_pmd, old_addr);
			if (pmd_trans_unstable(old_pmd))
				continue;
		} else if (extent == PMD_SIZE) {
#ifdef CONFIG_HAVE_MOVE_PMD
			/*
			 * If the extent is PMD-sized, try to speed the move by
			 * moving at the PMD level if possible.
			 */
			bool moved;

			if (need_rmap_locks)
				take_rmap_locks(vma);
			moved = move_normal_pmd(vma, old_addr, new_addr,
					old_end, old_pmd, new_pmd);
			if (need_rmap_locks)
				drop_rmap_locks(vma);
			if (moved)
				continue;
#endif
		}

		if (pte_alloc(new_vma->vm_mm, new_pmd))
			break;
		/* Clamp the extent so it doesn't cross a destination pmd either. */
		next = (new_addr + PMD_SIZE) & PMD_MASK;
		if (extent > next - new_addr)
			extent = next - new_addr;
		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
			  new_pmd, new_addr, need_rmap_locks);
	}

	mmu_notifier_invalidate_range_end(&range);

	return len + old_addr - old_end;	/* how much done */
}

/*
 * Move the mapping [old_addr, old_addr+old_len) to new_addr (with the
 * possibly different size new_len): copy the vma, move its page tables,
 * fix up accounting (VM_ACCOUNT, locked_vm, hiwater_vm) and unmap the
 * old range.  Returns new_addr on success or a negative error code.
 * On page-table-move failure the partial move is undone by moving the
 * entries back before the error is returned.
 */
static unsigned long move_vma(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long old_len,
		unsigned long new_len, unsigned long new_addr,
		bool *locked, struct vm_userfaultfd_ctx *uf,
		struct list_head *uf_unmap)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma;
	unsigned long vm_flags = vma->vm_flags;
	unsigned long new_pgoff;
	unsigned long moved_len;
	unsigned long excess = 0;
	unsigned long hiwater_vm;
	int split = 0;
	int err;
	bool need_rmap_locks;

	/*
	 * We'd prefer to avoid failure later on in do_munmap:
	 * which may split one vma into three before unmapping.
	 */
	if (mm->map_count >= sysctl_max_map_count - 3)
		return -ENOMEM;

	/*
	 * Advise KSM to break any KSM pages in the area to be moved:
	 * it would be confusing if they were to turn up at the new
	 * location, where they happen to coincide with different KSM
	 * pages recently unmapped. But leave vma->vm_flags as it was,
	 * so KSM can come around to merge on vma and new_vma afterwards.
	 */
	err = ksm_madvise(vma, old_addr, old_addr + old_len,
						MADV_UNMERGEABLE, &vm_flags);
	if (err)
		return err;

	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
			   &need_rmap_locks);
	if (!new_vma)
		return -ENOMEM;

	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
				     need_rmap_locks);
	if (moved_len < old_len) {
		err = -ENOMEM;
	} else if (vma->vm_ops && vma->vm_ops->mremap) {
		/* Give the driver/filesystem a chance to veto or track the move. */
		err = vma->vm_ops->mremap(new_vma);
	}

	if (unlikely(err)) {
		/*
		 * On error, move entries back from new area to old,
		 * which will succeed since page tables still there,
		 * and then proceed to unmap new area instead of old.
		 */
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
				 true);
		vma = new_vma;
		old_len = new_len;
		old_addr = new_addr;
		new_addr = err;
	} else {
		mremap_userfaultfd_prep(new_vma, uf);
		arch_remap(mm, old_addr, old_addr + old_len,
			   new_addr, new_addr + new_len);
	}

	/* Conceal VM_ACCOUNT so old reservation is not undone */
	if (vm_flags & VM_ACCOUNT) {
		vma->vm_flags &= ~VM_ACCOUNT;
		excess = vma->vm_end - vma->vm_start - old_len;
		if (old_addr > vma->vm_start &&
		    old_addr + old_len < vma->vm_end)
			split = 1;
	}

	/*
	 * If we failed to move page tables we still do total_vm increment
	 * since do_munmap() will decrement it by old_len == new_len.
	 *
	 * Since total_vm is about to be raised artificially high for a
	 * moment, we need to restore high watermark afterwards: if stats
	 * are taken meanwhile, total_vm and hiwater_vm appear too high.
	 * If this were a serious issue, we'd add a flag to do_munmap().
	 */
	hiwater_vm = mm->hiwater_vm;
	vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);

	/* Tell pfnmap has moved from this vma */
	if (unlikely(vma->vm_flags & VM_PFNMAP))
		untrack_pfn_moved(vma);

	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
		/* OOM: unable to split vma, just get accounts right */
		vm_unacct_memory(excess >> PAGE_SHIFT);
		excess = 0;
	}
	mm->hiwater_vm = hiwater_vm;

	/* Restore VM_ACCOUNT if one or two pieces of vma left */
	if (excess) {
		vma->vm_flags |= VM_ACCOUNT;
		if (split)
			vma->vm_next->vm_flags |= VM_ACCOUNT;
	}

	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += new_len >> PAGE_SHIFT;
		*locked = true;
	}

	return new_addr;
}
/*
 * Look up and validate the vma at @addr for an mremap of @old_len to
 * @new_len.  Returns the vma on success, or an ERR_PTR on failure.
 * When the vma is VM_ACCOUNT and the mapping grows, the extra pages are
 * charged via security_vm_enough_memory_mm() and the charge is returned
 * through *p so the caller can unaccount on a later failure.
 */
static struct vm_area_struct *vma_to_resize(unsigned long addr,
	unsigned long old_len, unsigned long new_len, unsigned long *p)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = find_vma(mm, addr);
	unsigned long pgoff;

	/* find_vma() may return a vma that starts above addr: reject that too. */
	if (!vma || vma->vm_start > addr)
		return ERR_PTR(-EFAULT);

	/*
	 * !old_len is a special case where an attempt is made to 'duplicate'
	 * a mapping.  This makes no sense for private mappings as it will
	 * instead create a fresh/new mapping unrelated to the original.  This
	 * is contrary to the basic idea of mremap which creates new mappings
	 * based on the original.  There are no known use cases for this
	 * behavior.  As a result, fail such attempts.
	 */
	if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
		pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
		return ERR_PTR(-EINVAL);
	}

	if (is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		return ERR_PTR(-EFAULT);

	if (new_len == old_len)
		return vma;

	/* Need to be careful about a growing mapping */
	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
	pgoff += vma->vm_pgoff;
	/* Reject file-offset wraparound when the mapping grows. */
	if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
		return ERR_PTR(-EINVAL);

	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
		return ERR_PTR(-EFAULT);

	/* Growing a mlocked vma must stay within RLIMIT_MEMLOCK. */
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;

		locked = mm->locked_vm << PAGE_SHIFT;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		locked += new_len - old_len;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return ERR_PTR(-EAGAIN);
	}

	if (!may_expand_vm(mm, vma->vm_flags,
				(new_len - old_len) >> PAGE_SHIFT))
		return ERR_PTR(-ENOMEM);

	if (vma->vm_flags & VM_ACCOUNT) {
		unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;

		if (security_vm_enough_memory_mm(mm, charged))
			return ERR_PTR(-ENOMEM);
		*p = charged;
	}

	return vma;
}
/*
 * Handle the MREMAP_FIXED case: move/resize the mapping at @addr to the
 * caller-specified @new_addr.  Validates the target, unmaps whatever is
 * at the destination, shrinks the source if needed, then moves the vma.
 * Returns the new address or a negative error code.
 */
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
		unsigned long new_addr, unsigned long new_len, bool *locked,
		struct vm_userfaultfd_ctx *uf,
		struct list_head *uf_unmap_early,
		struct list_head *uf_unmap)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	unsigned long map_flags;

	if (offset_in_page(new_addr))
		goto out;

	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
		goto out;

	/* Ensure the old/new locations do not overlap */
	if (addr + old_len > new_addr && new_addr + new_len > addr)
		goto out;

	/*
	 * move_vma() need us to stay 4 maps below the threshold, otherwise
	 * it will bail out at the very beginning.
	 * That is a problem if we have already unmaped the regions here
	 * (new_addr, and old_addr), because userspace will not know the
	 * state of the vma's after it gets -ENOMEM.
	 * So, to avoid such scenario we can pre-compute if the whole
	 * operation has high chances to success map-wise.
	 * Worst-scenario case is when both vma's (new_addr and old_addr) get
	 * split in 3 before unmaping it.
	 * That means 2 more maps (1 for each) to the ones we already hold.
	 * Check whether current map count plus 2 still leads us to 4 maps below
	 * the threshold, otherwise return -ENOMEM here to be more safe.
	 */
	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
		return -ENOMEM;

	/* Clear whatever currently occupies the destination range. */
	ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
	if (ret)
		goto out;

	if (old_len >= new_len) {
		/* Shrink the source first; ignore the no-op equal-length case. */
		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
		if (ret && old_len != new_len)
			goto out;
		old_len = new_len;
	}

	vma = vma_to_resize(addr, old_len, new_len, &charged);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	map_flags = MAP_FIXED;
	if (vma->vm_flags & VM_MAYSHARE)
		map_flags |= MAP_SHARED;

	/* Let the architecture veto/adjust the fixed destination. */
	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
				((addr - vma->vm_start) >> PAGE_SHIFT),
				map_flags);
	if (offset_in_page(ret))
		goto out1;

	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
		       uf_unmap);
	if (!(offset_in_page(ret)))
		goto out;
out1:
	/* Undo the vma_to_resize() memory charge on failure. */
	vm_unacct_memory(charged);
out:
	return ret;
}
/*
 * Can @vma be grown in place by @delta bytes?  Returns 1 only if the end
 * address doesn't overflow, the next vma (if any) doesn't start inside
 * the grown range, and get_unmapped_area() accepts the expanded span.
 */
static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
{
	unsigned long end = vma->vm_end + delta;

	if (end < vma->vm_end) /* overflow */
		return 0;
	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
		return 0;
	/* Non-page-aligned result means the arch rejected the expansion. */
	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
			      0, MAP_FIXED) & ~PAGE_MASK)
		return 0;
	return 1;
}
/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
		unsigned long, new_len, unsigned long, flags,
		unsigned long, new_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	bool locked = false;
	/* Set when __do_munmap() downgraded mmap_sem to read. */
	bool downgraded = false;
	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
	LIST_HEAD(uf_unmap_early);
	LIST_HEAD(uf_unmap);

	addr = untagged_addr(addr);

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
		return ret;

	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
		return ret;

	if (offset_in_page(addr))
		return ret;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/*
	 * We allow a zero old-len as a special case
	 * for DOS-emu "duplicate shm area" thing. But
	 * a zero new-len is nonsensical.
	 */
	if (!new_len)
		return ret;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;

	if (flags & MREMAP_FIXED) {
		ret = mremap_to(addr, old_len, new_addr, new_len,
				&locked, &uf, &uf_unmap_early, &uf_unmap);
		goto out;
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages..
	 * __do_munmap does all the needed commit accounting, and
	 * downgrades mmap_sem to read if so directed.
	 */
	if (old_len >= new_len) {
		int retval;

		retval = __do_munmap(mm, addr+new_len, old_len - new_len,
				     &uf_unmap, true);
		if (retval < 0 && old_len != new_len) {
			ret = retval;
			goto out;
		/* Returning 1 indicates mmap_sem is downgraded to read. */
		} else if (retval == 1)
			downgraded = true;
		ret = addr;
		goto out;
	}

	/*
	 * Ok, we need to grow..
	 */
	vma = vma_to_resize(addr, old_len, new_len, &charged);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* old_len exactly to the end of the area..
	 */
	if (old_len == vma->vm_end - addr) {
		/* can we just expand the current mapping? */
		if (vma_expandable(vma, new_len - old_len)) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;

			if (vma_adjust(vma, vma->vm_start, addr + new_len,
				       vma->vm_pgoff, NULL)) {
				ret = -ENOMEM;
				goto out;
			}

			vm_stat_account(mm, vma->vm_flags, pages);
			if (vma->vm_flags & VM_LOCKED) {
				mm->locked_vm += pages;
				locked = true;
				new_addr = addr;
			}
			ret = addr;
			goto out;
		}
	}

	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		unsigned long map_flags = 0;

		if (vma->vm_flags & VM_MAYSHARE)
			map_flags |= MAP_SHARED;

		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
					vma->vm_pgoff +
					((addr - vma->vm_start) >> PAGE_SHIFT),
					map_flags);
		if (offset_in_page(new_addr)) {
			ret = new_addr;
			goto out;
		}

		ret = move_vma(vma, addr, old_len, new_len, new_addr,
			       &locked, &uf, &uf_unmap);
	}
out:
	/* A non-page-aligned ret is an error code: undo the charge. */
	if (offset_in_page(ret)) {
		vm_unacct_memory(charged);
		locked = 0;
	}
	if (downgraded)
		up_read(&current->mm->mmap_sem);
	else
		up_write(&current->mm->mmap_sem);
	/* Populate the newly grown tail of a locked mapping, outside the lock. */
	if (locked && new_len > old_len)
		mm_populate(new_addr + old_len, new_len - old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap_early);
	mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap);
	return ret;
}