mm/mprotect.c
/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <linux/pkeys.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "internal.h"
/*
 * For a prot_numa update we only hold mmap_sem for read so there is a
 * potential race with faulting where a pmd was temporarily none. This
 * function checks for a transhuge pmd under the appropriate lock. It
 * returns a pte if it was successfully locked or NULL if it raced with
 * a transhuge insertion.
 */
static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
			unsigned long addr, int prot_numa, spinlock_t **ptl)
{
	pte_t *pte;
	spinlock_t *pmdl;

	/* !prot_numa is protected by mmap_sem held for write */
	if (!prot_numa)
		return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);

	pmdl = pmd_lock(vma->vm_mm, pmd);
	if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
		spin_unlock(pmdl);
		return NULL;
	}

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
	spin_unlock(pmdl);
	return pte;
}
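/*
 * Caller context (annotation, not in the original file): the prot_numa
 * case is driven by the NUMA balancing scan (task_numa_work() ->
 * change_prot_numa()), which holds mmap_sem only for read; the
 * mprotect() path below holds it for write, which is why only
 * prot_numa needs the pmd-lock re-check above.
 */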
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;

	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
	if (!pte)
		return 0;

	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;
			bool preserve_write = prot_numa && pte_write(oldpte);

			/*
			 * Avoid trapping faults against the zero or KSM
			 * pages. See similar comment in change_huge_pmd.
			 */
			if (prot_numa) {
				struct page *page;

				page = vm_normal_page(vma, addr, oldpte);
				if (!page || PageKsm(page))
					continue;

				/* Avoid TLB flush if possible */
				if (pte_protnone(oldpte))
					continue;
			}

			ptent = ptep_modify_prot_start(mm, addr, pte);
			ptent = pte_modify(ptent, newprot);
			if (preserve_write)
				ptent = pte_mkwrite(ptent);

			/* Avoid taking write faults for known dirty pages */
			if (dirty_accountable && pte_dirty(ptent) &&
					(pte_soft_dirty(ptent) ||
					 !(vma->vm_flags & VM_SOFTDIRTY))) {
				ptent = pte_mkwrite(ptent);
			}
			ptep_modify_prot_commit(mm, addr, pte, ptent);
			pages++;
		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				pte_t newpte;
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				set_pte_at(mm, addr, pte, newpte);

				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}
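/*
 * Note on the return value (annotation): 'pages' counts only entries
 * that were actually rewritten — present ptes, plus write migration
 * entries that were downgraded to read.  change_protection_range()
 * uses the count to skip the final TLB flush entirely when nothing
 * changed.
 */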
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long next;
	unsigned long pages = 0;
	unsigned long nr_huge_updates = 0;
	unsigned long mni_start = 0;

	pmd = pmd_offset(pud, addr);
	do {
		unsigned long this_pages;

		next = pmd_addr_end(addr, end);
		if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
				&& pmd_none_or_clear_bad(pmd))
			continue;

		/* invoke the mmu notifier if the pmd is populated */
		if (!mni_start) {
			mni_start = addr;
			mmu_notifier_invalidate_range_start(mm, mni_start, end);
		}

		if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE) {
				split_huge_pmd(vma, pmd, addr);
				if (pmd_none(*pmd))
					continue;
			} else {
				int nr_ptes = change_huge_pmd(vma, pmd, addr,
						newprot, prot_numa);

				if (nr_ptes) {
					if (nr_ptes == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}

					/* huge pmd was handled */
					continue;
				}
			}
			/* fall through, the trans huge pmd just split */
		}
		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
				 dirty_accountable, prot_numa);
		pages += this_pages;
	} while (pmd++, addr = next, addr != end);

	if (mni_start)
		mmu_notifier_invalidate_range_end(mm, mni_start, end);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}
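/*
 * Worked example (annotation, assuming x86-64 defaults of 4K base
 * pages and 2M transparent huge pages, so HPAGE_PMD_NR == 512): a
 * protection change covering one fully-mapped huge pmd is a single
 * change_huge_pmd() update that adds 512 to 'pages'; a partial cover
 * instead forces split_huge_pmd(), after which the covered sub-range
 * is updated pte by pte in change_pte_range().
 */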
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pud_t *pud;
	unsigned long next;
	unsigned long pages = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		pages += change_pmd_range(vma, pud, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pud++, addr = next, addr != end);

	return pages;
}
static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long pages = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	set_tlb_flush_pending(mm);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pages += change_pud_range(vma, pgd, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pgd++, addr = next, addr != end);

	/* Only flush the TLB if we actually modified any entries: */
	if (pages)
		flush_tlb_range(vma, start, end);
	clear_tlb_flush_pending(mm);

	return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, pgprot_t newprot,
		       int dirty_accountable, int prot_numa)
{
	unsigned long pages;

	if (is_vm_hugetlb_page(vma))
		pages = hugetlb_change_protection(vma, start, end, newprot);
	else
		pages = change_protection_range(vma, start, end, newprot,
				dirty_accountable, prot_numa);

	return pages;
}
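/*
 * Dispatch note (annotation): change_protection() is the common entry
 * point.  mprotect_fixup() below calls it with prot_numa == 0, and the
 * NUMA balancing code reuses the same pgd/pud/pmd/pte walk via
 * change_prot_numa() with prot_numa == 1.  hugetlb VMAs take their own
 * path because their entries are managed at huge-page granularity by
 * hugetlb_change_protection() rather than by the walk above.
 */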
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mappings were accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		/* Check space limits when area turns into data. */
		if (!may_expand_vm(mm, newflags, nrpages) &&
				may_expand_vm(mm, oldflags, nrpages))
			return -ENOMEM;
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			   vma->vm_userfaultfd_ctx);
	if (*pprev) {
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	dirty_accountable = vma_wants_writenotify(vma);
	vma_set_page_prot(vma);

	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	/*
	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
	 * fault on access.
	 */
	if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
			(newflags & VM_WRITE)) {
		populate_vma_page_range(vma, start, end, NULL);
	}

	vm_stat_account(mm, oldflags, -nrpages);
	vm_stat_account(mm, newflags, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}
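/*
 * Illustration (annotation; addresses hypothetical, 4K pages assumed):
 * mprotect()ing the middle of a single [0x1000, 0x9000) VMA with
 * different flags cannot merge, so split_vma() runs twice and three
 * VMAs remain:
 *
 *	[0x1000, 0x3000)  old flags
 *	[0x3000, 0x5000)  new flags
 *	[0x5000, 0x9000)  old flags
 */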
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
				(prot & PROT_READ);

	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	reqprot = prot;

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;
		int pkey = arch_override_mprotect_pkey(vma, prot, -1);

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		/* Does the application expect PROT_READ to imply PROT_EXEC? */
		if (rier && (vma->vm_flags & VM_MAYEXEC))
			prot |= PROT_EXEC;

		newflags = calc_vm_prot_bits(prot, pkey);
		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shifts VM_MAY% into place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			goto out;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
		prot = reqprot;
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}
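/*
 * Userspace view: a minimal sketch (not part of this file) of how the
 * syscall above is typically exercised.  The helper name and buffer
 * handling are illustrative only; semantics follow mprotect(2).
 *
 *	#include <stdio.h>
 *	#include <sys/mman.h>
 *
 *	// Drop write permission on a page-aligned region, e.g. one
 *	// obtained from mmap().  Returns 0 on success, -1 with errno
 *	// set (EINVAL for a misaligned address, ENOMEM for unmapped
 *	// gaps in [addr, addr + len), EACCES if the requested PROT_*
 *	// exceeds what the underlying mapping allows).
 *	static int make_read_only(void *addr, size_t len)
 *	{
 *		if (mprotect(addr, len, PROT_READ) == -1) {
 *			perror("mprotect");
 *			return -1;
 *		}
 *		return 0;
 *	}
 */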