mm/mprotect.c
/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/*
 * For a prot_numa update we only hold mmap_sem for read so there is a
 * potential race with faulting where a pmd was temporarily none. This
 * function checks for a transhuge pmd under the appropriate lock. It
 * returns a pte if it was successfully locked or NULL if it raced with
 * a transhuge insertion.
 */
static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
			unsigned long addr, int prot_numa, spinlock_t **ptl)
{
	pte_t *pte;
	spinlock_t *pmdl;

	/* !prot_numa is protected by mmap_sem held for write */
	if (!prot_numa)
		return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);

	pmdl = pmd_lock(vma->vm_mm, pmd);
	if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
		spin_unlock(pmdl);
		return NULL;
	}

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
	spin_unlock(pmdl);
	return pte;
}
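
/*
 * Walk the ptes under one pmd in [addr, end) and apply newprot.
 * Present ptes are rewritten via ptep_modify_prot_start/commit (or,
 * for a prot_numa update, marked with pte_numa so a later access
 * takes a NUMA hinting fault); write migration entries are downgraded
 * to read. Returns how many entries were actually updated, so callers
 * can skip the TLB flush when nothing changed.
 */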
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;

	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
	if (!pte)
		return 0;

	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;
			bool updated = false;

			if (!prot_numa) {
				ptent = ptep_modify_prot_start(mm, addr, pte);
				if (pte_numa(ptent))
					ptent = pte_mknonnuma(ptent);
				ptent = pte_modify(ptent, newprot);
				/*
				 * Avoid taking write faults for pages we
				 * know to be dirty.
				 */
				if (dirty_accountable && pte_dirty(ptent) &&
				    (pte_soft_dirty(ptent) ||
				     !(vma->vm_flags & VM_SOFTDIRTY)))
					ptent = pte_mkwrite(ptent);
				ptep_modify_prot_commit(mm, addr, pte, ptent);
				updated = true;
			} else {
				struct page *page;

				page = vm_normal_page(vma, addr, oldpte);
				if (page && !PageKsm(page)) {
					if (!pte_numa(oldpte)) {
						ptep_set_numa(mm, addr, pte);
						updated = true;
					}
				}
			}
			if (updated)
				pages++;
		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				pte_t newpte;
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				set_pte_at(mm, addr, pte, newpte);

				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}
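
/*
 * Middle level of the walk: visit each pmd in [addr, end). Transparent
 * huge pmds are either updated in one go by change_huge_pmd() or split
 * first, falling through to the pte loop. The mmu notifier range is
 * only opened once the first populated pmd is seen.
 */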
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long next;
	unsigned long pages = 0;
	unsigned long nr_huge_updates = 0;
	unsigned long mni_start = 0;

	pmd = pmd_offset(pud, addr);
	do {
		unsigned long this_pages;

		next = pmd_addr_end(addr, end);
		if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
			continue;

		/* invoke the mmu notifier if the pmd is populated */
		if (!mni_start) {
			mni_start = addr;
			mmu_notifier_invalidate_range_start(mm, mni_start, end);
		}

		if (pmd_trans_huge(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE)
				split_huge_page_pmd(vma, addr, pmd);
			else {
				int nr_ptes = change_huge_pmd(vma, pmd, addr,
						newprot, prot_numa);

				if (nr_ptes) {
					if (nr_ptes == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}

					/* huge pmd was handled */
					continue;
				}
			}
			/* fall through, the trans huge pmd just split */
		}
		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
				 dirty_accountable, prot_numa);
		pages += this_pages;
	} while (pmd++, addr = next, addr != end);

	if (mni_start)
		mmu_notifier_invalidate_range_end(mm, mni_start, end);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}
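
/* Walk the puds under one pgd entry, delegating to change_pmd_range(). */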
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pud_t *pud;
	unsigned long next;
	unsigned long pages = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		pages += change_pmd_range(vma, pud, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pud++, addr = next, addr != end);

	return pages;
}
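
/*
 * Top of the page-table walk: flush the cache, mark a TLB flush as
 * pending so concurrent code can see that ptes and the TLB may briefly
 * disagree, walk every pgd in [addr, end), then flush the TLB only if
 * some entry actually changed.
 */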
static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long pages = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	set_tlb_flush_pending(mm);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pages += change_pud_range(vma, pgd, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pgd++, addr = next, addr != end);

	/* Only flush the TLB if we actually modified any entries: */
	if (pages)
		flush_tlb_range(vma, start, end);
	clear_tlb_flush_pending(mm);

	return pages;
}
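
/*
 * Common entry point for mprotect_fixup() and the NUMA hinting code:
 * hugetlb VMAs take their own path, everything else goes through the
 * generic page-table walk above. Returns the number of entries changed.
 */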
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, pgprot_t newprot,
		       int dirty_accountable, int prot_numa)
{
	unsigned long pages;

	if (is_vm_hugetlb_page(vma))
		pages = hugetlb_change_protection(vma, start, end, newprot);
	else
		pages = change_protection_range(vma, start, end, newprot,
				dirty_accountable, prot_numa);

	return pages;
}
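
/*
 * Apply newflags to [start, end) within one vma: charge newly
 * private-writable pages against the commit limit, try to merge with
 * the neighbouring vmas, split at the region boundaries if needed,
 * then rewrite the page protections and fix up the statistics.
 */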
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mappings are accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (*pprev) {
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	dirty_accountable = vma_wants_writenotify(vma);
	vma_set_page_prot(vma);

	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}
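
/*
 * The mprotect(2) entry point: validate the arguments, then walk the
 * vmas covering [start, start+len) and apply the new protection to
 * each in turn via mprotect_fixup(), stopping at the first hole or
 * error.
 */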
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long vm_flags, nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	reqprot = prot;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC:
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		prot |= PROT_EXEC;

	vm_flags = calc_vm_prot_bits(prot);

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		newflags = vm_flags;
		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shifts VM_MAY% into the place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			goto out;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}
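
/*
 * Usage sketch (userspace, not part of this file): a typical caller
 * page-aligns a region and toggles its protection. Hypothetical
 * illustration only:
 *
 *	size_t len = sysconf(_SC_PAGESIZE);
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	mprotect(p, len, PROT_READ);		  // now read-only; writes fault
 *	mprotect(p, len, PROT_READ | PROT_WRITE); // writable again
 */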