Blame view
mm/fremap.c
7.22 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 |
/* * linux/mm/fremap.c * * Explicit pagetable population and nonlinear (random) mappings support. * * started by Ingo Molnar, Copyright (C) 2002, 2003 */ |
0b173bc4d mm: kill vma flag... |
8 |
#include <linux/export.h> |
4af3c9cc4 Drop some headers... |
9 |
#include <linux/backing-dev.h> |
1da177e4c Linux-2.6.12-rc2 |
10 11 12 13 14 15 16 |
#include <linux/mm.h> #include <linux/swap.h> #include <linux/file.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/swapops.h> #include <linux/rmap.h> |
1da177e4c Linux-2.6.12-rc2 |
17 |
#include <linux/syscalls.h> |
cddb8a5c1 mmu-notifiers: core |
18 |
#include <linux/mmu_notifier.h> |
1da177e4c Linux-2.6.12-rc2 |
19 20 21 22 |
#include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> |
ba470de43 mmap: handle mloc... |
23 |
#include "internal.h" |
887843961 mm: fix bad rss-c... |
24 25 26 27 |
/*
 * Select the per-mm counter index that @page is accounted under:
 * MM_ANONPAGES when PageAnon() is true for the page, MM_FILEPAGES otherwise.
 */
static int mm_counter(struct page *page)
{
	if (PageAnon(page))
		return MM_ANONPAGES;

	return MM_FILEPAGES;
}
d0217ac04 mm: fault feedbac... |
28 |
/*
 * Tear down whatever entry currently sits at @ptep (the pte for @addr in
 * @vma) and fix up the mm counters of @mm accordingly.
 *
 * The caller must not pass a pte_none() entry.
 */
static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, pte_t *ptep)
{
	pte_t old = *ptep;
	struct page *page;
	swp_entry_t entry;

	if (pte_present(old)) {
		/* Present pte: flush the cache, then clear and flush the pte. */
		flush_cache_page(vma, addr, pte_pfn(old));
		old = ptep_clear_flush(vma, addr, ptep);

		page = vm_normal_page(vma, addr, old);
		if (!page)
			return;

		/* Carry the dirty bit from the cleared pte over to the page. */
		if (pte_dirty(old))
			set_page_dirty(page);
		update_hiwater_rss(mm);
		dec_mm_counter(mm, mm_counter(page));
		page_remove_rmap(page);
		page_cache_release(page);
		return;
	}

	/*
	 * Not present and (per the caller contract) not pte_none(): this is
	 * either a file pte, which needs no accounting, or a swap-style entry.
	 */
	if (!pte_file(old)) {
		update_hiwater_rss(mm);
		entry = pte_to_swp_entry(old);
		if (!non_swap_entry(entry)) {
			free_swap_and_cache(entry);
			dec_mm_counter(mm, MM_SWAPENTS);
		} else if (is_migration_entry(entry)) {
			page = migration_entry_to_page(entry);
			dec_mm_counter(mm, mm_counter(page));
		}
	}
	pte_clear_not_present_full(mm, addr, ptep, 0);
}

/*
 * Install a file pte to a given virtual memory address, release any
 * previously existing mapping.
 */
d0217ac04 mm: fault feedbac... |
69 |
/*
 * install_file_pte - replace the pte at @addr with a non-present file pte
 * @mm:    address space whose page table is modified
 * @vma:   vma covering @addr (passed through to zap_pte())
 * @addr:  user virtual address of the pte to replace
 * @pgoff: file page offset to encode into the new pte
 * @prot:  not referenced by this function
 *
 * Any entry already present at @addr is torn down with zap_pte() first.
 * If that entry was a present, soft-dirty pte, the soft-dirty state is
 * carried over into the new file pte.
 *
 * Returns 0 on success, or -ENOMEM when get_locked_pte() fails.
 */
static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, unsigned long pgoff, pgprot_t prot)
{
	int err = -ENOMEM;
	pte_t *pte, ptfile;
	spinlock_t *ptl;

	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto out;

	ptfile = pgoff_to_pte(pgoff);

	if (!pte_none(*pte)) {
		/*
		 * pte_file_mksoft_dirty() takes and returns the pte by value
		 * (like the other pte_mk*() helpers), so its result has to be
		 * stored back; otherwise the soft-dirty bit is silently lost.
		 */
		if (pte_present(*pte) && pte_soft_dirty(*pte))
			ptfile = pte_file_mksoft_dirty(ptfile);
		zap_pte(mm, vma, addr, pte);
	}

	set_pte_at(mm, addr, pte, ptfile);
	/*
	 * We don't need to run update_mmu_cache() here because the "file pte"
	 * being installed by install_file_pte() is not a real pte - it's a
	 * non-present entry (like a swap entry), noting what file offset should
	 * be mapped there when there's a fault (in a non-linear vma where
	 * that's not obvious).
	 */
	pte_unmap_unlock(pte, ptl);
	err = 0;
out:
	return err;
}
0b173bc4d mm: kill vma flag... |
101 102 |
/*
 * generic_file_remap_pages - install file ptes for a range, one page at a time
 * @vma:   vma the range belongs to; its vm_mm and vm_page_prot are used
 * @addr:  first user virtual address of the range
 * @size:  length of the range in bytes
 * @pgoff: file page offset to map at @addr; incremented for each later page
 *
 * Calls install_file_pte() for every PAGE_SIZE step of the range and stops at
 * the first failure.  Returns 0 on success or the error from
 * install_file_pte().
 *
 * NOTE(review): the first pte is installed before @size is examined, and the
 * loop only ends when the remaining size reaches exactly zero, so callers are
 * expected to pass a non-zero multiple of PAGE_SIZE.
 */
int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
			     unsigned long size, pgoff_t pgoff)
{
	struct mm_struct *owner = vma->vm_mm;
	int ret;

	for (;;) {
		ret = install_file_pte(owner, vma, addr, pgoff,
				       vma->vm_page_prot);
		if (ret)
			break;

		size -= PAGE_SIZE;
		if (!size)
			break;

		addr += PAGE_SIZE;
		pgoff++;
	}

	return ret;
}
0b173bc4d mm: kill vma flag... |
118 |
EXPORT_SYMBOL(generic_file_remap_pages); |
54cb8821d mm: merge populat... |
119 |
|
8d63494f7 remap_file_pages:... |
120 121 |
/** * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma |
1da177e4c Linux-2.6.12-rc2 |
122 123 |
* @start: start of the remapped virtual memory range * @size: size of the remapped virtual memory range |
8d63494f7 remap_file_pages:... |
124 125 |
* @prot: new protection bits of the range (see NOTE) * @pgoff: to-be-mapped page of the backing store file |
1da177e4c Linux-2.6.12-rc2 |
126 127 |
* @flags: 0 or MAP_NONBLOCK - the latter will cause no IO. * |
8d63494f7 remap_file_pages:... |
128 129 130 131 |
* sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma * (shared backing store file). * * This syscall works purely via pagetables, so it's the most efficient |
1da177e4c Linux-2.6.12-rc2 |
132 133 134 135 |
* way to map the same (large) file into a given virtual window. Unlike * mmap()/mremap() it does not create any new vmas. The new mappings are * also safe across swapout. * |
7682486b3 mm: fix various k... |
136 |
* NOTE: the @prot parameter right now is ignored (but must be zero), |
8d63494f7 remap_file_pages:... |
137 138 |
* and the vma's default protection is used. Arbitrary protections * might be implemented in the future. |
1da177e4c Linux-2.6.12-rc2 |
139 |
*/ |
6a6160a7b [CVE-2009-0029] S... |
140 141 |
SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
{
	struct mm_struct *mm = current->mm;
	struct address_space *mapping;
	struct vm_area_struct *vma;
	int err = -EINVAL;		/* default result for every early exit */
	int has_write_lock = 0;		/* set once mmap_sem is held for write */
	vm_flags_t vm_flags = 0;	/* snapshot of vma->vm_flags used after unlock */

	/* Non-zero @prot is not supported and is rejected with -EINVAL. */
	if (prot)
		return err;
	/*
	 * Sanitize the syscall parameters:
	 */
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	/* Does the address range wrap, or is the span zero-sized? */
	if (start + size <= start)
		return err;

	/* Does pgoff wrap? */
	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
		return err;

	/* Can we represent this offset inside this architecture's pte's? */
#if PTE_FILE_MAX_BITS < BITS_PER_LONG
	if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
		return err;
#endif

	/*
	 * We need down_write() to change vma->vm_flags.  Start with the
	 * read lock; the paths below that modify vm_flags switch to the
	 * write lock via get_write_lock and then redo the lookup from
	 * "retry".
	 */
	down_read(&mm->mmap_sem);
 retry:
	vma = find_vma(mm, start);

	/*
	 * Make sure the vma is shared, that it supports prefaulting,
	 * and that the remapped range is valid and fully within
	 * the single existing vma.
	 */
	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (!vma->vm_ops || !vma->vm_ops->remap_pages)
		goto out;

	if (start < vma->vm_start || start + size > vma->vm_end)
		goto out;

	/* Must set VM_NONLINEAR before any pages are populated. */
	if (!(vma->vm_flags & VM_NONLINEAR)) {
		/*
		 * vm_private_data is used as a swapout cursor
		 * in a VM_NONLINEAR vma.
		 */
		if (vma->vm_private_data)
			goto out;

		/* Don't need a nonlinear mapping, exit success */
		if (pgoff == linear_page_index(vma, start)) {
			err = 0;
			goto out;
		}

		if (!has_write_lock) {
			/*
			 * Also entered by goto from the VM_LOCKED path below.
			 * The read lock is dropped before the write lock is
			 * taken, so the vma is looked up and validated again.
			 */
get_write_lock:
			up_read(&mm->mmap_sem);
			down_write(&mm->mmap_sem);
			has_write_lock = 1;
			goto retry;
		}
		mapping = vma->vm_file->f_mapping;
		/*
		 * page_mkclean doesn't work on nonlinear vmas, so if
		 * dirty pages need to be accounted, emulate with linear
		 * vmas.
		 */
		if (mapping_cap_account_dirty(mapping)) {
			unsigned long addr;
			/* Hold our own file reference across mmap_region(). */
			struct file *file = get_file(vma->vm_file);
			/* mmap_region may free vma; grab the info now */
			vm_flags = vma->vm_flags;

			addr = mmap_region(file, start, size, vm_flags, pgoff);
			fput(file);
			if (IS_ERR_VALUE(addr)) {
				err = addr;
			} else {
				BUG_ON(addr != start);
				err = 0;
			}
			/* Skip "out": vma must not be dereferenced any more. */
			goto out_freed;
		}
		/*
		 * Convert the vma to nonlinear: move it from the mapping's
		 * i_mmap interval tree to its i_mmap_nonlinear list.
		 */
		mutex_lock(&mapping->i_mmap_mutex);
		flush_dcache_mmap_lock(mapping);
		vma->vm_flags |= VM_NONLINEAR;
		vma_interval_tree_remove(vma, &mapping->i_mmap);
		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		flush_dcache_mmap_unlock(mapping);
		mutex_unlock(&mapping->i_mmap_mutex);
	}

	if (vma->vm_flags & VM_LOCKED) {
		/*
		 * drop PG_Mlocked flag for over-mapped range
		 */
		if (!has_write_lock)
			goto get_write_lock;
		/*
		 * vm_flags is saved and written back around the munlock call,
		 * so the vma's flags end up as they were before it.
		 * NOTE(review): presumably munlock_vma_pages_range() modifies
		 * vma->vm_flags (e.g. clears VM_LOCKED) - confirm.
		 */
		vm_flags = vma->vm_flags;
		munlock_vma_pages_range(vma, start, start + size);
		vma->vm_flags = vm_flags;
	}

	/* Do the actual remap, bracketed by mmu notifier invalidation. */
	mmu_notifier_invalidate_range_start(mm, start, start + size);
	err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
	mmu_notifier_invalidate_range_end(mm, start, start + size);

	/*
	 * We can't clear VM_NONLINEAR because we'd have to do
	 * it after ->populate completes, and that would prevent
	 * downgrading the lock.  (Locks can't be upgraded).
	 *
	 * NOTE(review): "->populate" appears to refer to the
	 * ->remap_pages call above; no lock downgrade is visible in
	 * this function.
	 */

out:
	/* vma is NULL here when find_vma() found nothing. */
	if (vma)
		vm_flags = vma->vm_flags;
out_freed:
	/* Release mmap_sem in whichever mode it is currently held. */
	if (likely(!has_write_lock))
		up_read(&mm->mmap_sem);
	else
		up_write(&mm->mmap_sem);
	/*
	 * On success, populate the range if the vma was VM_LOCKED or the
	 * caller did not pass MAP_NONBLOCK.  This uses the vm_flags snapshot
	 * because mmap_sem has already been released.
	 */
	if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
		mm_populate(start, size);

	return err;
}