Blame view
mm/fremap.c
7.3 KB
1da177e4c
|
1 2 3 4 5 6 7 |
/* * linux/mm/fremap.c * * Explicit pagetable population and nonlinear (random) mappings support. * * started by Ingo Molnar, Copyright (C) 2002, 2003 */ |
0b173bc4d
|
8 |
#include <linux/export.h> |
4af3c9cc4
|
9 |
#include <linux/backing-dev.h> |
1da177e4c
|
10 11 12 13 14 15 16 |
#include <linux/mm.h> #include <linux/swap.h> #include <linux/file.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/swapops.h> #include <linux/rmap.h> |
1da177e4c
|
17 |
#include <linux/syscalls.h> |
cddb8a5c1
|
18 |
#include <linux/mmu_notifier.h> |
1da177e4c
|
19 20 21 22 |
#include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> |
ba470de43
|
23 |
#include "internal.h" |
887843961
|
24 25 26 27 |
static int mm_counter(struct page *page) { return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; } |
/*
 * Tear down whatever currently occupies the pte at @addr so a new file
 * pte can be installed over it.  Handles a present pte (TLB/cache flush,
 * dirty propagation, rmap and RSS accounting) as well as non-present
 * swap and migration entries.  Never called on a pte_none() slot; the
 * caller in this file (install_file_pte) holds the pte lock for @ptep.
 */
static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, pte_t *ptep)
{
	pte_t pte = *ptep;
	struct page *page;
	swp_entry_t entry;

	if (pte_present(pte)) {
		/* Flush cache before the mapping disappears (VIVT caches). */
		flush_cache_page(vma, addr, pte_pfn(pte));
		/* Atomically clear the pte, flush the TLB and notify MMU listeners. */
		pte = ptep_clear_flush_notify(vma, addr, ptep);
		/* NULL for special mappings (e.g. zero page) that need no accounting. */
		page = vm_normal_page(vma, addr, pte);
		if (page) {
			/* Propagate hardware dirty bit before dropping the mapping. */
			if (pte_dirty(pte))
				set_page_dirty(page);
			/* Record RSS high-water mark before the counter drops. */
			update_hiwater_rss(mm);
			dec_mm_counter(mm, mm_counter(page));
			page_remove_rmap(page);
			page_cache_release(page);
		}
	} else {	/* zap_pte() is not called when pte_none() */
		/* A file pte carries no reference; only swap-style entries do. */
		if (!pte_file(pte)) {
			update_hiwater_rss(mm);
			entry = pte_to_swp_entry(pte);
			if (non_swap_entry(entry)) {
				/* Migration entries pin a page; account its type. */
				if (is_migration_entry(entry)) {
					page = migration_entry_to_page(entry);
					dec_mm_counter(mm, mm_counter(page));
				}
			} else {
				free_swap_and_cache(entry);
				dec_mm_counter(mm, MM_SWAPENTS);
			}
		}
		/* Non-present entry: no TLB flush needed, just clear it. */
		pte_clear_not_present_full(mm, addr, ptep, 0);
	}
}

/*
 * Install a file pte to a given virtual memory address, release any
 * previously existing mapping.
 */
/*
 * Write a non-present "file pte" encoding @pgoff at @addr, replacing any
 * existing mapping there via zap_pte().  Returns 0 on success, -ENOMEM if
 * the page table could not be allocated.  @prot is currently unused; the
 * vma's protection applies at fault time.
 */
static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, unsigned long pgoff, pgprot_t prot)
{
	int err = -ENOMEM;
	pte_t *pte, ptfile;
	spinlock_t *ptl;

	/* Allocates intermediate page tables if needed; returns with ptl held. */
	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto out;

	ptfile = pgoff_to_pte(pgoff);

	/* Evict whatever is there now (present page, swap entry, ...). */
	if (!pte_none(*pte))
		zap_pte(mm, vma, addr, pte);

	/* Preserve the soft-dirty bit across the replacement. */
	set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile));
	/*
	 * We don't need to run update_mmu_cache() here because the "file pte"
	 * being installed by install_file_pte() is not a real pte - it's a
	 * non-present entry (like a swap entry), noting what file offset should
	 * be mapped there when there's a fault (in a non-linear vma where
	 * that's not obvious).
	 */
	pte_unmap_unlock(pte, ptl);
	err = 0;
out:
	return err;
}
0b173bc4d
|
97 98 |
int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, unsigned long size, pgoff_t pgoff) |
54cb8821d
|
99 |
{ |
0b173bc4d
|
100 |
struct mm_struct *mm = vma->vm_mm; |
54cb8821d
|
101 102 103 104 105 106 107 108 109 110 111 |
int err; do { err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); if (err) return err; size -= PAGE_SIZE; addr += PAGE_SIZE; pgoff++; } while (size); |
0b173bc4d
|
112 |
return 0; |
54cb8821d
|
113 |
} |
0b173bc4d
|
114 |
EXPORT_SYMBOL(generic_file_remap_pages); |
54cb8821d
|
115 |
|
/**
 * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
 * @start: start of the remapped virtual memory range
 * @size: size of the remapped virtual memory range
 * @prot: new protection bits of the range (see NOTE)
 * @pgoff: to-be-mapped page of the backing store file
 * @flags: 0 or MAP_NONBLOCK - the latter will cause no IO.
 *
 * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
 * (shared backing store file).
 *
 * This syscall works purely via pagetables, so it's the most efficient
 * way to map the same (large) file into a given virtual window. Unlike
 * mmap()/mremap() it does not create any new vmas. The new mappings are
 * also safe across swapout.
 *
 * NOTE: the @prot parameter right now is ignored (but must be zero),
 * and the vma's default protection is used. Arbitrary protections
 * might be implemented in the future.
 */
SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
{
	struct mm_struct *mm = current->mm;
	struct address_space *mapping;
	struct vm_area_struct *vma;
	int err = -EINVAL;
	/* 0 = holding mmap_sem for read; 1 = upgraded to write. */
	int has_write_lock = 0;
	vm_flags_t vm_flags = 0;

	/* This syscall is deprecated; nag once per boot. */
	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
			"See Documentation/vm/remap_file_pages.txt.\n",
			current->comm, current->pid);

	if (prot)
		return err;
	/*
	 * Sanitize the syscall parameters:
	 */
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	/* Does the address range wrap, or is the span zero-sized? */
	if (start + size <= start)
		return err;

	/* Does pgoff wrap? */
	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
		return err;

	/* Can we represent this offset inside this architecture's pte's? */
#if PTE_FILE_MAX_BITS < BITS_PER_LONG
	if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
		return err;
#endif

	/* We need down_write() to change vma->vm_flags. */
	down_read(&mm->mmap_sem);
	/* Re-entered after a read->write lock upgrade: vma must be re-looked-up. */
 retry:
	vma = find_vma(mm, start);

	/*
	 * Make sure the vma is shared, that it supports prefaulting,
	 * and that the remapped range is valid and fully within
	 * the single existing vma.
	 */
	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (!vma->vm_ops || !vma->vm_ops->remap_pages)
		goto out;

	if (start < vma->vm_start || start + size > vma->vm_end)
		goto out;

	/* Must set VM_NONLINEAR before any pages are populated. */
	if (!(vma->vm_flags & VM_NONLINEAR)) {
		/*
		 * vm_private_data is used as a swapout cursor
		 * in a VM_NONLINEAR vma.
		 */
		if (vma->vm_private_data)
			goto out;

		/* Don't need a nonlinear mapping, exit success */
		if (pgoff == linear_page_index(vma, start)) {
			err = 0;
			goto out;
		}

		/*
		 * rwsems can't be upgraded in place: drop the read lock,
		 * take the write lock, then retry the whole lookup since
		 * the vma may have changed or vanished in the window.
		 */
		if (!has_write_lock) {
get_write_lock:
			up_read(&mm->mmap_sem);
			down_write(&mm->mmap_sem);
			has_write_lock = 1;
			goto retry;
		}
		mapping = vma->vm_file->f_mapping;
		/*
		 * page_mkclean doesn't work on nonlinear vmas, so if
		 * dirty pages need to be accounted, emulate with linear
		 * vmas.
		 */
		if (mapping_cap_account_dirty(mapping)) {
			unsigned long addr;
			struct file *file = get_file(vma->vm_file);
			/* mmap_region may free vma; grab the info now */
			vm_flags = vma->vm_flags;

			addr = mmap_region(file, start, size, vm_flags, pgoff);
			fput(file);
			if (IS_ERR_VALUE(addr)) {
				err = addr;
			} else {
				/* MAP_FIXED semantics: must land exactly at start. */
				BUG_ON(addr != start);
				err = 0;
			}
			/* vma may have been freed by mmap_region: skip the 'out' deref. */
			goto out_freed;
		}
		/* Move the vma from the linear interval tree to the nonlinear list. */
		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma->vm_flags |= VM_NONLINEAR;
		vma_interval_tree_remove(vma, &mapping->i_mmap);
		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}

	if (vma->vm_flags & VM_LOCKED) {
		/*
		 * drop PG_Mlocked flag for over-mapped range
		 */
		if (!has_write_lock)
			goto get_write_lock;
		/* munlock_vma_pages_range() clears VM_LOCKED; restore it after. */
		vm_flags = vma->vm_flags;
		munlock_vma_pages_range(vma, start, start + size);
		vma->vm_flags = vm_flags;
	}

	mmu_notifier_invalidate_range_start(mm, start, start + size);
	err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
	mmu_notifier_invalidate_range_end(mm, start, start + size);

	/*
	 * We can't clear VM_NONLINEAR because we'd have to do
	 * it after ->populate completes, and that would prevent
	 * downgrading the lock.  (Locks can't be upgraded).
	 */

out:
	/* Snapshot vm_flags for the populate decision below, before unlocking. */
	if (vma)
		vm_flags = vma->vm_flags;
out_freed:
	if (likely(!has_write_lock))
		up_read(&mm->mmap_sem);
	else
		up_write(&mm->mmap_sem);
	/* Fault the range in now unless the caller asked for lazy population. */
	if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
		mm_populate(start, size);

	return err;
}