/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 *   mm->mmap_sem
 *     page->flags PG_locked (lock_page)
 *       mapping->i_mmap_rwsem
 *         anon_vma->rwsem
 *           mm->page_table_lock or pte_lock
 *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
 *             swap_lock (in swap_duplicate, swap_info_get)
 *               mmlist_lock (in mmput, drain_mmlist and others)
 *               mapping->private_lock (in __set_page_dirty_buffers)
 *                 mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
 *                   mapping->tree_lock (widely used)
 *                     inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 *                     bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 *                       sb_lock (within inode_lock in fs/fs-writeback.c)
 *                       mapping->tree_lock (widely used, in set_page_dirty,
 *                                 in arch-dependent flush_dcache_mmap_lock,
 *                                 within bdi.wb->list_lock in __sync_single_inode)
 *
 * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
 *   ->tasklist_lock
 *     pte map lock
 */

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/backing-dev.h>

#include <asm/tlbflush.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	struct anon_vma *anon_vma;

	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
	if (anon_vma) {
		atomic_set(&anon_vma->refcount, 1);
		anon_vma->degree = 1;	/* Reference for first vma */
		anon_vma->parent = anon_vma;
		/*
		 * Initialise the anon_vma root to point to itself. If called
		 * from fork, the root will be reset to the parent's anon_vma.
		 */
		anon_vma->root = anon_vma;
	}

	return anon_vma;
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	VM_BUG_ON(atomic_read(&anon_vma->refcount));

	/*
	 * Synchronize against page_lock_anon_vma_read() such that
	 * we can safely hold the lock without the anon_vma getting
	 * freed.
	 *
	 * Relies on the full mb implied by the atomic_dec_and_test() from
	 * put_anon_vma() against the acquire barrier implied by
	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
	 *
	 * page_lock_anon_vma_read()	VS	put_anon_vma()
	 *   down_read_trylock()		  atomic_dec_and_test()
	 *   LOCK				  MB
	 *   atomic_read()			  rwsem_is_locked()
	 *
	 * LOCK should suffice since the actual taking of the lock must
	 * happen _before_ what follows.
	 */
	might_sleep();
	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
		anon_vma_lock_write(anon_vma);
		anon_vma_unlock_write(anon_vma);
	}

	kmem_cache_free(anon_vma_cachep, anon_vma);
}
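
/*
 * Note the pairing: anon_vma_alloc() hands out the structure with
 * refcount == 1, so every allocation must eventually be balanced by a
 * put_anon_vma(); anon_vma_free() only runs once __put_anon_vma() drops
 * that final reference, which is when the rwsem dance above matters.
 */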

static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
}

static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}

static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}
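
/*
 * An anon_vma_chain is the many-to-many link between VMAs and anon_vmas:
 * it sits both on the vma->anon_vma_chain list (all anon_vmas this VMA is
 * attached to) and in the anon_vma->rb_root interval tree (all VMAs that
 * may hold pages of this anon_vma), so each side can enumerate the other.
 */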

/**
 * anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, but if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 * and that may actually touch the lock even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_sem held for reading.
 */
int anon_vma_prepare(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	struct anon_vma_chain *avc;

	might_sleep();
	if (unlikely(!anon_vma)) {
		struct mm_struct *mm = vma->vm_mm;
		struct anon_vma *allocated;

		avc = anon_vma_chain_alloc(GFP_KERNEL);
		if (!avc)
			goto out_enomem;

		anon_vma = find_mergeable_anon_vma(vma);
		allocated = NULL;
		if (!anon_vma) {
			anon_vma = anon_vma_alloc();
			if (unlikely(!anon_vma))
				goto out_enomem_free_avc;
			allocated = anon_vma;
		}

		anon_vma_lock_write(anon_vma);
		/* page_table_lock to protect against threads */
		spin_lock(&mm->page_table_lock);
		if (likely(!vma->anon_vma)) {
			vma->anon_vma = anon_vma;
			anon_vma_chain_link(vma, avc, anon_vma);
			/* vma reference or self-parent link for new root */
			anon_vma->degree++;
			allocated = NULL;
			avc = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		anon_vma_unlock_write(anon_vma);

		if (unlikely(allocated))
			put_anon_vma(allocated);
		if (unlikely(avc))
			anon_vma_chain_free(avc);
	}
	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}
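
/*
 * Typical call pattern (a sketch, not code from this file): a fault on an
 * anonymous VMA prepares the rmap before installing a new page, roughly:
 *
 *	if (unlikely(anon_vma_prepare(vma)))
 *		return VM_FAULT_OOM;
 *	page = alloc_zeroed_user_highpage_movable(vma, address);
 *	...
 *	page_add_new_anon_rmap(page, vma, address);
 */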

/*
 * This is a useful helper function for locking the anon_vma root as
 * we traverse the vma->anon_vma_chain, looping over anon_vma's that
 * have the same vma.
 *
 * Such anon_vma's should have the same root, so you'd expect to see
 * just a single down_write for the whole traversal.
 */
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
{
	struct anon_vma *new_root = anon_vma->root;

	if (new_root != root) {
		if (WARN_ON_ONCE(root))
			up_write(&root->rwsem);
		root = new_root;
		down_write(&root->rwsem);
	}
	return root;
}

static inline void unlock_anon_vma_root(struct anon_vma *root)
{
	if (root)
		up_write(&root->rwsem);
}

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * If dst->anon_vma is NULL this function tries to find and reuse an existing
 * anon_vma which has no vmas and only one child anon_vma. This prevents
 * degradation of the anon_vma hierarchy to an endless linear chain in the
 * case of a constantly forking task. On the other hand, an anon_vma with
 * more than one child isn't reused even if there is no live vma, so the
 * rmap walker has a good chance of avoiding scanning the whole hierarchy
 * when it searches where the page is mapped.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *avc, *pavc;
	struct anon_vma *root = NULL;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma;

		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!avc)) {
			unlock_anon_vma_root(root);
			root = NULL;
			avc = anon_vma_chain_alloc(GFP_KERNEL);
			if (!avc)
				goto enomem_failure;
		}
		anon_vma = pavc->anon_vma;
		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_chain_link(dst, avc, anon_vma);

		/*
		 * Reuse existing anon_vma if its degree is lower than two,
		 * which means it has no vma and only one anon_vma child.
		 *
		 * Do not choose the parent anon_vma, otherwise the first
		 * child will always reuse it. The root anon_vma is never
		 * reused: it has a self-parent reference and at least one
		 * child.
		 */
		if (!dst->anon_vma && anon_vma != src->anon_vma &&
				anon_vma->degree < 2)
			dst->anon_vma = anon_vma;
	}
	if (dst->anon_vma)
		dst->anon_vma->degree++;
	unlock_anon_vma_root(root);
	return 0;

 enomem_failure:
	/*
	 * dst->anon_vma is dropped here otherwise its degree can be
	 * incorrectly decremented in unlink_anon_vmas().
	 * We can safely do this because callers of anon_vma_clone() don't
	 * care about dst->anon_vma if anon_vma_clone() failed.
	 */
	dst->anon_vma = NULL;
	unlink_anon_vmas(dst);
	return -ENOMEM;
}
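
/*
 * For reference: anon_vma->degree counts both child anon_vmas and VMAs
 * that point at an anon_vma (see the increments in anon_vma_prepare(),
 * anon_vma_clone() and anon_vma_fork(), and the decrements in
 * unlink_anon_vmas()). The "degree < 2" reuse test above thus picks an
 * anon_vma that has lost all its VMAs and kept a single child.
 */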
5beb49305
|
290 291 292 293 294 295 |
/* * Attach vma to its own anon_vma, as well as to the anon_vmas that * the corresponding VMA in the parent process is attached to. * Returns 0 on success, non-zero on failure. */ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) |
1da177e4c
|
296 |
{ |
5beb49305
|
297 298 |
struct anon_vma_chain *avc; struct anon_vma *anon_vma; |
c4ea95d7c
|
299 |
int error; |
1da177e4c
|
300 |
|
5beb49305
|
301 302 303 |
/* Don't bother if the parent process has no anon_vma here. */ if (!pvma->anon_vma) return 0; |
7a3ef208e
|
304 305 |
/* Drop inherited anon_vma, we'll reuse existing or allocate new. */ vma->anon_vma = NULL; |
5beb49305
|
306 307 308 309 |
/* * First, attach the new VMA to the parent VMA's anon_vmas, * so rmap can find non-COWed pages in child processes. */ |
c4ea95d7c
|
310 311 312 |
error = anon_vma_clone(vma, pvma); if (error) return error; |
5beb49305
|
313 |
|
7a3ef208e
|
314 315 316 |
/* An existing anon_vma has been reused, all done then. */ if (vma->anon_vma) return 0; |
5beb49305
|
317 318 319 320 |
/* Then add our own anon_vma. */ anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; |
dd34739c0
|
321 |
avc = anon_vma_chain_alloc(GFP_KERNEL); |
5beb49305
|
322 323 |
if (!avc) goto out_error_free_anon_vma; |
5c341ee1d
|
324 325 326 327 328 329 |
/* * The root anon_vma's spinlock is the lock actually used when we * lock any of the anon_vmas in this anon_vma tree. */ anon_vma->root = pvma->anon_vma->root; |
7a3ef208e
|
330 |
anon_vma->parent = pvma->anon_vma; |
76545066c
|
331 |
/* |
01d8b20de
|
332 333 334 |
* With refcounts, an anon_vma can stay around longer than the * process it belongs to. The root anon_vma needs to be pinned until * this anon_vma is freed, because the lock lives in the root. |
76545066c
|
335 336 |
*/ get_anon_vma(anon_vma->root); |
5beb49305
|
337 338 |
/* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; |
4fc3f1d66
|
339 |
anon_vma_lock_write(anon_vma); |
5c341ee1d
|
340 |
anon_vma_chain_link(vma, avc, anon_vma); |
7a3ef208e
|
341 |
anon_vma->parent->degree++; |
08b52706d
|
342 |
anon_vma_unlock_write(anon_vma); |
5beb49305
|
343 344 345 346 |
return 0; out_error_free_anon_vma: |
01d8b20de
|
347 |
put_anon_vma(anon_vma); |
5beb49305
|
348 |
out_error: |
4946d54cb
|
349 |
unlink_anon_vmas(vma); |
5beb49305
|
350 |
return -ENOMEM; |
1da177e4c
|
351 |
} |
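
/*
 * A sketch of where this runs (the caller lives in kernel/fork.c, not
 * here): dup_mmap() walks the parent's VMA list during fork() and calls
 * anon_vma_fork(tmp, mpnt) for each copied VMA, so every child VMA ends
 * up chained to its parent's anon_vmas plus one anon_vma of its own for
 * the pages it COWs later.
 */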

void unlink_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc, *next;
	struct anon_vma *root = NULL;

	/*
	 * Unlink each anon_vma chained to the VMA. This list is ordered
	 * from newest to oldest, ensuring the root anon_vma gets freed last.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);

		/*
		 * Leave empty anon_vmas on the list - we'll need
		 * to free them outside the lock.
		 */
		if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
			anon_vma->parent->degree--;
			continue;
		}

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
	if (vma->anon_vma)
		vma->anon_vma->degree--;
	unlock_anon_vma_root(root);

	/*
	 * Iterate the list once more, it now only contains empty and unlinked
	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
	 * needing to write-acquire the anon_vma->root->rwsem.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		BUG_ON(anon_vma->degree);
		put_anon_vma(anon_vma);

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
}

static void anon_vma_ctor(void *data)
{
	struct anon_vma *anon_vma = data;

	init_rwsem(&anon_vma->rwsem);
	atomic_set(&anon_vma->refcount, 0);
	anon_vma->rb_root = RB_ROOT;
}

void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
 *
 * Since there is no serialization whatsoever against page_remove_rmap()
 * the best this function can do is return a locked anon_vma that might
 * have been relevant to this page.
 *
 * The page might have been remapped to a different anon_vma or the anon_vma
 * returned may already be freed (and even reused).
 *
 * In case it was remapped to a different anon_vma, the new anon_vma will be a
 * child of the old anon_vma, and the anon_vma lifetime rules will therefore
 * ensure that any anon_vma obtained from the page will still be valid for as
 * long as we observe page_mapped() [ hence all those page_mapped() tests ].
 *
 * All users of this function must be very careful when walking the anon_vma
 * chain and verify that the page in question is indeed mapped in it
 * [ something equivalent to page_mapped_in_vma() ].
 *
 * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
 * that the anon_vma pointer from page->mapping is valid if there is a
 * mapcount, we can dereference the anon_vma after observing those.
 */
struct anon_vma *page_get_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	/*
	 * If this page is still mapped, then its anon_vma cannot have been
	 * freed. But if it has been unmapped, we have no security against the
	 * anon_vma structure being freed and reused (for another anon_vma:
	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
	 * above cannot corrupt).
	 */
	if (!page_mapped(page)) {
		rcu_read_unlock();
		put_anon_vma(anon_vma);
		return NULL;
	}
out:
	rcu_read_unlock();

	return anon_vma;
}
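
/*
 * Caller-side sketch: page_get_anon_vma() returns a pinned but unlocked
 * anon_vma, so the caller owns a reference and must drop it:
 *
 *	anon_vma = page_get_anon_vma(page);
 *	if (anon_vma) {
 *		... walk or inspect, revalidating page_mapped() ...
 *		put_anon_vma(anon_vma);
 *	}
 */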

/*
 * Similar to page_get_anon_vma() except it locks the anon_vma.
 *
 * It's a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with page_get_anon_vma() and then block on the rwsem.
 */
struct anon_vma *page_lock_anon_vma_read(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	struct anon_vma *root_anon_vma;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	root_anon_vma = READ_ONCE(anon_vma->root);
	if (down_read_trylock(&root_anon_vma->rwsem)) {
		/*
		 * If the page is still mapped, then this anon_vma is still
		 * its anon_vma, and holding the rwsem ensures that it will
		 * not go away, see anon_vma_free().
		 */
		if (!page_mapped(page)) {
			up_read(&root_anon_vma->rwsem);
			anon_vma = NULL;
		}
		goto out;
	}

	/* trylock failed, we've got to sleep */
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	if (!page_mapped(page)) {
		rcu_read_unlock();
		put_anon_vma(anon_vma);
		return NULL;
	}

	/* we pinned the anon_vma, it's safe to sleep */
	rcu_read_unlock();
	anon_vma_lock_read(anon_vma);

	if (atomic_dec_and_test(&anon_vma->refcount)) {
		/*
		 * Oops, we held the last refcount, release the lock
		 * and bail -- can't simply use put_anon_vma() because
		 * we'll deadlock on the anon_vma_lock_write() recursion.
		 */
		anon_vma_unlock_read(anon_vma);
		__put_anon_vma(anon_vma);
		anon_vma = NULL;
	}

	return anon_vma;

out:
	rcu_read_unlock();
	return anon_vma;
}

void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
{
	anon_vma_unlock_read(anon_vma);
}
3ad33b243
|
544 |
* At what user virtual address is page expected in @vma? |
1da177e4c
|
545 |
*/ |
86c2ad199
|
546 547 |
static inline unsigned long __vma_address(struct page *page, struct vm_area_struct *vma) |
1da177e4c
|
548 |
{ |
a0f7a756c
|
549 |
pgoff_t pgoff = page_to_pgoff(page); |
86c2ad199
|
550 551 552 553 554 555 556 557 558 |
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); } inline unsigned long vma_address(struct page *page, struct vm_area_struct *vma) { unsigned long address = __vma_address(page, vma); /* page should be within @vma mapping range */ |
81d1b09c6
|
559 |
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); |
86c2ad199
|
560 |
|
1da177e4c
|
561 562 563 564 |
return address; } /* |
bf89c8c86
|
565 |
* At what user virtual address is page expected in vma? |
ab941e0ff
|
566 |
* Caller should check the page is actually part of the vma. |
1da177e4c
|
567 568 569 |
*/ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { |
86c2ad199
|
570 |
unsigned long address; |
21d0d443c
|
571 |
if (PageAnon(page)) { |
4829b906c
|
572 573 574 575 576 577 578 |
struct anon_vma *page__anon_vma = page_anon_vma(page); /* * Note: swapoff's unuse_vma() is more efficient with this * check, and needs it to match anon_vma when KSM is active. */ if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) |
21d0d443c
|
579 |
return -EFAULT; |
27ba0644e
|
580 581 |
} else if (page->mapping) { if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) |
1da177e4c
|
582 583 584 |
return -EFAULT; } else return -EFAULT; |
86c2ad199
|
585 586 587 588 |
address = __vma_address(page, vma); if (unlikely(address < vma->vm_start || address >= vma->vm_end)) return -EFAULT; return address; |
1da177e4c
|
589 |
} |
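
/*
 * Worked example of the arithmetic above: for a page with
 * page->index (pgoff) == 100 mapped by a VMA with vm_pgoff == 64 and
 * vm_start == 0x700000000000, with 4K pages the page is expected at
 * 0x700000000000 + ((100 - 64) << 12) == 0x700000024000.
 */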

pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd = NULL;
	pmd_t pmde;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, address);
	/*
	 * Some THP functions use the sequence pmdp_huge_clear_flush(),
	 * set_pmd_at() without holding the anon_vma lock for write. So when
	 * looking for a genuine pmde (in which to find the pte), test present
	 * and !THP together.
	 */
	pmde = *pmd;
	barrier();
	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
		pmd = NULL;
out:
	return pmd;
}
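
/*
 * The walk above descends the (then four-level) page tables one step at a
 * time: pgd_offset() indexes the top-level directory by address, and
 * pud_offset() and pmd_offset() each index the next level down. Only a
 * present, non-huge pmd is worth handing back, since the callers want to
 * locate an individual pte underneath it.
 */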

/*
 * Check that @page is mapped at @address into @mm.
 *
 * If @sync is false, page_check_address may perform a racy check to avoid
 * the page table lock when the pte is not present (helpful when reclaiming
 * highly shared pages).
 *
 * On success returns with pte mapped and locked.
 */
pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
			  unsigned long address, spinlock_t **ptlp, int sync)
{
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	if (unlikely(PageHuge(page))) {
		/* when pud is not present, pte will be NULL */
		pte = huge_pte_offset(mm, address);
		if (!pte)
			return NULL;

		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
		goto check;
	}

	pmd = mm_find_pmd(mm, address);
	if (!pmd)
		return NULL;

	pte = pte_offset_map(pmd, address);
	/* Make a quick check before getting the lock */
	if (!sync && !pte_present(*pte)) {
		pte_unmap(pte);
		return NULL;
	}

	ptl = pte_lockptr(mm, pmd);
check:
	spin_lock(ptl);
	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
		*ptlp = ptl;
		return pte;
	}
	pte_unmap_unlock(pte, ptl);
	return NULL;
}
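
/*
 * Usage sketch: on success the returned pte is mapped and its lock is
 * held, so every caller in this file follows the pattern
 *
 *	pte = page_check_address(page, mm, address, &ptl, 0);
 *	if (!pte)
 *		return ...;
 *	... inspect or modify *pte ...
 *	pte_unmap_unlock(pte, ptl);
 */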

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	unsigned long address;
	pte_t *pte;
	spinlock_t *ptl;

	address = __vma_address(page, vma);
	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
		return 0;
	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
	if (!pte)			/* the page is not in this mm */
		return 0;
	pte_unmap_unlock(pte, ptl);

	return 1;
}

struct page_referenced_arg {
	int mapcount;
	int referenced;
	unsigned long vm_flags;
	struct mem_cgroup *memcg;
};
/*
 * arg: page_referenced_arg will be passed
 */
static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
			unsigned long address, void *arg)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	int referenced = 0;
	struct page_referenced_arg *pra = arg;

	if (unlikely(PageTransHuge(page))) {
		pmd_t *pmd;

		/*
		 * rmap might return false positives; we must filter
		 * these out using page_check_address_pmd().
		 */
		pmd = page_check_address_pmd(page, mm, address,
					     PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
		if (!pmd)
			return SWAP_AGAIN;

		if (vma->vm_flags & VM_LOCKED) {
			spin_unlock(ptl);
			pra->vm_flags |= VM_LOCKED;
			return SWAP_FAIL; /* To break the loop */
		}

		/* go ahead even if the pmd is pmd_trans_splitting() */
		if (pmdp_clear_flush_young_notify(vma, address, pmd))
			referenced++;
		spin_unlock(ptl);
	} else {
		pte_t *pte;

		/*
		 * rmap might return false positives; we must filter
		 * these out using page_check_address().
		 */
		pte = page_check_address(page, mm, address, &ptl, 0);
		if (!pte)
			return SWAP_AGAIN;

		if (vma->vm_flags & VM_LOCKED) {
			pte_unmap_unlock(pte, ptl);
			pra->vm_flags |= VM_LOCKED;
			return SWAP_FAIL; /* To break the loop */
		}

		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			/*
			 * Don't treat a reference through a sequentially read
			 * mapping as such. If the page has been used in
			 * another mapping, we will catch it; if this other
			 * mapping is already gone, the unmap path will have
			 * set PG_referenced or activated the page.
			 */
			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
				referenced++;
		}
		pte_unmap_unlock(pte, ptl);
	}

	if (referenced) {
		pra->referenced++;
		pra->vm_flags |= vma->vm_flags;
	}

	pra->mapcount--;
	if (!pra->mapcount)
		return SWAP_SUCCESS; /* To break the loop */

	return SWAP_AGAIN;
}

static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
{
	struct page_referenced_arg *pra = arg;
	struct mem_cgroup *memcg = pra->memcg;

	if (!mm_match_cgroup(vma->vm_mm, memcg))
		return true;

	return false;
}

/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 * @is_locked: caller holds lock on the page
 * @memcg: target memory cgroup
 * @vm_flags: collect the encountered vma->vm_flags of the VMAs that
 *            actually referenced the page
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 */
int page_referenced(struct page *page,
		    int is_locked,
		    struct mem_cgroup *memcg,
		    unsigned long *vm_flags)
{
	int ret;
	int we_locked = 0;
	struct page_referenced_arg pra = {
		.mapcount = page_mapcount(page),
		.memcg = memcg,
	};
	struct rmap_walk_control rwc = {
		.rmap_one = page_referenced_one,
		.arg = (void *)&pra,
		.anon_lock = page_lock_anon_vma_read,
	};

	*vm_flags = 0;
	if (!page_mapped(page))
		return 0;

	if (!page_rmapping(page))
		return 0;

	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
		we_locked = trylock_page(page);
		if (!we_locked)
			return 1;
	}

	/*
	 * If we are reclaiming on behalf of a cgroup, skip
	 * counting on behalf of references from different
	 * cgroups.
	 */
	if (memcg) {
		rwc.invalid_vma = invalid_page_referenced_vma;
	}

	ret = rmap_walk(page, &rwc);
	*vm_flags = pra.vm_flags;

	if (we_locked)
		unlock_page(page);

	return pra.referenced;
}
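
/*
 * Caller-side sketch (the real logic lives in mm/vmscan.c, not here):
 * reclaim uses the returned count and vm_flags to grade a page, along
 * the lines of
 *
 *	referenced = page_referenced(page, 1, sc->target_mem_cgroup,
 *				     &vm_flags);
 *	if (vm_flags & VM_LOCKED)
 *		... keep the page, it is mlocked ...
 *	else if (referenced)
 *		... young page, consider keeping it active ...
 */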

static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
			    unsigned long address, void *arg)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	spinlock_t *ptl;
	int ret = 0;
	int *cleaned = arg;

	pte = page_check_address(page, mm, address, &ptl, 1);
	if (!pte)
		goto out;

	if (pte_dirty(*pte) || pte_write(*pte)) {
		pte_t entry;

		flush_cache_page(vma, address, pte_pfn(*pte));
		entry = ptep_clear_flush(vma, address, pte);
		entry = pte_wrprotect(entry);
		entry = pte_mkclean(entry);
		set_pte_at(mm, address, pte, entry);
		ret = 1;
	}

	pte_unmap_unlock(pte, ptl);

	if (ret) {
		mmu_notifier_invalidate_page(mm, address);
		(*cleaned)++;
	}
out:
	return SWAP_AGAIN;
}

static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
{
	if (vma->vm_flags & VM_SHARED)
		return false;

	return true;
}

int page_mkclean(struct page *page)
{
	int cleaned = 0;
	struct address_space *mapping;
	struct rmap_walk_control rwc = {
		.arg = (void *)&cleaned,
		.rmap_one = page_mkclean_one,
		.invalid_vma = invalid_mkclean_vma,
	};

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	rmap_walk(page, &rwc);

	return cleaned;
}
EXPORT_SYMBOL_GPL(page_mkclean);
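
/*
 * What page_mkclean() buys its callers: by write-protecting and cleaning
 * every pte that maps the page, the next store through any mapping
 * faults and is noticed, so a writeback path such as
 * clear_page_dirty_for_io() can reliably track pages re-dirtied while
 * they are being cleaned.
 */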

/**
 * page_move_anon_rmap - move a page to our anon_vma
 * @page:	the page to move to our anon_vma
 * @vma:	the vma the page belongs to
 * @address:	the user virtual address mapped
 *
 * When a page belongs exclusively to one process after a COW event,
 * that page can be moved into the anon_vma that belongs to just that
 * process, so the rmap code will not search the parent or sibling
 * processes.
 */
void page_move_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_VMA(!anon_vma, vma);
	VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	/*
	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
	 * simultaneously, so a concurrent reader (eg page_referenced()'s
	 * PageAnon()) will not see one without the other.
	 */
	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
}

/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page:	Page to add to rmap
 * @vma:	VM area to add page to.
 * @address:	User virtual address of the mapping
 * @exclusive:	the page is exclusively owned by the current process
 */
static void __page_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;

	/*
	 * If the page isn't exclusively mapped into this vma,
	 * we must use the _oldest_ possible anon_vma for the
	 * page mapping!
	 */
	if (!exclusive)
		anon_vma = anon_vma->root;

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}

/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * The page's anon-rmap details (mapping and index) are guaranteed to
	 * be set up correctly at this point.
	 *
	 * We have exclusion against page_add_anon_rmap because the caller
	 * always holds the page locked, except if called from page_dup_rmap,
	 * in which case the page is already known to be setup.
	 *
	 * We have exclusion against page_add_new_anon_rmap because those pages
	 * are initially only visible via the pagetables, and the pte is locked
	 * over the call to page_add_new_anon_rmap.
	 */
	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}

/**
 * page_add_anon_rmap - add pte mapping to an anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * The caller needs to hold the pte lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting,
 * and to ensure that PageAnon is not being upgraded racily to PageKsm
 * (but PageKsm is never downgraded to PageAnon).
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	do_page_add_anon_rmap(page, vma, address, 0);
}

/*
 * Special version of the above for do_swap_page, which often runs
 * into pages that are exclusively owned by the current process.
 * Everybody else should continue to use page_add_anon_rmap above.
 */
void do_page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	int first = atomic_inc_and_test(&page->_mapcount);
	if (first) {
		/*
		 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
		 * these counters are not modified in interrupt context, and
		 * the pte lock (a spinlock) is held, which implies preemption
		 * is disabled.
		 */
		if (PageTransHuge(page))
			__inc_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
		__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
				hpage_nr_pages(page));
	}
	if (unlikely(PageKsm(page)))
		return;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	/* address might be in next vma when migration races vma_adjust */
	if (first)
		__page_set_anon_rmap(page, vma, address, exclusive);
	else
		__page_check_anon_rmap(page, vma, address);
}
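
/*
 * Three variants, one rule of thumb: page_add_new_anon_rmap() below is
 * for pages freshly allocated by this process (mapcount starts at 0 and
 * the rmap is set up exclusively), do_page_add_anon_rmap() lets
 * do_swap_page() pass its own exclusivity hint, and page_add_anon_rmap()
 * is the general form that assumes a possibly-shared page.
 */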

/**
 * page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * Same as page_add_anon_rmap but must only be called on *new* pages.
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	SetPageSwapBacked(page);
	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
	if (PageTransHuge(page))
		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
			hpage_nr_pages(page));
	__page_set_anon_rmap(page, vma, address, 1);
}

/**
 * page_add_file_rmap - add pte mapping to a file page
 * @page: the page to add the mapping to
 *
 * The caller needs to hold the pte lock.
 */
void page_add_file_rmap(struct page *page)
{
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_begin_page_stat(page);
	if (atomic_inc_and_test(&page->_mapcount)) {
		__inc_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
	}
	mem_cgroup_end_page_stat(memcg);
}

static void page_remove_file_rmap(struct page *page)
{
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_begin_page_stat(page);

	/* page still mapped by someone else? */
	if (!atomic_add_negative(-1, &page->_mapcount))
		goto out;

	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
	if (unlikely(PageHuge(page)))
		goto out;

	/*
	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
	 * these counters are not modified in interrupt context, and
	 * the pte lock (a spinlock) is held, which implies preemption
	 * is disabled.
	 */
	__dec_zone_page_state(page, NR_FILE_MAPPED);
	mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);

	if (unlikely(PageMlocked(page)))
		clear_page_mlock(page);
out:
	mem_cgroup_end_page_stat(memcg);
}

/**
 * page_remove_rmap - take down pte mapping from a page
 * @page: page to remove mapping from
 *
 * The caller needs to hold the pte lock.
 */
void page_remove_rmap(struct page *page)
{
	if (!PageAnon(page)) {
		page_remove_file_rmap(page);
		return;
	}

	/* page still mapped by someone else? */
	if (!atomic_add_negative(-1, &page->_mapcount))
		return;

	/* Hugepages are not counted in NR_ANON_PAGES for now. */
	if (unlikely(PageHuge(page)))
		return;

	/*
	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
	 * these counters are not modified in interrupt context, and
	 * the pte lock (a spinlock) is held, which implies preemption
	 * is disabled.
	 */
	if (PageTransHuge(page))
		__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);

	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
			      -hpage_nr_pages(page));

	if (unlikely(PageMlocked(page)))
		clear_page_mlock(page);

	/*
	 * It would be tidy to reset the PageAnon mapping here,
	 * but that might overwrite a racing page_add_anon_rmap
	 * which increments mapcount after us but sets mapping
	 * before us: so leave the reset to free_hot_cold_page,
	 * and remember that it's only reliable while mapped.
	 * Leaving it set also helps swapoff to reinstate ptes
	 * faster for those pages still in swapcache.
	 */
}

/*
 * @arg: enum ttu_flags will be passed to this argument
 */
static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		     unsigned long address, void *arg)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	int ret = SWAP_AGAIN;
	enum ttu_flags flags = (enum ttu_flags)arg;

	pte = page_check_address(page, mm, address, &ptl, 0);
	if (!pte)
		goto out;

	/*
	 * If the page is mlock()d, we cannot swap it out.
	 * If it's recently referenced (perhaps page_referenced
	 * skipped over this mm) then we should reactivate it.
	 */
	if (!(flags & TTU_IGNORE_MLOCK)) {
		if (vma->vm_flags & VM_LOCKED)
			goto out_mlock;

		if (flags & TTU_MUNLOCK)
			goto out_unmap;
	}
	if (!(flags & TTU_IGNORE_ACCESS)) {
		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			ret = SWAP_FAIL;
			goto out_unmap;
		}
	}

	/* Nuke the page table entry. */
	flush_cache_page(vma, address, page_to_pfn(page));
	pteval = ptep_clear_flush(vma, address, pte);

	/* Move the dirty bit to the physical page now the pte is gone. */
	if (pte_dirty(pteval))
		set_page_dirty(page);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
		if (!PageHuge(page)) {
			if (PageAnon(page))
				dec_mm_counter(mm, MM_ANONPAGES);
			else
				dec_mm_counter(mm, MM_FILEPAGES);
		}
		set_pte_at(mm, address, pte,
			   swp_entry_to_pte(make_hwpoison_entry(page)));
	} else if (pte_unused(pteval)) {
		/*
		 * The guest indicated that the page content is of no
		 * interest anymore. Simply discard the pte, vmscan
		 * will take care of the rest.
		 */
		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
	} else if (PageAnon(page)) {
		swp_entry_t entry = { .val = page_private(page) };
		pte_t swp_pte;

		if (PageSwapCache(page)) {
			/*
			 * Store the swap location in the pte.
			 * See handle_pte_fault() ...
			 */
			if (swap_duplicate(entry) < 0) {
				set_pte_at(mm, address, pte, pteval);
				ret = SWAP_FAIL;
				goto out_unmap;
			}
			if (list_empty(&mm->mmlist)) {
				spin_lock(&mmlist_lock);
				if (list_empty(&mm->mmlist))
					list_add(&mm->mmlist, &init_mm.mmlist);
				spin_unlock(&mmlist_lock);
			}
			dec_mm_counter(mm, MM_ANONPAGES);
			inc_mm_counter(mm, MM_SWAPENTS);
		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
			/*
			 * Store the pfn of the page in a special migration
			 * pte. do_swap_page() will wait until the migration
			 * pte is removed and then restart fault handling.
			 */
			BUG_ON(!(flags & TTU_MIGRATION));
			entry = make_migration_entry(page, pte_write(pteval));
		}
		swp_pte = swp_entry_to_pte(entry);
		if (pte_soft_dirty(pteval))
			swp_pte = pte_swp_mksoft_dirty(swp_pte);
		set_pte_at(mm, address, pte, swp_pte);
	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
		   (flags & TTU_MIGRATION)) {
		/* Establish migration entry for a file page */
		swp_entry_t entry;
		entry = make_migration_entry(page, pte_write(pteval));
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
	} else
		dec_mm_counter(mm, MM_FILEPAGES);

	page_remove_rmap(page);
	page_cache_release(page);

out_unmap:
	pte_unmap_unlock(pte, ptl);
	if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
		mmu_notifier_invalidate_page(mm, address);
out:
	return ret;

out_mlock:
	pte_unmap_unlock(pte, ptl);

	/*
	 * We need mmap_sem locking, otherwise the VM_LOCKED check gives an
	 * unstable result and can race. Plus, we can't wait here because
	 * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
	 * If the trylock failed, the page remains on the evictable lru and
	 * later vmscan can retry moving the page to the unevictable lru if
	 * the page is actually mlocked.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		if (vma->vm_flags & VM_LOCKED) {
			mlock_vma_page(page);
			ret = SWAP_MLOCK;
		}
		up_read(&vma->vm_mm->mmap_sem);
	}
	return ret;
}

bool is_vma_temporary_stack(struct vm_area_struct *vma)
{
	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);

	if (!maybe_stack)
		return false;

	if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
						VM_STACK_INCOMPLETE_SETUP)
		return true;

	return false;
}

static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
{
	return is_vma_temporary_stack(vma);
}

static int page_not_mapped(struct page *page)
{
	return !page_mapped(page);
}

/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this
 * page, used in the pageout path.  Caller must hold the page lock.
 * Return values are:
 *
 * SWAP_SUCCESS	- we succeeded in removing all mappings
 * SWAP_AGAIN	- we missed a mapping, try again later
 * SWAP_FAIL	- the page is unswappable
 * SWAP_MLOCK	- page is mlocked.
 */
int try_to_unmap(struct page *page, enum ttu_flags flags)
{
	int ret;
	struct rmap_walk_control rwc = {
		.rmap_one = try_to_unmap_one,
		.arg = (void *)flags,
		.done = page_not_mapped,
		.anon_lock = page_lock_anon_vma_read,
	};

	VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);

	/*
	 * During exec, a temporary VMA is set up and later moved.
	 * The VMA is moved under the anon_vma lock but not the
	 * page tables, leading to a race where migration cannot
	 * find the migration ptes. Rather than increasing the
	 * locking requirements of exec(), migration skips
	 * temporary VMAs until after exec() completes.
	 */
	if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page))
		rwc.invalid_vma = invalid_migration_vma;

	ret = rmap_walk(page, &rwc);

	if (ret != SWAP_MLOCK && !page_mapped(page))
		ret = SWAP_SUCCESS;
	return ret;
}
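
/*
 * Caller-side sketch (reclaim lives in mm/vmscan.c, not here): the
 * pageout path holds the page lock and reacts to each return code,
 * roughly:
 *
 *	switch (try_to_unmap(page, ttu_flags)) {
 *	case SWAP_FAIL:
 *		... activate the page again ...
 *	case SWAP_AGAIN:
 *		... keep it for a later pass ...
 *	case SWAP_MLOCK:
 *		... leave it for the mlock/unevictable handling ...
 *	case SWAP_SUCCESS:
 *		... all ptes gone, proceed to pageout ...
 *	}
 */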
81b4082dc
|
1339 |
|
b291f0003
|
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 |
/** * try_to_munlock - try to munlock a page * @page: the page to be munlocked * * Called from munlock code. Checks all of the VMAs mapping the page * to make sure nobody else has this page mlocked. The page will be * returned with PG_mlocked cleared if no other vmas have it mlocked. * * Return values are: * |
53f79acb6
|
1350 |
* SWAP_AGAIN - no vma is holding page mlocked, or, |
b291f0003
|
1351 |
* SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem |
5ad646880
|
1352 |
* SWAP_FAIL - page cannot be located at present |
b291f0003
|
1353 1354 1355 1356 |
* SWAP_MLOCK - page is now mlocked. */ int try_to_munlock(struct page *page) { |
e8351ac9b
|
1357 1358 1359 1360 1361 |
int ret; struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)TTU_MUNLOCK, .done = page_not_mapped, |
e8351ac9b
|
1362 1363 1364 |
.anon_lock = page_lock_anon_vma_read, }; |
309381fea
|
1365 |
VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); |
b291f0003
|
1366 |
|
e8351ac9b
|
1367 1368 |
ret = rmap_walk(page, &rwc); return ret; |
b291f0003
|
1369 |
} |

void __put_anon_vma(struct anon_vma *anon_vma)
{
	struct anon_vma *root = anon_vma->root;

	anon_vma_free(anon_vma);
	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
		anon_vma_free(root);
}

static struct anon_vma *rmap_walk_anon_lock(struct page *page,
					struct rmap_walk_control *rwc)
{
	struct anon_vma *anon_vma;

	if (rwc->anon_lock)
		return rwc->anon_lock(page);

	/*
	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
	 * because that depends on page_mapped(); but not all its usages
	 * are holding mmap_sem. Users without mmap_sem are required to
	 * take a reference count to prevent the anon_vma from disappearing.
	 */
	anon_vma = page_anon_vma(page);
	if (!anon_vma)
		return NULL;

	anon_vma_lock_read(anon_vma);
	return anon_vma;
}

/*
 * rmap_walk_anon - do something to anonymous page using the object-based
 * rmap method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 *
 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * VM_LOCKED.
 */
static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
{
	struct anon_vma *anon_vma;
	pgoff_t pgoff;
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	anon_vma = rmap_walk_anon_lock(page, rwc);
	if (!anon_vma)
		return ret;

	pgoff = page_to_pgoff(page);
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);

		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
			continue;

		ret = rwc->rmap_one(page, vma, address, rwc->arg);
		if (ret != SWAP_AGAIN)
			break;
		if (rwc->done && rwc->done(page))
			break;
	}
	anon_vma_unlock_read(anon_vma);
	return ret;
}

/*
 * rmap_walk_file - do something to file page using the object-based rmap
 * method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * VM_LOCKED.
 */
static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff;
	struct vm_area_struct *vma;
	int ret = SWAP_AGAIN;

	/*
	 * The page lock not only makes sure that page->mapping cannot
	 * suddenly be NULLified by truncation, it makes sure that the
	 * structure at mapping cannot be freed and reused yet,
	 * so we can safely take mapping->i_mmap_rwsem.
	 */
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (!mapping)
		return ret;

	pgoff = page_to_pgoff(page);
	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);

		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
			continue;

		ret = rwc->rmap_one(page, vma, address, rwc->arg);
		if (ret != SWAP_AGAIN)
			goto done;
		if (rwc->done && rwc->done(page))
			goto done;
	}

done:
	i_mmap_unlock_read(mapping);
	return ret;
}

int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
{
	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rwc);
	else if (PageAnon(page))
		return rmap_walk_anon(page, rwc);
	else
		return rmap_walk_file(page, rwc);
}
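
/*
 * Dispatch order matters above: KSM pages also carry PAGE_MAPPING_ANON
 * in page->mapping (PageAnon() would be true for them as well), so the
 * PageKsm() test must come first to route them to the KSM-specific
 * walker.
 */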

#ifdef CONFIG_HUGETLB_PAGE
/*
 * The following three functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
static void __hugepage_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;
	if (!exclusive)
		anon_vma = anon_vma->root;

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}

void hugepage_add_anon_rmap(struct page *page,
			    struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	int first;

	BUG_ON(!PageLocked(page));
	BUG_ON(!anon_vma);
	/* address might be in next vma when migration races vma_adjust */
	first = atomic_inc_and_test(&page->_mapcount);
	if (first)
		__hugepage_set_anon_rmap(page, vma, address, 0);
}

void hugepage_add_new_anon_rmap(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	atomic_set(&page->_mapcount, 0);
	__hugepage_set_anon_rmap(page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */