mm/rmap.c
  /*
   * mm/rmap.c - physical to virtual reverse mappings
   *
   * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
   * Released under the General Public License (GPL).
   *
   * Simple, low overhead reverse mapping scheme.
   * Please try to keep this thing as modular as possible.
   *
   * Provides methods for unmapping each kind of mapped page:
   * the anon methods track anonymous pages, and
   * the file methods track pages belonging to an inode.
   *
   * Original design by Rik van Riel <riel@conectiva.com.br> 2001
   * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
   * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
   * Contributions by Hugh Dickins 2003, 2004
   */
  
  /*
   * Lock ordering in mm:
   *
   * inode->i_mutex	(while writing or truncating, not reading or faulting)
   *   mm->mmap_sem
   *     page->flags PG_locked (lock_page)
   *       mapping->i_mmap_rwsem
   *         anon_vma->rwsem
   *           mm->page_table_lock or pte_lock
   *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
   *             swap_lock (in swap_duplicate, swap_info_get)
   *               mmlist_lock (in mmput, drain_mmlist and others)
   *               mapping->private_lock (in __set_page_dirty_buffers)
   *                 mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
   *                   mapping->tree_lock (widely used)
   *               inode->i_lock (in set_page_dirty's __mark_inode_dirty)
   *               bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
   *                 sb_lock (within inode_lock in fs/fs-writeback.c)
   *                 mapping->tree_lock (widely used, in set_page_dirty,
   *                           in arch-dependent flush_dcache_mmap_lock,
   *                           within bdi.wb->list_lock in __sync_single_inode)
   *
   * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
   *   ->tasklist_lock
   *     pte map lock
   */
  
  #include <linux/mm.h>
  #include <linux/pagemap.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/ksm.h>
  #include <linux/rmap.h>
  #include <linux/rcupdate.h>
  #include <linux/export.h>
  #include <linux/memcontrol.h>
  #include <linux/mmu_notifier.h>
  #include <linux/migrate.h>
  #include <linux/hugetlb.h>
  #include <linux/backing-dev.h>
  
  #include <asm/tlbflush.h>
  #include "internal.h"
  static struct kmem_cache *anon_vma_cachep;
  static struct kmem_cache *anon_vma_chain_cachep;
  
  static inline struct anon_vma *anon_vma_alloc(void)
  {
  	struct anon_vma *anon_vma;
  
  	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
  	if (anon_vma) {
  		atomic_set(&anon_vma->refcount, 1);
  		anon_vma->degree = 1;	/* Reference for first vma */
  		anon_vma->parent = anon_vma;
  		/*
  		 * Initialise the anon_vma root to point to itself. If called
  		 * from fork, the root will be reset to the parent's anon_vma.
  		 */
  		anon_vma->root = anon_vma;
  	}
  
  	return anon_vma;
  }
  static inline void anon_vma_free(struct anon_vma *anon_vma)
  {
  	VM_BUG_ON(atomic_read(&anon_vma->refcount));
  
  	/*
  	 * Synchronize against page_lock_anon_vma_read() such that
  	 * we can safely hold the lock without the anon_vma getting
  	 * freed.
  	 *
  	 * Relies on the full mb implied by the atomic_dec_and_test() from
  	 * put_anon_vma() against the acquire barrier implied by
  	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
  	 *
  	 * page_lock_anon_vma_read()	VS	put_anon_vma()
  	 *   down_read_trylock()		  atomic_dec_and_test()
  	 *   LOCK				  MB
  	 *   atomic_read()			  rwsem_is_locked()
  	 *
  	 * LOCK should suffice since the actual taking of the lock must
  	 * happen _before_ what follows.
  	 */
  	might_sleep();
  	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
  		anon_vma_lock_write(anon_vma);
  		anon_vma_unlock_write(anon_vma);
  	}
  	kmem_cache_free(anon_vma_cachep, anon_vma);
  }

  static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
  {
  	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
  }
  static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
  {
  	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
  }
  static void anon_vma_chain_link(struct vm_area_struct *vma,
  				struct anon_vma_chain *avc,
  				struct anon_vma *anon_vma)
  {
  	avc->vma = vma;
  	avc->anon_vma = anon_vma;
  	list_add(&avc->same_vma, &vma->anon_vma_chain);
  	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
  }
  /**
   * anon_vma_prepare - attach an anon_vma to a memory region
   * @vma: the memory region in question
   *
   * This makes sure the memory mapping described by 'vma' has
   * an 'anon_vma' attached to it, so that we can associate the
   * anonymous pages mapped into it with that anon_vma.
   *
   * The common case will be that we already have one, but if
   * not we either need to find an adjacent mapping that we
   * can re-use the anon_vma from (very common when the only
   * reason for splitting a vma has been mprotect()), or we
   * allocate a new one.
   *
   * Anon-vma allocations are very subtle, because we may have
   * optimistically looked up an anon_vma in page_lock_anon_vma_read()
   * and that may actually touch the spinlock even in the newly
   * allocated vma (it depends on RCU to make sure that the
   * anon_vma isn't actually destroyed).
   *
   * As a result, we need to do proper anon_vma locking even
   * for the new allocation. At the same time, we do not want
   * to do any locking for the common case of already having
   * an anon_vma.
   *
   * This must be called with the mmap_sem held for reading.
   */
  int anon_vma_prepare(struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  	struct anon_vma_chain *avc;
  
  	might_sleep();
  	if (unlikely(!anon_vma)) {
  		struct mm_struct *mm = vma->vm_mm;
  		struct anon_vma *allocated;

  		avc = anon_vma_chain_alloc(GFP_KERNEL);
  		if (!avc)
  			goto out_enomem;
  		anon_vma = find_mergeable_anon_vma(vma);
  		allocated = NULL;
  		if (!anon_vma) {
  			anon_vma = anon_vma_alloc();
  			if (unlikely(!anon_vma))
  				goto out_enomem_free_avc;
  			allocated = anon_vma;
  		}
  		anon_vma_lock_write(anon_vma);
  		/* page_table_lock to protect against threads */
  		spin_lock(&mm->page_table_lock);
  		if (likely(!vma->anon_vma)) {
  			vma->anon_vma = anon_vma;
  			anon_vma_chain_link(vma, avc, anon_vma);
  			/* vma reference or self-parent link for new root */
  			anon_vma->degree++;
  			allocated = NULL;
  			avc = NULL;
  		}
  		spin_unlock(&mm->page_table_lock);
  		anon_vma_unlock_write(anon_vma);
  
  		if (unlikely(allocated))
  			put_anon_vma(allocated);
  		if (unlikely(avc))
  			anon_vma_chain_free(avc);
  	}
  	return 0;
  
   out_enomem_free_avc:
  	anon_vma_chain_free(avc);
   out_enomem:
  	return -ENOMEM;
  }
  /*
   * This is a useful helper function for locking the anon_vma root as
   * we traverse the vma->anon_vma_chain, looping over anon_vma's that
   * have the same vma.
   *
   * Such anon_vma's should have the same root, so you'd expect to see
   * just a single mutex_lock for the whole traversal.
   */
  static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
  {
  	struct anon_vma *new_root = anon_vma->root;
  	if (new_root != root) {
  		if (WARN_ON_ONCE(root))
  			up_write(&root->rwsem);
  		root = new_root;
  		down_write(&root->rwsem);
  	}
  	return root;
  }
  
  static inline void unlock_anon_vma_root(struct anon_vma *root)
  {
  	if (root)
  		up_write(&root->rwsem);
  }
  /*
   * Attach the anon_vmas from src to dst.
   * Returns 0 on success, -ENOMEM on failure.
   *
   * If dst->anon_vma is NULL this function tries to find and reuse existing
   * anon_vma which has no vmas and only one child anon_vma. This prevents
   * degradation of anon_vma hierarchy to endless linear chain in case of
   * constantly forking task. On the other hand, an anon_vma with more than one
   * child isn't reused even if there was no alive vma, thus rmap walker has a
   * good chance of avoiding scanning the whole hierarchy when it searches where
   * page is mapped.
   */
  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
  {
  	struct anon_vma_chain *avc, *pavc;
  	struct anon_vma *root = NULL;

  	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
  		struct anon_vma *anon_vma;
  		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
  		if (unlikely(!avc)) {
  			unlock_anon_vma_root(root);
  			root = NULL;
  			avc = anon_vma_chain_alloc(GFP_KERNEL);
  			if (!avc)
  				goto enomem_failure;
  		}
  		anon_vma = pavc->anon_vma;
  		root = lock_anon_vma_root(root, anon_vma);
  		anon_vma_chain_link(dst, avc, anon_vma);
  
  		/*
  		 * Reuse existing anon_vma if its degree is lower than two,
  		 * that means it has no vma and only one anon_vma child.
  		 *
  		 * Do not choose the parent anon_vma, otherwise the first child
  		 * will always reuse it. Root anon_vma is never reused:
  		 * it has self-parent reference and at least one child.
  		 */
  		if (!dst->anon_vma && anon_vma != src->anon_vma &&
  				anon_vma->degree < 2)
  			dst->anon_vma = anon_vma;
  	}
  	if (dst->anon_vma)
  		dst->anon_vma->degree++;
  	unlock_anon_vma_root(root);
  	return 0;

   enomem_failure:
  	/*
  	 * dst->anon_vma is dropped here otherwise its degree can be incorrectly
  	 * decremented in unlink_anon_vmas().
  	 * We can safely do this because callers of anon_vma_clone() don't care
  	 * about dst->anon_vma if anon_vma_clone() failed.
  	 */
  	dst->anon_vma = NULL;
  	unlink_anon_vmas(dst);
  	return -ENOMEM;
  }
  /*
   * Attach vma to its own anon_vma, as well as to the anon_vmas that
   * the corresponding VMA in the parent process is attached to.
   * Returns 0 on success, non-zero on failure.
   */
  int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
  {
  	struct anon_vma_chain *avc;
  	struct anon_vma *anon_vma;
  	int error;

  	/* Don't bother if the parent process has no anon_vma here. */
  	if (!pvma->anon_vma)
  		return 0;
  	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
  	vma->anon_vma = NULL;
  	/*
  	 * First, attach the new VMA to the parent VMA's anon_vmas,
  	 * so rmap can find non-COWed pages in child processes.
  	 */
  	error = anon_vma_clone(vma, pvma);
  	if (error)
  		return error;

  	/* An existing anon_vma has been reused, all done then. */
  	if (vma->anon_vma)
  		return 0;
  	/* Then add our own anon_vma. */
  	anon_vma = anon_vma_alloc();
  	if (!anon_vma)
  		goto out_error;
  	avc = anon_vma_chain_alloc(GFP_KERNEL);
  	if (!avc)
  		goto out_error_free_anon_vma;
  
  	/*
  	 * The root anon_vma's spinlock is the lock actually used when we
  	 * lock any of the anon_vmas in this anon_vma tree.
  	 */
  	anon_vma->root = pvma->anon_vma->root;
  	anon_vma->parent = pvma->anon_vma;
  	/*
  	 * With refcounts, an anon_vma can stay around longer than the
  	 * process it belongs to. The root anon_vma needs to be pinned until
  	 * this anon_vma is freed, because the lock lives in the root.
  	 */
  	get_anon_vma(anon_vma->root);
  	/* Mark this anon_vma as the one where our new (COWed) pages go. */
  	vma->anon_vma = anon_vma;
  	anon_vma_lock_write(anon_vma);
  	anon_vma_chain_link(vma, avc, anon_vma);
  	anon_vma->parent->degree++;
  	anon_vma_unlock_write(anon_vma);
  
  	return 0;
  
   out_error_free_anon_vma:
  	put_anon_vma(anon_vma);
   out_error:
  	unlink_anon_vmas(vma);
  	return -ENOMEM;
  }
  void unlink_anon_vmas(struct vm_area_struct *vma)
  {
  	struct anon_vma_chain *avc, *next;
  	struct anon_vma *root = NULL;

  	/*
  	 * Unlink each anon_vma chained to the VMA.  This list is ordered
  	 * from newest to oldest, ensuring the root anon_vma gets freed last.
  	 */
  	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
  		struct anon_vma *anon_vma = avc->anon_vma;
  
  		root = lock_anon_vma_root(root, anon_vma);
  		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
  
  		/*
  		 * Leave empty anon_vmas on the list - we'll need
  		 * to free them outside the lock.
  		 */
  		if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
  			anon_vma->parent->degree--;
  			continue;
  		}
  
  		list_del(&avc->same_vma);
  		anon_vma_chain_free(avc);
  	}
  	if (vma->anon_vma)
  		vma->anon_vma->degree--;
  	unlock_anon_vma_root(root);
  
  	/*
  	 * Iterate the list once more, it now only contains empty and unlinked
  	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
  	 * needing to write-acquire the anon_vma->root->rwsem.
  	 */
  	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
  		struct anon_vma *anon_vma = avc->anon_vma;
  		BUG_ON(anon_vma->degree);
  		put_anon_vma(anon_vma);
  		list_del(&avc->same_vma);
  		anon_vma_chain_free(avc);
  	}
  }
  static void anon_vma_ctor(void *data)
  {
  	struct anon_vma *anon_vma = data;

  	init_rwsem(&anon_vma->rwsem);
  	atomic_set(&anon_vma->refcount, 0);
  	anon_vma->rb_root = RB_ROOT;
  }
  
  void __init anon_vma_init(void)
  {
  	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
  			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
  	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
  }
  
  /*
   * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
   *
   * Since there is no serialization whatsoever against page_remove_rmap()
   * the best this function can do is return a locked anon_vma that might
   * have been relevant to this page.
   *
   * The page might have been remapped to a different anon_vma or the anon_vma
   * returned may already be freed (and even reused).
   *
   * In case it was remapped to a different anon_vma, the new anon_vma will be a
   * child of the old anon_vma, and the anon_vma lifetime rules will therefore
   * ensure that any anon_vma obtained from the page will still be valid for as
   * long as we observe page_mapped() [ hence all those page_mapped() tests ].
   *
   * All users of this function must be very careful when walking the anon_vma
   * chain and verify that the page in question is indeed mapped in it
   * [ something equivalent to page_mapped_in_vma() ].
   *
   * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
   * that the anon_vma pointer from page->mapping is valid if there is a
   * mapcount, we can dereference the anon_vma after observing those.
   */
  struct anon_vma *page_get_anon_vma(struct page *page)
  {
  	struct anon_vma *anon_vma = NULL;
  	unsigned long anon_mapping;
  
  	rcu_read_lock();
  	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
  	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
  		goto out;
  	if (!page_mapped(page))
  		goto out;
  
  	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
  	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
  		anon_vma = NULL;
  		goto out;
  	}
  
  	/*
  	 * If this page is still mapped, then its anon_vma cannot have been
  	 * freed.  But if it has been unmapped, we have no security against the
  	 * anon_vma structure being freed and reused (for another anon_vma:
  	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
  	 * above cannot corrupt).
  	 */
  	if (!page_mapped(page)) {
  		rcu_read_unlock();
  		put_anon_vma(anon_vma);
  		return NULL;
  	}
  out:
  	rcu_read_unlock();
  
  	return anon_vma;
  }
  /*
   * Similar to page_get_anon_vma() except it locks the anon_vma.
   *
   * It's a little more complex as it tries to keep the fast path to a single
   * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
   * reference like with page_get_anon_vma() and then block on the mutex.
   */
  struct anon_vma *page_lock_anon_vma_read(struct page *page)
  {
  	struct anon_vma *anon_vma = NULL;
  	struct anon_vma *root_anon_vma;
  	unsigned long anon_mapping;

  	rcu_read_lock();
  	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
  	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
  		goto out;
  	if (!page_mapped(page))
  		goto out;
  
  	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
  	root_anon_vma = READ_ONCE(anon_vma->root);
  	if (down_read_trylock(&root_anon_vma->rwsem)) {
  		/*
  		 * If the page is still mapped, then this anon_vma is still
  		 * its anon_vma, and holding the mutex ensures that it will
  		 * not go away, see anon_vma_free().
  		 */
  		if (!page_mapped(page)) {
  			up_read(&root_anon_vma->rwsem);
  			anon_vma = NULL;
  		}
  		goto out;
  	}

  	/* trylock failed, we got to sleep */
  	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
  		anon_vma = NULL;
  		goto out;
  	}
  
  	if (!page_mapped(page)) {
  		rcu_read_unlock();
  		put_anon_vma(anon_vma);
  		return NULL;
  	}
  
  	/* we pinned the anon_vma, it's safe to sleep */
  	rcu_read_unlock();
  	anon_vma_lock_read(anon_vma);
  
  	if (atomic_dec_and_test(&anon_vma->refcount)) {
  		/*
  		 * Oops, we held the last refcount, release the lock
  		 * and bail -- can't simply use put_anon_vma() because
  		 * we'll deadlock on the anon_vma_lock_write() recursion.
  		 */
  		anon_vma_unlock_read(anon_vma);
  		__put_anon_vma(anon_vma);
  		anon_vma = NULL;
  	}
  
  	return anon_vma;
  
  out:
  	rcu_read_unlock();
  	return anon_vma;
  }
  void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
  {
  	anon_vma_unlock_read(anon_vma);
  }
  
  /*
   * At what user virtual address is page expected in @vma?
   */
  static inline unsigned long
  __vma_address(struct page *page, struct vm_area_struct *vma)
  {
  	pgoff_t pgoff = page_to_pgoff(page);
  	return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
  }
  
  inline unsigned long
  vma_address(struct page *page, struct vm_area_struct *vma)
  {
  	unsigned long address = __vma_address(page, vma);
  
  	/* page should be within @vma mapping range */
  	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);

  	return address;
  }
  
  /*
   * At what user virtual address is page expected in vma?
   * Caller should check the page is actually part of the vma.
   */
  unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
  {
  	unsigned long address;
  	if (PageAnon(page)) {
  		struct anon_vma *page__anon_vma = page_anon_vma(page);
  		/*
  		 * Note: swapoff's unuse_vma() is more efficient with this
  		 * check, and needs it to match anon_vma when KSM is active.
  		 */
  		if (!vma->anon_vma || !page__anon_vma ||
  		    vma->anon_vma->root != page__anon_vma->root)
  			return -EFAULT;
  	} else if (page->mapping) {
  		if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
  			return -EFAULT;
  	} else
  		return -EFAULT;
  	address = __vma_address(page, vma);
  	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
  		return -EFAULT;
  	return address;
  }
  pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
  {
  	pgd_t *pgd;
  	pud_t *pud;
  	pmd_t *pmd = NULL;
  	pmd_t pmde;
  
  	pgd = pgd_offset(mm, address);
  	if (!pgd_present(*pgd))
  		goto out;
  
  	pud = pud_offset(pgd, address);
  	if (!pud_present(*pud))
  		goto out;
  
  	pmd = pmd_offset(pud, address);
  	/*
  	 * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
  	 * without holding anon_vma lock for write.  So when looking for a
  	 * genuine pmde (in which to find pte), test present and !THP together.
  	 */
  	pmde = *pmd;
  	barrier();
  	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
  		pmd = NULL;
  out:
  	return pmd;
  }
  /*
   * Check that @page is mapped at @address into @mm.
   *
   * If @sync is false, page_check_address may perform a racy check to avoid
   * the page table lock when the pte is not present (helpful when reclaiming
   * highly shared pages).
   *
   * On success returns with pte mapped and locked.
   */
  pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
  			  unsigned long address, spinlock_t **ptlp, int sync)
  {
  	pmd_t *pmd;
  	pte_t *pte;
  	spinlock_t *ptl;

  	if (unlikely(PageHuge(page))) {
  		/* when pud is not present, pte will be NULL */
  		pte = huge_pte_offset(mm, address);
  		if (!pte)
  			return NULL;
  		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
  		goto check;
  	}
  	pmd = mm_find_pmd(mm, address);
  	if (!pmd)
  		return NULL;
  	pte = pte_offset_map(pmd, address);
  	/* Make a quick check before getting the lock */
  	if (!sync && !pte_present(*pte)) {
  		pte_unmap(pte);
  		return NULL;
  	}
  	ptl = pte_lockptr(mm, pmd);
  check:
  	spin_lock(ptl);
  	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
  		*ptlp = ptl;
  		return pte;
  	}
  	pte_unmap_unlock(pte, ptl);
  	return NULL;
  }
  /**
   * page_mapped_in_vma - check whether a page is really mapped in a VMA
   * @page: the page to test
   * @vma: the VMA to test
   *
   * Returns 1 if the page is mapped into the page tables of the VMA, 0
   * if the page is not mapped into the page tables of this VMA.  Only
   * valid for normal file or anonymous VMAs.
   */
  int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  {
  	unsigned long address;
  	pte_t *pte;
  	spinlock_t *ptl;
  	address = __vma_address(page, vma);
  	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
  		return 0;
  	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
  	if (!pte)			/* the page is not in this mm */
  		return 0;
  	pte_unmap_unlock(pte, ptl);
  
  	return 1;
  }
  struct page_referenced_arg {
  	int mapcount;
  	int referenced;
  	unsigned long vm_flags;
  	struct mem_cgroup *memcg;
  };
  /*
   * arg: page_referenced_arg will be passed
   */
  static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
  			unsigned long address, void *arg)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	spinlock_t *ptl;
  	int referenced = 0;
  	struct page_referenced_arg *pra = arg;

  	if (unlikely(PageTransHuge(page))) {
  		pmd_t *pmd;
  		/*
  		 * rmap might return false positives; we must filter
  		 * these out using page_check_address_pmd().
  		 */
  		pmd = page_check_address_pmd(page, mm, address,
  					     PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
  		if (!pmd)
  			return SWAP_AGAIN;
  
  		if (vma->vm_flags & VM_LOCKED) {
  			spin_unlock(ptl);
  			pra->vm_flags |= VM_LOCKED;
  			return SWAP_FAIL; /* To break the loop */
  		}
  
  		/* go ahead even if the pmd is pmd_trans_splitting() */
  		if (pmdp_clear_flush_young_notify(vma, address, pmd))
  			referenced++;
  		spin_unlock(ptl);
  	} else {
  		pte_t *pte;

  		/*
  		 * rmap might return false positives; we must filter
  		 * these out using page_check_address().
  		 */
  		pte = page_check_address(page, mm, address, &ptl, 0);
  		if (!pte)
  			return SWAP_AGAIN;

  		if (vma->vm_flags & VM_LOCKED) {
  			pte_unmap_unlock(pte, ptl);
  			pra->vm_flags |= VM_LOCKED;
  			return SWAP_FAIL; /* To break the loop */
  		}
  		if (ptep_clear_flush_young_notify(vma, address, pte)) {
  			/*
  			 * Don't treat a reference through a sequentially read
  			 * mapping as such.  If the page has been used in
  			 * another mapping, we will catch it; if this other
  			 * mapping is already gone, the unmap path will have
  			 * set PG_referenced or activated the page.
  			 */
  			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
  				referenced++;
  		}
  		pte_unmap_unlock(pte, ptl);
  	}
  	if (referenced) {
  		pra->referenced++;
  		pra->vm_flags |= vma->vm_flags;
  	}

  	pra->mapcount--;
  	if (!pra->mapcount)
  		return SWAP_SUCCESS; /* To break the loop */
  
  	return SWAP_AGAIN;
  }
  static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
  {
  	struct page_referenced_arg *pra = arg;
  	struct mem_cgroup *memcg = pra->memcg;

  	if (!mm_match_cgroup(vma->vm_mm, memcg))
  		return true;

  	return false;
  }
  
  /**
   * page_referenced - test if the page was referenced
   * @page: the page to test
   * @is_locked: caller holds lock on the page
   * @memcg: target memory cgroup
   * @vm_flags: collect the vma->vm_flags of those VMAs that actually referenced the page
   *
   * Quick test_and_clear_referenced for all mappings to a page,
   * returns the number of ptes which referenced the page.
   */
  int page_referenced(struct page *page,
  		    int is_locked,
  		    struct mem_cgroup *memcg,
  		    unsigned long *vm_flags)
  {
  	int ret;
  	int we_locked = 0;
  	struct page_referenced_arg pra = {
  		.mapcount = page_mapcount(page),
  		.memcg = memcg,
  	};
  	struct rmap_walk_control rwc = {
  		.rmap_one = page_referenced_one,
  		.arg = (void *)&pra,
  		.anon_lock = page_lock_anon_vma_read,
  	};

  	*vm_flags = 0;
  	if (!page_mapped(page))
  		return 0;
  
  	if (!page_rmapping(page))
  		return 0;
  
  	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
  		we_locked = trylock_page(page);
  		if (!we_locked)
  			return 1;
  	}
  
  	/*
  	 * If we are reclaiming on behalf of a cgroup, skip
  	 * counting on behalf of references from different
  	 * cgroups
  	 */
  	if (memcg) {
  		rwc.invalid_vma = invalid_page_referenced_vma;
  	}
  
  	ret = rmap_walk(page, &rwc);
  	*vm_flags = pra.vm_flags;
  
  	if (we_locked)
  		unlock_page(page);
  
  	return pra.referenced;
  }
  static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
  			    unsigned long address, void *arg)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pte_t *pte;
  	spinlock_t *ptl;
  	int ret = 0;
  	int *cleaned = arg;

  	pte = page_check_address(page, mm, address, &ptl, 1);
  	if (!pte)
  		goto out;
  	if (pte_dirty(*pte) || pte_write(*pte)) {
  		pte_t entry;

  		flush_cache_page(vma, address, pte_pfn(*pte));
  		entry = ptep_clear_flush(vma, address, pte);
  		entry = pte_wrprotect(entry);
  		entry = pte_mkclean(entry);
  		set_pte_at(mm, address, pte, entry);
  		ret = 1;
  	}

  	pte_unmap_unlock(pte, ptl);

  	if (ret) {
  		mmu_notifier_invalidate_page(mm, address);
  		(*cleaned)++;
  	}
  out:
  	return SWAP_AGAIN;
  }
  static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
  {
  	if (vma->vm_flags & VM_SHARED)
  		return false;

  	return true;
  }
  
  int page_mkclean(struct page *page)
  {
  	int cleaned = 0;
  	struct address_space *mapping;
  	struct rmap_walk_control rwc = {
  		.arg = (void *)&cleaned,
  		.rmap_one = page_mkclean_one,
  		.invalid_vma = invalid_mkclean_vma,
  	};
  
  	BUG_ON(!PageLocked(page));
  	if (!page_mapped(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (!mapping)
  		return 0;
  
  	rmap_walk(page, &rwc);

  	return cleaned;
  }
  EXPORT_SYMBOL_GPL(page_mkclean);

  /**
   * page_move_anon_rmap - move a page to our anon_vma
   * @page:	the page to move to our anon_vma
   * @vma:	the vma the page belongs to
   * @address:	the user virtual address mapped
   *
   * When a page belongs exclusively to one process after a COW event,
   * that page can be moved into the anon_vma that belongs to just that
   * process, so the rmap code will not search the parent or sibling
   * processes.
   */
  void page_move_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_VMA(!anon_vma, vma);
  	VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
  
  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	/*
  	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
  	 * simultaneously, so a concurrent reader (eg page_referenced()'s
  	 * PageAnon()) will not see one without the other.
  	 */
  	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
  }
  
  /**
   * __page_set_anon_rmap - set up new anonymous rmap
   * @page:	Page to add to rmap	
   * @vma:	VM area to add page to.
   * @address:	User virtual address of the mapping	
   * @exclusive:	the page is exclusively owned by the current process
   */
  static void __page_set_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address, int exclusive)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;

  	BUG_ON(!anon_vma);

  	if (PageAnon(page))
  		return;
  	/*
  	 * If the page isn't exclusively mapped into this vma,
  	 * we must use the _oldest_ possible anon_vma for the
  	 * page mapping!
  	 */
  	if (!exclusive)
  		anon_vma = anon_vma->root;

  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	page->mapping = (struct address_space *) anon_vma;
  	page->index = linear_page_index(vma, address);
  }
  
  /**
   * __page_check_anon_rmap - sanity check anonymous rmap addition
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   */
  static void __page_check_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
  #ifdef CONFIG_DEBUG_VM
  	/*
  	 * The page's anon-rmap details (mapping and index) are guaranteed to
  	 * be set up correctly at this point.
  	 *
  	 * We have exclusion against page_add_anon_rmap because the caller
  	 * always holds the page locked, except if called from page_dup_rmap,
  	 * in which case the page is already known to be setup.
  	 *
  	 * We have exclusion against page_add_new_anon_rmap because those pages
  	 * are initially only visible via the pagetables, and the pte is locked
  	 * over the call to page_add_new_anon_rmap.
  	 */
  	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
  	BUG_ON(page->index != linear_page_index(vma, address));
  #endif
  }
  
  /**
   * page_add_anon_rmap - add pte mapping to an anonymous page
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   *
   * The caller needs to hold the pte lock, and the page must be locked in
   * the anon_vma case: to serialize mapping,index checking after setting,
   * and to ensure that PageAnon is not being upgraded racily to PageKsm
   * (but PageKsm is never downgraded to PageAnon).
   */
  void page_add_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
  	do_page_add_anon_rmap(page, vma, address, 0);
  }
  
  /*
   * Special version of the above for do_swap_page, which often runs
   * into pages that are exclusively owned by the current process.
   * Everybody else should continue to use page_add_anon_rmap above.
   */
  void do_page_add_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address, int exclusive)
  {
  	int first = atomic_inc_and_test(&page->_mapcount);
  	if (first) {
  		/*
  		 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
  		 * these counters are not modified in interrupt context, and
  		 * pte lock(a spinlock) is held, which implies preemption
  		 * disabled.
  		 */
  		if (PageTransHuge(page))
  			__inc_zone_page_state(page,
  					      NR_ANON_TRANSPARENT_HUGEPAGES);
  		__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
  				hpage_nr_pages(page));
  	}
  	if (unlikely(PageKsm(page)))
  		return;
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	/* address might be in next vma when migration races vma_adjust */
  	if (first)
  		__page_set_anon_rmap(page, vma, address, exclusive);
  	else
  		__page_check_anon_rmap(page, vma, address);
  }
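  
  /*
   * Illustrative sketch (not part of this file): a swap-in path is expected
   * to call do_page_add_anon_rmap() under the page table lock, passing
   * exclusive=1 only when it knows the page is owned solely by the faulting
   * process. The identifiers inside the #if 0 block are placeholders.
   */
  #if 0	/* example only, never compiled */
  	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
  	/* ... install the present pte for the swapped-in page ... */
  	do_page_add_anon_rmap(page, vma, address, exclusive);
  	pte_unmap_unlock(pte, ptl);
  #endif
  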
  /**
   * page_add_new_anon_rmap - add pte mapping to a new anonymous page
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   *
   * Same as page_add_anon_rmap but must only be called on *new* pages.
   * This means the inc-and-test can be bypassed.
   * Page does not have to be locked.
   */
  void page_add_new_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
  	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
  	SetPageSwapBacked(page);
  	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
  	if (PageTransHuge(page))
  		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
  	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
  			hpage_nr_pages(page));
  	__page_set_anon_rmap(page, vma, address, 1);
  }
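  
  /*
   * Illustrative sketch (not part of this file): an anonymous fault handler
   * is expected to use page_add_new_anon_rmap() roughly as below, on a
   * freshly allocated page not yet visible to any other thread. The exact
   * charge/LRU ordering is for exposition only.
   */
  #if 0	/* example only, never compiled */
  	page = alloc_zeroed_user_highpage_movable(vma, address);
  	__SetPageUptodate(page);
  
  	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
  	page_add_new_anon_rmap(page, vma, address);
  	lru_cache_add_active_or_unevictable(page, vma);
  	set_pte_at(mm, address, pte, mk_pte(page, vma->vm_page_prot));
  	pte_unmap_unlock(pte, ptl);
  #endif
  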
  /**
   * page_add_file_rmap - add pte mapping to a file page
   * @page: the page to add the mapping to
   *
   * The caller needs to hold the pte lock.
   */
  void page_add_file_rmap(struct page *page)
  {
  	struct mem_cgroup *memcg;
  
  	memcg = mem_cgroup_begin_page_stat(page);
  	if (atomic_inc_and_test(&page->_mapcount)) {
  		__inc_zone_page_state(page, NR_FILE_MAPPED);
  		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
  	}
  	mem_cgroup_end_page_stat(memcg);
  }
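  
  /*
   * Illustrative note (not part of this file): the file-backed fault path
   * (do_set_pte() and friends in mm/memory.c) is the expected caller of
   * page_add_file_rmap(), immediately before the new pte is installed under
   * the page table lock.
   */
  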
  static void page_remove_file_rmap(struct page *page)
  {
  	struct mem_cgroup *memcg;
  
  	memcg = mem_cgroup_begin_page_stat(page);
  
  	/* page still mapped by someone else? */
  	if (!atomic_add_negative(-1, &page->_mapcount))
  		goto out;
  
  	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
  	if (unlikely(PageHuge(page)))
  		goto out;
  
  	/*
  	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
  	 * these counters are not modified in interrupt context, and
  	 * pte lock (a spinlock) is held, which implies preemption disabled.
  	 */
  	__dec_zone_page_state(page, NR_FILE_MAPPED);
  	mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
  
  	if (unlikely(PageMlocked(page)))
  		clear_page_mlock(page);
  out:
  	mem_cgroup_end_page_stat(memcg);
  }
  /**
   * page_remove_rmap - take down pte mapping from a page
   * @page: page to remove mapping from
   *
   * The caller needs to hold the pte lock.
   */
  void page_remove_rmap(struct page *page)
  {
  	if (!PageAnon(page)) {
  		page_remove_file_rmap(page);
  		return;
  	}
  
  	/* page still mapped by someone else? */
  	if (!atomic_add_negative(-1, &page->_mapcount))
  		return;
  
  	/* Hugepages are not counted in NR_ANON_PAGES for now. */
  	if (unlikely(PageHuge(page)))
  		return;
  
  	/*
  	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
  	 * these counters are not modified in interrupt context, and
  	 * pte lock (a spinlock) is held, which implies preemption disabled.
  	 */
  	if (PageTransHuge(page))
  		__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
  
  	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
  			      -hpage_nr_pages(page));
  	if (unlikely(PageMlocked(page)))
  		clear_page_mlock(page);
  
  	/*
  	 * It would be tidy to reset the PageAnon mapping here,
  	 * but that might overwrite a racing page_add_anon_rmap
  	 * which increments mapcount after us but sets mapping
  	 * before us: so leave the reset to free_hot_cold_page,
  	 * and remember that it's only reliable while mapped.
  	 * Leaving it set also helps swapoff to reinstate ptes
  	 * faster for those pages still in swapcache.
  	 */
  }
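  
  /*
   * Illustrative note (not part of this file): page_remove_rmap() is
   * expected to be paired with tearing down the pte that mapped the page;
   * see try_to_unmap_one() below for the canonical flush/clear/remove
   * sequence.
   */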
  
  /*
   * @arg: enum ttu_flags will be passed to this argument
   */
  static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
  		     unsigned long address, void *arg)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pte_t *pte;
  	pte_t pteval;
  	spinlock_t *ptl;
  	int ret = SWAP_AGAIN;
  	enum ttu_flags flags = (enum ttu_flags)arg;
  
  	pte = page_check_address(page, mm, address, &ptl, 0);
  	if (!pte)
  		goto out;
  
  	/*
  	 * If the page is mlock()d, we cannot swap it out.
  	 * If it's recently referenced (perhaps page_referenced
  	 * skipped over this mm) then we should reactivate it.
  	 */
  	if (!(flags & TTU_IGNORE_MLOCK)) {
  		if (vma->vm_flags & VM_LOCKED)
  			goto out_mlock;
  		if (flags & TTU_MUNLOCK)
  			goto out_unmap;
  	}
  	if (!(flags & TTU_IGNORE_ACCESS)) {
  		if (ptep_clear_flush_young_notify(vma, address, pte)) {
  			ret = SWAP_FAIL;
  			goto out_unmap;
  		}
  	}
  
  	/* Nuke the page table entry. */
  	flush_cache_page(vma, address, page_to_pfn(page));
  	pteval = ptep_clear_flush(vma, address, pte);
  
  	/* Move the dirty bit to the physical page now the pte is gone. */
  	if (pte_dirty(pteval))
  		set_page_dirty(page);
  	/* Update high watermark before we lower rss */
  	update_hiwater_rss(mm);
  	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
  		if (!PageHuge(page)) {
  			if (PageAnon(page))
  				dec_mm_counter(mm, MM_ANONPAGES);
  			else
  				dec_mm_counter(mm, MM_FILEPAGES);
  		}
  		set_pte_at(mm, address, pte,
  			   swp_entry_to_pte(make_hwpoison_entry(page)));
  	} else if (pte_unused(pteval)) {
  		/*
  		 * The guest indicated that the page content is of no
  		 * interest anymore. Simply discard the pte, vmscan
  		 * will take care of the rest.
  		 */
  		if (PageAnon(page))
  			dec_mm_counter(mm, MM_ANONPAGES);
  		else
  			dec_mm_counter(mm, MM_FILEPAGES);
  	} else if (PageAnon(page)) {
  		swp_entry_t entry = { .val = page_private(page) };
  		pte_t swp_pte;
  
  		if (PageSwapCache(page)) {
  			/*
  			 * Store the swap location in the pte.
  			 * See handle_pte_fault() ...
  			 */
  			if (swap_duplicate(entry) < 0) {
  				set_pte_at(mm, address, pte, pteval);
  				ret = SWAP_FAIL;
  				goto out_unmap;
  			}
  			if (list_empty(&mm->mmlist)) {
  				spin_lock(&mmlist_lock);
  				if (list_empty(&mm->mmlist))
  					list_add(&mm->mmlist, &init_mm.mmlist);
  				spin_unlock(&mmlist_lock);
  			}
  			dec_mm_counter(mm, MM_ANONPAGES);
  			inc_mm_counter(mm, MM_SWAPENTS);
  		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
  			/*
  			 * Store the pfn of the page in a special migration
  			 * pte. do_swap_page() will wait until the migration
  			 * pte is removed and then restart fault handling.
  			 */
  			BUG_ON(!(flags & TTU_MIGRATION));
  			entry = make_migration_entry(page, pte_write(pteval));
  		}
  		swp_pte = swp_entry_to_pte(entry);
  		if (pte_soft_dirty(pteval))
  			swp_pte = pte_swp_mksoft_dirty(swp_pte);
  		set_pte_at(mm, address, pte, swp_pte);
  	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
  		   (flags & TTU_MIGRATION)) {
  		/* Establish migration entry for a file page */
  		swp_entry_t entry;
  		entry = make_migration_entry(page, pte_write(pteval));
  		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
  	} else
  		dec_mm_counter(mm, MM_FILEPAGES);
  
  	page_remove_rmap(page);
  	page_cache_release(page);
  
  out_unmap:
  	pte_unmap_unlock(pte, ptl);
  	if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
  		mmu_notifier_invalidate_page(mm, address);
  out:
  	return ret;
  
  out_mlock:
  	pte_unmap_unlock(pte, ptl);
  
  	/*
  	 * We need mmap_sem locking here; otherwise the VM_LOCKED check is
  	 * racy and gives an unstable result. Also, we cannot wait here
  	 * because we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
  	 * If the trylock fails, the page remains on the evictable LRU, and
  	 * later vmscan can retry moving it to the unevictable LRU if it is
  	 * actually mlocked.
  	 */
  	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
  		if (vma->vm_flags & VM_LOCKED) {
  			mlock_vma_page(page);
  			ret = SWAP_MLOCK;
  		}
  		up_read(&vma->vm_mm->mmap_sem);
  	}
  	return ret;
  }
  bool is_vma_temporary_stack(struct vm_area_struct *vma)
  {
  	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
  
  	if (!maybe_stack)
  		return false;
  
  	if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
  						VM_STACK_INCOMPLETE_SETUP)
  		return true;
  
  	return false;
  }
  static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
  {
  	return is_vma_temporary_stack(vma);
  }
  static int page_not_mapped(struct page *page)
  {
  	return !page_mapped(page);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1297
1298
1299
  /**
   * try_to_unmap - try to remove all page table mappings to a page
   * @page: the page to get unmapped
   * @flags: action and flags
   *
   * Tries to remove all the page table entries which are mapping this
   * page, used in the pageout path.  Caller must hold the page lock.
   * Return values are:
   *
   * SWAP_SUCCESS	- we succeeded in removing all mappings
   * SWAP_AGAIN	- we missed a mapping, try again later
   * SWAP_FAIL	- the page is unswappable
   * SWAP_MLOCK	- page is mlocked.
   */
  int try_to_unmap(struct page *page, enum ttu_flags flags)
  {
  	int ret;
  	struct rmap_walk_control rwc = {
  		.rmap_one = try_to_unmap_one,
  		.arg = (void *)flags,
  		.done = page_not_mapped,
  		.anon_lock = page_lock_anon_vma_read,
  	};
  
  	VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
  
  	/*
  	 * During exec, a temporary VMA is setup and later moved.
  	 * The VMA is moved under the anon_vma lock but not the
  	 * page tables leading to a race where migration cannot
  	 * find the migration ptes. Rather than increasing the
  	 * locking requirements of exec(), migration skips
  	 * temporary VMAs until after exec() completes.
  	 */
  	if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page))
  		rwc.invalid_vma = invalid_migration_vma;
  
  	ret = rmap_walk(page, &rwc);
  	if (ret != SWAP_MLOCK && !page_mapped(page))
  		ret = SWAP_SUCCESS;
  	return ret;
  }
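  
  /*
   * Illustrative sketch (not part of this file): how a reclaim path might
   * consume the SWAP_* result of try_to_unmap(). TTU_UNMAP is the plain
   * unmap mode in enum ttu_flags; the switch below is for exposition only.
   */
  #if 0	/* example only, never compiled */
  	switch (try_to_unmap(page, TTU_UNMAP)) {
  	case SWAP_SUCCESS:
  		/* all ptes are gone: the page can be reclaimed */
  		break;
  	case SWAP_AGAIN:
  		/* a mapping was missed: keep the page and retry later */
  		break;
  	case SWAP_MLOCK:
  		/* page is mlocked: move it to the unevictable list */
  		break;
  	case SWAP_FAIL:
  		/* unswappable: activate the page instead */
  		break;
  	}
  #endif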
  
  /**
   * try_to_munlock - try to munlock a page
   * @page: the page to be munlocked
   *
   * Called from munlock code.  Checks all of the VMAs mapping the page
   * to make sure nobody else has this page mlocked. The page will be
   * returned with PG_mlocked cleared if no other vmas have it mlocked.
   *
   * Return values are:
   *
   * SWAP_AGAIN	- no vma is holding page mlocked, or,
   * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
   * SWAP_FAIL	- page cannot be located at present
   * SWAP_MLOCK	- page is now mlocked.
   */
  int try_to_munlock(struct page *page)
  {
  	int ret;
  	struct rmap_walk_control rwc = {
  		.rmap_one = try_to_unmap_one,
  		.arg = (void *)TTU_MUNLOCK,
  		.done = page_not_mapped,
  		.anon_lock = page_lock_anon_vma_read,
  	};
  
  	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
  
  	ret = rmap_walk(page, &rwc);
  	return ret;
  }
  
  void __put_anon_vma(struct anon_vma *anon_vma)
  {
  	struct anon_vma *root = anon_vma->root;
  
  	anon_vma_free(anon_vma);
  	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
  		anon_vma_free(root);
  }
  
  static struct anon_vma *rmap_walk_anon_lock(struct page *page,
  					struct rmap_walk_control *rwc)
  {
  	struct anon_vma *anon_vma;
  	if (rwc->anon_lock)
  		return rwc->anon_lock(page);
  	/*
  	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
  	 * because that depends on page_mapped(); but not all its usages
  	 * are holding mmap_sem. Users without mmap_sem are required to
  	 * take a reference count to prevent the anon_vma disappearing
  	 */
  	anon_vma = page_anon_vma(page);
  	if (!anon_vma)
  		return NULL;
  
  	anon_vma_lock_read(anon_vma);
  	return anon_vma;
  }
  /*
   * rmap_walk_anon - do something to anonymous page using the object-based
   * rmap method
   * @page: the page to be handled
   * @rwc: control variable according to each walk type
   *
   * Find all the mappings of a page using the mapping pointer and the vma chains
   * contained in the anon_vma struct it points to.
   *
   * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
   * where the page was found will be held for write.  So, we won't recheck
   * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
   * LOCKED.
   */
  static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
  {
  	struct anon_vma *anon_vma;
  	pgoff_t pgoff;
  	struct anon_vma_chain *avc;
  	int ret = SWAP_AGAIN;
  	anon_vma = rmap_walk_anon_lock(page, rwc);
  	if (!anon_vma)
  		return ret;
  
  	pgoff = page_to_pgoff(page);
  	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
  		struct vm_area_struct *vma = avc->vma;
  		unsigned long address = vma_address(page, vma);
  
  		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  			continue;
  		ret = rwc->rmap_one(page, vma, address, rwc->arg);
  		if (ret != SWAP_AGAIN)
  			break;
  		if (rwc->done && rwc->done(page))
  			break;
  	}
  	anon_vma_unlock_read(anon_vma);
  	return ret;
  }
  /*
   * rmap_walk_file - do something to file page using the object-based rmap method
   * @page: the page to be handled
   * @rwc: control variable according to each walk type
   *
   * Find all the mappings of a page using the mapping pointer and the vma chains
   * contained in the address_space struct it points to.
   *
   * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
   * where the page was found will be held for write.  So, we won't recheck
   * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
   * LOCKED.
   */
  static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
  {
  	struct address_space *mapping = page->mapping;
  	pgoff_t pgoff;
  	struct vm_area_struct *vma;
  	int ret = SWAP_AGAIN;
  	/*
  	 * The page lock not only makes sure that page->mapping cannot
  	 * suddenly be NULLified by truncation, it makes sure that the
  	 * structure at mapping cannot be freed and reused yet,
  	 * so we can safely take mapping->i_mmap_rwsem.
  	 */
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	if (!mapping)
  		return ret;
  
  	pgoff = page_to_pgoff(page);
  	i_mmap_lock_read(mapping);
  	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
  		unsigned long address = vma_address(page, vma);
  
  		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  			continue;
  		ret = rwc->rmap_one(page, vma, address, rwc->arg);
  		if (ret != SWAP_AGAIN)
  			goto done;
  		if (rwc->done && rwc->done(page))
  			goto done;
  	}
  
  done:
  	i_mmap_unlock_read(mapping);
  	return ret;
  }
  int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
  {
  	if (unlikely(PageKsm(page)))
  		return rmap_walk_ksm(page, rwc);
  	else if (PageAnon(page))
  		return rmap_walk_anon(page, rwc);
  	else
  		return rmap_walk_file(page, rwc);
  }
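  
  /*
   * Illustrative sketch (not part of this file): a minimal client of the
   * rmap_walk() interface above. The two helpers below are hypothetical;
   * they only show the rmap_walk_control pattern (the usual callers hold
   * the page lock, and .rmap_one is invoked for every vma mapping the page).
   */
  #if 0	/* example only, never compiled */
  static int count_one_mapping(struct page *page, struct vm_area_struct *vma,
  		unsigned long address, void *arg)
  {
  	int *count = arg;
  
  	(*count)++;		/* one mapping found in @vma at @address */
  	return SWAP_AGAIN;	/* keep walking the remaining vmas */
  }
  
  static int count_page_mappings(struct page *page)
  {
  	int count = 0;
  	struct rmap_walk_control rwc = {
  		.rmap_one = count_one_mapping,
  		.arg = &count,
  		.anon_lock = page_lock_anon_vma_read,
  	};
  
  	rmap_walk(page, &rwc);
  	return count;
  }
  #endif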
  
  #ifdef CONFIG_HUGETLB_PAGE
  /*
   * The following three functions are for anonymous (private mapped) hugepages.
   * Unlike common anonymous pages, anonymous hugepages have no accounting code
   * and no lru code, because we handle hugepages differently from common pages.
   */
  static void __hugepage_set_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address, int exclusive)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  
  	BUG_ON(!anon_vma);
  
  	if (PageAnon(page))
  		return;
  	if (!exclusive)
  		anon_vma = anon_vma->root;
  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	page->mapping = (struct address_space *) anon_vma;
  	page->index = linear_page_index(vma, address);
  }
  
  void hugepage_add_anon_rmap(struct page *page,
  			    struct vm_area_struct *vma, unsigned long address)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  	int first;
  
  	BUG_ON(!PageLocked(page));
  	BUG_ON(!anon_vma);
  	/* address might be in next vma when migration races vma_adjust */
  	first = atomic_inc_and_test(&page->_mapcount);
  	if (first)
  		__hugepage_set_anon_rmap(page, vma, address, 0);
  }
  
  void hugepage_add_new_anon_rmap(struct page *page,
  			struct vm_area_struct *vma, unsigned long address)
  {
  	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
  	atomic_set(&page->_mapcount, 0);
  	__hugepage_set_anon_rmap(page, vma, address, 1);
  }
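  
  /*
   * Illustrative note (not part of this file): these hugetlb variants are
   * expected to be called from the hugetlb fault and migration paths, the
   * producers of anonymous hugetlbfs mappings; regular THP pages keep using
   * page_add_anon_rmap()/page_add_new_anon_rmap() above.
   */
  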
  #endif /* CONFIG_HUGETLB_PAGE */