mm/rmap.c
  /*
   * mm/rmap.c - physical to virtual reverse mappings
   *
   * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
   * Released under the General Public License (GPL).
   *
   * Simple, low overhead reverse mapping scheme.
   * Please try to keep this thing as modular as possible.
   *
   * Provides methods for unmapping each kind of mapped page:
   * the anon methods track anonymous pages, and
   * the file methods track pages belonging to an inode.
   *
   * Original design by Rik van Riel <riel@conectiva.com.br> 2001
   * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
   * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
   * Contributions by Hugh Dickins 2003, 2004
   */
  
  /*
   * Lock ordering in mm:
   *
   * inode->i_mutex	(while writing or truncating, not reading or faulting)
   *   mm->mmap_lock
   *     page->flags PG_locked (lock_page)   * (see hugetlbfs below)
   *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
   *         mapping->i_mmap_rwsem
   *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
   *           anon_vma->rwsem
   *             mm->page_table_lock or pte_lock
   *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
   *               swap_lock (in swap_duplicate, swap_info_get)
   *                 mmlist_lock (in mmput, drain_mmlist and others)
   *                 mapping->private_lock (in __set_page_dirty_buffers)
   *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
   *                     i_pages lock (widely used)
   *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
   *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
   *                   sb_lock (within inode_lock in fs/fs-writeback.c)
   *                   i_pages lock (widely used, in set_page_dirty,
   *                             in arch-dependent flush_dcache_mmap_lock,
   *                             within bdi.wb->list_lock in __sync_single_inode)
   *
   * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
   *   ->tasklist_lock
   *     pte map lock
   *
   * * hugetlbfs PageHuge() pages take locks in this order:
   *         mapping->i_mmap_rwsem
   *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
   *             page->flags PG_locked (lock_page)
   */
  
  #include <linux/mm.h>
  #include <linux/sched/mm.h>
  #include <linux/sched/task.h>
  #include <linux/pagemap.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/ksm.h>
  #include <linux/rmap.h>
  #include <linux/rcupdate.h>
  #include <linux/export.h>
  #include <linux/memcontrol.h>
  #include <linux/mmu_notifier.h>
  #include <linux/migrate.h>
  #include <linux/hugetlb.h>
  #include <linux/huge_mm.h>
  #include <linux/backing-dev.h>
  #include <linux/page_idle.h>
  #include <linux/memremap.h>
  #include <linux/userfaultfd_k.h>
  
  #include <asm/tlbflush.h>
  #include <trace/events/tlb.h>
  #include "internal.h"
  static struct kmem_cache *anon_vma_cachep;
  static struct kmem_cache *anon_vma_chain_cachep;
  
  static inline struct anon_vma *anon_vma_alloc(void)
  {
  	struct anon_vma *anon_vma;
  
  	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
  	if (anon_vma) {
  		atomic_set(&anon_vma->refcount, 1);
  		anon_vma->degree = 1;	/* Reference for first vma */
  		anon_vma->parent = anon_vma;
  		/*
  		 * Initialise the anon_vma root to point to itself. If called
  		 * from fork, the root will be reset to the parent's anon_vma.
  		 */
  		anon_vma->root = anon_vma;
  	}
  
  	return anon_vma;
  }
  static inline void anon_vma_free(struct anon_vma *anon_vma)
  {
  	VM_BUG_ON(atomic_read(&anon_vma->refcount));
  
  	/*
  	 * Synchronize against page_lock_anon_vma_read() such that
  	 * we can safely hold the lock without the anon_vma getting
  	 * freed.
  	 *
  	 * Relies on the full mb implied by the atomic_dec_and_test() from
  	 * put_anon_vma() against the acquire barrier implied by
  	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
  	 *
  	 * page_lock_anon_vma_read()	VS	put_anon_vma()
  	 *   down_read_trylock()		  atomic_dec_and_test()
  	 *   LOCK				  MB
  	 *   atomic_read()			  rwsem_is_locked()
  	 *
  	 * LOCK should suffice since the actual taking of the lock must
  	 * happen _before_ what follows.
  	 */
  	might_sleep();
  	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
  		anon_vma_lock_write(anon_vma);
  		anon_vma_unlock_write(anon_vma);
  	}
  	kmem_cache_free(anon_vma_cachep, anon_vma);
  }

  static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
  {
  	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
  }
  static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
  {
  	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
  }
  static void anon_vma_chain_link(struct vm_area_struct *vma,
  				struct anon_vma_chain *avc,
  				struct anon_vma *anon_vma)
  {
  	avc->vma = vma;
  	avc->anon_vma = anon_vma;
  	list_add(&avc->same_vma, &vma->anon_vma_chain);
  	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
  }
  /**
   * __anon_vma_prepare - attach an anon_vma to a memory region
   * @vma: the memory region in question
   *
   * This makes sure the memory mapping described by 'vma' has
   * an 'anon_vma' attached to it, so that we can associate the
   * anonymous pages mapped into it with that anon_vma.
   *
   * The common case will be that we already have one, which
   * is handled inline by anon_vma_prepare(). But if
   * not, we either need to find an adjacent mapping that we
   * can re-use the anon_vma from (very common when the only
   * reason for splitting a vma has been mprotect()), or we
   * allocate a new one.
   *
   * Anon-vma allocations are very subtle, because we may have
   * optimistically looked up an anon_vma in page_lock_anon_vma_read()
   * and that may actually touch the spinlock even in the newly
   * allocated vma (it depends on RCU to make sure that the
   * anon_vma isn't actually destroyed).
   *
   * As a result, we need to do proper anon_vma locking even
   * for the new allocation. At the same time, we do not want
   * to do any locking for the common case of already having
   * an anon_vma.
   *
   * This must be called with the mmap_lock held for reading.
   */
  int __anon_vma_prepare(struct vm_area_struct *vma)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	struct anon_vma *anon_vma, *allocated;
  	struct anon_vma_chain *avc;
  
  	might_sleep();

  	avc = anon_vma_chain_alloc(GFP_KERNEL);
  	if (!avc)
  		goto out_enomem;
  
  	anon_vma = find_mergeable_anon_vma(vma);
  	allocated = NULL;
  	if (!anon_vma) {
  		anon_vma = anon_vma_alloc();
  		if (unlikely(!anon_vma))
  			goto out_enomem_free_avc;
  		allocated = anon_vma;
  	}

  	anon_vma_lock_write(anon_vma);
  	/* page_table_lock to protect against threads */
  	spin_lock(&mm->page_table_lock);
  	if (likely(!vma->anon_vma)) {
  		vma->anon_vma = anon_vma;
  		anon_vma_chain_link(vma, avc, anon_vma);
  		/* vma reference or self-parent link for new root */
  		anon_vma->degree++;
  		allocated = NULL;
  		avc = NULL;
  	}
  	spin_unlock(&mm->page_table_lock);
  	anon_vma_unlock_write(anon_vma);

  	if (unlikely(allocated))
  		put_anon_vma(allocated);
  	if (unlikely(avc))
  		anon_vma_chain_free(avc);

  	return 0;
  
   out_enomem_free_avc:
  	anon_vma_chain_free(avc);
   out_enomem:
  	return -ENOMEM;
  }
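
  /*
   * Usage sketch (not taken from this file): callers normally go through the
   * inline wrapper anon_vma_prepare() in include/linux/rmap.h, which only
   * drops into the slow path above when the vma has no anon_vma yet; the
   * anonymous-fault path in mm/memory.c then does roughly:
   *
   *	if (unlikely(anon_vma_prepare(vma)))
   *		goto oom;
   *	page = alloc_zeroed_user_highpage_movable(vma, vmf->address);
   */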
  /*
   * This is a useful helper function for locking the anon_vma root as
   * we traverse the vma->anon_vma_chain, looping over anon_vma's that
   * have the same vma.
   *
   * Such anon_vma's should have the same root, so you'd expect to see
   * just a single lock acquisition for the whole traversal.
   */
  static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
  {
  	struct anon_vma *new_root = anon_vma->root;
  	if (new_root != root) {
  		if (WARN_ON_ONCE(root))
  			up_write(&root->rwsem);
  		root = new_root;
  		down_write(&root->rwsem);
  	}
  	return root;
  }
  
  static inline void unlock_anon_vma_root(struct anon_vma *root)
  {
  	if (root)
  		up_write(&root->rwsem);
  }
  /*
   * Attach the anon_vmas from src to dst.
   * Returns 0 on success, -ENOMEM on failure.
   *
   * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
   * anon_vma_fork(). The first three want an exact copy of src, while the last
   * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
   * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
   * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
   *
   * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
   * and reuse existing anon_vma which has no vmas and only one child anon_vma.
   * This prevents the anon_vma hierarchy from degrading into an endless
   * linear chain when tasks fork constantly. On the other hand, an anon_vma
   * with more than one child isn't reused even if there is no live vma, so
   * an rmap walker has a good chance of avoiding a scan of the whole
   * hierarchy when it searches for where a page is mapped.
   */
  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
  {
  	struct anon_vma_chain *avc, *pavc;
  	struct anon_vma *root = NULL;

  	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
  		struct anon_vma *anon_vma;
  		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
  		if (unlikely(!avc)) {
  			unlock_anon_vma_root(root);
  			root = NULL;
  			avc = anon_vma_chain_alloc(GFP_KERNEL);
  			if (!avc)
  				goto enomem_failure;
  		}
  		anon_vma = pavc->anon_vma;
  		root = lock_anon_vma_root(root, anon_vma);
  		anon_vma_chain_link(dst, avc, anon_vma);
  
  		/*
  		 * Reuse the existing anon_vma if its degree is lower than two,
  		 * which means it has no vma and only one anon_vma child.
  		 *
  		 * Do not choose the parent anon_vma, otherwise the first child
  		 * will always reuse it. The root anon_vma is never reused:
  		 * it has a self-parent reference and at least one child.
  		 */
  		if (!dst->anon_vma && src->anon_vma &&
  		    anon_vma != src->anon_vma && anon_vma->degree < 2)
  			dst->anon_vma = anon_vma;
  	}
  	if (dst->anon_vma)
  		dst->anon_vma->degree++;
  	unlock_anon_vma_root(root);
  	return 0;

   enomem_failure:
  	/*
  	 * dst->anon_vma is dropped here otherwise its degree can be incorrectly
  	 * decremented in unlink_anon_vmas().
  	 * We can safely do this because callers of anon_vma_clone() don't care
  	 * about dst->anon_vma if anon_vma_clone() failed.
  	 */
  	dst->anon_vma = NULL;
  	unlink_anon_vmas(dst);
  	return -ENOMEM;
  }
  /*
   * Attach vma to its own anon_vma, as well as to the anon_vmas that
   * the corresponding VMA in the parent process is attached to.
   * Returns 0 on success, non-zero on failure.
   */
  int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
  {
  	struct anon_vma_chain *avc;
  	struct anon_vma *anon_vma;
  	int error;

  	/* Don't bother if the parent process has no anon_vma here. */
  	if (!pvma->anon_vma)
  		return 0;
  	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
  	vma->anon_vma = NULL;
  	/*
  	 * First, attach the new VMA to the parent VMA's anon_vmas,
  	 * so rmap can find non-COWed pages in child processes.
  	 */
  	error = anon_vma_clone(vma, pvma);
  	if (error)
  		return error;

  	/* An existing anon_vma has been reused, all done then. */
  	if (vma->anon_vma)
  		return 0;
  	/* Then add our own anon_vma. */
  	anon_vma = anon_vma_alloc();
  	if (!anon_vma)
  		goto out_error;
  	avc = anon_vma_chain_alloc(GFP_KERNEL);
  	if (!avc)
  		goto out_error_free_anon_vma;
  
  	/*
  	 * The root anon_vma's spinlock is the lock actually used when we
  	 * lock any of the anon_vmas in this anon_vma tree.
  	 */
  	anon_vma->root = pvma->anon_vma->root;
  	anon_vma->parent = pvma->anon_vma;
  	/*
  	 * With refcounts, an anon_vma can stay around longer than the
  	 * process it belongs to. The root anon_vma needs to be pinned until
  	 * this anon_vma is freed, because the lock lives in the root.
  	 */
  	get_anon_vma(anon_vma->root);
  	/* Mark this anon_vma as the one where our new (COWed) pages go. */
  	vma->anon_vma = anon_vma;
  	anon_vma_lock_write(anon_vma);
  	anon_vma_chain_link(vma, avc, anon_vma);
  	anon_vma->parent->degree++;
  	anon_vma_unlock_write(anon_vma);
  
  	return 0;
  
   out_error_free_anon_vma:
  	put_anon_vma(anon_vma);
   out_error:
  	unlink_anon_vmas(vma);
  	return -ENOMEM;
  }
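
  /*
   * Usage sketch (assumption, based on dup_mmap() in kernel/fork.c): each
   * copied vma is wired up to its parent's anon_vmas roughly as
   *
   *	if (anon_vma_fork(tmp, mpnt))
   *		goto fail_nomem_anon_vma_fork;
   *
   * so a failure here aborts the whole address-space duplication.
   */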
  void unlink_anon_vmas(struct vm_area_struct *vma)
  {
  	struct anon_vma_chain *avc, *next;
  	struct anon_vma *root = NULL;

  	/*
  	 * Unlink each anon_vma chained to the VMA.  This list is ordered
  	 * from newest to oldest, ensuring the root anon_vma gets freed last.
  	 */
  	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
  		struct anon_vma *anon_vma = avc->anon_vma;
  
  		root = lock_anon_vma_root(root, anon_vma);
  		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
  
  		/*
  		 * Leave empty anon_vmas on the list - we'll need
  		 * to free them outside the lock.
  		 */
  		if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
  			anon_vma->parent->degree--;
  			continue;
  		}
  
  		list_del(&avc->same_vma);
  		anon_vma_chain_free(avc);
  	}
  	if (vma->anon_vma)
  		vma->anon_vma->degree--;
  	unlock_anon_vma_root(root);
  
  	/*
  	 * Iterate the list once more, it now only contains empty and unlinked
  	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
  	 * needing to write-acquire the anon_vma->root->rwsem.
  	 */
  	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
  		struct anon_vma *anon_vma = avc->anon_vma;
  		VM_WARN_ON(anon_vma->degree);
  		put_anon_vma(anon_vma);
  		list_del(&avc->same_vma);
  		anon_vma_chain_free(avc);
  	}
  }
  static void anon_vma_ctor(void *data)
  {
  	struct anon_vma *anon_vma = data;

  	init_rwsem(&anon_vma->rwsem);
  	atomic_set(&anon_vma->refcount, 0);
  	anon_vma->rb_root = RB_ROOT_CACHED;
  }
  
  void __init anon_vma_init(void)
  {
  	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
  			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
  			anon_vma_ctor);
  	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
  			SLAB_PANIC|SLAB_ACCOUNT);
  }
  
  /*
   * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
   *
   * Since there is no serialization whatsoever against page_remove_rmap()
   * the best this function can do is return a locked anon_vma that might
   * have been relevant to this page.
   *
   * The page might have been remapped to a different anon_vma or the anon_vma
   * returned may already be freed (and even reused).
   *
   * In case it was remapped to a different anon_vma, the new anon_vma will be a
   * child of the old anon_vma, and the anon_vma lifetime rules will therefore
   * ensure that any anon_vma obtained from the page will still be valid for as
   * long as we observe page_mapped() [ hence all those page_mapped() tests ].
   *
   * All users of this function must be very careful when walking the anon_vma
   * chain and verify that the page in question is indeed mapped in it
   * [ something equivalent to page_mapped_in_vma() ].
   *
   * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
   * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
   * if there is a mapcount, we can dereference the anon_vma after observing
   * those.
   */
  struct anon_vma *page_get_anon_vma(struct page *page)
  {
  	struct anon_vma *anon_vma = NULL;
  	unsigned long anon_mapping;
  
  	rcu_read_lock();
  	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
  	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
  		goto out;
  	if (!page_mapped(page))
  		goto out;
  
  	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
  	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
  		anon_vma = NULL;
  		goto out;
  	}
  
  	/*
  	 * If this page is still mapped, then its anon_vma cannot have been
  	 * freed.  But if it has been unmapped, we have no security against the
  	 * anon_vma structure being freed and reused (for another anon_vma:
  	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
  	 * above cannot corrupt).
  	 */
  	if (!page_mapped(page)) {
  		rcu_read_unlock();
  		put_anon_vma(anon_vma);
  		return NULL;
  	}
  out:
  	rcu_read_unlock();
  
  	return anon_vma;
  }
  /*
   * Similar to page_get_anon_vma() except it locks the anon_vma.
   *
   * It's a little more complex as it tries to keep the fast path to a single
   * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
   * reference like with page_get_anon_vma() and then block on the lock.
   */
  struct anon_vma *page_lock_anon_vma_read(struct page *page)
  {
  	struct anon_vma *anon_vma = NULL;
  	struct anon_vma *root_anon_vma;
  	unsigned long anon_mapping;

  	rcu_read_lock();
  	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
  	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
  		goto out;
  	if (!page_mapped(page))
  		goto out;
  
  	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
  	root_anon_vma = READ_ONCE(anon_vma->root);
  	if (down_read_trylock(&root_anon_vma->rwsem)) {
  		/*
  		 * If the page is still mapped, then this anon_vma is still
  		 * its anon_vma, and holding the mutex ensures that it will
  		 * not go away, see anon_vma_free().
  		 */
  		if (!page_mapped(page)) {
  			up_read(&root_anon_vma->rwsem);
  			anon_vma = NULL;
  		}
  		goto out;
  	}

  	/* trylock failed, we have to sleep */
  	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
  		anon_vma = NULL;
  		goto out;
  	}
  
  	if (!page_mapped(page)) {
  		rcu_read_unlock();
  		put_anon_vma(anon_vma);
  		return NULL;
  	}
  
  	/* we pinned the anon_vma, it's safe to sleep */
  	rcu_read_unlock();
  	anon_vma_lock_read(anon_vma);
  
  	if (atomic_dec_and_test(&anon_vma->refcount)) {
  		/*
  		 * Oops, we held the last refcount, release the lock
  		 * and bail -- can't simply use put_anon_vma() because
  		 * we'll deadlock on the anon_vma_lock_write() recursion.
  		 */
  		anon_vma_unlock_read(anon_vma);
  		__put_anon_vma(anon_vma);
  		anon_vma = NULL;
  	}
  
  	return anon_vma;
  
  out:
  	rcu_read_unlock();
  	return anon_vma;
  }
  void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
  {
  	anon_vma_unlock_read(anon_vma);
  }
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
  /*
   * Flush TLB entries for recently unmapped pages from remote CPUs. It is
   * important that, if a PTE was dirty when it was unmapped, it is flushed
   * before any IO is initiated on the page, to prevent lost writes. Similarly,
   * it must be flushed before freeing to prevent data leakage.
   */
  void try_to_unmap_flush(void)
  {
  	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
  
  	if (!tlb_ubc->flush_required)
  		return;
  	arch_tlbbatch_flush(&tlb_ubc->arch);
  	tlb_ubc->flush_required = false;
  	tlb_ubc->writable = false;
  }
  /* Flush iff there are potentially writable TLB entries that can race with IO */
  void try_to_unmap_flush_dirty(void)
  {
  	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
  
  	if (tlb_ubc->writable)
  		try_to_unmap_flush();
  }
  static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
  {
  	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
  	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
  	tlb_ubc->flush_required = true;
  
  	/*
  	 * Ensure compiler does not re-order the setting of tlb_flush_batched
  	 * before the PTE is cleared.
  	 */
  	barrier();
  	mm->tlb_flush_batched = true;
  
  	/*
  	 * If the PTE was dirty then it's best to assume it's writable. The
  	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
  	 * before the page is queued for IO.
  	 */
  	if (writable)
  		tlb_ubc->writable = true;
  }
  
  /*
   * Returns true if the TLB flush should be deferred to the end of a batch of
   * unmap operations to reduce IPIs.
   */
  static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
  {
  	bool should_defer = false;
  
  	if (!(flags & TTU_BATCH_FLUSH))
  		return false;
  
  	/* If remote CPUs need to be flushed then defer batch the flush */
  	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
  		should_defer = true;
  	put_cpu();
  
  	return should_defer;
  }
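
  /*
   * Rough usage sketch (see try_to_unmap_one(), further down in this file):
   * when the flush may be deferred, the PTE is cleared without an immediate
   * TLB flush and the flush is queued on the current task instead,
   * approximately:
   *
   *	if (should_defer_flush(mm, flags)) {
   *		pteval = ptep_get_and_clear(mm, address, pvmw.pte);
   *		set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
   *	} else {
   *		pteval = ptep_clear_flush(vma, address, pvmw.pte);
   *	}
   */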
  
  /*
   * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
   * releasing the PTL if TLB flushes are batched. It's possible for a parallel
   * operation such as mprotect or munmap to race between reclaim unmapping
   * the page and flushing the page. If this race occurs, it potentially allows
   * access to data via a stale TLB entry. Tracking all mm's that have TLB
   * batching in flight would be expensive during reclaim so instead track
   * whether TLB batching occurred in the past and if so then do a flush here
   * if required. This will cost one additional flush per reclaim cycle paid
   * by the first operation at risk, such as mprotect or munmap.
   *
   * This must be called under the PTL so that an access to tlb_flush_batched
   * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
   * via the PTL.
   */
  void flush_tlb_batched_pending(struct mm_struct *mm)
  {
  	if (data_race(mm->tlb_flush_batched)) {
  		flush_tlb_mm(mm);
  
  		/*
  		 * Do not allow the compiler to re-order the clearing of
  		 * tlb_flush_batched before the tlb is flushed.
  		 */
  		barrier();
  		mm->tlb_flush_batched = false;
  	}
  }
  #else
  static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
  {
  }
  
  static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
  {
  	return false;
  }
  #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
  /*
   * At what user virtual address is page expected in vma?
   * Caller should check the page is actually part of the vma.
   */
  unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
  {
  	unsigned long address;
  	if (PageAnon(page)) {
  		struct anon_vma *page__anon_vma = page_anon_vma(page);
  		/*
  		 * Note: swapoff's unuse_vma() is more efficient with this
  		 * check, and needs it to match anon_vma when KSM is active.
  		 */
  		if (!vma->anon_vma || !page__anon_vma ||
  		    vma->anon_vma->root != page__anon_vma->root)
  			return -EFAULT;
  	} else if (page->mapping) {
  		if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
  			return -EFAULT;
  	} else
  		return -EFAULT;
  	address = __vma_address(page, vma);
  	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
  		return -EFAULT;
  	return address;
  }
  pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
  {
  	pgd_t *pgd;
  	p4d_t *p4d;
  	pud_t *pud;
  	pmd_t *pmd = NULL;
  	pmd_t pmde;
  
  	pgd = pgd_offset(mm, address);
  	if (!pgd_present(*pgd))
  		goto out;
  	p4d = p4d_offset(pgd, address);
  	if (!p4d_present(*p4d))
  		goto out;
  
  	pud = pud_offset(p4d, address);
  	if (!pud_present(*pud))
  		goto out;
  
  	pmd = pmd_offset(pud, address);
  	/*
  	 * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
  	 * without holding anon_vma lock for write.  So when looking for a
  	 * genuine pmde (in which to find pte), test present and !THP together.
  	 */
  	pmde = *pmd;
  	barrier();
  	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
  		pmd = NULL;
  out:
  	return pmd;
  }
  struct page_referenced_arg {
  	int mapcount;
  	int referenced;
  	unsigned long vm_flags;
  	struct mem_cgroup *memcg;
  };
  /*
   * arg: page_referenced_arg will be passed
   */
  static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
  			unsigned long address, void *arg)
  {
  	struct page_referenced_arg *pra = arg;
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  		.address = address,
  	};
  	int referenced = 0;
  	while (page_vma_mapped_walk(&pvmw)) {
  		address = pvmw.address;

  		if (vma->vm_flags & VM_LOCKED) {
  			page_vma_mapped_walk_done(&pvmw);
  			pra->vm_flags |= VM_LOCKED;
  			return false; /* To break the loop */
  		}

  		if (pvmw.pte) {
  			if (ptep_clear_flush_young_notify(vma, address,
  						pvmw.pte)) {
  				/*
  				 * Don't treat a reference through
  				 * a sequentially read mapping as such.
  				 * If the page has been used in another mapping,
  				 * we will catch it; if this other mapping is
  				 * already gone, the unmap path will have set
  				 * PG_referenced or activated the page.
  				 */
  				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
  					referenced++;
  			}
  		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
  			if (pmdp_clear_flush_young_notify(vma, address,
  						pvmw.pmd))
  				referenced++;
  		} else {
  			/* unexpected pmd-mapped page? */
  			WARN_ON_ONCE(1);
  		}
  
  		pra->mapcount--;
  	}

  	if (referenced)
  		clear_page_idle(page);
  	if (test_and_clear_page_young(page))
  		referenced++;
  	if (referenced) {
  		pra->referenced++;
  		pra->vm_flags |= vma->vm_flags;
  	}

  	if (!pra->mapcount)
  		return false; /* To break the loop */

  	return true;
  }
  static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
  {
  	struct page_referenced_arg *pra = arg;
  	struct mem_cgroup *memcg = pra->memcg;

  	if (!mm_match_cgroup(vma->vm_mm, memcg))
  		return true;

  	return false;
  }
  
  /**
   * page_referenced - test if the page was referenced
   * @page: the page to test
   * @is_locked: caller holds lock on the page
   * @memcg: target memory cgroup
   * @vm_flags: collect encountered vma->vm_flags that actually referenced the page
   *
   * Quick test_and_clear_referenced for all mappings to a page,
   * returns the number of ptes which referenced the page.
   */
  int page_referenced(struct page *page,
  		    int is_locked,
  		    struct mem_cgroup *memcg,
  		    unsigned long *vm_flags)
  {
  	int we_locked = 0;
  	struct page_referenced_arg pra = {
  		.mapcount = total_mapcount(page),
  		.memcg = memcg,
  	};
  	struct rmap_walk_control rwc = {
  		.rmap_one = page_referenced_one,
  		.arg = (void *)&pra,
  		.anon_lock = page_lock_anon_vma_read,
  	};

  	*vm_flags = 0;
  	if (!pra.mapcount)
  		return 0;
  
  	if (!page_rmapping(page))
  		return 0;
  
  	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
  		we_locked = trylock_page(page);
  		if (!we_locked)
  			return 1;
  	}
  
  	/*
  	 * If we are reclaiming on behalf of a cgroup, skip
  	 * counting on behalf of references from different
  	 * cgroups
  	 */
  	if (memcg) {
  		rwc.invalid_vma = invalid_page_referenced_vma;
  	}
  	rmap_walk(page, &rwc);
  	*vm_flags = pra.vm_flags;
  
  	if (we_locked)
  		unlock_page(page);
  
  	return pra.referenced;
  }
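
  /*
   * Usage sketch (assumption: see page_check_references() in mm/vmscan.c):
   * reclaim feeds the returned pte count and the collected vm_flags into its
   * keep-or-reclaim decision, roughly:
   *
   *	referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
   *					  &vm_flags);
   *	if (vm_flags & VM_LOCKED)
   *		return PAGEREF_RECLAIM;
   */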
  static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
  			    unsigned long address, void *arg)
  {
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  		.address = address,
  		.flags = PVMW_SYNC,
  	};
  	struct mmu_notifier_range range;
  	int *cleaned = arg;

  	/*
  	 * We have to assume the worst case, i.e. pmd, for invalidation. Note that
  	 * the page cannot be freed from this function.
  	 */
  	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
  				0, vma, vma->vm_mm, address,
  				min(vma->vm_end, address + page_size(page)));
  	mmu_notifier_invalidate_range_start(&range);

  	while (page_vma_mapped_walk(&pvmw)) {
  		int ret = 0;

  		address = pvmw.address;
  		if (pvmw.pte) {
  			pte_t entry;
  			pte_t *pte = pvmw.pte;
  
  			if (!pte_dirty(*pte) && !pte_write(*pte))
  				continue;
  			flush_cache_page(vma, address, pte_pfn(*pte));
  			entry = ptep_clear_flush(vma, address, pte);
  			entry = pte_wrprotect(entry);
  			entry = pte_mkclean(entry);
  			set_pte_at(vma->vm_mm, address, pte, entry);
  			ret = 1;
  		} else {
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  			pmd_t *pmd = pvmw.pmd;
  			pmd_t entry;
  
  			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
  				continue;
  			flush_cache_page(vma, address, page_to_pfn(page));
  			entry = pmdp_invalidate(vma, address, pmd);
  			entry = pmd_wrprotect(entry);
  			entry = pmd_mkclean(entry);
  			set_pmd_at(vma->vm_mm, address, pmd, entry);
  			ret = 1;
  #else
  			/* unexpected pmd-mapped page? */
  			WARN_ON_ONCE(1);
  #endif
  		}

  		/*
  		 * No need to call mmu_notifier_invalidate_range() as we are
  		 * downgrading page table protection not changing it to point
  		 * to a new page.
  		 *
  		 * See Documentation/vm/mmu_notifier.rst
  		 */
  		if (ret)
  			(*cleaned)++;
  	}

  	mmu_notifier_invalidate_range_end(&range);

  	return true;
  }
  static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
  {
  	if (vma->vm_flags & VM_SHARED)
  		return false;

  	return true;
  }
  
  int page_mkclean(struct page *page)
  {
  	int cleaned = 0;
  	struct address_space *mapping;
  	struct rmap_walk_control rwc = {
  		.arg = (void *)&cleaned,
  		.rmap_one = page_mkclean_one,
  		.invalid_vma = invalid_mkclean_vma,
  	};
  
  	BUG_ON(!PageLocked(page));
  	if (!page_mapped(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (!mapping)
  		return 0;
  
  	rmap_walk(page, &rwc);

  	return cleaned;
  }
  EXPORT_SYMBOL_GPL(page_mkclean);
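
  /*
   * Usage sketch (assumption: see clear_page_dirty_for_io() in
   * mm/page-writeback.c): writeback write-protects all shared mappings before
   * clearing the dirty bit, so any later write re-dirties the page via the
   * fault path, roughly:
   *
   *	if (page_mkclean(page))
   *		set_page_dirty(page);
   */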

  /**
   * page_move_anon_rmap - move a page to our anon_vma
   * @page:	the page to move to our anon_vma
   * @vma:	the vma the page belongs to
   *
   * When a page belongs exclusively to one process after a COW event,
   * that page can be moved into the anon_vma that belongs to just that
   * process, so the rmap code will not search the parent or sibling
   * processes.
   */
  void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  	page = compound_head(page);
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_VMA(!anon_vma, vma);
  
  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	/*
  	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
  	 * simultaneously, so a concurrent reader (eg page_referenced()'s
  	 * PageAnon()) will not see one without the other.
  	 */
  	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
  }
  
  /**
   * __page_set_anon_rmap - set up new anonymous rmap
   * @page:	Page or Hugepage to add to rmap
   * @vma:	VM area to add page to.
   * @address:	User virtual address of the mapping	
   * @exclusive:	the page is exclusively owned by the current process
   */
  static void __page_set_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address, int exclusive)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;

  	BUG_ON(!anon_vma);

  	if (PageAnon(page))
  		return;
  	/*
  	 * If the page isn't exclusively mapped into this vma,
  	 * we must use the _oldest_ possible anon_vma for the
  	 * page mapping!
  	 */
  	if (!exclusive)
  		anon_vma = anon_vma->root;
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1014

9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1015
1016
  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	page->mapping = (struct address_space *) anon_vma;
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1017
  	page->index = linear_page_index(vma, address);
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1018
1019
1020
  }
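
  /*
   * Illustrative sketch (not part of the kernel sources) of how the
   * encoding written above can be read back.  It only assumes what the
   * core mm already provides (PAGE_MAPPING_ANON, PAGE_MAPPING_FLAGS) and
   * follows the same test as page_anon_vma():
   *
   *	unsigned long m = (unsigned long)READ_ONCE(page->mapping);
   *	struct anon_vma *av = NULL;
   *
   *	if ((m & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON)
   *		av = (struct anon_vma *)(m & ~PAGE_MAPPING_FLAGS);
   *
   * av stays NULL for file-backed pages; PageKsm pages set an extra bit
   * and are filtered out by the exact comparison, just as page_anon_vma()
   * filters them.
   */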
  
  /**
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
1021
   * __page_check_anon_rmap - sanity check anonymous rmap addition
c97a9e10e   Nick Piggin   mm: more rmap che...
1022
1023
1024
1025
1026
1027
1028
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   */
  static void __page_check_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
c97a9e10e   Nick Piggin   mm: more rmap che...
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
  	/*
  	 * The page's anon-rmap details (mapping and index) are guaranteed to
  	 * be set up correctly at this point.
  	 *
  	 * We have exclusion against page_add_anon_rmap because the caller
  	 * always holds the page locked, except if called from page_dup_rmap,
  	 * in which case the page is already known to be set up.

  	 *
  	 * We have exclusion against page_add_new_anon_rmap because those pages
  	 * are initially only visible via the pagetables, and the pte is locked
  	 * over the call to page_add_new_anon_rmap.
  	 */
30c463828   Yang Shi   mm/rmap.c: use VM...
1041
1042
1043
  	VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
  	VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
  		       page);
c97a9e10e   Nick Piggin   mm: more rmap che...
1044
1045
1046
  }
  
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1047
1048
1049
1050
   * page_add_anon_rmap - add pte mapping to an anonymous page
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1051
   * @compound:	charge the page as compound or small page
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1052
   *
5ad646880   Hugh Dickins   ksm: let shared p...
1053
   * The caller needs to hold the pte lock, and the page must be locked in
80e148226   Hugh Dickins   ksm: share anon p...
1054
1055
1056
   * the anon_vma case: to serialize mapping,index checking after setting,
   * and to ensure that PageAnon is not being upgraded racily to PageKsm
   * (but PageKsm is never downgraded to PageAnon).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
1058
   */
  void page_add_anon_rmap(struct page *page,
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1059
  	struct vm_area_struct *vma, unsigned long address, bool compound)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1060
  {
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1061
  	do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
ad8c2ee80   Rik van Riel   rmap: add exclusi...
1062
1063
1064
1065
1066
1067
1068
1069
  }
  
  /*
   * Special version of the above for do_swap_page, which often runs
   * into pages that are exclusively owned by the current process.
   * Everybody else should continue to use page_add_anon_rmap above.
   */
  void do_page_add_anon_rmap(struct page *page,
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1070
  	struct vm_area_struct *vma, unsigned long address, int flags)
ad8c2ee80   Rik van Riel   rmap: add exclusi...
1071
  {
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1072
1073
  	bool compound = flags & RMAP_COMPOUND;
  	bool first;
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1074
1075
1076
1077
  	if (unlikely(PageKsm(page)))
  		lock_page_memcg(page);
  	else
  		VM_BUG_ON_PAGE(!PageLocked(page), page);
e9b61f198   Kirill A. Shutemov   thp: reintroduce ...
1078
1079
  	if (compound) {
  		atomic_t *mapcount;
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1080
  		VM_BUG_ON_PAGE(!PageLocked(page), page);
e9b61f198   Kirill A. Shutemov   thp: reintroduce ...
1081
1082
1083
  		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  		mapcount = compound_mapcount_ptr(page);
  		first = atomic_inc_and_test(mapcount);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1084
1085
1086
  	} else {
  		first = atomic_inc_and_test(&page->_mapcount);
  	}
79134171d   Andrea Arcangeli   thp: transparent ...
1087
  	if (first) {
6c357848b   Matthew Wilcox (Oracle)   mm: replace hpage...
1088
  		int nr = compound ? thp_nr_pages(page) : 1;
bea04b073   Jianyu Zhan   mm: use the light...
1089
1090
1091
1092
1093
1094
  		/*
  		 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
  		 * these counters are not modified in interrupt context, and
  		 * pte lock (a spinlock) is held, which implies preemption
  		 * disabled.
  		 */
65c453778   Kirill A. Shutemov   mm, rmap: account...
1095
  		if (compound)
468c39823   Johannes Weiner   mm: memcontrol: s...
1096
  			__inc_lruvec_page_state(page, NR_ANON_THPS);
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1097
  		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
79134171d   Andrea Arcangeli   thp: transparent ...
1098
  	}
5ad646880   Hugh Dickins   ksm: let shared p...
1099

be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1100
1101
1102
1103
  	if (unlikely(PageKsm(page))) {
  		unlock_page_memcg(page);
  		return;
  	}
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1104

5dbe0af47   Hugh Dickins   mm: fix kernel BU...
1105
  	/* address might be in next vma when migration races vma_adjust */
5ad646880   Hugh Dickins   ksm: let shared p...
1106
  	if (first)
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1107
1108
  		__page_set_anon_rmap(page, vma, address,
  				flags & RMAP_EXCLUSIVE);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1109
  	else
c97a9e10e   Nick Piggin   mm: more rmap che...
1110
  		__page_check_anon_rmap(page, vma, address);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1111
  }
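
  /*
   * Illustrative sketch (not part of the kernel sources): a swap-in path
   * in the style of do_swap_page() would call the helper above with the
   * pte lock held and the page locked.  The variable names and the
   * surrounding locking are assumptions for the example only:
   *
   *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
   *	...
   *	do_page_add_anon_rmap(page, vma, addr,
   *			      exclusive ? RMAP_EXCLUSIVE : 0);
   *	...
   *	pte_unmap_unlock(pte, ptl);
   *
   * Passing RMAP_EXCLUSIVE lets __page_set_anon_rmap() keep the page on
   * this vma's own anon_vma instead of falling back to the root.
   */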
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
1112
  /**
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1113
1114
1115
1116
   * page_add_new_anon_rmap - add pte mapping to a new anonymous page
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1117
   * @compound:	charge the page as compound or small page
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1118
1119
1120
   *
   * Same as page_add_anon_rmap but must only be called on *new* pages.
   * This means the inc-and-test can be bypassed.
c97a9e10e   Nick Piggin   mm: more rmap che...
1121
   * Page does not have to be locked.
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1122
1123
   */
  void page_add_new_anon_rmap(struct page *page,
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1124
  	struct vm_area_struct *vma, unsigned long address, bool compound)
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1125
  {
6c357848b   Matthew Wilcox (Oracle)   mm: replace hpage...
1126
  	int nr = compound ? thp_nr_pages(page) : 1;
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1127

81d1b09c6   Sasha Levin   mm: convert a few...
1128
  	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
fa9949da5   Hugh Dickins   mm: use __SetPage...
1129
  	__SetPageSwapBacked(page);
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1130
1131
  	if (compound) {
  		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1132
1133
  		/* increment count (starts at -1) */
  		atomic_set(compound_mapcount_ptr(page), 0);
47e29d32a   John Hubbard   mm/gup: page->hpa...
1134
1135
  		if (hpage_pincount_available(page))
  			atomic_set(compound_pincount_ptr(page), 0);
468c39823   Johannes Weiner   mm: memcontrol: s...
1136
  		__inc_lruvec_page_state(page, NR_ANON_THPS);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1137
1138
1139
1140
1141
  	} else {
  		/* Anon THP always mapped first with PMD */
  		VM_BUG_ON_PAGE(PageTransCompound(page), page);
  		/* increment count (starts at -1) */
  		atomic_set(&page->_mapcount, 0);
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1142
  	}
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1143
  	__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
e8a03feb5   Rik van Riel   rmap: add exclusi...
1144
  	__page_set_anon_rmap(page, vma, address, 1);
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
1145
  }
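
  /*
   * Illustrative sketch (not part of the kernel sources): an anonymous
   * fault path in the style of do_anonymous_page() pairs this helper with
   * LRU insertion and the pte update, all under the pte lock.  Helper and
   * variable names below are assumptions for the example only, and error
   * handling is omitted:
   *
   *	page = alloc_zeroed_user_highpage_movable(vma, addr);
   *	entry = mk_pte(page, vma->vm_page_prot);
   *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
   *	page_add_new_anon_rmap(page, vma, addr, false);
   *	lru_cache_add_inactive_or_unevictable(page, vma);
   *	set_pte_at(mm, addr, pte, entry);
   *	pte_unmap_unlock(pte, ptl);
   *
   * Because the page is brand new and invisible to anyone else, the
   * "first mapping" accounting can be done unconditionally here.
   */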
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1146
1147
1148
  /**
   * page_add_file_rmap - add pte mapping to a file page
   * @page: the page to add the mapping to
e8b098fc5   Mike Rapoport   mm: kernel-doc: a...
1149
   * @compound: charge the page as compound or small page
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1150
   *
b8072f099   Hugh Dickins   [PATCH] mm: updat...
1151
   * The caller needs to hold the pte lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152
   */
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1153
  void page_add_file_rmap(struct page *page, bool compound)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154
  {
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1155
1156
1157
  	int i, nr = 1;
  
  	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
62cccb8c8   Johannes Weiner   mm: simplify lock...
1158
  	lock_page_memcg(page);
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1159
  	if (compound && PageTransHuge(page)) {
5eaf35ab1   Matthew Wilcox (Oracle)   mm/rmap: fix assu...
1160
  		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1161
1162
1163
1164
1165
  			if (atomic_inc_and_test(&page[i]._mapcount))
  				nr++;
  		}
  		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
  			goto out;
99cb0dbd4   Song Liu   mm,thp: add read-...
1166
1167
1168
1169
  		if (PageSwapBacked(page))
  			__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
  		else
  			__inc_node_page_state(page, NR_FILE_PMDMAPPED);
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1170
  	} else {
c8efc390c   Kirill A. Shutemov   mm, rmap: fix fal...
1171
1172
  		if (PageTransCompound(page) && page_mapping(page)) {
  			VM_WARN_ON_ONCE(!PageLocked(page));
9a73f61bd   Kirill A. Shutemov   thp, mlock: do no...
1173
1174
1175
1176
  			SetPageDoubleMap(compound_head(page));
  			if (PageMlocked(page))
  				clear_page_mlock(compound_head(page));
  		}
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1177
1178
  		if (!atomic_inc_and_test(&page->_mapcount))
  			goto out;
d69b042f3   Balbir Singh   memcg: add file-b...
1179
  	}
00f3ca2c2   Johannes Weiner   mm: memcontrol: p...
1180
  	__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1181
  out:
62cccb8c8   Johannes Weiner   mm: simplify lock...
1182
  	unlock_page_memcg(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1183
  }
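
  /*
   * Illustrative sketch (not part of the kernel sources): a file fault
   * path in the style of alloc_set_pte() takes the file branch like this,
   * again with the pte lock held.  Names are assumed for the example
   * only:
   *
   *	entry = mk_pte(page, vma->vm_page_prot);
   *	inc_mm_counter(vma->vm_mm, mm_counter_file(page));
   *	page_add_file_rmap(page, false);
   *	set_pte_at(vma->vm_mm, addr, pte, entry);
   *
   * A PMD-mapped file THP would instead pass compound == true so that the
   * compound mapcount and the NR_*_PMDMAPPED counters are used.
   */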
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1184
  static void page_remove_file_rmap(struct page *page, bool compound)
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1185
  {
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1186
  	int i, nr = 1;
57dea93ac   Steve Capper   rmap: fix compoun...
1187
  	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1188

53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1189
1190
1191
1192
  	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
  	if (unlikely(PageHuge(page))) {
  		/* hugetlb pages are always mapped with pmds */
  		atomic_dec(compound_mapcount_ptr(page));
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1193
  		return;
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1194
  	}
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1195

53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1196
  	/* page still mapped by someone else? */
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1197
  	if (compound && PageTransHuge(page)) {
5eaf35ab1   Matthew Wilcox (Oracle)   mm/rmap: fix assu...
1198
  		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1199
1200
1201
1202
  			if (atomic_add_negative(-1, &page[i]._mapcount))
  				nr++;
  		}
  		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1203
  			return;
99cb0dbd4   Song Liu   mm,thp: add read-...
1204
1205
1206
1207
  		if (PageSwapBacked(page))
  			__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
  		else
  			__dec_node_page_state(page, NR_FILE_PMDMAPPED);
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1208
1209
  	} else {
  		if (!atomic_add_negative(-1, &page->_mapcount))
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1210
  			return;
dd78fedde   Kirill A. Shutemov   rmap: support fil...
1211
  	}
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1212
1213
  
  	/*
00f3ca2c2   Johannes Weiner   mm: memcontrol: p...
1214
  	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1215
1216
1217
  	 * these counters are not modified in interrupt context, and
  	 * pte lock (a spinlock) is held, which implies preemption disabled.
  	 */
00f3ca2c2   Johannes Weiner   mm: memcontrol: p...
1218
  	__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1219
1220
1221
  
  	if (unlikely(PageMlocked(page)))
  		clear_page_mlock(page);
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1222
  }
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
  static void page_remove_anon_compound_rmap(struct page *page)
  {
  	int i, nr;
  
  	if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
  		return;
  
  	/* Hugepages are not counted in NR_ANON_PAGES for now. */
  	if (unlikely(PageHuge(page)))
  		return;
  
  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
  		return;
468c39823   Johannes Weiner   mm: memcontrol: s...
1236
  	__dec_lruvec_page_state(page, NR_ANON_THPS);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1237
1238
1239
1240
  
  	if (TestClearPageDoubleMap(page)) {
  		/*
  		 * Subpages can be mapped with PTEs too. Check how many of
f1fe80d4a   Kirill A. Shutemov   mm, thp: do not q...
1241
  		 * them are still mapped.
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1242
  		 */
5eaf35ab1   Matthew Wilcox (Oracle)   mm/rmap: fix assu...
1243
  		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1244
1245
1246
  			if (atomic_add_negative(-1, &page[i]._mapcount))
  				nr++;
  		}
f1fe80d4a   Kirill A. Shutemov   mm, thp: do not q...
1247
1248
1249
1250
1251
1252
  
  		/*
  		 * Queue the page for deferred split if at least one small
  		 * page of the compound page is unmapped, but at least one
  		 * small page is still mapped.
  		 */
5eaf35ab1   Matthew Wilcox (Oracle)   mm/rmap: fix assu...
1253
  		if (nr && nr < thp_nr_pages(page))
f1fe80d4a   Kirill A. Shutemov   mm, thp: do not q...
1254
  			deferred_split_huge_page(page);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1255
  	} else {
5eaf35ab1   Matthew Wilcox (Oracle)   mm/rmap: fix assu...
1256
  		nr = thp_nr_pages(page);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1257
  	}
e90309c9f   Kirill A. Shutemov   thp: allow mlocke...
1258
1259
  	if (unlikely(PageMlocked(page)))
  		clear_page_mlock(page);
f1fe80d4a   Kirill A. Shutemov   mm, thp: do not q...
1260
  	if (nr)
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1261
  		__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1262
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1263
1264
  /**
   * page_remove_rmap - take down pte mapping from a page
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1265
1266
   * @page:	page to remove mapping from
   * @compound:	uncharge the page as compound or small page
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267
   *
b8072f099   Hugh Dickins   [PATCH] mm: updat...
1268
   * The caller needs to hold the pte lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1269
   */
d281ee614   Kirill A. Shutemov   rmap: add argumen...
1270
  void page_remove_rmap(struct page *page, bool compound)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1271
  {
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1272
  	lock_page_memcg(page);
89c06bd52   KAMEZAWA Hiroyuki   memcg: use new lo...
1273

be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1274
1275
1276
1277
1278
1279
1280
1281
1282
  	if (!PageAnon(page)) {
  		page_remove_file_rmap(page, compound);
  		goto out;
  	}
  
  	if (compound) {
  		page_remove_anon_compound_rmap(page);
  		goto out;
  	}
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1283

b904dcfed   KOSAKI Motohiro   mm: clean up page...
1284
1285
  	/* page still mapped by someone else? */
  	if (!atomic_add_negative(-1, &page->_mapcount))
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1286
  		goto out;
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1287

b904dcfed   KOSAKI Motohiro   mm: clean up page...
1288
  	/*
bea04b073   Jianyu Zhan   mm: use the light...
1289
1290
  	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
  	 * these counters are not modified in interrupt context, and
bea04b073   Jianyu Zhan   mm: use the light...
1291
  	 * pte lock (a spinlock) is held, which implies preemption disabled.
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1292
  	 */
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1293
  	__dec_lruvec_page_state(page, NR_ANON_MAPPED);
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1294

e6c509f85   Hugh Dickins   mm: use clear_pag...
1295
1296
  	if (unlikely(PageMlocked(page)))
  		clear_page_mlock(page);
8186eb6a7   Johannes Weiner   mm: rmap: split o...
1297

9a982250f   Kirill A. Shutemov   thp: introduce de...
1298
1299
  	if (PageTransCompound(page))
  		deferred_split_huge_page(compound_head(page));
b904dcfed   KOSAKI Motohiro   mm: clean up page...
1300
1301
1302
1303
  	/*
  	 * It would be tidy to reset the PageAnon mapping here,
  	 * but that might overwrite a racing page_add_anon_rmap
  	 * which increments mapcount after us but sets mapping
2d4894b5d   Mel Gorman   mm: remove cold p...
1304
  	 * before us: so leave the reset to free_unref_page,
b904dcfed   KOSAKI Motohiro   mm: clean up page...
1305
1306
1307
1308
  	 * and remember that it's only reliable while mapped.
  	 * Leaving it set also helps swapoff to reinstate ptes
  	 * faster for those pages still in swapcache.
  	 */
be5d0a74c   Johannes Weiner   mm: memcontrol: s...
1309
1310
  out:
  	unlock_page_memcg(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1311
1312
1313
  }
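
  /*
   * Illustrative sketch (not part of the kernel sources): unmap paths in
   * the style of zap_pte_range() clear the pte first and only then drop
   * the rmap, still under the pte lock.  Names are assumed for the
   * example only:
   *
   *	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
   *	if (pte_dirty(ptent))
   *		set_page_dirty(page);
   *	page_remove_rmap(page, false);
   *	put_page(page);
   *
   * The ordering matters: the pte is torn down before the reverse map
   * entry, so the page is never considered unmapped while a hardware
   * mapping still points at it.
   */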
  
  /*
526295064   Joonsoo Kim   mm/rmap: use rmap...
1314
   * @arg: enum ttu_flags will be passed to this argument
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1315
   */
e4b822227   Minchan Kim   mm: make rmap_one...
1316
  static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
526295064   Joonsoo Kim   mm/rmap: use rmap...
1317
  		     unsigned long address, void *arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1318
1319
  {
  	struct mm_struct *mm = vma->vm_mm;
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1320
1321
1322
1323
1324
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  		.address = address,
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1325
  	pte_t pteval;
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1326
  	struct page *subpage;
785373b4c   Linus Torvalds   Revert "rmap: do ...
1327
  	bool ret = true;
ac46d4f3c   Jérôme Glisse   mm/mmu_notifier: ...
1328
  	struct mmu_notifier_range range;
4708f3188   Palmer Dabbelt   mm: prevent a war...
1329
  	enum ttu_flags flags = (enum ttu_flags)(long)arg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1330

b87537d9e   Hugh Dickins   mm: rmap use pte ...
1331
1332
  	/* munlock has nothing to gain from examining un-locked vmas */
  	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
e4b822227   Minchan Kim   mm: make rmap_one...
1333
  		return true;
b87537d9e   Hugh Dickins   mm: rmap use pte ...
1334

a5430dda8   Jérôme Glisse   mm/migrate: suppo...
1335
1336
1337
  	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
  	    is_zone_device_page(page) && !is_device_private_page(page))
  		return true;
fec89c109   Kirill A. Shutemov   thp: rewrite free...
1338
1339
  	if (flags & TTU_SPLIT_HUGE_PMD) {
  		split_huge_pmd_address(vma, address,
b5ff8161e   Naoya Horiguchi   mm: thp: introduc...
1340
  				flags & TTU_SPLIT_FREEZE, page);
fec89c109   Kirill A. Shutemov   thp: rewrite free...
1341
  	}
369ea8242   Jérôme Glisse   mm/rmap: update t...
1342
  	/*
017b1660d   Mike Kravetz   mm: migration: fi...
1343
1344
1345
1346
1347
1348
  	 * For THP, we have to assume the worst case, i.e. pmd invalidation.
  	 * For hugetlb, it could be much worse if we need to do pud
  	 * invalidation in the case of pmd sharing.
  	 *
  	 * Note that the page cannot be freed in this function, as the caller
  	 * of try_to_unmap() must hold a reference on the page.
369ea8242   Jérôme Glisse   mm/rmap: update t...
1349
  	 */
7269f9999   Jérôme Glisse   mm/mmu_notifier: ...
1350
  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
6f4f13e8d   Jérôme Glisse   mm/mmu_notifier: ...
1351
  				address,
a50b854e0   Matthew Wilcox (Oracle)   mm: introduce pag...
1352
  				min(vma->vm_end, address + page_size(page)));
017b1660d   Mike Kravetz   mm: migration: fi...
1353
1354
1355
1356
1357
  	if (PageHuge(page)) {
  		/*
  		 * If sharing is possible, start and end will be adjusted
  		 * accordingly.
  		 */
ac46d4f3c   Jérôme Glisse   mm/mmu_notifier: ...
1358
1359
  		adjust_range_if_pmd_sharing_possible(vma, &range.start,
  						     &range.end);
017b1660d   Mike Kravetz   mm: migration: fi...
1360
  	}
ac46d4f3c   Jérôme Glisse   mm/mmu_notifier: ...
1361
  	mmu_notifier_invalidate_range_start(&range);
369ea8242   Jérôme Glisse   mm/rmap: update t...
1362

c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1363
  	while (page_vma_mapped_walk(&pvmw)) {
616b83715   Zi Yan   mm: thp: enable t...
1364
1365
1366
1367
  #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
  		/* PMD-mapped THP migration entry */
  		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
  			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
616b83715   Zi Yan   mm: thp: enable t...
1368
1369
1370
1371
  			set_pmd_migration_entry(&pvmw, page);
  			continue;
  		}
  #endif
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
  		/*
  		 * If the page is mlock()d, we cannot swap it out.
  		 * If it's recently referenced (perhaps page_referenced
  		 * skipped over this mm) then we should reactivate it.
  		 */
  		if (!(flags & TTU_IGNORE_MLOCK)) {
  			if (vma->vm_flags & VM_LOCKED) {
  				/* PTE-mapped THP are never mlocked */
  				if (!PageTransCompound(page)) {
  					/*
  					 * Holding pte lock, we do *not* need
c1e8d7c6a   Michel Lespinasse   mmap locking API:...
1383
  					 * mmap_lock here
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1384
1385
1386
  					 */
  					mlock_vma_page(page);
  				}
e4b822227   Minchan Kim   mm: make rmap_one...
1387
  				ret = false;
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1388
1389
  				page_vma_mapped_walk_done(&pvmw);
  				break;
9a73f61bd   Kirill A. Shutemov   thp, mlock: do no...
1390
  			}
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1391
1392
  			if (flags & TTU_MUNLOCK)
  				continue;
b87537d9e   Hugh Dickins   mm: rmap use pte ...
1393
  		}
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1394

8346242a7   Kirill A. Shutemov   rmap: fix NULL-po...
1395
1396
1397
1398
  		/* Unexpected PMD-mapped THP? */
  		VM_BUG_ON_PAGE(!pvmw.pte, page);
  
  		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
785373b4c   Linus Torvalds   Revert "rmap: do ...
1399
  		address = pvmw.address;
336bf30eb   Mike Kravetz   hugetlbfs: fix an...
1400
  		if (PageHuge(page) && !PageAnon(page)) {
c0d0381ad   Mike Kravetz   hugetlbfs: use i_...
1401
1402
1403
1404
1405
1406
  			/*
  			 * To call huge_pmd_unshare, i_mmap_rwsem must be
  			 * held in write mode.  Caller needs to explicitly
  			 * do this outside rmap routines.
  			 */
  			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
34ae204f1   Mike Kravetz   hugetlbfs: remove...
1407
  			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
017b1660d   Mike Kravetz   mm: migration: fi...
1408
1409
1410
1411
1412
1413
1414
  				/*
  				 * huge_pmd_unshare unmapped an entire PMD
  				 * page.  There is no way of knowing exactly
  				 * which PMDs may be cached for this mm, so
  				 * we must flush them all.  start/end were
  				 * already adjusted above to cover this range.
  				 */
ac46d4f3c   Jérôme Glisse   mm/mmu_notifier: ...
1415
1416
1417
1418
  				flush_cache_range(vma, range.start, range.end);
  				flush_tlb_range(vma, range.start, range.end);
  				mmu_notifier_invalidate_range(mm, range.start,
  							      range.end);
017b1660d   Mike Kravetz   mm: migration: fi...
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
  
  				/*
  				 * The ref count of the PMD page was dropped
  				 * which is part of the way map counting
  				 * is done for shared PMDs.  Return 'true'
  				 * here.  When there is no other sharing,
  				 * huge_pmd_unshare returns false and we will
  				 * unmap the actual page and drop map count
  				 * to zero.
  				 */
  				page_vma_mapped_walk_done(&pvmw);
  				break;
  			}
  		}
8346242a7   Kirill A. Shutemov   rmap: fix NULL-po...
1433

a5430dda8   Jérôme Glisse   mm/migrate: suppo...
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
  		if (IS_ENABLED(CONFIG_MIGRATION) &&
  		    (flags & TTU_MIGRATION) &&
  		    is_zone_device_page(page)) {
  			swp_entry_t entry;
  			pte_t swp_pte;
  
  			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
  
  			/*
  			 * Store the pfn of the page in a special migration
  			 * pte. do_swap_page() will wait until the migration
  			 * pte is removed and then restart fault handling.
  			 */
  			entry = make_migration_entry(page, 0);
  			swp_pte = swp_entry_to_pte(entry);
ad7df764b   Alistair Popple   mm/rmap: fixup co...
1449
1450
1451
1452
1453
1454
  
  			/*
  			 * pteval maps a zone device page and is therefore
  			 * a swap pte.
  			 */
  			if (pte_swp_soft_dirty(pteval))
a5430dda8   Jérôme Glisse   mm/migrate: suppo...
1455
  				swp_pte = pte_swp_mksoft_dirty(swp_pte);
ad7df764b   Alistair Popple   mm/rmap: fixup co...
1456
  			if (pte_swp_uffd_wp(pteval))
f45ec5ff1   Peter Xu   userfaultfd: wp: ...
1457
  				swp_pte = pte_swp_mkuffd_wp(swp_pte);
a5430dda8   Jérôme Glisse   mm/migrate: suppo...
1458
  			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1459
1460
1461
  			/*
  			 * No need to invalidate here, it will synchronize
  			 * against the special swap migration pte.
1de13ee59   Ralph Campbell   mm/hmm: fix bad s...
1462
1463
1464
1465
1466
1467
1468
  			 *
  			 * The assignment to subpage above was computed from a
  			 * swap PTE which results in an invalid pointer.
  			 * Since only PAGE_SIZE pages can currently be
  			 * migrated, just set it to page. This will need to be
  			 * changed when hugepage migrations to device private
  			 * memory are supported.
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1469
  			 */
1de13ee59   Ralph Campbell   mm/hmm: fix bad s...
1470
  			subpage = page;
a5430dda8   Jérôme Glisse   mm/migrate: suppo...
1471
1472
  			goto discard;
  		}
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1473
  		/* Nuke the page table entry. */
785373b4c   Linus Torvalds   Revert "rmap: do ...
1474
  		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1475
1476
1477
1478
1479
1480
1481
1482
1483
  		if (should_defer_flush(mm, flags)) {
  			/*
  			 * We clear the PTE but do not flush so potentially
  			 * a remote CPU could still be writing to the page.
  			 * If the entry was previously clean then the
  			 * architecture must guarantee that a clear->dirty
  			 * transition on a cached TLB entry is written through
  			 * and traps if the PTE is unmapped.
  			 */
785373b4c   Linus Torvalds   Revert "rmap: do ...
1484
  			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1485
1486
1487
  
  			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
  		} else {
785373b4c   Linus Torvalds   Revert "rmap: do ...
1488
  			pteval = ptep_clear_flush(vma, address, pvmw.pte);
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1489
  		}
72b252aed   Mel Gorman   mm: send one IPI ...
1490

c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1491
1492
1493
  		/* Move the dirty bit to the page. Now the pte is gone. */
  		if (pte_dirty(pteval))
  			set_page_dirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1494

c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1495
1496
  		/* Update high watermark before we lower rss */
  		update_hiwater_rss(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1497

c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1498
  		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
5fd27b8e7   Punit Agrawal   mm: rmap: use cor...
1499
  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1500
  			if (PageHuge(page)) {
d8c6546b1   Matthew Wilcox (Oracle)   mm: introduce com...
1501
  				hugetlb_count_sub(compound_nr(page), mm);
785373b4c   Linus Torvalds   Revert "rmap: do ...
1502
  				set_huge_swap_pte_at(mm, address,
5fd27b8e7   Punit Agrawal   mm: rmap: use cor...
1503
1504
  						     pvmw.pte, pteval,
  						     vma_mmu_pagesize(vma));
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1505
1506
  			} else {
  				dec_mm_counter(mm, mm_counter(page));
785373b4c   Linus Torvalds   Revert "rmap: do ...
1507
  				set_pte_at(mm, address, pvmw.pte, pteval);
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1508
  			}
365e9c87a   Hugh Dickins   [PATCH] mm: updat...
1509

bce73e484   Christian Borntraeger   mm: do not drop u...
1510
  		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1511
1512
1513
1514
  			/*
  			 * The guest indicated that the page content is of no
  			 * interest anymore. Simply discard the pte, vmscan
  			 * will take care of the rest.
bce73e484   Christian Borntraeger   mm: do not drop u...
1515
1516
1517
1518
1519
  			 * A future reference will then fault in a new zero
  			 * page. When userfaultfd is active, we must not drop
  			 * this page though, as its main user (postcopy
  			 * migration) will not expect userfaults on already
  			 * copied pages.
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1520
  			 */
eca56ff90   Jerome Marchand   mm, shmem: add in...
1521
  			dec_mm_counter(mm, mm_counter(page));
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1522
1523
1524
  			/* We have to invalidate as we cleared the pte */
  			mmu_notifier_invalidate_range(mm, address,
  						      address + PAGE_SIZE);
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1525
  		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
b5ff8161e   Naoya Horiguchi   mm: thp: introduc...
1526
  				(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1527
1528
  			swp_entry_t entry;
  			pte_t swp_pte;
ca827d55e   Khalid Aziz   mm, swap: Add inf...
1529
1530
1531
1532
1533
1534
1535
  
  			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
  				set_pte_at(mm, address, pvmw.pte, pteval);
  				ret = false;
  				page_vma_mapped_walk_done(&pvmw);
  				break;
  			}
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
  			/*
  			 * Store the pfn of the page in a special migration
  			 * pte. do_swap_page() will wait until the migration
  			 * pte is removed and then restart fault handling.
  			 */
  			entry = make_migration_entry(subpage,
  					pte_write(pteval));
  			swp_pte = swp_entry_to_pte(entry);
  			if (pte_soft_dirty(pteval))
  				swp_pte = pte_swp_mksoft_dirty(swp_pte);
f45ec5ff1   Peter Xu   userfaultfd: wp: ...
1546
1547
  			if (pte_uffd_wp(pteval))
  				swp_pte = pte_swp_mkuffd_wp(swp_pte);
785373b4c   Linus Torvalds   Revert "rmap: do ...
1548
  			set_pte_at(mm, address, pvmw.pte, swp_pte);
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1549
1550
1551
1552
  			/*
  			 * No need to invalidate here, it will synchronize
  			 * against the special swap migration pte.
  			 */
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1553
1554
1555
1556
1557
1558
1559
  		} else if (PageAnon(page)) {
  			swp_entry_t entry = { .val = page_private(subpage) };
  			pte_t swp_pte;
  			/*
  			 * Store the swap location in the pte.
  			 * See handle_pte_fault() ...
  			 */
eb94a8784   Minchan Kim   mm: fix lazyfree ...
1560
1561
  			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
  				WARN_ON_ONCE(1);
83612a948   Minchan Kim   mm: remove SWAP_[...
1562
  				ret = false;
369ea8242   Jérôme Glisse   mm/rmap: update t...
1563
  				/* We have to invalidate as we cleared the pte */
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1564
1565
  				mmu_notifier_invalidate_range(mm, address,
  							address + PAGE_SIZE);
eb94a8784   Minchan Kim   mm: fix lazyfree ...
1566
1567
1568
  				page_vma_mapped_walk_done(&pvmw);
  				break;
  			}
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1569

802a3a92a   Shaohua Li   mm: reclaim MADV_...
1570
1571
1572
  			/* MADV_FREE page check */
  			if (!PageSwapBacked(page)) {
  				if (!PageDirty(page)) {
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1573
1574
1575
  					/* Invalidate as we cleared the pte */
  					mmu_notifier_invalidate_range(mm,
  						address, address + PAGE_SIZE);
802a3a92a   Shaohua Li   mm: reclaim MADV_...
1576
1577
1578
1579
1580
1581
1582
1583
  					dec_mm_counter(mm, MM_ANONPAGES);
  					goto discard;
  				}
  
  				/*
  				 * If the page was redirtied, it cannot be
  				 * discarded. Remap the page into the page table.
  				 */
785373b4c   Linus Torvalds   Revert "rmap: do ...
1584
  				set_pte_at(mm, address, pvmw.pte, pteval);
18863d3a3   Minchan Kim   mm: remove SWAP_D...
1585
  				SetPageSwapBacked(page);
e4b822227   Minchan Kim   mm: make rmap_one...
1586
  				ret = false;
802a3a92a   Shaohua Li   mm: reclaim MADV_...
1587
1588
  				page_vma_mapped_walk_done(&pvmw);
  				break;
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1589
  			}
854e9ed09   Minchan Kim   mm: support madvi...
1590

c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1591
  			if (swap_duplicate(entry) < 0) {
785373b4c   Linus Torvalds   Revert "rmap: do ...
1592
  				set_pte_at(mm, address, pvmw.pte, pteval);
e4b822227   Minchan Kim   mm: make rmap_one...
1593
  				ret = false;
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1594
1595
1596
  				page_vma_mapped_walk_done(&pvmw);
  				break;
  			}
ca827d55e   Khalid Aziz   mm, swap: Add inf...
1597
1598
1599
1600
1601
1602
  			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
  				set_pte_at(mm, address, pvmw.pte, pteval);
  				ret = false;
  				page_vma_mapped_walk_done(&pvmw);
  				break;
  			}
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1603
1604
1605
1606
1607
1608
  			if (list_empty(&mm->mmlist)) {
  				spin_lock(&mmlist_lock);
  				if (list_empty(&mm->mmlist))
  					list_add(&mm->mmlist, &init_mm.mmlist);
  				spin_unlock(&mmlist_lock);
  			}
854e9ed09   Minchan Kim   mm: support madvi...
1609
  			dec_mm_counter(mm, MM_ANONPAGES);
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1610
1611
1612
1613
  			inc_mm_counter(mm, MM_SWAPENTS);
  			swp_pte = swp_entry_to_pte(entry);
  			if (pte_soft_dirty(pteval))
  				swp_pte = pte_swp_mksoft_dirty(swp_pte);
f45ec5ff1   Peter Xu   userfaultfd: wp: ...
1614
1615
  			if (pte_uffd_wp(pteval))
  				swp_pte = pte_swp_mkuffd_wp(swp_pte);
785373b4c   Linus Torvalds   Revert "rmap: do ...
1616
  			set_pte_at(mm, address, pvmw.pte, swp_pte);
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1617
1618
1619
1620
1621
  			/* Invalidate as we cleared the pte */
  			mmu_notifier_invalidate_range(mm, address,
  						      address + PAGE_SIZE);
  		} else {
  			/*
906f9cdfc   Hugh Dickins   mm/huge_memory: r...
1622
1623
1624
  			 * This is a locked file-backed page, thus it cannot
  			 * be removed from the page cache and replaced by a new
  			 * page before mmu_notifier_invalidate_range_end, so no
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1625
1626
1627
1628
  			 * concurrent thread can update its page table to
  			 * point at a new page while a device is still using this
  			 * page.
  			 *
ad56b738c   Mike Rapoport   docs/vm: rename d...
1629
  			 * See Documentation/vm/mmu_notifier.rst
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1630
  			 */
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1631
  			dec_mm_counter(mm, mm_counter_file(page));
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1632
  		}
854e9ed09   Minchan Kim   mm: support madvi...
1633
  discard:
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1634
1635
1636
1637
1638
  		/*
  		 * No need to call mmu_notifier_invalidate_range(); it has been
  		 * done above for all cases requiring it to happen under the
  		 * page table lock, before mmu_notifier_invalidate_range_end()
  		 *
ad56b738c   Mike Rapoport   docs/vm: rename d...
1639
  		 * See Documentation/vm/mmu_notifier.rst
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1640
  		 */
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1641
1642
  		page_remove_rmap(subpage, PageHuge(page));
  		put_page(page);
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1643
  	}
369ea8242   Jérôme Glisse   mm/rmap: update t...
1644

ac46d4f3c   Jérôme Glisse   mm/mmu_notifier: ...
1645
  	mmu_notifier_invalidate_range_end(&range);
369ea8242   Jérôme Glisse   mm/rmap: update t...
1646

caed0f486   KOSAKI Motohiro   mm: simplify try_...
1647
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1648
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
1649
1650
  static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
  {
222100eed   Anshuman Khandual   mm/vma: make is_v...
1651
  	return vma_is_temporary_stack(vma);
526295064   Joonsoo Kim   mm/rmap: use rmap...
1652
  }
2a52bcbcc   Kirill A. Shutemov   rmap: extend try_...
1653
  static int page_mapcount_is_zero(struct page *page)
526295064   Joonsoo Kim   mm/rmap: use rmap...
1654
  {
c7ab0d2fd   Kirill A. Shutemov   mm: convert try_t...
1655
  	return !total_mapcount(page);
2a52bcbcc   Kirill A. Shutemov   rmap: extend try_...
1656
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
1657

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1658
1659
1660
  /**
   * try_to_unmap - try to remove all page table mappings to a page
   * @page: the page to get unmapped
14fa31b89   Andi Kleen   HWPOISON: Use bit...
1661
   * @flags: action and flags
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1662
1663
1664
   *
   * Tries to remove all the page table entries which are mapping this
   * page, used in the pageout path.  Caller must hold the page lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1665
   *
666e5a406   Minchan Kim   mm: make ttu's re...
1666
   * If unmap is successful, return true. Otherwise, false.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1667
   */
666e5a406   Minchan Kim   mm: make ttu's re...
1668
  bool try_to_unmap(struct page *page, enum ttu_flags flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1669
  {
526295064   Joonsoo Kim   mm/rmap: use rmap...
1670
1671
  	struct rmap_walk_control rwc = {
  		.rmap_one = try_to_unmap_one,
802a3a92a   Shaohua Li   mm: reclaim MADV_...
1672
  		.arg = (void *)flags,
2a52bcbcc   Kirill A. Shutemov   rmap: extend try_...
1673
  		.done = page_mapcount_is_zero,
526295064   Joonsoo Kim   mm/rmap: use rmap...
1674
1675
  		.anon_lock = page_lock_anon_vma_read,
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1676

526295064   Joonsoo Kim   mm/rmap: use rmap...
1677
1678
1679
1680
1681
1682
1683
1684
  	/*
  	 * During exec, a temporary VMA is set up and later moved.
  	 * The VMA is moved under the anon_vma lock but not the
  	 * page tables leading to a race where migration cannot
  	 * find the migration ptes. Rather than increasing the
  	 * locking requirements of exec(), migration skips
  	 * temporary VMAs until after exec() completes.
  	 */
b5ff8161e   Naoya Horiguchi   mm: thp: introduc...
1685
1686
  	if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
  	    && !PageKsm(page) && PageAnon(page))
526295064   Joonsoo Kim   mm/rmap: use rmap...
1687
  		rwc.invalid_vma = invalid_migration_vma;
2a52bcbcc   Kirill A. Shutemov   rmap: extend try_...
1688
  	if (flags & TTU_RMAP_LOCKED)
33fc80e25   Minchan Kim   mm: remove SWAP_A...
1689
  		rmap_walk_locked(page, &rwc);
2a52bcbcc   Kirill A. Shutemov   rmap: extend try_...
1690
  	else
33fc80e25   Minchan Kim   mm: remove SWAP_A...
1691
  		rmap_walk(page, &rwc);
526295064   Joonsoo Kim   mm/rmap: use rmap...
1692

666e5a406   Minchan Kim   mm: make ttu's re...
1693
  	return !page_mapcount(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1694
  }
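
  /*
   * Illustrative sketch (not part of the kernel sources): reclaim in the
   * style of shrink_page_list() uses the return value to decide whether
   * the page may continue towards being freed.  The flags variable and
   * the label are assumptions for the example only:
   *
   *	if (page_mapped(page)) {
   *		if (!try_to_unmap(page, ttu_flags))
   *			goto activate_locked;
   *	}
   *
   * A false return simply means at least one mapping survived; it is not
   * an error, and the caller is expected to retry later or skip the page.
   */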
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
1695

2a52bcbcc   Kirill A. Shutemov   rmap: extend try_...
1696
1697
1698
1699
  static int page_not_mapped(struct page *page)
  {
  	return !page_mapped(page);
  }
b291f0003   Nick Piggin   mlock: mlocked pa...
1700
1701
1702
1703
1704
1705
1706
  /**
   * try_to_munlock - try to munlock a page
   * @page: the page to be munlocked
   *
   * Called from munlock code.  Checks all of the VMAs mapping the page
   * to make sure nobody else has this page mlocked. The page will be
   * returned with PG_mlocked cleared if no other vmas have it mlocked.
b291f0003   Nick Piggin   mlock: mlocked pa...
1707
   */
854e9ed09   Minchan Kim   mm: support madvi...
1708

192d72325   Minchan Kim   mm: make try_to_m...
1709
1710
  void try_to_munlock(struct page *page)
  {
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1711
1712
  	struct rmap_walk_control rwc = {
  		.rmap_one = try_to_unmap_one,
802a3a92a   Shaohua Li   mm: reclaim MADV_...
1713
  		.arg = (void *)TTU_MUNLOCK,
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1714
  		.done = page_not_mapped,
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1715
1716
1717
  		.anon_lock = page_lock_anon_vma_read,
  
  	};
309381fea   Sasha Levin   mm: dump page whe...
1718
  	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
192d72325   Minchan Kim   mm: make try_to_m...
1719
  	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
b291f0003   Nick Piggin   mlock: mlocked pa...
1720

192d72325   Minchan Kim   mm: make try_to_m...
1721
  	rmap_walk(page, &rwc);
b291f0003   Nick Piggin   mlock: mlocked pa...
1722
  }
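
  /*
   * Illustrative sketch (not part of the kernel sources): the munlock
   * path, roughly in the style of __munlock_isolated_page(), calls this
   * on an isolated, locked page and then checks whether some other VMA
   * re-asserted the mlock.  The accounting shown is an assumption for
   * the example only:
   *
   *	if (page_mapcount(page) > 1)
   *		try_to_munlock(page);
   *	if (!PageMlocked(page))
   *		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
   *
   * If any mapping VMA is still VM_LOCKED, try_to_unmap_one() re-sets
   * PG_mlocked via mlock_vma_page() and the page stays unevictable.
   */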
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1723

01d8b20de   Peter Zijlstra   mm: simplify anon...
1724
  void __put_anon_vma(struct anon_vma *anon_vma)
76545066c   Rik van Riel   mm: extend KSM re...
1725
  {
01d8b20de   Peter Zijlstra   mm: simplify anon...
1726
  	struct anon_vma *root = anon_vma->root;
76545066c   Rik van Riel   mm: extend KSM re...
1727

624483f3e   Andrey Ryabinin   mm: rmap: fix use...
1728
  	anon_vma_free(anon_vma);
01d8b20de   Peter Zijlstra   mm: simplify anon...
1729
1730
  	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
  		anon_vma_free(root);
76545066c   Rik van Riel   mm: extend KSM re...
1731
  }
76545066c   Rik van Riel   mm: extend KSM re...
1732

0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1733
1734
  static struct anon_vma *rmap_walk_anon_lock(struct page *page,
  					struct rmap_walk_control *rwc)
faecd8dd8   Joonsoo Kim   mm/rmap: factor l...
1735
1736
  {
  	struct anon_vma *anon_vma;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1737
1738
  	if (rwc->anon_lock)
  		return rwc->anon_lock(page);
faecd8dd8   Joonsoo Kim   mm/rmap: factor l...
1739
1740
1741
  	/*
  	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
  	 * because that depends on page_mapped(); but not all its usages
c1e8d7c6a   Michel Lespinasse   mmap locking API:...
1742
  	 * are holding mmap_lock. Users without mmap_lock are required to
faecd8dd8   Joonsoo Kim   mm/rmap: factor l...
1743
1744
1745
1746
1747
1748
1749
1750
1751
  	 * take a reference count to prevent the anon_vma from disappearing
  	 */
  	anon_vma = page_anon_vma(page);
  	if (!anon_vma)
  		return NULL;
  
  	anon_vma_lock_read(anon_vma);
  	return anon_vma;
  }
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1752
  /*
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1753
1754
1755
1756
1757
1758
1759
1760
   * rmap_walk_anon - do something to an anonymous page using the
   * object-based rmap method
   * @page: the page to be handled
   * @rwc: control variable according to each walk type
   *
   * Find all the mappings of a page using the mapping pointer and the vma chains
   * contained in the anon_vma struct it points to.
   *
c1e8d7c6a   Michel Lespinasse   mmap locking API:...
1761
   * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1762
1763
1764
   * where the page was found will be held for write.  So, we won't recheck
   * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
   * LOCKED.
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1765
   */
1df631ae1   Minchan Kim   mm: make rmap_wal...
1766
  static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
b97731992   Kirill A. Shutemov   rmap: introduce r...
1767
  		bool locked)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1768
1769
  {
  	struct anon_vma *anon_vma;
a8fa41ad2   Kirill A. Shutemov   mm, rmap: check a...
1770
  	pgoff_t pgoff_start, pgoff_end;
5beb49305   Rik van Riel   mm: change anon_v...
1771
  	struct anon_vma_chain *avc;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1772

b97731992   Kirill A. Shutemov   rmap: introduce r...
1773
1774
1775
1776
1777
1778
1779
  	if (locked) {
  		anon_vma = page_anon_vma(page);
  		/* anon_vma disappear under us? */
  		VM_BUG_ON_PAGE(!anon_vma, page);
  	} else {
  		anon_vma = rmap_walk_anon_lock(page, rwc);
  	}
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1780
  	if (!anon_vma)
1df631ae1   Minchan Kim   mm: make rmap_wal...
1781
  		return;
faecd8dd8   Joonsoo Kim   mm/rmap: factor l...
1782

a8fa41ad2   Kirill A. Shutemov   mm, rmap: check a...
1783
  	pgoff_start = page_to_pgoff(page);
6c357848b   Matthew Wilcox (Oracle)   mm: replace hpage...
1784
  	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
a8fa41ad2   Kirill A. Shutemov   mm, rmap: check a...
1785
1786
  	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
  			pgoff_start, pgoff_end) {
5beb49305   Rik van Riel   mm: change anon_v...
1787
  		struct vm_area_struct *vma = avc->vma;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1788
  		unsigned long address = vma_address(page, vma);
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1789

ad12695f1   Andrea Arcangeli   ksm: add cond_res...
1790
  		cond_resched();
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1791
1792
  		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  			continue;
e4b822227   Minchan Kim   mm: make rmap_one...
1793
  		if (!rwc->rmap_one(page, vma, address, rwc->arg))
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1794
  			break;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1795
1796
  		if (rwc->done && rwc->done(page))
  			break;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1797
  	}
b97731992   Kirill A. Shutemov   rmap: introduce r...
1798
1799
1800
  
  	if (!locked)
  		anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1801
  }
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1802
1803
1804
1805
1806
1807
1808
1809
  /*
   * rmap_walk_file - do something to a file page using the object-based rmap method
   * @page: the page to be handled
   * @rwc: control variable according to each walk type
   *
   * Find all the mappings of a page using the mapping pointer and the vma chains
   * contained in the address_space struct it points to.
   *
c1e8d7c6a   Michel Lespinasse   mmap locking API:...
1810
   * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
e8351ac9b   Joonsoo Kim   mm/rmap: use rmap...
1811
1812
1813
1814
   * where the page was found will be held for write.  So, we won't recheck
   * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
   * LOCKED.
   */
1df631ae1   Minchan Kim   mm: make rmap_wal...
1815
  static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
b97731992   Kirill A. Shutemov   rmap: introduce r...
1816
  		bool locked)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1817
  {
b97731992   Kirill A. Shutemov   rmap: introduce r...
1818
  	struct address_space *mapping = page_mapping(page);
a8fa41ad2   Kirill A. Shutemov   mm, rmap: check a...
1819
  	pgoff_t pgoff_start, pgoff_end;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1820
  	struct vm_area_struct *vma;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1821

9f32624be   Joonsoo Kim   mm/rmap: use rmap...
1822
1823
1824
1825
  	/*
  	 * The page lock not only makes sure that page->mapping cannot
  	 * suddenly be NULLified by truncation, it makes sure that the
  	 * structure at mapping cannot be freed and reused yet,
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
1826
  	 * so we can safely take mapping->i_mmap_rwsem.
9f32624be   Joonsoo Kim   mm/rmap: use rmap...
1827
  	 */
81d1b09c6   Sasha Levin   mm: convert a few...
1828
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
9f32624be   Joonsoo Kim   mm/rmap: use rmap...
1829

e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1830
  	if (!mapping)
1df631ae1   Minchan Kim   mm: make rmap_wal...
1831
  		return;
3dec0ba0b   Davidlohr Bueso   mm/rmap: share th...
1832

a8fa41ad2   Kirill A. Shutemov   mm, rmap: check a...
1833
  	pgoff_start = page_to_pgoff(page);
6c357848b   Matthew Wilcox (Oracle)   mm: replace hpage...
1834
  	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
b97731992   Kirill A. Shutemov   rmap: introduce r...
1835
1836
  	if (!locked)
  		i_mmap_lock_read(mapping);
a8fa41ad2   Kirill A. Shutemov   mm, rmap: check a...
1837
1838
  	vma_interval_tree_foreach(vma, &mapping->i_mmap,
  			pgoff_start, pgoff_end) {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1839
  		unsigned long address = vma_address(page, vma);
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1840

ad12695f1   Andrea Arcangeli   ksm: add cond_res...
1841
  		cond_resched();
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1842
1843
  		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  			continue;
e4b822227   Minchan Kim   mm: make rmap_one...
1844
  		if (!rwc->rmap_one(page, vma, address, rwc->arg))
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1845
1846
1847
  			goto done;
  		if (rwc->done && rwc->done(page))
  			goto done;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1848
  	}
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1849

0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1850
  done:
b97731992   Kirill A. Shutemov   rmap: introduce r...
1851
1852
  	if (!locked)
  		i_mmap_unlock_read(mapping);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1853
  }
1df631ae1   Minchan Kim   mm: make rmap_wal...
1854
  void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1855
  {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1856
  	if (unlikely(PageKsm(page)))
1df631ae1   Minchan Kim   mm: make rmap_wal...
1857
  		rmap_walk_ksm(page, rwc);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1858
  	else if (PageAnon(page))
1df631ae1   Minchan Kim   mm: make rmap_wal...
1859
  		rmap_walk_anon(page, rwc, false);
b97731992   Kirill A. Shutemov   rmap: introduce r...
1860
  	else
1df631ae1   Minchan Kim   mm: make rmap_wal...
1861
  		rmap_walk_file(page, rwc, false);
b97731992   Kirill A. Shutemov   rmap: introduce r...
1862
1863
1864
  }
  
  /* Like rmap_walk, but caller holds relevant rmap lock */
1df631ae1   Minchan Kim   mm: make rmap_wal...
1865
  void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
b97731992   Kirill A. Shutemov   rmap: introduce r...
1866
1867
1868
1869
  {
  	/* no ksm support for now */
  	VM_BUG_ON_PAGE(PageKsm(page), page);
  	if (PageAnon(page))
1df631ae1   Minchan Kim   mm: make rmap_wal...
1870
  		rmap_walk_anon(page, rwc, true);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1871
  	else
1df631ae1   Minchan Kim   mm: make rmap_wal...
1872
  		rmap_walk_file(page, rwc, true);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1873
  }
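
  /*
   * Illustrative sketch (not part of the kernel sources): a caller that
   * wants to visit every mapping of a page builds an rmap_walk_control
   * just like page_mkclean() and try_to_unmap() above do.  my_rmap_one
   * and my_ctx are hypothetical names for the example only:
   *
   *	struct rmap_walk_control rwc = {
   *		.rmap_one = my_rmap_one,
   *		.arg = &my_ctx,
   *		.done = page_not_mapped,
   *		.anon_lock = page_lock_anon_vma_read,
   *	};
   *
   *	rmap_walk(page, &rwc);
   *
   * Returning false from .rmap_one stops the walk early; the .done
   * callback is checked after each vma, so the walk can also stop as soon
   * as its goal (here, the page having no mappings left) is reached.
   */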
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1874

e3390f67a   Naoya Horiguchi   hwpoison: rename ...
1875
  #ifdef CONFIG_HUGETLB_PAGE
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1876
  /*
451b9514a   Kirill Tkhai   mm: remove __huge...
1877
   * The following two functions are for anonymous (private mapped) hugepages.
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1878
1879
1880
   * Unlike common anonymous pages, anonymous hugepages have no accounting code
   * and no lru code, because we handle hugepages differently from common pages.
   */
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1881
1882
1883
1884
1885
  void hugepage_add_anon_rmap(struct page *page,
  			    struct vm_area_struct *vma, unsigned long address)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  	int first;
a850ea303   Naoya Horiguchi   hugetlb, rmap: ad...
1886
1887
  
  	BUG_ON(!PageLocked(page));
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1888
  	BUG_ON(!anon_vma);
5dbe0af47   Hugh Dickins   mm: fix kernel BU...
1889
  	/* address might be in next vma when migration races vma_adjust */
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1890
  	first = atomic_inc_and_test(compound_mapcount_ptr(page));
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1891
  	if (first)
451b9514a   Kirill Tkhai   mm: remove __huge...
1892
  		__page_set_anon_rmap(page, vma, address, 0);
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1893
1894
1895
1896
1897
1898
  }
  
  void hugepage_add_new_anon_rmap(struct page *page,
  			struct vm_area_struct *vma, unsigned long address)
  {
  	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
53f9263ba   Kirill A. Shutemov   mm: rework mapcou...
1899
  	atomic_set(compound_mapcount_ptr(page), 0);
47e29d32a   John Hubbard   mm/gup: page->hpa...
1900
1901
  	if (hpage_pincount_available(page))
  		atomic_set(compound_pincount_ptr(page), 0);
451b9514a   Kirill Tkhai   mm: remove __huge...
1902
  	__page_set_anon_rmap(page, vma, address, 1);
0fe6e20b9   Naoya Horiguchi   hugetlb, rmap: ad...
1903
  }
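
  /*
   * Illustrative sketch (not part of the kernel sources): a hugetlb COW
   * path in the style of hugetlb_cow() installs a freshly copied huge
   * page with the new-rmap variant; the helpers around the call are
   * assumptions for the example only:
   *
   *	huge_ptep_clear_flush(vma, haddr, ptep);
   *	hugepage_add_new_anon_rmap(new_page, vma, haddr);
   *	set_huge_pte_at(mm, haddr, ptep, make_huge_pte(vma, new_page, 1));
   *
   * The plain hugepage_add_anon_rmap() above is the counterpart for
   * re-establishing a mapping of an already-live anonymous huge page,
   * e.g. when hugetlb page migration restores migration ptes.
   */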
e3390f67a   Naoya Horiguchi   hwpoison: rename ...
1904
  #endif /* CONFIG_HUGETLB_PAGE */