Blame view

mm/rmap.c 27.9 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
  /*
   * mm/rmap.c - physical to virtual reverse mappings
   *
   * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
   * Released under the General Public License (GPL).
   *
   * Simple, low overhead reverse mapping scheme.
   * Please try to keep this thing as modular as possible.
   *
   * Provides methods for unmapping each kind of mapped page:
   * the anon methods track anonymous pages, and
   * the file methods track pages belonging to an inode.
   *
   * Original design by Rik van Riel <riel@conectiva.com.br> 2001
   * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
   * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
   * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
   */
  
  /*
   * Lock ordering in mm:
   *
   * inode->i_mutex	(while writing or truncating, not reading or faulting)
   *   inode->i_alloc_sem (vmtruncate_range)
   *   mm->mmap_sem
   *     page->flags PG_locked (lock_page)
   *       mapping->i_mmap_lock
   *         anon_vma->lock
   *           mm->page_table_lock or pte_lock
   *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
   *             swap_lock (in swap_duplicate, swap_info_get)
   *               mmlist_lock (in mmput, drain_mmlist and others)
   *               mapping->private_lock (in __set_page_dirty_buffers)
   *               inode_lock (in set_page_dirty's __mark_inode_dirty)
   *                 sb_lock (within inode_lock in fs/fs-writeback.c)
   *                 mapping->tree_lock (widely used, in set_page_dirty,
   *                           in arch-dependent flush_dcache_mmap_lock,
   *                           within inode_lock in __sync_single_inode)
   */
  
  #include <linux/mm.h>
  #include <linux/pagemap.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/rmap.h>
  #include <linux/rcupdate.h>
a48d07afd   Christoph Lameter   [PATCH] Direct Mi...
49
  #include <linux/module.h>
7de6b8057   Nick Piggin   [PATCH] mm: more ...
50
  #include <linux/kallsyms.h>
8a9f3ccd2   Balbir Singh   Memory controller...
51
  #include <linux/memcontrol.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
52
  #include <linux/mmu_notifier.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
  
  #include <asm/tlbflush.h>
fcc234f88   Pekka Enberg   [PATCH] mm: kill ...
55
  struct kmem_cache *anon_vma_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
  /*
   * anon_vma_prepare - make sure @vma has an anon_vma attached.
   *
   * If vma->anon_vma is already set this does nothing.  Otherwise it first
   * asks find_mergeable_anon_vma() for an existing anon_vma (which is then
   * locked), and falls back to anon_vma_alloc().  The attachment itself is
   * done under mm->page_table_lock and only if vma->anon_vma is still unset;
   * a freshly allocated anon_vma that ended up unused is freed again.
   *
   * Returns 0 on success, -ENOMEM if a new anon_vma could not be allocated.
   *
   * This must be called under the mmap_sem.
   */
  int anon_vma_prepare(struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  
  	might_sleep();
  	if (unlikely(!anon_vma)) {
  		struct mm_struct *mm = vma->vm_mm;
  		struct anon_vma *allocated, *locked;
  
  		anon_vma = find_mergeable_anon_vma(vma);
  		if (anon_vma) {
  			allocated = NULL;
  			locked = anon_vma;
  			spin_lock(&locked->lock);
  		} else {
  			anon_vma = anon_vma_alloc();
  			if (unlikely(!anon_vma))
  				return -ENOMEM;
  			allocated = anon_vma;
  			locked = NULL;
  		}
  
  		/* page_table_lock to protect against threads */
  		spin_lock(&mm->page_table_lock);
  		if (likely(!vma->anon_vma)) {
  			vma->anon_vma = anon_vma;
  			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
  			/* now in use: must not be freed below */
  			allocated = NULL;
  		}
  		spin_unlock(&mm->page_table_lock);
  
  		if (locked)
  			spin_unlock(&locked->lock);
  		if (unlikely(allocated))
  			anon_vma_free(allocated);
  	}
  	return 0;
  }
  
  /*
   * __anon_vma_merge - take @next off the anon_vma list it shares with @vma.
   *
   * BUGs if the two vmas do not reference the same anon_vma.  No lock is
   * taken in this function.
   */
  void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
  {
  	struct list_head *node = &next->anon_vma_node;
  
  	BUG_ON(next->anon_vma != vma->anon_vma);
  	list_del(node);
  }
  
  void __anon_vma_link(struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
30acbabae   Hugh Dickins   mm: kill validate...
106
  	if (anon_vma)
0697212a4   Christoph Lameter   [PATCH] Swapless ...
107
  		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
109
110
111
112
113
114
115
  }
  
  /*
   * anon_vma_link - add @vma to the list of its anon_vma, if it has one,
   * holding anon_vma->lock around the list insertion.
   */
  void anon_vma_link(struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  
  	if (anon_vma) {
  		spin_lock(&anon_vma->lock);
  		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
  		spin_unlock(&anon_vma->lock);
  	}
  }
  
  /*
   * anon_vma_unlink - remove @vma from the list of its anon_vma.
   *
   * Does nothing if @vma has no anon_vma.  If the list is empty once @vma
   * has been removed, the anon_vma is freed after its lock is dropped.
   */
  void anon_vma_unlink(struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  	int empty;
  
  	if (!anon_vma)
  		return;
  
  	spin_lock(&anon_vma->lock);
  	list_del(&vma->anon_vma_node);
  
  	/* We must garbage collect the anon_vma if it's empty */
  	empty = list_empty(&anon_vma->head);
  	spin_unlock(&anon_vma->lock);
  
  	if (empty)
  		anon_vma_free(anon_vma);
  }
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
139
  static void anon_vma_ctor(void *data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
  {
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
141
  	struct anon_vma *anon_vma = data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142

a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
143
144
  	spin_lock_init(&anon_vma->lock);
  	INIT_LIST_HEAD(&anon_vma->head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
145
146
147
148
149
  }
  
  /*
   * Create the "anon_vma" slab cache, with anon_vma_ctor as constructor and
   * the SLAB_DESTROY_BY_RCU and SLAB_PANIC flags set.
   */
  void __init anon_vma_init(void)
  {
  	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
  			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
  }
  
  /*
   * Getting a lock on a stable anon_vma from a page off the LRU is
   * tricky: page_lock_anon_vma rely on RCU to guard against the races.
   */
  static struct anon_vma *page_lock_anon_vma(struct page *page)
  {
34bbd7040   Oleg Nesterov   [PATCH] adapt pag...
159
  	struct anon_vma *anon_vma;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
160
161
162
163
164
165
166
167
168
169
170
  	unsigned long anon_mapping;
  
  	rcu_read_lock();
  	anon_mapping = (unsigned long) page->mapping;
  	if (!(anon_mapping & PAGE_MAPPING_ANON))
  		goto out;
  	if (!page_mapped(page))
  		goto out;
  
  	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
  	spin_lock(&anon_vma->lock);
34bbd7040   Oleg Nesterov   [PATCH] adapt pag...
171
  	return anon_vma;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172
173
  out:
  	rcu_read_unlock();
34bbd7040   Oleg Nesterov   [PATCH] adapt pag...
174
175
176
177
178
179
180
  	return NULL;
  }
  
  /*
   * Counterpart of page_lock_anon_vma: drop anon_vma->lock, then leave the
   * RCU read-side critical section.
   */
  static void page_unlock_anon_vma(struct anon_vma *anon_vma)
  {
  	spin_unlock(&anon_vma->lock);
  	rcu_read_unlock();
  }
  
  /*
3ad33b243   Lee Schermerhorn   Migration: find c...
184
185
186
   * At what user virtual address is page expected in @vma?
   * Returns virtual address or -EFAULT if page's index/offset is not
   * within the range mapped the @vma.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187
188
189
190
191
192
193
194
195
   */
  static inline unsigned long
  vma_address(struct page *page, struct vm_area_struct *vma)
  {
  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  	unsigned long address;
  
  	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
  	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
3ad33b243   Lee Schermerhorn   Migration: find c...
196
  		/* page should be within @vma mapping range */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
198
199
200
201
202
203
  		return -EFAULT;
  	}
  	return address;
  }
  
  /*
   * At what user virtual address is page expected in vma? checking that the
   * page matches the vma: currently only used on anon pages, by unuse_vma;
   *
   * Returns -EFAULT when the page does not match the vma:
   *  - anon page whose anon_vma (page->mapping without the
   *    PAGE_MAPPING_ANON tag) differs from vma->anon_vma;
   *  - file page whose mapping differs from vma->vm_file->f_mapping, or
   *    the vma has no vm_file;
   *  - page without a mapping, or a VM_NONLINEAR vma.
   * Otherwise returns whatever vma_address() computes.
   */
  unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
  {
  	if (PageAnon(page)) {
  		if ((void *)vma->anon_vma !=
  		    (void *)page->mapping - PAGE_MAPPING_ANON)
  			return -EFAULT;
  	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
  		if (!vma->vm_file ||
  		    vma->vm_file->f_mapping != page->mapping)
  			return -EFAULT;
  	} else
  		return -EFAULT;
  	return vma_address(page, vma);
  }
  
  /*
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
222
223
   * Check that @page is mapped at @address into @mm.
   *
479db0bf4   Nick Piggin   mm: dirty page tr...
224
225
226
227
   * If @sync is false, page_check_address may perform a racy check to avoid
   * the page table lock when the pte is not present (helpful when reclaiming
   * highly shared pages).
   *
b8072f099   Hugh Dickins   [PATCH] mm: updat...
228
   * On success returns with pte mapped and locked.
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
229
   */
ceffc0785   Carsten Otte   [PATCH] xip: fs/m...
230
  pte_t *page_check_address(struct page *page, struct mm_struct *mm,
479db0bf4   Nick Piggin   mm: dirty page tr...
231
  			  unsigned long address, spinlock_t **ptlp, int sync)
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
232
233
234
235
236
  {
  	pgd_t *pgd;
  	pud_t *pud;
  	pmd_t *pmd;
  	pte_t *pte;
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
237
  	spinlock_t *ptl;
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
238

81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
239
  	pgd = pgd_offset(mm, address);
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
240
241
242
243
244
245
246
247
248
249
250
251
252
  	if (!pgd_present(*pgd))
  		return NULL;
  
  	pud = pud_offset(pgd, address);
  	if (!pud_present(*pud))
  		return NULL;
  
  	pmd = pmd_offset(pud, address);
  	if (!pmd_present(*pmd))
  		return NULL;
  
  	pte = pte_offset_map(pmd, address);
  	/* Make a quick check before getting the lock */
479db0bf4   Nick Piggin   mm: dirty page tr...
253
  	if (!sync && !pte_present(*pte)) {
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
254
255
256
  		pte_unmap(pte);
  		return NULL;
  	}
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
257
  	ptl = pte_lockptr(mm, pmd);
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
258
259
260
261
  	spin_lock(ptl);
  	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
  		*ptlp = ptl;
  		return pte;
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
262
  	}
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
263
264
  	pte_unmap_unlock(pte, ptl);
  	return NULL;
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
265
266
267
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268
269
270
271
   * Subfunctions of page_referenced: page_referenced_one called
   * repeatedly from either page_referenced_anon or page_referenced_file.
   */
  static int page_referenced_one(struct page *page,
f7b7fd8f3   Rik van Riel   [PATCH] temporari...
272
  	struct vm_area_struct *vma, unsigned int *mapcount)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
273
274
275
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long address;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276
  	pte_t *pte;
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
277
  	spinlock_t *ptl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
  	int referenced = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
279
280
281
  	address = vma_address(page, vma);
  	if (address == -EFAULT)
  		goto out;
479db0bf4   Nick Piggin   mm: dirty page tr...
282
  	pte = page_check_address(page, mm, address, &ptl, 0);
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
283
284
  	if (!pte)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285

5a9bbdcd2   Hugh Dickins   mm: don't waste s...
286
287
288
  	if (vma->vm_flags & VM_LOCKED) {
  		referenced++;
  		*mapcount = 1;	/* break early from loop */
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
289
  	} else if (ptep_clear_flush_young_notify(vma, address, pte))
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
290
  		referenced++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291

c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
292
293
  	/* Pretend the page is referenced if the task has the
  	   swap token and is in the middle of a page fault. */
f7b7fd8f3   Rik van Riel   [PATCH] temporari...
294
  	if (mm != current->mm && has_swap_token(mm) &&
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
295
296
297
298
299
  			rwsem_is_locked(&mm->mmap_sem))
  		referenced++;
  
  	(*mapcount)--;
  	pte_unmap_unlock(pte, ptl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
300
301
302
  out:
  	return referenced;
  }
bed7161a5   Balbir Singh   Memory controller...
303
304
  static int page_referenced_anon(struct page *page,
  				struct mem_cgroup *mem_cont)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
306
307
308
309
310
311
312
313
314
315
316
  {
  	unsigned int mapcount;
  	struct anon_vma *anon_vma;
  	struct vm_area_struct *vma;
  	int referenced = 0;
  
  	anon_vma = page_lock_anon_vma(page);
  	if (!anon_vma)
  		return referenced;
  
  	mapcount = page_mapcount(page);
  	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
bed7161a5   Balbir Singh   Memory controller...
317
318
319
320
321
  		/*
  		 * If we are reclaiming on behalf of a cgroup, skip
  		 * counting on behalf of references from different
  		 * cgroups
  		 */
bd845e38c   Hugh Dickins   memcg: mm_match_c...
322
  		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
bed7161a5   Balbir Singh   Memory controller...
323
  			continue;
f7b7fd8f3   Rik van Riel   [PATCH] temporari...
324
  		referenced += page_referenced_one(page, vma, &mapcount);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
326
327
  		if (!mapcount)
  			break;
  	}
34bbd7040   Oleg Nesterov   [PATCH] adapt pag...
328
329
  
  	page_unlock_anon_vma(anon_vma);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
332
333
334
335
  	return referenced;
  }
  
  /**
   * page_referenced_file - referenced check for object-based rmap
   * @page: the page we're checking references on.
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
336
   * @mem_cont: target memory controller
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337
338
339
340
341
342
343
344
   *
   * For an object-based mapped page, find all the places it is mapped and
   * check/clear the referenced flag.  This is done by following the page->mapping
   * pointer, then walking the chain of vmas it holds.  It returns the number
   * of references it found.
   *
   * This function is only called from page_referenced for object-based pages.
   */
bed7161a5   Balbir Singh   Memory controller...
345
346
  static int page_referenced_file(struct page *page,
  				struct mem_cgroup *mem_cont)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
  {
  	unsigned int mapcount;
  	struct address_space *mapping = page->mapping;
  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  	struct vm_area_struct *vma;
  	struct prio_tree_iter iter;
  	int referenced = 0;
  
  	/*
  	 * The caller's checks on page->mapping and !PageAnon have made
  	 * sure that this is a file page: the check for page->mapping
  	 * excludes the case just before it gets set on an anon page.
  	 */
  	BUG_ON(PageAnon(page));
  
  	/*
  	 * The page lock not only makes sure that page->mapping cannot
  	 * suddenly be NULLified by truncation, it makes sure that the
  	 * structure at mapping cannot be freed and reused yet,
  	 * so we can safely take mapping->i_mmap_lock.
  	 */
  	BUG_ON(!PageLocked(page));
  
  	spin_lock(&mapping->i_mmap_lock);
  
  	/*
  	 * i_mmap_lock does not stabilize mapcount at all, but mapcount
  	 * is more likely to be accurate if we note it after spinning.
  	 */
  	mapcount = page_mapcount(page);
  
  	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
bed7161a5   Balbir Singh   Memory controller...
379
380
381
382
383
  		/*
  		 * If we are reclaiming on behalf of a cgroup, skip
  		 * counting on behalf of references from different
  		 * cgroups
  		 */
bd845e38c   Hugh Dickins   memcg: mm_match_c...
384
  		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
bed7161a5   Balbir Singh   Memory controller...
385
  			continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
386
387
388
389
390
  		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
  				  == (VM_LOCKED|VM_MAYSHARE)) {
  			referenced++;
  			break;
  		}
f7b7fd8f3   Rik van Riel   [PATCH] temporari...
391
  		referenced += page_referenced_one(page, vma, &mapcount);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
392
393
394
395
396
397
398
399
400
401
402
403
  		if (!mapcount)
  			break;
  	}
  
  	spin_unlock(&mapping->i_mmap_lock);
  	return referenced;
  }
  
  /**
   * page_referenced - test if the page was referenced
   * @page: the page to test
   * @is_locked: caller holds lock on the page
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
404
   * @mem_cont: target memory controller
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
405
406
407
408
   *
   * Quick test_and_clear_referenced for all mappings to a page,
   * returns the number of ptes which referenced the page.
   */
bed7161a5   Balbir Singh   Memory controller...
409
410
  int page_referenced(struct page *page, int is_locked,
  			struct mem_cgroup *mem_cont)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
411
412
  {
  	int referenced = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
414
415
416
417
  	if (TestClearPageReferenced(page))
  		referenced++;
  
  	if (page_mapped(page) && page->mapping) {
  		if (PageAnon(page))
bed7161a5   Balbir Singh   Memory controller...
418
  			referenced += page_referenced_anon(page, mem_cont);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
  		else if (is_locked)
bed7161a5   Balbir Singh   Memory controller...
420
  			referenced += page_referenced_file(page, mem_cont);
529ae9aaa   Nick Piggin   mm: rename page t...
421
  		else if (!trylock_page(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422
423
424
  			referenced++;
  		else {
  			if (page->mapping)
bed7161a5   Balbir Singh   Memory controller...
425
426
  				referenced +=
  					page_referenced_file(page, mem_cont);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
427
428
429
  			unlock_page(page);
  		}
  	}
5b7baf057   Christian Borntraeger   s390: KVM prepara...
430
431
432
  
  	if (page_test_and_clear_young(page))
  		referenced++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
433
434
  	return referenced;
  }
d08b3851d   Peter Zijlstra   [PATCH] mm: track...
435
436
437
438
  static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long address;
c2fda5fed   Peter Zijlstra   [PATCH] Fix up pa...
439
  	pte_t *pte;
d08b3851d   Peter Zijlstra   [PATCH] mm: track...
440
441
442
443
444
445
  	spinlock_t *ptl;
  	int ret = 0;
  
  	address = vma_address(page, vma);
  	if (address == -EFAULT)
  		goto out;
479db0bf4   Nick Piggin   mm: dirty page tr...
446
  	pte = page_check_address(page, mm, address, &ptl, 1);
d08b3851d   Peter Zijlstra   [PATCH] mm: track...
447
448
  	if (!pte)
  		goto out;
c2fda5fed   Peter Zijlstra   [PATCH] Fix up pa...
449
450
  	if (pte_dirty(*pte) || pte_write(*pte)) {
  		pte_t entry;
d08b3851d   Peter Zijlstra   [PATCH] mm: track...
451

c2fda5fed   Peter Zijlstra   [PATCH] Fix up pa...
452
  		flush_cache_page(vma, address, pte_pfn(*pte));
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
453
  		entry = ptep_clear_flush_notify(vma, address, pte);
c2fda5fed   Peter Zijlstra   [PATCH] Fix up pa...
454
455
  		entry = pte_wrprotect(entry);
  		entry = pte_mkclean(entry);
d6e88e671   Al Viro   [PATCH] page_mkcl...
456
  		set_pte_at(mm, address, pte, entry);
c2fda5fed   Peter Zijlstra   [PATCH] Fix up pa...
457
458
  		ret = 1;
  	}
d08b3851d   Peter Zijlstra   [PATCH] mm: track...
459

d08b3851d   Peter Zijlstra   [PATCH] mm: track...
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
  	pte_unmap_unlock(pte, ptl);
  out:
  	return ret;
  }
  
  static int page_mkclean_file(struct address_space *mapping, struct page *page)
  {
  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  	struct vm_area_struct *vma;
  	struct prio_tree_iter iter;
  	int ret = 0;
  
  	BUG_ON(PageAnon(page));
  
  	spin_lock(&mapping->i_mmap_lock);
  	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
  		if (vma->vm_flags & VM_SHARED)
  			ret += page_mkclean_one(page, vma);
  	}
  	spin_unlock(&mapping->i_mmap_lock);
  	return ret;
  }
  
  /*
   * page_mkclean - clean and write-protect the mappings of a locked page.
   *
   * BUGs if @page is not locked.  For a mapped page with a mapping, returns
   * the result of page_mkclean_file(), forced to 1 when page_test_dirty()
   * reports the page dirty (that dirty state is then cleared).  Returns 0
   * if the page is unmapped or has no mapping.
   */
  int page_mkclean(struct page *page)
  {
  	int ret = 0;
  
  	BUG_ON(!PageLocked(page));
  
  	if (page_mapped(page)) {
  		struct address_space *mapping = page_mapping(page);
  		if (mapping) {
  			ret = page_mkclean_file(mapping, page);
  			if (page_test_dirty(page)) {
  				page_clear_dirty(page);
  				ret = 1;
  			}
  		}
  	}
  
  	return ret;
  }
60b59beaf   Jaya Kumar   fbdev: mm: Deferr...
502
  EXPORT_SYMBOL_GPL(page_mkclean);
d08b3851d   Peter Zijlstra   [PATCH] mm: track...
503

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504
  /**
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
505
   * __page_set_anon_rmap - setup new anonymous rmap
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
506
507
508
509
510
511
512
513
514
515
516
517
518
519
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   */
  static void __page_set_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
  	struct anon_vma *anon_vma = vma->anon_vma;
  
  	BUG_ON(!anon_vma);
  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	page->mapping = (struct address_space *) anon_vma;
  
  	page->index = linear_page_index(vma, address);
a74609faf   Nick Piggin   [PATCH] mm: page_...
520
521
522
523
  	/*
  	 * nr_mapped state can be updated without turning off
  	 * interrupts because it is not modified via interrupt.
  	 */
f3dbd3446   Christoph Lameter   [PATCH] zoned vm ...
524
  	__inc_zone_page_state(page, NR_ANON_PAGES);
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
525
526
527
  }
  
  /**
   * __page_check_anon_rmap - sanity check anonymous rmap addition
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   *
   * Compiles to an empty function unless CONFIG_DEBUG_VM is set.
   */
  static void __page_check_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
  #ifdef CONFIG_DEBUG_VM
  	/*
  	 * The page's anon-rmap details (mapping and index) are guaranteed to
  	 * be set up correctly at this point.
  	 *
  	 * We have exclusion against page_add_anon_rmap because the caller
  	 * always holds the page locked, except if called from page_dup_rmap,
  	 * in which case the page is already known to be setup.
  	 *
  	 * We have exclusion against page_add_new_anon_rmap because those pages
  	 * are initially only visible via the pagetables, and the pte is locked
  	 * over the call to page_add_new_anon_rmap.
  	 */
  	struct anon_vma *anon_vma = vma->anon_vma;
  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
  	BUG_ON(page->mapping != (struct address_space *)anon_vma);
  	BUG_ON(page->index != linear_page_index(vma, address));
  #endif
  }
  
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
558
559
560
561
   * page_add_anon_rmap - add pte mapping to an anonymous page
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   *
c97a9e10e   Nick Piggin   mm: more rmap che...
562
   * The caller needs to hold the pte lock and the page must be locked.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563
564
565
566
   */
  void page_add_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
c97a9e10e   Nick Piggin   mm: more rmap che...
567
568
  	VM_BUG_ON(!PageLocked(page));
  	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
569
570
  	if (atomic_inc_and_test(&page->_mapcount))
  		__page_set_anon_rmap(page, vma, address);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
571
  	else
c97a9e10e   Nick Piggin   mm: more rmap che...
572
  		__page_check_anon_rmap(page, vma, address);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
  }
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
574
  /**
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
575
576
577
578
579
580
581
   * page_add_new_anon_rmap - add pte mapping to a new anonymous page
   * @page:	the page to add the mapping to
   * @vma:	the vm area in which the mapping is added
   * @address:	the user virtual address mapped
   *
   * Same as page_add_anon_rmap but must only be called on *new* pages.
   * This means the inc-and-test can be bypassed.
c97a9e10e   Nick Piggin   mm: more rmap che...
582
   * Page does not have to be locked.
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
583
584
585
586
   */
  void page_add_new_anon_rmap(struct page *page,
  	struct vm_area_struct *vma, unsigned long address)
  {
c97a9e10e   Nick Piggin   mm: more rmap che...
587
  	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
9617d95e6   Nick Piggin   [PATCH] mm: rmap ...
588
589
590
  	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
  	__page_set_anon_rmap(page, vma, address);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
591
592
593
594
  /**
   * page_add_file_rmap - add pte mapping to a file page
   * @page: the page to add the mapping to
   *
b8072f099   Hugh Dickins   [PATCH] mm: updat...
595
   * The caller needs to hold the pte lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
596
597
598
   */
  void page_add_file_rmap(struct page *page)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
599
  	if (atomic_inc_and_test(&page->_mapcount))
65ba55f50   Christoph Lameter   [PATCH] zoned vm ...
600
  		__inc_zone_page_state(page, NR_FILE_MAPPED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
601
  }
c97a9e10e   Nick Piggin   mm: more rmap che...
602
603
604
605
  #ifdef CONFIG_DEBUG_VM
  /**
   * page_dup_rmap - duplicate pte mapping to a page
   * @page:	the page to add the mapping to
   * @vma:	the vm area being duplicated
   * @address:	the user virtual address mapped
   *
   * For copy_page_range only: minimal extract from page_add_file_rmap /
   * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
   * quicker.
   *
   * The caller needs to hold the pte lock.
   *
   * This out-of-line version is only built with CONFIG_DEBUG_VM; it BUGs
   * if the page is not already mapped and sanity checks anon pages before
   * incrementing _mapcount.
   */
  void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
  {
  	BUG_ON(page_mapcount(page) == 0);
  	if (PageAnon(page))
  		__page_check_anon_rmap(page, vma, address);
  	atomic_inc(&page->_mapcount);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
623
624
625
  /**
   * page_remove_rmap - take down pte mapping from a page
   * @page: page to remove mapping from
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
626
   * @vma: the vm area in which the mapping is removed
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
627
   *
b8072f099   Hugh Dickins   [PATCH] mm: updat...
628
   * The caller needs to hold the pte lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
629
   */
7de6b8057   Nick Piggin   [PATCH] mm: more ...
630
  void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
631
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
632
  	if (atomic_add_negative(-1, &page->_mapcount)) {
b7ab795b7   Nick Piggin   [PATCH] mm: more ...
633
  		if (unlikely(page_mapcount(page) < 0)) {
ef2bf0dc8   Dave Jones   [PATCH] rmap: add...
634
635
  			printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)
  ", page_mapcount(page));
7de6b8057   Nick Piggin   [PATCH] mm: more ...
636
637
  			printk (KERN_EMERG "  page pfn = %lx
  ", page_to_pfn(page));
ef2bf0dc8   Dave Jones   [PATCH] rmap: add...
638
639
640
641
642
643
  			printk (KERN_EMERG "  page->flags = %lx
  ", page->flags);
  			printk (KERN_EMERG "  page->count = %x
  ", page_count(page));
  			printk (KERN_EMERG "  page->mapping = %p
  ", page->mapping);
7de6b8057   Nick Piggin   [PATCH] mm: more ...
644
645
  			print_symbol (KERN_EMERG "  vma->vm_ops = %s
  ", (unsigned long)vma->vm_ops);
54cb8821d   Nick Piggin   mm: merge populat...
646
  			if (vma->vm_ops) {
54cb8821d   Nick Piggin   mm: merge populat...
647
648
649
  				print_symbol (KERN_EMERG "  vma->vm_ops->fault = %s
  ", (unsigned long)vma->vm_ops->fault);
  			}
7de6b8057   Nick Piggin   [PATCH] mm: more ...
650
651
652
  			if (vma->vm_file && vma->vm_file->f_op)
  				print_symbol (KERN_EMERG "  vma->vm_file->f_op->mmap = %s
  ", (unsigned long)vma->vm_file->f_op->mmap);
b16bc64d1   Dave Jones   [PATCH] move rmap...
653
  			BUG();
ef2bf0dc8   Dave Jones   [PATCH] rmap: add...
654
  		}
b16bc64d1   Dave Jones   [PATCH] move rmap...
655

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
656
  		/*
16f8c5b2e   Hugh Dickins   mm: page_remove_r...
657
658
659
660
661
  		 * Now that the last pte has gone, s390 must transfer dirty
  		 * flag from storage key to struct page.  We can usually skip
  		 * this if the page is anon, so about to be freed; but perhaps
  		 * not if it's in swapcache - there might be another pte slot
  		 * containing the swap entry, but page not yet written to swap.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662
  		 */
a4b526b3b   Martin Schwidefsky   [S390] Optimize s...
663
664
  		if ((!PageAnon(page) || PageSwapCache(page)) &&
  		    page_test_dirty(page)) {
6c210482a   Martin Schwidefsky   [S390] split page...
665
  			page_clear_dirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
  			set_page_dirty(page);
6c210482a   Martin Schwidefsky   [S390] split page...
667
  		}
8a9f3ccd2   Balbir Singh   Memory controller...
668

16f8c5b2e   Hugh Dickins   mm: page_remove_r...
669
  		mem_cgroup_uncharge_page(page);
f3dbd3446   Christoph Lameter   [PATCH] zoned vm ...
670
  		__dec_zone_page_state(page,
16f8c5b2e   Hugh Dickins   mm: page_remove_r...
671
672
673
674
675
676
677
678
679
680
  			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
  		/*
  		 * It would be tidy to reset the PageAnon mapping here,
  		 * but that might overwrite a racing page_add_anon_rmap
  		 * which increments mapcount after us but sets mapping
  		 * before us: so leave the reset to free_hot_cold_page,
  		 * and remember that it's only reliable while mapped.
  		 * Leaving it set also helps swapoff to reinstate ptes
  		 * faster for those pages still in swapcache.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
681
682
683
684
685
686
687
  	}
  }
  
  /*
   * Subfunctions of try_to_unmap: try_to_unmap_one called
   * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
   *
   * Removes the pte that maps @page in @vma, if there is one.
   *
   * @page:      page whose mapping in @vma should be torn down
   * @vma:       candidate vma; the mm is taken from vma->vm_mm
   * @migration: non-zero when called on behalf of page migration.  This
   *             bypasses the VM_LOCKED / recently-referenced refusal and,
   *             under CONFIG_MIGRATION, makes the function install
   *             migration entries in place of the removed pte.
   *
   * Returns SWAP_FAIL when the unmap is refused (non-migration call and
   * either the vma is VM_LOCKED or the pte was found young).  In every
   * other case, including "no pte found", the result is SWAP_AGAIN.
   */
a48d07afd   Christoph Lameter   [PATCH] Direct Mi...
688
  static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
7352349a1   Christoph Lameter   [PATCH] page migr...
689
  				int migration)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
690
691
692
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long address;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
693
694
  	pte_t *pte;
  	pte_t pteval;
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
695
  	spinlock_t *ptl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
696
  	int ret = SWAP_AGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
698
699
  	/* -EFAULT is treated as "no address for this page in this vma". */
  	address = vma_address(page, vma);
  	if (address == -EFAULT)
  		goto out;
479db0bf4   Nick Piggin   mm: dirty page tr...
700
  	/*
  	 * On success ptl is filled in and is released at out_unmap.  When no
  	 * pte comes back we leave through "out", which skips that unlock.
  	 */
  	pte = page_check_address(page, mm, address, &ptl, 0);
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
701
  	if (!pte)
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
702
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
704
705
706
707
708
  
  	/*
  	 * If the page is mlock()d, we cannot swap it out.
  	 * If it's recently referenced (perhaps page_referenced
  	 * skipped over this mm) then we should reactivate it.
  	 */
e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
709
  	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
710
  			(ptep_clear_flush_young_notify(vma, address, pte)))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
711
712
713
  		ret = SWAP_FAIL;
  		goto out_unmap;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
714
715
  	/* Nuke the page table entry. */
  	flush_cache_page(vma, address, page_to_pfn(page));
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
716
  	pteval = ptep_clear_flush_notify(vma, address, pte);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
717
718
719
720
  
  	/* Move the dirty bit to the physical page now the pte is gone. */
  	if (pte_dirty(pteval))
  		set_page_dirty(page);
365e9c87a   Hugh Dickins   [PATCH] mm: updat...
721
722
  	/* Update high watermark before we lower rss */
  	update_hiwater_rss(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
723
  	if (PageAnon(page)) {
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
724
  		/* Default entry: whatever page_private() holds for this page. */
  		swp_entry_t entry = { .val = page_private(page) };
0697212a4   Christoph Lameter   [PATCH] Swapless ...
725
726
727
728
729
730
731
732
733
734
735
736
737
  
  		if (PageSwapCache(page)) {
  			/*
  			 * Store the swap location in the pte.
  			 * See handle_pte_fault() ...
  			 */
  			swap_duplicate(entry);
  			/*
  			 * Link mm onto init_mm.mmlist if it is not on a list
  			 * yet: tested once without the lock, then re-tested
  			 * under mmlist_lock before the list_add().
  			 */
  			if (list_empty(&mm->mmlist)) {
  				spin_lock(&mmlist_lock);
  				if (list_empty(&mm->mmlist))
  					list_add(&mm->mmlist, &init_mm.mmlist);
  				spin_unlock(&mmlist_lock);
  			}
442c9137d   Christoph Lameter   [PATCH] More page...
738
  			dec_mm_counter(mm, anon_rss);
04e62a29b   Christoph Lameter   [PATCH] More page...
739
  			/*
  			 * The else-branch below exists only under
  			 * CONFIG_MIGRATION.  Without it, an anon page that is
  			 * not in swapcache reaches set_pte_at() with the entry
  			 * built from page_private() above.
  			 */
  #ifdef CONFIG_MIGRATION
0697212a4   Christoph Lameter   [PATCH] Swapless ...
740
741
742
743
744
745
746
747
  		} else {
  			/*
  			 * Store the pfn of the page in a special migration
  			 * pte. do_swap_page() will wait until the migration
  			 * pte is removed and then restart fault handling.
  			 */
  			BUG_ON(!migration);
  			entry = make_migration_entry(page, pte_write(pteval));
04e62a29b   Christoph Lameter   [PATCH] More page...
748
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
749
750
751
  		}
  		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
  		BUG_ON(pte_file(*pte));
4294621f4   Hugh Dickins   [PATCH] mm: rss =...
752
  	} else
04e62a29b   Christoph Lameter   [PATCH] More page...
753
754
755
756
757
758
759
760
  	/*
  	 * File page.  The "else" above governs either the "if (migration)"
  	 * statement (CONFIG_MIGRATION) or dec_mm_counter() directly; with
  	 * CONFIG_MIGRATION and migration set, file_rss is left untouched.
  	 */
  #ifdef CONFIG_MIGRATION
  	if (migration) {
  		/* Establish migration entry for a file page */
  		swp_entry_t entry;
  		entry = make_migration_entry(page, pte_write(pteval));
  		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
  	} else
  #endif
4294621f4   Hugh Dickins   [PATCH] mm: rss =...
761
  		dec_mm_counter(mm, file_rss);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
762

04e62a29b   Christoph Lameter   [PATCH] More page...
763

7de6b8057   Nick Piggin   [PATCH] mm: more ...
764
  	/* The pte is gone: drop this mapping's rmap accounting and page reference. */
  	page_remove_rmap(page, vma);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
765
766
767
  	page_cache_release(page);
  
  out_unmap:
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
768
  	pte_unmap_unlock(pte, ptl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
  out:
  	return ret;
  }
  
  /*
   * objrmap doesn't work for nonlinear VMAs because the assumption that
   * offset-into-file correlates with offset-into-virtual-addresses does not hold.
   * Consequently, given a particular page and its ->index, we cannot locate the
   * ptes which are mapping that page without an exhaustive linear search.
   *
   * So what this code does is a mini "virtual scan" of each nonlinear VMA which
   * maps the file to which the target page belongs.  The ->vm_private_data field
   * holds the current cursor into that scan.  Successive searches will circulate
   * around the vma's virtual address space.
   *
   * So as more replacement pressure is applied to the pages in a nonlinear VMA,
   * more scanning pressure is placed against them as well.   Eventually pages
   * will become fully unmapped and are eligible for eviction.
   *
   * For very sparsely populated VMAs this is a little inefficient - chances are
   * there won't be many ptes located within the scan cluster.  In this case
   * maybe we could scan further - to the end of the pte page, perhaps.
   */
  /*
   * CLUSTER_SIZE: bytes covered by one scan step - 32 pages, but never more
   * than PMD_SIZE.
   * CLUSTER_MASK: clears the low bits of an address, i.e. rounds it down to a
   * CLUSTER_SIZE boundary (this relies on CLUSTER_SIZE being a power of two -
   * TODO confirm for every arch's PAGE_SIZE/PMD_SIZE).
   */
  #define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
  #define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
  
  /*
   * try_to_unmap_cluster - unmap the present ptes in one scan cluster of a vma
   * @cursor:   byte offset into @vma at which this scan step starts
   * @mapcount: counter owned by the caller; decremented once per pte removed
   * @vma:      vma being scanned
   *
   * The scanned window is the CLUSTER_SIZE-aligned range that contains
   * vma->vm_start + cursor, clipped to [vm_start, vm_end).  Returns without
   * doing anything when the pgd, pud or pmd covering the window start is not
   * present.  Ptes that are not present, or for which
   * ptep_clear_flush_young_notify() returns non-zero, are left in place.
   */
  static void try_to_unmap_cluster(unsigned long cursor,
  	unsigned int *mapcount, struct vm_area_struct *vma)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pgd_t *pgd;
  	pud_t *pud;
  	pmd_t *pmd;
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
802
  	pte_t *pte;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
803
  	pte_t pteval;
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
804
  	spinlock_t *ptl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
805
806
807
  	struct page *page;
  	unsigned long address;
  	unsigned long end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
808

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
809
810
811
812
813
814
815
816
817
  	/* Align down to a cluster boundary, then clip both ends to the vma. */
  	address = (vma->vm_start + cursor) & CLUSTER_MASK;
  	end = address + CLUSTER_SIZE;
  	if (address < vma->vm_start)
  		address = vma->vm_start;
  	if (end > vma->vm_end)
  		end = vma->vm_end;
  
  	/* Walk pgd -> pud -> pmd for the window start; give up if any level is absent. */
  	pgd = pgd_offset(mm, address);
  	if (!pgd_present(*pgd))
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
818
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
819
820
821
  
  	pud = pud_offset(pgd, address);
  	if (!pud_present(*pud))
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
822
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
824
825
  
  	pmd = pmd_offset(pud, address);
  	if (!pmd_present(*pmd))
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
826
827
828
  		return;
  
  	/* Paired with the pte_unmap_unlock() after the loop. */
  	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829

365e9c87a   Hugh Dickins   [PATCH] mm: updat...
830
831
  	/* Update high watermark before we lower rss */
  	update_hiwater_rss(mm);
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
832
  	for (; address < end; pte++, address += PAGE_SIZE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833
834
  		if (!pte_present(*pte))
  			continue;
6aab341e0   Linus Torvalds   mm: re-architect ...
835
836
  		/* Every present pte here must map a normal, non-anonymous page. */
  		page = vm_normal_page(vma, address, *pte);
  		BUG_ON(!page || PageAnon(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
837

cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
838
  		/* Non-zero here means the pte is left mapped on this pass. */
  		if (ptep_clear_flush_young_notify(vma, address, pte))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839
840
841
  			continue;
  
  		/* Nuke the page table entry. */
eca351336   Ben Collins   [PATCH] Fix missi...
842
  		flush_cache_page(vma, address, pte_pfn(*pte));
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
843
  		pteval = ptep_clear_flush_notify(vma, address, pte);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
844
845
846
847
848
849
850
851
  
  		/* If nonlinear, store the file page offset in the pte. */
  		if (page->index != linear_page_index(vma, address))
  			set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
  
  		/* Move the dirty bit to the physical page now the pte is gone. */
  		if (pte_dirty(pteval))
  			set_page_dirty(page);
7de6b8057   Nick Piggin   [PATCH] mm: more ...
852
  		page_remove_rmap(page, vma);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
853
  		page_cache_release(page);
4294621f4   Hugh Dickins   [PATCH] mm: rss =...
854
  		dec_mm_counter(mm, file_rss);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
855
856
  		/* One fewer mapping for the caller to chase. */
  		(*mapcount)--;
  	}
c0718806c   Hugh Dickins   [PATCH] mm: rmap ...
857
  	/*
  	 * The loop leaves pte one slot past the last entry it visited, so step
  	 * back by one before unmapping (presumably so the pointer still lies
  	 * within the mapped pte page - confirm against pte_unmap()).
  	 */
  	pte_unmap_unlock(pte - 1, ptl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
858
  }
7352349a1   Christoph Lameter   [PATCH] page migr...
859
  static int try_to_unmap_anon(struct page *page, int migration)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
860
861
862
863
864
865
866
867
868
869
  {
  	struct anon_vma *anon_vma;
  	struct vm_area_struct *vma;
  	int ret = SWAP_AGAIN;
  
  	anon_vma = page_lock_anon_vma(page);
  	if (!anon_vma)
  		return ret;
  
  	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
7352349a1   Christoph Lameter   [PATCH] page migr...
870
  		ret = try_to_unmap_one(page, vma, migration);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871
872
873
  		if (ret == SWAP_FAIL || !page_mapped(page))
  			break;
  	}
34bbd7040   Oleg Nesterov   [PATCH] adapt pag...
874
875
  
  	page_unlock_anon_vma(anon_vma);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
876
877
878
879
880
881
  	return ret;
  }
  
  /**
   * try_to_unmap_file - unmap file page using the object-based rmap method
   * @page: the page to unmap
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
882
   * @migration: migration flag
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
883
884
885
886
887
888
   *
   * Find all the mappings of a page using the mapping pointer and the vma chains
   * contained in the address_space struct it points to.
   *
   * This function is only called from try_to_unmap for object-based pages.
   */
7352349a1   Christoph Lameter   [PATCH] page migr...
889
  static int try_to_unmap_file(struct page *page, int migration)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
891
892
893
894
895
896
897
898
899
900
901
902
  {
  	/*
  	 * Overview, all under mapping->i_mmap_lock:
  	 *  1. try_to_unmap_one() on each linear vma found via the prio tree;
  	 *  2. if nonlinear vmas exist, find the furthest saved scan cursor
  	 *     and the largest vma length among the eligible ones;
  	 *  3. sweep the eligible nonlinear vmas cluster by cluster, using the
  	 *     page's mapcount as a budget for how many ptes to remove;
  	 *  4. if the sweep ran to completion, reset every saved cursor.
  	 * "Eligible" means not VM_LOCKED, unless this is a migration call.
  	 */
  	struct address_space *mapping = page->mapping;
  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  	struct vm_area_struct *vma;
  	struct prio_tree_iter iter;
  	int ret = SWAP_AGAIN;
  	unsigned long cursor;
  	unsigned long max_nl_cursor = 0;
  	unsigned long max_nl_size = 0;
  	unsigned int mapcount;
  
  	/* Step 1: linear mappings covering this page's file offset. */
  	spin_lock(&mapping->i_mmap_lock);
  	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
7352349a1   Christoph Lameter   [PATCH] page migr...
903
  		ret = try_to_unmap_one(page, vma, migration);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
904
905
906
907
908
909
910
911
912
  		if (ret == SWAP_FAIL || !page_mapped(page))
  			goto out;
  	}
  
  	/* No nonlinear vmas: the linear pass above was all there is to do. */
  	if (list_empty(&mapping->i_mmap_nonlinear))
  		goto out;
  
  	/* Step 2: gather the furthest cursor and the largest vma length. */
  	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
  						shared.vm_set.list) {
e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
913
  		if ((vma->vm_flags & VM_LOCKED) && !migration)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
  			continue;
  		cursor = (unsigned long) vma->vm_private_data;
  		if (cursor > max_nl_cursor)
  			max_nl_cursor = cursor;
  		/* cursor is reused here as scratch for this vma's length. */
  		cursor = vma->vm_end - vma->vm_start;
  		if (cursor > max_nl_size)
  			max_nl_size = cursor;
  	}
  
  	if (max_nl_size == 0) {	/* any nonlinears locked or reserved */
  		ret = SWAP_FAIL;
  		goto out;
  	}
  
  	/*
  	 * We don't try to search for this page in the nonlinear vmas,
  	 * and page_referenced wouldn't have found it anyway.  Instead
  	 * just walk the nonlinear vmas trying to age and unmap some.
  	 * The mapcount of the page we came in with is irrelevant,
  	 * but even so use it as a guide to how hard we should try?
  	 */
  	mapcount = page_mapcount(page);
  	if (!mapcount)
  		goto out;
  	cond_resched_lock(&mapping->i_mmap_lock);
  
  	/* Round the largest length up to a whole number of clusters. */
  	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
  	if (max_nl_cursor == 0)
  		max_nl_cursor = CLUSTER_SIZE;
  
  	/*
  	 * Step 3: each round advances every eligible vma's cursor up to
  	 * max_nl_cursor, then raises that limit by one cluster.
  	 */
  	do {
  		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
  						shared.vm_set.list) {
e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
947
  			if ((vma->vm_flags & VM_LOCKED) && !migration)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
948
949
  				continue;
  			cursor = (unsigned long) vma->vm_private_data;
839b9685e   Hugh Dickins   [PATCH] rmap: don...
950
  			while ( cursor < max_nl_cursor &&
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
  				cursor < vma->vm_end - vma->vm_start) {
  				try_to_unmap_cluster(cursor, &mapcount, vma);
  				cursor += CLUSTER_SIZE;
  				vma->vm_private_data = (void *) cursor;
  				/*
  				 * mapcount is unsigned and is decremented
  				 * through the pointer by try_to_unmap_cluster();
  				 * the cast also catches it going past zero.
  				 */
  				if ((int)mapcount <= 0)
  					goto out;
  			}
  			/* Bring this vma's saved cursor up to the round's limit. */
  			vma->vm_private_data = (void *) max_nl_cursor;
  		}
  		cond_resched_lock(&mapping->i_mmap_lock);
  		max_nl_cursor += CLUSTER_SIZE;
  	} while (max_nl_cursor <= max_nl_size);
  
  	/*
  	 * Don't loop forever (perhaps all the remaining pages are
  	 * in locked vmas).  Reset cursor on all unreserved nonlinear
  	 * vmas, now forgetting on which ones it had fallen behind.
  	 */
101d2be76   Hugh Dickins   [PATCH] unpaged: ...
969
970
  	/* Step 4: this loop has no VM_LOCKED test; every vma on the list is reset. */
  	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
  		vma->vm_private_data = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
971
972
973
974
975
976
977
978
  out:
  	spin_unlock(&mapping->i_mmap_lock);
  	return ret;
  }
  
  /**
   * try_to_unmap - try to remove all page table mappings to a page
   * @page: the page to get unmapped
43d8eac44   Randy Dunlap   mm: rmap kernel-d...
979
   * @migration: migration flag
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
980
981
982
983
984
985
986
987
988
   *
   * Tries to remove all the page table entries which are mapping this
   * page, used in the pageout path.  Caller must hold the page lock.
   * Return values are:
   *
   * SWAP_SUCCESS	- we succeeded in removing all mappings
   * SWAP_AGAIN	- we missed a mapping, try again later
   * SWAP_FAIL	- the page is unswappable
   */
7352349a1   Christoph Lameter   [PATCH] page migr...
989
  int try_to_unmap(struct page *page, int migration)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
990
991
  {
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
992
993
994
  	BUG_ON(!PageLocked(page));
  
  	if (PageAnon(page))
7352349a1   Christoph Lameter   [PATCH] page migr...
995
  		ret = try_to_unmap_anon(page, migration);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
996
  	else
7352349a1   Christoph Lameter   [PATCH] page migr...
997
  		ret = try_to_unmap_file(page, migration);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
999
1000
1001
1002
  
  	if (!page_mapped(page))
  		ret = SWAP_SUCCESS;
  	return ret;
  }
81b4082dc   Nikita Danilov   [PATCH] mm: rmap....
1003