Blame view

mm/migrate.c 33.4 KB
b20a35035   Christoph Lameter   [PATCH] page migr...
1
2
3
4
5
6
7
8
9
10
11
  /*
   * Memory Migration functionality - linux/mm/migrate.c
   *
   * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
   *
   * Page migration was first developed in the context of the memory hotplug
   * project. The main authors of the migration code are:
   *
   * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
   * Hirokazu Takahashi <taka@valinux.co.jp>
   * Dave Hansen <haveblue@us.ibm.com>
cde535359   Christoph Lameter   Christoph has moved
12
   * Christoph Lameter
b20a35035   Christoph Lameter   [PATCH] page migr...
13
14
15
   */
  
  #include <linux/migrate.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
16
  #include <linux/export.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
17
  #include <linux/swap.h>
0697212a4   Christoph Lameter   [PATCH] Swapless ...
18
  #include <linux/swapops.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
19
  #include <linux/pagemap.h>
e23ca00bf   Christoph Lameter   [PATCH] Some page...
20
  #include <linux/buffer_head.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
21
  #include <linux/mm_inline.h>
b488893a3   Pavel Emelyanov   pid namespaces: c...
22
  #include <linux/nsproxy.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
23
  #include <linux/pagevec.h>
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
24
  #include <linux/ksm.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
25
26
27
28
  #include <linux/rmap.h>
  #include <linux/topology.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
04e62a29b   Christoph Lameter   [PATCH] More page...
29
  #include <linux/writeback.h>
742755a1d   Christoph Lameter   [PATCH] page migr...
30
31
  #include <linux/mempolicy.h>
  #include <linux/vmalloc.h>
86c3a7645   David Quigley   [PATCH] SELinux: ...
32
  #include <linux/security.h>
8a9f3ccd2   Balbir Singh   Memory controller...
33
  #include <linux/memcontrol.h>
4f5ca2657   Adrian Bunk   mm/migrate.c shou...
34
  #include <linux/syscalls.h>
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
35
  #include <linux/hugetlb.h>
8e6ac7fab   Aneesh Kumar K.V   hugetlb/cgroup: m...
36
  #include <linux/hugetlb_cgroup.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
37
  #include <linux/gfp.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
38

0d1836c36   Michal Nazarewicz   mm/migrate.c: fix...
39
  #include <asm/tlbflush.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
40
  #include "internal.h"
b20a35035   Christoph Lameter   [PATCH] page migr...
41
  /*
   * migrate_prep() needs to be called before we start compiling a list of pages
   * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
   * undesirable, use migrate_prep_local()
   *
   * Always returns 0.
   */
  int migrate_prep(void)
  {
  	/*
  	 * Clear the LRU lists so pages can be isolated.
  	 * Note that pages may be moved off the LRU after we have
  	 * drained them. Those pages will fail to migrate like other
  	 * pages that may be busy.
  	 */
  	lru_add_drain_all();
  
  	return 0;
  }
748446bb6   Mel Gorman   mm: compaction: m...
58
59
60
61
62
63
64
  /*
   * Variant of migrate_prep() that avoids involving other CPUs: it calls
   * lru_add_drain() instead of lru_add_drain_all().
   *
   * Always returns 0.
   */
  int migrate_prep_local(void)
  {
  	lru_add_drain();
  	return 0;
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
65
  /*
894bc3104   Lee Schermerhorn   Unevictable LRU I...
66
67
   * Add isolated pages on the list back to the LRU under page lock
   * to avoid leaking evictable pages back onto unevictable list.
b20a35035   Christoph Lameter   [PATCH] page migr...
68
   */
e13861d82   Minchan Kim   mm: remove return...
69
  void putback_lru_pages(struct list_head *l)
b20a35035   Christoph Lameter   [PATCH] page migr...
70
71
72
  {
  	struct page *page;
  	struct page *page2;
b20a35035   Christoph Lameter   [PATCH] page migr...
73
74
  
  	list_for_each_entry_safe(page, page2, l, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
75
  		list_del(&page->lru);
a731286de   KOSAKI Motohiro   mm: vmstat: add i...
76
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
6c0b13519   Johannes Weiner   mm: return boolea...
77
  				page_is_file_cache(page));
894bc3104   Lee Schermerhorn   Unevictable LRU I...
78
  		putback_lru_page(page);
b20a35035   Christoph Lameter   [PATCH] page migr...
79
  	}
b20a35035   Christoph Lameter   [PATCH] page migr...
80
  }
0697212a4   Christoph Lameter   [PATCH] Swapless ...
81
82
83
  /*
   * Restore a potential migration pte to a working pte entry
   */
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
84
85
  static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
  				 unsigned long addr, void *old)
0697212a4   Christoph Lameter   [PATCH] Swapless ...
86
87
88
89
90
91
92
93
  {
  	struct mm_struct *mm = vma->vm_mm;
  	swp_entry_t entry;
   	pgd_t *pgd;
   	pud_t *pud;
   	pmd_t *pmd;
  	pte_t *ptep, pte;
   	spinlock_t *ptl;
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
94
95
96
97
98
99
100
101
102
  	if (unlikely(PageHuge(new))) {
  		ptep = huge_pte_offset(mm, addr);
  		if (!ptep)
  			goto out;
  		ptl = &mm->page_table_lock;
  	} else {
  		pgd = pgd_offset(mm, addr);
  		if (!pgd_present(*pgd))
  			goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
103

290408d4a   Naoya Horiguchi   hugetlb: hugepage...
104
105
106
  		pud = pud_offset(pgd, addr);
  		if (!pud_present(*pud))
  			goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
107

290408d4a   Naoya Horiguchi   hugetlb: hugepage...
108
  		pmd = pmd_offset(pud, addr);
500d65d47   Andrea Arcangeli   thp: pmd_trans_hu...
109
110
  		if (pmd_trans_huge(*pmd))
  			goto out;
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
111
112
  		if (!pmd_present(*pmd))
  			goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
113

290408d4a   Naoya Horiguchi   hugetlb: hugepage...
114
  		ptep = pte_offset_map(pmd, addr);
0697212a4   Christoph Lameter   [PATCH] Swapless ...
115

486cf46f3   Hugh Dickins   mm: fix race betw...
116
117
118
119
  		/*
  		 * Peek to check is_swap_pte() before taking ptlock?  No, we
  		 * can race mremap's move_ptes(), which skips anon_vma lock.
  		 */
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
120
121
122
  
  		ptl = pte_lockptr(mm, pmd);
  	}
0697212a4   Christoph Lameter   [PATCH] Swapless ...
123

0697212a4   Christoph Lameter   [PATCH] Swapless ...
124
125
126
   	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
127
  		goto unlock;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
128
129
  
  	entry = pte_to_swp_entry(pte);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
130
131
132
  	if (!is_migration_entry(entry) ||
  	    migration_entry_to_page(entry) != old)
  		goto unlock;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
133

0697212a4   Christoph Lameter   [PATCH] Swapless ...
134
135
136
137
  	get_page(new);
  	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
  	if (is_write_migration_entry(entry))
  		pte = pte_mkwrite(pte);
3ef8fd7f7   Andi Kleen   Fix migration.c c...
138
  #ifdef CONFIG_HUGETLB_PAGE
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
139
140
  	if (PageHuge(new))
  		pte = pte_mkhuge(pte);
3ef8fd7f7   Andi Kleen   Fix migration.c c...
141
  #endif
97ee05246   KAMEZAWA Hiroyuki   flush cache befor...
142
  	flush_cache_page(vma, addr, pte_pfn(pte));
0697212a4   Christoph Lameter   [PATCH] Swapless ...
143
  	set_pte_at(mm, addr, ptep, pte);
04e62a29b   Christoph Lameter   [PATCH] More page...
144

290408d4a   Naoya Horiguchi   hugetlb: hugepage...
145
146
147
148
149
150
  	if (PageHuge(new)) {
  		if (PageAnon(new))
  			hugepage_add_anon_rmap(new, vma, addr);
  		else
  			page_dup_rmap(new);
  	} else if (PageAnon(new))
04e62a29b   Christoph Lameter   [PATCH] More page...
151
152
153
154
155
  		page_add_anon_rmap(new, vma, addr);
  	else
  		page_add_file_rmap(new);
  
  	/* No need to invalidate - it was non-present before */
4b3073e1c   Russell King   MM: Pass a PTE po...
156
  	update_mmu_cache(vma, addr, ptep);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
157
  unlock:
0697212a4   Christoph Lameter   [PATCH] Swapless ...
158
  	pte_unmap_unlock(ptep, ptl);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
159
160
  out:
  	return SWAP_AGAIN;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
161
162
163
  }
  
  /*
04e62a29b   Christoph Lameter   [PATCH] More page...
164
165
166
167
168
   * Get rid of all migration entries and replace them by
   * references to the indicated page.
   */
  static void remove_migration_ptes(struct page *old, struct page *new)
  {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
169
  	rmap_walk(new, remove_migration_pte, old);
04e62a29b   Christoph Lameter   [PATCH] More page...
170
171
172
  }
  
  /*
   * Something used the pte of a page under migration. We need to
   * get to the page and wait until migration is finished.
   * When we return from this function the fault will be retried.
   *
   * @mm:      address space the pte belongs to
   * @pmd:     pmd covering @address
   * @address: virtual address whose pte is examined
   *
   * Returns without waiting if the pte is not a migration entry or if no
   * reference could be taken on the page.
   */
  void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
  				unsigned long address)
  {
  	pte_t *ptep, pte;
  	spinlock_t *ptl;
  	swp_entry_t entry;
  	struct page *page;
  
  	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto out;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry))
  		goto out;
  
  	page = migration_entry_to_page(entry);
  
  	/*
  	 * Once radix-tree replacement of page migration started, page_count
  	 * *must* be zero. And, we don't want to call wait_on_page_locked()
  	 * against a page without get_page().
  	 * So, we use get_page_unless_zero(), here. Even failed, page fault
  	 * will occur again.
  	 */
  	if (!get_page_unless_zero(page))
  		goto out;
  	/* Drop the pte lock before waiting on the page lock */
  	pte_unmap_unlock(ptep, ptl);
  	wait_on_page_locked(page);
  	put_page(page);
  	return;
  out:
  	pte_unmap_unlock(ptep, ptl);
  }
b969c4ab9   Mel Gorman   mm: compaction: d...
211
212
  #ifdef CONFIG_BLOCK
  /* Returns true if all buffers are successfully locked */
a6bc32b89   Mel Gorman   mm: compaction: i...
213
214
  static bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
b969c4ab9   Mel Gorman   mm: compaction: d...
215
216
217
218
  {
  	struct buffer_head *bh = head;
  
  	/* Simple case, sync compaction */
a6bc32b89   Mel Gorman   mm: compaction: i...
219
  	if (mode != MIGRATE_ASYNC) {
b969c4ab9   Mel Gorman   mm: compaction: d...
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
  		do {
  			get_bh(bh);
  			lock_buffer(bh);
  			bh = bh->b_this_page;
  
  		} while (bh != head);
  
  		return true;
  	}
  
  	/* async case, we cannot block on lock_buffer so use trylock_buffer */
  	do {
  		get_bh(bh);
  		if (!trylock_buffer(bh)) {
  			/*
  			 * We failed to lock the buffer and cannot stall in
  			 * async migration. Release the taken locks
  			 */
  			struct buffer_head *failed_bh = bh;
  			put_bh(failed_bh);
  			bh = head;
  			while (bh != failed_bh) {
  				unlock_buffer(bh);
  				put_bh(bh);
  				bh = bh->b_this_page;
  			}
  			return false;
  		}
  
  		bh = bh->b_this_page;
  	} while (bh != head);
  	return true;
  }
  #else
  static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
a6bc32b89   Mel Gorman   mm: compaction: i...
255
  							enum migrate_mode mode)
b969c4ab9   Mel Gorman   mm: compaction: d...
256
257
258
259
  {
  	return true;
  }
  #endif /* CONFIG_BLOCK */
b20a35035   Christoph Lameter   [PATCH] page migr...
260
  /*
c3fcf8a5d   Christoph Lameter   [PATCH] page migr...
261
   * Replace the page in the mapping.
5b5c7120e   Christoph Lameter   [PATCH] page migr...
262
263
264
265
   *
   * The number of remaining references must be:
   * 1 for anonymous pages without a mapping
   * 2 for pages with a mapping
266cf658e   David Howells   FS-Cache: Recruit...
266
   * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
b20a35035   Christoph Lameter   [PATCH] page migr...
267
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
268
  static int migrate_page_move_mapping(struct address_space *mapping,
b969c4ab9   Mel Gorman   mm: compaction: d...
269
  		struct page *newpage, struct page *page,
a6bc32b89   Mel Gorman   mm: compaction: i...
270
  		struct buffer_head *head, enum migrate_mode mode)
b20a35035   Christoph Lameter   [PATCH] page migr...
271
  {
e286781d5   Nick Piggin   mm: speculative p...
272
  	int expected_count;
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
273
  	void **pslot;
b20a35035   Christoph Lameter   [PATCH] page migr...
274

6c5240ae7   Christoph Lameter   [PATCH] Swapless ...
275
  	if (!mapping) {
0e8c7d0fd   Christoph Lameter   page migration: f...
276
  		/* Anonymous page without mapping */
6c5240ae7   Christoph Lameter   [PATCH] Swapless ...
277
278
279
280
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return 0;
  	}
19fd62312   Nick Piggin   mm: spinlock tree...
281
  	spin_lock_irq(&mapping->tree_lock);
b20a35035   Christoph Lameter   [PATCH] page migr...
282

7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
283
284
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
   					page_index(page));
b20a35035   Christoph Lameter   [PATCH] page migr...
285

edcf4748c   Johannes Weiner   mm: return boolea...
286
  	expected_count = 2 + page_has_private(page);
e286781d5   Nick Piggin   mm: speculative p...
287
  	if (page_count(page) != expected_count ||
29c1f677d   Mel Gorman   mm: migration: us...
288
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
19fd62312   Nick Piggin   mm: spinlock tree...
289
  		spin_unlock_irq(&mapping->tree_lock);
e23ca00bf   Christoph Lameter   [PATCH] Some page...
290
  		return -EAGAIN;
b20a35035   Christoph Lameter   [PATCH] page migr...
291
  	}
e286781d5   Nick Piggin   mm: speculative p...
292
  	if (!page_freeze_refs(page, expected_count)) {
19fd62312   Nick Piggin   mm: spinlock tree...
293
  		spin_unlock_irq(&mapping->tree_lock);
e286781d5   Nick Piggin   mm: speculative p...
294
295
  		return -EAGAIN;
  	}
b20a35035   Christoph Lameter   [PATCH] page migr...
296
  	/*
b969c4ab9   Mel Gorman   mm: compaction: d...
297
298
299
300
301
302
  	 * In the async migration case of moving a page with buffers, lock the
  	 * buffers using trylock before the mapping is moved. If the mapping
  	 * was moved, we later failed to lock the buffers and could not move
  	 * the mapping back due to an elevated page count, we would have to
  	 * block waiting on other references to be dropped.
  	 */
a6bc32b89   Mel Gorman   mm: compaction: i...
303
304
  	if (mode == MIGRATE_ASYNC && head &&
  			!buffer_migrate_lock_buffers(head, mode)) {
b969c4ab9   Mel Gorman   mm: compaction: d...
305
306
307
308
309
310
  		page_unfreeze_refs(page, expected_count);
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	/*
b20a35035   Christoph Lameter   [PATCH] page migr...
311
  	 * Now we know that no one else is looking at the page.
b20a35035   Christoph Lameter   [PATCH] page migr...
312
  	 */
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
313
  	get_page(newpage);	/* add cache reference */
b20a35035   Christoph Lameter   [PATCH] page migr...
314
315
316
317
  	if (PageSwapCache(page)) {
  		SetPageSwapCache(newpage);
  		set_page_private(newpage, page_private(page));
  	}
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
318
319
320
  	radix_tree_replace_slot(pslot, newpage);
  
  	/*
937a94c9d   Jacobo Giralt   mm: migrate: one ...
321
322
  	 * Drop cache reference from old page by unfreezing
  	 * to one less reference.
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
323
324
  	 * We know this isn't the last reference.
  	 */
937a94c9d   Jacobo Giralt   mm: migrate: one ...
325
  	page_unfreeze_refs(page, expected_count - 1);
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
326

0e8c7d0fd   Christoph Lameter   page migration: f...
327
328
329
330
331
332
333
334
335
336
337
338
  	/*
  	 * If moved to a different zone then also account
  	 * the page for that zone. Other VM counters will be
  	 * taken care of when we establish references to the
  	 * new page and drop references to the old page.
  	 *
  	 * Note that anonymous pages are accounted for
  	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
  	 * are mapped to swap space.
  	 */
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__inc_zone_page_state(newpage, NR_FILE_PAGES);
99a15e21d   Andrea Arcangeli   migrate: don't ac...
339
  	if (!PageSwapCache(page) && PageSwapBacked(page)) {
4b02108ac   KOSAKI Motohiro   mm: oom analysis:...
340
341
342
  		__dec_zone_page_state(page, NR_SHMEM);
  		__inc_zone_page_state(newpage, NR_SHMEM);
  	}
19fd62312   Nick Piggin   mm: spinlock tree...
343
  	spin_unlock_irq(&mapping->tree_lock);
b20a35035   Christoph Lameter   [PATCH] page migr...
344
345
346
  
  	return 0;
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
347
348
  
  /*
   * The expected number of remaining references is the same as that
   * of migrate_page_move_mapping().
   *
   * @mapping: mapping the huge page belongs to, or NULL
   * @newpage: page that takes over the radix tree slot
   * @page:    page being replaced
   *
   * Returns 0 on success, or -EAGAIN if the reference count is not the
   * expected one, the radix tree slot does not hold @page, or the
   * references cannot be frozen.
   */
  int migrate_huge_page_move_mapping(struct address_space *mapping,
  				   struct page *newpage, struct page *page)
  {
  	int expected_count;
  	void **pslot;
  
  	if (!mapping) {
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return 0;
  	}
  
  	spin_lock_irq(&mapping->tree_lock);
  
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
  					page_index(page));
  
  	expected_count = 2 + page_has_private(page);
  	if (page_count(page) != expected_count ||
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	if (!page_freeze_refs(page, expected_count)) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	/* Reference held on behalf of the radix tree slot */
  	get_page(newpage);
  
  	radix_tree_replace_slot(pslot, newpage);
  
  	/* Unfreeze the old page with one reference fewer */
  	page_unfreeze_refs(page, expected_count - 1);
  
  	spin_unlock_irq(&mapping->tree_lock);
  	return 0;
  }
  
  /*
   * Copy the page to its new location
   *
   * @newpage: destination page
   * @page:    source page
   *
   * Copies the contents and carries over the Error, Referenced, Uptodate,
   * Active/Unevictable, Checked, MappedToDisk and Dirty state. The Active,
   * Unevictable, Dirty (via clear_page_dirty_for_io()), SwapCache and Private
   * state of @page is cleared and its private field is reset to 0.
   */
  void migrate_page_copy(struct page *newpage, struct page *page)
  {
  	if (PageHuge(page))
  		copy_huge_page(newpage, page);
  	else
  		copy_highpage(newpage, page);
  
  	if (PageError(page))
  		SetPageError(newpage);
  	if (PageReferenced(page))
  		SetPageReferenced(newpage);
  	if (PageUptodate(page))
  		SetPageUptodate(newpage);
  	if (TestClearPageActive(page)) {
  		VM_BUG_ON(PageUnevictable(page));
  		SetPageActive(newpage);
  	} else if (TestClearPageUnevictable(page))
  		SetPageUnevictable(newpage);
  	if (PageChecked(page))
  		SetPageChecked(newpage);
  	if (PageMappedToDisk(page))
  		SetPageMappedToDisk(newpage);
  
  	if (PageDirty(page)) {
  		clear_page_dirty_for_io(page);
  		/*
  		 * Want to mark the page and the radix tree as dirty, and
  		 * redo the accounting that clear_page_dirty_for_io undid,
  		 * but we can't use set_page_dirty because that function
  		 * is actually a signal that all of the page has become dirty.
  		 * Whereas only part of our page may be dirty.
  		 */
  		if (PageSwapBacked(page))
  			SetPageDirty(newpage);
  		else
  			__set_page_dirty_nobuffers(newpage);
  	}
  
  	mlock_migrate_page(newpage, page);
  	ksm_migrate_page(newpage, page);
  
  	ClearPageSwapCache(page);
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
  
  	/*
  	 * If any waiters have accumulated on the new page then
  	 * wake them up.
  	 */
  	if (PageWriteback(newpage))
  		end_page_writeback(newpage);
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
444

1d8b85ccf   Christoph Lameter   [PATCH] page migr...
445
446
447
448
449
  /************************************************************
   *                    Migration functions
   ***********************************************************/
  
  /* Always fail migration. Used for mappings that are not movable */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
450
451
  int fail_migrate_page(struct address_space *mapping,
  			struct page *newpage, struct page *page)
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
452
453
454
455
  {
  	return -EIO;
  }
  EXPORT_SYMBOL(fail_migrate_page);
b20a35035   Christoph Lameter   [PATCH] page migr...
456
457
  /*
   * Common logic to directly migrate a single page suitable for
266cf658e   David Howells   FS-Cache: Recruit...
458
   * pages that do not use PagePrivate/PagePrivate2.
b20a35035   Christoph Lameter   [PATCH] page migr...
459
460
461
   *
   * Pages are locked upon entry and exit.
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
462
  int migrate_page(struct address_space *mapping,
a6bc32b89   Mel Gorman   mm: compaction: i...
463
464
  		struct page *newpage, struct page *page,
  		enum migrate_mode mode)
b20a35035   Christoph Lameter   [PATCH] page migr...
465
466
467
468
  {
  	int rc;
  
  	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
a6bc32b89   Mel Gorman   mm: compaction: i...
469
  	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
b20a35035   Christoph Lameter   [PATCH] page migr...
470
471
472
473
474
  
  	if (rc)
  		return rc;
  
  	migrate_page_copy(newpage, page);
b20a35035   Christoph Lameter   [PATCH] page migr...
475
476
477
  	return 0;
  }
  EXPORT_SYMBOL(migrate_page);
9361401eb   David Howells   [PATCH] BLOCK: Ma...
478
  #ifdef CONFIG_BLOCK
  /*
   * Migration function for pages with buffers. This function can only be used
   * if the underlying filesystem guarantees that no other references to "page"
   * exist.
   *
   * @mapping: mapping of @page
   * @newpage: destination page
   * @page:    page to migrate
   * @mode:    migration mode
   *
   * Pages without buffers are handed to migrate_page(). Otherwise the buffers
   * are locked, moved over to @newpage and unlocked again after the copy.
   *
   * Returns 0 on success, otherwise the error returned by
   * migrate_page_move_mapping() or migrate_page().
   */
  int buffer_migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page, enum migrate_mode mode)
  {
  	struct buffer_head *bh, *head;
  	int rc;
  
  	if (!page_has_buffers(page))
  		return migrate_page(mapping, newpage, page, mode);
  
  	head = page_buffers(page);
  
  	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
  
  	if (rc)
  		return rc;
  
  	/*
  	 * In the async case, migrate_page_move_mapping locked the buffers
  	 * with an IRQ-safe spinlock held. In the sync case, the buffers
  	 * need to be locked now
  	 */
  	if (mode != MIGRATE_ASYNC)
  		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
  
  	/* Hand the private data and one page reference over to newpage */
  	ClearPagePrivate(page);
  	set_page_private(newpage, page_private(page));
  	set_page_private(page, 0);
  	put_page(page);
  	get_page(newpage);
  
  	/* Point every buffer in the ring at the new page */
  	bh = head;
  	do {
  		set_bh_page(bh, newpage, bh_offset(bh));
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	SetPagePrivate(newpage);
  
  	migrate_page_copy(newpage, page);
  
  	/* Release the locks and references taken when locking the buffers */
  	bh = head;
  	do {
  		unlock_buffer(bh);
  		put_bh(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	return 0;
  }
  EXPORT_SYMBOL(buffer_migrate_page);
  #endif
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
534

04e62a29b   Christoph Lameter   [PATCH] More page...
535
536
537
538
  /*
   * Writeback a page to clean the dirty state
   */
  static int writeout(struct address_space *mapping, struct page *page)
8351a6e47   Christoph Lameter   [PATCH] page migr...
539
  {
04e62a29b   Christoph Lameter   [PATCH] More page...
540
541
542
543
544
  	struct writeback_control wbc = {
  		.sync_mode = WB_SYNC_NONE,
  		.nr_to_write = 1,
  		.range_start = 0,
  		.range_end = LLONG_MAX,
04e62a29b   Christoph Lameter   [PATCH] More page...
545
546
547
548
549
550
551
552
553
554
555
  		.for_reclaim = 1
  	};
  	int rc;
  
  	if (!mapping->a_ops->writepage)
  		/* No write method for the address space */
  		return -EINVAL;
  
  	if (!clear_page_dirty_for_io(page))
  		/* Someone else already triggered a write */
  		return -EAGAIN;
8351a6e47   Christoph Lameter   [PATCH] page migr...
556
  	/*
04e62a29b   Christoph Lameter   [PATCH] More page...
557
558
559
560
561
562
  	 * A dirty page may imply that the underlying filesystem has
  	 * the page on some queue. So the page must be clean for
  	 * migration. Writeout may mean we loose the lock and the
  	 * page state is no longer what we checked for earlier.
  	 * At this point we know that the migration attempt cannot
  	 * be successful.
8351a6e47   Christoph Lameter   [PATCH] page migr...
563
  	 */
04e62a29b   Christoph Lameter   [PATCH] More page...
564
  	remove_migration_ptes(page, page);
8351a6e47   Christoph Lameter   [PATCH] page migr...
565

04e62a29b   Christoph Lameter   [PATCH] More page...
566
  	rc = mapping->a_ops->writepage(page, &wbc);
8351a6e47   Christoph Lameter   [PATCH] page migr...
567

04e62a29b   Christoph Lameter   [PATCH] More page...
568
569
570
  	if (rc != AOP_WRITEPAGE_ACTIVATE)
  		/* unlocked. Relock */
  		lock_page(page);
bda8550de   Hugh Dickins   migration: fix wr...
571
  	return (rc < 0) ? -EIO : -EAGAIN;
04e62a29b   Christoph Lameter   [PATCH] More page...
572
573
574
575
576
577
  }
  
  /*
   * Default handling if a filesystem does not provide a migration function.
   */
  static int fallback_migrate_page(struct address_space *mapping,
a6bc32b89   Mel Gorman   mm: compaction: i...
578
  	struct page *newpage, struct page *page, enum migrate_mode mode)
04e62a29b   Christoph Lameter   [PATCH] More page...
579
  {
b969c4ab9   Mel Gorman   mm: compaction: d...
580
  	if (PageDirty(page)) {
a6bc32b89   Mel Gorman   mm: compaction: i...
581
582
  		/* Only writeback pages in full synchronous migration */
  		if (mode != MIGRATE_SYNC)
b969c4ab9   Mel Gorman   mm: compaction: d...
583
  			return -EBUSY;
04e62a29b   Christoph Lameter   [PATCH] More page...
584
  		return writeout(mapping, page);
b969c4ab9   Mel Gorman   mm: compaction: d...
585
  	}
8351a6e47   Christoph Lameter   [PATCH] page migr...
586
587
588
589
590
  
  	/*
  	 * Buffers may be managed in a filesystem specific way.
  	 * We must have no buffers or drop them.
  	 */
266cf658e   David Howells   FS-Cache: Recruit...
591
  	if (page_has_private(page) &&
8351a6e47   Christoph Lameter   [PATCH] page migr...
592
593
  	    !try_to_release_page(page, GFP_KERNEL))
  		return -EAGAIN;
a6bc32b89   Mel Gorman   mm: compaction: i...
594
  	return migrate_page(mapping, newpage, page, mode);
8351a6e47   Christoph Lameter   [PATCH] page migr...
595
  }
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
596
  /*
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
597
598
599
600
601
   * Move a page to a newly allocated page
   * The page is locked and all ptes have been successfully removed.
   *
   * The new page will have replaced the old page if this function
   * is successful.
894bc3104   Lee Schermerhorn   Unevictable LRU I...
602
603
604
605
   *
   * Return value:
   *   < 0 - error code
   *  == 0 - success
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
606
   */
3fe2011ff   Mel Gorman   mm: migration: al...
607
  static int move_to_new_page(struct page *newpage, struct page *page,
a6bc32b89   Mel Gorman   mm: compaction: i...
608
  				int remap_swapcache, enum migrate_mode mode)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
609
610
611
612
613
614
615
616
617
  {
  	struct address_space *mapping;
  	int rc;
  
  	/*
  	 * Block others from accessing the page when we get around to
  	 * establishing additional references. We are the only one
  	 * holding a reference to the new page at this point.
  	 */
529ae9aaa   Nick Piggin   mm: rename page t...
618
  	if (!trylock_page(newpage))
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
619
620
621
622
623
  		BUG();
  
  	/* Prepare mapping for the new page.*/
  	newpage->index = page->index;
  	newpage->mapping = page->mapping;
b2e185384   Rik van Riel   define page_file_...
624
625
  	if (PageSwapBacked(page))
  		SetPageSwapBacked(newpage);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
626
627
628
  
  	mapping = page_mapping(page);
  	if (!mapping)
a6bc32b89   Mel Gorman   mm: compaction: i...
629
  		rc = migrate_page(mapping, newpage, page, mode);
b969c4ab9   Mel Gorman   mm: compaction: d...
630
  	else if (mapping->a_ops->migratepage)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
631
  		/*
b969c4ab9   Mel Gorman   mm: compaction: d...
632
633
634
635
  		 * Most pages have a mapping and most filesystems provide a
  		 * migratepage callback. Anonymous pages are part of swap
  		 * space which also has its own migratepage callback. This
  		 * is the most common path for page migration.
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
636
  		 */
b969c4ab9   Mel Gorman   mm: compaction: d...
637
  		rc = mapping->a_ops->migratepage(mapping,
a6bc32b89   Mel Gorman   mm: compaction: i...
638
  						newpage, page, mode);
b969c4ab9   Mel Gorman   mm: compaction: d...
639
  	else
a6bc32b89   Mel Gorman   mm: compaction: i...
640
  		rc = fallback_migrate_page(mapping, newpage, page, mode);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
641

3fe2011ff   Mel Gorman   mm: migration: al...
642
  	if (rc) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
643
  		newpage->mapping = NULL;
3fe2011ff   Mel Gorman   mm: migration: al...
644
645
646
  	} else {
  		if (remap_swapcache)
  			remove_migration_ptes(page, newpage);
35512ecae   Konstantin Khlebnikov   mm: postpone migr...
647
  		page->mapping = NULL;
3fe2011ff   Mel Gorman   mm: migration: al...
648
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
649
650
651
652
653
  
  	unlock_page(newpage);
  
  	return rc;
  }
0dabec93d   Minchan Kim   mm: migration: cl...
654
  /*
   * Lock @page, unmap it and move its contents to @newpage.
   *
   * @force is non-zero when the caller is willing to block; @offlining is
   * true for the memory hotplug caller; @mode selects how much stalling is
   * acceptable. Returns 0 on success, -EAGAIN when the attempt may be
   * retried, -EBUSY for a KSM page outside offlining or a page under
   * writeback in a non-MIGRATE_SYNC mode, or whatever move_to_new_page()
   * returned.
   */
  static int __unmap_and_move(struct page *page, struct page *newpage,
  			int force, bool offlining, enum migrate_mode mode)
  {
  	struct anon_vma *anon_vma = NULL;
  	struct mem_cgroup *mem;
  	int remap_swapcache = 1;
  	int rc = -EAGAIN;

  	if (!trylock_page(page)) {
  		/*
  		 * Give up instead of sleeping on the lock when the caller
  		 * did not ask us to force, when the migration is async, or
  		 * when we run with PF_MEMALLOC set.
  		 *
  		 * The PF_MEMALLOC case exists because it is not safe for
  		 * direct compaction to call lock_page. During readahead,
  		 * pages are added locked to the LRU and only unlocked once
  		 * the IO completes, and the queueing may be merging several
  		 * pages into one bio (e.g. mpage_readpages). An allocation
  		 * for the second or third page could then make the process
  		 * lock the same page twice and deadlock. Rather than trying
  		 * to be clever about which pages may be locked, lock_page
  		 * is avoided for direct compaction altogether.
  		 */
  		if (!force || mode == MIGRATE_ASYNC ||
  		    (current->flags & PF_MEMALLOC))
  			goto out;

  		lock_page(page);
  	}

  	/*
  	 * Only memory hotplug's offline_pages() caller has locked out KSM
  	 * and can safely migrate a KSM page. The other callers have skipped
  	 * PageKsm along with PageReserved, but only now that we hold the
  	 * page lock can we be certain the page will not go KSM beneath us
  	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
  	 * its pagecount raised, but it is the page lock taken here that
  	 * serializes that).
  	 */
  	if (PageKsm(page) && !offlining) {
  		rc = -EBUSY;
  		goto unlock;
  	}

  	/* charge against new page */
  	mem_cgroup_prepare_migration(page, newpage, &mem);

  	if (PageWriteback(page)) {
  		/*
  		 * Waiting for PageWriteback is only necessary for a fully
  		 * synchronous migration. In the async case the retry loop
  		 * is too short, and in the sync-light case the overhead of
  		 * stalling is too much.
  		 */
  		if (mode != MIGRATE_SYNC) {
  			rc = -EBUSY;
  			goto uncharge;
  		}
  		if (!force)
  			goto uncharge;
  		wait_on_page_writeback(page);
  	}

  	/*
  	 * try_to_unmap() brings page->mapcount down to 0, after which we
  	 * could not notice the anon_vma being freed while the page is
  	 * migrated. Taking a reference on the anon_vma delays its freeing
  	 * until the end of migration. File cache pages are no problem
  	 * because of page_lock(); file caches may use write_page() or
  	 * lock_page() in migration, so only anon pages need care here.
  	 */
  	if (PageAnon(page)) {
  		/*
  		 * Only page_lock_anon_vma() understands the subtleties of
  		 * getting a hold on an anon_vma from outside one of its mms.
  		 */
  		anon_vma = page_get_anon_vma(page);
  		if (!anon_vma) {
  			/* Neither an anon_vma nor swapcache: give up. */
  			if (!PageSwapCache(page))
  				goto uncharge;

  			/*
  			 * We cannot be sure that the anon_vma of an unmapped
  			 * swapcache page is safe to use, because we do not
  			 * know in advance whether the VMA this page belonged
  			 * to still exists. If that VMA and the others sharing
  			 * the data have been freed, the anon_vma could
  			 * already be invalid.
  			 *
  			 * To avoid this possibility, swapcache pages get
  			 * migrated but are not remapped when migration
  			 * completes.
  			 */
  			remap_swapcache = 0;
  		}
  	}

  	if (page->mapping) {
  		/* Establish migration ptes or remove ptes */
  		try_to_unmap(page,
  			     TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
  	} else {
  		/*
  		 * Corner case handling:
  		 * 1. A swap-cache page that was just read in is added to the
  		 * LRU and treated as swapcache, but it has no rmap yet.
  		 * Calling try_to_unmap() against a page->mapping==NULL page
  		 * would trigger a BUG, so the unmap step is skipped.
  		 * 2. An orphaned page (see truncate_complete_page) might have
  		 * fs-private metadata. The page can be picked up due to
  		 * memory offlining. Everywhere else except page reclaim the
  		 * page is invisible to the vm, so it can not be migrated.
  		 * Try to free the metadata so that the page can be freed.
  		 */
  		VM_BUG_ON(PageAnon(page));
  		if (page_has_private(page)) {
  			try_to_free_buffers(page);
  			goto uncharge;
  		}
  	}

  	if (!page_mapped(page))
  		rc = move_to_new_page(newpage, page, remap_swapcache, mode);

  	/* On failure put the original ptes back onto the old page. */
  	if (rc && remap_swapcache)
  		remove_migration_ptes(page, page);

  	/* Drop an anon_vma reference if we took one */
  	if (anon_vma)
  		put_anon_vma(anon_vma);

  uncharge:
  	mem_cgroup_end_migration(mem, page, newpage, rc == 0);
  unlock:
  	unlock_page(page);
  out:
  	return rc;
  }
95a402c38   Christoph Lameter   [PATCH] page migr...
793

0dabec93d   Minchan Kim   mm: migration: cl...
794
795
796
797
798
  /*
   * Allocate a target page through @get_new_page, then lock @page, remove
   * all of its ptes and migrate it to the new page.
   *
   * Returns -ENOMEM when no target page could be allocated, otherwise the
   * result of the migration attempt (0 on success, and also 0 when the page
   * was already freed).
   */
  static int unmap_and_move(new_page_t get_new_page, unsigned long private,
  			struct page *page, int force, bool offlining,
  			enum migrate_mode mode)
  {
  	int *result = NULL;
  	struct page *newpage = get_new_page(page, private, &result);
  	int rc = 0;

  	if (!newpage)
  		return -ENOMEM;

  	/* page was freed from under us. So we are done. */
  	if (page_count(page) == 1)
  		goto putback;

  	/* A huge page has to be split first; skip the page if that fails. */
  	if (unlikely(PageTransHuge(page)) && unlikely(split_huge_page(page)))
  		goto putback;

  	rc = __unmap_and_move(page, newpage, force, offlining, mode);

  putback:
  	if (rc != -EAGAIN) {
  		/*
  		 * A page that has been migrated has all references
  		 * removed and will be freed. A page that has not been
  		 * migrated will have kept its references and be
  		 * restored.
  		 */
  		list_del(&page->lru);
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
  				page_is_file_cache(page));
  		putback_lru_page(page);
  	}

  	/*
  	 * Move the new page to the LRU. If migration was not successful
  	 * then this will free the page.
  	 */
  	putback_lru_page(newpage);

  	/* Report the error code, or the node the page ended up on. */
  	if (result)
  		*result = rc ? rc : page_to_nid(newpage);

  	return rc;
  }
  
  /*
   * Counterpart of unmap_and_move() for hugepage migration.
   *
   * This function does not wait for completion of hugepage I/O because
   * there is no race between I/O and migration for hugepages.
   * Note that currently hugepage I/O occurs only in direct I/O, where no
   * lock is held and PG_writeback is irrelevant, and the writeback status
   * of all subpages is counted in the reference count of the head page
   * (i.e. if all subpages of a 2MB hugepage are under direct I/O, the
   * reference count of the head page is 512 and a bit more.)
   * This means that when we try to migrate a hugepage whose subpages are
   * doing direct I/O, some references remain after try_to_unmap() and
   * hugepage migration fails without data corruption.
   *
   * There is also no race when direct I/O is issued on the page under
   * migration, because then the pte is replaced with a migration swap entry
   * and the direct I/O code will wait in the page fault for migration to
   * complete.
   */
  static int unmap_and_move_huge_page(new_page_t get_new_page,
  				unsigned long private, struct page *hpage,
  				int force, bool offlining,
  				enum migrate_mode mode)
  {
  	int *result = NULL;
  	struct page *new_hpage = get_new_page(hpage, private, &result);
  	struct anon_vma *anon_vma = NULL;
  	int rc = -EAGAIN;

  	if (!new_hpage)
  		return -ENOMEM;

  	if (!trylock_page(hpage)) {
  		/* Only a forced, fully synchronous attempt may sleep here. */
  		if (!force || mode != MIGRATE_SYNC)
  			goto out;
  		lock_page(hpage);
  	}

  	/* Hold the anon_vma (if any) across the unmap and move. */
  	if (PageAnon(hpage))
  		anon_vma = page_get_anon_vma(hpage);

  	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

  	if (!page_mapped(hpage))
  		rc = move_to_new_page(new_hpage, hpage, 1, mode);

  	/* On failure put the original ptes back onto the old page. */
  	if (rc)
  		remove_migration_ptes(hpage, hpage);

  	if (anon_vma)
  		put_anon_vma(anon_vma);

  	if (!rc)
  		hugetlb_cgroup_migrate(hpage, new_hpage);

  	unlock_page(hpage);
  out:
  	put_page(new_hpage);

  	/* Report the error code, or the node the page ended up on. */
  	if (result)
  		*result = rc ? rc : page_to_nid(new_hpage);

  	return rc;
  }
  
  /*
b20a35035   Christoph Lameter   [PATCH] page migr...
911
912
   * migrate_pages
   *
95a402c38   Christoph Lameter   [PATCH] page migr...
913
914
915
   * The function takes one list of pages to migrate and a function
   * that determines from the page to be migrated and the private data
   * the target of the move and allocates the page.
b20a35035   Christoph Lameter   [PATCH] page migr...
916
917
918
   *
   * The function returns after 10 attempts or if no pages
   * are movable anymore because to has become empty
cf608ac19   Minchan Kim   mm: compaction: f...
919
920
   * or no retryable pages exist anymore.
   * Caller should call putback_lru_pages to return pages to the LRU
28bd65781   Minchan Kim   mm: migration: cl...
921
   * or free list only if ret != 0.
b20a35035   Christoph Lameter   [PATCH] page migr...
922
   *
95a402c38   Christoph Lameter   [PATCH] page migr...
923
   * Return: Number of pages not migrated or error code.
b20a35035   Christoph Lameter   [PATCH] page migr...
924
   */
95a402c38   Christoph Lameter   [PATCH] page migr...
925
  int migrate_pages(struct list_head *from,
7f0f24967   Mel Gorman   mm: migration: cl...
926
  		new_page_t get_new_page, unsigned long private, bool offlining,
a6bc32b89   Mel Gorman   mm: compaction: i...
927
  		enum migrate_mode mode)
b20a35035   Christoph Lameter   [PATCH] page migr...
928
  {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
929
  	int retry = 1;
b20a35035   Christoph Lameter   [PATCH] page migr...
930
931
932
933
934
935
936
937
938
  	int nr_failed = 0;
  	int pass = 0;
  	struct page *page;
  	struct page *page2;
  	int swapwrite = current->flags & PF_SWAPWRITE;
  	int rc;
  
  	if (!swapwrite)
  		current->flags |= PF_SWAPWRITE;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
939
940
  	for(pass = 0; pass < 10 && retry; pass++) {
  		retry = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
941

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
942
  		list_for_each_entry_safe(page, page2, from, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
943
  			cond_resched();
2d1db3b11   Christoph Lameter   [PATCH] page migr...
944

95a402c38   Christoph Lameter   [PATCH] page migr...
945
  			rc = unmap_and_move(get_new_page, private,
77f1fe6b0   Mel Gorman   mm: migration: al...
946
  						page, pass > 2, offlining,
a6bc32b89   Mel Gorman   mm: compaction: i...
947
  						mode);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
948

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
949
  			switch(rc) {
95a402c38   Christoph Lameter   [PATCH] page migr...
950
951
  			case -ENOMEM:
  				goto out;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
952
  			case -EAGAIN:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
953
  				retry++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
954
955
  				break;
  			case 0:
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
956
957
  				break;
  			default:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
958
  				/* Permanent failure */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
959
  				nr_failed++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
960
  				break;
2d1db3b11   Christoph Lameter   [PATCH] page migr...
961
  			}
b20a35035   Christoph Lameter   [PATCH] page migr...
962
963
  		}
  	}
95a402c38   Christoph Lameter   [PATCH] page migr...
964
965
  	rc = 0;
  out:
b20a35035   Christoph Lameter   [PATCH] page migr...
966
967
  	if (!swapwrite)
  		current->flags &= ~PF_SWAPWRITE;
95a402c38   Christoph Lameter   [PATCH] page migr...
968
969
  	if (rc)
  		return rc;
b20a35035   Christoph Lameter   [PATCH] page migr...
970

95a402c38   Christoph Lameter   [PATCH] page migr...
971
  	return nr_failed + retry;
b20a35035   Christoph Lameter   [PATCH] page migr...
972
  }
95a402c38   Christoph Lameter   [PATCH] page migr...
973

189ebff28   Aneesh Kumar K.V   hugetlb: simplify...
974
975
976
  /*
   * Migrate a single huge page, retrying up to 10 times while the attempt
   * reports -EAGAIN.
   *
   * Returns 0 on success, -ENOMEM when no target page could be allocated,
   * -EAGAIN when every attempt asked for a retry, and -EIO for any other
   * failure.
   */
  int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
  		      unsigned long private, bool offlining,
  		      enum migrate_mode mode)
  {
  	int pass, rc;

  	for (pass = 0; pass < 10; pass++) {
  		/* From the fourth pass on, force the migration. */
  		rc = unmap_and_move_huge_page(get_new_page,
  					      private, hpage, pass > 2, offlining,
  					      mode);
  		if (rc == -EAGAIN) {
  			/* try again */
  			cond_resched();
  			continue;
  		}

  		/* Success and -ENOMEM pass through; the rest becomes -EIO. */
  		if (rc && rc != -ENOMEM)
  			rc = -EIO;
  		break;
  	}

  	return rc;
  }
742755a1d   Christoph Lameter   [PATCH] page migr...
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
  #ifdef CONFIG_NUMA
  /*
   * Move a list of individual pages
   *
   * One entry of the array handed to do_move_page_to_node_array(). The
   * array is terminated by an entry whose node is MAX_NUMNODES.
   */
  struct page_to_node {
  	unsigned long addr;	/* virtual address of the page to be moved */
  	struct page *page;	/* page found at addr; looked up by new_page_node() */
  	int node;		/* target node, or MAX_NUMNODES for the end marker */
  	int status;		/* per-page result: node number or negative errno */
  };
  
  static struct page *new_page_node(struct page *p, unsigned long private,
  		int **result)
  {
  	struct page_to_node *pm = (struct page_to_node *)private;
  
  	while (pm->node != MAX_NUMNODES && pm->page != p)
  		pm++;
  
  	if (pm->node == MAX_NUMNODES)
  		return NULL;
  
  	*result = &pm->status;
6484eb3e2   Mel Gorman   page allocator: d...
1024
  	return alloc_pages_exact_node(pm->node,
769848c03   Mel Gorman   Add __GFP_MOVABLE...
1025
  				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
1026
1027
1028
1029
1030
1031
  }
  
  /*
   * Move a set of pages as indicated in the pm array. The addr
   * field must be set to the virtual address of the page to be moved
   * and the node number must contain a valid target node.
5e9a0f023   Brice Goglin   mm: extract do_pa...
1032
   * The pm array ends with node = MAX_NUMNODES.
742755a1d   Christoph Lameter   [PATCH] page migr...
1033
   */
5e9a0f023   Brice Goglin   mm: extract do_pa...
1034
1035
1036
  static int do_move_page_to_node_array(struct mm_struct *mm,
  				      struct page_to_node *pm,
  				      int migrate_all)
742755a1d   Christoph Lameter   [PATCH] page migr...
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
  {
  	int err;
  	struct page_to_node *pp;
  	LIST_HEAD(pagelist);
  
  	down_read(&mm->mmap_sem);
  
  	/*
  	 * Build a list of pages to migrate
  	 */
742755a1d   Christoph Lameter   [PATCH] page migr...
1047
1048
1049
  	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
  		struct vm_area_struct *vma;
  		struct page *page;
742755a1d   Christoph Lameter   [PATCH] page migr...
1050
1051
  		err = -EFAULT;
  		vma = find_vma(mm, pp->addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1052
  		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
742755a1d   Christoph Lameter   [PATCH] page migr...
1053
  			goto set_status;
500d65d47   Andrea Arcangeli   thp: pmd_trans_hu...
1054
  		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1055
1056
1057
1058
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1059
1060
1061
  		err = -ENOENT;
  		if (!page)
  			goto set_status;
62b61f611   Hugh Dickins   ksm: memory hotre...
1062
1063
  		/* Use PageReserved to check for zero page */
  		if (PageReserved(page) || PageKsm(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
  			goto put_and_set;
  
  		pp->page = page;
  		err = page_to_nid(page);
  
  		if (err == pp->node)
  			/*
  			 * Node already in the right place
  			 */
  			goto put_and_set;
  
  		err = -EACCES;
  		if (page_mapcount(page) > 1 &&
  				!migrate_all)
  			goto put_and_set;
62695a84e   Nick Piggin   vmscan: move isol...
1079
  		err = isolate_lru_page(page);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1080
  		if (!err) {
62695a84e   Nick Piggin   vmscan: move isol...
1081
  			list_add_tail(&page->lru, &pagelist);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1082
1083
1084
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
  		}
742755a1d   Christoph Lameter   [PATCH] page migr...
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
  put_and_set:
  		/*
  		 * Either remove the duplicate refcount from
  		 * isolate_lru_page() or drop the page ref if it was
  		 * not isolated.
  		 */
  		put_page(page);
  set_status:
  		pp->status = err;
  	}
e78bbfa82   Brice Goglin   mm: stop returnin...
1095
  	err = 0;
cf608ac19   Minchan Kim   mm: compaction: f...
1096
  	if (!list_empty(&pagelist)) {
742755a1d   Christoph Lameter   [PATCH] page migr...
1097
  		err = migrate_pages(&pagelist, new_page_node,
a6bc32b89   Mel Gorman   mm: compaction: i...
1098
  				(unsigned long)pm, 0, MIGRATE_SYNC);
cf608ac19   Minchan Kim   mm: compaction: f...
1099
1100
1101
  		if (err)
  			putback_lru_pages(&pagelist);
  	}
742755a1d   Christoph Lameter   [PATCH] page migr...
1102
1103
1104
1105
1106
1107
  
  	up_read(&mm->mmap_sem);
  	return err;
  }
  
  /*
5e9a0f023   Brice Goglin   mm: extract do_pa...
1108
1109
1110
   * Migrate an array of page address onto an array of nodes and fill
   * the corresponding array of status.
   */
3268c63ed   Christoph Lameter   mm: fix move/migr...
1111
  static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
5e9a0f023   Brice Goglin   mm: extract do_pa...
1112
1113
1114
1115
1116
  			 unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 const int __user *nodes,
  			 int __user *status, int flags)
  {
3140a2273   Brice Goglin   mm: rework do_pag...
1117
  	struct page_to_node *pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1118
1119
1120
  	unsigned long chunk_nr_pages;
  	unsigned long chunk_start;
  	int err;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1121

3140a2273   Brice Goglin   mm: rework do_pag...
1122
1123
1124
  	err = -ENOMEM;
  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
  	if (!pm)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1125
  		goto out;
35282a2de   Brice Goglin   migration: only m...
1126
1127
  
  	migrate_prep();
5e9a0f023   Brice Goglin   mm: extract do_pa...
1128
  	/*
3140a2273   Brice Goglin   mm: rework do_pag...
1129
1130
  	 * Store a chunk of page_to_node array in a page,
  	 * but keep the last one as a marker
5e9a0f023   Brice Goglin   mm: extract do_pa...
1131
  	 */
3140a2273   Brice Goglin   mm: rework do_pag...
1132
  	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1133

3140a2273   Brice Goglin   mm: rework do_pag...
1134
1135
1136
1137
  	for (chunk_start = 0;
  	     chunk_start < nr_pages;
  	     chunk_start += chunk_nr_pages) {
  		int j;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1138

3140a2273   Brice Goglin   mm: rework do_pag...
1139
1140
1141
1142
1143
1144
  		if (chunk_start + chunk_nr_pages > nr_pages)
  			chunk_nr_pages = nr_pages - chunk_start;
  
  		/* fill the chunk pm with addrs and nodes from user-space */
  		for (j = 0; j < chunk_nr_pages; j++) {
  			const void __user *p;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1145
  			int node;
3140a2273   Brice Goglin   mm: rework do_pag...
1146
1147
1148
1149
1150
1151
  			err = -EFAULT;
  			if (get_user(p, pages + j + chunk_start))
  				goto out_pm;
  			pm[j].addr = (unsigned long) p;
  
  			if (get_user(node, nodes + j + chunk_start))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1152
1153
1154
  				goto out_pm;
  
  			err = -ENODEV;
6f5a55f1a   Linus Torvalds   Fix potential cra...
1155
1156
  			if (node < 0 || node >= MAX_NUMNODES)
  				goto out_pm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1157
1158
1159
1160
1161
1162
  			if (!node_state(node, N_HIGH_MEMORY))
  				goto out_pm;
  
  			err = -EACCES;
  			if (!node_isset(node, task_nodes))
  				goto out_pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
  			pm[j].node = node;
  		}
  
  		/* End marker for this chunk */
  		pm[chunk_nr_pages].node = MAX_NUMNODES;
  
  		/* Migrate this chunk */
  		err = do_move_page_to_node_array(mm, pm,
  						 flags & MPOL_MF_MOVE_ALL);
  		if (err < 0)
  			goto out_pm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1174

5e9a0f023   Brice Goglin   mm: extract do_pa...
1175
  		/* Return status information */
3140a2273   Brice Goglin   mm: rework do_pag...
1176
1177
  		for (j = 0; j < chunk_nr_pages; j++)
  			if (put_user(pm[j].status, status + j + chunk_start)) {
5e9a0f023   Brice Goglin   mm: extract do_pa...
1178
  				err = -EFAULT;
3140a2273   Brice Goglin   mm: rework do_pag...
1179
1180
1181
1182
  				goto out_pm;
  			}
  	}
  	err = 0;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1183
1184
  
  out_pm:
3140a2273   Brice Goglin   mm: rework do_pag...
1185
  	free_page((unsigned long)pm);
5e9a0f023   Brice Goglin   mm: extract do_pa...
1186
1187
1188
1189
1190
  out:
  	return err;
  }
  
  /*
2f007e74b   Brice Goglin   mm: don't vmalloc...
1191
   * Determine the nodes of an array of pages and store it in an array of status.
742755a1d   Christoph Lameter   [PATCH] page migr...
1192
   */
80bba1290   Brice Goglin   mm: no get_user/p...
1193
1194
  static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
  				const void __user **pages, int *status)
742755a1d   Christoph Lameter   [PATCH] page migr...
1195
  {
2f007e74b   Brice Goglin   mm: don't vmalloc...
1196
  	unsigned long i;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1197

742755a1d   Christoph Lameter   [PATCH] page migr...
1198
  	down_read(&mm->mmap_sem);
2f007e74b   Brice Goglin   mm: don't vmalloc...
1199
  	for (i = 0; i < nr_pages; i++) {
80bba1290   Brice Goglin   mm: no get_user/p...
1200
  		unsigned long addr = (unsigned long)(*pages);
742755a1d   Christoph Lameter   [PATCH] page migr...
1201
1202
  		struct vm_area_struct *vma;
  		struct page *page;
c095adbc2   KOSAKI Motohiro   mm: Don't touch u...
1203
  		int err = -EFAULT;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1204
1205
  
  		vma = find_vma(mm, addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1206
  		if (!vma || addr < vma->vm_start)
742755a1d   Christoph Lameter   [PATCH] page migr...
1207
  			goto set_status;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1208
  		page = follow_page(vma, addr, 0);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1209
1210
1211
1212
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1213
1214
  		err = -ENOENT;
  		/* Use PageReserved to check for zero page */
62b61f611   Hugh Dickins   ksm: memory hotre...
1215
  		if (!page || PageReserved(page) || PageKsm(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1216
1217
1218
1219
  			goto set_status;
  
  		err = page_to_nid(page);
  set_status:
80bba1290   Brice Goglin   mm: no get_user/p...
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
  		*status = err;
  
  		pages++;
  		status++;
  	}
  
  	up_read(&mm->mmap_sem);
  }
  
  /*
   * Determine the nodes of a user array of pages and store it in
   * a user array of status.
   *
   * The arrays are processed in batches of at most DO_PAGES_STAT_CHUNK_NR
   * entries that are bounced through on-stack buffers. Returns 0 on
   * success or -EFAULT when a user array cannot be copied.
   */
  static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 int __user *status)
  {
  #define DO_PAGES_STAT_CHUNK_NR 16
  	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
  	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
  	unsigned long batch;

  	for (; nr_pages; nr_pages -= batch, pages += batch, status += batch) {
  		batch = nr_pages > DO_PAGES_STAT_CHUNK_NR ?
  				DO_PAGES_STAT_CHUNK_NR : nr_pages;

  		if (copy_from_user(chunk_pages, pages,
  				   batch * sizeof(*chunk_pages)))
  			return -EFAULT;

  		do_pages_stat_array(mm, batch, chunk_pages, chunk_status);

  		if (copy_to_user(status, chunk_status,
  				 batch * sizeof(*status)))
  			return -EFAULT;
  	}

  	return 0;
  }
  
  /*
   * Move a list of pages in the address space of the currently executing
   * process.
   */
938bb9f5e   Heiko Carstens   [CVE-2009-0029] S...
1266
1267
1268
1269
  SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
  		const void __user * __user *, pages,
  		const int __user *, nodes,
  		int __user *, status, int, flags)
742755a1d   Christoph Lameter   [PATCH] page migr...
1270
  {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1271
  	const struct cred *cred = current_cred(), *tcred;
742755a1d   Christoph Lameter   [PATCH] page migr...
1272
  	struct task_struct *task;
742755a1d   Christoph Lameter   [PATCH] page migr...
1273
  	struct mm_struct *mm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1274
  	int err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1275
  	nodemask_t task_nodes;
742755a1d   Christoph Lameter   [PATCH] page migr...
1276
1277
1278
1279
1280
1281
1282
1283
1284
  
  	/* Check flags */
  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
  		return -EINVAL;
  
  	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
  		return -EPERM;
  
  	/* Find the mm_struct */
a879bf582   Greg Thelen   mm: grab rcu read...
1285
  	rcu_read_lock();
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
1286
  	task = pid ? find_task_by_vpid(pid) : current;
742755a1d   Christoph Lameter   [PATCH] page migr...
1287
  	if (!task) {
a879bf582   Greg Thelen   mm: grab rcu read...
1288
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1289
1290
  		return -ESRCH;
  	}
3268c63ed   Christoph Lameter   mm: fix move/migr...
1291
  	get_task_struct(task);
742755a1d   Christoph Lameter   [PATCH] page migr...
1292
1293
1294
1295
1296
1297
1298
  
  	/*
  	 * Check if this process has the right to modify the specified
  	 * process. The right exists if the process has administrative
  	 * capabilities, superuser privileges or the same
  	 * userid as the target process.
  	 */
c69e8d9c0   David Howells   CRED: Use RCU to ...
1299
  	tcred = __task_cred(task);
b38a86eb1   Eric W. Biederman   userns: Convert t...
1300
1301
  	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
  	    !uid_eq(cred->uid,  tcred->suid) && !uid_eq(cred->uid,  tcred->uid) &&
742755a1d   Christoph Lameter   [PATCH] page migr...
1302
  	    !capable(CAP_SYS_NICE)) {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1303
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1304
  		err = -EPERM;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1305
  		goto out;
742755a1d   Christoph Lameter   [PATCH] page migr...
1306
  	}
c69e8d9c0   David Howells   CRED: Use RCU to ...
1307
  	rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1308

86c3a7645   David Quigley   [PATCH] SELinux: ...
1309
1310
   	err = security_task_movememory(task);
   	if (err)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1311
  		goto out;
86c3a7645   David Quigley   [PATCH] SELinux: ...
1312

3268c63ed   Christoph Lameter   mm: fix move/migr...
1313
1314
1315
  	task_nodes = cpuset_mems_allowed(task);
  	mm = get_task_mm(task);
  	put_task_struct(task);
6e8b09eaf   Sasha Levin   mm: fix NULL ptr ...
1316
1317
1318
1319
1320
1321
1322
1323
  	if (!mm)
  		return -EINVAL;
  
  	if (nodes)
  		err = do_pages_move(mm, task_nodes, nr_pages, pages,
  				    nodes, status, flags);
  	else
  		err = do_pages_stat(mm, nr_pages, pages, status);
742755a1d   Christoph Lameter   [PATCH] page migr...
1324

742755a1d   Christoph Lameter   [PATCH] page migr...
1325
1326
  	mmput(mm);
  	return err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1327
1328
1329
1330
  
  out:
  	put_task_struct(task);
  	return err;
742755a1d   Christoph Lameter   [PATCH] page migr...
1331
  }
742755a1d   Christoph Lameter   [PATCH] page migr...
1332

7b2259b3e   Christoph Lameter   [PATCH] page migr...
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
  /*
   * Call migration functions in the vma_ops that may prepare
   * memory in a vm for migration. migration functions may perform
   * the migration for vmas that do not have an underlying page struct.
   */
  int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	const nodemask_t *from, unsigned long flags)
  {
   	struct vm_area_struct *vma;
   	int err = 0;
1001c9fb8   Daisuke Nishimura   migration: migrat...
1343
  	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
7b2259b3e   Christoph Lameter   [PATCH] page migr...
1344
1345
1346
1347
1348
1349
1350
1351
   		if (vma->vm_ops && vma->vm_ops->migrate) {
   			err = vma->vm_ops->migrate(vma, to, from, flags);
   			if (err)
   				break;
   		}
   	}
   	return err;
  }
83d1674a9   Gerald Schaefer   mm: make CONFIG_M...
1352
  #endif