  /*
   * Memory Migration functionality - linux/mm/migrate.c
   *
   * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
   *
   * Page migration was first developed in the context of the memory hotplug
   * project. The main authors of the migration code are:
   *
   * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
   * Hirokazu Takahashi <taka@valinux.co.jp>
   * Dave Hansen <haveblue@us.ibm.com>
   * Christoph Lameter
   */
  
  #include <linux/migrate.h>
  #include <linux/export.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/pagemap.h>
  #include <linux/buffer_head.h>
  #include <linux/mm_inline.h>
  #include <linux/nsproxy.h>
  #include <linux/pagevec.h>
  #include <linux/ksm.h>
  #include <linux/rmap.h>
  #include <linux/topology.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
  #include <linux/writeback.h>
  #include <linux/mempolicy.h>
  #include <linux/vmalloc.h>
  #include <linux/security.h>
  #include <linux/memcontrol.h>
  #include <linux/syscalls.h>
  #include <linux/hugetlb.h>
  #include <linux/hugetlb_cgroup.h>
  #include <linux/gfp.h>
  #include <linux/balloon_compaction.h>
  #include <linux/mmu_notifier.h>

  #include <asm/tlbflush.h>
  #define CREATE_TRACE_POINTS
  #include <trace/events/migrate.h>
  #include "internal.h"
  /*
   * migrate_prep() needs to be called before we start compiling a list of pages
   * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
   * undesirable, use migrate_prep_local()
   */
  int migrate_prep(void)
  {
  	/*
  	 * Clear the LRU lists so pages can be isolated.
  	 * Note that pages may be moved off the LRU after we have
  	 * drained them. Those pages will fail to migrate like other
  	 * pages that may be busy.
  	 */
  	lru_add_drain_all();
  
  	return 0;
  }
  /* Do the necessary work of migrate_prep but not if it involves other CPUs */
  int migrate_prep_local(void)
  {
  	lru_add_drain();
  
  	return 0;
  }
  /*
   * Put previously isolated pages back onto the appropriate lists
   * from where they were once taken off for compaction/migration.
   *
   * This function shall be used whenever the isolated pageset has been
   * built from LRU, balloon or hugetlbfs pages. See
   * isolate_migratepages_range() and isolate_huge_page().
   */
  void putback_movable_pages(struct list_head *l)
  {
  	struct page *page;
  	struct page *page2;
  
  	list_for_each_entry_safe(page, page2, l, lru) {
  		if (unlikely(PageHuge(page))) {
  			putback_active_hugepage(page);
  			continue;
  		}
  		list_del(&page->lru);
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
  				page_is_file_cache(page));
  		if (unlikely(isolated_balloon_page(page)))
  			balloon_page_putback(page);
  		else
  			putback_lru_page(page);
  	}
  }
  /*
   * Restore a potential migration pte to a working pte entry
   */
  static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
  				 unsigned long addr, void *old)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	swp_entry_t entry;
   	pmd_t *pmd;
  	pte_t *ptep, pte;
   	spinlock_t *ptl;
  	if (unlikely(PageHuge(new))) {
  		ptep = huge_pte_offset(mm, addr);
  		if (!ptep)
  			goto out;
  		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
  	} else {
  		pmd = mm_find_pmd(mm, addr);
  		if (!pmd)
  			goto out;

  		ptep = pte_offset_map(pmd, addr);

  		/*
  		 * Peek to check is_swap_pte() before taking ptlock?  No, we
  		 * can race mremap's move_ptes(), which skips anon_vma lock.
  		 */
  
  		ptl = pte_lockptr(mm, pmd);
  	}

   	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto unlock;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry) ||
  	    migration_entry_to_page(entry) != old)
  		goto unlock;

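  	/*
  	 * Take a reference for the pte we are about to establish and rebuild
  	 * a present pte for the new page, carrying over the soft-dirty and
  	 * write bits recorded in the migration entry.
  	 */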
  	get_page(new);
  	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
  	if (pte_swp_soft_dirty(*ptep))
  		pte = pte_mksoft_dirty(pte);
  
  	/* Recheck VMA as permissions can change since migration started  */
  	if (is_write_migration_entry(entry))
  		pte = maybe_mkwrite(pte, vma);
  #ifdef CONFIG_HUGETLB_PAGE
  	if (PageHuge(new)) {
  		pte = pte_mkhuge(pte);
  		pte = arch_make_huge_pte(pte, vma, new, 0);
  	}
  #endif
  	flush_dcache_page(new);
  	set_pte_at(mm, addr, ptep, pte);

  	if (PageHuge(new)) {
  		if (PageAnon(new))
  			hugepage_add_anon_rmap(new, vma, addr);
  		else
  			page_dup_rmap(new);
  	} else if (PageAnon(new))
  		page_add_anon_rmap(new, vma, addr);
  	else
  		page_add_file_rmap(new);
  
  	/* No need to invalidate - it was non-present before */
  	update_mmu_cache(vma, addr, ptep);
  unlock:
  	pte_unmap_unlock(ptep, ptl);
  out:
  	return SWAP_AGAIN;
  }
  
  /*
   * Get rid of all migration entries and replace them by
   * references to the indicated page.
   */
  static void remove_migration_ptes(struct page *old, struct page *new)
  {
  	struct rmap_walk_control rwc = {
  		.rmap_one = remove_migration_pte,
  		.arg = old,
  	};
  
  	rmap_walk(new, &rwc);
  }
  
  /*
   * Something used the pte of a page under migration. We need to
   * get to the page and wait until migration is finished.
   * When we return from this function the fault will be retried.
   */
  void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
  				spinlock_t *ptl)
  {
  	pte_t pte;
  	swp_entry_t entry;
  	struct page *page;
  	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto out;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry))
  		goto out;
  
  	page = migration_entry_to_page(entry);
  	/*
  	 * Once the radix-tree replacement for page migration has started,
  	 * page_count *must* be zero. And we don't want to call
  	 * wait_on_page_locked() against a page without get_page(), so we use
  	 * get_page_unless_zero() here. Even if that fails, the page fault
  	 * will simply be taken again.
  	 */
  	if (!get_page_unless_zero(page))
  		goto out;
  	pte_unmap_unlock(ptep, ptl);
  	wait_on_page_locked(page);
  	put_page(page);
  	return;
  out:
  	pte_unmap_unlock(ptep, ptl);
  }
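  /*
   * Called from the fault path when a migration entry is found: look up the
   * pte and its lock for @address, then wait in __migration_entry_wait()
   * until the migration completes.
   */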
  void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
  				unsigned long address)
  {
  	spinlock_t *ptl = pte_lockptr(mm, pmd);
  	pte_t *ptep = pte_offset_map(pmd, address);
  	__migration_entry_wait(mm, ptep, ptl);
  }
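  /*
   * Hugetlb variant of the above: the caller passes the pte directly and the
   * appropriate hugepage page table lock is obtained via huge_pte_lockptr().
   */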
  void migration_entry_wait_huge(struct vm_area_struct *vma,
  		struct mm_struct *mm, pte_t *pte)
  {
  	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
  	__migration_entry_wait(mm, pte, ptl);
  }
  #ifdef CONFIG_BLOCK
  /* Returns true if all buffers are successfully locked */
  static bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
  {
  	struct buffer_head *bh = head;
  
  	/* Simple case, sync compaction */
  	if (mode != MIGRATE_ASYNC) {
  		do {
  			get_bh(bh);
  			lock_buffer(bh);
  			bh = bh->b_this_page;
  
  		} while (bh != head);
  
  		return true;
  	}
  
  	/* async case, we cannot block on lock_buffer so use trylock_buffer */
  	do {
  		get_bh(bh);
  		if (!trylock_buffer(bh)) {
  			/*
  			 * We failed to lock the buffer and cannot stall in
  			 * async migration. Release the taken locks
  			 */
  			struct buffer_head *failed_bh = bh;
  			put_bh(failed_bh);
  			bh = head;
  			while (bh != failed_bh) {
  				unlock_buffer(bh);
  				put_bh(bh);
  				bh = bh->b_this_page;
  			}
  			return false;
  		}
  
  		bh = bh->b_this_page;
  	} while (bh != head);
  	return true;
  }
  #else
  static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
  {
  	return true;
  }
  #endif /* CONFIG_BLOCK */
  /*
   * Replace the page in the mapping.
   *
   * The number of remaining references must be:
   * 1 for anonymous pages without a mapping
   * 2 for pages with a mapping
   * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
   */
  int migrate_page_move_mapping(struct address_space *mapping,
  		struct page *newpage, struct page *page,
  		struct buffer_head *head, enum migrate_mode mode,
  		int extra_count)
  {
  	int expected_count = 1 + extra_count;
  	void **pslot;

  	if (!mapping) {
  		/* Anonymous page without mapping */
  		if (page_count(page) != expected_count)
  			return -EAGAIN;
  		return MIGRATEPAGE_SUCCESS;
  	}
  	spin_lock_irq(&mapping->tree_lock);

  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
   					page_index(page));

  	expected_count += 1 + page_has_private(page);
  	if (page_count(page) != expected_count ||
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  	if (!page_freeze_refs(page, expected_count)) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  	/*
  	 * In the async migration case of moving a page with buffers, lock the
  	 * buffers using trylock before the mapping is moved. If the mapping
  	 * were moved first and we then failed to lock the buffers, we could
  	 * not move the mapping back due to an elevated page count and would
  	 * have to block waiting on other references to be dropped.
  	 */
  	if (mode == MIGRATE_ASYNC && head &&
  			!buffer_migrate_lock_buffers(head, mode)) {
  		page_unfreeze_refs(page, expected_count);
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	/*
  	 * Now we know that no one else is looking at the page.
  	 */
  	get_page(newpage);	/* add cache reference */
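  	/*
  	 * Transfer the swapcache flag and the swap entry kept in page_private
  	 * before the radix tree slot is switched over to the new page.
  	 */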
  	if (PageSwapCache(page)) {
  		SetPageSwapCache(newpage);
  		set_page_private(newpage, page_private(page));
  	}
  	radix_tree_replace_slot(pslot, newpage);
  
  	/*
  	 * Drop cache reference from old page by unfreezing
  	 * to one less reference.
  	 * We know this isn't the last reference.
  	 */
  	page_unfreeze_refs(page, expected_count - 1);

  	/*
  	 * If moved to a different zone then also account
  	 * the page for that zone. Other VM counters will be
  	 * taken care of when we establish references to the
  	 * new page and drop references to the old page.
  	 *
  	 * Note that anonymous pages are accounted for
  	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
  	 * are mapped to swap space.
  	 */
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__inc_zone_page_state(newpage, NR_FILE_PAGES);
  	if (!PageSwapCache(page) && PageSwapBacked(page)) {
  		__dec_zone_page_state(page, NR_SHMEM);
  		__inc_zone_page_state(newpage, NR_SHMEM);
  	}
  	spin_unlock_irq(&mapping->tree_lock);

  	return MIGRATEPAGE_SUCCESS;
  }
  
  /*
   * The expected number of remaining references is the same as that
   * of migrate_page_move_mapping().
   */
  int migrate_huge_page_move_mapping(struct address_space *mapping,
  				   struct page *newpage, struct page *page)
  {
  	int expected_count;
  	void **pslot;
  
  	if (!mapping) {
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return MIGRATEPAGE_SUCCESS;
  	}
  
  	spin_lock_irq(&mapping->tree_lock);
  
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
  					page_index(page));
  
  	expected_count = 2 + page_has_private(page);
  	if (page_count(page) != expected_count ||
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	if (!page_freeze_refs(page, expected_count)) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	get_page(newpage);
  
  	radix_tree_replace_slot(pslot, newpage);
  	page_unfreeze_refs(page, expected_count - 1);
  
  	spin_unlock_irq(&mapping->tree_lock);
  	return MIGRATEPAGE_SUCCESS;
  }
  
  /*
   * Gigantic pages are so large that we do not guarantee that page++ pointer
   * arithmetic will work across the entire page.  We need something more
   * specialized.
   */
  static void __copy_gigantic_page(struct page *dst, struct page *src,
  				int nr_pages)
  {
  	int i;
  	struct page *dst_base = dst;
  	struct page *src_base = src;
  
  	for (i = 0; i < nr_pages; ) {
  		cond_resched();
  		copy_highpage(dst, src);
  
  		i++;
  		dst = mem_map_next(dst, dst_base, i);
  		src = mem_map_next(src, src_base, i);
  	}
  }
  
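  /*
   * Copy a hugetlbfs or transparent huge page to its new location, falling
   * back to the page-by-page helper above for gigantic pages whose struct
   * pages may not be contiguous.
   */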
  static void copy_huge_page(struct page *dst, struct page *src)
  {
  	int i;
  	int nr_pages;
  
  	if (PageHuge(src)) {
  		/* hugetlbfs page */
  		struct hstate *h = page_hstate(src);
  		nr_pages = pages_per_huge_page(h);
  
  		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
  			__copy_gigantic_page(dst, src, nr_pages);
  			return;
  		}
  	} else {
  		/* thp page */
  		BUG_ON(!PageTransHuge(src));
  		nr_pages = hpage_nr_pages(src);
  	}
  
  	for (i = 0; i < nr_pages; i++) {
  		cond_resched();
  		copy_highpage(dst + i, src + i);
  	}
  }
  
  /*
   * Copy the page to its new location
   */
  void migrate_page_copy(struct page *newpage, struct page *page)
  {
  	int cpupid;
  	if (PageHuge(page) || PageTransHuge(page))
  		copy_huge_page(newpage, page);
  	else
  		copy_highpage(newpage, page);
  
  	if (PageError(page))
  		SetPageError(newpage);
  	if (PageReferenced(page))
  		SetPageReferenced(newpage);
  	if (PageUptodate(page))
  		SetPageUptodate(newpage);
  	if (TestClearPageActive(page)) {
  		VM_BUG_ON_PAGE(PageUnevictable(page), page);
  		SetPageActive(newpage);
  	} else if (TestClearPageUnevictable(page))
  		SetPageUnevictable(newpage);
  	if (PageChecked(page))
  		SetPageChecked(newpage);
  	if (PageMappedToDisk(page))
  		SetPageMappedToDisk(newpage);
  
  	if (PageDirty(page)) {
  		clear_page_dirty_for_io(page);
  		/*
  		 * Want to mark the page and the radix tree as dirty, and
  		 * redo the accounting that clear_page_dirty_for_io undid,
  		 * but we can't use set_page_dirty because that function
  		 * is actually a signal that all of the page has become dirty,
  		 * whereas only part of our page may be dirty.
  		 */
  		if (PageSwapBacked(page))
  			SetPageDirty(newpage);
  		else
  			__set_page_dirty_nobuffers(newpage);
   	}
  	/*
  	 * Copy NUMA information to the new page, to prevent over-eager
  	 * future migrations of this same page.
  	 */
  	cpupid = page_cpupid_xchg_last(page, -1);
  	page_cpupid_xchg_last(newpage, cpupid);
  	mlock_migrate_page(newpage, page);
  	ksm_migrate_page(newpage, page);
  	/*
  	 * Please do not reorder this without considering how mm/ksm.c's
  	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
  	 */
  	if (PageSwapCache(page))
  		ClearPageSwapCache(page);
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
  
  	/*
  	 * If any waiters have accumulated on the new page then
  	 * wake them up.
  	 */
  	if (PageWriteback(newpage))
  		end_page_writeback(newpage);
  }

  /************************************************************
   *                    Migration functions
   ***********************************************************/
  /*
   * Common logic to directly migrate a single page suitable for
   * pages that do not use PagePrivate/PagePrivate2.
   *
   * Pages are locked upon entry and exit.
   */
  int migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page,
  		enum migrate_mode mode)
  {
  	int rc;
  
  	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
  	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);

  	if (rc != MIGRATEPAGE_SUCCESS)
  		return rc;
  
  	migrate_page_copy(newpage, page);
  	return MIGRATEPAGE_SUCCESS;
  }
  EXPORT_SYMBOL(migrate_page);
  #ifdef CONFIG_BLOCK
  /*
   * Migration function for pages with buffers. This function can only be used
   * if the underlying filesystem guarantees that no other references to "page"
   * exist.
   */
  int buffer_migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page, enum migrate_mode mode)
  {
  	struct buffer_head *bh, *head;
  	int rc;
  	if (!page_has_buffers(page))
  		return migrate_page(mapping, newpage, page, mode);
  
  	head = page_buffers(page);
  	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);

  	if (rc != MIGRATEPAGE_SUCCESS)
  		return rc;
  	/*
  	 * In the async case, migrate_page_move_mapping locked the buffers
  	 * with an IRQ-safe spinlock held. In the sync case, the buffers
  	 * need to be locked now
  	 */
  	if (mode != MIGRATE_ASYNC)
  		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
  
  	ClearPagePrivate(page);
  	set_page_private(newpage, page_private(page));
  	set_page_private(page, 0);
  	put_page(page);
  	get_page(newpage);
  
  	bh = head;
  	do {
  		set_bh_page(bh, newpage, bh_offset(bh));
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	SetPagePrivate(newpage);
  
  	migrate_page_copy(newpage, page);
  
  	bh = head;
  	do {
  		unlock_buffer(bh);
   		put_bh(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  	return MIGRATEPAGE_SUCCESS;
  }
  EXPORT_SYMBOL(buffer_migrate_page);
  #endif

  /*
   * Writeback a page to clean the dirty state
   */
  static int writeout(struct address_space *mapping, struct page *page)
  {
  	struct writeback_control wbc = {
  		.sync_mode = WB_SYNC_NONE,
  		.nr_to_write = 1,
  		.range_start = 0,
  		.range_end = LLONG_MAX,
  		.for_reclaim = 1
  	};
  	int rc;
  
  	if (!mapping->a_ops->writepage)
  		/* No write method for the address space */
  		return -EINVAL;
  
  	if (!clear_page_dirty_for_io(page))
  		/* Someone else already triggered a write */
  		return -EAGAIN;
  	/*
  	 * A dirty page may imply that the underlying filesystem has
  	 * the page on some queue. So the page must be clean for
  	 * migration. Writeout may mean we lose the lock and the
  	 * page state is no longer what we checked for earlier.
  	 * At this point we know that the migration attempt cannot
  	 * be successful.
  	 */
  	remove_migration_ptes(page, page);

  	rc = mapping->a_ops->writepage(page, &wbc);

  	if (rc != AOP_WRITEPAGE_ACTIVATE)
  		/* unlocked. Relock */
  		lock_page(page);
  	return (rc < 0) ? -EIO : -EAGAIN;
  }
  
  /*
   * Default handling if a filesystem does not provide a migration function.
   */
  static int fallback_migrate_page(struct address_space *mapping,
  	struct page *newpage, struct page *page, enum migrate_mode mode)
  {
  	if (PageDirty(page)) {
  		/* Only writeback pages in full synchronous migration */
  		if (mode != MIGRATE_SYNC)
  			return -EBUSY;
  		return writeout(mapping, page);
  	}
  
  	/*
  	 * Buffers may be managed in a filesystem specific way.
  	 * We must have no buffers or drop them.
  	 */
  	if (page_has_private(page) &&
  	    !try_to_release_page(page, GFP_KERNEL))
  		return -EAGAIN;
  	return migrate_page(mapping, newpage, page, mode);
  }
  /*
   * Move a page to a newly allocated page
   * The page is locked and all ptes have been successfully removed.
   *
   * The new page will have replaced the old page if this function
   * is successful.
   *
   * Return value:
   *   < 0 - error code
   *  MIGRATEPAGE_SUCCESS - success
   */
  static int move_to_new_page(struct page *newpage, struct page *page,
  				int page_was_mapped, enum migrate_mode mode)
  {
  	struct address_space *mapping;
  	int rc;
  
  	/*
  	 * Block others from accessing the page when we get around to
  	 * establishing additional references. We are the only one
  	 * holding a reference to the new page at this point.
  	 */
  	if (!trylock_page(newpage))
  		BUG();
  
  	/* Prepare mapping for the new page.*/
  	newpage->index = page->index;
  	newpage->mapping = page->mapping;
  	if (PageSwapBacked(page))
  		SetPageSwapBacked(newpage);
  
  	mapping = page_mapping(page);
  	if (!mapping)
  		rc = migrate_page(mapping, newpage, page, mode);
  	else if (mapping->a_ops->migratepage)
  		/*
  		 * Most pages have a mapping and most filesystems provide a
  		 * migratepage callback. Anonymous pages are part of swap
  		 * space which also has its own migratepage callback. This
  		 * is the most common path for page migration.
  		 */
  		rc = mapping->a_ops->migratepage(mapping,
  						newpage, page, mode);
  	else
  		rc = fallback_migrate_page(mapping, newpage, page, mode);

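  	/*
  	 * On failure, disconnect newpage again so it can be freed; on success,
  	 * transfer the memcg state and point any migration ptes at the new
  	 * page.
  	 */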
  	if (rc != MIGRATEPAGE_SUCCESS) {
  		newpage->mapping = NULL;
  	} else {
  		mem_cgroup_migrate(page, newpage, false);
  		if (page_was_mapped)
  			remove_migration_ptes(page, newpage);
  		page->mapping = NULL;
  	}
  
  	unlock_page(newpage);
  
  	return rc;
  }
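  /*
   * Lock @page, replace its ptes with migration entries and copy it into
   * @newpage.  Returns MIGRATEPAGE_SUCCESS on success, -EAGAIN for failures
   * worth retrying, or another error code for permanent failures.
   */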
  static int __unmap_and_move(struct page *page, struct page *newpage,
  				int force, enum migrate_mode mode)
  {
  	int rc = -EAGAIN;
  	int page_was_mapped = 0;
  	struct anon_vma *anon_vma = NULL;

  	if (!trylock_page(page)) {
  		if (!force || mode == MIGRATE_ASYNC)
  			goto out;
  
  		/*
  		 * It's not safe for direct compaction to call lock_page.
  		 * For example, during page readahead pages are added locked
  		 * to the LRU. Later, when the IO completes the pages are
  		 * marked uptodate and unlocked. However, the queueing
  		 * could be merging multiple pages for one bio (e.g.
  		 * mpage_readpages). If an allocation happens for the
  		 * second or third page, the process can end up locking
  		 * the same page twice and deadlocking. Rather than
  		 * trying to be clever about what pages can be locked,
  		 * avoid the use of lock_page for direct compaction
  		 * altogether.
  		 */
  		if (current->flags & PF_MEMALLOC)
  			goto out;

  		lock_page(page);
  	}
  
  	if (PageWriteback(page)) {
  		/*
  		 * Only in the case of a full synchronous migration is it
  		 * necessary to wait for PageWriteback. In the async case,
  		 * the retry loop is too short and in the sync-light case,
  		 * the overhead of stalling is too much
  		 */
  		if (mode != MIGRATE_SYNC) {
  			rc = -EBUSY;
  			goto out_unlock;
  		}
  		if (!force)
  			goto out_unlock;
  		wait_on_page_writeback(page);
  	}
  	/*
  	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
  	 * we cannot notice that anon_vma is freed while we migrate a page.
  	 * This get_anon_vma() delays freeing the anon_vma pointer until the
  	 * end of migration. File cache pages are no problem because of
  	 * page_lock(): file caches may use write_page() or lock_page() during
  	 * migration, so only anonymous pages need this care here.
  	 */
  	if (PageAnon(page) && !PageKsm(page)) {
  		/*
  		 * Only page_lock_anon_vma_read() understands the subtleties of
  		 * getting a hold on an anon_vma from outside one of its mms.
  		 */
  		anon_vma = page_get_anon_vma(page);
  		if (anon_vma) {
  			/*
  			 * Anon page
  			 */
  		} else if (PageSwapCache(page)) {
  			/*
  			 * We cannot be sure that the anon_vma of an unmapped
  			 * swapcache page is safe to use because we don't
  			 * know in advance if the VMA that this page belonged
  			 * to still exists. If the VMA and others sharing the
  			 * data have been freed, then the anon_vma could
  			 * already be invalid.
  			 *
  			 * To avoid this possibility, swapcache pages get
  			 * migrated but are not remapped when migration
  			 * completes
  			 */
  		} else {
  			goto out_unlock;
  		}
  	}

  	if (unlikely(isolated_balloon_page(page))) {
  		/*
  		 * A ballooned page does not need any special attention from
  		 * physical to virtual reverse mapping procedures.
  		 * Skip any attempt to unmap PTEs or to remap swap cache,
  		 * in order to avoid burning cycles at rmap level, and perform
  		 * the page migration right away (protected by page lock).
  		 */
  		rc = balloon_page_migrate(newpage, page, mode);
  		goto out_unlock;
  	}
  	/*
  	 * Corner case handling:
  	 * 1. When a new swap-cache page is read in, it is added to the LRU
  	 * and treated as swapcache but it has no rmap yet.
  	 * Calling try_to_unmap() against a page->mapping==NULL page will
  	 * trigger a BUG.  So handle it here.
  	 * 2. An orphaned page (see truncate_complete_page) might have
  	 * fs-private metadata. The page can be picked up due to memory
  	 * offlining.  Everywhere else except page reclaim, the page is
  	 * invisible to the vm, so the page can not be migrated.  So try to
  	 * free the metadata, so the page can be freed.
  	 */
  	if (!page->mapping) {
  		VM_BUG_ON_PAGE(PageAnon(page), page);
  		if (page_has_private(page)) {
  			try_to_free_buffers(page);
  			goto out_unlock;
  		}
  		goto skip_unmap;
  	}
  	/* Establish migration ptes or remove ptes */
  	if (page_mapped(page)) {
  		try_to_unmap(page,
  			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
  		page_was_mapped = 1;
  	}

  skip_unmap:
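  	/*
  	 * Only attempt the copy once every pte has been replaced by a
  	 * migration entry; otherwise rc stays -EAGAIN and the page is retried.
  	 */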
  	if (!page_mapped(page))
  		rc = move_to_new_page(newpage, page, page_was_mapped, mode);

  	if (rc && page_was_mapped)
  		remove_migration_ptes(page, page);
  
  	/* Drop an anon_vma reference if we took one */
  	if (anon_vma)
  		put_anon_vma(anon_vma);

  out_unlock:
  	unlock_page(page);
  out:
  	return rc;
  }

  /*
   * gcc 4.7 and 4.8 on arm get an ICE when inlining unmap_and_move().  Work
   * around it.
   */
  #if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM)
  #define ICE_noinline noinline
  #else
  #define ICE_noinline
  #endif
  
  /*
   * Obtain the lock on page, remove all ptes and migrate the page
   * to the newly allocated page in newpage.
   */
  static ICE_noinline int unmap_and_move(new_page_t get_new_page,
  				   free_page_t put_new_page,
  				   unsigned long private, struct page *page,
  				   int force, enum migrate_mode mode)
  {
  	int rc = 0;
  	int *result = NULL;
  	struct page *newpage = get_new_page(page, private, &result);
  
  	if (!newpage)
  		return -ENOMEM;
  
  	if (page_count(page) == 1) {
  		/* page was freed from under us. So we are done. */
  		goto out;
  	}
  
  	if (unlikely(PageTransHuge(page)))
  		if (unlikely(split_huge_page(page)))
  			goto out;
  	rc = __unmap_and_move(page, newpage, force, mode);

  out:
  	if (rc != -EAGAIN) {
  		/*
  		 * A page that has been migrated has all references
  		 * removed and will be freed. A page that has not been
  		 * migrated will have kept its references and be
  		 * restored.
  		 */
  		list_del(&page->lru);
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
  				page_is_file_cache(page));
  		putback_lru_page(page);
  	}

  	/*
  	 * If migration was not successful and there's a freeing callback, use
  	 * it.  Otherwise, putback_lru_page() will drop the reference grabbed
  	 * during isolation.
  	 */
  	if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
  		ClearPageSwapBacked(newpage);
  		put_new_page(newpage, private);
  	} else if (unlikely(__is_movable_balloon_page(newpage))) {
  		/* drop our reference, page already in the balloon */
  		put_page(newpage);
  	} else
  		putback_lru_page(newpage);
  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(newpage);
  	}
  	return rc;
  }
  
  /*
   * Counterpart of unmap_and_move() for hugepage migration.
   *
   * This function doesn't wait for the completion of hugepage I/O
   * because there is no race between I/O and migration for hugepages.
   * Note that currently hugepage I/O occurs only in direct I/O
   * where no lock is held and PG_writeback is irrelevant,
   * and the writeback status of all subpages is counted in the reference
   * count of the head page (i.e. if all subpages of a 2MB hugepage are
   * under direct I/O, the reference count of the head page is 512 and a bit more.)
   * This means that when we try to migrate a hugepage whose subpages are
   * doing direct I/O, some references remain after try_to_unmap() and
   * hugepage migration fails without data corruption.
   *
   * There is also no race when direct I/O is issued on the page under migration,
   * because then pte is replaced with migration swap entry and direct I/O code
   * will wait in the page fault for migration to complete.
   */
  static int unmap_and_move_huge_page(new_page_t get_new_page,
  				free_page_t put_new_page, unsigned long private,
  				struct page *hpage, int force,
  				enum migrate_mode mode)
  {
  	int rc = 0;
  	int *result = NULL;
  	int page_was_mapped = 0;
  	struct page *new_hpage;
  	struct anon_vma *anon_vma = NULL;
  	/*
  	 * Movability of hugepages depends on architectures and hugepage size.
  	 * This check is necessary because some callers of hugepage migration
  	 * like soft offline and memory hotremove don't walk through page
  	 * tables or check whether the hugepage is pmd-based or not before
  	 * kicking migration.
  	 */
  	if (!hugepage_migration_supported(page_hstate(hpage))) {
  		putback_active_hugepage(hpage);
  		return -ENOSYS;
  	}

  	new_hpage = get_new_page(hpage, private, &result);
  	if (!new_hpage)
  		return -ENOMEM;
  
  	rc = -EAGAIN;
  
  	if (!trylock_page(hpage)) {
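  		/*
  		 * Block on the page lock only if the caller insists and the
  		 * mode allows sleeping; otherwise give up and let a later
  		 * pass retry.
  		 */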
  		if (!force || mode != MIGRATE_SYNC)
  			goto out;
  		lock_page(hpage);
  	}
  	if (PageAnon(hpage))
  		anon_vma = page_get_anon_vma(hpage);

  	if (page_mapped(hpage)) {
  		try_to_unmap(hpage,
  			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
  		page_was_mapped = 1;
  	}
  
  	if (!page_mapped(hpage))
  		rc = move_to_new_page(new_hpage, hpage, page_was_mapped, mode);

  	if (rc != MIGRATEPAGE_SUCCESS && page_was_mapped)
  		remove_migration_ptes(hpage, hpage);
  	if (anon_vma)
  		put_anon_vma(anon_vma);

  	if (rc == MIGRATEPAGE_SUCCESS)
  		hugetlb_cgroup_migrate(hpage, new_hpage);
  	unlock_page(hpage);
  out:
  	if (rc != -EAGAIN)
  		putback_active_hugepage(hpage);
  
  	/*
  	 * If migration was not successful and there's a freeing callback, use
  	 * it.  Otherwise, put_page() will drop the reference grabbed during
  	 * isolation.
  	 */
  	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
  		put_new_page(new_hpage, private);
  	else
  		putback_active_hugepage(new_hpage);

  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(new_hpage);
  	}
  	return rc;
  }
  
  /*
   * migrate_pages - migrate the pages specified in a list, to the free pages
   *		   supplied as the target for the page migration
   *
   * @from:		The list of pages to be migrated.
   * @get_new_page:	The function used to allocate free pages to be used
   *			as the target of the page migration.
   * @put_new_page:	The function used to free target pages if migration
   *			fails, or NULL if no special handling is necessary.
   * @private:		Private data to be passed on to get_new_page()
   * @mode:		The migration mode that specifies the constraints for
   *			page migration, if any.
   * @reason:		The reason for page migration.
   *
   * The function returns after 10 attempts or if no pages are movable any more
   * because the list has become empty or no retryable pages exist any more.
   * The caller should call putback_movable_pages() to return pages to the LRU
   * or free list only if ret != 0.
   *
   * Returns the number of pages that were not migrated, or an error code.
   */
  int migrate_pages(struct list_head *from, new_page_t get_new_page,
68711a746   David Rientjes   mm, migration: ad...
1055
1056
  		free_page_t put_new_page, unsigned long private,
  		enum migrate_mode mode, int reason)
b20a35035   Christoph Lameter   [PATCH] page migr...
1057
  {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1058
  	int retry = 1;
b20a35035   Christoph Lameter   [PATCH] page migr...
1059
  	int nr_failed = 0;
5647bc293   Mel Gorman   mm: compaction: M...
1060
  	int nr_succeeded = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
1061
1062
1063
1064
1065
1066
1067
1068
  	int pass = 0;
  	struct page *page;
  	struct page *page2;
  	int swapwrite = current->flags & PF_SWAPWRITE;
  	int rc;
  
  	if (!swapwrite)
  		current->flags |= PF_SWAPWRITE;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1069
1070
  	for(pass = 0; pass < 10 && retry; pass++) {
  		retry = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
1071

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1072
  		list_for_each_entry_safe(page, page2, from, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1073
  			cond_resched();
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1074

31caf665e   Naoya Horiguchi   mm: migrate: make...
1075
1076
  			if (PageHuge(page))
  				rc = unmap_and_move_huge_page(get_new_page,
68711a746   David Rientjes   mm, migration: ad...
1077
1078
  						put_new_page, private, page,
  						pass > 2, mode);
31caf665e   Naoya Horiguchi   mm: migrate: make...
1079
  			else
68711a746   David Rientjes   mm, migration: ad...
1080
1081
  				rc = unmap_and_move(get_new_page, put_new_page,
  						private, page, pass > 2, mode);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1082

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1083
  			switch(rc) {
95a402c38   Christoph Lameter   [PATCH] page migr...
1084
1085
  			case -ENOMEM:
  				goto out;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1086
  			case -EAGAIN:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1087
  				retry++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1088
  				break;
78bd52097   Rafael Aquini   mm: adjust addres...
1089
  			case MIGRATEPAGE_SUCCESS:
5647bc293   Mel Gorman   mm: compaction: M...
1090
  				nr_succeeded++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1091
1092
  				break;
  			default:
354a33633   Naoya Horiguchi   mm/migrate: add c...
1093
1094
1095
1096
1097
1098
  				/*
  				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
  				 * unlike -EAGAIN case, the failed page is
  				 * removed from migration page list and not
  				 * retried in the next outer loop.
  				 */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1099
  				nr_failed++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1100
  				break;
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1101
  			}
b20a35035   Christoph Lameter   [PATCH] page migr...
1102
1103
  		}
  	}
78bd52097   Rafael Aquini   mm: adjust addres...
1104
  	rc = nr_failed + retry;
95a402c38   Christoph Lameter   [PATCH] page migr...
1105
  out:
5647bc293   Mel Gorman   mm: compaction: M...
1106
1107
1108
1109
  	if (nr_succeeded)
  		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
  	if (nr_failed)
  		count_vm_events(PGMIGRATE_FAIL, nr_failed);
7b2a2d4a1   Mel Gorman   mm: migrate: Add ...
1110
  	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
b20a35035   Christoph Lameter   [PATCH] page migr...
1111
1112
  	if (!swapwrite)
  		current->flags &= ~PF_SWAPWRITE;
78bd52097   Rafael Aquini   mm: adjust addres...
1113
  	return rc;
b20a35035   Christoph Lameter   [PATCH] page migr...
1114
  }
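
  /*
   * Illustrative sketch, not part of the original file: a minimal caller of
   * migrate_pages().  The callback and helper names below are hypothetical;
   * the signatures follow new_page_t/free_page_t as used elsewhere in this
   * file (see new_page_node() and the put_new_page() calls above).
   */
  static struct page *example_alloc_target(struct page *page,
  					 unsigned long private, int **result)
  {
  	/* "private" is reused here as the destination node id. */
  	return alloc_pages_exact_node((int)private,
  				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
  }
  
  static void example_free_target(struct page *page, unsigned long private)
  {
  	/* Migration failed before the new page was used; just release it. */
  	put_page(page);
  }
  
  static int example_migrate_list(struct list_head *pagelist, int target_nid)
  {
  	int err;
  
  	if (list_empty(pagelist))
  		return 0;
  
  	err = migrate_pages(pagelist, example_alloc_target,
  			    example_free_target, (unsigned long)target_nid,
  			    MIGRATE_SYNC, MR_SYSCALL);
  	if (err)
  		/* Anything still on the list was not migrated; put it back. */
  		putback_movable_pages(pagelist);
  	return err;
  }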
95a402c38   Christoph Lameter   [PATCH] page migr...
1115

742755a1d   Christoph Lameter   [PATCH] page migr...
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
  #ifdef CONFIG_NUMA
  /*
   * Move a list of individual pages
   */
  struct page_to_node {
  	unsigned long addr;
  	struct page *page;
  	int node;
  	int status;
  };
  
  static struct page *new_page_node(struct page *p, unsigned long private,
  		int **result)
  {
  	struct page_to_node *pm = (struct page_to_node *)private;
  
  	while (pm->node != MAX_NUMNODES && pm->page != p)
  		pm++;
  
  	if (pm->node == MAX_NUMNODES)
  		return NULL;
  
  	*result = &pm->status;
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1139
1140
1141
1142
1143
  	if (PageHuge(p))
  		return alloc_huge_page_node(page_hstate(compound_head(p)),
  					pm->node);
  	else
  		return alloc_pages_exact_node(pm->node,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1144
  				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
1145
1146
1147
1148
1149
1150
  }
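
  /*
   * Illustrative sketch, not part of the original file: how the pm array
   * walked by new_page_node() is laid out.  The helper name is hypothetical;
   * do_move_page_to_node_array() below builds the real thing.
   */
  static void example_fill_pm(struct page_to_node *pm,
  			    unsigned long addr, int node)
  {
  	pm[0].addr = addr;	/* user virtual address of the page to move */
  	pm[0].node = node;	/* destination node id */
  	pm[0].page = NULL;	/* filled in later from follow_page() */
  	pm[1].node = MAX_NUMNODES;	/* end marker new_page_node() stops at */
  }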
  
  /*
   * Move a set of pages as indicated in the pm array. The addr
   * field must be set to the virtual address of the page to be moved
   * and the node number must contain a valid target node.
5e9a0f023   Brice Goglin   mm: extract do_pa...
1151
   * The pm array ends with node = MAX_NUMNODES.
742755a1d   Christoph Lameter   [PATCH] page migr...
1152
   */
5e9a0f023   Brice Goglin   mm: extract do_pa...
1153
1154
1155
  static int do_move_page_to_node_array(struct mm_struct *mm,
  				      struct page_to_node *pm,
  				      int migrate_all)
742755a1d   Christoph Lameter   [PATCH] page migr...
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
  {
  	int err;
  	struct page_to_node *pp;
  	LIST_HEAD(pagelist);
  
  	down_read(&mm->mmap_sem);
  
  	/*
  	 * Build a list of pages to migrate
  	 */
742755a1d   Christoph Lameter   [PATCH] page migr...
1166
1167
1168
  	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
  		struct vm_area_struct *vma;
  		struct page *page;
742755a1d   Christoph Lameter   [PATCH] page migr...
1169
1170
  		err = -EFAULT;
  		vma = find_vma(mm, pp->addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1171
  		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
742755a1d   Christoph Lameter   [PATCH] page migr...
1172
  			goto set_status;
500d65d47   Andrea Arcangeli   thp: pmd_trans_hu...
1173
  		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1174
1175
1176
1177
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1178
1179
1180
  		err = -ENOENT;
  		if (!page)
  			goto set_status;
62b61f611   Hugh Dickins   ksm: memory hotre...
1181
  		/* Use PageReserved to check for zero page */
b79bc0a0c   Hugh Dickins   ksm: enable KSM p...
1182
  		if (PageReserved(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
  			goto put_and_set;
  
  		pp->page = page;
  		err = page_to_nid(page);
  
  		if (err == pp->node)
  			/*
  			 * Page is already on the requested node
  			 */
  			goto put_and_set;
  
  		err = -EACCES;
  		if (page_mapcount(page) > 1 &&
  				!migrate_all)
  			goto put_and_set;
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1198
  		if (PageHuge(page)) {
e66f17ff7   Naoya Horiguchi   mm/hugetlb: take ...
1199
1200
  			if (PageHead(page))
  				isolate_huge_page(page, &pagelist);
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1201
1202
  			goto put_and_set;
  		}
62695a84e   Nick Piggin   vmscan: move isol...
1203
  		err = isolate_lru_page(page);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1204
  		if (!err) {
62695a84e   Nick Piggin   vmscan: move isol...
1205
  			list_add_tail(&page->lru, &pagelist);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1206
1207
1208
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
  		}
742755a1d   Christoph Lameter   [PATCH] page migr...
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
  put_and_set:
  		/*
  		 * Either remove the duplicate refcount from
  		 * isolate_lru_page() or drop the page ref if it was
  		 * not isolated.
  		 */
  		put_page(page);
  set_status:
  		pp->status = err;
  	}
e78bbfa82   Brice Goglin   mm: stop returnin...
1219
  	err = 0;
cf608ac19   Minchan Kim   mm: compaction: f...
1220
  	if (!list_empty(&pagelist)) {
68711a746   David Rientjes   mm, migration: ad...
1221
  		err = migrate_pages(&pagelist, new_page_node, NULL,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1222
  				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
cf608ac19   Minchan Kim   mm: compaction: f...
1223
  		if (err)
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1224
  			putback_movable_pages(&pagelist);
cf608ac19   Minchan Kim   mm: compaction: f...
1225
  	}
742755a1d   Christoph Lameter   [PATCH] page migr...
1226
1227
1228
1229
1230
1231
  
  	up_read(&mm->mmap_sem);
  	return err;
  }
  
  /*
5e9a0f023   Brice Goglin   mm: extract do_pa...
1232
1233
1234
   * Migrate an array of page addresses onto an array of nodes and fill
   * the corresponding array of status values.
   */
3268c63ed   Christoph Lameter   mm: fix move/migr...
1235
  static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
5e9a0f023   Brice Goglin   mm: extract do_pa...
1236
1237
1238
1239
1240
  			 unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 const int __user *nodes,
  			 int __user *status, int flags)
  {
3140a2273   Brice Goglin   mm: rework do_pag...
1241
  	struct page_to_node *pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1242
1243
1244
  	unsigned long chunk_nr_pages;
  	unsigned long chunk_start;
  	int err;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1245

3140a2273   Brice Goglin   mm: rework do_pag...
1246
1247
1248
  	err = -ENOMEM;
  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
  	if (!pm)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1249
  		goto out;
35282a2de   Brice Goglin   migration: only m...
1250
1251
  
  	migrate_prep();
5e9a0f023   Brice Goglin   mm: extract do_pa...
1252
  	/*
3140a2273   Brice Goglin   mm: rework do_pag...
1253
1254
  	 * Store a chunk of the page_to_node array in a page,
  	 * but keep the last slot as the end marker
5e9a0f023   Brice Goglin   mm: extract do_pa...
1255
  	 */
3140a2273   Brice Goglin   mm: rework do_pag...
1256
  	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1257

3140a2273   Brice Goglin   mm: rework do_pag...
1258
1259
1260
1261
  	for (chunk_start = 0;
  	     chunk_start < nr_pages;
  	     chunk_start += chunk_nr_pages) {
  		int j;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1262

3140a2273   Brice Goglin   mm: rework do_pag...
1263
1264
1265
1266
1267
1268
  		if (chunk_start + chunk_nr_pages > nr_pages)
  			chunk_nr_pages = nr_pages - chunk_start;
  
  		/* fill the chunk pm with addrs and nodes from user-space */
  		for (j = 0; j < chunk_nr_pages; j++) {
  			const void __user *p;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1269
  			int node;
3140a2273   Brice Goglin   mm: rework do_pag...
1270
1271
1272
1273
1274
1275
  			err = -EFAULT;
  			if (get_user(p, pages + j + chunk_start))
  				goto out_pm;
  			pm[j].addr = (unsigned long) p;
  
  			if (get_user(node, nodes + j + chunk_start))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1276
1277
1278
  				goto out_pm;
  
  			err = -ENODEV;
6f5a55f1a   Linus Torvalds   Fix potential cra...
1279
1280
  			if (node < 0 || node >= MAX_NUMNODES)
  				goto out_pm;
389162c22   Lai Jiangshan   mm,migrate: use N...
1281
  			if (!node_state(node, N_MEMORY))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1282
1283
1284
1285
1286
  				goto out_pm;
  
  			err = -EACCES;
  			if (!node_isset(node, task_nodes))
  				goto out_pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
  			pm[j].node = node;
  		}
  
  		/* End marker for this chunk */
  		pm[chunk_nr_pages].node = MAX_NUMNODES;
  
  		/* Migrate this chunk */
  		err = do_move_page_to_node_array(mm, pm,
  						 flags & MPOL_MF_MOVE_ALL);
  		if (err < 0)
  			goto out_pm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1298

5e9a0f023   Brice Goglin   mm: extract do_pa...
1299
  		/* Return status information */
3140a2273   Brice Goglin   mm: rework do_pag...
1300
1301
  		for (j = 0; j < chunk_nr_pages; j++)
  			if (put_user(pm[j].status, status + j + chunk_start)) {
5e9a0f023   Brice Goglin   mm: extract do_pa...
1302
  				err = -EFAULT;
3140a2273   Brice Goglin   mm: rework do_pag...
1303
1304
1305
1306
  				goto out_pm;
  			}
  	}
  	err = 0;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1307
1308
  
  out_pm:
3140a2273   Brice Goglin   mm: rework do_pag...
1309
  	free_page((unsigned long)pm);
5e9a0f023   Brice Goglin   mm: extract do_pa...
1310
1311
1312
1313
1314
  out:
  	return err;
  }
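
  /*
   * Illustrative arithmetic, not part of the original file: with 4 KiB pages
   * on a 64-bit build, sizeof(struct page_to_node) is 8 + 8 + 4 + 4 = 24
   * bytes, so chunk_nr_pages = 4096 / 24 - 1 = 169.  Each GFP_KERNEL page of
   * pm entries therefore holds up to 169 user requests, with the final slot
   * reserved for the MAX_NUMNODES end marker consumed by
   * do_move_page_to_node_array().
   */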
  
  /*
2f007e74b   Brice Goglin   mm: don't vmalloc...
1315
   * Determine the nodes of an array of pages and store them in an array of status values.
742755a1d   Christoph Lameter   [PATCH] page migr...
1316
   */
80bba1290   Brice Goglin   mm: no get_user/p...
1317
1318
  static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
  				const void __user **pages, int *status)
742755a1d   Christoph Lameter   [PATCH] page migr...
1319
  {
2f007e74b   Brice Goglin   mm: don't vmalloc...
1320
  	unsigned long i;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1321

742755a1d   Christoph Lameter   [PATCH] page migr...
1322
  	down_read(&mm->mmap_sem);
2f007e74b   Brice Goglin   mm: don't vmalloc...
1323
  	for (i = 0; i < nr_pages; i++) {
80bba1290   Brice Goglin   mm: no get_user/p...
1324
  		unsigned long addr = (unsigned long)(*pages);
742755a1d   Christoph Lameter   [PATCH] page migr...
1325
1326
  		struct vm_area_struct *vma;
  		struct page *page;
c095adbc2   KOSAKI Motohiro   mm: Don't touch u...
1327
  		int err = -EFAULT;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1328
1329
  
  		vma = find_vma(mm, addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1330
  		if (!vma || addr < vma->vm_start)
742755a1d   Christoph Lameter   [PATCH] page migr...
1331
  			goto set_status;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1332
  		page = follow_page(vma, addr, 0);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1333
1334
1335
1336
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1337
1338
  		err = -ENOENT;
  		/* Use PageReserved to check for zero page */
b79bc0a0c   Hugh Dickins   ksm: enable KSM p...
1339
  		if (!page || PageReserved(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1340
1341
1342
1343
  			goto set_status;
  
  		err = page_to_nid(page);
  set_status:
80bba1290   Brice Goglin   mm: no get_user/p...
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
  		*status = err;
  
  		pages++;
  		status++;
  	}
  
  	up_read(&mm->mmap_sem);
  }
  
  /*
   * Determine the nodes of a user array of pages and store them in
   * a user array of status values.
   */
  static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 int __user *status)
  {
  #define DO_PAGES_STAT_CHUNK_NR 16
  	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
  	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
80bba1290   Brice Goglin   mm: no get_user/p...
1364

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1365
1366
  	while (nr_pages) {
  		unsigned long chunk_nr;
80bba1290   Brice Goglin   mm: no get_user/p...
1367

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1368
1369
1370
1371
1372
1373
  		chunk_nr = nr_pages;
  		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
  			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
  
  		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
  			break;
80bba1290   Brice Goglin   mm: no get_user/p...
1374
1375
  
  		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1376
1377
  		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
  			break;
742755a1d   Christoph Lameter   [PATCH] page migr...
1378

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1379
1380
1381
1382
1383
  		pages += chunk_nr;
  		status += chunk_nr;
  		nr_pages -= chunk_nr;
  	}
  	return nr_pages ? -EFAULT : 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
1384
1385
1386
1387
1388
1389
  }
  
  /*
   * Move a list of pages in the address space of the currently executing
   * process.
   */
938bb9f5e   Heiko Carstens   [CVE-2009-0029] S...
1390
1391
1392
1393
  SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
  		const void __user * __user *, pages,
  		const int __user *, nodes,
  		int __user *, status, int, flags)
742755a1d   Christoph Lameter   [PATCH] page migr...
1394
  {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1395
  	const struct cred *cred = current_cred(), *tcred;
742755a1d   Christoph Lameter   [PATCH] page migr...
1396
  	struct task_struct *task;
742755a1d   Christoph Lameter   [PATCH] page migr...
1397
  	struct mm_struct *mm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1398
  	int err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1399
  	nodemask_t task_nodes;
742755a1d   Christoph Lameter   [PATCH] page migr...
1400
1401
1402
1403
1404
1405
1406
1407
1408
  
  	/* Check flags */
  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
  		return -EINVAL;
  
  	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
  		return -EPERM;
  
  	/* Find the mm_struct */
a879bf582   Greg Thelen   mm: grab rcu read...
1409
  	rcu_read_lock();
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
1410
  	task = pid ? find_task_by_vpid(pid) : current;
742755a1d   Christoph Lameter   [PATCH] page migr...
1411
  	if (!task) {
a879bf582   Greg Thelen   mm: grab rcu read...
1412
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1413
1414
  		return -ESRCH;
  	}
3268c63ed   Christoph Lameter   mm: fix move/migr...
1415
  	get_task_struct(task);
742755a1d   Christoph Lameter   [PATCH] page migr...
1416
1417
1418
1419
1420
1421
1422
  
  	/*
  	 * Check if this process has the right to modify the specified
  	 * process. The right exists if the process has administrative
  	 * capabilities, superuser privileges or the same
  	 * userid as the target process.
  	 */
c69e8d9c0   David Howells   CRED: Use RCU to ...
1423
  	tcred = __task_cred(task);
b38a86eb1   Eric W. Biederman   userns: Convert t...
1424
1425
  	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
  	    !uid_eq(cred->uid,  tcred->suid) && !uid_eq(cred->uid,  tcred->uid) &&
742755a1d   Christoph Lameter   [PATCH] page migr...
1426
  	    !capable(CAP_SYS_NICE)) {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1427
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1428
  		err = -EPERM;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1429
  		goto out;
742755a1d   Christoph Lameter   [PATCH] page migr...
1430
  	}
c69e8d9c0   David Howells   CRED: Use RCU to ...
1431
  	rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1432

86c3a7645   David Quigley   [PATCH] SELinux: ...
1433
1434
   	err = security_task_movememory(task);
   	if (err)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1435
  		goto out;
86c3a7645   David Quigley   [PATCH] SELinux: ...
1436

3268c63ed   Christoph Lameter   mm: fix move/migr...
1437
1438
1439
  	task_nodes = cpuset_mems_allowed(task);
  	mm = get_task_mm(task);
  	put_task_struct(task);
6e8b09eaf   Sasha Levin   mm: fix NULL ptr ...
1440
1441
1442
1443
1444
1445
1446
1447
  	if (!mm)
  		return -EINVAL;
  
  	if (nodes)
  		err = do_pages_move(mm, task_nodes, nr_pages, pages,
  				    nodes, status, flags);
  	else
  		err = do_pages_stat(mm, nr_pages, pages, status);
742755a1d   Christoph Lameter   [PATCH] page migr...
1448

742755a1d   Christoph Lameter   [PATCH] page migr...
1449
1450
  	mmput(mm);
  	return err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1451
1452
1453
1454
  
  out:
  	put_task_struct(task);
  	return err;
742755a1d   Christoph Lameter   [PATCH] page migr...
1455
  }
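
  /*
   * Illustrative userspace sketch, not part of this file: exercising the
   * syscall above through the move_pages() wrapper declared in libnuma's
   * <numaif.h> (link with -lnuma).  Error handling is minimal and node 0
   * is assumed to exist.
   *
   *	#include <numaif.h>
   *	#include <stdio.h>
   *	#include <stdlib.h>
   *
   *	int main(void)
   *	{
   *		void *pages[1];
   *		int nodes[1] = { 0 };	// desired destination node
   *		int status[1];
   *
   *		pages[0] = malloc(4096);
   *		*(char *)pages[0] = 1;	// touch so the page is allocated
   *
   *		if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE))
   *			perror("move_pages");
   *
   *		// nodes == NULL turns the call into a status query
   *		move_pages(0, 1, pages, NULL, status, 0);
   *		printf("page is on node %d\n", status[0]);
   *		return 0;
   *	}
   */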
742755a1d   Christoph Lameter   [PATCH] page migr...
1456

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1457
1458
1459
1460
1461
1462
  #ifdef CONFIG_NUMA_BALANCING
  /*
   * Returns true if this is a safe migration target node for misplaced NUMA
   * pages. Currently it only checks the watermarks, which is crude.
   */
  static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
3abef4e6c   Mel Gorman   mm: numa: take TH...
1463
  				   unsigned long nr_migrate_pages)
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1464
1465
1466
1467
1468
1469
1470
  {
  	int z;
  	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
  		struct zone *zone = pgdat->node_zones + z;
  
  		if (!populated_zone(zone))
  			continue;
6e543d578   Lisa Du   mm: vmscan: fix d...
1471
  		if (!zone_reclaimable(zone))
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
  			continue;
  
  		/* Avoid waking kswapd by allocating nr_migrate_pages pages. */
  		if (!zone_watermark_ok(zone, 0,
  				       high_wmark_pages(zone) +
  				       nr_migrate_pages,
  				       0, 0))
  			continue;
  		return true;
  	}
  	return false;
  }
  
  static struct page *alloc_misplaced_dst_page(struct page *page,
  					   unsigned long data,
  					   int **result)
  {
  	int nid = (int) data;
  	struct page *newpage;
  
  	newpage = alloc_pages_exact_node(nid,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1493
1494
1495
  					 (GFP_HIGHUSER_MOVABLE |
  					  __GFP_THISNODE | __GFP_NOMEMALLOC |
  					  __GFP_NORETRY | __GFP_NOWARN) &
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1496
  					 ~GFP_IOFS, 0);
bac0382c6   Hillf Danton   mm: numa: migrate...
1497

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1498
1499
1500
1501
  	return newpage;
  }
  
  /*
a8f607721   Mel Gorman   mm: numa: Rate li...
1502
1503
1504
1505
1506
1507
   * page migration rate limiting control.
   * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
   * window of time. Default here says do not migrate more than 1280M per second.
   */
  static unsigned int migrate_interval_millisecs __read_mostly = 100;
  static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
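  /*
   * Illustrative arithmetic, not part of the original file: with 4 KiB pages
   * (PAGE_SHIFT == 12), ratelimit_pages = 128 << 8 = 32768 pages, i.e.
   * 128 MiB per 100 ms window, which is where the "1280M per second"
   * figure in the comment above comes from.
   */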
b32967ff1   Mel Gorman   mm: numa: Add THP...
1508
  /* Returns true if the node is migrate rate-limited after the update */
1c30e0177   Mel Gorman   mm: numa: make NU...
1509
1510
  static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
  					unsigned long nr_pages)
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1511
  {
a8f607721   Mel Gorman   mm: numa: Rate li...
1512
1513
1514
1515
1516
  	/*
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
a8f607721   Mel Gorman   mm: numa: Rate li...
1517
  	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1518
  		spin_lock(&pgdat->numabalancing_migrate_lock);
a8f607721   Mel Gorman   mm: numa: Rate li...
1519
1520
1521
  		pgdat->numabalancing_migrate_nr_pages = 0;
  		pgdat->numabalancing_migrate_next_window = jiffies +
  			msecs_to_jiffies(migrate_interval_millisecs);
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1522
  		spin_unlock(&pgdat->numabalancing_migrate_lock);
a8f607721   Mel Gorman   mm: numa: Rate li...
1523
  	}
af1839d72   Mel Gorman   mm: numa: trace t...
1524
1525
1526
  	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
  		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
  								nr_pages);
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1527
  		return true;
af1839d72   Mel Gorman   mm: numa: trace t...
1528
  	}
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1529
1530
1531
1532
1533
1534
1535
1536
1537
  
  	/*
  	 * This is an unlocked non-atomic update so errors are possible.
  	 * The consequences are failing to migrate when we potentially should
  	 * have, which is not severe enough to warrant locking. If it is ever
  	 * a problem, it can be converted to a per-cpu counter.
  	 */
  	pgdat->numabalancing_migrate_nr_pages += nr_pages;
  	return false;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1538
  }
1c30e0177   Mel Gorman   mm: numa: make NU...
1539
  static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
b32967ff1   Mel Gorman   mm: numa: Add THP...
1540
  {
340ef3902   Hugh Dickins   mm: numa: cleanup...
1541
  	int page_lru;
a8f607721   Mel Gorman   mm: numa: Rate li...
1542

309381fea   Sasha Levin   mm: dump page whe...
1543
  	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
3abef4e6c   Mel Gorman   mm: numa: take TH...
1544

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1545
  	/* Avoid migrating to a node that is nearly full */
340ef3902   Hugh Dickins   mm: numa: cleanup...
1546
1547
  	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1548

340ef3902   Hugh Dickins   mm: numa: cleanup...
1549
1550
  	if (isolate_lru_page(page))
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1551

340ef3902   Hugh Dickins   mm: numa: cleanup...
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
  	/*
  	 * migrate_misplaced_transhuge_page() skips page migration's usual
  	 * check on page_count(), so we must do it here, now that the page
  	 * has been isolated: a GUP pin, or any other pin, prevents migration.
  	 * The expected page count is 3: 1 for the page's mapcount, 1 for the
  	 * caller's pin, and 1 for the reference taken by isolate_lru_page().
  	 */
  	if (PageTransHuge(page) && page_count(page) != 3) {
  		putback_lru_page(page);
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1562
  	}
340ef3902   Hugh Dickins   mm: numa: cleanup...
1563
1564
1565
  	page_lru = page_is_file_cache(page);
  	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
  				hpage_nr_pages(page));
149c33e1c   Mel Gorman   mm: migrate: Drop...
1566
  	/*
340ef3902   Hugh Dickins   mm: numa: cleanup...
1567
1568
1569
  	 * Isolating the page has taken another reference, so the
  	 * caller's reference can be safely dropped without the page
  	 * disappearing underneath us during migration.
149c33e1c   Mel Gorman   mm: migrate: Drop...
1570
1571
  	 */
  	put_page(page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1572
  	return 1;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1573
  }
de466bd62   Mel Gorman   mm: numa: avoid u...
1574
1575
1576
1577
1578
  bool pmd_trans_migrating(pmd_t pmd)
  {
  	struct page *page = pmd_page(pmd);
  	return PageLocked(page);
  }
b32967ff1   Mel Gorman   mm: numa: Add THP...
1579
1580
1581
1582
1583
  /*
   * Attempt to migrate a misplaced page to the specified destination
   * node. Caller is expected to have an elevated reference count on
   * the page that will be dropped by this function before returning.
   */
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1584
1585
  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
  			   int node)
b32967ff1   Mel Gorman   mm: numa: Add THP...
1586
1587
  {
  	pg_data_t *pgdat = NODE_DATA(node);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1588
  	int isolated;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1589
1590
1591
1592
  	int nr_remaining;
  	LIST_HEAD(migratepages);
  
  	/*
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1593
1594
  	 * Don't migrate file pages that are mapped in multiple processes
  	 * with execute permissions as they are probably shared libraries.
b32967ff1   Mel Gorman   mm: numa: Add THP...
1595
  	 */
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1596
1597
  	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
  	    (vma->vm_flags & VM_EXEC))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1598
  		goto out;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1599
1600
1601
1602
1603
1604
  
  	/*
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
340ef3902   Hugh Dickins   mm: numa: cleanup...
1605
  	if (numamigrate_update_ratelimit(pgdat, 1))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1606
  		goto out;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1607
1608
1609
1610
1611
1612
  
  	isolated = numamigrate_isolate_page(pgdat, page);
  	if (!isolated)
  		goto out;
  
  	list_add(&page->lru, &migratepages);
9c620e2bc   Hugh Dickins   mm: remove offlin...
1613
  	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
68711a746   David Rientjes   mm, migration: ad...
1614
1615
  				     NULL, node, MIGRATE_ASYNC,
  				     MR_NUMA_MISPLACED);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1616
  	if (nr_remaining) {
59c82b70d   Joonsoo Kim   mm/migrate: remov...
1617
1618
1619
1620
1621
1622
  		if (!list_empty(&migratepages)) {
  			list_del(&page->lru);
  			dec_zone_page_state(page, NR_ISOLATED_ANON +
  					page_is_file_cache(page));
  			putback_lru_page(page);
  		}
b32967ff1   Mel Gorman   mm: numa: Add THP...
1623
1624
1625
  		isolated = 0;
  	} else
  		count_vm_numa_event(NUMA_PAGE_MIGRATE);
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1626
  	BUG_ON(!list_empty(&migratepages));
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1627
  	return isolated;
340ef3902   Hugh Dickins   mm: numa: cleanup...
1628
1629
1630
1631
  
  out:
  	put_page(page);
  	return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1632
  }
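
  /*
   * Illustrative sketch, not part of the original file: how a NUMA hinting
   * fault handler is expected to consume the return value of
   * migrate_misplaced_page().  The helper name and the surrounding fault
   * context (page, vma, target_nid) are assumptions for the example.
   */
  static int example_numa_hint_fault(struct page *page,
  				   struct vm_area_struct *vma, int target_nid)
  {
  	/* Drops the caller's page reference whether or not it migrates. */
  	int migrated = migrate_misplaced_page(page, vma, target_nid);
  
  	/* Non-zero means the page now resides on target_nid. */
  	return migrated ? target_nid : -1;
  }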
220018d38   Mel Gorman   mm: numa: Add THP...
1633
  #endif /* CONFIG_NUMA_BALANCING */
b32967ff1   Mel Gorman   mm: numa: Add THP...
1634

220018d38   Mel Gorman   mm: numa: Add THP...
1635
  #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
340ef3902   Hugh Dickins   mm: numa: cleanup...
1636
1637
1638
1639
  /*
   * Migrates a THP to a given target node. page must be locked and is unlocked
   * before returning.
   */
b32967ff1   Mel Gorman   mm: numa: Add THP...
1640
1641
1642
1643
1644
1645
  int migrate_misplaced_transhuge_page(struct mm_struct *mm,
  				struct vm_area_struct *vma,
  				pmd_t *pmd, pmd_t entry,
  				unsigned long address,
  				struct page *page, int node)
  {
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1646
  	spinlock_t *ptl;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1647
1648
1649
  	pg_data_t *pgdat = NODE_DATA(node);
  	int isolated = 0;
  	struct page *new_page = NULL;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1650
  	int page_lru = page_is_file_cache(page);
f714f4f20   Mel Gorman   mm: numa: call MM...
1651
1652
  	unsigned long mmun_start = address & HPAGE_PMD_MASK;
  	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
2b4847e73   Mel Gorman   mm: numa: seriali...
1653
  	pmd_t orig_entry;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1654
1655
  
  	/*
b32967ff1   Mel Gorman   mm: numa: Add THP...
1656
1657
1658
1659
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
d28d43351   Mel Gorman   mm: migrate: Acco...
1660
  	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1661
1662
1663
  		goto out_dropref;
  
  	new_page = alloc_pages_node(node,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1664
1665
  		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
  		HPAGE_PMD_ORDER);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1666
1667
  	if (!new_page)
  		goto out_fail;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1668
  	isolated = numamigrate_isolate_page(pgdat, page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1669
  	if (!isolated) {
b32967ff1   Mel Gorman   mm: numa: Add THP...
1670
  		put_page(new_page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1671
  		goto out_fail;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1672
  	}
b0943d61b   Mel Gorman   mm: numa: defer T...
1673
1674
  	if (mm_tlb_flush_pending(mm))
  		flush_tlb_range(vma, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
  	/* Prepare a page as a migration target */
  	__set_page_locked(new_page);
  	SetPageSwapBacked(new_page);
  
  	/* anon mapping, we can simply copy page->mapping to the new page: */
  	new_page->mapping = page->mapping;
  	new_page->index = page->index;
  	migrate_page_copy(new_page, page);
  	WARN_ON(PageLRU(new_page));
  
  	/* Recheck the target PMD */
f714f4f20   Mel Gorman   mm: numa: call MM...
1686
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1687
  	ptl = pmd_lock(mm, pmd);
2b4847e73   Mel Gorman   mm: numa: seriali...
1688
1689
  	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
  fail_putback:
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1690
  		spin_unlock(ptl);
f714f4f20   Mel Gorman   mm: numa: call MM...
1691
  		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
  
  		/* Reverse changes made by migrate_page_copy() */
  		if (TestClearPageActive(new_page))
  			SetPageActive(page);
  		if (TestClearPageUnevictable(new_page))
  			SetPageUnevictable(page);
  		mlock_migrate_page(page, new_page);
  
  		unlock_page(new_page);
  		put_page(new_page);		/* Free it */
a54a407fb   Mel Gorman   mm: Close races b...
1702
1703
  		/* Retake the callers reference and putback on LRU */
  		get_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1704
  		putback_lru_page(page);
a54a407fb   Mel Gorman   mm: Close races b...
1705
1706
  		mod_zone_page_state(page_zone(page),
  			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
eb4489f69   Mel Gorman   mm: numa: avoid u...
1707
1708
  
  		goto out_unlock;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1709
  	}
2b4847e73   Mel Gorman   mm: numa: seriali...
1710
  	orig_entry = *pmd;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1711
  	entry = mk_pmd(new_page, vma->vm_page_prot);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1712
  	entry = pmd_mkhuge(entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1713
  	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1714

2b4847e73   Mel Gorman   mm: numa: seriali...
1715
1716
1717
1718
1719
1720
1721
  	/*
  	 * Clear the old entry under pagetable lock and establish the new PTE.
  	 * Any parallel GUP will either observe the old page blocking on the
  	 * page lock, block on the page table lock or observe the new page.
  	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
  	 * guarantee the copy is visible before the pagetable update.
  	 */
f714f4f20   Mel Gorman   mm: numa: call MM...
1722
  	flush_cache_range(vma, mmun_start, mmun_end);
11de9927f   Mel Gorman   mm: numa: add mig...
1723
  	page_add_anon_rmap(new_page, vma, mmun_start);
34ee645e8   Joerg Roedel   mmu_notifier: cal...
1724
  	pmdp_clear_flush_notify(vma, mmun_start, pmd);
f714f4f20   Mel Gorman   mm: numa: call MM...
1725
1726
  	set_pmd_at(mm, mmun_start, pmd, entry);
  	flush_tlb_range(vma, mmun_start, mmun_end);
ce4a9cc57   Stephen Rothwell   mm,numa: fix upda...
1727
  	update_mmu_cache_pmd(vma, address, &entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1728
1729
  
  	if (page_count(page) != 2) {
f714f4f20   Mel Gorman   mm: numa: call MM...
1730
1731
  		set_pmd_at(mm, mmun_start, pmd, orig_entry);
  		flush_tlb_range(vma, mmun_start, mmun_end);
34ee645e8   Joerg Roedel   mmu_notifier: cal...
1732
  		mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
2b4847e73   Mel Gorman   mm: numa: seriali...
1733
1734
1735
1736
  		update_mmu_cache_pmd(vma, address, &entry);
  		page_remove_rmap(new_page);
  		goto fail_putback;
  	}
0a31bc97c   Johannes Weiner   mm: memcontrol: r...
1737
  	mem_cgroup_migrate(page, new_page, false);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1738
  	page_remove_rmap(page);
2b4847e73   Mel Gorman   mm: numa: seriali...
1739

c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1740
  	spin_unlock(ptl);
f714f4f20   Mel Gorman   mm: numa: call MM...
1741
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1742

11de9927f   Mel Gorman   mm: numa: add mig...
1743
1744
1745
  	/* Take an "isolate" reference and put new page on the LRU. */
  	get_page(new_page);
  	putback_lru_page(new_page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1746
1747
1748
1749
1750
1751
1752
  	unlock_page(new_page);
  	unlock_page(page);
  	put_page(page);			/* Drop the rmap reference */
  	put_page(page);			/* Drop the LRU isolation reference */
  
  	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
  	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1753
1754
1755
1756
  	mod_zone_page_state(page_zone(page),
  			NR_ISOLATED_ANON + page_lru,
  			-HPAGE_PMD_NR);
  	return isolated;
340ef3902   Hugh Dickins   mm: numa: cleanup...
1757
1758
  out_fail:
  	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1759
  out_dropref:
2b4847e73   Mel Gorman   mm: numa: seriali...
1760
1761
  	ptl = pmd_lock(mm, pmd);
  	if (pmd_same(*pmd, entry)) {
4d9424669   Mel Gorman   mm: convert p[te|...
1762
  		entry = pmd_modify(entry, vma->vm_page_prot);
f714f4f20   Mel Gorman   mm: numa: call MM...
1763
  		set_pmd_at(mm, mmun_start, pmd, entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1764
1765
1766
  		update_mmu_cache_pmd(vma, address, &entry);
  	}
  	spin_unlock(ptl);
a54a407fb   Mel Gorman   mm: Close races b...
1767

eb4489f69   Mel Gorman   mm: numa: avoid u...
1768
  out_unlock:
340ef3902   Hugh Dickins   mm: numa: cleanup...
1769
  	unlock_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1770
  	put_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1771
1772
  	return 0;
  }
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1773
1774
1775
  #endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
  
  #endif /* CONFIG_NUMA */