  /*
   * Memory Migration functionality - linux/mm/migrate.c
   *
   * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
   *
   * Page migration was first developed in the context of the memory hotplug
   * project. The main authors of the migration code are:
   *
   * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
   * Hirokazu Takahashi <taka@valinux.co.jp>
   * Dave Hansen <haveblue@us.ibm.com>
   * Christoph Lameter
   */
  
  #include <linux/migrate.h>
  #include <linux/export.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
  #include <linux/pagemap.h>
  #include <linux/buffer_head.h>
  #include <linux/mm_inline.h>
  #include <linux/nsproxy.h>
  #include <linux/pagevec.h>
  #include <linux/ksm.h>
  #include <linux/rmap.h>
  #include <linux/topology.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
  #include <linux/writeback.h>
  #include <linux/mempolicy.h>
  #include <linux/vmalloc.h>
  #include <linux/security.h>
  #include <linux/memcontrol.h>
  #include <linux/syscalls.h>
  #include <linux/hugetlb.h>
  #include <linux/hugetlb_cgroup.h>
  #include <linux/gfp.h>
  #include <linux/balloon_compaction.h>
  #include <linux/mmu_notifier.h>

  #include <asm/tlbflush.h>
  #define CREATE_TRACE_POINTS
  #include <trace/events/migrate.h>
  #include "internal.h"
  /*
   * migrate_prep() needs to be called before we start compiling a list of pages
   * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
   * undesirable, use migrate_prep_local()
   */
  int migrate_prep(void)
  {
  	/*
  	 * Clear the LRU lists so pages can be isolated.
  	 * Note that pages may be moved off the LRU after we have
  	 * drained them. Those pages will fail to migrate like other
  	 * pages that may be busy.
  	 */
  	lru_add_drain_all();
  
  	return 0;
  }
  /* Do the necessary work of migrate_prep but not if it involves other CPUs */
  int migrate_prep_local(void)
  {
  	lru_add_drain();
  
  	return 0;
  }
  /*
   * Put previously isolated pages back onto the appropriate lists
   * from where they were once taken off for compaction/migration.
   *
   * This function shall be used whenever the isolated pageset has been
   * built from LRU, balloon, or hugetlbfs pages. See isolate_migratepages_range()
   * and isolate_huge_page().
   */
  void putback_movable_pages(struct list_head *l)
  {
  	struct page *page;
  	struct page *page2;
  
  	list_for_each_entry_safe(page, page2, l, lru) {
  		if (unlikely(PageHuge(page))) {
  			putback_active_hugepage(page);
  			continue;
  		}
  		list_del(&page->lru);
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
  				page_is_file_cache(page));
  		if (unlikely(isolated_balloon_page(page)))
  			balloon_page_putback(page);
  		else
  			putback_lru_page(page);
  	}
  }
  /*
   * Restore a potential migration pte to a working pte entry
   */
  static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
  				 unsigned long addr, void *old)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	swp_entry_t entry;
   	pmd_t *pmd;
  	pte_t *ptep, pte;
   	spinlock_t *ptl;
  	if (unlikely(PageHuge(new))) {
  		ptep = huge_pte_offset(mm, addr);
  		if (!ptep)
  			goto out;
  		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
  	} else {
  		pmd = mm_find_pmd(mm, addr);
  		if (!pmd)
  			goto out;

  		ptep = pte_offset_map(pmd, addr);

  		/*
  		 * Peek to check is_swap_pte() before taking ptlock?  No, we
  		 * can race mremap's move_ptes(), which skips anon_vma lock.
  		 */
  
  		ptl = pte_lockptr(mm, pmd);
  	}

   	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto unlock;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry) ||
  	    migration_entry_to_page(entry) != old)
  		goto unlock;

  	get_page(new);
  	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
  	if (pte_swp_soft_dirty(*ptep))
  		pte = pte_mksoft_dirty(pte);
  
  	/* Recheck VMA as permissions can change since migration started  */
  	if (is_write_migration_entry(entry))
  		pte = maybe_mkwrite(pte, vma);
  #ifdef CONFIG_HUGETLB_PAGE
  	if (PageHuge(new)) {
  		pte = pte_mkhuge(pte);
  		pte = arch_make_huge_pte(pte, vma, new, 0);
  	}
  #endif
  	flush_dcache_page(new);
  	set_pte_at(mm, addr, ptep, pte);

  	if (PageHuge(new)) {
  		if (PageAnon(new))
  			hugepage_add_anon_rmap(new, vma, addr);
  		else
  			page_dup_rmap(new);
  	} else if (PageAnon(new))
  		page_add_anon_rmap(new, vma, addr);
  	else
  		page_add_file_rmap(new);
  
  	/* No need to invalidate - it was non-present before */
  	update_mmu_cache(vma, addr, ptep);
  unlock:
  	pte_unmap_unlock(ptep, ptl);
  out:
  	return SWAP_AGAIN;
  }
  
  /*
   * Congratulations to trinity for discovering this bug.
   * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
   * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
   * replace the specified range by file ptes throughout (maybe populated after).
   * If page migration finds a page within that range, while it's still located
   * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
   * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
   * But if the migrating page is in a part of the vma outside the range to be
   * remapped, then it will not be cleared, and remove_migration_ptes() needs to
   * deal with it.  Fortunately, this part of the vma is of course still linear,
   * so we just need to use linear location on the nonlinear list.
   */
  static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
  		struct address_space *mapping, void *arg)
  {
  	struct vm_area_struct *vma;
  	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  	unsigned long addr;
  
  	list_for_each_entry(vma,
  		&mapping->i_mmap_nonlinear, shared.nonlinear) {
  
  		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
  		if (addr >= vma->vm_start && addr < vma->vm_end)
  			remove_migration_pte(page, vma, addr, arg);
  	}
  	return SWAP_AGAIN;
  }
  
  /*
   * Get rid of all migration entries and replace them by
   * references to the indicated page.
   */
  static void remove_migration_ptes(struct page *old, struct page *new)
  {
  	struct rmap_walk_control rwc = {
  		.rmap_one = remove_migration_pte,
  		.arg = old,
  		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
  	};
  
  	rmap_walk(new, &rwc);
  }
  
  /*
   * Something used the pte of a page under migration. We need to
   * get to the page and wait until migration is finished.
   * When we return from this function the fault will be retried.
   */
  static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
  				spinlock_t *ptl)
  {
  	pte_t pte;
  	swp_entry_t entry;
  	struct page *page;
  	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto out;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry))
  		goto out;
  
  	page = migration_entry_to_page(entry);
  	/*
  	 * Once radix-tree replacement of page migration started, page_count
  	 * *must* be zero. And, we don't want to call wait_on_page_locked()
  	 * against a page without get_page().
  	 * So, we use get_page_unless_zero(), here. Even failed, page fault
  	 * will occur again.
  	 */
  	if (!get_page_unless_zero(page))
  		goto out;
  	pte_unmap_unlock(ptep, ptl);
  	wait_on_page_locked(page);
  	put_page(page);
  	return;
  out:
  	pte_unmap_unlock(ptep, ptl);
  }
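
  /*
   * Called from the fault path (e.g. do_swap_page()) when a migration entry
   * is found where a present pte was expected: map and lock the pte, then
   * wait for migration of the underlying page to complete before the fault
   * is retried.
   */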
  void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
  				unsigned long address)
  {
  	spinlock_t *ptl = pte_lockptr(mm, pmd);
  	pte_t *ptep = pte_offset_map(pmd, address);
  	__migration_entry_wait(mm, ptep, ptl);
  }
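
  /*
   * hugetlb variant: the caller passes the huge pte itself, and the vma is
   * needed so that the matching pte lock can be found with huge_pte_lockptr()
   * for the vma's hstate.
   */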
  void migration_entry_wait_huge(struct vm_area_struct *vma,
  		struct mm_struct *mm, pte_t *pte)
  {
  	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
  	__migration_entry_wait(mm, pte, ptl);
  }
  #ifdef CONFIG_BLOCK
  /* Returns true if all buffers are successfully locked */
  static bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
  {
  	struct buffer_head *bh = head;
  
  	/* Simple case, sync compaction */
  	if (mode != MIGRATE_ASYNC) {
  		do {
  			get_bh(bh);
  			lock_buffer(bh);
  			bh = bh->b_this_page;
  
  		} while (bh != head);
  
  		return true;
  	}
  
  	/* async case, we cannot block on lock_buffer so use trylock_buffer */
  	do {
  		get_bh(bh);
  		if (!trylock_buffer(bh)) {
  			/*
  			 * We failed to lock the buffer and cannot stall in
  			 * async migration. Release the taken locks
  			 */
  			struct buffer_head *failed_bh = bh;
  			put_bh(failed_bh);
  			bh = head;
  			while (bh != failed_bh) {
  				unlock_buffer(bh);
  				put_bh(bh);
  				bh = bh->b_this_page;
  			}
  			return false;
  		}
  
  		bh = bh->b_this_page;
  	} while (bh != head);
  	return true;
  }
  #else
  static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
  {
  	return true;
  }
  #endif /* CONFIG_BLOCK */
  /*
   * Replace the page in the mapping.
   *
   * The number of remaining references must be:
   * 1 for anonymous pages without a mapping
   * 2 for pages with a mapping
   * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
   */
  int migrate_page_move_mapping(struct address_space *mapping,
  		struct page *newpage, struct page *page,
  		struct buffer_head *head, enum migrate_mode mode,
  		int extra_count)
  {
  	int expected_count = 1 + extra_count;
  	void **pslot;

  	if (!mapping) {
  		/* Anonymous page without mapping */
  		if (page_count(page) != expected_count)
  			return -EAGAIN;
  		return MIGRATEPAGE_SUCCESS;
  	}
  	spin_lock_irq(&mapping->tree_lock);

  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
   					page_index(page));

  	expected_count += 1 + page_has_private(page);
  	if (page_count(page) != expected_count ||
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
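
  	/*
  	 * Freeze the refcount at the expected value so that speculative
  	 * lookups using get_page_unless_zero() cannot take a new reference
  	 * while the radix tree slot is switched over to the new page.
  	 */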
  	if (!page_freeze_refs(page, expected_count)) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  	/*
  	 * In the async migration case of moving a page with buffers, lock the
  	 * buffers using trylock before the mapping is moved. If the mapping
  	 * were moved first and we then failed to lock the buffers, we could
  	 * not move the mapping back due to an elevated page count and would
  	 * have to block waiting on other references to be dropped.
  	 */
  	if (mode == MIGRATE_ASYNC && head &&
  			!buffer_migrate_lock_buffers(head, mode)) {
  		page_unfreeze_refs(page, expected_count);
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	/*
  	 * Now we know that no one else is looking at the page.
  	 */
  	get_page(newpage);	/* add cache reference */
  	if (PageSwapCache(page)) {
  		SetPageSwapCache(newpage);
  		set_page_private(newpage, page_private(page));
  	}
  	radix_tree_replace_slot(pslot, newpage);
  
  	/*
  	 * Drop cache reference from old page by unfreezing
  	 * to one less reference.
  	 * We know this isn't the last reference.
  	 */
  	page_unfreeze_refs(page, expected_count - 1);

  	/*
  	 * If moved to a different zone then also account
  	 * the page for that zone. Other VM counters will be
  	 * taken care of when we establish references to the
  	 * new page and drop references to the old page.
  	 *
  	 * Note that anonymous pages are accounted for
  	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
  	 * are mapped to swap space.
  	 */
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__inc_zone_page_state(newpage, NR_FILE_PAGES);
  	if (!PageSwapCache(page) && PageSwapBacked(page)) {
  		__dec_zone_page_state(page, NR_SHMEM);
  		__inc_zone_page_state(newpage, NR_SHMEM);
  	}
  	spin_unlock_irq(&mapping->tree_lock);

  	return MIGRATEPAGE_SUCCESS;
  }
  
  /*
   * The expected number of remaining references is the same as that
   * of migrate_page_move_mapping().
   */
  int migrate_huge_page_move_mapping(struct address_space *mapping,
  				   struct page *newpage, struct page *page)
  {
  	int expected_count;
  	void **pslot;
  
  	if (!mapping) {
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return MIGRATEPAGE_SUCCESS;
  	}
  
  	spin_lock_irq(&mapping->tree_lock);
  
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
  					page_index(page));
  
  	expected_count = 2 + page_has_private(page);
  	if (page_count(page) != expected_count ||
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	if (!page_freeze_refs(page, expected_count)) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	get_page(newpage);
  
  	radix_tree_replace_slot(pslot, newpage);
  	page_unfreeze_refs(page, expected_count - 1);
  
  	spin_unlock_irq(&mapping->tree_lock);
  	return MIGRATEPAGE_SUCCESS;
  }
  
  /*
   * Gigantic pages are so large that we do not guarantee that page++ pointer
   * arithmetic will work across the entire page.  We need something more
   * specialized.
   */
  static void __copy_gigantic_page(struct page *dst, struct page *src,
  				int nr_pages)
  {
  	int i;
  	struct page *dst_base = dst;
  	struct page *src_base = src;
  
  	for (i = 0; i < nr_pages; ) {
  		cond_resched();
  		copy_highpage(dst, src);
  
  		i++;
  		dst = mem_map_next(dst, dst_base, i);
  		src = mem_map_next(src, src_base, i);
  	}
  }
  
  static void copy_huge_page(struct page *dst, struct page *src)
  {
  	int i;
  	int nr_pages;
  
  	if (PageHuge(src)) {
  		/* hugetlbfs page */
  		struct hstate *h = page_hstate(src);
  		nr_pages = pages_per_huge_page(h);
  
  		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
  			__copy_gigantic_page(dst, src, nr_pages);
  			return;
  		}
  	} else {
  		/* thp page */
  		BUG_ON(!PageTransHuge(src));
  		nr_pages = hpage_nr_pages(src);
  	}
  
  	for (i = 0; i < nr_pages; i++) {
  		cond_resched();
  		copy_highpage(dst + i, src + i);
  	}
  }
  
  /*
   * Copy the page to its new location
   */
  void migrate_page_copy(struct page *newpage, struct page *page)
  {
  	int cpupid;
  	if (PageHuge(page) || PageTransHuge(page))
  		copy_huge_page(newpage, page);
  	else
  		copy_highpage(newpage, page);
  
  	if (PageError(page))
  		SetPageError(newpage);
  	if (PageReferenced(page))
  		SetPageReferenced(newpage);
  	if (PageUptodate(page))
  		SetPageUptodate(newpage);
  	if (TestClearPageActive(page)) {
  		VM_BUG_ON_PAGE(PageUnevictable(page), page);
  		SetPageActive(newpage);
  	} else if (TestClearPageUnevictable(page))
  		SetPageUnevictable(newpage);
  	if (PageChecked(page))
  		SetPageChecked(newpage);
  	if (PageMappedToDisk(page))
  		SetPageMappedToDisk(newpage);
  
  	if (PageDirty(page)) {
  		clear_page_dirty_for_io(page);
  		/*
  		 * Want to mark the page and the radix tree as dirty, and
  		 * redo the accounting that clear_page_dirty_for_io undid,
  		 * but we can't use set_page_dirty because that function
  		 * is actually a signal that all of the page has become dirty.
  		 * Whereas only part of our page may be dirty.
  		 */
  		if (PageSwapBacked(page))
  			SetPageDirty(newpage);
  		else
  			__set_page_dirty_nobuffers(newpage);
   	}
  	/*
  	 * Copy NUMA information to the new page, to prevent over-eager
  	 * future migrations of this same page.
  	 */
  	cpupid = page_cpupid_xchg_last(page, -1);
  	page_cpupid_xchg_last(newpage, cpupid);
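
  	/*
  	 * Carry PG_mlocked over to the new page; mlock_migrate_page() also
  	 * moves the NR_MLOCK accounting between the two zones.
  	 */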
  	mlock_migrate_page(newpage, page);
  	ksm_migrate_page(newpage, page);
  	/*
  	 * Please do not reorder this without considering how mm/ksm.c's
  	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
  	 */
  	ClearPageSwapCache(page);
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
  
  	/*
  	 * If any waiters have accumulated on the new page then
  	 * wake them up.
  	 */
  	if (PageWriteback(newpage))
  		end_page_writeback(newpage);
  }

  /************************************************************
   *                    Migration functions
   ***********************************************************/
  /*
   * Common logic to directly migrate a single page suitable for
   * pages that do not use PagePrivate/PagePrivate2.
   *
   * Pages are locked upon entry and exit.
   */
  int migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page,
  		enum migrate_mode mode)
  {
  	int rc;
  
  	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
  	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);

  	if (rc != MIGRATEPAGE_SUCCESS)
  		return rc;
  
  	migrate_page_copy(newpage, page);
  	return MIGRATEPAGE_SUCCESS;
  }
  EXPORT_SYMBOL(migrate_page);
  #ifdef CONFIG_BLOCK
  /*
   * Migration function for pages with buffers. This function can only be used
   * if the underlying filesystem guarantees that no other references to "page"
   * exist.
   */
  int buffer_migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page, enum migrate_mode mode)
  {
  	struct buffer_head *bh, *head;
  	int rc;
  	if (!page_has_buffers(page))
  		return migrate_page(mapping, newpage, page, mode);
  
  	head = page_buffers(page);
  	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);

  	if (rc != MIGRATEPAGE_SUCCESS)
  		return rc;
  	/*
  	 * In the async case, migrate_page_move_mapping locked the buffers
  	 * with an IRQ-safe spinlock held. In the sync case, the buffers
  	 * need to be locked now
  	 */
  	if (mode != MIGRATE_ASYNC)
  		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
  
  	ClearPagePrivate(page);
  	set_page_private(newpage, page_private(page));
  	set_page_private(page, 0);
  	put_page(page);
  	get_page(newpage);
  
  	bh = head;
  	do {
  		set_bh_page(bh, newpage, bh_offset(bh));
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	SetPagePrivate(newpage);
  
  	migrate_page_copy(newpage, page);
  
  	bh = head;
  	do {
  		unlock_buffer(bh);
   		put_bh(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  	return MIGRATEPAGE_SUCCESS;
  }
  EXPORT_SYMBOL(buffer_migrate_page);
  #endif

  /*
   * Writeback a page to clean the dirty state
   */
  static int writeout(struct address_space *mapping, struct page *page)
  {
  	struct writeback_control wbc = {
  		.sync_mode = WB_SYNC_NONE,
  		.nr_to_write = 1,
  		.range_start = 0,
  		.range_end = LLONG_MAX,
  		.for_reclaim = 1
  	};
  	int rc;
  
  	if (!mapping->a_ops->writepage)
  		/* No write method for the address space */
  		return -EINVAL;
  
  	if (!clear_page_dirty_for_io(page))
  		/* Someone else already triggered a write */
  		return -EAGAIN;
  	/*
  	 * A dirty page may imply that the underlying filesystem has
  	 * the page on some queue. So the page must be clean for
  	 * migration. Writeout may mean we lose the lock and the
  	 * page state is no longer what we checked for earlier.
  	 * At this point we know that the migration attempt cannot
  	 * be successful.
  	 */
  	remove_migration_ptes(page, page);

  	rc = mapping->a_ops->writepage(page, &wbc);

  	if (rc != AOP_WRITEPAGE_ACTIVATE)
  		/* unlocked. Relock */
  		lock_page(page);
  	return (rc < 0) ? -EIO : -EAGAIN;
  }
  
  /*
   * Default handling if a filesystem does not provide a migration function.
   */
  static int fallback_migrate_page(struct address_space *mapping,
  	struct page *newpage, struct page *page, enum migrate_mode mode)
  {
  	if (PageDirty(page)) {
  		/* Only writeback pages in full synchronous migration */
  		if (mode != MIGRATE_SYNC)
  			return -EBUSY;
  		return writeout(mapping, page);
  	}
  
  	/*
  	 * Buffers may be managed in a filesystem specific way.
  	 * We must have no buffers or drop them.
  	 */
  	if (page_has_private(page) &&
  	    !try_to_release_page(page, GFP_KERNEL))
  		return -EAGAIN;
  	return migrate_page(mapping, newpage, page, mode);
  }
  /*
   * Move a page to a newly allocated page
   * The page is locked and all ptes have been successfully removed.
   *
   * The new page will have replaced the old page if this function
   * is successful.
   *
   * Return value:
   *   < 0 - error code
   *  MIGRATEPAGE_SUCCESS - success
   */
  static int move_to_new_page(struct page *newpage, struct page *page,
  				int page_was_mapped, enum migrate_mode mode)
  {
  	struct address_space *mapping;
  	int rc;
  
  	/*
  	 * Block others from accessing the page when we get around to
  	 * establishing additional references. We are the only one
  	 * holding a reference to the new page at this point.
  	 */
  	if (!trylock_page(newpage))
  		BUG();
  
  	/* Prepare mapping for the new page.*/
  	newpage->index = page->index;
  	newpage->mapping = page->mapping;
  	if (PageSwapBacked(page))
  		SetPageSwapBacked(newpage);
  
  	mapping = page_mapping(page);
  	if (!mapping)
  		rc = migrate_page(mapping, newpage, page, mode);
  	else if (mapping->a_ops->migratepage)
  		/*
  		 * Most pages have a mapping and most filesystems provide a
  		 * migratepage callback. Anonymous pages are part of swap
  		 * space which also has its own migratepage callback. This
  		 * is the most common path for page migration.
  		 */
  		rc = mapping->a_ops->migratepage(mapping,
  						newpage, page, mode);
  	else
  		rc = fallback_migrate_page(mapping, newpage, page, mode);

  	if (rc != MIGRATEPAGE_SUCCESS) {
  		newpage->mapping = NULL;
  	} else {
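  		/*
  		 * Migration succeeded: transfer the memcg charge to the new
  		 * page before the migration ptes are removed below.
  		 */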
  		mem_cgroup_migrate(page, newpage, false);
  		if (page_was_mapped)
  			remove_migration_ptes(page, newpage);
  		page->mapping = NULL;
  	}
  
  	unlock_page(newpage);
  
  	return rc;
  }
  static int __unmap_and_move(struct page *page, struct page *newpage,
  				int force, enum migrate_mode mode)
  {
  	int rc = -EAGAIN;
  	int page_was_mapped = 0;
  	struct anon_vma *anon_vma = NULL;

  	if (!trylock_page(page)) {
  		if (!force || mode == MIGRATE_ASYNC)
  			goto out;
  
  		/*
  		 * It's not safe for direct compaction to call lock_page.
  		 * For example, during page readahead pages are added locked
  		 * to the LRU. Later, when the IO completes the pages are
  		 * marked uptodate and unlocked. However, the queueing
  		 * could be merging multiple pages for one bio (e.g.
  		 * mpage_readpages). If an allocation happens for the
  		 * second or third page, the process can end up locking
  		 * the same page twice and deadlocking. Rather than
  		 * trying to be clever about what pages can be locked,
  		 * avoid the use of lock_page for direct compaction
  		 * altogether.
  		 */
  		if (current->flags & PF_MEMALLOC)
  			goto out;

  		lock_page(page);
  	}
  
  	if (PageWriteback(page)) {
  		/*
  		 * Only in the case of a full synchronous migration is it
  		 * necessary to wait for PageWriteback. In the async case,
  		 * the retry loop is too short and in the sync-light case,
  		 * the overhead of stalling is too much
  		 */
  		if (mode != MIGRATE_SYNC) {
  			rc = -EBUSY;
  			goto out_unlock;
  		}
  		if (!force)
  			goto out_unlock;
  		wait_on_page_writeback(page);
  	}
  	/*
  	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
  	 * we cannot notice that the anon_vma is freed while we migrate the page.
  	 * This get_anon_vma() delays freeing the anon_vma pointer until the end
  	 * of migration. File cache pages are no problem because of page_lock():
  	 * file caches may use writepage() or lock_page() in migration, so we
  	 * only need to care about anon pages here.
  	 */
  	if (PageAnon(page) && !PageKsm(page)) {
  		/*
  		 * Only page_lock_anon_vma_read() understands the subtleties of
  		 * getting a hold on an anon_vma from outside one of its mms.
  		 */
  		anon_vma = page_get_anon_vma(page);
  		if (anon_vma) {
  			/*
  			 * Anon page
  			 */
  		} else if (PageSwapCache(page)) {
  			/*
  			 * We cannot be sure that the anon_vma of an unmapped
  			 * swapcache page is safe to use because we don't
  			 * know in advance if the VMA that this page belonged
  			 * to still exists. If the VMA and others sharing the
  			 * data have been freed, then the anon_vma could
  			 * already be invalid.
  			 *
  			 * To avoid this possibility, swapcache pages get
  			 * migrated but are not remapped when migration
  			 * completes
  			 */
  		} else {
  			goto out_unlock;
  		}
  	}

  	if (unlikely(isolated_balloon_page(page))) {
  		/*
  		 * A ballooned page does not need any special attention from
  		 * physical to virtual reverse mapping procedures.
  		 * Skip any attempt to unmap PTEs or to remap swap cache,
  		 * in order to avoid burning cycles at rmap level, and perform
  		 * the page migration right away (protected by page lock).
  		 */
  		rc = balloon_page_migrate(newpage, page, mode);
  		goto out_unlock;
  	}
  	/*
  	 * Corner case handling:
  	 * 1. When a new swap-cache page is read in, it is added to the LRU
  	 * and treated as swapcache but it has no rmap yet.
  	 * Calling try_to_unmap() against a page->mapping==NULL page will
  	 * trigger a BUG.  So handle it here.
  	 * 2. An orphaned page (see truncate_complete_page) might have
  	 * fs-private metadata. The page can be picked up due to memory
  	 * offlining.  Everywhere else except page reclaim, the page is
  	 * invisible to the vm, so the page can not be migrated.  So try to
  	 * free the metadata, so the page can be freed.
  	 */
  	if (!page->mapping) {
  		VM_BUG_ON_PAGE(PageAnon(page), page);
  		if (page_has_private(page)) {
  			try_to_free_buffers(page);
  			goto out_unlock;
  		}
  		goto skip_unmap;
  	}
  	/* Establish migration ptes or remove ptes */
  	if (page_mapped(page)) {
  		try_to_unmap(page,
  			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
  		page_was_mapped = 1;
  	}

  skip_unmap:
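  	/*
  	 * Only move the page once it is no longer mapped: either it never was,
  	 * or try_to_unmap() has replaced every pte with a migration entry.
  	 */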
  	if (!page_mapped(page))
  		rc = move_to_new_page(newpage, page, page_was_mapped, mode);

  	if (rc && page_was_mapped)
  		remove_migration_ptes(page, page);
  
  	/* Drop an anon_vma reference if we took one */
  	if (anon_vma)
  		put_anon_vma(anon_vma);

  out_unlock:
  	unlock_page(page);
  out:
  	return rc;
  }

  /*
   * Obtain the lock on page, remove all ptes and migrate the page
   * to the newly allocated page in newpage.
   */
  static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
  			unsigned long private, struct page *page, int force,
  			enum migrate_mode mode)
  {
  	int rc = 0;
  	int *result = NULL;
  	struct page *newpage = get_new_page(page, private, &result);
  
  	if (!newpage)
  		return -ENOMEM;
  
  	if (page_count(page) == 1) {
  		/* page was freed from under us. So we are done. */
  		goto out;
  	}
  
  	if (unlikely(PageTransHuge(page)))
  		if (unlikely(split_huge_page(page)))
  			goto out;
  	rc = __unmap_and_move(page, newpage, force, mode);

  out:
  	if (rc != -EAGAIN) {
  		/*
  		 * A page that has been migrated has all references
  		 * removed and will be freed. A page that has not been
  		 * migrated will have kept its references and be
  		 * restored.
  		 */
  		list_del(&page->lru);
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
  				page_is_file_cache(page));
  		putback_lru_page(page);
  	}

  	/*
  	 * If migration was not successful and there's a freeing callback, use
  	 * it.  Otherwise, putback_lru_page() will drop the reference grabbed
  	 * during isolation.
  	 */
  	if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
  		ClearPageSwapBacked(newpage);
  		put_new_page(newpage, private);
  	} else if (unlikely(__is_movable_balloon_page(newpage))) {
  		/* drop our reference, page already in the balloon */
  		put_page(newpage);
  	} else
  		putback_lru_page(newpage);
  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(newpage);
  	}
  	return rc;
  }
  
  /*
   * Counterpart of unmap_and_move() for hugepage migration.
   *
   * This function doesn't wait for the completion of hugepage I/O
   * because there is no race between I/O and migration for hugepages.
   * Note that currently hugepage I/O occurs only in direct I/O
   * where no lock is held and PG_writeback is irrelevant,
   * and the writeback status of all subpages is counted in the reference
   * count of the head page (i.e. if all subpages of a 2MB hugepage are
   * under direct I/O, the reference count of the head page is 512 and a bit more.)
   * This means that when we try to migrate a hugepage whose subpages are
   * doing direct I/O, some references remain after try_to_unmap() and
   * hugepage migration fails without data corruption.
   *
   * There is also no race when direct I/O is issued on a page under migration,
   * because then the pte is replaced with a migration swap entry and the
   * direct I/O code will wait in the page fault for migration to complete.
   */
  static int unmap_and_move_huge_page(new_page_t get_new_page,
  				free_page_t put_new_page, unsigned long private,
  				struct page *hpage, int force,
  				enum migrate_mode mode)
  {
  	int rc = 0;
  	int *result = NULL;
  	int page_was_mapped = 0;
  	struct page *new_hpage;
  	struct anon_vma *anon_vma = NULL;
  	/*
  	 * Movability of hugepages depends on architectures and hugepage size.
  	 * This check is necessary because some callers of hugepage migration
  	 * like soft offline and memory hotremove don't walk through page
  	 * tables or check whether the hugepage is pmd-based or not before
  	 * kicking migration.
  	 */
  	if (!hugepage_migration_supported(page_hstate(hpage))) {
  		putback_active_hugepage(hpage);
  		return -ENOSYS;
  	}

  	new_hpage = get_new_page(hpage, private, &result);
  	if (!new_hpage)
  		return -ENOMEM;
  
  	rc = -EAGAIN;
  
  	if (!trylock_page(hpage)) {
  		if (!force || mode != MIGRATE_SYNC)
  			goto out;
  		lock_page(hpage);
  	}
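
  	/*
  	 * As in __unmap_and_move(), take a reference on the anon_vma so that
  	 * it cannot be freed while the hugepage is unmapped.
  	 */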
  	if (PageAnon(hpage))
  		anon_vma = page_get_anon_vma(hpage);

  	if (page_mapped(hpage)) {
  		try_to_unmap(hpage,
  			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
  		page_was_mapped = 1;
  	}
  
  	if (!page_mapped(hpage))
  		rc = move_to_new_page(new_hpage, hpage, page_was_mapped, mode);

  	if (rc != MIGRATEPAGE_SUCCESS && page_was_mapped)
  		remove_migration_ptes(hpage, hpage);
  	if (anon_vma)
  		put_anon_vma(anon_vma);

  	if (rc == MIGRATEPAGE_SUCCESS)
  		hugetlb_cgroup_migrate(hpage, new_hpage);
  	unlock_page(hpage);
  out:
  	if (rc != -EAGAIN)
  		putback_active_hugepage(hpage);
  
  	/*
  	 * If migration was not successful and there's a freeing callback, use
  	 * it.  Otherwise, put_page() will drop the reference grabbed during
  	 * isolation.
  	 */
  	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
  		put_new_page(new_hpage, private);
  	else
  		put_page(new_hpage);
  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(new_hpage);
  	}
  	return rc;
  }
  
  /*
   * migrate_pages - migrate the pages specified in a list, to the free pages
   *		   supplied as the target for the page migration
   *
   * @from:		The list of pages to be migrated.
   * @get_new_page:	The function used to allocate free pages to be used
   *			as the target of the page migration.
   * @put_new_page:	The function used to free target pages if migration
   *			fails, or NULL if no special handling is necessary.
c73e5c9c5   Srivatsa S. Bhat   mm: rewrite the c...
1061
1062
1063
1064
   * @private:		Private data to be passed on to get_new_page()
   * @mode:		The migration mode that specifies the constraints for
   *			page migration, if any.
   * @reason:		The reason for page migration.
b20a35035   Christoph Lameter   [PATCH] page migr...
1065
   *
c73e5c9c5   Srivatsa S. Bhat   mm: rewrite the c...
1066
1067
1068
   * The function returns after 10 attempts or when no pages are movable any
   * more because the list has become empty or only non-retryable pages remain.
   * The caller should call putback_movable_pages() to return pages to the LRU
28bd65781   Minchan Kim   mm: migration: cl...
1069
   * or free list only if ret != 0.
b20a35035   Christoph Lameter   [PATCH] page migr...
1070
   *
c73e5c9c5   Srivatsa S. Bhat   mm: rewrite the c...
1071
   * Returns the number of pages that were not migrated, or an error code.
b20a35035   Christoph Lameter   [PATCH] page migr...
1072
   */
9c620e2bc   Hugh Dickins   mm: remove offlin...
1073
  int migrate_pages(struct list_head *from, new_page_t get_new_page,
68711a746   David Rientjes   mm, migration: ad...
1074
1075
  		free_page_t put_new_page, unsigned long private,
  		enum migrate_mode mode, int reason)
b20a35035   Christoph Lameter   [PATCH] page migr...
1076
  {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1077
  	int retry = 1;
b20a35035   Christoph Lameter   [PATCH] page migr...
1078
  	int nr_failed = 0;
5647bc293   Mel Gorman   mm: compaction: M...
1079
  	int nr_succeeded = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
1080
1081
1082
1083
1084
1085
1086
1087
  	int pass = 0;
  	struct page *page;
  	struct page *page2;
  	int swapwrite = current->flags & PF_SWAPWRITE;
  	int rc;
  
  	if (!swapwrite)
  		current->flags |= PF_SWAPWRITE;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1088
1089
  	for(pass = 0; pass < 10 && retry; pass++) {
  		retry = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
1090

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1091
  		list_for_each_entry_safe(page, page2, from, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1092
  			cond_resched();
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1093

31caf665e   Naoya Horiguchi   mm: migrate: make...
1094
1095
  			if (PageHuge(page))
  				rc = unmap_and_move_huge_page(get_new_page,
68711a746   David Rientjes   mm, migration: ad...
1096
1097
  						put_new_page, private, page,
  						pass > 2, mode);
31caf665e   Naoya Horiguchi   mm: migrate: make...
1098
  			else
68711a746   David Rientjes   mm, migration: ad...
1099
1100
  				rc = unmap_and_move(get_new_page, put_new_page,
  						private, page, pass > 2, mode);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1101

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1102
  			switch(rc) {
95a402c38   Christoph Lameter   [PATCH] page migr...
1103
1104
  			case -ENOMEM:
  				goto out;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1105
  			case -EAGAIN:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1106
  				retry++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1107
  				break;
78bd52097   Rafael Aquini   mm: adjust addres...
1108
  			case MIGRATEPAGE_SUCCESS:
5647bc293   Mel Gorman   mm: compaction: M...
1109
  				nr_succeeded++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1110
1111
  				break;
  			default:
354a33633   Naoya Horiguchi   mm/migrate: add c...
1112
1113
1114
1115
1116
1117
  				/*
  				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
  				 * unlike the -EAGAIN case, the failed page is
  				 * removed from migration page list and not
  				 * retried in the next outer loop.
  				 */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1118
  				nr_failed++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1119
  				break;
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1120
  			}
b20a35035   Christoph Lameter   [PATCH] page migr...
1121
1122
  		}
  	}
78bd52097   Rafael Aquini   mm: adjust addres...
1123
  	rc = nr_failed + retry;
95a402c38   Christoph Lameter   [PATCH] page migr...
1124
  out:
5647bc293   Mel Gorman   mm: compaction: M...
1125
1126
1127
1128
  	if (nr_succeeded)
  		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
  	if (nr_failed)
  		count_vm_events(PGMIGRATE_FAIL, nr_failed);
7b2a2d4a1   Mel Gorman   mm: migrate: Add ...
1129
  	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
b20a35035   Christoph Lameter   [PATCH] page migr...
1130
1131
  	if (!swapwrite)
  		current->flags &= ~PF_SWAPWRITE;
78bd52097   Rafael Aquini   mm: adjust addres...
1132
  	return rc;
b20a35035   Christoph Lameter   [PATCH] page migr...
1133
  }
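
A minimal caller sketch may help when reading the kernel-doc above: isolate a page onto a private list, hand migrate_pages() an allocation callback, and put back whatever could not be moved. It mirrors the pattern of do_move_page_to_node_array() and migrate_misplaced_page() later in this file; the example_* names and the use of a bare node id as @private are illustrative assumptions, not code from this file.

  /* Illustrative only: allocate the destination page on the node passed as @private. */
  static struct page *example_new_page(struct page *page, unsigned long private,
  				     int **result)
  {
  	return alloc_pages_exact_node((int)private,
  				      GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
  }
  
  /* Illustrative only: migrate one LRU page to @nid, cleaning up on failure. */
  static int example_migrate_one(struct page *page, int nid)
  {
  	LIST_HEAD(pagelist);
  	int err;
  
  	if (isolate_lru_page(page))
  		return -EBUSY;
  
  	list_add_tail(&page->lru, &pagelist);
  	inc_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page));
  
  	err = migrate_pages(&pagelist, example_new_page, NULL,
  			    (unsigned long)nid, MIGRATE_SYNC, MR_SYSCALL);
  	if (err)
  		putback_movable_pages(&pagelist);
  	return err;
  }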
95a402c38   Christoph Lameter   [PATCH] page migr...
1134

742755a1d   Christoph Lameter   [PATCH] page migr...
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
  #ifdef CONFIG_NUMA
  /*
   * Move a list of individual pages
   */
  struct page_to_node {
  	unsigned long addr;
  	struct page *page;
  	int node;
  	int status;
  };
  
  static struct page *new_page_node(struct page *p, unsigned long private,
  		int **result)
  {
  	struct page_to_node *pm = (struct page_to_node *)private;
  
  	while (pm->node != MAX_NUMNODES && pm->page != p)
  		pm++;
  
  	if (pm->node == MAX_NUMNODES)
  		return NULL;
  
  	*result = &pm->status;
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1158
1159
1160
1161
1162
  	if (PageHuge(p))
  		return alloc_huge_page_node(page_hstate(compound_head(p)),
  					pm->node);
  	else
  		return alloc_pages_exact_node(pm->node,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1163
  				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
1164
1165
1166
1167
1168
1169
  }
  
  /*
   * Move a set of pages as indicated in the pm array. The addr
   * field must be set to the virtual address of the page to be moved
   * and the node number must contain a valid target node.
5e9a0f023   Brice Goglin   mm: extract do_pa...
1170
   * The pm array ends with node = MAX_NUMNODES.
742755a1d   Christoph Lameter   [PATCH] page migr...
1171
   */
5e9a0f023   Brice Goglin   mm: extract do_pa...
1172
1173
1174
  static int do_move_page_to_node_array(struct mm_struct *mm,
  				      struct page_to_node *pm,
  				      int migrate_all)
742755a1d   Christoph Lameter   [PATCH] page migr...
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
  {
  	int err;
  	struct page_to_node *pp;
  	LIST_HEAD(pagelist);
  
  	down_read(&mm->mmap_sem);
  
  	/*
  	 * Build a list of pages to migrate
  	 */
742755a1d   Christoph Lameter   [PATCH] page migr...
1185
1186
1187
  	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
  		struct vm_area_struct *vma;
  		struct page *page;
742755a1d   Christoph Lameter   [PATCH] page migr...
1188
1189
  		err = -EFAULT;
  		vma = find_vma(mm, pp->addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1190
  		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
742755a1d   Christoph Lameter   [PATCH] page migr...
1191
  			goto set_status;
500d65d47   Andrea Arcangeli   thp: pmd_trans_hu...
1192
  		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1193
1194
1195
1196
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1197
1198
1199
  		err = -ENOENT;
  		if (!page)
  			goto set_status;
62b61f611   Hugh Dickins   ksm: memory hotre...
1200
  		/* Use PageReserved to check for zero page */
b79bc0a0c   Hugh Dickins   ksm: enable KSM p...
1201
  		if (PageReserved(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
  			goto put_and_set;
  
  		pp->page = page;
  		err = page_to_nid(page);
  
  		if (err == pp->node)
  			/*
  			 * Page is already on the target node
  			 */
  			goto put_and_set;
  
  		err = -EACCES;
  		if (page_mapcount(page) > 1 &&
  				!migrate_all)
  			goto put_and_set;
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1217
1218
1219
1220
  		if (PageHuge(page)) {
  			isolate_huge_page(page, &pagelist);
  			goto put_and_set;
  		}
62695a84e   Nick Piggin   vmscan: move isol...
1221
  		err = isolate_lru_page(page);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1222
  		if (!err) {
62695a84e   Nick Piggin   vmscan: move isol...
1223
  			list_add_tail(&page->lru, &pagelist);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1224
1225
1226
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
  		}
742755a1d   Christoph Lameter   [PATCH] page migr...
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
  put_and_set:
  		/*
  		 * Either remove the duplicate refcount from
  		 * isolate_lru_page() or drop the page ref if it was
  		 * not isolated.
  		 */
  		put_page(page);
  set_status:
  		pp->status = err;
  	}
e78bbfa82   Brice Goglin   mm: stop returnin...
1237
  	err = 0;
cf608ac19   Minchan Kim   mm: compaction: f...
1238
  	if (!list_empty(&pagelist)) {
68711a746   David Rientjes   mm, migration: ad...
1239
  		err = migrate_pages(&pagelist, new_page_node, NULL,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1240
  				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
cf608ac19   Minchan Kim   mm: compaction: f...
1241
  		if (err)
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1242
  			putback_movable_pages(&pagelist);
cf608ac19   Minchan Kim   mm: compaction: f...
1243
  	}
742755a1d   Christoph Lameter   [PATCH] page migr...
1244
1245
1246
1247
1248
1249
  
  	up_read(&mm->mmap_sem);
  	return err;
  }
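
The pm array this function expects is easiest to see as a concrete, made-up instance: the caller fills in only .addr and .node, .page and .status are written here, and the final entry exists solely to carry the MAX_NUMNODES terminator. The addresses below are hypothetical 64-bit user addresses.

  /* Illustrative only: a two-entry request plus the end marker. */
  static struct page_to_node example_pm[] = {
  	{ .addr = 0x7f0000001000UL, .node = 1 },
  	{ .addr = 0x7f0000002000UL, .node = 0 },
  	{ .node = MAX_NUMNODES },	/* end marker, never migrated */
  };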
  
  /*
5e9a0f023   Brice Goglin   mm: extract do_pa...
1250
1251
1252
   * Migrate an array of page addresses onto an array of nodes and fill
   * the corresponding array of status values.
   */
3268c63ed   Christoph Lameter   mm: fix move/migr...
1253
  static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
5e9a0f023   Brice Goglin   mm: extract do_pa...
1254
1255
1256
1257
1258
  			 unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 const int __user *nodes,
  			 int __user *status, int flags)
  {
3140a2273   Brice Goglin   mm: rework do_pag...
1259
  	struct page_to_node *pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1260
1261
1262
  	unsigned long chunk_nr_pages;
  	unsigned long chunk_start;
  	int err;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1263

3140a2273   Brice Goglin   mm: rework do_pag...
1264
1265
1266
  	err = -ENOMEM;
  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
  	if (!pm)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1267
  		goto out;
35282a2de   Brice Goglin   migration: only m...
1268
1269
  
  	migrate_prep();
5e9a0f023   Brice Goglin   mm: extract do_pa...
1270
  	/*
3140a2273   Brice Goglin   mm: rework do_pag...
1271
1272
  	 * Store a chunk of the page_to_node array in a page,
  	 * but keep the last entry as an end marker
5e9a0f023   Brice Goglin   mm: extract do_pa...
1273
  	 */
3140a2273   Brice Goglin   mm: rework do_pag...
1274
  	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1275

3140a2273   Brice Goglin   mm: rework do_pag...
1276
1277
1278
1279
  	for (chunk_start = 0;
  	     chunk_start < nr_pages;
  	     chunk_start += chunk_nr_pages) {
  		int j;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1280

3140a2273   Brice Goglin   mm: rework do_pag...
1281
1282
1283
1284
1285
1286
  		if (chunk_start + chunk_nr_pages > nr_pages)
  			chunk_nr_pages = nr_pages - chunk_start;
  
  		/* fill the chunk pm with addrs and nodes from user-space */
  		for (j = 0; j < chunk_nr_pages; j++) {
  			const void __user *p;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1287
  			int node;
3140a2273   Brice Goglin   mm: rework do_pag...
1288
1289
1290
1291
1292
1293
  			err = -EFAULT;
  			if (get_user(p, pages + j + chunk_start))
  				goto out_pm;
  			pm[j].addr = (unsigned long) p;
  
  			if (get_user(node, nodes + j + chunk_start))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1294
1295
1296
  				goto out_pm;
  
  			err = -ENODEV;
6f5a55f1a   Linus Torvalds   Fix potential cra...
1297
1298
  			if (node < 0 || node >= MAX_NUMNODES)
  				goto out_pm;
389162c22   Lai Jiangshan   mm,migrate: use N...
1299
  			if (!node_state(node, N_MEMORY))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1300
1301
1302
1303
1304
  				goto out_pm;
  
  			err = -EACCES;
  			if (!node_isset(node, task_nodes))
  				goto out_pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
  			pm[j].node = node;
  		}
  
  		/* End marker for this chunk */
  		pm[chunk_nr_pages].node = MAX_NUMNODES;
  
  		/* Migrate this chunk */
  		err = do_move_page_to_node_array(mm, pm,
  						 flags & MPOL_MF_MOVE_ALL);
  		if (err < 0)
  			goto out_pm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1316

5e9a0f023   Brice Goglin   mm: extract do_pa...
1317
  		/* Return status information */
3140a2273   Brice Goglin   mm: rework do_pag...
1318
1319
  		for (j = 0; j < chunk_nr_pages; j++)
  			if (put_user(pm[j].status, status + j + chunk_start)) {
5e9a0f023   Brice Goglin   mm: extract do_pa...
1320
  				err = -EFAULT;
3140a2273   Brice Goglin   mm: rework do_pag...
1321
1322
1323
1324
  				goto out_pm;
  			}
  	}
  	err = 0;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1325
1326
  
  out_pm:
3140a2273   Brice Goglin   mm: rework do_pag...
1327
  	free_page((unsigned long)pm);
5e9a0f023   Brice Goglin   mm: extract do_pa...
1328
1329
1330
1331
1332
  out:
  	return err;
  }
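
A rough check of the chunking above, assuming a 64-bit build with 4 KiB pages (an assumption, not something this code depends on): struct page_to_node is 24 bytes, so each chunk page holds 170 slots, one of which is reserved for the end marker.

  /*
   * Back-of-the-envelope numbers for the chunking in do_pages_move(),
   * assuming PAGE_SIZE == 4096 and sizeof(struct page_to_node) == 24:
   *
   *	PAGE_SIZE / sizeof(struct page_to_node)	= 170 slots per page
   *	chunk_nr_pages				= 170 - 1 = 169 user entries
   *
   * so a request for, say, 1000 pages is handled in ceil(1000 / 169) = 6 chunks.
   */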
  
  /*
2f007e74b   Brice Goglin   mm: don't vmalloc...
1333
   * Determine the nodes of an array of pages and store them in an array of
   * status values.
742755a1d   Christoph Lameter   [PATCH] page migr...
1334
   */
80bba1290   Brice Goglin   mm: no get_user/p...
1335
1336
  static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
  				const void __user **pages, int *status)
742755a1d   Christoph Lameter   [PATCH] page migr...
1337
  {
2f007e74b   Brice Goglin   mm: don't vmalloc...
1338
  	unsigned long i;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1339

742755a1d   Christoph Lameter   [PATCH] page migr...
1340
  	down_read(&mm->mmap_sem);
2f007e74b   Brice Goglin   mm: don't vmalloc...
1341
  	for (i = 0; i < nr_pages; i++) {
80bba1290   Brice Goglin   mm: no get_user/p...
1342
  		unsigned long addr = (unsigned long)(*pages);
742755a1d   Christoph Lameter   [PATCH] page migr...
1343
1344
  		struct vm_area_struct *vma;
  		struct page *page;
c095adbc2   KOSAKI Motohiro   mm: Don't touch u...
1345
  		int err = -EFAULT;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1346
1347
  
  		vma = find_vma(mm, addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1348
  		if (!vma || addr < vma->vm_start)
742755a1d   Christoph Lameter   [PATCH] page migr...
1349
  			goto set_status;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1350
  		page = follow_page(vma, addr, 0);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1351
1352
1353
1354
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1355
1356
  		err = -ENOENT;
  		/* Use PageReserved to check for zero page */
b79bc0a0c   Hugh Dickins   ksm: enable KSM p...
1357
  		if (!page || PageReserved(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1358
1359
1360
1361
  			goto set_status;
  
  		err = page_to_nid(page);
  set_status:
80bba1290   Brice Goglin   mm: no get_user/p...
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
  		*status = err;
  
  		pages++;
  		status++;
  	}
  
  	up_read(&mm->mmap_sem);
  }
  
  /*
   * Determine the nodes of a user array of pages and store them in
   * a user array of status values.
   */
  static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 int __user *status)
  {
  #define DO_PAGES_STAT_CHUNK_NR 16
  	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
  	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
80bba1290   Brice Goglin   mm: no get_user/p...
1382

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1383
1384
  	while (nr_pages) {
  		unsigned long chunk_nr;
80bba1290   Brice Goglin   mm: no get_user/p...
1385

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1386
1387
1388
1389
1390
1391
  		chunk_nr = nr_pages;
  		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
  			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
  
  		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
  			break;
80bba1290   Brice Goglin   mm: no get_user/p...
1392
1393
  
  		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1394
1395
  		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
  			break;
742755a1d   Christoph Lameter   [PATCH] page migr...
1396

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1397
1398
1399
1400
1401
  		pages += chunk_nr;
  		status += chunk_nr;
  		nr_pages -= chunk_nr;
  	}
  	return nr_pages ? -EFAULT : 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
1402
1403
1404
1405
1406
1407
  }
  
  /*
   * Move a list of pages in the address space of the currently executing
   * process.
   */
938bb9f5e   Heiko Carstens   [CVE-2009-0029] S...
1408
1409
1410
1411
  SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
  		const void __user * __user *, pages,
  		const int __user *, nodes,
  		int __user *, status, int, flags)
742755a1d   Christoph Lameter   [PATCH] page migr...
1412
  {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1413
  	const struct cred *cred = current_cred(), *tcred;
742755a1d   Christoph Lameter   [PATCH] page migr...
1414
  	struct task_struct *task;
742755a1d   Christoph Lameter   [PATCH] page migr...
1415
  	struct mm_struct *mm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1416
  	int err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1417
  	nodemask_t task_nodes;
742755a1d   Christoph Lameter   [PATCH] page migr...
1418
1419
1420
1421
1422
1423
1424
1425
1426
  
  	/* Check flags */
  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
  		return -EINVAL;
  
  	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
  		return -EPERM;
  
  	/* Find the mm_struct */
a879bf582   Greg Thelen   mm: grab rcu read...
1427
  	rcu_read_lock();
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
1428
  	task = pid ? find_task_by_vpid(pid) : current;
742755a1d   Christoph Lameter   [PATCH] page migr...
1429
  	if (!task) {
a879bf582   Greg Thelen   mm: grab rcu read...
1430
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1431
1432
  		return -ESRCH;
  	}
3268c63ed   Christoph Lameter   mm: fix move/migr...
1433
  	get_task_struct(task);
742755a1d   Christoph Lameter   [PATCH] page migr...
1434
1435
1436
1437
1438
1439
1440
  
  	/*
  	 * Check if this process has the right to modify the specified
  	 * process. The right exists if the process has administrative
  	 * capabilities, superuser privileges or the same
  	 * userid as the target process.
  	 */
c69e8d9c0   David Howells   CRED: Use RCU to ...
1441
  	tcred = __task_cred(task);
b38a86eb1   Eric W. Biederman   userns: Convert t...
1442
1443
  	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
  	    !uid_eq(cred->uid,  tcred->suid) && !uid_eq(cred->uid,  tcred->uid) &&
742755a1d   Christoph Lameter   [PATCH] page migr...
1444
  	    !capable(CAP_SYS_NICE)) {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1445
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1446
  		err = -EPERM;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1447
  		goto out;
742755a1d   Christoph Lameter   [PATCH] page migr...
1448
  	}
c69e8d9c0   David Howells   CRED: Use RCU to ...
1449
  	rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1450

86c3a7645   David Quigley   [PATCH] SELinux: ...
1451
1452
   	err = security_task_movememory(task);
   	if (err)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1453
  		goto out;
86c3a7645   David Quigley   [PATCH] SELinux: ...
1454

3268c63ed   Christoph Lameter   mm: fix move/migr...
1455
1456
1457
  	task_nodes = cpuset_mems_allowed(task);
  	mm = get_task_mm(task);
  	put_task_struct(task);
6e8b09eaf   Sasha Levin   mm: fix NULL ptr ...
1458
1459
1460
1461
1462
1463
1464
1465
  	if (!mm)
  		return -EINVAL;
  
  	if (nodes)
  		err = do_pages_move(mm, task_nodes, nr_pages, pages,
  				    nodes, status, flags);
  	else
  		err = do_pages_stat(mm, nr_pages, pages, status);
742755a1d   Christoph Lameter   [PATCH] page migr...
1466

742755a1d   Christoph Lameter   [PATCH] page migr...
1467
1468
  	mmput(mm);
  	return err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1469
1470
1471
1472
  
  out:
  	put_task_struct(task);
  	return err;
742755a1d   Christoph Lameter   [PATCH] page migr...
1473
  }
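
From userspace this syscall is normally reached through the libnuma wrapper declared in <numaif.h>. The sketch below is illustrative and not part of this file (build with -lnuma): it touches one page, asks the kernel to move it to node 0, and reads the per-page status the kernel fills in. Passing a NULL nodes array instead would only query placement, i.e. the do_pages_stat() path above. The target node is assumed to exist.

  #include <numaif.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  
  int main(void)
  {
  	long psz = sysconf(_SC_PAGESIZE);
  	void *buf;
  	void *pages[1];
  	int nodes[1] = { 0 };		/* target node; assumed to exist */
  	int status[1] = { -1 };
  
  	if (posix_memalign(&buf, psz, psz))
  		return 1;
  	((char *)buf)[0] = 1;		/* fault the page in first */
  	pages[0] = buf;
  
  	if (move_pages(0 /* self */, 1, pages, nodes, status, MPOL_MF_MOVE) < 0)
  		perror("move_pages");
  	printf("status[0] = %d (node id, or -errno on per-page failure)\n",
  	       status[0]);
  	free(buf);
  	return 0;
  }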
742755a1d   Christoph Lameter   [PATCH] page migr...
1474

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1475
1476
1477
1478
1479
1480
  #ifdef CONFIG_NUMA_BALANCING
  /*
   * Returns true if this is a safe migration target node for misplaced NUMA
   * pages. Currently it only checks the watermarks, which is crude.
   */
  static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
3abef4e6c   Mel Gorman   mm: numa: take TH...
1481
  				   unsigned long nr_migrate_pages)
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1482
1483
1484
1485
1486
1487
1488
  {
  	int z;
  	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
  		struct zone *zone = pgdat->node_zones + z;
  
  		if (!populated_zone(zone))
  			continue;
6e543d578   Lisa Du   mm: vmscan: fix d...
1489
  		if (!zone_reclaimable(zone))
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
  			continue;
  
  		/* Avoid waking kswapd by allocating nr_migrate_pages pages. */
  		if (!zone_watermark_ok(zone, 0,
  				       high_wmark_pages(zone) +
  				       nr_migrate_pages,
  				       0, 0))
  			continue;
  		return true;
  	}
  	return false;
  }
  
  static struct page *alloc_misplaced_dst_page(struct page *page,
  					   unsigned long data,
  					   int **result)
  {
  	int nid = (int) data;
  	struct page *newpage;
  
  	newpage = alloc_pages_exact_node(nid,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1511
1512
1513
  					 (GFP_HIGHUSER_MOVABLE |
  					  __GFP_THISNODE | __GFP_NOMEMALLOC |
  					  __GFP_NORETRY | __GFP_NOWARN) &
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1514
  					 ~GFP_IOFS, 0);
bac0382c6   Hillf Danton   mm: numa: migrate...
1515

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1516
1517
1518
1519
  	return newpage;
  }
  
  /*
a8f607721   Mel Gorman   mm: numa: Rate li...
1520
1521
1522
   * Page migration rate limiting control.
   * Do not migrate more than @ratelimit_pages in a @migrate_interval_millisecs
   * window of time. The defaults below allow at most 1280M per second.
e14808b49   Mel Gorman   mm: numa: Rate li...
1523
1524
1525
1526
   * If a node is rate-limited then PTE NUMA updates are also rate-limited.
   * However, as it is faults that reset the window, PTE updates will happen
   * unconditionally if there has not been a fault for
   * @pteupdate_interval_millisecs after the throttle window closed.
a8f607721   Mel Gorman   mm: numa: Rate li...
1527
1528
   */
  static unsigned int migrate_interval_millisecs __read_mostly = 100;
e14808b49   Mel Gorman   mm: numa: Rate li...
1529
  static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
a8f607721   Mel Gorman   mm: numa: Rate li...
1530
  static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
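
The defaults above give the "1280M per second" figure quoted in the comment; a quick check assuming 4 KiB pages (PAGE_SHIFT == 12):

  /*
   * Sanity check of the defaults, assuming PAGE_SHIFT == 12 (4 KiB pages):
   *
   *	ratelimit_pages = 128 << (20 - 12) = 32768 pages = 128 MiB per window
   *	window length   = migrate_interval_millisecs = 100 ms
   *
   * i.e. at most 128 MiB per 100 ms window, roughly 1280 MiB/sec per node.
   */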
e14808b49   Mel Gorman   mm: numa: Rate li...
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
  /* Returns true if NUMA migration is currently rate limited */
  bool migrate_ratelimited(int node)
  {
  	pg_data_t *pgdat = NODE_DATA(node);
  
  	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
  				msecs_to_jiffies(pteupdate_interval_millisecs)))
  		return false;
  
  	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
  		return false;
  
  	return true;
  }
b32967ff1   Mel Gorman   mm: numa: Add THP...
1545
  /* Returns true if the node is migrate rate-limited after the update */
1c30e0177   Mel Gorman   mm: numa: make NU...
1546
1547
  static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
  					unsigned long nr_pages)
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1548
  {
a8f607721   Mel Gorman   mm: numa: Rate li...
1549
1550
1551
1552
1553
  	/*
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
a8f607721   Mel Gorman   mm: numa: Rate li...
1554
  	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1555
  		spin_lock(&pgdat->numabalancing_migrate_lock);
a8f607721   Mel Gorman   mm: numa: Rate li...
1556
1557
1558
  		pgdat->numabalancing_migrate_nr_pages = 0;
  		pgdat->numabalancing_migrate_next_window = jiffies +
  			msecs_to_jiffies(migrate_interval_millisecs);
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1559
  		spin_unlock(&pgdat->numabalancing_migrate_lock);
a8f607721   Mel Gorman   mm: numa: Rate li...
1560
  	}
af1839d72   Mel Gorman   mm: numa: trace t...
1561
1562
1563
  	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
  		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
  								nr_pages);
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1564
  		return true;
af1839d72   Mel Gorman   mm: numa: trace t...
1565
  	}
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1566
1567
1568
1569
1570
1571
1572
1573
1574
  
  	/*
  	 * This is an unlocked non-atomic update so errors are possible.
  	 * The consequence is failing to migrate when we potentially should
  	 * have, which is not severe enough to warrant locking. If it is ever
  	 * a problem, it can be converted to a per-cpu counter.
  	 */
  	pgdat->numabalancing_migrate_nr_pages += nr_pages;
  	return false;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1575
  }
1c30e0177   Mel Gorman   mm: numa: make NU...
1576
  static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
b32967ff1   Mel Gorman   mm: numa: Add THP...
1577
  {
340ef3902   Hugh Dickins   mm: numa: cleanup...
1578
  	int page_lru;
a8f607721   Mel Gorman   mm: numa: Rate li...
1579

309381fea   Sasha Levin   mm: dump page whe...
1580
  	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
3abef4e6c   Mel Gorman   mm: numa: take TH...
1581

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1582
  	/* Avoid migrating to a node that is nearly full */
340ef3902   Hugh Dickins   mm: numa: cleanup...
1583
1584
  	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1585

340ef3902   Hugh Dickins   mm: numa: cleanup...
1586
1587
  	if (isolate_lru_page(page))
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1588

340ef3902   Hugh Dickins   mm: numa: cleanup...
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
  	/*
  	 * migrate_misplaced_transhuge_page() skips page migration's usual
  	 * check on page_count(), so we must do it here, now that the page
  	 * has been isolated: a GUP pin, or any other pin, prevents migration.
  	 * The expected page count is 3: 1 for the page's mapcount, 1 for the
  	 * caller's pin and 1 for the reference taken by isolate_lru_page().
  	 */
  	if (PageTransHuge(page) && page_count(page) != 3) {
  		putback_lru_page(page);
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1599
  	}
340ef3902   Hugh Dickins   mm: numa: cleanup...
1600
1601
1602
  	page_lru = page_is_file_cache(page);
  	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
  				hpage_nr_pages(page));
149c33e1c   Mel Gorman   mm: migrate: Drop...
1603
  	/*
340ef3902   Hugh Dickins   mm: numa: cleanup...
1604
1605
1606
  	 * Isolating the page has taken another reference, so the
  	 * caller's reference can be safely dropped without the page
  	 * disappearing underneath us during migration.
149c33e1c   Mel Gorman   mm: migrate: Drop...
1607
1608
  	 */
  	put_page(page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1609
  	return 1;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1610
  }
de466bd62   Mel Gorman   mm: numa: avoid u...
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
  bool pmd_trans_migrating(pmd_t pmd)
  {
  	struct page *page = pmd_page(pmd);
  	return PageLocked(page);
  }
  
  void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
  {
  	struct page *page = pmd_page(*pmd);
  	wait_on_page_locked(page);
  }
b32967ff1   Mel Gorman   mm: numa: Add THP...
1622
1623
1624
1625
1626
  /*
   * Attempt to migrate a misplaced page to the specified destination
   * node. Caller is expected to have an elevated reference count on
   * the page that will be dropped by this function before returning.
   */
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1627
1628
  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
  			   int node)
b32967ff1   Mel Gorman   mm: numa: Add THP...
1629
1630
  {
  	pg_data_t *pgdat = NODE_DATA(node);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1631
  	int isolated;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1632
1633
1634
1635
  	int nr_remaining;
  	LIST_HEAD(migratepages);
  
  	/*
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1636
1637
  	 * Don't migrate file pages that are mapped in multiple processes
  	 * with execute permissions as they are probably shared libraries.
b32967ff1   Mel Gorman   mm: numa: Add THP...
1638
  	 */
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1639
1640
  	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
  	    (vma->vm_flags & VM_EXEC))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1641
  		goto out;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1642
1643
1644
1645
1646
1647
  
  	/*
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
340ef3902   Hugh Dickins   mm: numa: cleanup...
1648
  	if (numamigrate_update_ratelimit(pgdat, 1))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1649
  		goto out;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1650
1651
1652
1653
1654
1655
  
  	isolated = numamigrate_isolate_page(pgdat, page);
  	if (!isolated)
  		goto out;
  
  	list_add(&page->lru, &migratepages);
9c620e2bc   Hugh Dickins   mm: remove offlin...
1656
  	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
68711a746   David Rientjes   mm, migration: ad...
1657
1658
  				     NULL, node, MIGRATE_ASYNC,
  				     MR_NUMA_MISPLACED);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1659
  	if (nr_remaining) {
59c82b70d   Joonsoo Kim   mm/migrate: remov...
1660
1661
1662
1663
1664
1665
  		if (!list_empty(&migratepages)) {
  			list_del(&page->lru);
  			dec_zone_page_state(page, NR_ISOLATED_ANON +
  					page_is_file_cache(page));
  			putback_lru_page(page);
  		}
b32967ff1   Mel Gorman   mm: numa: Add THP...
1666
1667
1668
  		isolated = 0;
  	} else
  		count_vm_numa_event(NUMA_PAGE_MIGRATE);
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1669
  	BUG_ON(!list_empty(&migratepages));
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1670
  	return isolated;
340ef3902   Hugh Dickins   mm: numa: cleanup...
1671
1672
1673
1674
  
  out:
  	put_page(page);
  	return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1675
  }
220018d38   Mel Gorman   mm: numa: Add THP...
1676
  #endif /* CONFIG_NUMA_BALANCING */
b32967ff1   Mel Gorman   mm: numa: Add THP...
1677

220018d38   Mel Gorman   mm: numa: Add THP...
1678
  #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
340ef3902   Hugh Dickins   mm: numa: cleanup...
1679
1680
1681
1682
  /*
   * Migrates a THP to a given target node. page must be locked and is unlocked
   * before returning.
   */
b32967ff1   Mel Gorman   mm: numa: Add THP...
1683
1684
1685
1686
1687
1688
  int migrate_misplaced_transhuge_page(struct mm_struct *mm,
  				struct vm_area_struct *vma,
  				pmd_t *pmd, pmd_t entry,
  				unsigned long address,
  				struct page *page, int node)
  {
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1689
  	spinlock_t *ptl;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1690
1691
1692
  	pg_data_t *pgdat = NODE_DATA(node);
  	int isolated = 0;
  	struct page *new_page = NULL;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1693
  	int page_lru = page_is_file_cache(page);
f714f4f20   Mel Gorman   mm: numa: call MM...
1694
1695
  	unsigned long mmun_start = address & HPAGE_PMD_MASK;
  	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
2b4847e73   Mel Gorman   mm: numa: seriali...
1696
  	pmd_t orig_entry;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1697
1698
  
  	/*
b32967ff1   Mel Gorman   mm: numa: Add THP...
1699
1700
1701
1702
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
d28d43351   Mel Gorman   mm: migrate: Acco...
1703
  	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1704
1705
1706
  		goto out_dropref;
  
  	new_page = alloc_pages_node(node,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1707
1708
  		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
  		HPAGE_PMD_ORDER);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1709
1710
  	if (!new_page)
  		goto out_fail;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1711
  	isolated = numamigrate_isolate_page(pgdat, page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1712
  	if (!isolated) {
b32967ff1   Mel Gorman   mm: numa: Add THP...
1713
  		put_page(new_page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1714
  		goto out_fail;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1715
  	}
b0943d61b   Mel Gorman   mm: numa: defer T...
1716
1717
  	if (mm_tlb_flush_pending(mm))
  		flush_tlb_range(vma, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
  	/* Prepare a page as a migration target */
  	__set_page_locked(new_page);
  	SetPageSwapBacked(new_page);
  
  	/* anon mapping, we can simply copy page->mapping to the new page: */
  	new_page->mapping = page->mapping;
  	new_page->index = page->index;
  	migrate_page_copy(new_page, page);
  	WARN_ON(PageLRU(new_page));
  
  	/* Recheck the target PMD */
f714f4f20   Mel Gorman   mm: numa: call MM...
1729
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1730
  	ptl = pmd_lock(mm, pmd);
2b4847e73   Mel Gorman   mm: numa: seriali...
1731
1732
  	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
  fail_putback:
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1733
  		spin_unlock(ptl);
f714f4f20   Mel Gorman   mm: numa: call MM...
1734
  		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
  
  		/* Reverse changes made by migrate_page_copy() */
  		if (TestClearPageActive(new_page))
  			SetPageActive(page);
  		if (TestClearPageUnevictable(new_page))
  			SetPageUnevictable(page);
  		mlock_migrate_page(page, new_page);
  
  		unlock_page(new_page);
  		put_page(new_page);		/* Free it */
a54a407fb   Mel Gorman   mm: Close races b...
1745
1746
  		/* Retake the caller's reference and put the page back on the LRU */
  		get_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1747
  		putback_lru_page(page);
a54a407fb   Mel Gorman   mm: Close races b...
1748
1749
  		mod_zone_page_state(page_zone(page),
  			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
eb4489f69   Mel Gorman   mm: numa: avoid u...
1750
1751
  
  		goto out_unlock;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1752
  	}
2b4847e73   Mel Gorman   mm: numa: seriali...
1753
  	orig_entry = *pmd;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1754
  	entry = mk_pmd(new_page, vma->vm_page_prot);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1755
  	entry = pmd_mkhuge(entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1756
  	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1757

2b4847e73   Mel Gorman   mm: numa: seriali...
1758
1759
1760
1761
1762
1763
1764
  	/*
  	 * Clear the old entry under pagetable lock and establish the new PTE.
  	 * Any parallel GUP will either observe the old page blocking on the
  	 * page lock, block on the page table lock or observe the new page.
  	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
  	 * guarantee the copy is visible before the pagetable update.
  	 */
f714f4f20   Mel Gorman   mm: numa: call MM...
1765
  	flush_cache_range(vma, mmun_start, mmun_end);
11de9927f   Mel Gorman   mm: numa: add mig...
1766
  	page_add_anon_rmap(new_page, vma, mmun_start);
34ee645e8   Joerg Roedel   mmu_notifier: cal...
1767
  	pmdp_clear_flush_notify(vma, mmun_start, pmd);
f714f4f20   Mel Gorman   mm: numa: call MM...
1768
1769
  	set_pmd_at(mm, mmun_start, pmd, entry);
  	flush_tlb_range(vma, mmun_start, mmun_end);
ce4a9cc57   Stephen Rothwell   mm,numa: fix upda...
1770
  	update_mmu_cache_pmd(vma, address, &entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1771
1772
  
  	if (page_count(page) != 2) {
f714f4f20   Mel Gorman   mm: numa: call MM...
1773
1774
  		set_pmd_at(mm, mmun_start, pmd, orig_entry);
  		flush_tlb_range(vma, mmun_start, mmun_end);
34ee645e8   Joerg Roedel   mmu_notifier: cal...
1775
  		mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
2b4847e73   Mel Gorman   mm: numa: seriali...
1776
1777
1778
1779
  		update_mmu_cache_pmd(vma, address, &entry);
  		page_remove_rmap(new_page);
  		goto fail_putback;
  	}
0a31bc97c   Johannes Weiner   mm: memcontrol: r...
1780
  	mem_cgroup_migrate(page, new_page, false);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1781
  	page_remove_rmap(page);
2b4847e73   Mel Gorman   mm: numa: seriali...
1782

c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1783
  	spin_unlock(ptl);
f714f4f20   Mel Gorman   mm: numa: call MM...
1784
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1785

11de9927f   Mel Gorman   mm: numa: add mig...
1786
1787
1788
  	/* Take an "isolate" reference and put new page on the LRU. */
  	get_page(new_page);
  	putback_lru_page(new_page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1789
1790
1791
1792
1793
1794
1795
  	unlock_page(new_page);
  	unlock_page(page);
  	put_page(page);			/* Drop the rmap reference */
  	put_page(page);			/* Drop the LRU isolation reference */
  
  	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
  	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1796
1797
1798
1799
  	mod_zone_page_state(page_zone(page),
  			NR_ISOLATED_ANON + page_lru,
  			-HPAGE_PMD_NR);
  	return isolated;
340ef3902   Hugh Dickins   mm: numa: cleanup...
1800
1801
  out_fail:
  	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1802
  out_dropref:
2b4847e73   Mel Gorman   mm: numa: seriali...
1803
1804
1805
  	ptl = pmd_lock(mm, pmd);
  	if (pmd_same(*pmd, entry)) {
  		entry = pmd_mknonnuma(entry);
f714f4f20   Mel Gorman   mm: numa: call MM...
1806
  		set_pmd_at(mm, mmun_start, pmd, entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1807
1808
1809
  		update_mmu_cache_pmd(vma, address, &entry);
  	}
  	spin_unlock(ptl);
a54a407fb   Mel Gorman   mm: Close races b...
1810

eb4489f69   Mel Gorman   mm: numa: avoid u...
1811
  out_unlock:
340ef3902   Hugh Dickins   mm: numa: cleanup...
1812
  	unlock_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1813
  	put_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1814
1815
  	return 0;
  }
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1816
1817
1818
  #endif /* CONFIG_NUMA_BALANCING */
  
  #endif /* CONFIG_NUMA */