mm/migrate.c
  /*
   * Memory Migration functionality - linux/mm/migration.c
   *
   * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
   *
   * Page migration was first developed in the context of the memory hotplug
   * project. The main authors of the migration code are:
   *
   * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
   * Hirokazu Takahashi <taka@valinux.co.jp>
   * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
 * undesirable, use migrate_prep_local()
 */
int migrate_prep(void)
{
  	/*
  	 * Clear the LRU lists so pages can be isolated.
  	 * Note that pages may be moved off the LRU after we have
  	 * drained them. Those pages will fail to migrate like other
  	 * pages that may be busy.
  	 */
  	lru_add_drain_all();
  
  	return 0;
  }

/* Do the necessary work of migrate_prep but not if it involves other CPUs */
  int migrate_prep_local(void)
  {
  	lru_add_drain();
  
  	return 0;
  }
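
/*
 * Editor's note: an illustrative caller sequence, not part of this file.
 * Assuming the caller supplies a suitable new_page_t allocator (here called
 * alloc_target_page), a typical user of this API looks roughly like:
 *
 *	LIST_HEAD(pagelist);
 *
 *	migrate_prep();
 *	// ... isolate candidate pages with isolate_lru_page() and add them
 *	// ... to &pagelist, accounting them as NR_ISOLATED_* ...
 *	err = migrate_pages(&pagelist, alloc_target_page, 0,
 *			    MIGRATE_SYNC, MR_SYSCALL);
 *	if (err)
 *		putback_movable_pages(&pagelist);
 *
 * migrate_pages() returns the number of pages that could not be migrated
 * (or a negative error), so anything left on the list must be put back.
 */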

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * This function shall be used whenever the isolated pageset has been
 * built from LRU, balloon or hugetlbfs pages. See isolate_migratepages_range()
 * and isolate_huge_page().
 */
  void putback_movable_pages(struct list_head *l)
  {
  	struct page *page;
  	struct page *page2;
  
  	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		if (unlikely(isolated_balloon_page(page)))
			balloon_page_putback(page);
		else
			putback_lru_page(page);
	}
}

/*
 * Restore a potential migration pte to a working pte entry
 */
  static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
  				 unsigned long addr, void *old)
{
	struct mm_struct *mm = vma->vm_mm;
	swp_entry_t entry;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;
  	if (unlikely(PageHuge(new))) {
  		ptep = huge_pte_offset(mm, addr);
  		if (!ptep)
  			goto out;
		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
	} else {
		pmd = mm_find_pmd(mm, addr);
		if (!pmd)
			goto out;
		if (pmd_trans_huge(*pmd))
			goto out;

		ptep = pte_offset_map(pmd, addr);

		/*
		 * Peek to check is_swap_pte() before taking ptlock?  No, we
		 * can race mremap's move_ptes(), which skips anon_vma lock.
		 */

		ptl = pte_lockptr(mm, pmd);
	}

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto unlock;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry) ||
	    migration_entry_to_page(entry) != old)
		goto unlock;

	get_page(new);
	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
	if (pte_swp_soft_dirty(*ptep))
		pte = pte_mksoft_dirty(pte);
	if (is_write_migration_entry(entry))
		pte = pte_mkwrite(pte);
#ifdef CONFIG_HUGETLB_PAGE
	if (PageHuge(new)) {
		pte = pte_mkhuge(pte);
		pte = arch_make_huge_pte(pte, vma, new, 0);
	}
#endif
	flush_dcache_page(new);
	set_pte_at(mm, addr, ptep, pte);

	if (PageHuge(new)) {
		if (PageAnon(new))
			hugepage_add_anon_rmap(new, vma, addr);
		else
			page_dup_rmap(new);
	} else if (PageAnon(new))
		page_add_anon_rmap(new, vma, addr);
	else
		page_add_file_rmap(new);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, ptep);
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return SWAP_AGAIN;
}
  
  /*
   * Congratulations to trinity for discovering this bug.
   * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
   * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
   * replace the specified range by file ptes throughout (maybe populated after).
   * If page migration finds a page within that range, while it's still located
   * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
   * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
   * But if the migrating page is in a part of the vma outside the range to be
   * remapped, then it will not be cleared, and remove_migration_ptes() needs to
   * deal with it.  Fortunately, this part of the vma is of course still linear,
   * so we just need to use linear location on the nonlinear list.
   */
  static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
  		struct address_space *mapping, void *arg)
  {
  	struct vm_area_struct *vma;
  	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  	unsigned long addr;
  
  	list_for_each_entry(vma,
  		&mapping->i_mmap_nonlinear, shared.nonlinear) {
  
  		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
  		if (addr >= vma->vm_start && addr < vma->vm_end)
  			remove_migration_pte(page, vma, addr, arg);
  	}
  	return SWAP_AGAIN;
  }
  
  /*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
static void remove_migration_ptes(struct page *old, struct page *new)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
	};

	rmap_walk(new, &rwc);
  }
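
/*
 * Editor's note: an illustrative sketch of the counterpart operation, not
 * part of this file.  The migration entries that remove_migration_pte()
 * tears down are installed by try_to_unmap_one() when it is invoked with
 * TTU_MIGRATION; conceptually, the unmap side does something like:
 *
 *	swp_entry_t entry;
 *
 *	entry = make_migration_entry(page, pte_write(pteval));
 *	swp_pte = swp_entry_to_pte(entry);
 *	set_pte_at(mm, address, pte, swp_pte);
 *
 * so that any fault on that address blocks in migration_entry_wait() until
 * migration finishes and the pte is restored above.
 */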
  
  /*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto out;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry))
  		goto out;
  
  	page = migration_entry_to_page(entry);
  	/*
  	 * Once radix-tree replacement of page migration started, page_count
  	 * *must* be zero. And, we don't want to call wait_on_page_locked()
  	 * against a page without get_page().
  	 * So, we use get_page_unless_zero(), here. Even failed, page fault
  	 * will occur again.
  	 */
  	if (!get_page_unless_zero(page))
  		goto out;
  	pte_unmap_unlock(ptep, ptl);
  	wait_on_page_locked(page);
  	put_page(page);
  	return;
  out:
  	pte_unmap_unlock(ptep, ptl);
  }

void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
  				unsigned long address)
  {
  	spinlock_t *ptl = pte_lockptr(mm, pmd);
  	pte_t *ptep = pte_offset_map(pmd, address);
  	__migration_entry_wait(mm, ptep, ptl);
  }

void migration_entry_wait_huge(struct vm_area_struct *vma,
  		struct mm_struct *mm, pte_t *pte)
  {
  	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
  	__migration_entry_wait(mm, pte, ptl);
  }
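
/*
 * Editor's note: an illustrative sketch, not part of this file.  These
 * helpers are normally reached from the fault path: when do_swap_page()
 * finds a non-present pte whose swap entry is really a migration entry,
 * it waits for the migration instead of swapping in, roughly:
 *
 *	entry = pte_to_swp_entry(orig_pte);
 *	if (unlikely(non_swap_entry(entry))) {
 *		if (is_migration_entry(entry)) {
 *			migration_entry_wait(mm, pmd, address);
 *			goto out;
 *		}
 *	}
 *
 * hugetlb_fault() does the same through migration_entry_wait_huge().
 */
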
  #ifdef CONFIG_BLOCK
  /* Returns true if all buffers are successfully locked */
  static bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
  {
  	struct buffer_head *bh = head;
  
  	/* Simple case, sync compaction */
  	if (mode != MIGRATE_ASYNC) {
  		do {
  			get_bh(bh);
  			lock_buffer(bh);
  			bh = bh->b_this_page;
  
  		} while (bh != head);
  
  		return true;
  	}
  
  	/* async case, we cannot block on lock_buffer so use trylock_buffer */
  	do {
  		get_bh(bh);
  		if (!trylock_buffer(bh)) {
  			/*
  			 * We failed to lock the buffer and cannot stall in
  			 * async migration. Release the taken locks
  			 */
  			struct buffer_head *failed_bh = bh;
  			put_bh(failed_bh);
  			bh = head;
  			while (bh != failed_bh) {
  				unlock_buffer(bh);
  				put_bh(bh);
  				bh = bh->b_this_page;
  			}
  			return false;
  		}
  
  		bh = bh->b_this_page;
  	} while (bh != head);
  	return true;
  }
  #else
  static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  							enum migrate_mode mode)
  {
  	return true;
  }
  #endif /* CONFIG_BLOCK */

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page,
		struct buffer_head *head, enum migrate_mode mode,
		int extra_count)
{
	int expected_count = 1 + extra_count;
	void **pslot;

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;
		return MIGRATEPAGE_SUCCESS;
	}

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count += 1 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_freeze_refs(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * In the async migration case of moving a page with buffers, lock the
	 * buffers using trylock before the mapping is moved. If the mapping
	 * was moved and we later failed to lock the buffers, we could not move
	 * the mapping back due to an elevated page count, and would have to
	 * block waiting on other references to be dropped.
	 */
	if (mode == MIGRATE_ASYNC && head &&
			!buffer_migrate_lock_buffers(head, mode)) {
		page_unfreeze_refs(page, expected_count);
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 */
	get_page(newpage);	/* add cache reference */
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	radix_tree_replace_slot(pslot, newpage);

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_unfreeze_refs(page, expected_count - 1);

	/*
  	 * If moved to a different zone then also account
  	 * the page for that zone. Other VM counters will be
  	 * taken care of when we establish references to the
  	 * new page and drop references to the old page.
  	 *
  	 * Note that anonymous pages are accounted for
  	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
  	 * are mapped to swap space.
  	 */
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__inc_zone_page_state(newpage, NR_FILE_PAGES);
	if (!PageSwapCache(page) && PageSwapBacked(page)) {
		__dec_zone_page_state(page, NR_SHMEM);
		__inc_zone_page_state(newpage, NR_SHMEM);
	}
	spin_unlock_irq(&mapping->tree_lock);

	return MIGRATEPAGE_SUCCESS;
  }
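
/*
 * Editor's note: a worked example of the reference arithmetic above, not
 * part of this file.  For a mapped file page with buffer heads and
 * extra_count == 0, expected_count starts at 1, page_has_private() adds 1
 * and the page cache reference adds 1, so the page may only be frozen when
 * page_count(page) == 3: one reference held by the page cache, one by the
 * buffers (PagePrivate), and the one taken by the isolation/migration code
 * itself.  An anonymous page that is not in the swap cache never reaches
 * this point (mapping == NULL) and only needs page_count(page) to equal
 * 1 + extra_count.
 */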
  
  /*
   * The expected number of remaining references is the same as that
   * of migrate_page_move_mapping().
   */
  int migrate_huge_page_move_mapping(struct address_space *mapping,
  				   struct page *newpage, struct page *page)
  {
  	int expected_count;
  	void **pslot;
  
  	if (!mapping) {
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return MIGRATEPAGE_SUCCESS;
  	}
  
  	spin_lock_irq(&mapping->tree_lock);
  
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
  					page_index(page));
  
  	expected_count = 2 + page_has_private(page);
  	if (page_count(page) != expected_count ||
  		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	if (!page_freeze_refs(page, expected_count)) {
  		spin_unlock_irq(&mapping->tree_lock);
  		return -EAGAIN;
  	}
  
  	get_page(newpage);
  
  	radix_tree_replace_slot(pslot, newpage);
  	page_unfreeze_refs(page, expected_count - 1);
  
  	spin_unlock_irq(&mapping->tree_lock);
  	return MIGRATEPAGE_SUCCESS;
  }
  
  /*
   * Gigantic pages are so large that we do not guarantee that page++ pointer
   * arithmetic will work across the entire page.  We need something more
   * specialized.
   */
  static void __copy_gigantic_page(struct page *dst, struct page *src,
  				int nr_pages)
  {
  	int i;
  	struct page *dst_base = dst;
  	struct page *src_base = src;
  
  	for (i = 0; i < nr_pages; ) {
  		cond_resched();
  		copy_highpage(dst, src);
  
  		i++;
  		dst = mem_map_next(dst, dst_base, i);
  		src = mem_map_next(src, src_base, i);
  	}
  }
  
  static void copy_huge_page(struct page *dst, struct page *src)
  {
  	int i;
  	int nr_pages;
  
  	if (PageHuge(src)) {
  		/* hugetlbfs page */
  		struct hstate *h = page_hstate(src);
  		nr_pages = pages_per_huge_page(h);
  
  		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
  			__copy_gigantic_page(dst, src, nr_pages);
  			return;
  		}
  	} else {
  		/* thp page */
  		BUG_ON(!PageTransHuge(src));
  		nr_pages = hpage_nr_pages(src);
  	}
  
  	for (i = 0; i < nr_pages; i++) {
  		cond_resched();
  		copy_highpage(dst + i, src + i);
  	}
  }
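
/*
 * Editor's note: a worked example, not part of this file.  With 4KB base
 * pages, a 2MB hugepage or THP has nr_pages == 512, well below
 * MAX_ORDER_NR_PAGES, so the simple copy_highpage() loop above is used.
 * A 1GB gigantic hugepage has nr_pages == 262144; its tail pages are not
 * guaranteed to be contiguous in mem_map, so __copy_gigantic_page() walks
 * them with mem_map_next() instead of plain pointer arithmetic.
 */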
  
  /*
 * Copy the page to its new location
 */
void migrate_page_copy(struct page *newpage, struct page *page)
{
	int cpupid;

	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);
  
  	if (PageError(page))
  		SetPageError(newpage);
  	if (PageReferenced(page))
  		SetPageReferenced(newpage);
  	if (PageUptodate(page))
  		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		/*
		 * Want to mark the page and the radix tree as dirty, and
		 * redo the accounting that clear_page_dirty_for_io undid,
		 * but we can't use set_page_dirty because that function
		 * is actually a signal that all of the page has become dirty.
		 * Whereas only part of our page may be dirty.
		 */
		if (PageSwapBacked(page))
			SetPageDirty(newpage);
		else
			__set_page_dirty_nobuffers(newpage);
	}
  	/*
  	 * Copy NUMA information to the new page, to prevent over-eager
  	 * future migrations of this same page.
  	 */
  	cpupid = page_cpupid_xchg_last(page, -1);
  	page_cpupid_xchg_last(newpage, cpupid);

	mlock_migrate_page(newpage, page);
	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}

/************************************************************
 *                    Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	migrate_page_copy(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);
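
/*
 * Editor's note: an illustrative use, not part of this file.  Filesystems
 * opt in to migration by wiring migrate_page() into their
 * address_space_operations, along the lines of:
 *
 *	static const struct address_space_operations example_aops = {
 *		.readpage	= example_readpage,
 *		.writepage	= example_writepage,
 *		.migratepage	= migrate_page,
 *	};
 *
 * where the example_* callbacks are hypothetical; without a .migratepage
 * hook, fallback_migrate_page() below is used instead.
 */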
  #ifdef CONFIG_BLOCK
/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct buffer_head *bh, *head;
	int rc;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page, mode);

	head = page_buffers(page);

	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	/*
	 * In the async case, migrate_page_move_mapping locked the buffers
	 * with an IRQ-safe spinlock held. In the sync case, the buffers
	 * need to be locked now
	 */
	if (mode != MIGRATE_ASYNC)
		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
  
  	ClearPagePrivate(page);
  	set_page_private(newpage, page_private(page));
  	set_page_private(page, 0);
  	put_page(page);
  	get_page(newpage);
  
  	bh = head;
  	do {
  		set_bh_page(bh, newpage, bh_offset(bh));
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	SetPagePrivate(newpage);
  
  	migrate_page_copy(newpage, page);
  
  	bh = head;
  	do {
  		unlock_buffer(bh);
   		put_bh(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  	return MIGRATEPAGE_SUCCESS;
  }
  EXPORT_SYMBOL(buffer_migrate_page);
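
/*
 * Editor's note: an illustrative use, not part of this file.  Mappings whose
 * pages carry buffer_heads typically point their aops at this helper
 * instead, for example:
 *
 *	static const struct address_space_operations example_blk_aops = {
 *		.readpage	= example_readpage,
 *		.writepage	= example_writepage,
 *		.migratepage	= buffer_migrate_page,
 *	};
 *
 * (example_* are hypothetical callbacks; the block device mapping is one
 * in-tree user of buffer_migrate_page.)
 */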
  #endif

/*
 * Writeback a page to clean the dirty state
 */
static int writeout(struct address_space *mapping, struct page *page)
{
  	struct writeback_control wbc = {
  		.sync_mode = WB_SYNC_NONE,
  		.nr_to_write = 1,
  		.range_start = 0,
  		.range_end = LLONG_MAX,
  		.for_reclaim = 1
  	};
  	int rc;
  
  	if (!mapping->a_ops->writepage)
  		/* No write method for the address space */
  		return -EINVAL;
  
  	if (!clear_page_dirty_for_io(page))
  		/* Someone else already triggered a write */
  		return -EAGAIN;
	/*
	 * A dirty page may imply that the underlying filesystem has
	 * the page on some queue. So the page must be clean for
	 * migration. Writeout may mean we lose the lock and the
	 * page state is no longer what we checked for earlier.
	 * At this point we know that the migration attempt cannot
	 * be successful.
	 */
	remove_migration_ptes(page, page);

	rc = mapping->a_ops->writepage(page, &wbc);

	if (rc != AOP_WRITEPAGE_ACTIVATE)
		/* unlocked. Relock */
		lock_page(page);

	return (rc < 0) ? -EIO : -EAGAIN;
}

/*
 * Default handling if a filesystem does not provide a migration function.
 */
static int fallback_migrate_page(struct address_space *mapping,
  	struct page *newpage, struct page *page, enum migrate_mode mode)
{
	if (PageDirty(page)) {
		/* Only writeback pages in full synchronous migration */
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		return writeout(mapping, page);
	}

	/*
	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
	 */
	if (page_has_private(page) &&
	    !try_to_release_page(page, GFP_KERNEL))
		return -EAGAIN;

	return migrate_page(mapping, newpage, page, mode);
  }

/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */
static int move_to_new_page(struct page *newpage, struct page *page,
				int remap_swapcache, enum migrate_mode mode)
  {
  	struct address_space *mapping;
  	int rc;
  
  	/*
  	 * Block others from accessing the page when we get around to
  	 * establishing additional references. We are the only one
  	 * holding a reference to the new page at this point.
  	 */
  	if (!trylock_page(newpage))
  		BUG();
  
  	/* Prepare mapping for the new page.*/
  	newpage->index = page->index;
  	newpage->mapping = page->mapping;
	if (PageSwapBacked(page))
		SetPageSwapBacked(newpage);

	mapping = page_mapping(page);
	if (!mapping)
		rc = migrate_page(mapping, newpage, page, mode);
	else if (mapping->a_ops->migratepage)
		/*
		 * Most pages have a mapping and most filesystems provide a
		 * migratepage callback. Anonymous pages are part of swap
		 * space which also has its own migratepage callback. This
		 * is the most common path for page migration.
		 */
		rc = mapping->a_ops->migratepage(mapping,
						newpage, page, mode);
	else
		rc = fallback_migrate_page(mapping, newpage, page, mode);

	if (rc != MIGRATEPAGE_SUCCESS) {
		newpage->mapping = NULL;
	} else {
		if (remap_swapcache)
			remove_migration_ptes(page, newpage);
		page->mapping = NULL;
	}
  
  	unlock_page(newpage);
  
  	return rc;
  }
static int __unmap_and_move(struct page *page, struct page *newpage,
				int force, enum migrate_mode mode)
{
	int rc = -EAGAIN;
	int remap_swapcache = 1;
	struct mem_cgroup *mem;
	struct anon_vma *anon_vma = NULL;

	if (!trylock_page(page)) {
		if (!force || mode == MIGRATE_ASYNC)
			goto out;

		/*
		 * It's not safe for direct compaction to call lock_page.
		 * For example, during page readahead pages are added locked
		 * to the LRU. Later, when the IO completes the pages are
		 * marked uptodate and unlocked. However, the queueing
		 * could be merging multiple pages for one bio (e.g.
		 * mpage_readpages). If an allocation happens for the
		 * second or third page, the process can end up locking
		 * the same page twice and deadlocking. Rather than
		 * trying to be clever about what pages can be locked,
		 * avoid the use of lock_page for direct compaction
		 * altogether.
		 */
		if (current->flags & PF_MEMALLOC)
			goto out;

		lock_page(page);
	}

	/* charge against new page */
	mem_cgroup_prepare_migration(page, newpage, &mem);

	if (PageWriteback(page)) {
		/*
		 * Only in the case of a full synchronous migration is it
		 * necessary to wait for PageWriteback. In the async case,
		 * the retry loop is too short and in the sync-light case,
		 * the overhead of stalling is too much
		 */
		if (mode != MIGRATE_SYNC) {
			rc = -EBUSY;
			goto uncharge;
		}
		if (!force)
			goto uncharge;
		wait_on_page_writeback(page);
	}
	/*
	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
	 * we cannot notice that anon_vma is freed while we migrate a page.
	 * This get_anon_vma() delays freeing anon_vma pointer until the end
	 * of migration. File cache pages are no problem because of page_lock()
	 * File Caches may use write_page() or lock_page() in migration, then,
	 * just care Anon page here.
	 */
	if (PageAnon(page) && !PageKsm(page)) {
		/*
		 * Only page_lock_anon_vma_read() understands the subtleties of
		 * getting a hold on an anon_vma from outside one of its mms.
		 */
		anon_vma = page_get_anon_vma(page);
		if (anon_vma) {
			/*
			 * Anon page
			 */
		} else if (PageSwapCache(page)) {
			/*
			 * We cannot be sure that the anon_vma of an unmapped
			 * swapcache page is safe to use because we don't
			 * know in advance if the VMA that this page belonged
			 * to still exists. If the VMA and others sharing the
			 * data have been freed, then the anon_vma could
			 * already be invalid.
			 *
			 * To avoid this possibility, swapcache pages get
			 * migrated but are not remapped when migration
			 * completes
			 */
			remap_swapcache = 0;
		} else {
			goto uncharge;
		}
	}

	if (unlikely(balloon_page_movable(page))) {
		/*
		 * A ballooned page does not need any special attention from
		 * physical to virtual reverse mapping procedures.
		 * Skip any attempt to unmap PTEs or to remap swap cache,
		 * in order to avoid burning cycles at rmap level, and perform
		 * the page migration right away (protected by page lock).
		 */
		rc = balloon_page_migrate(newpage, page, mode);
		goto uncharge;
	}

	/*
	 * Corner case handling:
	 * 1. When a new swap-cache page is read into, it is added to the LRU
	 * and treated as swapcache but it has no rmap yet.
	 * Calling try_to_unmap() against a page->mapping==NULL page will
	 * trigger a BUG.  So handle it here.
	 * 2. An orphaned page (see truncate_complete_page) might have
	 * fs-private metadata. The page can be picked up due to memory
	 * offlining.  Everywhere else except page reclaim, the page is
	 * invisible to the vm, so the page can not be migrated.  So try to
	 * free the metadata, so the page can be freed.
	 */
	if (!page->mapping) {
		VM_BUG_ON_PAGE(PageAnon(page), page);
		if (page_has_private(page)) {
			try_to_free_buffers(page);
			goto uncharge;
		}
		goto skip_unmap;
	}

	/* Establish migration ptes or remove ptes */
	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

skip_unmap:
	if (!page_mapped(page))
		rc = move_to_new_page(newpage, page, remap_swapcache, mode);

	if (rc && remap_swapcache)
		remove_migration_ptes(page, page);

	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);

uncharge:
	mem_cgroup_end_migration(mem, page, newpage,
				 (rc == MIGRATEPAGE_SUCCESS ||
				  rc == MIGRATEPAGE_BALLOON_SUCCESS));
	unlock_page(page);
  out:
  	return rc;
  }

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
			struct page *page, int force, enum migrate_mode mode)
  {
  	int rc = 0;
  	int *result = NULL;
  	struct page *newpage = get_new_page(page, private, &result);
  
  	if (!newpage)
  		return -ENOMEM;
  
  	if (page_count(page) == 1) {
  		/* page was freed from under us. So we are done. */
  		goto out;
  	}
  
  	if (unlikely(PageTransHuge(page)))
  		if (unlikely(split_huge_page(page)))
  			goto out;
  	rc = __unmap_and_move(page, newpage, force, mode);
  
  	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
  		/*
  		 * A ballooned page has been migrated already.
  		 * Now, it's the time to wrap-up counters,
		 * hand the page back to Buddy and return.
  		 */
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
  				    page_is_file_cache(page));
  		balloon_page_free(page);
  		return MIGRATEPAGE_SUCCESS;
  	}
  out:
  	if (rc != -EAGAIN) {
  		/*
  		 * A page that has been migrated has all references
  		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be
  		 * restored.
  		 */
  		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		putback_lru_page(page);
	}

	/*
	 * Move the new page to the LRU. If migration was not successful
	 * then this will free the page.
	 */
	putback_lru_page(newpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(newpage);
	}
	return rc;
}

/*
   * Counterpart of unmap_and_move_page() for hugepage migration.
   *
 * This function doesn't wait for the completion of hugepage I/O
   * because there is no race between I/O and migration for hugepage.
   * Note that currently hugepage I/O occurs only in direct I/O
   * where no lock is held and PG_writeback is irrelevant,
   * and writeback status of all subpages are counted in the reference
   * count of the head page (i.e. if all subpages of a 2MB hugepage are
   * under direct I/O, the reference of the head page is 512 and a bit more.)
   * This means that when we try to migrate hugepage whose subpages are
   * doing direct I/O, some references remain after try_to_unmap() and
   * hugepage migration fails without data corruption.
   *
   * There is also no race when direct I/O is issued on the page under migration,
   * because then pte is replaced with migration swap entry and direct I/O code
   * will wait in the page fault for migration to complete.
   */
  static int unmap_and_move_huge_page(new_page_t get_new_page,
  				unsigned long private, struct page *hpage,
				int force, enum migrate_mode mode)
{
	int rc = 0;
	int *result = NULL;
	struct page *new_hpage;
	struct anon_vma *anon_vma = NULL;

	/*
	 * Movability of hugepages depends on architectures and hugepage size.
	 * This check is necessary because some callers of hugepage migration
	 * like soft offline and memory hotremove don't walk through page
	 * tables or check whether the hugepage is pmd-based or not before
	 * kicking migration.
	 */
	if (!hugepage_migration_support(page_hstate(hpage))) {
		putback_active_hugepage(hpage);
		return -ENOSYS;
	}

	new_hpage = get_new_page(hpage, private, &result);
	if (!new_hpage)
		return -ENOMEM;

	rc = -EAGAIN;

	if (!trylock_page(hpage)) {
		if (!force || mode != MIGRATE_SYNC)
			goto out;
		lock_page(hpage);
	}

	if (PageAnon(hpage))
		anon_vma = page_get_anon_vma(hpage);

	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

	if (!page_mapped(hpage))
		rc = move_to_new_page(new_hpage, hpage, 1, mode);

	if (rc)
		remove_migration_ptes(hpage, hpage);

	if (anon_vma)
		put_anon_vma(anon_vma);

	if (!rc)
		hugetlb_cgroup_migrate(hpage, new_hpage);

	unlock_page(hpage);
out:
	if (rc != -EAGAIN)
		putback_active_hugepage(hpage);
	put_page(new_hpage);
  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(new_hpage);
  	}
  	return rc;
  }
  
  /*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the constraints for
 *			page migration, if any.
 * @reason:		The reason for page migration.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * The caller should call putback_lru_pages() to return pages to the LRU
 * or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */
  int migrate_pages(struct list_head *from, new_page_t get_new_page,
  		unsigned long private, enum migrate_mode mode, int reason)
{
	int retry = 1;
	int nr_failed = 0;
	int nr_succeeded = 0;
  	int pass = 0;
  	struct page *page;
  	struct page *page2;
  	int swapwrite = current->flags & PF_SWAPWRITE;
  	int rc;
  
  	if (!swapwrite)
  		current->flags |= PF_SWAPWRITE;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1073
1074
  	for (pass = 0; pass < 10 && retry; pass++) {
  		retry = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
1075

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1076
  		list_for_each_entry_safe(page, page2, from, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1077
  			cond_resched();
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1078

31caf665e   Naoya Horiguchi   mm: migrate: make...
1079
1080
1081
1082
1083
  			if (PageHuge(page))
  				rc = unmap_and_move_huge_page(get_new_page,
  						private, page, pass > 2, mode);
  			else
  				rc = unmap_and_move(get_new_page, private,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1084
  						page, pass > 2, mode);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1085

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1086
  			switch (rc) {
95a402c38   Christoph Lameter   [PATCH] page migr...
1087
1088
  			case -ENOMEM:
  				goto out;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1089
  			case -EAGAIN:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1090
  				retry++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1091
  				break;
78bd52097   Rafael Aquini   mm: adjust addres...
1092
  			case MIGRATEPAGE_SUCCESS:
5647bc293   Mel Gorman   mm: compaction: M...
1093
  				nr_succeeded++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1094
1095
  				break;
  			default:
354a33633   Naoya Horiguchi   mm/migrate: add c...
1096
1097
1098
1099
1100
1101
  				/*
  				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
  				 * unlike -EAGAIN case, the failed page is
  				 * removed from migration page list and not
  				 * retried in the next outer loop.
  				 */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1102
  				nr_failed++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
1103
  				break;
2d1db3b11   Christoph Lameter   [PATCH] page migr...
1104
  			}
b20a35035   Christoph Lameter   [PATCH] page migr...
1105
1106
  		}
  	}
78bd52097   Rafael Aquini   mm: adjust addres...
1107
  	rc = nr_failed + retry;
95a402c38   Christoph Lameter   [PATCH] page migr...
1108
  out:
5647bc293   Mel Gorman   mm: compaction: M...
1109
1110
1111
1112
  	if (nr_succeeded)
  		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
  	if (nr_failed)
  		count_vm_events(PGMIGRATE_FAIL, nr_failed);
7b2a2d4a1   Mel Gorman   mm: migrate: Add ...
1113
  	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
b20a35035   Christoph Lameter   [PATCH] page migr...
1114
1115
  	if (!swapwrite)
  		current->flags &= ~PF_SWAPWRITE;
78bd52097   Rafael Aquini   mm: adjust addres...
1116
  	return rc;
b20a35035   Christoph Lameter   [PATCH] page migr...
1117
  }
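  /*
   * Editor's illustrative sketch (not part of mm/migrate.c): how a caller is
   * expected to drive migrate_pages().  The callback and helper names below
   * are hypothetical; the calling convention follows the comment above --
   * any pages still on the list after a non-zero return must be put back by
   * the caller.
   */
  #if 0	/* example only, never compiled */
  static struct page *example_new_page(struct page *page, unsigned long private,
  				     int **result)
  {
  	/* Allocate the migration target on the node passed via 'private'. */
  	return alloc_pages_node((int)private, GFP_HIGHUSER_MOVABLE, 0);
  }
  
  static int example_migrate_list(struct list_head *pagelist, int target_nid)
  {
  	int ret;
  
  	ret = migrate_pages(pagelist, example_new_page,
  			    (unsigned long)target_nid, MIGRATE_SYNC, MR_SYSCALL);
  	if (ret)	/* some pages could not be migrated */
  		putback_movable_pages(pagelist);
  	return ret;
  }
  #endif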
95a402c38   Christoph Lameter   [PATCH] page migr...
1118

742755a1d   Christoph Lameter   [PATCH] page migr...
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
  #ifdef CONFIG_NUMA
  /*
   * Move a list of individual pages
   */
  struct page_to_node {
  	unsigned long addr;
  	struct page *page;
  	int node;
  	int status;
  };
  
  static struct page *new_page_node(struct page *p, unsigned long private,
  		int **result)
  {
  	struct page_to_node *pm = (struct page_to_node *)private;
  
  	while (pm->node != MAX_NUMNODES && pm->page != p)
  		pm++;
  
  	if (pm->node == MAX_NUMNODES)
  		return NULL;
  
  	*result = &pm->status;
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1142
1143
1144
1145
1146
  	if (PageHuge(p))
  		return alloc_huge_page_node(page_hstate(compound_head(p)),
  					pm->node);
  	else
  		return alloc_pages_exact_node(pm->node,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1147
  				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
1148
1149
1150
1151
1152
1153
  }
  
  /*
   * Move a set of pages as indicated in the pm array. The addr
   * field must be set to the virtual address of the page to be moved
   * and the node number must contain a valid target node.
5e9a0f023   Brice Goglin   mm: extract do_pa...
1154
   * The pm array ends with node = MAX_NUMNODES.
742755a1d   Christoph Lameter   [PATCH] page migr...
1155
   */
5e9a0f023   Brice Goglin   mm: extract do_pa...
1156
1157
1158
  static int do_move_page_to_node_array(struct mm_struct *mm,
  				      struct page_to_node *pm,
  				      int migrate_all)
742755a1d   Christoph Lameter   [PATCH] page migr...
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
  {
  	int err;
  	struct page_to_node *pp;
  	LIST_HEAD(pagelist);
  
  	down_read(&mm->mmap_sem);
  
  	/*
  	 * Build a list of pages to migrate
  	 */
742755a1d   Christoph Lameter   [PATCH] page migr...
1169
1170
1171
  	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
  		struct vm_area_struct *vma;
  		struct page *page;
742755a1d   Christoph Lameter   [PATCH] page migr...
1172
1173
  		err = -EFAULT;
  		vma = find_vma(mm, pp->addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1174
  		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
742755a1d   Christoph Lameter   [PATCH] page migr...
1175
  			goto set_status;
500d65d47   Andrea Arcangeli   thp: pmd_trans_hu...
1176
  		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1177
1178
1179
1180
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1181
1182
1183
  		err = -ENOENT;
  		if (!page)
  			goto set_status;
62b61f611   Hugh Dickins   ksm: memory hotre...
1184
  		/* Use PageReserved to check for zero page */
b79bc0a0c   Hugh Dickins   ksm: enable KSM p...
1185
  		if (PageReserved(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
  			goto put_and_set;
  
  		pp->page = page;
  		err = page_to_nid(page);
  
  		if (err == pp->node)
  			/*
  			 * Node already in the right place
  			 */
  			goto put_and_set;
  
  		err = -EACCES;
  		if (page_mapcount(page) > 1 &&
  				!migrate_all)
  			goto put_and_set;
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1201
1202
1203
1204
  		if (PageHuge(page)) {
  			isolate_huge_page(page, &pagelist);
  			goto put_and_set;
  		}
62695a84e   Nick Piggin   vmscan: move isol...
1205
  		err = isolate_lru_page(page);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1206
  		if (!err) {
62695a84e   Nick Piggin   vmscan: move isol...
1207
  			list_add_tail(&page->lru, &pagelist);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1208
1209
1210
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
  		}
742755a1d   Christoph Lameter   [PATCH] page migr...
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
  put_and_set:
  		/*
  		 * Either remove the duplicate refcount from
  		 * isolate_lru_page() or drop the page ref if it was
  		 * not isolated.
  		 */
  		put_page(page);
  set_status:
  		pp->status = err;
  	}
e78bbfa82   Brice Goglin   mm: stop returnin...
1221
  	err = 0;
cf608ac19   Minchan Kim   mm: compaction: f...
1222
  	if (!list_empty(&pagelist)) {
742755a1d   Christoph Lameter   [PATCH] page migr...
1223
  		err = migrate_pages(&pagelist, new_page_node,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1224
  				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
cf608ac19   Minchan Kim   mm: compaction: f...
1225
  		if (err)
e632a938d   Naoya Horiguchi   mm: migrate: add ...
1226
  			putback_movable_pages(&pagelist);
cf608ac19   Minchan Kim   mm: compaction: f...
1227
  	}
742755a1d   Christoph Lameter   [PATCH] page migr...
1228
1229
1230
1231
1232
1233
  
  	up_read(&mm->mmap_sem);
  	return err;
  }
  
  /*
5e9a0f023   Brice Goglin   mm: extract do_pa...
1234
1235
1236
   * Migrate an array of page addresses onto an array of nodes and fill
   * the corresponding array of status.
   */
3268c63ed   Christoph Lameter   mm: fix move/migr...
1237
  static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
5e9a0f023   Brice Goglin   mm: extract do_pa...
1238
1239
1240
1241
1242
  			 unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 const int __user *nodes,
  			 int __user *status, int flags)
  {
3140a2273   Brice Goglin   mm: rework do_pag...
1243
  	struct page_to_node *pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1244
1245
1246
  	unsigned long chunk_nr_pages;
  	unsigned long chunk_start;
  	int err;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1247

3140a2273   Brice Goglin   mm: rework do_pag...
1248
1249
1250
  	err = -ENOMEM;
  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
  	if (!pm)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1251
  		goto out;
35282a2de   Brice Goglin   migration: only m...
1252
1253
  
  	migrate_prep();
5e9a0f023   Brice Goglin   mm: extract do_pa...
1254
  	/*
3140a2273   Brice Goglin   mm: rework do_pag...
1255
1256
  	 * Store a chunk of page_to_node array in a page,
  	 * but keep the last one as a marker
5e9a0f023   Brice Goglin   mm: extract do_pa...
1257
  	 */
3140a2273   Brice Goglin   mm: rework do_pag...
1258
  	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
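  	/*
  	 * (Editor's note) For example, with 4K pages and a 24-byte
  	 * struct page_to_node on a typical 64-bit build this works out
  	 * to roughly 169 usable entries per chunk; the slot subtracted
  	 * here is reserved for the MAX_NUMNODES end marker stored after
  	 * each chunk below.
  	 */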
5e9a0f023   Brice Goglin   mm: extract do_pa...
1259

3140a2273   Brice Goglin   mm: rework do_pag...
1260
1261
1262
1263
  	for (chunk_start = 0;
  	     chunk_start < nr_pages;
  	     chunk_start += chunk_nr_pages) {
  		int j;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1264

3140a2273   Brice Goglin   mm: rework do_pag...
1265
1266
1267
1268
1269
1270
  		if (chunk_start + chunk_nr_pages > nr_pages)
  			chunk_nr_pages = nr_pages - chunk_start;
  
  		/* fill the chunk pm with addrs and nodes from user-space */
  		for (j = 0; j < chunk_nr_pages; j++) {
  			const void __user *p;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1271
  			int node;
3140a2273   Brice Goglin   mm: rework do_pag...
1272
1273
1274
1275
1276
1277
  			err = -EFAULT;
  			if (get_user(p, pages + j + chunk_start))
  				goto out_pm;
  			pm[j].addr = (unsigned long) p;
  
  			if (get_user(node, nodes + j + chunk_start))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1278
1279
1280
  				goto out_pm;
  
  			err = -ENODEV;
6f5a55f1a   Linus Torvalds   Fix potential cra...
1281
1282
  			if (node < 0 || node >= MAX_NUMNODES)
  				goto out_pm;
389162c22   Lai Jiangshan   mm,migrate: use N...
1283
  			if (!node_state(node, N_MEMORY))
5e9a0f023   Brice Goglin   mm: extract do_pa...
1284
1285
1286
1287
1288
  				goto out_pm;
  
  			err = -EACCES;
  			if (!node_isset(node, task_nodes))
  				goto out_pm;
3140a2273   Brice Goglin   mm: rework do_pag...
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
  			pm[j].node = node;
  		}
  
  		/* End marker for this chunk */
  		pm[chunk_nr_pages].node = MAX_NUMNODES;
  
  		/* Migrate this chunk */
  		err = do_move_page_to_node_array(mm, pm,
  						 flags & MPOL_MF_MOVE_ALL);
  		if (err < 0)
  			goto out_pm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1300

5e9a0f023   Brice Goglin   mm: extract do_pa...
1301
  		/* Return status information */
3140a2273   Brice Goglin   mm: rework do_pag...
1302
1303
  		for (j = 0; j < chunk_nr_pages; j++)
  			if (put_user(pm[j].status, status + j + chunk_start)) {
5e9a0f023   Brice Goglin   mm: extract do_pa...
1304
  				err = -EFAULT;
3140a2273   Brice Goglin   mm: rework do_pag...
1305
1306
1307
1308
  				goto out_pm;
  			}
  	}
  	err = 0;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1309
1310
  
  out_pm:
3140a2273   Brice Goglin   mm: rework do_pag...
1311
  	free_page((unsigned long)pm);
5e9a0f023   Brice Goglin   mm: extract do_pa...
1312
1313
1314
1315
1316
  out:
  	return err;
  }
  
  /*
2f007e74b   Brice Goglin   mm: don't vmalloc...
1317
   * Determine the nodes of an array of pages and store them in an array of status.
742755a1d   Christoph Lameter   [PATCH] page migr...
1318
   */
80bba1290   Brice Goglin   mm: no get_user/p...
1319
1320
  static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
  				const void __user **pages, int *status)
742755a1d   Christoph Lameter   [PATCH] page migr...
1321
  {
2f007e74b   Brice Goglin   mm: don't vmalloc...
1322
  	unsigned long i;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1323

742755a1d   Christoph Lameter   [PATCH] page migr...
1324
  	down_read(&mm->mmap_sem);
2f007e74b   Brice Goglin   mm: don't vmalloc...
1325
  	for (i = 0; i < nr_pages; i++) {
80bba1290   Brice Goglin   mm: no get_user/p...
1326
  		unsigned long addr = (unsigned long)(*pages);
742755a1d   Christoph Lameter   [PATCH] page migr...
1327
1328
  		struct vm_area_struct *vma;
  		struct page *page;
c095adbc2   KOSAKI Motohiro   mm: Don't touch u...
1329
  		int err = -EFAULT;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1330
1331
  
  		vma = find_vma(mm, addr);
70384dc6d   Gleb Natapov   mm: fix error rep...
1332
  		if (!vma || addr < vma->vm_start)
742755a1d   Christoph Lameter   [PATCH] page migr...
1333
  			goto set_status;
2f007e74b   Brice Goglin   mm: don't vmalloc...
1334
  		page = follow_page(vma, addr, 0);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
1335
1336
1337
1338
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
1339
1340
  		err = -ENOENT;
  		/* Use PageReserved to check for zero page */
b79bc0a0c   Hugh Dickins   ksm: enable KSM p...
1341
  		if (!page || PageReserved(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
1342
1343
1344
1345
  			goto set_status;
  
  		err = page_to_nid(page);
  set_status:
80bba1290   Brice Goglin   mm: no get_user/p...
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
  		*status = err;
  
  		pages++;
  		status++;
  	}
  
  	up_read(&mm->mmap_sem);
  }
  
  /*
   * Determine the nodes of a user array of pages and store them in
   * a user array of status.
   */
  static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 int __user *status)
  {
  #define DO_PAGES_STAT_CHUNK_NR 16
  	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
  	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
80bba1290   Brice Goglin   mm: no get_user/p...
1366

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1367
1368
  	while (nr_pages) {
  		unsigned long chunk_nr;
80bba1290   Brice Goglin   mm: no get_user/p...
1369

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1370
1371
1372
1373
1374
1375
  		chunk_nr = nr_pages;
  		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
  			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
  
  		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
  			break;
80bba1290   Brice Goglin   mm: no get_user/p...
1376
1377
  
  		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1378
1379
  		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
  			break;
742755a1d   Christoph Lameter   [PATCH] page migr...
1380

87b8d1ade   H. Peter Anvin   mm: Make copy_fro...
1381
1382
1383
1384
1385
  		pages += chunk_nr;
  		status += chunk_nr;
  		nr_pages -= chunk_nr;
  	}
  	return nr_pages ? -EFAULT : 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
1386
1387
1388
1389
1390
1391
  }
  
  /*
   * Move a list of pages in the address space of the currently executing
   * process.
   */
938bb9f5e   Heiko Carstens   [CVE-2009-0029] S...
1392
1393
1394
1395
  SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
  		const void __user * __user *, pages,
  		const int __user *, nodes,
  		int __user *, status, int, flags)
742755a1d   Christoph Lameter   [PATCH] page migr...
1396
  {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1397
  	const struct cred *cred = current_cred(), *tcred;
742755a1d   Christoph Lameter   [PATCH] page migr...
1398
  	struct task_struct *task;
742755a1d   Christoph Lameter   [PATCH] page migr...
1399
  	struct mm_struct *mm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1400
  	int err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1401
  	nodemask_t task_nodes;
742755a1d   Christoph Lameter   [PATCH] page migr...
1402
1403
1404
1405
1406
1407
1408
1409
1410
  
  	/* Check flags */
  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
  		return -EINVAL;
  
  	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
  		return -EPERM;
  
  	/* Find the mm_struct */
a879bf582   Greg Thelen   mm: grab rcu read...
1411
  	rcu_read_lock();
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
1412
  	task = pid ? find_task_by_vpid(pid) : current;
742755a1d   Christoph Lameter   [PATCH] page migr...
1413
  	if (!task) {
a879bf582   Greg Thelen   mm: grab rcu read...
1414
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1415
1416
  		return -ESRCH;
  	}
3268c63ed   Christoph Lameter   mm: fix move/migr...
1417
  	get_task_struct(task);
742755a1d   Christoph Lameter   [PATCH] page migr...
1418
1419
1420
1421
1422
1423
1424
  
  	/*
  	 * Check if this process has the right to modify the specified
  	 * process. The right exists if the process has administrative
  	 * capabilities, superuser privileges or the same
  	 * userid as the target process.
  	 */
c69e8d9c0   David Howells   CRED: Use RCU to ...
1425
  	tcred = __task_cred(task);
b38a86eb1   Eric W. Biederman   userns: Convert t...
1426
1427
  	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
  	    !uid_eq(cred->uid,  tcred->suid) && !uid_eq(cred->uid,  tcred->uid) &&
742755a1d   Christoph Lameter   [PATCH] page migr...
1428
  	    !capable(CAP_SYS_NICE)) {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1429
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1430
  		err = -EPERM;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1431
  		goto out;
742755a1d   Christoph Lameter   [PATCH] page migr...
1432
  	}
c69e8d9c0   David Howells   CRED: Use RCU to ...
1433
  	rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1434

86c3a7645   David Quigley   [PATCH] SELinux: ...
1435
1436
  	err = security_task_movememory(task);
  	if (err)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1437
  		goto out;
86c3a7645   David Quigley   [PATCH] SELinux: ...
1438

3268c63ed   Christoph Lameter   mm: fix move/migr...
1439
1440
1441
  	task_nodes = cpuset_mems_allowed(task);
  	mm = get_task_mm(task);
  	put_task_struct(task);
6e8b09eaf   Sasha Levin   mm: fix NULL ptr ...
1442
1443
1444
1445
1446
1447
1448
1449
  	if (!mm)
  		return -EINVAL;
  
  	if (nodes)
  		err = do_pages_move(mm, task_nodes, nr_pages, pages,
  				    nodes, status, flags);
  	else
  		err = do_pages_stat(mm, nr_pages, pages, status);
742755a1d   Christoph Lameter   [PATCH] page migr...
1450

742755a1d   Christoph Lameter   [PATCH] page migr...
1451
1452
  	mmput(mm);
  	return err;
3268c63ed   Christoph Lameter   mm: fix move/migr...
1453
1454
1455
1456
  
  out:
  	put_task_struct(task);
  	return err;
742755a1d   Christoph Lameter   [PATCH] page migr...
1457
  }
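  /*
   * Editor's illustrative sketch (not part of mm/migrate.c): calling the
   * syscall defined above from user space.  This assumes the libnuma
   * wrapper declared in <numaif.h> (link with -lnuma); passing pid == 0
   * operates on the calling process, and a NULL 'nodes' array turns the
   * call into a pure status query, matching do_pages_stat() above.
   */
  #if 0	/* userspace example only */
  #include <numaif.h>
  #include <stdio.h>
  
  static int example_move_to_node(void *addr, int target_node)
  {
  	void *pages[1] = { addr };
  	int nodes[1] = { target_node };
  	int status[1] = { -1 };
  
  	if (move_pages(0 /* self */, 1, pages, nodes, status, MPOL_MF_MOVE))
  		return -1;
  	/* status[0] now holds the page's node, or a negative error code. */
  	printf("page is on node %d\n", status[0]);
  	return 0;
  }
  #endif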
742755a1d   Christoph Lameter   [PATCH] page migr...
1458

7b2259b3e   Christoph Lameter   [PATCH] page migr...
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
  /*
   * Call migration functions in the vma_ops that may prepare
   * memory in a vm for migration. Migration functions may perform
   * the migration for vmas that do not have an underlying page struct.
   */
  int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	const nodemask_t *from, unsigned long flags)
  {
  	struct vm_area_struct *vma;
  	int err = 0;
1001c9fb8   Daisuke Nishimura   migration: migrat...
1469
  	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
7b2259b3e   Christoph Lameter   [PATCH] page migr...
1470
1471
1472
1473
1474
1475
1476
1477
  		if (vma->vm_ops && vma->vm_ops->migrate) {
  			err = vma->vm_ops->migrate(vma, to, from, flags);
  			if (err)
  				break;
  		}
  	}
  	return err;
  }
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1478
1479
1480
1481
1482
1483
1484
  
  #ifdef CONFIG_NUMA_BALANCING
  /*
   * Returns true if this is a safe migration target node for misplaced NUMA
   * pages. Currently it only checks the watermarks, which is crude.
   */
  static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
3abef4e6c   Mel Gorman   mm: numa: take TH...
1485
  				   unsigned long nr_migrate_pages)
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1486
1487
1488
1489
1490
1491
1492
  {
  	int z;
  	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
  		struct zone *zone = pgdat->node_zones + z;
  
  		if (!populated_zone(zone))
  			continue;
6e543d578   Lisa Du   mm: vmscan: fix d...
1493
  		if (!zone_reclaimable(zone))
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
  			continue;
  
  		/* Avoid waking kswapd by allocating pages_to_migrate pages. */
  		if (!zone_watermark_ok(zone, 0,
  				       high_wmark_pages(zone) +
  				       nr_migrate_pages,
  				       0, 0))
  			continue;
  		return true;
  	}
  	return false;
  }
  
  static struct page *alloc_misplaced_dst_page(struct page *page,
  					   unsigned long data,
  					   int **result)
  {
  	int nid = (int) data;
  	struct page *newpage;
  
  	newpage = alloc_pages_exact_node(nid,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1515
1516
1517
  					 (GFP_HIGHUSER_MOVABLE |
  					  __GFP_THISNODE | __GFP_NOMEMALLOC |
  					  __GFP_NORETRY | __GFP_NOWARN) &
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1518
  					 ~GFP_IOFS, 0);
bac0382c6   Hillf Danton   mm: numa: migrate...
1519

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1520
1521
1522
1523
  	return newpage;
  }
  
  /*
a8f607721   Mel Gorman   mm: numa: Rate li...
1524
1525
1526
   * page migration rate limiting control.
   * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
   * window of time. Default here says do not migrate more than 1280M per second.
e14808b49   Mel Gorman   mm: numa: Rate li...
1527
1528
1529
1530
   * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
   * as it is faults that reset the window, pte updates will happen unconditionally
   * if there has not been a fault since @pteupdate_interval_millisecs after the
   * throttle window closed.
a8f607721   Mel Gorman   mm: numa: Rate li...
1531
1532
   */
  static unsigned int migrate_interval_millisecs __read_mostly = 100;
e14808b49   Mel Gorman   mm: numa: Rate li...
1533
  static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
a8f607721   Mel Gorman   mm: numa: Rate li...
1534
  static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
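  /*
   * (Editor's note) The arithmetic behind the "1280M per second" figure in
   * the comment above: ratelimit_pages is 128 << (20 - PAGE_SHIFT) pages,
   * i.e. 128MB worth of pages, allowed per 100ms migrate_interval_millisecs
   * window, which is 10 windows per second * 128MB = 1280MB/s.
   */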
e14808b49   Mel Gorman   mm: numa: Rate li...
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
  /* Returns true if NUMA migration is currently rate limited */
  bool migrate_ratelimited(int node)
  {
  	pg_data_t *pgdat = NODE_DATA(node);
  
  	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
  				msecs_to_jiffies(pteupdate_interval_millisecs)))
  		return false;
  
  	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
  		return false;
  
  	return true;
  }
b32967ff1   Mel Gorman   mm: numa: Add THP...
1549
  /* Returns true if the node is migrate rate-limited after the update */
1c30e0177   Mel Gorman   mm: numa: make NU...
1550
1551
  static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
  					unsigned long nr_pages)
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1552
  {
a8f607721   Mel Gorman   mm: numa: Rate li...
1553
1554
1555
1556
1557
  	/*
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
a8f607721   Mel Gorman   mm: numa: Rate li...
1558
  	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1559
  		spin_lock(&pgdat->numabalancing_migrate_lock);
a8f607721   Mel Gorman   mm: numa: Rate li...
1560
1561
1562
  		pgdat->numabalancing_migrate_nr_pages = 0;
  		pgdat->numabalancing_migrate_next_window = jiffies +
  			msecs_to_jiffies(migrate_interval_millisecs);
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1563
  		spin_unlock(&pgdat->numabalancing_migrate_lock);
a8f607721   Mel Gorman   mm: numa: Rate li...
1564
  	}
af1839d72   Mel Gorman   mm: numa: trace t...
1565
1566
1567
  	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
  		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
  								nr_pages);
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1568
  		return true;
af1839d72   Mel Gorman   mm: numa: trace t...
1569
  	}
1c5e9c27c   Mel Gorman   mm: numa: limit s...
1570
1571
1572
1573
1574
1575
1576
1577
1578
  
  	/*
  	 * This is an unlocked non-atomic update so errors are possible.
  	 * The consequences are failing to migrate when we potentially should
  	 * have, which is not severe enough to warrant locking. If it is ever
  	 * a problem, it can be converted to a per-cpu counter.
  	 */
  	pgdat->numabalancing_migrate_nr_pages += nr_pages;
  	return false;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1579
  }
1c30e0177   Mel Gorman   mm: numa: make NU...
1580
  static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
b32967ff1   Mel Gorman   mm: numa: Add THP...
1581
  {
340ef3902   Hugh Dickins   mm: numa: cleanup...
1582
  	int page_lru;
a8f607721   Mel Gorman   mm: numa: Rate li...
1583

309381fea   Sasha Levin   mm: dump page whe...
1584
  	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
3abef4e6c   Mel Gorman   mm: numa: take TH...
1585

7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1586
  	/* Avoid migrating to a node that is nearly full */
340ef3902   Hugh Dickins   mm: numa: cleanup...
1587
1588
  	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1589

340ef3902   Hugh Dickins   mm: numa: cleanup...
1590
1591
  	if (isolate_lru_page(page))
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1592

340ef3902   Hugh Dickins   mm: numa: cleanup...
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
  	/*
  	 * migrate_misplaced_transhuge_page() skips page migration's usual
  	 * check on page_count(), so we must do it here, now that the page
  	 * has been isolated: a GUP pin, or any other pin, prevents migration.
  	 * The expected page count is 3: 1 for the page's mapcount, 1 for the
  	 * caller's pin, and 1 for the reference taken by isolate_lru_page().
  	 */
  	if (PageTransHuge(page) && page_count(page) != 3) {
  		putback_lru_page(page);
  		return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1603
  	}
340ef3902   Hugh Dickins   mm: numa: cleanup...
1604
1605
1606
  	page_lru = page_is_file_cache(page);
  	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
  				hpage_nr_pages(page));
149c33e1c   Mel Gorman   mm: migrate: Drop...
1607
  	/*
340ef3902   Hugh Dickins   mm: numa: cleanup...
1608
1609
1610
  	 * Isolating the page has taken another reference, so the
  	 * caller's reference can be safely dropped without the page
  	 * disappearing underneath us during migration.
149c33e1c   Mel Gorman   mm: migrate: Drop...
1611
1612
  	 */
  	put_page(page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1613
  	return 1;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1614
  }
de466bd62   Mel Gorman   mm: numa: avoid u...
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
  bool pmd_trans_migrating(pmd_t pmd)
  {
  	struct page *page = pmd_page(pmd);
  	return PageLocked(page);
  }
  
  void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
  {
  	struct page *page = pmd_page(*pmd);
  	wait_on_page_locked(page);
  }
b32967ff1   Mel Gorman   mm: numa: Add THP...
1626
1627
1628
1629
1630
  /*
   * Attempt to migrate a misplaced page to the specified destination
   * node. Caller is expected to have an elevated reference count on
   * the page that will be dropped by this function before returning.
   */
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1631
1632
  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
  			   int node)
b32967ff1   Mel Gorman   mm: numa: Add THP...
1633
1634
  {
  	pg_data_t *pgdat = NODE_DATA(node);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1635
  	int isolated;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1636
1637
1638
1639
  	int nr_remaining;
  	LIST_HEAD(migratepages);
  
  	/*
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1640
1641
  	 * Don't migrate file pages that are mapped in multiple processes
  	 * with execute permissions as they are probably shared libraries.
b32967ff1   Mel Gorman   mm: numa: Add THP...
1642
  	 */
1bc115d87   Mel Gorman   mm: numa: Scan pa...
1643
1644
  	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
  	    (vma->vm_flags & VM_EXEC))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1645
  		goto out;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1646
1647
1648
1649
1650
1651
  
  	/*
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
340ef3902   Hugh Dickins   mm: numa: cleanup...
1652
  	if (numamigrate_update_ratelimit(pgdat, 1))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1653
  		goto out;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1654
1655
1656
1657
1658
1659
  
  	isolated = numamigrate_isolate_page(pgdat, page);
  	if (!isolated)
  		goto out;
  
  	list_add(&page->lru, &migratepages);
9c620e2bc   Hugh Dickins   mm: remove offlin...
1660
1661
  	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
  				     node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1662
  	if (nr_remaining) {
59c82b70d   Joonsoo Kim   mm/migrate: remov...
1663
1664
1665
1666
1667
1668
  		if (!list_empty(&migratepages)) {
  			list_del(&page->lru);
  			dec_zone_page_state(page, NR_ISOLATED_ANON +
  					page_is_file_cache(page));
  			putback_lru_page(page);
  		}
b32967ff1   Mel Gorman   mm: numa: Add THP...
1669
1670
1671
  		isolated = 0;
  	} else
  		count_vm_numa_event(NUMA_PAGE_MIGRATE);
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1672
  	BUG_ON(!list_empty(&migratepages));
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1673
  	return isolated;
340ef3902   Hugh Dickins   mm: numa: cleanup...
1674
1675
1676
1677
  
  out:
  	put_page(page);
  	return 0;
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1678
  }
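  /*
   * Editor's illustrative sketch (not part of mm/migrate.c): the calling
   * pattern expected of a NUMA hinting fault handler, per the comment above
   * migrate_misplaced_page().  'target_nid' and 'page_nid' are hypothetical
   * variable names; the real callers are the hinting fault paths in
   * mm/memory.c and mm/huge_memory.c.
   */
  #if 0	/* example only, never compiled */
  	int migrated;
  
  	get_page(page);		/* reference is dropped by the callee */
  	migrated = migrate_misplaced_page(page, vma, target_nid);
  	if (migrated)
  		page_nid = target_nid;	/* fault accounting uses the new node */
  #endif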
220018d38   Mel Gorman   mm: numa: Add THP...
1679
  #endif /* CONFIG_NUMA_BALANCING */
b32967ff1   Mel Gorman   mm: numa: Add THP...
1680

220018d38   Mel Gorman   mm: numa: Add THP...
1681
  #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
340ef3902   Hugh Dickins   mm: numa: cleanup...
1682
1683
1684
1685
  /*
   * Migrates a THP to a given target node. page must be locked and is unlocked
   * before returning.
   */
b32967ff1   Mel Gorman   mm: numa: Add THP...
1686
1687
1688
1689
1690
1691
  int migrate_misplaced_transhuge_page(struct mm_struct *mm,
  				struct vm_area_struct *vma,
  				pmd_t *pmd, pmd_t entry,
  				unsigned long address,
  				struct page *page, int node)
  {
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1692
  	spinlock_t *ptl;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1693
1694
1695
1696
1697
  	pg_data_t *pgdat = NODE_DATA(node);
  	int isolated = 0;
  	struct page *new_page = NULL;
  	struct mem_cgroup *memcg = NULL;
  	int page_lru = page_is_file_cache(page);
f714f4f20   Mel Gorman   mm: numa: call MM...
1698
1699
  	unsigned long mmun_start = address & HPAGE_PMD_MASK;
  	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
2b4847e73   Mel Gorman   mm: numa: seriali...
1700
  	pmd_t orig_entry;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1701
1702
  
  	/*
b32967ff1   Mel Gorman   mm: numa: Add THP...
1703
1704
1705
1706
  	 * Rate-limit the amount of data that is being migrated to a node.
  	 * Optimal placement is no good if the memory bus is saturated and
  	 * all the time is being spent migrating!
  	 */
d28d43351   Mel Gorman   mm: migrate: Acco...
1707
  	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
b32967ff1   Mel Gorman   mm: numa: Add THP...
1708
1709
1710
  		goto out_dropref;
  
  	new_page = alloc_pages_node(node,
e97ca8e5b   Johannes Weiner   mm: fix GFP_THISN...
1711
1712
  		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
  		HPAGE_PMD_ORDER);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1713
1714
  	if (!new_page)
  		goto out_fail;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1715
  	isolated = numamigrate_isolate_page(pgdat, page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1716
  	if (!isolated) {
b32967ff1   Mel Gorman   mm: numa: Add THP...
1717
  		put_page(new_page);
340ef3902   Hugh Dickins   mm: numa: cleanup...
1718
  		goto out_fail;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1719
  	}
b0943d61b   Mel Gorman   mm: numa: defer T...
1720
1721
  	if (mm_tlb_flush_pending(mm))
  		flush_tlb_range(vma, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
  	/* Prepare a page as a migration target */
  	__set_page_locked(new_page);
  	SetPageSwapBacked(new_page);
  
  	/* anon mapping, we can simply copy page->mapping to the new page: */
  	new_page->mapping = page->mapping;
  	new_page->index = page->index;
  	migrate_page_copy(new_page, page);
  	WARN_ON(PageLRU(new_page));
  
  	/* Recheck the target PMD */
f714f4f20   Mel Gorman   mm: numa: call MM...
1733
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1734
  	ptl = pmd_lock(mm, pmd);
2b4847e73   Mel Gorman   mm: numa: seriali...
1735
1736
  	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
  fail_putback:
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1737
  		spin_unlock(ptl);
f714f4f20   Mel Gorman   mm: numa: call MM...
1738
  		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
  
  		/* Reverse changes made by migrate_page_copy() */
  		if (TestClearPageActive(new_page))
  			SetPageActive(page);
  		if (TestClearPageUnevictable(new_page))
  			SetPageUnevictable(page);
  		mlock_migrate_page(page, new_page);
  
  		unlock_page(new_page);
  		put_page(new_page);		/* Free it */
a54a407fb   Mel Gorman   mm: Close races b...
1749
1750
  		/* Retake the caller's reference and put the page back on the LRU */
  		get_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1751
  		putback_lru_page(page);
a54a407fb   Mel Gorman   mm: Close races b...
1752
1753
  		mod_zone_page_state(page_zone(page),
  			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
eb4489f69   Mel Gorman   mm: numa: avoid u...
1754
1755
  
  		goto out_unlock;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
  	}
  
  	/*
  	 * Traditional migration needs to prepare the memcg charge
  	 * transaction early to prevent the old page from being
  	 * uncharged when installing migration entries.  Here we can
  	 * save the potential rollback and start the charge transfer
  	 * only when migration is already known to end successfully.
  	 */
  	mem_cgroup_prepare_migration(page, new_page, &memcg);
2b4847e73   Mel Gorman   mm: numa: seriali...
1766
  	orig_entry = *pmd;
b32967ff1   Mel Gorman   mm: numa: Add THP...
1767
  	entry = mk_pmd(new_page, vma->vm_page_prot);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1768
  	entry = pmd_mkhuge(entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1769
  	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1770

2b4847e73   Mel Gorman   mm: numa: seriali...
1771
1772
1773
1774
1775
1776
1777
  	/*
  	 * Clear the old entry under pagetable lock and establish the new PTE.
  	 * Any parallel GUP will either observe the old page blocking on the
  	 * page lock, block on the page table lock or observe the new page.
  	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
  	 * guarantee the copy is visible before the pagetable update.
  	 */
f714f4f20   Mel Gorman   mm: numa: call MM...
1778
1779
1780
1781
1782
  	flush_cache_range(vma, mmun_start, mmun_end);
  	page_add_new_anon_rmap(new_page, vma, mmun_start);
  	pmdp_clear_flush(vma, mmun_start, pmd);
  	set_pmd_at(mm, mmun_start, pmd, entry);
  	flush_tlb_range(vma, mmun_start, mmun_end);
ce4a9cc57   Stephen Rothwell   mm,numa: fix upda...
1783
  	update_mmu_cache_pmd(vma, address, &entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1784
1785
  
  	if (page_count(page) != 2) {
f714f4f20   Mel Gorman   mm: numa: call MM...
1786
1787
  		set_pmd_at(mm, mmun_start, pmd, orig_entry);
  		flush_tlb_range(vma, mmun_start, mmun_end);
2b4847e73   Mel Gorman   mm: numa: seriali...
1788
1789
1790
1791
  		update_mmu_cache_pmd(vma, address, &entry);
  		page_remove_rmap(new_page);
  		goto fail_putback;
  	}
b32967ff1   Mel Gorman   mm: numa: Add THP...
1792
  	page_remove_rmap(page);
2b4847e73   Mel Gorman   mm: numa: seriali...
1793

b32967ff1   Mel Gorman   mm: numa: Add THP...
1794
1795
1796
1797
1798
1799
  	/*
  	 * Finish the charge transaction under the page table lock to
  	 * prevent split_huge_page() from dividing up the charge
  	 * before it's fully transferred to the new page.
  	 */
  	mem_cgroup_end_migration(memcg, page, new_page, true);
c4088ebdc   Kirill A. Shutemov   mm: convert the r...
1800
  	spin_unlock(ptl);
f714f4f20   Mel Gorman   mm: numa: call MM...
1801
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1802
1803
1804
1805
1806
1807
1808
1809
  
  	unlock_page(new_page);
  	unlock_page(page);
  	put_page(page);			/* Drop the rmap reference */
  	put_page(page);			/* Drop the LRU isolation reference */
  
  	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
  	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1810
1811
1812
1813
  	mod_zone_page_state(page_zone(page),
  			NR_ISOLATED_ANON + page_lru,
  			-HPAGE_PMD_NR);
  	return isolated;
340ef3902   Hugh Dickins   mm: numa: cleanup...
1814
1815
  out_fail:
  	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1816
  out_dropref:
2b4847e73   Mel Gorman   mm: numa: seriali...
1817
1818
1819
  	ptl = pmd_lock(mm, pmd);
  	if (pmd_same(*pmd, entry)) {
  		entry = pmd_mknonnuma(entry);
f714f4f20   Mel Gorman   mm: numa: call MM...
1820
  		set_pmd_at(mm, mmun_start, pmd, entry);
2b4847e73   Mel Gorman   mm: numa: seriali...
1821
1822
1823
  		update_mmu_cache_pmd(vma, address, &entry);
  	}
  	spin_unlock(ptl);
a54a407fb   Mel Gorman   mm: Close races b...
1824

eb4489f69   Mel Gorman   mm: numa: avoid u...
1825
  out_unlock:
340ef3902   Hugh Dickins   mm: numa: cleanup...
1826
  	unlock_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1827
  	put_page(page);
b32967ff1   Mel Gorman   mm: numa: Add THP...
1828
1829
  	return 0;
  }
7039e1dbe   Peter Zijlstra   mm: migrate: Intr...
1830
1831
1832
  #endif /* CONFIG_NUMA_BALANCING */
  
  #endif /* CONFIG_NUMA */