mm/migrate.c

/*
 * Memory Migration functionality - linux/mm/migration.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
 * undesirable, use migrate_prep_local()
 */
int migrate_prep(void)
{
	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

/* Do the necessary work of migrate_prep but not if it involves other CPUs */
int migrate_prep_local(void)
{
	lru_add_drain();

	return 0;
}

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * This function shall be used whenever the isolated pageset has been
 * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range()
 * and isolate_huge_page().
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		if (unlikely(isolated_balloon_page(page)))
			balloon_page_putback(page);
		else
			putback_lru_page(page);
	}
}

/*
 * Restore a potential migration pte to a working pte entry
 */
static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct mm_struct *mm = vma->vm_mm;
	swp_entry_t entry;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;

	if (unlikely(PageHuge(new))) {
		ptep = huge_pte_offset(mm, addr);
		if (!ptep)
			goto out;
		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
	} else {
		pmd = mm_find_pmd(mm, addr);
		if (!pmd)
			goto out;

		ptep = pte_offset_map(pmd, addr);

		/*
		 * Peek to check is_swap_pte() before taking ptlock?  No, we
		 * can race mremap's move_ptes(), which skips anon_vma lock.
		 */

		ptl = pte_lockptr(mm, pmd);
	}

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto unlock;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry) ||
	    migration_entry_to_page(entry) != old)
		goto unlock;

	get_page(new);
	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
	if (pte_swp_soft_dirty(*ptep))
		pte = pte_mksoft_dirty(pte);
	if (is_write_migration_entry(entry))
		pte = pte_mkwrite(pte);
#ifdef CONFIG_HUGETLB_PAGE
	if (PageHuge(new)) {
		pte = pte_mkhuge(pte);
		pte = arch_make_huge_pte(pte, vma, new, 0);
	}
#endif
	flush_dcache_page(new);
	set_pte_at(mm, addr, ptep, pte);

	if (PageHuge(new)) {
		if (PageAnon(new))
			hugepage_add_anon_rmap(new, vma, addr);
		else
			page_dup_rmap(new);
	} else if (PageAnon(new))
		page_add_anon_rmap(new, vma, addr);
	else
		page_add_file_rmap(new);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, ptep);
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return SWAP_AGAIN;
}

/*
 * Congratulations to trinity for discovering this bug.
 * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
 * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
 * replace the specified range by file ptes throughout (maybe populated after).
 * If page migration finds a page within that range, while it's still located
 * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
 * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
 * But if the migrating page is in a part of the vma outside the range to be
 * remapped, then it will not be cleared, and remove_migration_ptes() needs to
 * deal with it.  Fortunately, this part of the vma is of course still linear,
 * so we just need to use linear location on the nonlinear list.
 */
static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
		struct address_space *mapping, void *arg)
{
	struct vm_area_struct *vma;
	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	unsigned long addr;

	list_for_each_entry(vma,
		&mapping->i_mmap_nonlinear, shared.nonlinear) {

		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		if (addr >= vma->vm_start && addr < vma->vm_end)
			remove_migration_pte(page, vma, addr, arg);
	}
	return SWAP_AGAIN;
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
static void remove_migration_ptes(struct page *old, struct page *new)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
	};

	rmap_walk(new, &rwc);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);

	/*
	 * Once radix-tree replacement of page migration started, page_count
	 * *must* be zero. And, we don't want to call wait_on_page_locked()
	 * against a page without get_page().
	 * So, we use get_page_unless_zero(), here. Even failed, page fault
	 * will occur again.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	wait_on_page_locked(page);
	put_page(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}

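/*
 * Wait for a migration entry found at @address: take the pte lock for
 * @pmd and let __migration_entry_wait() do the check-and-wait.
 */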
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	__migration_entry_wait(mm, ptep, ptl);
}

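/*
 * Hugetlb variant: the pte lock for a huge page depends on the hstate,
 * so it is looked up with huge_pte_lockptr() before waiting.
 */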
void migration_entry_wait_huge(struct vm_area_struct *vma,
		struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
	__migration_entry_wait(mm, pte, ptl);
}

#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	struct buffer_head *bh = head;

	/* Simple case, sync compaction */
	if (mode != MIGRATE_ASYNC) {
		do {
			get_bh(bh);
			lock_buffer(bh);
			bh = bh->b_this_page;

		} while (bh != head);

		return true;
	}

	/* async case, we cannot block on lock_buffer so use trylock_buffer */
	do {
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			/*
			 * We failed to lock the buffer and cannot stall in
			 * async migration. Release the taken locks
			 */
			struct buffer_head *failed_bh = bh;
			put_bh(failed_bh);
			bh = head;
			while (bh != failed_bh) {
				unlock_buffer(bh);
				put_bh(bh);
				bh = bh->b_this_page;
			}
			return false;
		}

		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}
#else
static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	return true;
}
#endif /* CONFIG_BLOCK */

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
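/*
 * The extra_count argument accounts for references the caller already
 * holds on the page beyond those implied by the mapping.
 */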
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page,
		struct buffer_head *head, enum migrate_mode mode,
		int extra_count)
{
	int expected_count = 1 + extra_count;
	void **pslot;

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;
		return MIGRATEPAGE_SUCCESS;
	}

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count += 1 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_freeze_refs(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * In the async migration case of moving a page with buffers, lock the
	 * buffers using trylock before the mapping is moved. If the mapping
	 * was moved, we later failed to lock the buffers and could not move
	 * the mapping back due to an elevated page count, we would have to
	 * block waiting on other references to be dropped.
	 */
	if (mode == MIGRATE_ASYNC && head &&
			!buffer_migrate_lock_buffers(head, mode)) {
		page_unfreeze_refs(page, expected_count);
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 */
	get_page(newpage);	/* add cache reference */
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	radix_tree_replace_slot(pslot, newpage);

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_unfreeze_refs(page, expected_count - 1);

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
	 * are mapped to swap space.
	 */
	__dec_zone_page_state(page, NR_FILE_PAGES);
	__inc_zone_page_state(newpage, NR_FILE_PAGES);
	if (!PageSwapCache(page) && PageSwapBacked(page)) {
		__dec_zone_page_state(page, NR_SHMEM);
		__inc_zone_page_state(newpage, NR_SHMEM);
	}
	spin_unlock_irq(&mapping->tree_lock);

	return MIGRATEPAGE_SUCCESS;
}

/*
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping().
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	int expected_count;
	void **pslot;

	if (!mapping) {
		if (page_count(page) != 1)
			return -EAGAIN;
		return MIGRATEPAGE_SUCCESS;
	}

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_freeze_refs(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	get_page(newpage);

	radix_tree_replace_slot(pslot, newpage);

	page_unfreeze_refs(page, expected_count - 1);

	spin_unlock_irq(&mapping->tree_lock);
	return MIGRATEPAGE_SUCCESS;
}

/*
 * Gigantic pages are so large that we do not guarantee that page++ pointer
 * arithmetic will work across the entire page.  We need something more
 * specialized.
 */
static void __copy_gigantic_page(struct page *dst, struct page *src,
				int nr_pages)
{
	int i;
	struct page *dst_base = dst;
	struct page *src_base = src;

	for (i = 0; i < nr_pages; ) {
		cond_resched();
		copy_highpage(dst, src);

		i++;
		dst = mem_map_next(dst, dst_base, i);
		src = mem_map_next(src, src_base, i);
	}
}

static void copy_huge_page(struct page *dst, struct page *src)
{
	int i;
	int nr_pages;

	if (PageHuge(src)) {
		/* hugetlbfs page */
		struct hstate *h = page_hstate(src);
		nr_pages = pages_per_huge_page(h);

		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
			__copy_gigantic_page(dst, src, nr_pages);
			return;
		}
	} else {
		/* thp page */
		BUG_ON(!PageTransHuge(src));
		nr_pages = hpage_nr_pages(src);
	}

	for (i = 0; i < nr_pages; i++) {
		cond_resched();
		copy_highpage(dst + i, src + i);
	}
}

/*
 * Copy the page to its new location
 */
void migrate_page_copy(struct page *newpage, struct page *page)
{
	int cpupid;

	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);

	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		/*
		 * Want to mark the page and the radix tree as dirty, and
		 * redo the accounting that clear_page_dirty_for_io undid,
		 * but we can't use set_page_dirty because that function
		 * is actually a signal that all of the page has become dirty.
		 * Whereas only part of our page may be dirty.
		 */
		if (PageSwapBacked(page))
			SetPageDirty(newpage);
		else
			__set_page_dirty_nobuffers(newpage);
	}

	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
	cpupid = page_cpupid_xchg_last(page, -1);
	page_cpupid_xchg_last(newpage, cpupid);

	mlock_migrate_page(newpage, page);
	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}

/************************************************************
 *                    Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	migrate_page_copy(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);

#ifdef CONFIG_BLOCK
/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct buffer_head *bh, *head;
	int rc;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page, mode);

	head = page_buffers(page);

	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	/*
	 * In the async case, migrate_page_move_mapping locked the buffers
	 * with an IRQ-safe spinlock held. In the sync case, the buffers
	 * need to be locked now
	 */
	if (mode != MIGRATE_ASYNC)
		BUG_ON(!buffer_migrate_lock_buffers(head, mode));

	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(buffer_migrate_page);
#endif

/*
 * Writeback a page to clean the dirty state
 */
static int writeout(struct address_space *mapping, struct page *page)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = 1,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1
	};
	int rc;

	if (!mapping->a_ops->writepage)
		/* No write method for the address space */
		return -EINVAL;

	if (!clear_page_dirty_for_io(page))
		/* Someone else already triggered a write */
		return -EAGAIN;

	/*
	 * A dirty page may imply that the underlying filesystem has
	 * the page on some queue. So the page must be clean for
	 * migration. Writeout may mean we lose the lock and the
	 * page state is no longer what we checked for earlier.
	 * At this point we know that the migration attempt cannot
	 * be successful.
	 */
	remove_migration_ptes(page, page);

	rc = mapping->a_ops->writepage(page, &wbc);

	if (rc != AOP_WRITEPAGE_ACTIVATE)
		/* unlocked. Relock */
		lock_page(page);

	return (rc < 0) ? -EIO : -EAGAIN;
}

/*
 * Default handling if a filesystem does not provide a migration function.
 */
static int fallback_migrate_page(struct address_space *mapping,
	struct page *newpage, struct page *page, enum migrate_mode mode)
{
	if (PageDirty(page)) {
		/* Only writeback pages in full synchronous migration */
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		return writeout(mapping, page);
	}

	/*
	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
	 */
	if (page_has_private(page) &&
	    !try_to_release_page(page, GFP_KERNEL))
		return -EAGAIN;

	return migrate_page(mapping, newpage, page, mode);
}

/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */
static int move_to_new_page(struct page *newpage, struct page *page,
				int remap_swapcache, enum migrate_mode mode)
{
	struct address_space *mapping;
	int rc;

	/*
	 * Block others from accessing the page when we get around to
	 * establishing additional references. We are the only one
	 * holding a reference to the new page at this point.
	 */
	if (!trylock_page(newpage))
		BUG();

	/* Prepare mapping for the new page.*/
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	if (PageSwapBacked(page))
		SetPageSwapBacked(newpage);

	mapping = page_mapping(page);
	if (!mapping)
		rc = migrate_page(mapping, newpage, page, mode);
	else if (mapping->a_ops->migratepage)
		/*
		 * Most pages have a mapping and most filesystems provide a
		 * migratepage callback. Anonymous pages are part of swap
		 * space which also has its own migratepage callback. This
		 * is the most common path for page migration.
		 */
		rc = mapping->a_ops->migratepage(mapping,
						newpage, page, mode);
	else
		rc = fallback_migrate_page(mapping, newpage, page, mode);

	if (rc != MIGRATEPAGE_SUCCESS) {
		newpage->mapping = NULL;
	} else {
		if (remap_swapcache)
			remove_migration_ptes(page, newpage);
		page->mapping = NULL;
	}

	unlock_page(newpage);

	return rc;
}

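/*
 * Do the actual work of unmap_and_move(): lock the old page, replace its
 * ptes with migration entries, move the contents to @newpage and then
 * restore (or drop) the migration ptes.
 */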
static int __unmap_and_move(struct page *page, struct page *newpage,
				int force, enum migrate_mode mode)
{
	int rc = -EAGAIN;
	int remap_swapcache = 1;
	struct mem_cgroup *mem;
	struct anon_vma *anon_vma = NULL;

	if (!trylock_page(page)) {
		if (!force || mode == MIGRATE_ASYNC)
			goto out;

		/*
		 * It's not safe for direct compaction to call lock_page.
		 * For example, during page readahead pages are added locked
		 * to the LRU. Later, when the IO completes the pages are
		 * marked uptodate and unlocked. However, the queueing
		 * could be merging multiple pages for one bio (e.g.
		 * mpage_readpages). If an allocation happens for the
		 * second or third page, the process can end up locking
		 * the same page twice and deadlocking. Rather than
		 * trying to be clever about what pages can be locked,
		 * avoid the use of lock_page for direct compaction
		 * altogether.
		 */
		if (current->flags & PF_MEMALLOC)
			goto out;

		lock_page(page);
	}

	/* charge against new page */
	mem_cgroup_prepare_migration(page, newpage, &mem);

	if (PageWriteback(page)) {
		/*
		 * Only in the case of a full synchronous migration is it
		 * necessary to wait for PageWriteback. In the async case,
		 * the retry loop is too short and in the sync-light case,
		 * the overhead of stalling is too much
		 */
		if (mode != MIGRATE_SYNC) {
			rc = -EBUSY;
			goto uncharge;
		}
		if (!force)
			goto uncharge;
		wait_on_page_writeback(page);
	}
	/*
	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
	 * we cannot notice that anon_vma is freed while we migrate a page.
	 * This get_anon_vma() delays freeing anon_vma pointer until the end
	 * of migration. File cache pages are no problem because of page_lock().
	 * File cache pages may use write_page() or lock_page() in migration,
	 * so only anonymous pages need care here.
	 */
	if (PageAnon(page) && !PageKsm(page)) {
		/*
		 * Only page_lock_anon_vma_read() understands the subtleties of
		 * getting a hold on an anon_vma from outside one of its mms.
		 */
		anon_vma = page_get_anon_vma(page);
		if (anon_vma) {
			/*
			 * Anon page
			 */
		} else if (PageSwapCache(page)) {
			/*
			 * We cannot be sure that the anon_vma of an unmapped
			 * swapcache page is safe to use because we don't
			 * know in advance if the VMA that this page belonged
			 * to still exists. If the VMA and others sharing the
			 * data have been freed, then the anon_vma could
			 * already be invalid.
			 *
			 * To avoid this possibility, swapcache pages get
			 * migrated but are not remapped when migration
			 * completes
			 */
			remap_swapcache = 0;
		} else {
			goto uncharge;
		}
	}

	if (unlikely(balloon_page_movable(page))) {
		/*
		 * A ballooned page does not need any special attention from
		 * physical to virtual reverse mapping procedures.
		 * Skip any attempt to unmap PTEs or to remap swap cache,
		 * in order to avoid burning cycles at rmap level, and perform
		 * the page migration right away (protected by page lock).
		 */
		rc = balloon_page_migrate(newpage, page, mode);
		goto uncharge;
	}
	/*
	 * Corner case handling:
	 * 1. When a new swap-cache page is read into, it is added to the LRU
	 * and treated as swapcache but it has no rmap yet.
	 * Calling try_to_unmap() against a page->mapping==NULL page will
	 * trigger a BUG.  So handle it here.
	 * 2. An orphaned page (see truncate_complete_page) might have
	 * fs-private metadata. The page can be picked up due to memory
	 * offlining.  Everywhere else except page reclaim, the page is
	 * invisible to the vm, so the page can not be migrated.  So try to
	 * free the metadata, so the page can be freed.
	 */
	if (!page->mapping) {
		VM_BUG_ON_PAGE(PageAnon(page), page);
		if (page_has_private(page)) {
			try_to_free_buffers(page);
			goto uncharge;
		}
		goto skip_unmap;
	}

	/* Establish migration ptes or remove ptes */
	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

skip_unmap:
	if (!page_mapped(page))
		rc = move_to_new_page(newpage, page, remap_swapcache, mode);

	if (rc && remap_swapcache)
		remove_migration_ptes(page, page);

	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);

uncharge:
	mem_cgroup_end_migration(mem, page, newpage,
				 (rc == MIGRATEPAGE_SUCCESS ||
				  rc == MIGRATEPAGE_BALLOON_SUCCESS));
	unlock_page(page);
out:
	return rc;
}

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
			unsigned long private, struct page *page, int force,
			enum migrate_mode mode)
{
	int rc = 0;
	int *result = NULL;
	struct page *newpage = get_new_page(page, private, &result);

	if (!newpage)
		return -ENOMEM;

	if (page_count(page) == 1) {
		/* page was freed from under us. So we are done. */
		goto out;
	}

	if (unlikely(PageTransHuge(page)))
		if (unlikely(split_huge_page(page)))
			goto out;

	rc = __unmap_and_move(page, newpage, force, mode);

	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
		/*
		 * A ballooned page has been migrated already.
		 * Now it is time to wrap up the counters,
		 * hand the page back to Buddy and return.
		 */
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				    page_is_file_cache(page));
		balloon_page_free(page);
		return MIGRATEPAGE_SUCCESS;
	}
out:
	if (rc != -EAGAIN) {
		/*
		 * A page that has been migrated has all references
		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be
		 * restored.
		 */
		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		putback_lru_page(page);
	}

	/*
	 * If migration was not successful and there's a freeing callback, use
	 * it.  Otherwise, putback_lru_page() will drop the reference grabbed
	 * during isolation.
	 */
	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
		put_new_page(newpage, private);
	else
		putback_lru_page(newpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(newpage);
	}
	return rc;
}

/*
 * Counterpart of unmap_and_move() for hugepage migration.
 *
 * This function doesn't wait for the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and writeback status of all subpages are counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 */
static int unmap_and_move_huge_page(new_page_t get_new_page,
				free_page_t put_new_page, unsigned long private,
				struct page *hpage, int force,
				enum migrate_mode mode)
{
	int rc = 0;
	int *result = NULL;
	struct page *new_hpage;
	struct anon_vma *anon_vma = NULL;

	/*
	 * Movability of hugepages depends on architectures and hugepage size.
	 * This check is necessary because some callers of hugepage migration
	 * like soft offline and memory hotremove don't walk through page
	 * tables or check whether the hugepage is pmd-based or not before
	 * kicking migration.
	 */
	if (!hugepage_migration_supported(page_hstate(hpage))) {
		putback_active_hugepage(hpage);
		return -ENOSYS;
	}

	new_hpage = get_new_page(hpage, private, &result);
	if (!new_hpage)
		return -ENOMEM;

	rc = -EAGAIN;

	if (!trylock_page(hpage)) {
		if (!force || mode != MIGRATE_SYNC)
			goto out;
		lock_page(hpage);
	}

	if (PageAnon(hpage))
		anon_vma = page_get_anon_vma(hpage);

	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

	if (!page_mapped(hpage))
		rc = move_to_new_page(new_hpage, hpage, 1, mode);

	if (rc != MIGRATEPAGE_SUCCESS)
		remove_migration_ptes(hpage, hpage);

	if (anon_vma)
		put_anon_vma(anon_vma);

	if (rc == MIGRATEPAGE_SUCCESS)
		hugetlb_cgroup_migrate(hpage, new_hpage);

	unlock_page(hpage);
out:
	if (rc != -EAGAIN)
		putback_active_hugepage(hpage);

	/*
	 * If migration was not successful and there's a freeing callback, use
	 * it.  Otherwise, put_page() will drop the reference grabbed during
	 * isolation.
	 */
	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
		put_new_page(new_hpage, private);
	else
		put_page(new_hpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(new_hpage);
	}
	return rc;
}

/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @put_new_page:	The function used to free target pages if migration
 *			fails, or NULL if no special handling is necessary.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the constraints for
 *			page migration, if any.
 * @reason:		The reason for page migration.
 *
 * The function returns after 10 attempts or when no pages are movable any
 * more, either because the list has become empty or because no retryable
 * pages remain.  The caller should call putback_movable_pages() to return
 * pages to the LRU or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */
int migrate_pages(struct list_head *from, new_page_t get_new_page,
		free_page_t put_new_page, unsigned long private,
		enum migrate_mode mode, int reason)
{
	int retry = 1;
	int nr_failed = 0;
	int nr_succeeded = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
			cond_resched();

			if (PageHuge(page))
				rc = unmap_and_move_huge_page(get_new_page,
						put_new_page, private, page,
						pass > 2, mode);
			else
				rc = unmap_and_move(get_new_page, put_new_page,
						private, page, pass > 2, mode);

			switch (rc) {
			case -ENOMEM:
				goto out;
			case -EAGAIN:
				retry++;
				break;
			case MIGRATEPAGE_SUCCESS:
				nr_succeeded++;
				break;
			default:
				/*
				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
				 * unlike -EAGAIN case, the failed page is
				 * removed from migration page list and not
				 * retried in the next outer loop.
				 */
				nr_failed++;
				break;
			}
		}
	}
	rc = nr_failed + retry;
out:
	if (nr_succeeded)
		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
	if (nr_failed)
		count_vm_events(PGMIGRATE_FAIL, nr_failed);
	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return rc;
}
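
/*
 * Illustrative sketch only, not part of mm/migrate.c: the usual calling
 * pattern for migrate_pages().  The "example_" names below are hypothetical;
 * real callers such as do_move_page_to_node_array() further down follow the
 * same shape: supply an allocation callback, optionally a freeing callback,
 * and put back whatever could not be migrated.
 */
static struct page *example_alloc_dst(struct page *page, unsigned long private,
				      int **result)
{
	/* Allocate the destination page on the node passed via @private. */
	return alloc_pages_exact_node((int)private,
				      GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
}

static void example_free_dst(struct page *page, unsigned long private)
{
	/* Called only for destination pages that ended up unused. */
	put_page(page);
}

static int __maybe_unused example_migrate_list(struct list_head *pagelist,
					       int target_node)
{
	int err;

	err = migrate_pages(pagelist, example_alloc_dst, example_free_dst,
			    (unsigned long)target_node, MIGRATE_SYNC,
			    MR_SYSCALL);
	if (err)
		putback_movable_pages(pagelist);	/* return the leftovers */
	return err;
}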

#ifdef CONFIG_NUMA
/*
 * Move a list of individual pages
 */
struct page_to_node {
	unsigned long addr;
	struct page *page;
	int node;
	int status;
};

static struct page *new_page_node(struct page *p, unsigned long private,
		int **result)
{
	struct page_to_node *pm = (struct page_to_node *)private;

	while (pm->node != MAX_NUMNODES && pm->page != p)
		pm++;

	if (pm->node == MAX_NUMNODES)
		return NULL;

	*result = &pm->status;

	if (PageHuge(p))
		return alloc_huge_page_node(page_hstate(compound_head(p)),
					pm->node);
	else
		return alloc_pages_exact_node(pm->node,
				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
}
  
/*
 * Move a set of pages as indicated in the pm array. The addr
 * field must be set to the virtual address of the page to be moved
 * and the node number must contain a valid target node.
 * The pm array ends with node = MAX_NUMNODES.
 */
static int do_move_page_to_node_array(struct mm_struct *mm,
				      struct page_to_node *pm,
				      int migrate_all)
{
	int err;
	struct page_to_node *pp;
	LIST_HEAD(pagelist);

	down_read(&mm->mmap_sem);

	/*
	 * Build a list of pages to migrate
	 */
	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
		struct vm_area_struct *vma;
		struct page *page;

		err = -EFAULT;
		vma = find_vma(mm, pp->addr);
		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
			goto set_status;

		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = -ENOENT;
		if (!page)
			goto set_status;

		/* Use PageReserved to check for zero page */
		if (PageReserved(page))
			goto put_and_set;

		pp->page = page;
		err = page_to_nid(page);

		if (err == pp->node)
			/*
			 * Node already in the right place
			 */
			goto put_and_set;

		err = -EACCES;
		if (page_mapcount(page) > 1 &&
				!migrate_all)
			goto put_and_set;

		if (PageHuge(page)) {
			isolate_huge_page(page, &pagelist);
			goto put_and_set;
		}

		err = isolate_lru_page(page);
		if (!err) {
			list_add_tail(&page->lru, &pagelist);
			inc_zone_page_state(page, NR_ISOLATED_ANON +
					    page_is_file_cache(page));
		}
put_and_set:
		/*
		 * Either remove the duplicate refcount from
		 * isolate_lru_page() or drop the page ref if it was
		 * not isolated.
		 */
		put_page(page);
set_status:
		pp->status = err;
	}

	err = 0;
	if (!list_empty(&pagelist)) {
		err = migrate_pages(&pagelist, new_page_node, NULL,
				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
		if (err)
			putback_movable_pages(&pagelist);
	}

	up_read(&mm->mmap_sem);
	return err;
}
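
/*
 * Illustrative sketch only, not part of mm/migrate.c: how a caller builds the
 * pm array consumed above, terminated by a MAX_NUMNODES sentinel.  The
 * "example_" name is hypothetical; the real caller, do_pages_move() below,
 * fills the array from user space in page-sized chunks.
 */
static int __maybe_unused example_move_two_pages(struct mm_struct *mm,
						 unsigned long addr0,
						 unsigned long addr1,
						 int target_node)
{
	struct page_to_node pm[3] = {
		{ .addr = addr0, .node = target_node },
		{ .addr = addr1, .node = target_node },
		{ .node = MAX_NUMNODES },		/* end marker */
	};

	return do_move_page_to_node_array(mm, pm, 0 /* !MPOL_MF_MOVE_ALL */);
}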
  
/*
 * Migrate an array of page addresses onto an array of nodes and fill
 * the corresponding array of status.
 */
static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
			 unsigned long nr_pages,
			 const void __user * __user *pages,
			 const int __user *nodes,
			 int __user *status, int flags)
{
	struct page_to_node *pm;
	unsigned long chunk_nr_pages;
	unsigned long chunk_start;
	int err;

	err = -ENOMEM;
	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
	if (!pm)
		goto out;

	migrate_prep();

	/*
	 * Store a chunk of page_to_node array in a page,
	 * but keep the last one as a marker
	 */
	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
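	/*
	 * Worked example, assuming a 64-bit build with 4K pages:
	 * sizeof(struct page_to_node) is 24 bytes, so each chunk holds
	 * 4096 / 24 - 1 = 169 user entries plus the end marker.
	 */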

	for (chunk_start = 0;
	     chunk_start < nr_pages;
	     chunk_start += chunk_nr_pages) {
		int j;

		if (chunk_start + chunk_nr_pages > nr_pages)
			chunk_nr_pages = nr_pages - chunk_start;

		/* fill the chunk pm with addrs and nodes from user-space */
		for (j = 0; j < chunk_nr_pages; j++) {
			const void __user *p;
			int node;

			err = -EFAULT;
			if (get_user(p, pages + j + chunk_start))
				goto out_pm;
			pm[j].addr = (unsigned long) p;

			if (get_user(node, nodes + j + chunk_start))
				goto out_pm;

			err = -ENODEV;
			if (node < 0 || node >= MAX_NUMNODES)
				goto out_pm;

			if (!node_state(node, N_MEMORY))
				goto out_pm;

			err = -EACCES;
			if (!node_isset(node, task_nodes))
				goto out_pm;

			pm[j].node = node;
		}

		/* End marker for this chunk */
		pm[chunk_nr_pages].node = MAX_NUMNODES;

		/* Migrate this chunk */
		err = do_move_page_to_node_array(mm, pm,
						 flags & MPOL_MF_MOVE_ALL);
		if (err < 0)
			goto out_pm;

		/* Return status information */
		for (j = 0; j < chunk_nr_pages; j++)
			if (put_user(pm[j].status, status + j + chunk_start)) {
				err = -EFAULT;
				goto out_pm;
			}
	}
	err = 0;

out_pm:
	free_page((unsigned long)pm);
out:
	return err;
}
  
/*
 * Determine the nodes of an array of pages and store it in an array of status.
 */
static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
				const void __user **pages, int *status)
{
	unsigned long i;

	down_read(&mm->mmap_sem);

	for (i = 0; i < nr_pages; i++) {
		unsigned long addr = (unsigned long)(*pages);
		struct vm_area_struct *vma;
		struct page *page;
		int err = -EFAULT;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			goto set_status;

		page = follow_page(vma, addr, 0);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = -ENOENT;
		/* Use PageReserved to check for zero page */
		if (!page || PageReserved(page))
			goto set_status;

		err = page_to_nid(page);
set_status:
		*status = err;

		pages++;
		status++;
	}

	up_read(&mm->mmap_sem);
}
  
/*
 * Determine the nodes of a user array of pages and store it in
 * a user array of status.
 */
static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
			 const void __user * __user *pages,
			 int __user *status)
{
#define DO_PAGES_STAT_CHUNK_NR 16
	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
	int chunk_status[DO_PAGES_STAT_CHUNK_NR];

	while (nr_pages) {
		unsigned long chunk_nr;

		chunk_nr = nr_pages;
		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
			chunk_nr = DO_PAGES_STAT_CHUNK_NR;

		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
			break;

		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);

		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
			break;

		pages += chunk_nr;
		status += chunk_nr;
		nr_pages -= chunk_nr;
	}
	return nr_pages ? -EFAULT : 0;
}
  
/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
		const void __user * __user *, pages,
		const int __user *, nodes,
		int __user *, status, int, flags)
{
	const struct cred *cred = current_cred(), *tcred;
	struct task_struct *task;
	struct mm_struct *mm;
	int err;
	nodemask_t task_nodes;

	/* Check flags */
	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
		return -EINVAL;

	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	/* Find the mm_struct */
	rcu_read_lock();
	task = pid ? find_task_by_vpid(pid) : current;
	if (!task) {
		rcu_read_unlock();
		return -ESRCH;
	}
	get_task_struct(task);

	/*
	 * Check if this process has the right to modify the specified
	 * process. The right exists if the process has administrative
	 * capabilities, superuser privileges or the same
	 * userid as the target process.
	 */
	tcred = __task_cred(task);
	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
	    !uid_eq(cred->uid,  tcred->suid) && !uid_eq(cred->uid,  tcred->uid) &&
	    !capable(CAP_SYS_NICE)) {
		rcu_read_unlock();
		err = -EPERM;
		goto out;
	}
	rcu_read_unlock();

	err = security_task_movememory(task);
	if (err)
		goto out;

	task_nodes = cpuset_mems_allowed(task);
	mm = get_task_mm(task);
	put_task_struct(task);

	if (!mm)
		return -EINVAL;

	if (nodes)
		err = do_pages_move(mm, task_nodes, nr_pages, pages,
				    nodes, status, flags);
	else
		err = do_pages_stat(mm, nr_pages, pages, status);

	mmput(mm);
	return err;

out:
	put_task_struct(task);
	return err;
}
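
/*
 * Illustrative user-space sketch, not kernel code, fenced out below: driving
 * the move_pages() system call defined above through libnuma's wrapper from
 * <numaif.h>.  A NULL "nodes" array turns the call into a pure status query
 * (the do_pages_stat() path); a non-NULL array requests migration (the
 * do_pages_move() path).  On return, each status[] slot holds the page's node
 * or a negative errno for that page.
 */
#if 0	/* user-space example only, never compiled here */
#include <numaif.h>	/* move_pages(), MPOL_MF_MOVE */
#include <stdio.h>

static void example_move_to_node(void *addr, int target_node)
{
	void *pages[1] = { addr };
	int nodes[1] = { target_node };
	int status[1];

	/* pid 0 means "the calling process". */
	if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
		printf("page now on node %d\n", status[0]);
	else
		perror("move_pages");
}
#endif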

/*
 * Call migration functions in the vma_ops that may prepare
 * memory in a vm for migration. Migration functions may perform
 * the migration for vmas that do not have an underlying page struct.
 */
int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
	const nodemask_t *from, unsigned long flags)
{
	struct vm_area_struct *vma;
	int err = 0;

	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
		if (vma->vm_ops && vma->vm_ops->migrate) {
			err = vma->vm_ops->migrate(vma, to, from, flags);
			if (err)
				break;
		}
	}
	return err;
}

#ifdef CONFIG_NUMA_BALANCING
/*
 * Returns true if this is a safe migration target node for misplaced NUMA
 * pages. Currently it only checks the watermarks, which is crude.
 */
static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
				   unsigned long nr_migrate_pages)
{
	int z;
	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		if (!zone_reclaimable(zone))
			continue;

		/* Avoid waking kswapd by allocating pages_to_migrate pages. */
		if (!zone_watermark_ok(zone, 0,
				       high_wmark_pages(zone) +
				       nr_migrate_pages,
				       0, 0))
			continue;
		return true;
	}
	return false;
}

static struct page *alloc_misplaced_dst_page(struct page *page,
					   unsigned long data,
					   int **result)
{
	int nid = (int) data;
	struct page *newpage;

	newpage = alloc_pages_exact_node(nid,
					 (GFP_HIGHUSER_MOVABLE |
					  __GFP_THISNODE | __GFP_NOMEMALLOC |
					  __GFP_NORETRY | __GFP_NOWARN) &
					 ~GFP_IOFS, 0);

	return newpage;
}

/*
 * Page migration rate limiting control.
 * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
 * window of time. Default here says do not migrate more than 1280M per second.
 * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
 * as it is faults that reset the window, pte updates will happen unconditionally
 * if there has not been a fault since @pteupdate_interval_millisecs after the
 * throttle window closed.
 */
static unsigned int migrate_interval_millisecs __read_mostly = 100;
static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
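/*
 * Worked example, assuming 4K pages (PAGE_SHIFT == 12): ratelimit_pages is
 * 128 << (20 - 12) = 32768 pages, i.e. 128MB per 100ms window, which is the
 * "1280M per second" ceiling described in the comment above.
 */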

/* Returns true if NUMA migration is currently rate limited */
bool migrate_ratelimited(int node)
{
	pg_data_t *pgdat = NODE_DATA(node);

	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
				msecs_to_jiffies(pteupdate_interval_millisecs)))
		return false;

	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
		return false;

	return true;
}

/* Returns true if the node is migrate rate-limited after the update */
static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
					unsigned long nr_pages)
{
	/*
	 * Rate-limit the amount of data that is being migrated to a node.
	 * Optimal placement is no good if the memory bus is saturated and
	 * all the time is being spent migrating!
	 */
	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
		spin_lock(&pgdat->numabalancing_migrate_lock);
		pgdat->numabalancing_migrate_nr_pages = 0;
		pgdat->numabalancing_migrate_next_window = jiffies +
			msecs_to_jiffies(migrate_interval_millisecs);
		spin_unlock(&pgdat->numabalancing_migrate_lock);
	}
	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
								nr_pages);
		return true;
	}

	/*
	 * This is an unlocked non-atomic update so errors are possible.
	 * The consequences are failing to migrate when we potentially should
	 * have, which is not severe enough to warrant locking. If it is ever
	 * a problem, it can be converted to a per-cpu counter.
	 */
	pgdat->numabalancing_migrate_nr_pages += nr_pages;
	return false;
}

static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
	int page_lru;

	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);

	/* Avoid migrating to a node that is nearly full */
	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
		return 0;

	if (isolate_lru_page(page))
		return 0;

	/*
	 * migrate_misplaced_transhuge_page() skips page migration's usual
	 * check on page_count(), so we must do it here, now that the page
	 * has been isolated: a GUP pin, or any other pin, prevents migration.
	 * The expected page count is 3: 1 for page's mapcount and 1 for the
	 * caller's pin and 1 for the reference taken by isolate_lru_page().
	 */
	if (PageTransHuge(page) && page_count(page) != 3) {
		putback_lru_page(page);
		return 0;
	}

	page_lru = page_is_file_cache(page);
	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
				hpage_nr_pages(page));

	/*
	 * Isolating the page has taken another reference, so the
	 * caller's reference can be safely dropped without the page
	 * disappearing underneath us during migration.
	 */
	put_page(page);
	return 1;
}

bool pmd_trans_migrating(pmd_t pmd)
{
	struct page *page = pmd_page(pmd);
	return PageLocked(page);
}

void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);
	wait_on_page_locked(page);
}

/*
 * Attempt to migrate a misplaced page to the specified destination
 * node. Caller is expected to have an elevated reference count on
 * the page that will be dropped by this function before returning.
 */
int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
			   int node)
{
	pg_data_t *pgdat = NODE_DATA(node);
	int isolated;
	int nr_remaining;
	LIST_HEAD(migratepages);

	/*
	 * Don't migrate file pages that are mapped in multiple processes
	 * with execute permissions as they are probably shared libraries.
	 */
	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
	    (vma->vm_flags & VM_EXEC))
		goto out;

	/*
	 * Rate-limit the amount of data that is being migrated to a node.
	 * Optimal placement is no good if the memory bus is saturated and
	 * all the time is being spent migrating!
	 */
	if (numamigrate_update_ratelimit(pgdat, 1))
		goto out;

	isolated = numamigrate_isolate_page(pgdat, page);
	if (!isolated)
		goto out;

	list_add(&page->lru, &migratepages);
	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
				     NULL, node, MIGRATE_ASYNC,
				     MR_NUMA_MISPLACED);
	if (nr_remaining) {
		if (!list_empty(&migratepages)) {
			list_del(&page->lru);
			dec_zone_page_state(page, NR_ISOLATED_ANON +
					page_is_file_cache(page));
			putback_lru_page(page);
		}
		isolated = 0;
	} else
		count_vm_numa_event(NUMA_PAGE_MIGRATE);
	BUG_ON(!list_empty(&migratepages));
	return isolated;

out:
	put_page(page);
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
/*
 * Migrates a THP to a given target node. page must be locked and is unlocked
 * before returning.
 */
int migrate_misplaced_transhuge_page(struct mm_struct *mm,
				struct vm_area_struct *vma,
				pmd_t *pmd, pmd_t entry,
				unsigned long address,
				struct page *page, int node)
{
	spinlock_t *ptl;
	pg_data_t *pgdat = NODE_DATA(node);
	int isolated = 0;
	struct page *new_page = NULL;
	struct mem_cgroup *memcg = NULL;
	int page_lru = page_is_file_cache(page);
	unsigned long mmun_start = address & HPAGE_PMD_MASK;
	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
	pmd_t orig_entry;

	/*
	 * Rate-limit the amount of data that is being migrated to a node.
	 * Optimal placement is no good if the memory bus is saturated and
	 * all the time is being spent migrating!
	 */
	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
		goto out_dropref;

	new_page = alloc_pages_node(node,
		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
		HPAGE_PMD_ORDER);
	if (!new_page)
		goto out_fail;

	isolated = numamigrate_isolate_page(pgdat, page);
	if (!isolated) {
		put_page(new_page);
		goto out_fail;
	}

	if (mm_tlb_flush_pending(mm))
		flush_tlb_range(vma, mmun_start, mmun_end);

	/* Prepare a page as a migration target */
	__set_page_locked(new_page);
	SetPageSwapBacked(new_page);

	/* anon mapping, we can simply copy page->mapping to the new page: */
	new_page->mapping = page->mapping;
	new_page->index = page->index;
	migrate_page_copy(new_page, page);
	WARN_ON(PageLRU(new_page));

	/* Recheck the target PMD */
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
fail_putback:
		spin_unlock(ptl);
		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);

		/* Reverse changes made by migrate_page_copy() */
		if (TestClearPageActive(new_page))
			SetPageActive(page);
		if (TestClearPageUnevictable(new_page))
			SetPageUnevictable(page);
		mlock_migrate_page(page, new_page);

		unlock_page(new_page);
		put_page(new_page);		/* Free it */

		/* Retake the callers reference and putback on LRU */
		get_page(page);
		putback_lru_page(page);
		mod_zone_page_state(page_zone(page),
			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);

		goto out_unlock;
	}

	/*
	 * Traditional migration needs to prepare the memcg charge
	 * transaction early to prevent the old page from being
	 * uncharged when installing migration entries.  Here we can
	 * save the potential rollback and start the charge transfer
	 * only when migration is already known to end successfully.
	 */
	mem_cgroup_prepare_migration(page, new_page, &memcg);

	orig_entry = *pmd;
	entry = mk_pmd(new_page, vma->vm_page_prot);
	entry = pmd_mkhuge(entry);
	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);

	/*
	 * Clear the old entry under pagetable lock and establish the new PTE.
	 * Any parallel GUP will either observe the old page blocking on the
	 * page lock, block on the page table lock or observe the new page.
	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
	 * guarantee the copy is visible before the pagetable update.
	 */
	flush_cache_range(vma, mmun_start, mmun_end);
	page_add_anon_rmap(new_page, vma, mmun_start);
	pmdp_clear_flush(vma, mmun_start, pmd);
	set_pmd_at(mm, mmun_start, pmd, entry);
	flush_tlb_range(vma, mmun_start, mmun_end);
	update_mmu_cache_pmd(vma, address, &entry);

	if (page_count(page) != 2) {
		set_pmd_at(mm, mmun_start, pmd, orig_entry);
		flush_tlb_range(vma, mmun_start, mmun_end);
		update_mmu_cache_pmd(vma, address, &entry);
		page_remove_rmap(new_page);
		goto fail_putback;
	}

	page_remove_rmap(page);

	/*
	 * Finish the charge transaction under the page table lock to
	 * prevent split_huge_page() from dividing up the charge
	 * before it's fully transferred to the new page.
	 */
	mem_cgroup_end_migration(memcg, page, new_page, true);
	spin_unlock(ptl);
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);

	/* Take an "isolate" reference and put new page on the LRU. */
	get_page(new_page);
	putback_lru_page(new_page);

	unlock_page(new_page);
	unlock_page(page);
	put_page(page);			/* Drop the rmap reference */
	put_page(page);			/* Drop the LRU isolation reference */

	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);

	mod_zone_page_state(page_zone(page),
			NR_ISOLATED_ANON + page_lru,
			-HPAGE_PMD_NR);
	return isolated;

out_fail:
	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
out_dropref:
	ptl = pmd_lock(mm, pmd);
	if (pmd_same(*pmd, entry)) {
		entry = pmd_mknonnuma(entry);
		set_pmd_at(mm, mmun_start, pmd, entry);
		update_mmu_cache_pmd(vma, address, &entry);
	}
	spin_unlock(ptl);

out_unlock:
	unlock_page(page);
	put_page(page);
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_NUMA */