Blame view

mm/migrate.c 26.8 KB
b20a35035   Christoph Lameter   [PATCH] page migr...
1
2
3
4
5
6
7
8
9
10
11
  /*
   * Memory Migration functionality - linux/mm/migrate.c
   *
   * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
   *
   * Page migration was first developed in the context of the memory hotplug
   * project. The main authors of the migration code are:
   *
   * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
   * Hirokazu Takahashi <taka@valinux.co.jp>
   * Dave Hansen <haveblue@us.ibm.com>
cde535359   Christoph Lameter   Christoph has moved
12
   * Christoph Lameter
b20a35035   Christoph Lameter   [PATCH] page migr...
13
14
15
16
17
   */
  
  #include <linux/migrate.h>
  #include <linux/module.h>
  #include <linux/swap.h>
0697212a4   Christoph Lameter   [PATCH] Swapless ...
18
  #include <linux/swapops.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
19
  #include <linux/pagemap.h>
e23ca00bf   Christoph Lameter   [PATCH] Some page...
20
  #include <linux/buffer_head.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
21
  #include <linux/mm_inline.h>
b488893a3   Pavel Emelyanov   pid namespaces: c...
22
  #include <linux/nsproxy.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
23
  #include <linux/pagevec.h>
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
24
  #include <linux/ksm.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
25
26
27
28
  #include <linux/rmap.h>
  #include <linux/topology.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
04e62a29b   Christoph Lameter   [PATCH] More page...
29
  #include <linux/writeback.h>
742755a1d   Christoph Lameter   [PATCH] page migr...
30
31
  #include <linux/mempolicy.h>
  #include <linux/vmalloc.h>
86c3a7645   David Quigley   [PATCH] SELinux: ...
32
  #include <linux/security.h>
8a9f3ccd2   Balbir Singh   Memory controller...
33
  #include <linux/memcontrol.h>
4f5ca2657   Adrian Bunk   mm/migrate.c shou...
34
  #include <linux/syscalls.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
35
  #include <linux/gfp.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
36
37
  
  #include "internal.h"
b20a35035   Christoph Lameter   [PATCH] page migr...
38
39
40
  #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
  
  /*
742755a1d   Christoph Lameter   [PATCH] page migr...
41
   * migrate_prep() needs to be called before we start compiling a list of pages
748446bb6   Mel Gorman   mm: compaction: m...
42
43
   * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
   * undesirable, use migrate_prep_local()
b20a35035   Christoph Lameter   [PATCH] page migr...
44
45
46
   */
  int migrate_prep(void)
  {
b20a35035   Christoph Lameter   [PATCH] page migr...
47
48
49
50
51
52
53
54
55
56
  	/*
  	 * Clear the LRU lists so pages can be isolated.
  	 * Note that pages may be moved off the LRU after we have
  	 * drained them. Those pages will fail to migrate like other
  	 * pages that may be busy.
  	 */
  	lru_add_drain_all();
  
  	return 0;
  }
748446bb6   Mel Gorman   mm: compaction: m...
57
58
59
60
61
62
63
  /*
   * Do the necessary work of migrate_prep() but not if it involves other
   * CPUs: this variant calls lru_add_drain() where migrate_prep() calls
   * lru_add_drain_all().
   *
   * Always returns 0.
   */
  int migrate_prep_local(void)
  {
  	lru_add_drain();
  
  	return 0;
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
64
  /*
894bc3104   Lee Schermerhorn   Unevictable LRU I...
65
66
   * Add isolated pages on the list back to the LRU under page lock
   * to avoid leaking evictable pages back onto unevictable list.
b20a35035   Christoph Lameter   [PATCH] page migr...
67
   */
e13861d82   Minchan Kim   mm: remove return...
68
  void putback_lru_pages(struct list_head *l)
b20a35035   Christoph Lameter   [PATCH] page migr...
69
70
71
  {
  	struct page *page;
  	struct page *page2;
b20a35035   Christoph Lameter   [PATCH] page migr...
72
73
  
  	list_for_each_entry_safe(page, page2, l, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
74
  		list_del(&page->lru);
a731286de   KOSAKI Motohiro   mm: vmstat: add i...
75
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
6c0b13519   Johannes Weiner   mm: return boolea...
76
  				page_is_file_cache(page));
894bc3104   Lee Schermerhorn   Unevictable LRU I...
77
  		putback_lru_page(page);
b20a35035   Christoph Lameter   [PATCH] page migr...
78
  	}
b20a35035   Christoph Lameter   [PATCH] page migr...
79
  }
0697212a4   Christoph Lameter   [PATCH] Swapless ...
80
81
82
  /*
   * Restore a potential migration pte to a working pte entry
   */
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
83
84
  static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
  				 unsigned long addr, void *old)
0697212a4   Christoph Lameter   [PATCH] Swapless ...
85
86
87
88
89
90
91
92
93
94
95
  {
  	struct mm_struct *mm = vma->vm_mm;
  	swp_entry_t entry;
   	pgd_t *pgd;
   	pud_t *pud;
   	pmd_t *pmd;
  	pte_t *ptep, pte;
   	spinlock_t *ptl;
  
   	pgd = pgd_offset(mm, addr);
  	if (!pgd_present(*pgd))
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
96
  		goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
97
98
99
  
  	pud = pud_offset(pgd, addr);
  	if (!pud_present(*pud))
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
100
  		goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
101
102
103
  
  	pmd = pmd_offset(pud, addr);
  	if (!pmd_present(*pmd))
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
104
  		goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
105
106
107
108
109
  
  	ptep = pte_offset_map(pmd, addr);
  
  	if (!is_swap_pte(*ptep)) {
  		pte_unmap(ptep);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
110
  		goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
111
112
113
114
115
116
   	}
  
   	ptl = pte_lockptr(mm, pmd);
   	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
117
  		goto unlock;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
118
119
  
  	entry = pte_to_swp_entry(pte);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
120
121
122
  	if (!is_migration_entry(entry) ||
  	    migration_entry_to_page(entry) != old)
  		goto unlock;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
123

0697212a4   Christoph Lameter   [PATCH] Swapless ...
124
125
126
127
  	get_page(new);
  	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
  	if (is_write_migration_entry(entry))
  		pte = pte_mkwrite(pte);
97ee05246   KAMEZAWA Hiroyuki   flush cache befor...
128
  	flush_cache_page(vma, addr, pte_pfn(pte));
0697212a4   Christoph Lameter   [PATCH] Swapless ...
129
  	set_pte_at(mm, addr, ptep, pte);
04e62a29b   Christoph Lameter   [PATCH] More page...
130
131
132
133
134
135
136
  
  	if (PageAnon(new))
  		page_add_anon_rmap(new, vma, addr);
  	else
  		page_add_file_rmap(new);
  
  	/* No need to invalidate - it was non-present before */
4b3073e1c   Russell King   MM: Pass a PTE po...
137
  	update_mmu_cache(vma, addr, ptep);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
138
  unlock:
0697212a4   Christoph Lameter   [PATCH] Swapless ...
139
  	pte_unmap_unlock(ptep, ptl);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
140
141
  out:
  	return SWAP_AGAIN;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
142
143
144
  }
  
  /*
04e62a29b   Christoph Lameter   [PATCH] More page...
145
146
147
148
149
   * Get rid of all migration entries and replace them by
   * references to the indicated page.
   */
  static void remove_migration_ptes(struct page *old, struct page *new)
  {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
150
  	rmap_walk(new, remove_migration_pte, old);
04e62a29b   Christoph Lameter   [PATCH] More page...
151
152
153
  }
  
  /*
0697212a4   Christoph Lameter   [PATCH] Swapless ...
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
   * Something used the pte of a page under migration. We need to
   * get to the page and wait until migration is finished.
   * When we return from this function the fault will be retried.
   *
   * This function is called from do_swap_page().
   */
  void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
  				unsigned long address)
  {
  	pte_t *ptep, pte;
  	spinlock_t *ptl;
  	swp_entry_t entry;
  	struct page *page;
  
  	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto out;
  
  	entry = pte_to_swp_entry(pte);
  	if (!is_migration_entry(entry))
  		goto out;
  
  	page = migration_entry_to_page(entry);
e286781d5   Nick Piggin   mm: speculative p...
178
179
180
181
182
183
184
185
186
  	/*
  	 * Once radix-tree replacement of page migration has started,
  	 * page_count *must* be zero. We also do not want to call
  	 * wait_on_page_locked() on a page we hold no reference to, so
  	 * we take one with get_page_unless_zero() here. If that fails we
  	 * simply return, and the page fault will occur again.
  	 */
  	if (!get_page_unless_zero(page))
  		goto out;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
187
188
189
190
191
192
193
  	pte_unmap_unlock(ptep, ptl);
  	wait_on_page_locked(page);
  	put_page(page);
  	return;
  out:
  	pte_unmap_unlock(ptep, ptl);
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
194
  /*
c3fcf8a5d   Christoph Lameter   [PATCH] page migr...
195
   * Replace the page in the mapping.
5b5c7120e   Christoph Lameter   [PATCH] page migr...
196
197
198
199
   *
   * The number of remaining references must be:
   * 1 for anonymous pages without a mapping
   * 2 for pages with a mapping
266cf658e   David Howells   FS-Cache: Recruit...
200
   * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
b20a35035   Christoph Lameter   [PATCH] page migr...
201
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
202
203
  static int migrate_page_move_mapping(struct address_space *mapping,
  		struct page *newpage, struct page *page)
b20a35035   Christoph Lameter   [PATCH] page migr...
204
  {
e286781d5   Nick Piggin   mm: speculative p...
205
  	int expected_count;
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
206
  	void **pslot;
b20a35035   Christoph Lameter   [PATCH] page migr...
207

6c5240ae7   Christoph Lameter   [PATCH] Swapless ...
208
  	if (!mapping) {
0e8c7d0fd   Christoph Lameter   page migration: f...
209
  		/* Anonymous page without mapping */
6c5240ae7   Christoph Lameter   [PATCH] Swapless ...
210
211
212
213
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return 0;
  	}
19fd62312   Nick Piggin   mm: spinlock tree...
214
  	spin_lock_irq(&mapping->tree_lock);
b20a35035   Christoph Lameter   [PATCH] page migr...
215

7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
216
217
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
   					page_index(page));
b20a35035   Christoph Lameter   [PATCH] page migr...
218

edcf4748c   Johannes Weiner   mm: return boolea...
219
  	expected_count = 2 + page_has_private(page);
e286781d5   Nick Piggin   mm: speculative p...
220
  	if (page_count(page) != expected_count ||
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
221
  			(struct page *)radix_tree_deref_slot(pslot) != page) {
19fd62312   Nick Piggin   mm: spinlock tree...
222
  		spin_unlock_irq(&mapping->tree_lock);
e23ca00bf   Christoph Lameter   [PATCH] Some page...
223
  		return -EAGAIN;
b20a35035   Christoph Lameter   [PATCH] page migr...
224
  	}
e286781d5   Nick Piggin   mm: speculative p...
225
  	if (!page_freeze_refs(page, expected_count)) {
19fd62312   Nick Piggin   mm: spinlock tree...
226
  		spin_unlock_irq(&mapping->tree_lock);
e286781d5   Nick Piggin   mm: speculative p...
227
228
  		return -EAGAIN;
  	}
b20a35035   Christoph Lameter   [PATCH] page migr...
229
230
  	/*
  	 * Now we know that no one else is looking at the page.
b20a35035   Christoph Lameter   [PATCH] page migr...
231
  	 */
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
232
  	get_page(newpage);	/* add cache reference */
b20a35035   Christoph Lameter   [PATCH] page migr...
233
234
235
236
  	if (PageSwapCache(page)) {
  		SetPageSwapCache(newpage);
  		set_page_private(newpage, page_private(page));
  	}
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
237
  	radix_tree_replace_slot(pslot, newpage);
e286781d5   Nick Piggin   mm: speculative p...
238
  	page_unfreeze_refs(page, expected_count);
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
239
240
241
242
  	/*
  	 * Drop cache reference from old page.
  	 * We know this isn't the last reference.
  	 */
b20a35035   Christoph Lameter   [PATCH] page migr...
243
  	__put_page(page);
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
244

0e8c7d0fd   Christoph Lameter   page migration: f...
245
246
247
248
249
250
251
252
253
254
255
256
  	/*
  	 * If moved to a different zone then also account
  	 * the page for that zone. Other VM counters will be
  	 * taken care of when we establish references to the
  	 * new page and drop references to the old page.
  	 *
  	 * Note that anonymous pages are accounted for
  	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
  	 * are mapped to swap space.
  	 */
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__inc_zone_page_state(newpage, NR_FILE_PAGES);
4b02108ac   KOSAKI Motohiro   mm: oom analysis:...
257
258
259
260
  	if (PageSwapBacked(page)) {
  		__dec_zone_page_state(page, NR_SHMEM);
  		__inc_zone_page_state(newpage, NR_SHMEM);
  	}
19fd62312   Nick Piggin   mm: spinlock tree...
261
  	spin_unlock_irq(&mapping->tree_lock);
b20a35035   Christoph Lameter   [PATCH] page migr...
262
263
264
  
  	return 0;
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
265
266
267
268
  
  /*
   * Copy the page to its new location
   */
e7340f733   Christoph Lameter   [PATCH] page migr...
269
  static void migrate_page_copy(struct page *newpage, struct page *page)
b20a35035   Christoph Lameter   [PATCH] page migr...
270
271
272
273
274
275
276
277
278
  {
  	copy_highpage(newpage, page);
  
  	if (PageError(page))
  		SetPageError(newpage);
  	if (PageReferenced(page))
  		SetPageReferenced(newpage);
  	if (PageUptodate(page))
  		SetPageUptodate(newpage);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
279
280
  	if (TestClearPageActive(page)) {
  		VM_BUG_ON(PageUnevictable(page));
b20a35035   Christoph Lameter   [PATCH] page migr...
281
  		SetPageActive(newpage);
418b27ef5   Lee Schermerhorn   mm: remove unevic...
282
283
  	} else if (TestClearPageUnevictable(page))
  		SetPageUnevictable(newpage);
b20a35035   Christoph Lameter   [PATCH] page migr...
284
285
286
287
288
289
290
  	if (PageChecked(page))
  		SetPageChecked(newpage);
  	if (PageMappedToDisk(page))
  		SetPageMappedToDisk(newpage);
  
  	if (PageDirty(page)) {
  		clear_page_dirty_for_io(page);
3a902c5f6   Nick Piggin   mm: fix warning o...
291
292
293
294
295
296
297
298
  		/*
  		 * Want to mark the page and the radix tree as dirty, and
  		 * redo the accounting that clear_page_dirty_for_io undid,
  		 * but we can't use set_page_dirty because that function
  		 * is actually a signal that all of the page has become dirty.
  		 * Whereas only part of our page may be dirty.
  		 */
  		__set_page_dirty_nobuffers(newpage);
b20a35035   Christoph Lameter   [PATCH] page migr...
299
   	}
b291f0003   Nick Piggin   mlock: mlocked pa...
300
  	mlock_migrate_page(newpage, page);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
301
  	ksm_migrate_page(newpage, page);
b291f0003   Nick Piggin   mlock: mlocked pa...
302

b20a35035   Christoph Lameter   [PATCH] page migr...
303
  	ClearPageSwapCache(page);
b20a35035   Christoph Lameter   [PATCH] page migr...
304
305
306
307
308
309
310
311
312
313
314
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
  	page->mapping = NULL;
  
  	/*
  	 * If any waiters have accumulated on the new page then
  	 * wake them up.
  	 */
  	if (PageWriteback(newpage))
  		end_page_writeback(newpage);
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
315

1d8b85ccf   Christoph Lameter   [PATCH] page migr...
316
317
318
319
320
  /************************************************************
   *                    Migration functions
   ***********************************************************/
  
  /* Always fail migration. Used for mappings that are not movable */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
321
322
  int fail_migrate_page(struct address_space *mapping,
  			struct page *newpage, struct page *page)
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
323
324
325
326
  {
  	return -EIO;
  }
  EXPORT_SYMBOL(fail_migrate_page);
b20a35035   Christoph Lameter   [PATCH] page migr...
327
328
  /*
   * Common logic to directly migrate a single page suitable for
266cf658e   David Howells   FS-Cache: Recruit...
329
   * pages that do not use PagePrivate/PagePrivate2.
b20a35035   Christoph Lameter   [PATCH] page migr...
330
331
332
   *
   * Pages are locked upon entry and exit.
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
333
334
  int migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page)
b20a35035   Christoph Lameter   [PATCH] page migr...
335
336
337
338
  {
  	int rc;
  
  	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
339
  	rc = migrate_page_move_mapping(mapping, newpage, page);
b20a35035   Christoph Lameter   [PATCH] page migr...
340
341
342
343
344
  
  	if (rc)
  		return rc;
  
  	migrate_page_copy(newpage, page);
b20a35035   Christoph Lameter   [PATCH] page migr...
345
346
347
  	return 0;
  }
  EXPORT_SYMBOL(migrate_page);
9361401eb   David Howells   [PATCH] BLOCK: Ma...
348
  #ifdef CONFIG_BLOCK
b20a35035   Christoph Lameter   [PATCH] page migr...
349
  /*
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
350
351
352
353
   * Migration function for pages with buffers. This function can only be used
   * if the underlying filesystem guarantees that no other references to "page"
   * exist.
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
354
355
  int buffer_migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page)
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
356
  {
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
357
358
  	struct buffer_head *bh, *head;
  	int rc;
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
359
  	if (!page_has_buffers(page))
2d1db3b11   Christoph Lameter   [PATCH] page migr...
360
  		return migrate_page(mapping, newpage, page);
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
361
362
  
  	head = page_buffers(page);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
363
  	rc = migrate_page_move_mapping(mapping, newpage, page);
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
  
  	if (rc)
  		return rc;
  
  	bh = head;
  	do {
  		get_bh(bh);
  		lock_buffer(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	ClearPagePrivate(page);
  	set_page_private(newpage, page_private(page));
  	set_page_private(page, 0);
  	put_page(page);
  	get_page(newpage);
  
  	bh = head;
  	do {
  		set_bh_page(bh, newpage, bh_offset(bh));
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	SetPagePrivate(newpage);
  
  	migrate_page_copy(newpage, page);
  
  	bh = head;
  	do {
  		unlock_buffer(bh);
   		put_bh(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	return 0;
  }
  EXPORT_SYMBOL(buffer_migrate_page);
9361401eb   David Howells   [PATCH] BLOCK: Ma...
404
  #endif
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
405

04e62a29b   Christoph Lameter   [PATCH] More page...
406
407
408
409
  /*
   * Writeback a page to clean the dirty state
   */
  static int writeout(struct address_space *mapping, struct page *page)
8351a6e47   Christoph Lameter   [PATCH] page migr...
410
  {
04e62a29b   Christoph Lameter   [PATCH] More page...
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
  	struct writeback_control wbc = {
  		.sync_mode = WB_SYNC_NONE,
  		.nr_to_write = 1,
  		.range_start = 0,
  		.range_end = LLONG_MAX,
  		.nonblocking = 1,
  		.for_reclaim = 1
  	};
  	int rc;
  
  	if (!mapping->a_ops->writepage)
  		/* No write method for the address space */
  		return -EINVAL;
  
  	if (!clear_page_dirty_for_io(page))
  		/* Someone else already triggered a write */
  		return -EAGAIN;
8351a6e47   Christoph Lameter   [PATCH] page migr...
428
  	/*
04e62a29b   Christoph Lameter   [PATCH] More page...
429
430
431
432
433
434
  	 * A dirty page may imply that the underlying filesystem has
  	 * the page on some queue. So the page must be clean for
  	 * migration. Writeout may mean we lose the lock and the
  	 * page state is no longer what we checked for earlier.
  	 * At this point we know that the migration attempt cannot
  	 * be successful.
8351a6e47   Christoph Lameter   [PATCH] page migr...
435
  	 */
04e62a29b   Christoph Lameter   [PATCH] More page...
436
  	remove_migration_ptes(page, page);
8351a6e47   Christoph Lameter   [PATCH] page migr...
437

04e62a29b   Christoph Lameter   [PATCH] More page...
438
  	rc = mapping->a_ops->writepage(page, &wbc);
8351a6e47   Christoph Lameter   [PATCH] page migr...
439

04e62a29b   Christoph Lameter   [PATCH] More page...
440
441
442
  	if (rc != AOP_WRITEPAGE_ACTIVATE)
  		/* unlocked. Relock */
  		lock_page(page);
bda8550de   Hugh Dickins   migration: fix wr...
443
  	return (rc < 0) ? -EIO : -EAGAIN;
04e62a29b   Christoph Lameter   [PATCH] More page...
444
445
446
447
448
449
450
451
452
453
  }
  
  /*
   * Default handling if a filesystem does not provide a migration function.
   */
  static int fallback_migrate_page(struct address_space *mapping,
  	struct page *newpage, struct page *page)
  {
  	if (PageDirty(page))
  		return writeout(mapping, page);
8351a6e47   Christoph Lameter   [PATCH] page migr...
454
455
456
457
458
  
  	/*
  	 * Buffers may be managed in a filesystem specific way.
  	 * We must have no buffers or drop them.
  	 */
266cf658e   David Howells   FS-Cache: Recruit...
459
  	if (page_has_private(page) &&
8351a6e47   Christoph Lameter   [PATCH] page migr...
460
461
462
463
464
  	    !try_to_release_page(page, GFP_KERNEL))
  		return -EAGAIN;
  
  	return migrate_page(mapping, newpage, page);
  }
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
465
  /*
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
466
467
468
469
470
   * Move a page to a newly allocated page
   * The page is locked and all ptes have been successfully removed.
   *
   * The new page will have replaced the old page if this function
   * is successful.
894bc3104   Lee Schermerhorn   Unevictable LRU I...
471
472
473
474
   *
   * Return value:
   *   < 0 - error code
   *  == 0 - success
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
475
   */
3fe2011ff   Mel Gorman   mm: migration: al...
476
477
  static int move_to_new_page(struct page *newpage, struct page *page,
  						int remap_swapcache)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
478
479
480
481
482
483
484
485
486
  {
  	struct address_space *mapping;
  	int rc;
  
  	/*
  	 * Block others from accessing the page when we get around to
  	 * establishing additional references. We are the only one
  	 * holding a reference to the new page at this point.
  	 */
529ae9aaa   Nick Piggin   mm: rename page t...
487
  	if (!trylock_page(newpage))
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
488
489
490
491
492
  		BUG();
  
  	/* Prepare mapping for the new page.*/
  	newpage->index = page->index;
  	newpage->mapping = page->mapping;
b2e185384   Rik van Riel   define page_file_...
493
494
  	if (PageSwapBacked(page))
  		SetPageSwapBacked(newpage);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
  
  	mapping = page_mapping(page);
  	if (!mapping)
  		rc = migrate_page(mapping, newpage, page);
  	else if (mapping->a_ops->migratepage)
  		/*
  		 * Most pages have a mapping and most filesystems
  		 * should provide a migration function. Anonymous
  		 * pages are part of swap space which also has its
  		 * own migration function. This is the most common
  		 * path for page migration.
  		 */
  		rc = mapping->a_ops->migratepage(mapping,
  						newpage, page);
  	else
  		rc = fallback_migrate_page(mapping, newpage, page);
3fe2011ff   Mel Gorman   mm: migration: al...
511
  	if (rc) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
512
  		newpage->mapping = NULL;
3fe2011ff   Mel Gorman   mm: migration: al...
513
514
515
516
  	} else {
  		if (remap_swapcache)
  			remove_migration_ptes(page, newpage);
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
517
518
519
520
521
522
523
524
525
526
  
  	unlock_page(newpage);
  
  	return rc;
  }
  
  /*
   * Obtain the lock on page, remove all ptes and migrate the page
   * to the newly allocated page in newpage.
   */
95a402c38   Christoph Lameter   [PATCH] page migr...
527
  static int unmap_and_move(new_page_t get_new_page, unsigned long private,
62b61f611   Hugh Dickins   ksm: memory hotre...
528
  			struct page *page, int force, int offlining)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
529
530
  {
  	int rc = 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
531
532
  	int *result = NULL;
  	struct page *newpage = get_new_page(page, private, &result);
3fe2011ff   Mel Gorman   mm: migration: al...
533
  	int remap_swapcache = 1;
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
534
  	int rcu_locked = 0;
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
535
  	int charge = 0;
e00e43161   KAMEZAWA Hiroyuki   memcg: fix wrong ...
536
  	struct mem_cgroup *mem = NULL;
3f6c82728   Mel Gorman   mm: migration: ta...
537
  	struct anon_vma *anon_vma = NULL;
95a402c38   Christoph Lameter   [PATCH] page migr...
538
539
540
  
  	if (!newpage)
  		return -ENOMEM;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
541

894bc3104   Lee Schermerhorn   Unevictable LRU I...
542
  	if (page_count(page) == 1) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
543
  		/* page was freed from under us. So we are done. */
95a402c38   Christoph Lameter   [PATCH] page migr...
544
  		goto move_newpage;
894bc3104   Lee Schermerhorn   Unevictable LRU I...
545
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
546

e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
547
  	/* prepare cgroup just returns 0 or -ENOMEM */
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
548
  	rc = -EAGAIN;
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
549

529ae9aaa   Nick Piggin   mm: rename page t...
550
  	if (!trylock_page(page)) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
551
  		if (!force)
95a402c38   Christoph Lameter   [PATCH] page migr...
552
  			goto move_newpage;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
553
554
  		lock_page(page);
  	}
62b61f611   Hugh Dickins   ksm: memory hotre...
555
556
557
558
559
560
561
562
563
564
565
566
567
  	/*
  	 * Only memory hotplug's offline_pages() caller has locked out KSM,
  	 * and can safely migrate a KSM page.  The other cases have skipped
  	 * PageKsm along with PageReserved - but it is only now when we have
  	 * the page lock that we can be certain it will not go KSM beneath us
  	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
  	 * its pagecount raised, but only here do we take the page lock which
  	 * serializes that).
  	 */
  	if (PageKsm(page) && !offlining) {
  		rc = -EBUSY;
  		goto unlock;
  	}
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
568
  	/* charge against new page */
ac39cf8cb   akpm@linux-foundation.org   memcg: fix mis-ac...
569
  	charge = mem_cgroup_prepare_migration(page, newpage, &mem);
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
570
571
572
573
574
  	if (charge == -ENOMEM) {
  		rc = -ENOMEM;
  		goto unlock;
  	}
  	BUG_ON(charge);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
575
576
  	if (PageWriteback(page)) {
  		if (!force)
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
577
  			goto uncharge;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
578
579
  		wait_on_page_writeback(page);
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
580
  	/*
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
581
582
583
584
  	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
  	 * we cannot notice that anon_vma is freed while we migrate a page.
  	 * This rcu_read_lock() delays freeing anon_vma pointer until the end
  	 * of migration. File cache pages are no problem because of page_lock()
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
585
586
  	 * File Caches may use write_page() or lock_page() in migration, then,
  	 * just care Anon page here.
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
587
  	 */
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
588
589
590
  	if (PageAnon(page)) {
  		rcu_read_lock();
  		rcu_locked = 1;
67b9509b2   Mel Gorman   mm: migration: do...
591

3fe2011ff   Mel Gorman   mm: migration: al...
592
593
594
595
  		/* Determine how to safely use anon_vma */
  		if (!page_mapped(page)) {
  			if (!PageSwapCache(page))
  				goto rcu_unlock;
67b9509b2   Mel Gorman   mm: migration: do...
596

3fe2011ff   Mel Gorman   mm: migration: al...
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
  			/*
  			 * We cannot be sure that the anon_vma of an unmapped
  			 * swapcache page is safe to use because we don't
  			 * know in advance if the VMA that this page belonged
  			 * to still exists. If the VMA and others sharing the
  			 * data have been freed, then the anon_vma could
  			 * already be invalid.
  			 *
  			 * To avoid this possibility, swapcache pages get
  			 * migrated but are not remapped when migration
  			 * completes
  			 */
  			remap_swapcache = 0;
  		} else {
  			/*
  			 * Take a reference count on the anon_vma if the
  			 * page is mapped so that it is guaranteed to
  			 * exist when the page is remapped later
  			 */
  			anon_vma = page_anon_vma(page);
  			atomic_inc(&anon_vma->external_refcount);
  		}
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
619
  	}
62e1c5530   Shaohua Li   page migraton: ha...
620

dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
621
  	/*
62e1c5530   Shaohua Li   page migraton: ha...
622
623
624
625
626
627
628
629
630
631
  	 * Corner case handling:
  	 * 1. When a new swap-cache page is read in, it is added to the LRU
  	 * and treated as swapcache but it has no rmap yet.
  	 * Calling try_to_unmap() against a page->mapping==NULL page will
  	 * trigger a BUG.  So handle it here.
  	 * 2. An orphaned page (see truncate_complete_page) might have
  	 * fs-private metadata. The page can be picked up due to memory
  	 * offlining.  Everywhere else except page reclaim, the page is
  	 * invisible to the vm, so the page can not be migrated.  So try to
  	 * free the metadata, so the page can be freed.
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
632
  	 */
62e1c5530   Shaohua Li   page migraton: ha...
633
  	if (!page->mapping) {
266cf658e   David Howells   FS-Cache: Recruit...
634
  		if (!PageAnon(page) && page_has_private(page)) {
62e1c5530   Shaohua Li   page migraton: ha...
635
636
637
638
639
640
641
642
  			/*
  			 * Go direct to try_to_free_buffers() here because
  			 * a) that's what try_to_release_page() would do anyway
  			 * b) we may be under rcu_read_lock() here, so we can't
  			 *    use GFP_KERNEL which is what try_to_release_page()
  			 *    needs to be effective.
  			 */
  			try_to_free_buffers(page);
abfc34881   Shaohua Li   memory hotplug: m...
643
  			goto rcu_unlock;
62e1c5530   Shaohua Li   page migraton: ha...
644
  		}
abfc34881   Shaohua Li   memory hotplug: m...
645
  		goto skip_unmap;
62e1c5530   Shaohua Li   page migraton: ha...
646
  	}
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
647
  	/* Establish migration ptes or remove ptes */
14fa31b89   Andi Kleen   HWPOISON: Use bit...
648
  	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
649

abfc34881   Shaohua Li   memory hotplug: m...
650
  skip_unmap:
e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
651
  	if (!page_mapped(page))
3fe2011ff   Mel Gorman   mm: migration: al...
652
  		rc = move_to_new_page(newpage, page, remap_swapcache);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
653

3fe2011ff   Mel Gorman   mm: migration: al...
654
  	if (rc && remap_swapcache)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
655
  		remove_migration_ptes(page, page);
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
656
  rcu_unlock:
3f6c82728   Mel Gorman   mm: migration: ta...
657
658
  
  	/* Drop an anon_vma reference if we took one */
7f60c214f   Mel Gorman   mm: migration: sh...
659
  	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
3f6c82728   Mel Gorman   mm: migration: ta...
660
661
662
663
664
  		int empty = list_empty(&anon_vma->head);
  		spin_unlock(&anon_vma->lock);
  		if (empty)
  			anon_vma_free(anon_vma);
  	}
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
665
666
  	if (rcu_locked)
  		rcu_read_unlock();
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
667
668
669
  uncharge:
  	if (!charge)
  		mem_cgroup_end_migration(mem, page, newpage);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
670
671
  unlock:
  	unlock_page(page);
95a402c38   Christoph Lameter   [PATCH] page migr...
672

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
673
  	if (rc != -EAGAIN) {
aaa994b30   Christoph Lameter   [PATCH] page migr...
674
675
676
677
678
679
680
   		/*
   		 * A page that has been migrated has all references
   		 * removed and will be freed. A page that has not been
   		 * migrated will have kept its references and be
   		 * restored.
   		 */
   		list_del(&page->lru);
a731286de   KOSAKI Motohiro   mm: vmstat: add i...
681
  		dec_zone_page_state(page, NR_ISOLATED_ANON +
6c0b13519   Johannes Weiner   mm: return boolea...
682
  				page_is_file_cache(page));
894bc3104   Lee Schermerhorn   Unevictable LRU I...
683
  		putback_lru_page(page);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
684
  	}
95a402c38   Christoph Lameter   [PATCH] page migr...
685
686
  
  move_newpage:
894bc3104   Lee Schermerhorn   Unevictable LRU I...
687

95a402c38   Christoph Lameter   [PATCH] page migr...
688
689
690
691
  	/*
  	 * Move the new page to the LRU. If migration was not successful
  	 * then this will free the page.
  	 */
894bc3104   Lee Schermerhorn   Unevictable LRU I...
692
  	putback_lru_page(newpage);
742755a1d   Christoph Lameter   [PATCH] page migr...
693
694
695
696
697
698
  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(newpage);
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
699
700
701
702
  	return rc;
  }
  
  /*
b20a35035   Christoph Lameter   [PATCH] page migr...
703
704
   * migrate_pages
   *
95a402c38   Christoph Lameter   [PATCH] page migr...
705
706
707
   * The function takes one list of pages to migrate and a function
   * that determines from the page to be migrated and the private data
   * the target of the move and allocates the page.
b20a35035   Christoph Lameter   [PATCH] page migr...
708
709
710
   *
   * The function returns after 10 attempts or if no pages
   * are movable anymore because to has become empty
aaa994b30   Christoph Lameter   [PATCH] page migr...
711
   * or no retryable pages exist anymore. All pages will be
e9534b3fd   Gabriel Craciunescu   Typo fixes retrun...
712
   * returned to the LRU or freed.
b20a35035   Christoph Lameter   [PATCH] page migr...
713
   *
95a402c38   Christoph Lameter   [PATCH] page migr...
714
   * Return: Number of pages not migrated or error code.
b20a35035   Christoph Lameter   [PATCH] page migr...
715
   */
95a402c38   Christoph Lameter   [PATCH] page migr...
716
  int migrate_pages(struct list_head *from,
62b61f611   Hugh Dickins   ksm: memory hotre...
717
  		new_page_t get_new_page, unsigned long private, int offlining)
b20a35035   Christoph Lameter   [PATCH] page migr...
718
  {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
719
  	int retry = 1;
b20a35035   Christoph Lameter   [PATCH] page migr...
720
721
722
723
724
725
726
727
728
  	int nr_failed = 0;
  	int pass = 0;
  	struct page *page;
  	struct page *page2;
  	int swapwrite = current->flags & PF_SWAPWRITE;
  	int rc;
  
  	if (!swapwrite)
  		current->flags |= PF_SWAPWRITE;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
729
730
  	for(pass = 0; pass < 10 && retry; pass++) {
  		retry = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
731

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
732
  		list_for_each_entry_safe(page, page2, from, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
733
  			cond_resched();
2d1db3b11   Christoph Lameter   [PATCH] page migr...
734

95a402c38   Christoph Lameter   [PATCH] page migr...
735
  			rc = unmap_and_move(get_new_page, private,
62b61f611   Hugh Dickins   ksm: memory hotre...
736
  						page, pass > 2, offlining);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
737

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
738
  			switch(rc) {
95a402c38   Christoph Lameter   [PATCH] page migr...
739
740
  			case -ENOMEM:
  				goto out;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
741
  			case -EAGAIN:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
742
  				retry++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
743
744
  				break;
  			case 0:
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
745
746
  				break;
  			default:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
747
  				/* Permanent failure */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
748
  				nr_failed++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
749
  				break;
2d1db3b11   Christoph Lameter   [PATCH] page migr...
750
  			}
b20a35035   Christoph Lameter   [PATCH] page migr...
751
752
  		}
  	}
95a402c38   Christoph Lameter   [PATCH] page migr...
753
754
  	rc = 0;
  out:
b20a35035   Christoph Lameter   [PATCH] page migr...
755
756
  	if (!swapwrite)
  		current->flags &= ~PF_SWAPWRITE;
aaa994b30   Christoph Lameter   [PATCH] page migr...
757
  	putback_lru_pages(from);
b20a35035   Christoph Lameter   [PATCH] page migr...
758

95a402c38   Christoph Lameter   [PATCH] page migr...
759
760
  	if (rc)
  		return rc;
b20a35035   Christoph Lameter   [PATCH] page migr...
761

95a402c38   Christoph Lameter   [PATCH] page migr...
762
  	return nr_failed + retry;
b20a35035   Christoph Lameter   [PATCH] page migr...
763
  }
95a402c38   Christoph Lameter   [PATCH] page migr...
764

742755a1d   Christoph Lameter   [PATCH] page migr...
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
  #ifdef CONFIG_NUMA
  /*
   * Move a list of individual pages
   */
  struct page_to_node {
  	/* User virtual address of the page; filled in by do_pages_move() */
  	unsigned long addr;
  	/*
  	 * Page found at addr by do_move_page_to_node_array(); used by
  	 * new_page_node() to find the entry belonging to a page.
  	 */
  	struct page *page;
  	/* Target node; MAX_NUMNODES marks the end of the array */
  	int node;
  	/*
  	 * Per-page result: set by do_move_page_to_node_array() to a node id
  	 * or a negative errno, and exposed to the migration code through
  	 * the result pointer handed out by new_page_node().
  	 */
  	int status;
  };
  
  static struct page *new_page_node(struct page *p, unsigned long private,
  		int **result)
  {
  	struct page_to_node *pm = (struct page_to_node *)private;
  
  	while (pm->node != MAX_NUMNODES && pm->page != p)
  		pm++;
  
  	if (pm->node == MAX_NUMNODES)
  		return NULL;
  
  	*result = &pm->status;
6484eb3e2   Mel Gorman   page allocator: d...
788
  	return alloc_pages_exact_node(pm->node,
769848c03   Mel Gorman   Add __GFP_MOVABLE...
789
  				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
790
791
792
793
794
795
  }
  
  /*
   * Move a set of pages as indicated in the pm array. The addr
   * field must be set to the virtual address of the page to be moved
   * and the node number must contain a valid target node.
5e9a0f023   Brice Goglin   mm: extract do_pa...
796
   * The pm array ends with node = MAX_NUMNODES.
742755a1d   Christoph Lameter   [PATCH] page migr...
797
   */
5e9a0f023   Brice Goglin   mm: extract do_pa...
798
799
800
  static int do_move_page_to_node_array(struct mm_struct *mm,
  				      struct page_to_node *pm,
  				      int migrate_all)
742755a1d   Christoph Lameter   [PATCH] page migr...
801
802
803
804
805
806
807
808
809
810
  {
  	int err;
  	struct page_to_node *pp;
  	LIST_HEAD(pagelist);
  
  	down_read(&mm->mmap_sem);
  
  	/*
  	 * Build a list of pages to migrate
  	 */
742755a1d   Christoph Lameter   [PATCH] page migr...
811
812
813
  	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
  		struct vm_area_struct *vma;
  		struct page *page;
742755a1d   Christoph Lameter   [PATCH] page migr...
814
815
  		err = -EFAULT;
  		vma = find_vma(mm, pp->addr);
0dc952dc3   Christoph Lameter   [PATCH] Page migr...
816
  		if (!vma || !vma_migratable(vma))
742755a1d   Christoph Lameter   [PATCH] page migr...
817
818
819
  			goto set_status;
  
  		page = follow_page(vma, pp->addr, FOLL_GET);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
820
821
822
823
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
824
825
826
  		err = -ENOENT;
  		if (!page)
  			goto set_status;
62b61f611   Hugh Dickins   ksm: memory hotre...
827
828
  		/* Use PageReserved to check for zero page */
  		if (PageReserved(page) || PageKsm(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
  			goto put_and_set;
  
  		pp->page = page;
  		err = page_to_nid(page);
  
  		if (err == pp->node)
  			/*
  			 * Node already in the right place
  			 */
  			goto put_and_set;
  
  		err = -EACCES;
  		if (page_mapcount(page) > 1 &&
  				!migrate_all)
  			goto put_and_set;
62695a84e   Nick Piggin   vmscan: move isol...
844
  		err = isolate_lru_page(page);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
845
  		if (!err) {
62695a84e   Nick Piggin   vmscan: move isol...
846
  			list_add_tail(&page->lru, &pagelist);
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
847
848
849
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
  		}
742755a1d   Christoph Lameter   [PATCH] page migr...
850
851
852
853
854
855
856
857
858
859
  put_and_set:
  		/*
  		 * Either remove the duplicate refcount from
  		 * isolate_lru_page() or drop the page ref if it was
  		 * not isolated.
  		 */
  		put_page(page);
  set_status:
  		pp->status = err;
  	}
e78bbfa82   Brice Goglin   mm: stop returnin...
860
  	err = 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
861
862
  	if (!list_empty(&pagelist))
  		err = migrate_pages(&pagelist, new_page_node,
62b61f611   Hugh Dickins   ksm: memory hotre...
863
  				(unsigned long)pm, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
864
865
866
867
868
869
  
  	up_read(&mm->mmap_sem);
  	return err;
  }
  
  /*
   * Migrate an array of page addresses onto an array of nodes and fill
   * the corresponding array of status.
   *
   * The user arrays are processed in chunks that fit into a single
   * kernel page of struct page_to_node entries. Each chunk is validated,
   * migrated and has its status values copied back to user space before
   * the next chunk is read.
   *
   * Returns 0 on success, -ENOMEM if the scratch page cannot be allocated,
   * -EFAULT on a failed user access, -ENODEV for an invalid or memoryless
   * node, -EACCES for a node outside the task's allowed set, or the
   * negative error reported by do_move_page_to_node_array().
   */
  static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
  			 unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 const int __user *nodes,
  			 int __user *status, int flags)
  {
  	nodemask_t allowed = cpuset_mems_allowed(task);
  	struct page_to_node *pm;
  	unsigned long per_chunk;
  	unsigned long base;
  	int err;

  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
  	if (!pm)
  		return -ENOMEM;

  	migrate_prep();

  	/*
  	 * One page worth of page_to_node entries, minus the slot that is
  	 * reserved for the end marker.
  	 */
  	per_chunk = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;

  	for (base = 0; base < nr_pages; base += per_chunk) {
  		int i;

  		/* The final chunk may be shorter than a full one */
  		if (base + per_chunk > nr_pages)
  			per_chunk = nr_pages - base;

  		/* Pull addresses and target nodes in from user space */
  		for (i = 0; i < per_chunk; i++) {
  			const void __user *uaddr;
  			int node;

  			if (get_user(uaddr, pages + i + base)) {
  				err = -EFAULT;
  				goto out_pm;
  			}
  			pm[i].addr = (unsigned long) uaddr;

  			if (get_user(node, nodes + i + base)) {
  				err = -EFAULT;
  				goto out_pm;
  			}

  			if (node < 0 || node >= MAX_NUMNODES ||
  			    !node_state(node, N_HIGH_MEMORY)) {
  				err = -ENODEV;
  				goto out_pm;
  			}

  			if (!node_isset(node, allowed)) {
  				err = -EACCES;
  				goto out_pm;
  			}

  			pm[i].node = node;
  		}

  		/* Terminate the chunk */
  		pm[per_chunk].node = MAX_NUMNODES;

  		/* Hand the chunk to the migration code */
  		err = do_move_page_to_node_array(mm, pm,
  						 flags & MPOL_MF_MOVE_ALL);
  		if (err < 0)
  			goto out_pm;

  		/* Report the per-page results back to user space */
  		for (i = 0; i < per_chunk; i++) {
  			if (put_user(pm[i].status, status + i + base)) {
  				err = -EFAULT;
  				goto out_pm;
  			}
  		}
  	}
  	err = 0;

  out_pm:
  	free_page((unsigned long)pm);
  	return err;
  }
  
  /*
2f007e74b   Brice Goglin   mm: don't vmalloc...
955
   * Determine the nodes of an array of pages and store it in an array of status.
742755a1d   Christoph Lameter   [PATCH] page migr...
956
   */
80bba1290   Brice Goglin   mm: no get_user/p...
957
958
  static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
  				const void __user **pages, int *status)
742755a1d   Christoph Lameter   [PATCH] page migr...
959
  {
2f007e74b   Brice Goglin   mm: don't vmalloc...
960
  	unsigned long i;
2f007e74b   Brice Goglin   mm: don't vmalloc...
961

742755a1d   Christoph Lameter   [PATCH] page migr...
962
  	down_read(&mm->mmap_sem);
2f007e74b   Brice Goglin   mm: don't vmalloc...
963
  	for (i = 0; i < nr_pages; i++) {
80bba1290   Brice Goglin   mm: no get_user/p...
964
  		unsigned long addr = (unsigned long)(*pages);
742755a1d   Christoph Lameter   [PATCH] page migr...
965
966
  		struct vm_area_struct *vma;
  		struct page *page;
c095adbc2   KOSAKI Motohiro   mm: Don't touch u...
967
  		int err = -EFAULT;
2f007e74b   Brice Goglin   mm: don't vmalloc...
968
969
  
  		vma = find_vma(mm, addr);
742755a1d   Christoph Lameter   [PATCH] page migr...
970
971
  		if (!vma)
  			goto set_status;
2f007e74b   Brice Goglin   mm: don't vmalloc...
972
  		page = follow_page(vma, addr, 0);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
973
974
975
976
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
977
978
  		err = -ENOENT;
  		/* Use PageReserved to check for zero page */
62b61f611   Hugh Dickins   ksm: memory hotre...
979
  		if (!page || PageReserved(page) || PageKsm(page))
742755a1d   Christoph Lameter   [PATCH] page migr...
980
981
982
983
  			goto set_status;
  
  		err = page_to_nid(page);
  set_status:
80bba1290   Brice Goglin   mm: no get_user/p...
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
  		*status = err;
  
  		pages++;
  		status++;
  	}
  
  	up_read(&mm->mmap_sem);
  }
  
  /*
   * Determine the nodes of a user array of pages and store it in
   * a user array of status.
   *
   * The user arrays are bounced through small on-stack buffers,
   * DO_PAGES_STAT_CHUNK_NR entries at a time.
   *
   * Returns 0 on success or -EFAULT if either user array cannot be accessed.
   */
  static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 int __user *status)
  {
  #define DO_PAGES_STAT_CHUNK_NR 16
  	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
  	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
  	unsigned long left = nr_pages;

  	while (left) {
  		/* Never handle more than one buffer's worth per round */
  		unsigned long batch = left > DO_PAGES_STAT_CHUNK_NR ?
  					DO_PAGES_STAT_CHUNK_NR : left;

  		if (copy_from_user(chunk_pages, pages,
  				   batch * sizeof(*chunk_pages)))
  			return -EFAULT;

  		do_pages_stat_array(mm, batch, chunk_pages, chunk_status);

  		if (copy_to_user(status, chunk_status,
  				 batch * sizeof(*status)))
  			return -EFAULT;

  		pages += batch;
  		status += batch;
  		left -= batch;
  	}

  	return 0;
  }
  
  /*
   * Move a list of pages in the address space of the currently executing
   * process.
   */
938bb9f5e   Heiko Carstens   [CVE-2009-0029] S...
1030
1031
1032
1033
  SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
  		const void __user * __user *, pages,
  		const int __user *, nodes,
  		int __user *, status, int, flags)
742755a1d   Christoph Lameter   [PATCH] page migr...
1034
  {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1035
  	const struct cred *cred = current_cred(), *tcred;
742755a1d   Christoph Lameter   [PATCH] page migr...
1036
  	struct task_struct *task;
742755a1d   Christoph Lameter   [PATCH] page migr...
1037
  	struct mm_struct *mm;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1038
  	int err;
742755a1d   Christoph Lameter   [PATCH] page migr...
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
  
  	/* Check flags */
  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
  		return -EINVAL;
  
  	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
  		return -EPERM;
  
  	/* Find the mm_struct */
  	read_lock(&tasklist_lock);
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
1049
  	task = pid ? find_task_by_vpid(pid) : current;
742755a1d   Christoph Lameter   [PATCH] page migr...
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
  	if (!task) {
  		read_unlock(&tasklist_lock);
  		return -ESRCH;
  	}
  	mm = get_task_mm(task);
  	read_unlock(&tasklist_lock);
  
  	if (!mm)
  		return -EINVAL;
  
  	/*
  	 * Check if this process has the right to modify the specified
  	 * process. The right exists if the process has administrative
  	 * capabilities, superuser privileges or the same
  	 * userid as the target process.
  	 */
c69e8d9c0   David Howells   CRED: Use RCU to ...
1066
1067
  	rcu_read_lock();
  	tcred = __task_cred(task);
b6dff3ec5   David Howells   CRED: Separate ta...
1068
1069
  	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
  	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
742755a1d   Christoph Lameter   [PATCH] page migr...
1070
  	    !capable(CAP_SYS_NICE)) {
c69e8d9c0   David Howells   CRED: Use RCU to ...
1071
  		rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1072
  		err = -EPERM;
5e9a0f023   Brice Goglin   mm: extract do_pa...
1073
  		goto out;
742755a1d   Christoph Lameter   [PATCH] page migr...
1074
  	}
c69e8d9c0   David Howells   CRED: Use RCU to ...
1075
  	rcu_read_unlock();
742755a1d   Christoph Lameter   [PATCH] page migr...
1076

86c3a7645   David Quigley   [PATCH] SELinux: ...
1077
1078
   	err = security_task_movememory(task);
   	if (err)
5e9a0f023   Brice Goglin   mm: extract do_pa...
1079
  		goto out;
86c3a7645   David Quigley   [PATCH] SELinux: ...
1080

5e9a0f023   Brice Goglin   mm: extract do_pa...
1081
1082
1083
1084
  	if (nodes) {
  		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
  				    flags);
  	} else {
2f007e74b   Brice Goglin   mm: don't vmalloc...
1085
  		err = do_pages_stat(mm, nr_pages, pages, status);
742755a1d   Christoph Lameter   [PATCH] page migr...
1086
  	}
742755a1d   Christoph Lameter   [PATCH] page migr...
1087
  out:
742755a1d   Christoph Lameter   [PATCH] page migr...
1088
1089
1090
  	mmput(mm);
  	return err;
  }
742755a1d   Christoph Lameter   [PATCH] page migr...
1091

7b2259b3e   Christoph Lameter   [PATCH] page migr...
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
  /*
   * Call migration functions in the vma_ops that may prepare
   * memory in a vm for migration. migration functions may perform
   * the migration for vmas that do not have an underlying page struct.
   */
  int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	const nodemask_t *from, unsigned long flags)
  {
   	struct vm_area_struct *vma;
   	int err = 0;
1001c9fb8   Daisuke Nishimura   migration: migrat...
1102
  	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
7b2259b3e   Christoph Lameter   [PATCH] page migr...
1103
1104
1105
1106
1107
1108
1109
1110
   		if (vma->vm_ops && vma->vm_ops->migrate) {
   			err = vma->vm_ops->migrate(vma, to, from, flags);
   			if (err)
   				break;
   		}
   	}
   	return err;
  }
83d1674a9   Gerald Schaefer   mm: make CONFIG_M...
1111
  #endif