Blame view

mm/migrate.c 26.5 KB
b20a35035   Christoph Lameter   [PATCH] page migr...
1
2
3
4
5
6
7
8
9
10
11
  /*
   * Memory Migration functionality - linux/mm/migrate.c
   *
   * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
   *
   * Page migration was first developed in the context of the memory hotplug
   * project. The main authors of the migration code are:
   *
   * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
   * Hirokazu Takahashi <taka@valinux.co.jp>
   * Dave Hansen <haveblue@us.ibm.com>
cde535359   Christoph Lameter   Christoph has moved
12
   * Christoph Lameter
b20a35035   Christoph Lameter   [PATCH] page migr...
13
14
15
16
17
   */
  
  #include <linux/migrate.h>
  #include <linux/module.h>
  #include <linux/swap.h>
0697212a4   Christoph Lameter   [PATCH] Swapless ...
18
  #include <linux/swapops.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
19
  #include <linux/pagemap.h>
e23ca00bf   Christoph Lameter   [PATCH] Some page...
20
  #include <linux/buffer_head.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
21
  #include <linux/mm_inline.h>
b488893a3   Pavel Emelyanov   pid namespaces: c...
22
  #include <linux/nsproxy.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
23
24
25
26
27
  #include <linux/pagevec.h>
  #include <linux/rmap.h>
  #include <linux/topology.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
04e62a29b   Christoph Lameter   [PATCH] More page...
28
  #include <linux/writeback.h>
742755a1d   Christoph Lameter   [PATCH] page migr...
29
30
  #include <linux/mempolicy.h>
  #include <linux/vmalloc.h>
86c3a7645   David Quigley   [PATCH] SELinux: ...
31
  #include <linux/security.h>
8a9f3ccd2   Balbir Singh   Memory controller...
32
  #include <linux/memcontrol.h>
4f5ca2657   Adrian Bunk   mm/migrate.c shou...
33
  #include <linux/syscalls.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
34
35
  
  #include "internal.h"
b20a35035   Christoph Lameter   [PATCH] page migr...
36
37
38
  /*
   * lru_to_page - return the struct page whose 'lru' list node is the entry
   * immediately before @_head, i.e. (_head)->prev.
   */
  #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
  
  /*
   * migrate_prep() has to run before the caller starts building the list
   * of pages to migrate with isolate_lru_page().
   *
   * Always returns 0.
   */
  int migrate_prep(void)
  {
  	/*
  	 * Clear the LRU lists so that pages can be isolated.  Pages may
  	 * still be moved off the LRU after the drain; those pages simply
  	 * fail to migrate, exactly like any other busy page.
  	 */
  	lru_add_drain_all();
  
  	return 0;
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
54
  /*
   * Give every isolated page on list @l back to the LRU.  Each page is
   * unlinked from @l and then handed to putback_lru_page(), which is used
   * so that evictable pages do not leak onto the unevictable list.
   *
   * Returns the number of pages put back.
   */
  int putback_lru_pages(struct list_head *l)
  {
  	struct page *cur;
  	struct page *next;
  	int nr_putback = 0;
  
  	/* The _safe iterator is required: @cur is unlinked inside the loop. */
  	list_for_each_entry_safe(cur, next, l, lru) {
  		list_del(&cur->lru);
  		putback_lru_page(cur);
  		nr_putback++;
  	}
  
  	return nr_putback;
  }
0697212a4   Christoph Lameter   [PATCH] Swapless ...
73
74
75
  /*
   * Restore a potential migration pte to a working pte entry
   */
04e62a29b   Christoph Lameter   [PATCH] More page...
76
  static void remove_migration_pte(struct vm_area_struct *vma,
0697212a4   Christoph Lameter   [PATCH] Swapless ...
77
78
79
80
81
82
83
84
85
  		struct page *old, struct page *new)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	swp_entry_t entry;
   	pgd_t *pgd;
   	pud_t *pud;
   	pmd_t *pmd;
  	pte_t *ptep, pte;
   	spinlock_t *ptl;
04e62a29b   Christoph Lameter   [PATCH] More page...
86
87
88
89
  	unsigned long addr = page_address_in_vma(new, vma);
  
  	if (addr == -EFAULT)
  		return;
0697212a4   Christoph Lameter   [PATCH] Swapless ...
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
  
   	pgd = pgd_offset(mm, addr);
  	if (!pgd_present(*pgd))
                  return;
  
  	pud = pud_offset(pgd, addr);
  	if (!pud_present(*pud))
                  return;
  
  	pmd = pmd_offset(pud, addr);
  	if (!pmd_present(*pmd))
  		return;
  
  	ptep = pte_offset_map(pmd, addr);
  
  	if (!is_swap_pte(*ptep)) {
  		pte_unmap(ptep);
   		return;
   	}
  
   	ptl = pte_lockptr(mm, pmd);
   	spin_lock(ptl);
  	pte = *ptep;
  	if (!is_swap_pte(pte))
  		goto out;
  
  	entry = pte_to_swp_entry(pte);
  
  	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
  		goto out;
98837c7f8   Hugh Dickins   memcg: fix VM_BUG...
120
121
122
123
124
125
126
127
128
129
130
131
132
  	/*
  	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
  	 * Failure is not an option here: we're now expected to remove every
  	 * migration pte, and will cause crashes otherwise.  Normally this
  	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
  	 * page_cgroup count for safety, that's now attached to the new page,
  	 * so this charge should just be another incrementation of the count,
  	 * to keep in balance with rmap.c's mem_cgroup_uncharging.  But if
  	 * there's been a force_empty, those reference counts may no longer
  	 * be reliable, and this charge can actually fail: oh well, we don't
  	 * make the situation any worse by proceeding as if it had succeeded.
  	 */
  	mem_cgroup_charge(new, mm, GFP_ATOMIC);
0697212a4   Christoph Lameter   [PATCH] Swapless ...
133
134
135
136
  	get_page(new);
  	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
  	if (is_write_migration_entry(entry))
  		pte = pte_mkwrite(pte);
97ee05246   KAMEZAWA Hiroyuki   flush cache befor...
137
  	flush_cache_page(vma, addr, pte_pfn(pte));
0697212a4   Christoph Lameter   [PATCH] Swapless ...
138
  	set_pte_at(mm, addr, ptep, pte);
04e62a29b   Christoph Lameter   [PATCH] More page...
139
140
141
142
143
144
145
146
  
  	if (PageAnon(new))
  		page_add_anon_rmap(new, vma, addr);
  	else
  		page_add_file_rmap(new);
  
  	/* No need to invalidate - it was non-present before */
  	update_mmu_cache(vma, addr, pte);
04e62a29b   Christoph Lameter   [PATCH] More page...
147

0697212a4   Christoph Lameter   [PATCH] Swapless ...
148
149
150
151
152
  out:
  	pte_unmap_unlock(ptep, ptl);
  }
  
  /*
04e62a29b   Christoph Lameter   [PATCH] More page...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
   * Note that remove_file_migration_ptes will only work on regular mappings,
   * Nonlinear mappings do not use migration entries.
   */
  static void remove_file_migration_ptes(struct page *old, struct page *new)
  {
  	struct vm_area_struct *vma;
  	struct address_space *mapping = page_mapping(new);
  	struct prio_tree_iter iter;
  	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  
  	if (!mapping)
  		return;
  
  	spin_lock(&mapping->i_mmap_lock);
  
  	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
  		remove_migration_pte(vma, old, new);
  
  	spin_unlock(&mapping->i_mmap_lock);
  }
  
  /*
0697212a4   Christoph Lameter   [PATCH] Swapless ...
175
176
177
   * Must hold mmap_sem lock on at least one of the vmas containing
   * the page so that the anon_vma cannot vanish.
   */
04e62a29b   Christoph Lameter   [PATCH] More page...
178
  static void remove_anon_migration_ptes(struct page *old, struct page *new)
0697212a4   Christoph Lameter   [PATCH] Swapless ...
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  {
  	struct anon_vma *anon_vma;
  	struct vm_area_struct *vma;
  	unsigned long mapping;
  
  	mapping = (unsigned long)new->mapping;
  
  	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
  		return;
  
  	/*
  	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
  	 */
  	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
  	spin_lock(&anon_vma->lock);
  
  	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
04e62a29b   Christoph Lameter   [PATCH] More page...
196
  		remove_migration_pte(vma, old, new);
0697212a4   Christoph Lameter   [PATCH] Swapless ...
197
198
199
200
201
  
  	spin_unlock(&anon_vma->lock);
  }
  
  /*
   * Get rid of all migration entries that refer to @old and replace them
   * by references to @new.  Dispatches on whether @new is anonymous.
   */
  static void remove_migration_ptes(struct page *old, struct page *new)
  {
  	if (PageAnon(new)) {
  		remove_anon_migration_ptes(old, new);
  		return;
  	}
  
  	remove_file_migration_ptes(old, new);
  }
  
  /*
   * Something used the pte of a page under migration.  Find the page the
   * migration entry refers to and wait until it is unlocked, i.e. until
   * migration is finished.  When this function returns the fault will be
   * retried.
   *
   * This function is called from do_swap_page().
   */
  void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
  				unsigned long address)
  {
  	struct page *target = NULL;
  	swp_entry_t entry;
  	spinlock_t *ptl;
  	pte_t *ptep;
  	pte_t pte;
  
  	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
  	pte = *ptep;
  
  	if (is_swap_pte(pte)) {
  		entry = pte_to_swp_entry(pte);
  		if (is_migration_entry(entry)) {
  			target = migration_entry_to_page(entry);
  			/*
  			 * Once radix-tree replacement of page migration
  			 * has started, page_count *must* be zero, and we
  			 * do not want to call wait_on_page_locked() on a
  			 * page we hold no reference to.  Hence
  			 * get_page_unless_zero(): if it fails, the page
  			 * fault will simply occur again.
  			 */
  			if (!get_page_unless_zero(target))
  				target = NULL;
  		}
  	}
  
  	pte_unmap_unlock(ptep, ptl);
  
  	/* Not a migration entry, or no reference obtained: just retry. */
  	if (!target)
  		return;
  
  	wait_on_page_locked(target);
  	put_page(target);
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
254
  /*
c3fcf8a5d   Christoph Lameter   [PATCH] page migr...
255
   * Replace the page in the mapping.
5b5c7120e   Christoph Lameter   [PATCH] page migr...
256
257
258
259
260
   *
   * The number of remaining references must be:
   * 1 for anonymous pages without a mapping
   * 2 for pages with a mapping
   * 3 for pages with a mapping and PagePrivate set.
b20a35035   Christoph Lameter   [PATCH] page migr...
261
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
262
263
  static int migrate_page_move_mapping(struct address_space *mapping,
  		struct page *newpage, struct page *page)
b20a35035   Christoph Lameter   [PATCH] page migr...
264
  {
e286781d5   Nick Piggin   mm: speculative p...
265
  	int expected_count;
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
266
  	void **pslot;
b20a35035   Christoph Lameter   [PATCH] page migr...
267

6c5240ae7   Christoph Lameter   [PATCH] Swapless ...
268
  	if (!mapping) {
0e8c7d0fd   Christoph Lameter   page migration: f...
269
  		/* Anonymous page without mapping */
6c5240ae7   Christoph Lameter   [PATCH] Swapless ...
270
271
272
273
  		if (page_count(page) != 1)
  			return -EAGAIN;
  		return 0;
  	}
19fd62312   Nick Piggin   mm: spinlock tree...
274
  	spin_lock_irq(&mapping->tree_lock);
b20a35035   Christoph Lameter   [PATCH] page migr...
275

7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
276
277
  	pslot = radix_tree_lookup_slot(&mapping->page_tree,
   					page_index(page));
b20a35035   Christoph Lameter   [PATCH] page migr...
278

e286781d5   Nick Piggin   mm: speculative p...
279
280
  	expected_count = 2 + !!PagePrivate(page);
  	if (page_count(page) != expected_count ||
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
281
  			(struct page *)radix_tree_deref_slot(pslot) != page) {
19fd62312   Nick Piggin   mm: spinlock tree...
282
  		spin_unlock_irq(&mapping->tree_lock);
e23ca00bf   Christoph Lameter   [PATCH] Some page...
283
  		return -EAGAIN;
b20a35035   Christoph Lameter   [PATCH] page migr...
284
  	}
e286781d5   Nick Piggin   mm: speculative p...
285
  	if (!page_freeze_refs(page, expected_count)) {
19fd62312   Nick Piggin   mm: spinlock tree...
286
  		spin_unlock_irq(&mapping->tree_lock);
e286781d5   Nick Piggin   mm: speculative p...
287
288
  		return -EAGAIN;
  	}
b20a35035   Christoph Lameter   [PATCH] page migr...
289
290
  	/*
  	 * Now we know that no one else is looking at the page.
b20a35035   Christoph Lameter   [PATCH] page migr...
291
  	 */
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
292
  	get_page(newpage);	/* add cache reference */
b20a35035   Christoph Lameter   [PATCH] page migr...
293
294
295
296
  	if (PageSwapCache(page)) {
  		SetPageSwapCache(newpage);
  		set_page_private(newpage, page_private(page));
  	}
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
297
  	radix_tree_replace_slot(pslot, newpage);
e286781d5   Nick Piggin   mm: speculative p...
298
  	page_unfreeze_refs(page, expected_count);
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
299
300
301
302
  	/*
  	 * Drop cache reference from old page.
  	 * We know this isn't the last reference.
  	 */
b20a35035   Christoph Lameter   [PATCH] page migr...
303
  	__put_page(page);
7cf9c2c76   Nick Piggin   [PATCH] radix-tre...
304

0e8c7d0fd   Christoph Lameter   page migration: f...
305
306
307
308
309
310
311
312
313
314
315
316
  	/*
  	 * If moved to a different zone then also account
  	 * the page for that zone. Other VM counters will be
  	 * taken care of when we establish references to the
  	 * new page and drop references to the old page.
  	 *
  	 * Note that anonymous pages are accounted for
  	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
  	 * are mapped to swap space.
  	 */
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__inc_zone_page_state(newpage, NR_FILE_PAGES);
19fd62312   Nick Piggin   mm: spinlock tree...
317
  	spin_unlock_irq(&mapping->tree_lock);
b20a35035   Christoph Lameter   [PATCH] page migr...
318
319
320
  
  	return 0;
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
321
322
323
324
  
  /*
   * Copy the page to its new location
   */
e7340f733   Christoph Lameter   [PATCH] page migr...
325
  static void migrate_page_copy(struct page *newpage, struct page *page)
b20a35035   Christoph Lameter   [PATCH] page migr...
326
  {
b7abea963   KAMEZAWA Hiroyuki   memcg: make page-...
327
  	int anon;
b20a35035   Christoph Lameter   [PATCH] page migr...
328
329
330
331
332
333
334
335
  	copy_highpage(newpage, page);
  
  	if (PageError(page))
  		SetPageError(newpage);
  	if (PageReferenced(page))
  		SetPageReferenced(newpage);
  	if (PageUptodate(page))
  		SetPageUptodate(newpage);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
336
337
  	if (TestClearPageActive(page)) {
  		VM_BUG_ON(PageUnevictable(page));
b20a35035   Christoph Lameter   [PATCH] page migr...
338
  		SetPageActive(newpage);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
339
340
  	} else
  		unevictable_migrate_page(newpage, page);
b20a35035   Christoph Lameter   [PATCH] page migr...
341
342
343
344
345
346
347
  	if (PageChecked(page))
  		SetPageChecked(newpage);
  	if (PageMappedToDisk(page))
  		SetPageMappedToDisk(newpage);
  
  	if (PageDirty(page)) {
  		clear_page_dirty_for_io(page);
3a902c5f6   Nick Piggin   mm: fix warning o...
348
349
350
351
352
353
354
355
  		/*
  		 * Want to mark the page and the radix tree as dirty, and
  		 * redo the accounting that clear_page_dirty_for_io undid,
  		 * but we can't use set_page_dirty because that function
  		 * is actually a signal that all of the page has become dirty.
  		 * Wheras only part of our page may be dirty.
  		 */
  		__set_page_dirty_nobuffers(newpage);
b20a35035   Christoph Lameter   [PATCH] page migr...
356
   	}
b291f0003   Nick Piggin   mlock: mlocked pa...
357
  	mlock_migrate_page(newpage, page);
b20a35035   Christoph Lameter   [PATCH] page migr...
358
  	ClearPageSwapCache(page);
b20a35035   Christoph Lameter   [PATCH] page migr...
359
360
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
b7abea963   KAMEZAWA Hiroyuki   memcg: make page-...
361
362
  	/* page->mapping contains a flag for PageAnon() */
  	anon = PageAnon(page);
b20a35035   Christoph Lameter   [PATCH] page migr...
363
  	page->mapping = NULL;
b7abea963   KAMEZAWA Hiroyuki   memcg: make page-...
364
365
  	if (!anon) /* This page was removed from radix-tree. */
  		mem_cgroup_uncharge_cache_page(page);
b20a35035   Christoph Lameter   [PATCH] page migr...
366
367
368
369
370
371
372
  	/*
  	 * If any waiters have accumulated on the new page then
  	 * wake them up.
  	 */
  	if (PageWriteback(newpage))
  		end_page_writeback(newpage);
  }
b20a35035   Christoph Lameter   [PATCH] page migr...
373

1d8b85ccf   Christoph Lameter   [PATCH] page migr...
374
375
376
377
378
  /************************************************************
   *                    Migration functions
   ***********************************************************/
  
  /* Always fail migration. Used for mappings that are not movable */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
379
380
  int fail_migrate_page(struct address_space *mapping,
  			struct page *newpage, struct page *page)
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
381
382
383
384
  {
  	return -EIO;
  }
  EXPORT_SYMBOL(fail_migrate_page);
b20a35035   Christoph Lameter   [PATCH] page migr...
385
386
387
388
389
390
  /*
   * Common logic to directly migrate a single page, suitable for pages
   * that do not use PagePrivate.
   *
   * Pages are locked upon entry and exit.
   *
   * Returns 0 on success, or the error from migrate_page_move_mapping().
   */
  int migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page)
  {
  	int rc;
  
  	/* Writeback must be complete */
  	BUG_ON(PageWriteback(page));
  
  	rc = migrate_page_move_mapping(mapping, newpage, page);
  
  	/* Copy contents and state only once the mapping has been switched. */
  	if (!rc)
  		migrate_page_copy(newpage, page);
  
  	return rc;
  }
  EXPORT_SYMBOL(migrate_page);
9361401eb   David Howells   [PATCH] BLOCK: Ma...
406
  #ifdef CONFIG_BLOCK
b20a35035   Christoph Lameter   [PATCH] page migr...
407
  /*
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
408
409
410
411
   * Migration function for pages with buffers. This function can only be used
   * if the underlying filesystem guarantees that no other references to "page"
   * exist.
   */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
412
413
  int buffer_migrate_page(struct address_space *mapping,
  		struct page *newpage, struct page *page)
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
414
  {
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
415
416
  	struct buffer_head *bh, *head;
  	int rc;
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
417
  	if (!page_has_buffers(page))
2d1db3b11   Christoph Lameter   [PATCH] page migr...
418
  		return migrate_page(mapping, newpage, page);
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
419
420
  
  	head = page_buffers(page);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
421
  	rc = migrate_page_move_mapping(mapping, newpage, page);
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
  
  	if (rc)
  		return rc;
  
  	bh = head;
  	do {
  		get_bh(bh);
  		lock_buffer(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	ClearPagePrivate(page);
  	set_page_private(newpage, page_private(page));
  	set_page_private(page, 0);
  	put_page(page);
  	get_page(newpage);
  
  	bh = head;
  	do {
  		set_bh_page(bh, newpage, bh_offset(bh));
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	SetPagePrivate(newpage);
  
  	migrate_page_copy(newpage, page);
  
  	bh = head;
  	do {
  		unlock_buffer(bh);
   		put_bh(bh);
  		bh = bh->b_this_page;
  
  	} while (bh != head);
  
  	return 0;
  }
  EXPORT_SYMBOL(buffer_migrate_page);
9361401eb   David Howells   [PATCH] BLOCK: Ma...
462
  #endif
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
463

04e62a29b   Christoph Lameter   [PATCH] More page...
464
465
466
467
  /*
   * Writeback a page to clean the dirty state
   */
  static int writeout(struct address_space *mapping, struct page *page)
8351a6e47   Christoph Lameter   [PATCH] page migr...
468
  {
04e62a29b   Christoph Lameter   [PATCH] More page...
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
  	struct writeback_control wbc = {
  		.sync_mode = WB_SYNC_NONE,
  		.nr_to_write = 1,
  		.range_start = 0,
  		.range_end = LLONG_MAX,
  		.nonblocking = 1,
  		.for_reclaim = 1
  	};
  	int rc;
  
  	if (!mapping->a_ops->writepage)
  		/* No write method for the address space */
  		return -EINVAL;
  
  	if (!clear_page_dirty_for_io(page))
  		/* Someone else already triggered a write */
  		return -EAGAIN;
8351a6e47   Christoph Lameter   [PATCH] page migr...
486
  	/*
04e62a29b   Christoph Lameter   [PATCH] More page...
487
488
489
490
491
492
  	 * A dirty page may imply that the underlying filesystem has
  	 * the page on some queue. So the page must be clean for
  	 * migration. Writeout may mean we loose the lock and the
  	 * page state is no longer what we checked for earlier.
  	 * At this point we know that the migration attempt cannot
  	 * be successful.
8351a6e47   Christoph Lameter   [PATCH] page migr...
493
  	 */
04e62a29b   Christoph Lameter   [PATCH] More page...
494
  	remove_migration_ptes(page, page);
8351a6e47   Christoph Lameter   [PATCH] page migr...
495

04e62a29b   Christoph Lameter   [PATCH] More page...
496
  	rc = mapping->a_ops->writepage(page, &wbc);
8351a6e47   Christoph Lameter   [PATCH] page migr...
497

04e62a29b   Christoph Lameter   [PATCH] More page...
498
499
500
  	if (rc != AOP_WRITEPAGE_ACTIVATE)
  		/* unlocked. Relock */
  		lock_page(page);
bda8550de   Hugh Dickins   migration: fix wr...
501
  	return (rc < 0) ? -EIO : -EAGAIN;
04e62a29b   Christoph Lameter   [PATCH] More page...
502
503
504
505
506
507
508
509
510
511
  }
  
  /*
   * Default handling if a filesystem does not provide a migration function.
   */
  static int fallback_migrate_page(struct address_space *mapping,
  	struct page *newpage, struct page *page)
  {
  	if (PageDirty(page))
  		return writeout(mapping, page);
8351a6e47   Christoph Lameter   [PATCH] page migr...
512
513
514
515
516
  
  	/*
  	 * Buffers may be managed in a filesystem specific way.
  	 * We must have no buffers or drop them.
  	 */
b398f6bff   David Howells   [PATCH] BLOCK: St...
517
  	if (PagePrivate(page) &&
8351a6e47   Christoph Lameter   [PATCH] page migr...
518
519
520
521
522
  	    !try_to_release_page(page, GFP_KERNEL))
  		return -EAGAIN;
  
  	return migrate_page(mapping, newpage, page);
  }
1d8b85ccf   Christoph Lameter   [PATCH] page migr...
523
  /*
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
524
525
526
527
528
   * Move a page to a newly allocated page
   * The page is locked and all ptes have been successfully removed.
   *
   * The new page will have replaced the old page if this function
   * is successful.
894bc3104   Lee Schermerhorn   Unevictable LRU I...
529
530
531
532
   *
   * Return value:
   *   < 0 - error code
   *  == 0 - success
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
533
534
535
536
537
538
539
540
541
542
543
   */
  static int move_to_new_page(struct page *newpage, struct page *page)
  {
  	struct address_space *mapping;
  	int rc;
  
  	/*
  	 * Block others from accessing the page when we get around to
  	 * establishing additional references. We are the only one
  	 * holding a reference to the new page at this point.
  	 */
529ae9aaa   Nick Piggin   mm: rename page t...
544
  	if (!trylock_page(newpage))
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
545
546
547
548
549
  		BUG();
  
  	/* Prepare mapping for the new page.*/
  	newpage->index = page->index;
  	newpage->mapping = page->mapping;
b2e185384   Rik van Riel   define page_file_...
550
551
  	if (PageSwapBacked(page))
  		SetPageSwapBacked(newpage);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
  
  	mapping = page_mapping(page);
  	if (!mapping)
  		rc = migrate_page(mapping, newpage, page);
  	else if (mapping->a_ops->migratepage)
  		/*
  		 * Most pages have a mapping and most filesystems
  		 * should provide a migration function. Anonymous
  		 * pages are part of swap space which also has its
  		 * own migration function. This is the most common
  		 * path for page migration.
  		 */
  		rc = mapping->a_ops->migratepage(mapping,
  						newpage, page);
  	else
  		rc = fallback_migrate_page(mapping, newpage, page);
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
568
  	if (!rc) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
569
  		remove_migration_ptes(page, newpage);
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
570
  	} else
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
571
572
573
574
575
576
577
578
579
580
581
  		newpage->mapping = NULL;
  
  	unlock_page(newpage);
  
  	return rc;
  }
  
  /*
   * Obtain the lock on page, remove all ptes and migrate the page
   * to the newly allocated page in newpage.
   */
95a402c38   Christoph Lameter   [PATCH] page migr...
582
583
  static int unmap_and_move(new_page_t get_new_page, unsigned long private,
  			struct page *page, int force)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
584
585
  {
  	int rc = 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
586
587
  	int *result = NULL;
  	struct page *newpage = get_new_page(page, private, &result);
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
588
  	int rcu_locked = 0;
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
589
  	int charge = 0;
95a402c38   Christoph Lameter   [PATCH] page migr...
590
591
592
  
  	if (!newpage)
  		return -ENOMEM;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
593

894bc3104   Lee Schermerhorn   Unevictable LRU I...
594
  	if (page_count(page) == 1) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
595
  		/* page was freed from under us. So we are done. */
95a402c38   Christoph Lameter   [PATCH] page migr...
596
  		goto move_newpage;
894bc3104   Lee Schermerhorn   Unevictable LRU I...
597
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
598

e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
599
600
601
602
603
604
605
  	charge = mem_cgroup_prepare_migration(page, newpage);
  	if (charge == -ENOMEM) {
  		rc = -ENOMEM;
  		goto move_newpage;
  	}
  	/* prepare cgroup just returns 0 or -ENOMEM */
  	BUG_ON(charge);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
606
  	rc = -EAGAIN;
529ae9aaa   Nick Piggin   mm: rename page t...
607
  	if (!trylock_page(page)) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
608
  		if (!force)
95a402c38   Christoph Lameter   [PATCH] page migr...
609
  			goto move_newpage;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
610
611
612
613
614
615
616
617
  		lock_page(page);
  	}
  
  	if (PageWriteback(page)) {
  		if (!force)
  			goto unlock;
  		wait_on_page_writeback(page);
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
618
  	/*
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
619
620
621
622
  	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
  	 * we cannot notice that anon_vma is freed while we migrates a page.
  	 * This rcu_read_lock() delays freeing anon_vma pointer until the end
  	 * of migration. File cache pages are no problem because of page_lock()
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
623
624
  	 * File Caches may use write_page() or lock_page() in migration, then,
  	 * just care Anon page here.
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
625
  	 */
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
626
627
628
629
  	if (PageAnon(page)) {
  		rcu_read_lock();
  		rcu_locked = 1;
  	}
62e1c5530   Shaohua Li   page migraton: ha...
630

dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
631
  	/*
62e1c5530   Shaohua Li   page migraton: ha...
632
633
634
635
636
637
638
639
640
641
  	 * Corner case handling:
  	 * 1. When a new swap-cache page is read into, it is added to the LRU
  	 * and treated as swapcache but it has no rmap yet.
  	 * Calling try_to_unmap() against a page->mapping==NULL page will
  	 * trigger a BUG.  So handle it here.
  	 * 2. An orphaned page (see truncate_complete_page) might have
  	 * fs-private metadata. The page can be picked up due to memory
  	 * offlining.  Everywhere else except page reclaim, the page is
  	 * invisible to the vm, so the page can not be migrated.  So try to
  	 * free the metadata, so the page can be freed.
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
642
  	 */
62e1c5530   Shaohua Li   page migraton: ha...
643
644
645
646
647
648
649
650
651
652
653
  	if (!page->mapping) {
  		if (!PageAnon(page) && PagePrivate(page)) {
  			/*
  			 * Go direct to try_to_free_buffers() here because
  			 * a) that's what try_to_release_page() would do anyway
  			 * b) we may be under rcu_read_lock() here, so we can't
  			 *    use GFP_KERNEL which is what try_to_release_page()
  			 *    needs to be effective.
  			 */
  			try_to_free_buffers(page);
  		}
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
654
  		goto rcu_unlock;
62e1c5530   Shaohua Li   page migraton: ha...
655
  	}
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
656
  	/* Establish migration ptes or remove ptes */
e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
657
  	try_to_unmap(page, 1);
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
658

e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
659
660
  	if (!page_mapped(page))
  		rc = move_to_new_page(newpage, page);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
661

e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
662
  	if (rc)
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
663
  		remove_migration_ptes(page, page);
dc386d4d1   KAMEZAWA Hiroyuki   memory unplug: mi...
664
  rcu_unlock:
989f89c57   KAMEZAWA Hiroyuki   fix rcu_read_lock...
665
666
  	if (rcu_locked)
  		rcu_read_unlock();
e6a1530d6   Christoph Lameter   [PATCH] Allow mig...
667

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
668
669
  unlock:
  	unlock_page(page);
95a402c38   Christoph Lameter   [PATCH] page migr...
670

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
671
  	if (rc != -EAGAIN) {
aaa994b30   Christoph Lameter   [PATCH] page migr...
672
673
674
675
676
677
678
   		/*
   		 * A page that has been migrated has all references
   		 * removed and will be freed. A page that has not been
   		 * migrated will have kepts its references and be
   		 * restored.
   		 */
   		list_del(&page->lru);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
679
  		putback_lru_page(page);
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
680
  	}
95a402c38   Christoph Lameter   [PATCH] page migr...
681
682
  
  move_newpage:
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
683
684
  	if (!charge)
  		mem_cgroup_end_migration(newpage);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
685

95a402c38   Christoph Lameter   [PATCH] page migr...
686
687
688
689
  	/*
  	 * Move the new page to the LRU. If migration was not successful
  	 * then this will free the page.
  	 */
894bc3104   Lee Schermerhorn   Unevictable LRU I...
690
  	putback_lru_page(newpage);
742755a1d   Christoph Lameter   [PATCH] page migr...
691
692
693
694
695
696
  	if (result) {
  		if (rc)
  			*result = rc;
  		else
  			*result = page_to_nid(newpage);
  	}
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
697
698
699
700
  	return rc;
  }
  
  /*
b20a35035   Christoph Lameter   [PATCH] page migr...
701
702
   * migrate_pages
   *
95a402c38   Christoph Lameter   [PATCH] page migr...
703
704
705
   * The function takes one list of pages to migrate and a function
   * that determines from the page to be migrated and the private data
   * the target of the move and allocates the page.
b20a35035   Christoph Lameter   [PATCH] page migr...
706
707
708
   *
   * The function returns after 10 attempts or if no pages
   * are movable anymore because to has become empty
aaa994b30   Christoph Lameter   [PATCH] page migr...
709
   * or no retryable pages exist anymore. All pages will be
e9534b3fd   Gabriel Craciunescu   Typo fixes retrun...
710
   * returned to the LRU or freed.
b20a35035   Christoph Lameter   [PATCH] page migr...
711
   *
95a402c38   Christoph Lameter   [PATCH] page migr...
712
   * Return: Number of pages not migrated or error code.
b20a35035   Christoph Lameter   [PATCH] page migr...
713
   */
95a402c38   Christoph Lameter   [PATCH] page migr...
714
715
  int migrate_pages(struct list_head *from,
  		new_page_t get_new_page, unsigned long private)
b20a35035   Christoph Lameter   [PATCH] page migr...
716
  {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
717
  	int retry = 1;
b20a35035   Christoph Lameter   [PATCH] page migr...
718
719
720
721
722
723
724
725
726
  	int nr_failed = 0;
  	int pass = 0;
  	struct page *page;
  	struct page *page2;
  	int swapwrite = current->flags & PF_SWAPWRITE;
  	int rc;
  
  	if (!swapwrite)
  		current->flags |= PF_SWAPWRITE;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
727
728
  	for(pass = 0; pass < 10 && retry; pass++) {
  		retry = 0;
b20a35035   Christoph Lameter   [PATCH] page migr...
729

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
730
  		list_for_each_entry_safe(page, page2, from, lru) {
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
731
  			cond_resched();
2d1db3b11   Christoph Lameter   [PATCH] page migr...
732

95a402c38   Christoph Lameter   [PATCH] page migr...
733
734
  			rc = unmap_and_move(get_new_page, private,
  						page, pass > 2);
2d1db3b11   Christoph Lameter   [PATCH] page migr...
735

e24f0b8f7   Christoph Lameter   [PATCH] page migr...
736
  			switch(rc) {
95a402c38   Christoph Lameter   [PATCH] page migr...
737
738
  			case -ENOMEM:
  				goto out;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
739
  			case -EAGAIN:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
740
  				retry++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
741
742
  				break;
  			case 0:
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
743
744
  				break;
  			default:
2d1db3b11   Christoph Lameter   [PATCH] page migr...
745
  				/* Permanent failure */
2d1db3b11   Christoph Lameter   [PATCH] page migr...
746
  				nr_failed++;
e24f0b8f7   Christoph Lameter   [PATCH] page migr...
747
  				break;
2d1db3b11   Christoph Lameter   [PATCH] page migr...
748
  			}
b20a35035   Christoph Lameter   [PATCH] page migr...
749
750
  		}
  	}
95a402c38   Christoph Lameter   [PATCH] page migr...
751
752
  	rc = 0;
  out:
b20a35035   Christoph Lameter   [PATCH] page migr...
753
754
  	if (!swapwrite)
  		current->flags &= ~PF_SWAPWRITE;
aaa994b30   Christoph Lameter   [PATCH] page migr...
755
  	putback_lru_pages(from);
b20a35035   Christoph Lameter   [PATCH] page migr...
756

95a402c38   Christoph Lameter   [PATCH] page migr...
757
758
  	if (rc)
  		return rc;
b20a35035   Christoph Lameter   [PATCH] page migr...
759

95a402c38   Christoph Lameter   [PATCH] page migr...
760
  	return nr_failed + retry;
b20a35035   Christoph Lameter   [PATCH] page migr...
761
  }
95a402c38   Christoph Lameter   [PATCH] page migr...
762

742755a1d   Christoph Lameter   [PATCH] page migr...
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
  #ifdef CONFIG_NUMA
  /*
   * Move a list of individual pages
   */
  /*
   * One entry of the move_pages() work array. The array is terminated by
   * an entry whose node field is MAX_NUMNODES.
   */
  struct page_to_node {
  	/* Virtual address of the page to be moved (looked up via find_vma). */
  	unsigned long addr;
  	/* Page found at addr; new_page_node() matches entries on this. */
  	struct page *page;
  	/* Target node for the page, or MAX_NUMNODES for the end marker. */
  	int node;
  	/* Per-entry result: a negative errno or a node id. */
  	int status;
  };
  
  static struct page *new_page_node(struct page *p, unsigned long private,
  		int **result)
  {
  	struct page_to_node *pm = (struct page_to_node *)private;
  
  	while (pm->node != MAX_NUMNODES && pm->page != p)
  		pm++;
  
  	if (pm->node == MAX_NUMNODES)
  		return NULL;
  
  	*result = &pm->status;
769848c03   Mel Gorman   Add __GFP_MOVABLE...
786
787
  	return alloc_pages_node(pm->node,
  				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
742755a1d   Christoph Lameter   [PATCH] page migr...
788
789
790
791
792
793
  }
  
  /*
   * Move a set of pages as indicated in the pm array. The addr
   * field must be set to the virtual address of the page to be moved
   * and the node number must contain a valid target node.
5e9a0f023   Brice Goglin   mm: extract do_pa...
794
   * The pm array ends with node = MAX_NUMNODES.
742755a1d   Christoph Lameter   [PATCH] page migr...
795
   */
5e9a0f023   Brice Goglin   mm: extract do_pa...
796
797
798
  static int do_move_page_to_node_array(struct mm_struct *mm,
  				      struct page_to_node *pm,
  				      int migrate_all)
742755a1d   Christoph Lameter   [PATCH] page migr...
799
800
801
802
  {
  	int err;
  	struct page_to_node *pp;
  	LIST_HEAD(pagelist);
0aedadf91   Christoph Lameter   mm: move migrate_...
803
  	migrate_prep();
742755a1d   Christoph Lameter   [PATCH] page migr...
804
805
806
807
808
  	down_read(&mm->mmap_sem);
  
  	/*
  	 * Build a list of pages to migrate
  	 */
742755a1d   Christoph Lameter   [PATCH] page migr...
809
810
811
  	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
  		struct vm_area_struct *vma;
  		struct page *page;
742755a1d   Christoph Lameter   [PATCH] page migr...
812
813
  		err = -EFAULT;
  		vma = find_vma(mm, pp->addr);
0dc952dc3   Christoph Lameter   [PATCH] Page migr...
814
  		if (!vma || !vma_migratable(vma))
742755a1d   Christoph Lameter   [PATCH] page migr...
815
816
817
  			goto set_status;
  
  		page = follow_page(vma, pp->addr, FOLL_GET);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
818
819
820
821
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
  		err = -ENOENT;
  		if (!page)
  			goto set_status;
  
  		if (PageReserved(page))		/* Check for zero page */
  			goto put_and_set;
  
  		pp->page = page;
  		err = page_to_nid(page);
  
  		if (err == pp->node)
  			/*
  			 * Node already in the right place
  			 */
  			goto put_and_set;
  
  		err = -EACCES;
  		if (page_mapcount(page) > 1 &&
  				!migrate_all)
  			goto put_and_set;
62695a84e   Nick Piggin   vmscan: move isol...
842
843
844
  		err = isolate_lru_page(page);
  		if (!err)
  			list_add_tail(&page->lru, &pagelist);
742755a1d   Christoph Lameter   [PATCH] page migr...
845
846
847
848
849
850
851
852
853
854
  put_and_set:
  		/*
  		 * Either remove the duplicate refcount from
  		 * isolate_lru_page() or drop the page ref if it was
  		 * not isolated.
  		 */
  		put_page(page);
  set_status:
  		pp->status = err;
  	}
e78bbfa82   Brice Goglin   mm: stop returnin...
855
  	err = 0;
742755a1d   Christoph Lameter   [PATCH] page migr...
856
857
858
  	if (!list_empty(&pagelist))
  		err = migrate_pages(&pagelist, new_page_node,
  				(unsigned long)pm);
742755a1d   Christoph Lameter   [PATCH] page migr...
859
860
861
862
863
864
  
  	up_read(&mm->mmap_sem);
  	return err;
  }
  
  /*
   * Migrate an array of page address onto an array of nodes and fill
   * the corresponding array of status.
   *
   * The user arrays are processed in chunks that fit into a single page
   * of page_to_node entries. Each chunk is read from user space,
   * validated, migrated with do_move_page_to_node_array() and its status
   * values are written back before the next chunk is started.
   *
   * Returns 0 on success, -ENOMEM if the chunk page cannot be allocated,
   * -EFAULT on a failed user access, -ENODEV for a node that is out of
   * range or fails the N_HIGH_MEMORY check, -EACCES for a node outside
   * the task's allowed set, or a negative error from the migration
   * itself.
   */
  static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
  			 unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 const int __user *nodes,
  			 int __user *status, int flags)
  {
  	struct page_to_node *pm;
  	nodemask_t task_nodes;
  	unsigned long chunk_nr_pages;
  	unsigned long chunk_start;
  	int err;
  
  	task_nodes = cpuset_mems_allowed(task);
  
  	err = -ENOMEM;
  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
  	if (!pm)
  		goto out;
  
  	/*
  	 * Store a chunk of page_to_node array in a page,
  	 * but keep the last one as a marker
  	 */
  	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
  
  	for (chunk_start = 0;
  	     chunk_start < nr_pages;
  	     chunk_start += chunk_nr_pages) {
  		unsigned long j;
  
  		/* The final chunk may be shorter than a full one. */
  		if (chunk_start + chunk_nr_pages > nr_pages)
  			chunk_nr_pages = nr_pages - chunk_start;
  
  		/* fill the chunk pm with addrs and nodes from user-space */
  		for (j = 0; j < chunk_nr_pages; j++) {
  			const void __user *p;
  			int node;
  
  			err = -EFAULT;
  			if (get_user(p, pages + j + chunk_start))
  				goto out_pm;
  			pm[j].addr = (unsigned long) p;
  
  			if (get_user(node, nodes + j + chunk_start))
  				goto out_pm;
  
  			/*
  			 * node comes straight from user space and is used
  			 * below to index node bitmaps, so reject anything
  			 * outside [0, MAX_NUMNODES) before testing it.
  			 */
  			err = -ENODEV;
  			if (node < 0 || node >= MAX_NUMNODES)
  				goto out_pm;
  
  			if (!node_state(node, N_HIGH_MEMORY))
  				goto out_pm;
  
  			err = -EACCES;
  			if (!node_isset(node, task_nodes))
  				goto out_pm;
  
  			pm[j].node = node;
  		}
  
  		/* End marker for this chunk */
  		pm[chunk_nr_pages].node = MAX_NUMNODES;
  
  		/* Migrate this chunk */
  		err = do_move_page_to_node_array(mm, pm,
  						 flags & MPOL_MF_MOVE_ALL);
  		if (err < 0)
  			goto out_pm;
  
  		/* Return status information */
  		for (j = 0; j < chunk_nr_pages; j++)
  			if (put_user(pm[j].status, status + j + chunk_start)) {
  				err = -EFAULT;
  				goto out_pm;
  			}
  	}
  	err = 0;
  
  out_pm:
  	free_page((unsigned long)pm);
  out:
  	return err;
  }
  
  /*
2f007e74b   Brice Goglin   mm: don't vmalloc...
946
   * Determine the nodes of an array of pages and store it in an array of status.
742755a1d   Christoph Lameter   [PATCH] page migr...
947
   */
80bba1290   Brice Goglin   mm: no get_user/p...
948
949
  static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
  				const void __user **pages, int *status)
742755a1d   Christoph Lameter   [PATCH] page migr...
950
  {
2f007e74b   Brice Goglin   mm: don't vmalloc...
951
  	unsigned long i;
2f007e74b   Brice Goglin   mm: don't vmalloc...
952

742755a1d   Christoph Lameter   [PATCH] page migr...
953
  	down_read(&mm->mmap_sem);
2f007e74b   Brice Goglin   mm: don't vmalloc...
954
  	for (i = 0; i < nr_pages; i++) {
80bba1290   Brice Goglin   mm: no get_user/p...
955
  		unsigned long addr = (unsigned long)(*pages);
742755a1d   Christoph Lameter   [PATCH] page migr...
956
957
  		struct vm_area_struct *vma;
  		struct page *page;
c095adbc2   KOSAKI Motohiro   mm: Don't touch u...
958
  		int err = -EFAULT;
2f007e74b   Brice Goglin   mm: don't vmalloc...
959
960
  
  		vma = find_vma(mm, addr);
742755a1d   Christoph Lameter   [PATCH] page migr...
961
962
  		if (!vma)
  			goto set_status;
2f007e74b   Brice Goglin   mm: don't vmalloc...
963
  		page = follow_page(vma, addr, 0);
89f5b7da2   Linus Torvalds   Reinstate ZERO_PA...
964
965
966
967
  
  		err = PTR_ERR(page);
  		if (IS_ERR(page))
  			goto set_status;
742755a1d   Christoph Lameter   [PATCH] page migr...
968
969
970
971
972
973
974
  		err = -ENOENT;
  		/* Use PageReserved to check for zero page */
  		if (!page || PageReserved(page))
  			goto set_status;
  
  		err = page_to_nid(page);
  set_status:
80bba1290   Brice Goglin   mm: no get_user/p...
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
  		*status = err;
  
  		pages++;
  		status++;
  	}
  
  	up_read(&mm->mmap_sem);
  }
  
  /*
   * Determine the nodes of a user array of pages and store it in
   * a user array of status.
   *
   * The work is done in batches of at most DO_PAGES_STAT_CHUNK_NR
   * entries that are bounced through on-stack buffers. Returns 0 on
   * success or -EFAULT if either user array cannot be accessed.
   */
  static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
  			 const void __user * __user *pages,
  			 int __user *status)
  {
  #define DO_PAGES_STAT_CHUNK_NR 16
  	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
  	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
  	unsigned long done = 0;
  
  	while (done < nr_pages) {
  		unsigned long batch = nr_pages - done;
  
  		if (batch > DO_PAGES_STAT_CHUNK_NR)
  			batch = DO_PAGES_STAT_CHUNK_NR;
  
  		if (copy_from_user(chunk_pages, &pages[done],
  				   batch * sizeof(*chunk_pages)))
  			return -EFAULT;
  
  		do_pages_stat_array(mm, batch, chunk_pages, chunk_status);
  
  		if (copy_to_user(&status[done], chunk_status,
  				 batch * sizeof(*chunk_status)))
  			return -EFAULT;
  
  		done += batch;
  	}
  
  	return 0;
  }
  
  /*
   * Move a list of pages in the address space of the process given by
   * @pid (the calling process if @pid is 0), or, when @nodes is NULL,
   * only report the node each page currently resides on.
   *
   * Returns 0 on success or a negative errno: -EINVAL for unknown flags
   * or a task without an mm, -EPERM if the caller may not act on the
   * target, -ESRCH if no task matches @pid, or whatever the security
   * hook, do_pages_move() or do_pages_stat() return.
   */
  asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
  			const void __user * __user *pages,
  			const int __user *nodes,
  			int __user *status, int flags)
  {
  	const struct cred *cred = current_cred(), *tcred;
  	struct task_struct *task;
  	struct mm_struct *mm;
  	int err;
  
  	/* Check flags */
  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
  		return -EINVAL;
  
  	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
  		return -EPERM;
  
  	/* Find the mm_struct */
  	read_lock(&tasklist_lock);
  	task = pid ? find_task_by_vpid(pid) : current;
  	if (!task) {
  		read_unlock(&tasklist_lock);
  		return -ESRCH;
  	}
  	/*
  	 * The task is still dereferenced after tasklist_lock is dropped
  	 * (credentials, security hook, cpuset lookup in do_pages_move()),
  	 * so hold a reference to keep the task_struct from being freed.
  	 */
  	get_task_struct(task);
  	mm = get_task_mm(task);
  	read_unlock(&tasklist_lock);
  
  	err = -EINVAL;
  	if (!mm)
  		goto out_task;
  
  	/*
  	 * Check if this process has the right to modify the specified
  	 * process. The right exists if the process has administrative
  	 * capabilities, superuser privileges or the same
  	 * userid as the target process.
  	 */
  	rcu_read_lock();
  	tcred = __task_cred(task);
  	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
  	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
  	    !capable(CAP_SYS_NICE)) {
  		rcu_read_unlock();
  		err = -EPERM;
  		goto out;
  	}
  	rcu_read_unlock();
  
  	err = security_task_movememory(task);
  	if (err)
  		goto out;
  
  	if (nodes) {
  		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
  				    flags);
  	} else {
  		err = do_pages_stat(mm, nr_pages, pages, status);
  	}
  out:
  	mmput(mm);
  out_task:
  	put_task_struct(task);
  	return err;
  }
742755a1d   Christoph Lameter   [PATCH] page migr...
1089

7b2259b3e   Christoph Lameter   [PATCH] page migr...
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
  /*
   * Call migration functions in the vma_ops that may prepare
   * memory in a vm for migration. migration functions may perform
   * the migration for vmas that do not have an underlying page struct.
   *
   * Every vma of @mm is visited in list order, including the last one,
   * and an mm without any vma is handled without touching a NULL
   * pointer. The walk stops at the first ->migrate() callback that
   * returns non-zero, and that value is returned; otherwise 0.
   */
  int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	const nodemask_t *from, unsigned long flags)
  {
  	struct vm_area_struct *vma;
  	int err = 0;
  
  	/*
  	 * Test vma itself, not vma->vm_next: the latter skips the final
  	 * vma and dereferences NULL when the list is empty.
  	 */
  	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
  		if (vma->vm_ops && vma->vm_ops->migrate)
  			err = vma->vm_ops->migrate(vma, to, from, flags);
  	}
  	return err;
  }
83d1674a9   Gerald Schaefer   mm: make CONFIG_M...
1110
  #endif