Commit 053837fce7aa79025ed57656855df09f80175527

Authored by Nick Piggin
Committed by Linus Torvalds
1 parent e236a166b2

[PATCH] mm: migration page refcounting fix

Migration code currently does not take a proper reference to the
target page, so between unlocking the pte and trying to take a new
reference to the page with isolate_lru_page, anything could happen to
it.

Fix this by holding the pte lock until we get a chance to elevate the
refcount.

Other small cleanups while we're here.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
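
The core of the race is the ordering of "drop the lock that keeps the page
stable" versus "take a new reference". Below is a minimal user-space sketch of
that refcount-before-unlock pattern. It is not the kernel implementation (that
is the check_pte_range/isolate_lru_page change in the diff below); the mutex
stands in for the pte lock, the atomic counter for the page refcount, and all
names (struct obj, grab_racy, grab_safe, put_obj) and the build line are made
up for illustration.

/*
 * Illustrative user-space analogy only, not kernel code.
 * Assumed build: cc -pthread refcount_sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	pthread_mutex_t lock;	/* plays the role of the pte lock */
	atomic_int refcount;	/* plays the role of the page refcount */
};

/*
 * Buggy ordering (analogous to the old migration path): the lock is
 * dropped first, so the object may be freed before we take a reference.
 */
struct obj *grab_racy(struct obj *o)
{
	pthread_mutex_unlock(&o->lock);
	/* window: another thread may drop the last reference here */
	atomic_fetch_add(&o->refcount, 1);
	return o;
}

/*
 * Fixed ordering (analogous to the patch): elevate the refcount before
 * releasing the lock, so the object is pinned across the unlock.
 */
struct obj *grab_safe(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
	pthread_mutex_unlock(&o->lock);
	return o;
}

void put_obj(struct obj *o)
{
	/* free on the last reference, in the spirit of put_page() */
	if (atomic_fetch_sub(&o->refcount, 1) == 1) {
		pthread_mutex_destroy(&o->lock);
		free(o);
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	pthread_mutex_init(&o->lock, NULL);
	atomic_init(&o->refcount, 1);

	pthread_mutex_lock(&o->lock);
	o = grab_safe(o);		/* refcount is now 2, o is pinned */
	printf("refcount = %d\n", atomic_load(&o->refcount));

	put_obj(o);			/* drop the extra reference */
	put_obj(o);			/* drop the last reference, frees o */
	return 0;
}

grab_racy() is never called; it only shows the broken ordering that the patch
removes from the migration path.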

Showing 7 changed files with 76 additions and 75 deletions

include/linux/mm_inline.h
... ... @@ -38,26 +38,4 @@
38 38 zone->nr_inactive--;
39 39 }
40 40 }
41   -
42   -/*
43   - * Isolate one page from the LRU lists.
44   - *
45   - * - zone->lru_lock must be held
46   - */
47   -static inline int __isolate_lru_page(struct page *page)
48   -{
49   - if (unlikely(!TestClearPageLRU(page)))
50   - return 0;
51   -
52   - if (get_page_testone(page)) {
53   - /*
54   - * It is being freed elsewhere
55   - */
56   - __put_page(page);
57   - SetPageLRU(page);
58   - return -ENOENT;
59   - }
60   -
61   - return 1;
62   -}
include/linux/swap.h
... ... @@ -167,6 +167,7 @@
167 167 extern void FASTCALL(activate_page(struct page *));
168 168 extern void FASTCALL(mark_page_accessed(struct page *));
169 169 extern void lru_add_drain(void);
  170 +extern int lru_add_drain_all(void);
170 171 extern int rotate_reclaimable_page(struct page *page);
171 172 extern void swap_setup(void);
172 173  
mm/filemap.c
... ... @@ -94,6 +94,7 @@
94 94 * ->private_lock (try_to_unmap_one)
95 95 * ->tree_lock (try_to_unmap_one)
96 96 * ->zone.lru_lock (follow_page->mark_page_accessed)
  97 + * ->zone.lru_lock (check_pte_range->isolate_lru_page)
97 98 * ->private_lock (page_remove_rmap->set_page_dirty)
98 99 * ->tree_lock (page_remove_rmap->set_page_dirty)
99 100 * ->inode_lock (page_remove_rmap->set_page_dirty)
mm/mempolicy.c
... ... @@ -208,6 +208,17 @@
208 208 page = vm_normal_page(vma, addr, *pte);
209 209 if (!page)
210 210 continue;
  211 + /*
  212 + * The check for PageReserved here is important to avoid
  213 + * handling zero pages and other pages that may have been
  214 + * marked special by the system.
  215 + *
  216 + * If PageReserved were not checked here then, for example,
  217 + * the location of the zero page could have an influence
  218 + * on MPOL_MF_STRICT, zero pages would be counted for
  219 + * the per node stats, and there would be useless attempts
  220 + * to put zero pages on the migration list.
  221 + */
211 222 if (PageReserved(page))
212 223 continue;
213 224 nid = page_to_nid(page);
214 225  
... ... @@ -216,11 +227,8 @@
216 227  
217 228 if (flags & MPOL_MF_STATS)
218 229 gather_stats(page, private);
219   - else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
220   - spin_unlock(ptl);
  230 + else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
221 231 migrate_page_add(vma, page, private, flags);
222   - spin_lock(ptl);
223   - }
224 232 else
225 233 break;
226 234 } while (pte++, addr += PAGE_SIZE, addr != end);
... ... @@ -309,6 +317,10 @@
309 317 int err;
310 318 struct vm_area_struct *first, *vma, *prev;
311 319  
  320 + /* Clear the LRU lists so pages can be isolated */
  321 + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
  322 + lru_add_drain_all();
  323 +
312 324 first = find_vma(mm, start);
313 325 if (!first)
314 326 return ERR_PTR(-EFAULT);
315 327  
... ... @@ -555,15 +567,8 @@
555 567 if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
556 568 mapping_writably_mapped(page->mapping) ||
557 569 single_mm_mapping(vma->vm_mm, page->mapping)) {
558   - int rc = isolate_lru_page(page);
559   -
560   - if (rc == 1)
  570 + if (isolate_lru_page(page))
561 571 list_add(&page->lru, pagelist);
562   - /*
563   - * If the isolate attempt was not successful then we just
564   - * encountered an unswappable page. Something must be wrong.
565   - */
566   - WARN_ON(rc == 0);
567 572 }
568 573 }
569 574  
mm/rmap.c
... ... @@ -33,7 +33,7 @@
33 33 * mapping->i_mmap_lock
34 34 * anon_vma->lock
35 35 * mm->page_table_lock or pte_lock
36   - * zone->lru_lock (in mark_page_accessed)
  36 + * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
37 37 * swap_lock (in swap_duplicate, swap_info_get)
38 38 * mmlist_lock (in mmput, drain_mmlist and others)
39 39 * mapping->private_lock (in __set_page_dirty_buffers)
mm/swap.c
... ... @@ -174,6 +174,32 @@
174 174 put_cpu();
175 175 }
176 176  
  177 +#ifdef CONFIG_NUMA
  178 +static void lru_add_drain_per_cpu(void *dummy)
  179 +{
  180 + lru_add_drain();
  181 +}
  182 +
  183 +/*
  184 + * Returns 0 for success
  185 + */
  186 +int lru_add_drain_all(void)
  187 +{
  188 + return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
  189 +}
  190 +
  191 +#else
  192 +
  193 +/*
  194 + * Returns 0 for success
  195 + */
  196 +int lru_add_drain_all(void)
  197 +{
  198 + lru_add_drain();
  199 + return 0;
  200 +}
  201 +#endif
  202 +
177 203 /*
178 204 * This path almost never happens for VM activity - pages are normally
179 205 * freed via pagevecs. But it gets used by networking.
mm/vmscan.c
... ... @@ -586,7 +586,7 @@
586 586 }
587 587  
588 588 /*
589   - * Add isolated pages on the list back to the LRU
  589 + * Add isolated pages on the list back to the LRU.
590 590 *
591 591 * returns the number of pages put back.
592 592 */
... ... @@ -760,46 +760,33 @@
760 760 return nr_failed + retry;
761 761 }
762 762  
763   -static void lru_add_drain_per_cpu(void *dummy)
764   -{
765   - lru_add_drain();
766   -}
767   -
768 763 /*
769 764 * Isolate one page from the LRU lists and put it on the
770   - * indicated list. Do necessary cache draining if the
771   - * page is not on the LRU lists yet.
  765 + * indicated list with elevated refcount.
772 766 *
773 767 * Result:
774 768 * 0 = page not on LRU list
775 769 * 1 = page removed from LRU list and added to the specified list.
776   - * -ENOENT = page is being freed elsewhere.
777 770 */
778 771 int isolate_lru_page(struct page *page)
779 772 {
780   - int rc = 0;
781   - struct zone *zone = page_zone(page);
  773 + int ret = 0;
782 774  
783   -redo:
784   - spin_lock_irq(&zone->lru_lock);
785   - rc = __isolate_lru_page(page);
786   - if (rc == 1) {
787   - if (PageActive(page))
788   - del_page_from_active_list(zone, page);
789   - else
790   - del_page_from_inactive_list(zone, page);
  775 + if (PageLRU(page)) {
  776 + struct zone *zone = page_zone(page);
  777 + spin_lock_irq(&zone->lru_lock);
  778 + if (TestClearPageLRU(page)) {
  779 + ret = 1;
  780 + get_page(page);
  781 + if (PageActive(page))
  782 + del_page_from_active_list(zone, page);
  783 + else
  784 + del_page_from_inactive_list(zone, page);
  785 + }
  786 + spin_unlock_irq(&zone->lru_lock);
791 787 }
792   - spin_unlock_irq(&zone->lru_lock);
793   - if (rc == 0) {
794   - /*
795   - * Maybe this page is still waiting for a cpu to drain it
796   - * from one of the lru lists?
797   - */
798   - rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
799   - if (rc == 0 && PageLRU(page))
800   - goto redo;
801   - }
802   - return rc;
  788 +
  789 + return ret;
803 790 }
804 791 #endif
805 792  
806 793  
... ... @@ -831,18 +818,20 @@
831 818 page = lru_to_page(src);
832 819 prefetchw_prev_lru_page(page, src, flags);
833 820  
834   - switch (__isolate_lru_page(page)) {
835   - case 1:
836   - /* Succeeded to isolate page */
837   - list_move(&page->lru, dst);
838   - nr_taken++;
839   - break;
840   - case -ENOENT:
841   - /* Not possible to isolate */
842   - list_move(&page->lru, src);
843   - break;
844   - default:
  821 + if (!TestClearPageLRU(page))
845 822 BUG();
  823 + list_del(&page->lru);
  824 + if (get_page_testone(page)) {
  825 + /*
  826 + * It is being freed elsewhere
  827 + */
  828 + __put_page(page);
  829 + SetPageLRU(page);
  830 + list_add(&page->lru, src);
  831 + continue;
  832 + } else {
  833 + list_add(&page->lru, dst);
  834 + nr_taken++;
846 835 }
847 836 }
848 837