Commit 053837fce7aa79025ed57656855df09f80175527
Committed by
Linus Torvalds
1 parent
e236a166b2
Exists in
master
and in
20 other branches
[PATCH] mm: migration page refcounting fix
Migration code currently does not take a reference to target page properly, so between unlocking the pte and trying to take a new reference to the page with isolate_lru_page, anything could happen to it. Fix this by holding the pte lock until we get a chance to elevate the refcount. Other small cleanups while we're here. Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 7 changed files with 76 additions and 75 deletions. Side-by-side Diff
include/linux/mm_inline.h
... | ... | @@ -38,26 +38,4 @@ |
38 | 38 | zone->nr_inactive--; |
39 | 39 | } |
40 | 40 | } |
41 | - | |
42 | -/* | |
43 | - * Isolate one page from the LRU lists. | |
44 | - * | |
45 | - * - zone->lru_lock must be held | |
46 | - */ | |
47 | -static inline int __isolate_lru_page(struct page *page) | |
48 | -{ | |
49 | - if (unlikely(!TestClearPageLRU(page))) | |
50 | - return 0; | |
51 | - | |
52 | - if (get_page_testone(page)) { | |
53 | - /* | |
54 | - * It is being freed elsewhere | |
55 | - */ | |
56 | - __put_page(page); | |
57 | - SetPageLRU(page); | |
58 | - return -ENOENT; | |
59 | - } | |
60 | - | |
61 | - return 1; | |
62 | -} |
include/linux/swap.h
... | ... | @@ -167,6 +167,7 @@ |
167 | 167 | extern void FASTCALL(activate_page(struct page *)); |
168 | 168 | extern void FASTCALL(mark_page_accessed(struct page *)); |
169 | 169 | extern void lru_add_drain(void); |
170 | +extern int lru_add_drain_all(void); | |
170 | 171 | extern int rotate_reclaimable_page(struct page *page); |
171 | 172 | extern void swap_setup(void); |
172 | 173 |
mm/filemap.c
... | ... | @@ -94,6 +94,7 @@ |
94 | 94 | * ->private_lock (try_to_unmap_one) |
95 | 95 | * ->tree_lock (try_to_unmap_one) |
96 | 96 | * ->zone.lru_lock (follow_page->mark_page_accessed) |
97 | + * ->zone.lru_lock (check_pte_range->isolate_lru_page) | |
97 | 98 | * ->private_lock (page_remove_rmap->set_page_dirty) |
98 | 99 | * ->tree_lock (page_remove_rmap->set_page_dirty) |
99 | 100 | * ->inode_lock (page_remove_rmap->set_page_dirty) |
mm/mempolicy.c
... | ... | @@ -208,6 +208,17 @@ |
208 | 208 | page = vm_normal_page(vma, addr, *pte); |
209 | 209 | if (!page) |
210 | 210 | continue; |
211 | + /* | |
212 | + * The check for PageReserved here is important to avoid | |
213 | + * handling zero pages and other pages that may have been | |
214 | + * marked special by the system. | |
215 | + * | |
216 | + * If the PageReserved would not be checked here then f.e. | |
217 | + * the location of the zero page could have an influence | |
218 | + * on MPOL_MF_STRICT, zero pages would be counted for | |
219 | + * the per node stats, and there would be useless attempts | |
220 | + * to put zero pages on the migration list. | |
221 | + */ | |
211 | 222 | if (PageReserved(page)) |
212 | 223 | continue; |
213 | 224 | nid = page_to_nid(page); |
214 | 225 | |
... | ... | @@ -216,11 +227,8 @@ |
216 | 227 | |
217 | 228 | if (flags & MPOL_MF_STATS) |
218 | 229 | gather_stats(page, private); |
219 | - else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | |
220 | - spin_unlock(ptl); | |
230 | + else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | |
221 | 231 | migrate_page_add(vma, page, private, flags); |
222 | - spin_lock(ptl); | |
223 | - } | |
224 | 232 | else |
225 | 233 | break; |
226 | 234 | } while (pte++, addr += PAGE_SIZE, addr != end); |
... | ... | @@ -309,6 +317,10 @@ |
309 | 317 | int err; |
310 | 318 | struct vm_area_struct *first, *vma, *prev; |
311 | 319 | |
320 | + /* Clear the LRU lists so pages can be isolated */ | |
321 | + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | |
322 | + lru_add_drain_all(); | |
323 | + | |
312 | 324 | first = find_vma(mm, start); |
313 | 325 | if (!first) |
314 | 326 | return ERR_PTR(-EFAULT); |
315 | 327 | |
... | ... | @@ -555,15 +567,8 @@ |
555 | 567 | if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) || |
556 | 568 | mapping_writably_mapped(page->mapping) || |
557 | 569 | single_mm_mapping(vma->vm_mm, page->mapping)) { |
558 | - int rc = isolate_lru_page(page); | |
559 | - | |
560 | - if (rc == 1) | |
570 | + if (isolate_lru_page(page)) | |
561 | 571 | list_add(&page->lru, pagelist); |
562 | - /* | |
563 | - * If the isolate attempt was not successful then we just | |
564 | - * encountered an unswappable page. Something must be wrong. | |
565 | - */ | |
566 | - WARN_ON(rc == 0); | |
567 | 572 | } |
568 | 573 | } |
569 | 574 |
mm/rmap.c
... | ... | @@ -33,7 +33,7 @@ |
33 | 33 | * mapping->i_mmap_lock |
34 | 34 | * anon_vma->lock |
35 | 35 | * mm->page_table_lock or pte_lock |
36 | - * zone->lru_lock (in mark_page_accessed) | |
36 | + * zone->lru_lock (in mark_page_accessed, isolate_lru_page) | |
37 | 37 | * swap_lock (in swap_duplicate, swap_info_get) |
38 | 38 | * mmlist_lock (in mmput, drain_mmlist and others) |
39 | 39 | * mapping->private_lock (in __set_page_dirty_buffers) |
mm/swap.c
... | ... | @@ -174,6 +174,32 @@ |
174 | 174 | put_cpu(); |
175 | 175 | } |
176 | 176 | |
177 | +#ifdef CONFIG_NUMA | |
178 | +static void lru_add_drain_per_cpu(void *dummy) | |
179 | +{ | |
180 | + lru_add_drain(); | |
181 | +} | |
182 | + | |
183 | +/* | |
184 | + * Returns 0 for success | |
185 | + */ | |
186 | +int lru_add_drain_all(void) | |
187 | +{ | |
188 | + return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); | |
189 | +} | |
190 | + | |
191 | +#else | |
192 | + | |
193 | +/* | |
194 | + * Returns 0 for success | |
195 | + */ | |
196 | +int lru_add_drain_all(void) | |
197 | +{ | |
198 | + lru_add_drain(); | |
199 | + return 0; | |
200 | +} | |
201 | +#endif | |
202 | + | |
177 | 203 | /* |
178 | 204 | * This path almost never happens for VM activity - pages are normally |
179 | 205 | * freed via pagevecs. But it gets used by networking. |
mm/vmscan.c
... | ... | @@ -586,7 +586,7 @@ |
586 | 586 | } |
587 | 587 | |
588 | 588 | /* |
589 | - * Add isolated pages on the list back to the LRU | |
589 | + * Add isolated pages on the list back to the LRU. | |
590 | 590 | * |
591 | 591 | * returns the number of pages put back. |
592 | 592 | */ |
593 | 593 | |
594 | 594 | |
595 | 595 | |
596 | 596 | |
597 | 597 | |
... | ... | @@ -760,46 +760,33 @@ |
760 | 760 | return nr_failed + retry; |
761 | 761 | } |
762 | 762 | |
763 | -static void lru_add_drain_per_cpu(void *dummy) | |
764 | -{ | |
765 | - lru_add_drain(); | |
766 | -} | |
767 | - | |
768 | 763 | /* |
769 | 764 | * Isolate one page from the LRU lists and put it on the |
770 | - * indicated list. Do necessary cache draining if the | |
771 | - * page is not on the LRU lists yet. | |
765 | + * indicated list with elevated refcount. | |
772 | 766 | * |
773 | 767 | * Result: |
774 | 768 | * 0 = page not on LRU list |
775 | 769 | * 1 = page removed from LRU list and added to the specified list. |
776 | - * -ENOENT = page is being freed elsewhere. | |
777 | 770 | */ |
778 | 771 | int isolate_lru_page(struct page *page) |
779 | 772 | { |
780 | - int rc = 0; | |
781 | - struct zone *zone = page_zone(page); | |
773 | + int ret = 0; | |
782 | 774 | |
783 | -redo: | |
784 | - spin_lock_irq(&zone->lru_lock); | |
785 | - rc = __isolate_lru_page(page); | |
786 | - if (rc == 1) { | |
787 | - if (PageActive(page)) | |
788 | - del_page_from_active_list(zone, page); | |
789 | - else | |
790 | - del_page_from_inactive_list(zone, page); | |
775 | + if (PageLRU(page)) { | |
776 | + struct zone *zone = page_zone(page); | |
777 | + spin_lock_irq(&zone->lru_lock); | |
778 | + if (TestClearPageLRU(page)) { | |
779 | + ret = 1; | |
780 | + get_page(page); | |
781 | + if (PageActive(page)) | |
782 | + del_page_from_active_list(zone, page); | |
783 | + else | |
784 | + del_page_from_inactive_list(zone, page); | |
785 | + } | |
786 | + spin_unlock_irq(&zone->lru_lock); | |
791 | 787 | } |
792 | - spin_unlock_irq(&zone->lru_lock); | |
793 | - if (rc == 0) { | |
794 | - /* | |
795 | - * Maybe this page is still waiting for a cpu to drain it | |
796 | - * from one of the lru lists? | |
797 | - */ | |
798 | - rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); | |
799 | - if (rc == 0 && PageLRU(page)) | |
800 | - goto redo; | |
801 | - } | |
802 | - return rc; | |
788 | + | |
789 | + return ret; | |
803 | 790 | } |
804 | 791 | #endif |
805 | 792 | |
806 | 793 | |
... | ... | @@ -831,18 +818,20 @@ |
831 | 818 | page = lru_to_page(src); |
832 | 819 | prefetchw_prev_lru_page(page, src, flags); |
833 | 820 | |
834 | - switch (__isolate_lru_page(page)) { | |
835 | - case 1: | |
836 | - /* Succeeded to isolate page */ | |
837 | - list_move(&page->lru, dst); | |
838 | - nr_taken++; | |
839 | - break; | |
840 | - case -ENOENT: | |
841 | - /* Not possible to isolate */ | |
842 | - list_move(&page->lru, src); | |
843 | - break; | |
844 | - default: | |
821 | + if (!TestClearPageLRU(page)) | |
845 | 822 | BUG(); |
823 | + list_del(&page->lru); | |
824 | + if (get_page_testone(page)) { | |
825 | + /* | |
826 | + * It is being freed elsewhere | |
827 | + */ | |
828 | + __put_page(page); | |
829 | + SetPageLRU(page); | |
830 | + list_add(&page->lru, src); | |
831 | + continue; | |
832 | + } else { | |
833 | + list_add(&page->lru, dst); | |
834 | + nr_taken++; | |
846 | 835 | } |
847 | 836 | } |
848 | 837 |