Commit 9180706344487700b40da9eca5dedd3d11cb33b4
Committed by
Linus Torvalds
1 parent
e9da73d677
Exists in
master
and in
20 other branches
thp: alter compound get_page/put_page
Alter compound get_page/put_page to keep references on subpages too, in order to allow __split_huge_page_refcount to split a hugepage even while subpages have been pinned by one of the get_user_pages() variants. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Rik van Riel <riel@redhat.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 4 changed files with 129 additions and 9 deletions Side-by-side Diff
arch/powerpc/mm/gup.c
... | ... | @@ -16,6 +16,16 @@ |
16 | 16 | |
17 | 17 | #ifdef __HAVE_ARCH_PTE_SPECIAL |
18 | 18 | |
19 | +static inline void get_huge_page_tail(struct page *page) | |
20 | +{ | |
21 | + /* | |
22 | + * __split_huge_page_refcount() cannot run | |
23 | + * from under us. | |
24 | + */ | |
25 | + VM_BUG_ON(atomic_read(&page->_count) < 0); | |
26 | + atomic_inc(&page->_count); | |
27 | +} | |
28 | + | |
19 | 29 | /* |
20 | 30 | * The performance critical leaf functions are made noinline otherwise gcc |
21 | 31 | * inlines everything into a single function which results in too much |
... | ... | @@ -47,6 +57,8 @@ |
47 | 57 | put_page(page); |
48 | 58 | return 0; |
49 | 59 | } |
60 | + if (PageTail(page)) | |
61 | + get_huge_page_tail(page); | |
50 | 62 | pages[*nr] = page; |
51 | 63 | (*nr)++; |
52 | 64 |
arch/x86/mm/gup.c
... | ... | @@ -105,6 +105,16 @@ |
105 | 105 | atomic_add(nr, &page->_count); |
106 | 106 | } |
107 | 107 | |
108 | +static inline void get_huge_page_tail(struct page *page) | |
109 | +{ | |
110 | + /* | |
111 | + * __split_huge_page_refcount() cannot run | |
112 | + * from under us. | |
113 | + */ | |
114 | + VM_BUG_ON(atomic_read(&page->_count) < 0); | |
115 | + atomic_inc(&page->_count); | |
116 | +} | |
117 | + | |
108 | 118 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, |
109 | 119 | unsigned long end, int write, struct page **pages, int *nr) |
110 | 120 | { |
... | ... | @@ -128,6 +138,8 @@ |
128 | 138 | do { |
129 | 139 | VM_BUG_ON(compound_head(page) != head); |
130 | 140 | pages[*nr] = page; |
141 | + if (PageTail(page)) | |
142 | + get_huge_page_tail(page); | |
131 | 143 | (*nr)++; |
132 | 144 | page++; |
133 | 145 | refs++; |
include/linux/mm.h
... | ... | @@ -353,9 +353,29 @@ |
353 | 353 | |
354 | 354 | static inline void get_page(struct page *page) |
355 | 355 | { |
356 | - page = compound_head(page); | |
357 | - VM_BUG_ON(atomic_read(&page->_count) == 0); | |
356 | + /* | |
357 | + * Getting a normal page or the head of a compound page | |
358 | + * requires to already have an elevated page->_count. Only if | |
359 | + * we're getting a tail page, the elevated page->_count is | |
360 | + * required only in the head page, so for tail pages the | |
361 | + * bugcheck only verifies that the page->_count isn't | |
362 | + * negative. | |
363 | + */ | |
364 | + VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); | |
358 | 365 | atomic_inc(&page->_count); |
366 | + /* | |
367 | + * Getting a tail page will elevate both the head and tail | |
368 | + * page->_count(s). | |
369 | + */ | |
370 | + if (unlikely(PageTail(page))) { | |
371 | + /* | |
372 | + * This is safe only because | |
373 | + * __split_huge_page_refcount can't run under | |
374 | + * get_page(). | |
375 | + */ | |
376 | + VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); | |
377 | + atomic_inc(&page->first_page->_count); | |
378 | + } | |
359 | 379 | } |
360 | 380 | |
361 | 381 | static inline struct page *virt_to_head_page(const void *x) |
mm/swap.c
... | ... | @@ -56,17 +56,93 @@ |
56 | 56 | del_page_from_lru(zone, page); |
57 | 57 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
58 | 58 | } |
59 | +} | |
60 | + | |
61 | +static void __put_single_page(struct page *page) | |
62 | +{ | |
63 | + __page_cache_release(page); | |
59 | 64 | free_hot_cold_page(page, 0); |
60 | 65 | } |
61 | 66 | |
62 | -static void put_compound_page(struct page *page) | |
67 | +static void __put_compound_page(struct page *page) | |
63 | 68 | { |
64 | - page = compound_head(page); | |
65 | - if (put_page_testzero(page)) { | |
66 | - compound_page_dtor *dtor; | |
69 | + compound_page_dtor *dtor; | |
67 | 70 | |
68 | - dtor = get_compound_page_dtor(page); | |
69 | - (*dtor)(page); | |
71 | + __page_cache_release(page); | |
72 | + dtor = get_compound_page_dtor(page); | |
73 | + (*dtor)(page); | |
74 | +} | |
75 | + | |
76 | +static void put_compound_page(struct page *page) | |
77 | +{ | |
78 | + if (unlikely(PageTail(page))) { | |
79 | + /* __split_huge_page_refcount can run under us */ | |
80 | + struct page *page_head = page->first_page; | |
81 | + smp_rmb(); | |
82 | + /* | |
83 | + * If PageTail is still set after smp_rmb() we can be sure | |
84 | + * that the page->first_page we read wasn't a dangling pointer. | |
85 | + * See __split_huge_page_refcount() smp_wmb(). | |
86 | + */ | |
87 | + if (likely(PageTail(page) && get_page_unless_zero(page_head))) { | |
88 | + unsigned long flags; | |
89 | + /* | |
90 | + * Verify that our page_head wasn't converted | |
92 | + * to a regular page before we got a | |
92 | + * reference on it. | |
93 | + */ | |
94 | + if (unlikely(!PageHead(page_head))) { | |
95 | + /* PageHead is cleared after PageTail */ | |
96 | + smp_rmb(); | |
97 | + VM_BUG_ON(PageTail(page)); | |
98 | + goto out_put_head; | |
99 | + } | |
100 | + /* | |
101 | + * Only run compound_lock on a valid PageHead, | |
102 | + * after having it pinned with | |
103 | + * get_page_unless_zero() above. | |
104 | + */ | |
105 | + smp_mb(); | |
106 | + /* page_head wasn't a dangling pointer */ | |
107 | + flags = compound_lock_irqsave(page_head); | |
108 | + if (unlikely(!PageTail(page))) { | |
109 | + /* __split_huge_page_refcount run before us */ | |
110 | + compound_unlock_irqrestore(page_head, flags); | |
111 | + VM_BUG_ON(PageHead(page_head)); | |
112 | + out_put_head: | |
113 | + if (put_page_testzero(page_head)) | |
114 | + __put_single_page(page_head); | |
115 | + out_put_single: | |
116 | + if (put_page_testzero(page)) | |
117 | + __put_single_page(page); | |
118 | + return; | |
119 | + } | |
120 | + VM_BUG_ON(page_head != page->first_page); | |
121 | + /* | |
122 | + * We can release the refcount taken by | |
123 | + * get_page_unless_zero now that | |
124 | + * split_huge_page_refcount is blocked on the | |
125 | + * compound_lock. | |
126 | + */ | |
127 | + if (put_page_testzero(page_head)) | |
128 | + VM_BUG_ON(1); | |
129 | + /* __split_huge_page_refcount will wait now */ | |
130 | + VM_BUG_ON(atomic_read(&page->_count) <= 0); | |
131 | + atomic_dec(&page->_count); | |
132 | + VM_BUG_ON(atomic_read(&page_head->_count) <= 0); | |
133 | + compound_unlock_irqrestore(page_head, flags); | |
134 | + if (put_page_testzero(page_head)) | |
135 | + __put_compound_page(page_head); | |
136 | + } else { | |
137 | + /* page_head is a dangling pointer */ | |
138 | + VM_BUG_ON(PageTail(page)); | |
139 | + goto out_put_single; | |
140 | + } | |
141 | + } else if (put_page_testzero(page)) { | |
142 | + if (PageHead(page)) | |
143 | + __put_compound_page(page); | |
144 | + else | |
145 | + __put_single_page(page); | |
70 | 146 | } |
71 | 147 | } |
72 | 148 | |
... | ... | @@ -75,7 +151,7 @@ |
75 | 151 | if (unlikely(PageCompound(page))) |
76 | 152 | put_compound_page(page); |
77 | 153 | else if (put_page_testzero(page)) |
78 | - __page_cache_release(page); | |
154 | + __put_single_page(page); | |
79 | 155 | } |
80 | 156 | EXPORT_SYMBOL(put_page); |
81 | 157 |