Commit 9180706344487700b40da9eca5dedd3d11cb33b4

Authored by Andrea Arcangeli
Committed by Linus Torvalds
1 parent e9da73d677

thp: alter compound get_page/put_page

Alter compound get_page/put_page to keep references on subpages too, in
order to allow __split_huge_page_refcount to split a hugepage even while
subpages have been pinned by one of the get_user_pages() variants.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 129 additions and 9 deletions
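The new semantics can be illustrated with a small user-space model (a hypothetical sketch, not kernel code: struct model_page, model_get_page() and main() below are simplified stand-ins for struct page and get_page()). After this change, taking a reference on a tail page of a compound page elevates both the tail page's own _count and the head page's _count, which is what later lets __split_huge_page_refcount() account for pins held on individual subpages.

/* Hypothetical user-space model of the new get_page() tail handling. */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct model_page {
	atomic_int _count;              /* models page->_count */
	bool tail;                      /* models PageTail() */
	struct model_page *first_page;  /* head page; set for tail pages */
};

static void model_get_page(struct model_page *page)
{
	/*
	 * A normal or head page must already be pinned; a tail page only
	 * needs a non-negative count of its own.  This mirrors the new
	 * VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)).
	 */
	assert(atomic_load(&page->_count) >= (page->tail ? 0 : 1));
	atomic_fetch_add(&page->_count, 1);
	if (page->tail) {
		/* Pinning a tail page also pins its head page. */
		assert(atomic_load(&page->first_page->_count) > 0);
		atomic_fetch_add(&page->first_page->_count, 1);
	}
}

int main(void)
{
	struct model_page head = { ._count = 1, .tail = false, .first_page = NULL };
	struct model_page tail = { ._count = 0, .tail = true,  .first_page = &head };

	model_get_page(&tail);	/* e.g. a get_user_pages() pin on a subpage */
	printf("head._count=%d tail._count=%d\n",
	       atomic_load(&head._count), atomic_load(&tail._count));
	/* Prints head._count=2 tail._count=1. */
	return 0;
}

Before this change get_page() redirected to compound_head(page) and elevated only the head page's count, so a pin taken through a subpage was invisible to the split path; keeping a count on the tail page as well is what makes splitting a pinned hugepage possible.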

arch/powerpc/mm/gup.c
... ... @@ -16,6 +16,16 @@
16 16  
17 17 #ifdef __HAVE_ARCH_PTE_SPECIAL
18 18  
  19 +static inline void get_huge_page_tail(struct page *page)
  20 +{
  21 + /*
  22 + * __split_huge_page_refcount() cannot run
  23 + * from under us.
  24 + */
  25 + VM_BUG_ON(atomic_read(&page->_count) < 0);
  26 + atomic_inc(&page->_count);
  27 +}
  28 +
19 29 /*
20 30 * The performance critical leaf functions are made noinline otherwise gcc
21 31 * inlines everything into a single function which results in too much
... ... @@ -47,6 +57,8 @@
47 57 put_page(page);
48 58 return 0;
49 59 }
  60 + if (PageTail(page))
  61 + get_huge_page_tail(page);
50 62 pages[*nr] = page;
51 63 (*nr)++;
52 64  

arch/x86/mm/gup.c
... ... @@ -105,6 +105,16 @@
105 105 atomic_add(nr, &page->_count);
106 106 }
107 107  
  108 +static inline void get_huge_page_tail(struct page *page)
  109 +{
  110 + /*
  111 + * __split_huge_page_refcount() cannot run
  112 + * from under us.
  113 + */
  114 + VM_BUG_ON(atomic_read(&page->_count) < 0);
  115 + atomic_inc(&page->_count);
  116 +}
  117 +
108 118 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
109 119 unsigned long end, int write, struct page **pages, int *nr)
110 120 {
... ... @@ -128,6 +138,8 @@
128 138 do {
129 139 VM_BUG_ON(compound_head(page) != head);
130 140 pages[*nr] = page;
  141 + if (PageTail(page))
  142 + get_huge_page_tail(page);
131 143 (*nr)++;
132 144 page++;
133 145 refs++;

include/linux/mm.h
... ... @@ -353,9 +353,29 @@
353 353  
354 354 static inline void get_page(struct page *page)
355 355 {
356   - page = compound_head(page);
357   - VM_BUG_ON(atomic_read(&page->_count) == 0);
  356 + /*
  357 + * Getting a normal page or the head of a compound page
  358 + * requires to already have an elevated page->_count. Only if
  359 + * we're getting a tail page, the elevated page->_count is
  360 + * required only in the head page, so for tail pages the
  361 + * bugcheck only verifies that the page->_count isn't
  362 + * negative.
  363 + */
  364 + VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
358 365 atomic_inc(&page->_count);
  366 + /*
  367 + * Getting a tail page will elevate both the head and tail
  368 + * page->_count(s).
  369 + */
  370 + if (unlikely(PageTail(page))) {
  371 + /*
  372 + * This is safe only because
  373 + * __split_huge_page_refcount can't run under
  374 + * get_page().
  375 + */
  376 + VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
  377 + atomic_inc(&page->first_page->_count);
  378 + }
359 379 }
360 380  
361 381 static inline struct page *virt_to_head_page(const void *x)

mm/swap.c
... ... @@ -56,17 +56,93 @@
56 56 del_page_from_lru(zone, page);
57 57 spin_unlock_irqrestore(&zone->lru_lock, flags);
58 58 }
  59 +}
  60 +
  61 +static void __put_single_page(struct page *page)
  62 +{
  63 + __page_cache_release(page);
59 64 free_hot_cold_page(page, 0);
60 65 }
61 66  
62   -static void put_compound_page(struct page *page)
  67 +static void __put_compound_page(struct page *page)
63 68 {
64   - page = compound_head(page);
65   - if (put_page_testzero(page)) {
66   - compound_page_dtor *dtor;
  69 + compound_page_dtor *dtor;
67 70  
68   - dtor = get_compound_page_dtor(page);
69   - (*dtor)(page);
  71 + __page_cache_release(page);
  72 + dtor = get_compound_page_dtor(page);
  73 + (*dtor)(page);
  74 +}
  75 +
  76 +static void put_compound_page(struct page *page)
  77 +{
  78 + if (unlikely(PageTail(page))) {
  79 + /* __split_huge_page_refcount can run under us */
  80 + struct page *page_head = page->first_page;
  81 + smp_rmb();
  82 + /*
  83 + * If PageTail is still set after smp_rmb() we can be sure
  84 + * that the page->first_page we read wasn't a dangling pointer.
  85 + * See __split_huge_page_refcount() smp_wmb().
  86 + */
  87 + if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
  88 + unsigned long flags;
  89 + /*
  90 + * Verify that our page_head wasn't converted
  91 + * to a regular page before we got a
  92 + * reference on it.
  93 + */
  94 + if (unlikely(!PageHead(page_head))) {
  95 + /* PageHead is cleared after PageTail */
  96 + smp_rmb();
  97 + VM_BUG_ON(PageTail(page));
  98 + goto out_put_head;
  99 + }
  100 + /*
  101 + * Only run compound_lock on a valid PageHead,
  102 + * after having it pinned with
  103 + * get_page_unless_zero() above.
  104 + */
  105 + smp_mb();
  106 + /* page_head wasn't a dangling pointer */
  107 + flags = compound_lock_irqsave(page_head);
  108 + if (unlikely(!PageTail(page))) {
  109 + /* __split_huge_page_refcount run before us */
  110 + compound_unlock_irqrestore(page_head, flags);
  111 + VM_BUG_ON(PageHead(page_head));
  112 + out_put_head:
  113 + if (put_page_testzero(page_head))
  114 + __put_single_page(page_head);
  115 + out_put_single:
  116 + if (put_page_testzero(page))
  117 + __put_single_page(page);
  118 + return;
  119 + }
  120 + VM_BUG_ON(page_head != page->first_page);
  121 + /*
  122 + * We can release the refcount taken by
  123 + * get_page_unless_zero now that
  124 + * split_huge_page_refcount is blocked on the
  125 + * compound_lock.
  126 + */
  127 + if (put_page_testzero(page_head))
  128 + VM_BUG_ON(1);
  129 + /* __split_huge_page_refcount will wait now */
  130 + VM_BUG_ON(atomic_read(&page->_count) <= 0);
  131 + atomic_dec(&page->_count);
  132 + VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
  133 + compound_unlock_irqrestore(page_head, flags);
  134 + if (put_page_testzero(page_head))
  135 + __put_compound_page(page_head);
  136 + } else {
  137 + /* page_head is a dangling pointer */
  138 + VM_BUG_ON(PageTail(page));
  139 + goto out_put_single;
  140 + }
  141 + } else if (put_page_testzero(page)) {
  142 + if (PageHead(page))
  143 + __put_compound_page(page);
  144 + else
  145 + __put_single_page(page);
70 146 }
71 147 }
72 148  
... ... @@ -75,7 +151,7 @@
75 151 if (unlikely(PageCompound(page)))
76 152 put_compound_page(page);
77 153 else if (put_page_testzero(page))
78   - __page_cache_release(page);
  154 + __put_single_page(page);
79 155 }
80 156 EXPORT_SYMBOL(put_page);
81 157