Commit d6d86c0a7f8ddc5b38cf089222cb1d9540762dc2

Authored by Konstantin Khlebnikov
Committed by Linus Torvalds
1 parent 29e5694054

mm/balloon_compaction: redesign ballooned pages management

Sasha Levin reported a KASAN splat inside isolate_migratepages_range().
The problem is in the function __is_movable_balloon_page(), which tests
AS_BALLOON_MAP in page->mapping->flags.  This function has no protection
against anonymous pages, so it can end up checking address space flags
inside struct anon_vma.
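
The unsafe dereference is easy to see in a condensed sketch of the old
helper (it is removed in the include/linux/balloon_compaction.h hunk
below; renamed here for illustration):

    /* Sketch of the old helper; condensed, not verbatim kernel code. */
    static inline bool old_is_movable_balloon_page(struct page *page)
    {
            struct address_space *mapping = page->mapping;
            /*
             * For an anonymous page, "mapping" is not an address_space:
             * it holds the anon_vma pointer with PAGE_MAPPING_ANON set,
             * so mapping_balloon() ends up reading flags inside struct
             * anon_vma -- the access KASAN caught.
             */
            return mapping_balloon(mapping);
    }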

Further investigation shows more problems in current implementation:

* The special branch in __unmap_and_move() never works:
  balloon_page_movable() checks page flags and page_count.  In
  __unmap_and_move() the page is locked and its reference counter is
  elevated, so balloon_page_movable() always fails and execution takes
  the normal migration path (see the sketch after this list).
  virtballoon_migratepage() then returns MIGRATEPAGE_BALLOON_SUCCESS
  instead of MIGRATEPAGE_SUCCESS, move_to_new_page() treats this as an
  error code and sets newpage->mapping to NULL.  The newly migrated page
  loses its connection to the balloon and any ability for further
  migration.

* lru_lock is erroneously required in isolate_migratepages_range() for
  isolating a ballooned page.  This function releases lru_lock
  periodically, which makes migration mostly impossible for some pages.

* balloon_page_dequeue has a tight race with balloon_page_isolate:
  balloon_page_isolate can run in parallel with dequeue between picking
  the page from the list and taking the page lock.  The race is rare
  because both sides use trylock_page().
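
For illustration, a condensed sketch of the old check (removed in the
include/linux/balloon_compaction.h hunk below; renamed here) with notes
on why it could never succeed inside __unmap_and_move():

    /* Sketch only; condensed from the removed code below. */
    static inline bool old_balloon_page_movable(struct page *page)
    {
            /*
             * In __unmap_and_move() the page is locked, so PG_locked
             * (part of PAGE_FLAGS_CHECK_AT_PREP) is set, and the
             * isolation reference makes page_count(page) == 2, not 1:
             * both tests below fail.
             */
            if (page_flags_cleared(page) && !page_mapped(page) &&
                page_count(page) == 1)
                    return __is_movable_balloon_page(page);
            return false;
    }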

This patch fixes all of them.

Instead of a fake mapping with a special flag, this patch uses a special
state of page->_mapcount: PAGE_BALLOON_MAPCOUNT_VALUE = -256.  The buddy
allocator uses PAGE_BUDDY_MAPCOUNT_VALUE = -128 for a similar purpose.
Storing the mark directly in struct page makes everything safer and
easier.
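
The mark follows the existing PageBuddy() pattern: _mapcount of an
unmapped page is -1, so a distinct negative value can encode a page type
without dereferencing anything.  A condensed view (the full helpers are
in the include/linux/mm.h hunk below):

    #define PAGE_BALLOON_MAPCOUNT_VALUE (-256)

    static inline int PageBalloon(struct page *page)
    {
            /* Safe on any page: no pointer is dereferenced. */
            return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
    }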

PagePrivate is used to mark pages present in the page list (i.e.  not
isolated, like PageLRU for normal pages).  It replaces the special rules
for the reference counter and makes balloon migration similar to the
migration of normal pages.  This flag is protected by the page lock
together with the link to the balloon device.
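
Roughly, isolation then becomes the following (a condensed sketch of the
mm/balloon_compaction.c hunks below; error paths omitted):

    if (trylock_page(page)) {
            /* PageBalloon + PagePrivate == enqueued, not yet isolated */
            if (balloon_page_movable(page)) {
                    spin_lock_irqsave(&b_dev_info->pages_lock, flags);
                    ClearPagePrivate(page);         /* mark as isolated */
                    list_del(&page->lru);           /* off the balloon list */
                    b_dev_info->isolated_pages++;
                    spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
            }
            unlock_page(page);
    }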

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Link: http://lkml.kernel.org/p/53E6CEAA.9020105@oracle.com
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: <stable@vger.kernel.org>	[3.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 68 additions and 118 deletions

drivers/virtio/virtio_balloon.c
... ... @@ -163,8 +163,8 @@
163 163 /* Find pfns pointing at start of each page, get pages and free them. */
164 164 for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
165 165 struct page *page = balloon_pfn_to_page(pfns[i]);
166   - balloon_page_free(page);
167 166 adjust_managed_page_count(page, 1);
  167 + put_page(page); /* balloon reference */
168 168 }
169 169 }
170 170  
... ... @@ -395,6 +395,8 @@
395 395 if (!mutex_trylock(&vb->balloon_lock))
396 396 return -EAGAIN;
397 397  
  398 + get_page(newpage); /* balloon reference */
  399 +
398 400 /* balloon's page migration 1st step -- inflate "newpage" */
399 401 spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
400 402 balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
... ... @@ -404,12 +406,7 @@
404 406 set_page_pfns(vb->pfns, newpage);
405 407 tell_host(vb, vb->inflate_vq);
406 408  
407   - /*
408   - * balloon's page migration 2nd step -- deflate "page"
409   - *
410   - * It's safe to delete page->lru here because this page is at
411   - * an isolated migration list, and this step is expected to happen here
412   - */
  409 + /* balloon's page migration 2nd step -- deflate "page" */
413 410 balloon_page_delete(page);
414 411 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
415 412 set_page_pfns(vb->pfns, page);
... ... @@ -417,7 +414,9 @@
417 414  
418 415 mutex_unlock(&vb->balloon_lock);
419 416  
420   - return MIGRATEPAGE_BALLOON_SUCCESS;
  417 + put_page(page); /* balloon reference */
  418 +
  419 + return MIGRATEPAGE_SUCCESS;
421 420 }
422 421  
423 422 /* define the balloon_mapping->a_ops callback to allow balloon page migration */
include/linux/balloon_compaction.h
... ... @@ -27,10 +27,13 @@
27 27 * counter raised only while it is under our special handling;
28 28 *
29 29 * iii. after the lockless scan step have selected a potential balloon page for
30   - * isolation, re-test the page->mapping flags and the page ref counter
  30 + * isolation, re-test the PageBalloon mark and the PagePrivate flag
31 31 * under the proper page lock, to ensure isolating a valid balloon page
32 32 * (not yet isolated, nor under release procedure)
33 33 *
  34 + * iv. isolation or dequeueing procedure must clear PagePrivate flag under
  35 + * page lock together with removing page from balloon device page list.
  36 + *
34 37 * The functions provided by this interface are placed to help on coping with
35 38 * the aforementioned balloon page corner case, as well as to ensure the simple
36 39 * set of exposed rules are satisfied while we are dealing with balloon pages
... ... @@ -71,28 +74,6 @@
71 74 kfree(b_dev_info);
72 75 }
73 76  
74   -/*
75   - * balloon_page_free - release a balloon page back to the page free lists
76   - * @page: ballooned page to be set free
77   - *
78   - * This function must be used to properly set free an isolated/dequeued balloon
79   - * page at the end of a sucessful page migration, or at the balloon driver's
80   - * page release procedure.
81   - */
82   -static inline void balloon_page_free(struct page *page)
83   -{
84   - /*
85   - * Balloon pages always get an extra refcount before being isolated
86   - * and before being dequeued to help on sorting out fortuite colisions
87   - * between a thread attempting to isolate and another thread attempting
88   - * to release the very same balloon page.
89   - *
90   - * Before we handle the page back to Buddy, lets drop its extra refcnt.
91   - */
92   - put_page(page);
93   - __free_page(page);
94   -}
95   -
96 77 #ifdef CONFIG_BALLOON_COMPACTION
97 78 extern bool balloon_page_isolate(struct page *page);
98 79 extern void balloon_page_putback(struct page *page);
... ... @@ -108,74 +89,33 @@
108 89 }
109 90  
110 91 /*
111   - * page_flags_cleared - helper to perform balloon @page ->flags tests.
112   - *
113   - * As balloon pages are obtained from buddy and we do not play with page->flags
114   - * at driver level (exception made when we get the page lock for compaction),
115   - * we can safely identify a ballooned page by checking if the
116   - * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also
117   - * helps us skip ballooned pages that are locked for compaction or release, thus
118   - * mitigating their racy check at balloon_page_movable()
  92 + * __is_movable_balloon_page - helper to perform @page PageBalloon tests
119 93 */
120   -static inline bool page_flags_cleared(struct page *page)
121   -{
122   - return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
123   -}
124   -
125   -/*
126   - * __is_movable_balloon_page - helper to perform @page mapping->flags tests
127   - */
128 94 static inline bool __is_movable_balloon_page(struct page *page)
129 95 {
130   - struct address_space *mapping = page->mapping;
131   - return mapping_balloon(mapping);
  96 + return PageBalloon(page);
132 97 }
133 98  
134 99 /*
135   - * balloon_page_movable - test page->mapping->flags to identify balloon pages
136   - * that can be moved by compaction/migration.
  100 + * balloon_page_movable - test PageBalloon to identify balloon pages
  101 + * and PagePrivate to check that the page is not
  102 + * isolated and can be moved by compaction/migration.
137 103 *
138   - * This function is used at core compaction's page isolation scheme, therefore
139   - * most pages exposed to it are not enlisted as balloon pages and so, to avoid
140   - * undesired side effects like racing against __free_pages(), we cannot afford
141   - * holding the page locked while testing page->mapping->flags here.
142   - *
143 104 * As we might return false positives in the case of a balloon page being just
144   - * released under us, the page->mapping->flags need to be re-tested later,
145   - * under the proper page lock, at the functions that will be coping with the
146   - * balloon page case.
  105 + * released under us, this need to be re-tested later, under the page lock.
147 106 */
148 107 static inline bool balloon_page_movable(struct page *page)
149 108 {
150   - /*
151   - * Before dereferencing and testing mapping->flags, let's make sure
152   - * this is not a page that uses ->mapping in a different way
153   - */
154   - if (page_flags_cleared(page) && !page_mapped(page) &&
155   - page_count(page) == 1)
156   - return __is_movable_balloon_page(page);
157   -
158   - return false;
  109 + return PageBalloon(page) && PagePrivate(page);
159 110 }
160 111  
161 112 /*
162 113 * isolated_balloon_page - identify an isolated balloon page on private
163 114 * compaction/migration page lists.
164   - *
165   - * After a compaction thread isolates a balloon page for migration, it raises
166   - * the page refcount to prevent concurrent compaction threads from re-isolating
167   - * the same page. For that reason putback_movable_pages(), or other routines
168   - * that need to identify isolated balloon pages on private pagelists, cannot
169   - * rely on balloon_page_movable() to accomplish the task.
170 115 */
171 116 static inline bool isolated_balloon_page(struct page *page)
172 117 {
173   - /* Already isolated balloon pages, by default, have a raised refcount */
174   - if (page_flags_cleared(page) && !page_mapped(page) &&
175   - page_count(page) >= 2)
176   - return __is_movable_balloon_page(page);
177   -
178   - return false;
  118 + return PageBalloon(page);
179 119 }
180 120  
181 121 /*
... ... @@ -192,6 +132,8 @@
192 132 struct address_space *mapping,
193 133 struct list_head *head)
194 134 {
  135 + __SetPageBalloon(page);
  136 + SetPagePrivate(page);
195 137 page->mapping = mapping;
196 138 list_add(&page->lru, head);
197 139 }
198 140  
... ... @@ -206,8 +148,12 @@
206 148 */
207 149 static inline void balloon_page_delete(struct page *page)
208 150 {
  151 + __ClearPageBalloon(page);
209 152 page->mapping = NULL;
210   - list_del(&page->lru);
  153 + if (PagePrivate(page)) {
  154 + ClearPagePrivate(page);
  155 + list_del(&page->lru);
  156 + }
211 157 }
212 158  
213 159 /*
... ... @@ -256,6 +202,11 @@
256 202 static inline void balloon_page_delete(struct page *page)
257 203 {
258 204 list_del(&page->lru);
  205 +}
  206 +
  207 +static inline bool __is_movable_balloon_page(struct page *page)
  208 +{
  209 + return false;
259 210 }
260 211  
261 212 static inline bool balloon_page_movable(struct page *page)
include/linux/migrate.h
... ... @@ -13,18 +13,9 @@
13 13 * Return values from addresss_space_operations.migratepage():
14 14 * - negative errno on page migration failure;
15 15 * - zero on page migration success;
16   - *
17   - * The balloon page migration introduces this special case where a 'distinct'
18   - * return code is used to flag a successful page migration to unmap_and_move().
19   - * This approach is necessary because page migration can race against balloon
20   - * deflation procedure, and for such case we could introduce a nasty page leak
21   - * if a successfully migrated balloon page gets released concurrently with
22   - * migration's unmap_and_move() wrap-up steps.
23 16 */
24 17 #define MIGRATEPAGE_SUCCESS 0
25   -#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page
26   - * sucessful migration case.
27   - */
  18 +
28 19 enum migrate_reason {
29 20 MR_COMPACTION,
30 21 MR_MEMORY_FAILURE,
include/linux/mm.h
... ... @@ -554,6 +554,25 @@
554 554 atomic_set(&page->_mapcount, -1);
555 555 }
556 556  
  557 +#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
  558 +
  559 +static inline int PageBalloon(struct page *page)
  560 +{
  561 + return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
  562 +}
  563 +
  564 +static inline void __SetPageBalloon(struct page *page)
  565 +{
  566 + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
  567 + atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE);
  568 +}
  569 +
  570 +static inline void __ClearPageBalloon(struct page *page)
  571 +{
  572 + VM_BUG_ON_PAGE(!PageBalloon(page), page);
  573 + atomic_set(&page->_mapcount, -1);
  574 +}
  575 +
557 576 void put_page(struct page *page);
558 577 void put_pages_list(struct list_head *pages);
559 578  
mm/balloon_compaction.c
... ... @@ -93,17 +93,12 @@
93 93 * to be released by the balloon driver.
94 94 */
95 95 if (trylock_page(page)) {
  96 + if (!PagePrivate(page)) {
  97 + /* raced with isolation */
  98 + unlock_page(page);
  99 + continue;
  100 + }
96 101 spin_lock_irqsave(&b_dev_info->pages_lock, flags);
97   - /*
98   - * Raise the page refcount here to prevent any wrong
99   - * attempt to isolate this page, in case of coliding
100   - * with balloon_page_isolate() just after we release
101   - * the page lock.
102   - *
103   - * balloon_page_free() will take care of dropping
104   - * this extra refcount later.
105   - */
106   - get_page(page);
107 102 balloon_page_delete(page);
108 103 spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
109 104 unlock_page(page);
... ... @@ -187,7 +182,9 @@
187 182 {
188 183 struct balloon_dev_info *b_dev_info = page->mapping->private_data;
189 184 unsigned long flags;
  185 +
190 186 spin_lock_irqsave(&b_dev_info->pages_lock, flags);
  187 + ClearPagePrivate(page);
191 188 list_del(&page->lru);
192 189 b_dev_info->isolated_pages++;
193 190 spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
... ... @@ -197,7 +194,9 @@
197 194 {
198 195 struct balloon_dev_info *b_dev_info = page->mapping->private_data;
199 196 unsigned long flags;
  197 +
200 198 spin_lock_irqsave(&b_dev_info->pages_lock, flags);
  199 + SetPagePrivate(page);
201 200 list_add(&page->lru, &b_dev_info->pages);
202 201 b_dev_info->isolated_pages--;
203 202 spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
... ... @@ -235,12 +234,11 @@
235 234 */
236 235 if (likely(trylock_page(page))) {
237 236 /*
238   - * A ballooned page, by default, has just one refcount.
  237 + * A ballooned page, by default, has PagePrivate set.
239 238 * Prevent concurrent compaction threads from isolating
240   - * an already isolated balloon page by refcount check.
  239 + * an already isolated balloon page by clearing it.
241 240 */
242   - if (__is_movable_balloon_page(page) &&
243   - page_count(page) == 2) {
  241 + if (balloon_page_movable(page)) {
244 242 __isolate_balloon_page(page);
245 243 unlock_page(page);
246 244 return true;
mm/compaction.c
... ... @@ -640,7 +640,7 @@
640 640 */
641 641 if (!PageLRU(page)) {
642 642 if (unlikely(balloon_page_movable(page))) {
643   - if (locked && balloon_page_isolate(page)) {
  643 + if (balloon_page_isolate(page)) {
644 644 /* Successfully isolated */
645 645 goto isolate_success;
646 646 }
mm/migrate.c
... ... @@ -876,7 +876,7 @@
876 876 }
877 877 }
878 878  
879   - if (unlikely(balloon_page_movable(page))) {
  879 + if (unlikely(isolated_balloon_page(page))) {
880 880 /*
881 881 * A ballooned page does not need any special attention from
882 882 * physical to virtual reverse mapping procedures.
... ... @@ -955,17 +955,6 @@
955 955  
956 956 rc = __unmap_and_move(page, newpage, force, mode);
957 957  
958   - if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
959   - /*
960   - * A ballooned page has been migrated already.
961   - * Now, it's the time to wrap-up counters,
962   - * handle the page back to Buddy and return.
963   - */
964   - dec_zone_page_state(page, NR_ISOLATED_ANON +
965   - page_is_file_cache(page));
966   - balloon_page_free(page);
967   - return MIGRATEPAGE_SUCCESS;
968   - }
969 958 out:
970 959 if (rc != -EAGAIN) {
971 960 /*
... ... @@ -988,6 +977,9 @@
988 977 if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
989 978 ClearPageSwapBacked(newpage);
990 979 put_new_page(newpage, private);
  980 + } else if (unlikely(__is_movable_balloon_page(newpage))) {
  981 + /* drop our reference, page already in the balloon */
  982 + put_page(newpage);
991 983 } else
992 984 putback_lru_page(newpage);
993 985