Commit 9442ec9df40d952b0de185ae5638a74970388e01

Authored by Hugh Dickins
Committed by Linus Torvalds
1 parent 98837c7f82

memcg: bad page if page_cgroup when free

Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad
page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it
were set here, it'd likely cause corruption when the page is reused.

Don't use page_assign_page_cgroup to clear it: that should be private to
memcontrol.c, and always called with the lock taken; and memmap_init_zone
doesn't need it either - like page->mapping and other pointers throughout the
kernel, Linux assumes pointers in zeroed structures are NULL pointers.

Instead use page_reset_bad_cgroup, added to memcontrol.h for this only.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 28 additions and 25 deletions Side-by-side Diff

include/linux/memcontrol.h
... ... @@ -29,8 +29,9 @@
29 29  
30 30 extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
31 31 extern void mm_free_cgroup(struct mm_struct *mm);
32   -extern void page_assign_page_cgroup(struct page *page,
33   - struct page_cgroup *pc);
  32 +
  33 +#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0)
  34 +
34 35 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
35 36 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
36 37 gfp_t gfp_mask);
... ... @@ -82,8 +83,7 @@
82 83 {
83 84 }
84 85  
85   -static inline void page_assign_page_cgroup(struct page *page,
86   - struct page_cgroup *pc)
  86 +static inline void page_reset_bad_cgroup(struct page *page)
87 87 {
88 88 }
89 89  
... ... @@ -140,11 +140,17 @@
140 140  
141 141 /*
142 142 * We use the lower bit of the page->page_cgroup pointer as a bit spin
143   - * lock. We need to ensure that page->page_cgroup is atleast two
144   - * byte aligned (based on comments from Nick Piggin)
  143 + * lock. We need to ensure that page->page_cgroup is at least two
  144 + * byte aligned (based on comments from Nick Piggin). But since
  145 + * bit_spin_lock doesn't actually set that lock bit in a non-debug
  146 + * uniprocessor kernel, we should avoid setting it here too.
145 147 */
146 148 #define PAGE_CGROUP_LOCK_BIT 0x0
147   -#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
  149 +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  150 +#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
  151 +#else
  152 +#define PAGE_CGROUP_LOCK 0x0
  153 +#endif
148 154  
149 155 /*
150 156 * A page_cgroup page is associated with every page descriptor. The
151 157  
... ... @@ -271,19 +277,10 @@
271 277 &page->page_cgroup);
272 278 }
273 279  
274   -void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
  280 +static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
275 281 {
276   - int locked;
277   -
278   - /*
279   - * While resetting the page_cgroup we might not hold the
280   - * page_cgroup lock. free_hot_cold_page() is an example
281   - * of such a scenario
282   - */
283   - if (pc)
284   - VM_BUG_ON(!page_cgroup_locked(page));
285   - locked = (page->page_cgroup & PAGE_CGROUP_LOCK);
286   - page->page_cgroup = ((unsigned long)pc | locked);
  282 + VM_BUG_ON(!page_cgroup_locked(page));
  283 + page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
287 284 }
288 285  
289 286 struct page_cgroup *page_get_page_cgroup(struct page *page)
... ... @@ -222,13 +222,19 @@
222 222  
223 223 static void bad_page(struct page *page)
224 224 {
225   - printk(KERN_EMERG "Bad page state in process '%s'\n"
226   - KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
227   - KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
228   - KERN_EMERG "Backtrace:\n",
  225 + void *pc = page_get_page_cgroup(page);
  226 +
  227 + printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
  228 + "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
229 229 current->comm, page, (int)(2*sizeof(unsigned long)),
230 230 (unsigned long)page->flags, page->mapping,
231 231 page_mapcount(page), page_count(page));
  232 + if (pc) {
  233 + printk(KERN_EMERG "cgroup:%p\n", pc);
  234 + page_reset_bad_cgroup(page);
  235 + }
  236 + printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
  237 + KERN_EMERG "Backtrace:\n");
232 238 dump_stack();
233 239 page->flags &= ~(1 << PG_lru |
234 240 1 << PG_private |
... ... @@ -454,6 +460,7 @@
454 460 {
455 461 if (unlikely(page_mapcount(page) |
456 462 (page->mapping != NULL) |
  463 + (page_get_page_cgroup(page) != NULL) |
457 464 (page_count(page) != 0) |
458 465 (page->flags & (
459 466 1 << PG_lru |
... ... @@ -603,6 +610,7 @@
603 610 {
604 611 if (unlikely(page_mapcount(page) |
605 612 (page->mapping != NULL) |
  613 + (page_get_page_cgroup(page) != NULL) |
606 614 (page_count(page) != 0) |
607 615 (page->flags & (
608 616 1 << PG_lru |
... ... @@ -989,7 +997,6 @@
989 997  
990 998 if (!PageHighMem(page))
991 999 debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
992   - VM_BUG_ON(page_get_page_cgroup(page));
993 1000 arch_free_page(page, 0);
994 1001 kernel_map_pages(page, 1, 0);
995 1002  
... ... @@ -2528,7 +2535,6 @@
2528 2535 set_page_links(page, zone, nid, pfn);
2529 2536 init_page_count(page);
2530 2537 reset_page_mapcount(page);
2531   - page_assign_page_cgroup(page, NULL);
2532 2538 SetPageReserved(page);
2533 2539  
2534 2540 /*