Commit 9442ec9df40d952b0de185ae5638a74970388e01
Committed by
Linus Torvalds
1 parent
98837c7f82
Exists in
master
and in
7 other branches
memcg: bad page if page_cgroup when free
Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it were set here, it'd likely cause corruption when the page is reused. Don't use page_assign_page_cgroup to clear it: that should be private to memcontrol.c, and always called with the lock taken; and memmap_init_zone doesn't need it either - like page->mapping and other pointers throughout the kernel, Linux assumes pointers in zeroed structures are NULL pointers. Instead use page_reset_bad_cgroup, added to memcontrol.h for this only. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: David Rientjes <rientjes@google.com> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Paul Menage <menage@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 28 additions and 25 deletions Side-by-side Diff
include/linux/memcontrol.h
... | ... | @@ -29,8 +29,9 @@ |
29 | 29 | |
30 | 30 | extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); |
31 | 31 | extern void mm_free_cgroup(struct mm_struct *mm); |
32 | -extern void page_assign_page_cgroup(struct page *page, | |
33 | - struct page_cgroup *pc); | |
32 | + | |
33 | +#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) | |
34 | + | |
34 | 35 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); |
35 | 36 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, |
36 | 37 | gfp_t gfp_mask); |
... | ... | @@ -82,8 +83,7 @@ |
82 | 83 | { |
83 | 84 | } |
84 | 85 | |
85 | -static inline void page_assign_page_cgroup(struct page *page, | |
86 | - struct page_cgroup *pc) | |
86 | +static inline void page_reset_bad_cgroup(struct page *page) | |
87 | 87 | { |
88 | 88 | } |
89 | 89 |
mm/memcontrol.c
... | ... | @@ -140,11 +140,17 @@ |
140 | 140 | |
141 | 141 | /* |
142 | 142 | * We use the lower bit of the page->page_cgroup pointer as a bit spin |
143 | - * lock. We need to ensure that page->page_cgroup is atleast two | |
144 | - * byte aligned (based on comments from Nick Piggin) | |
143 | + * lock. We need to ensure that page->page_cgroup is at least two | |
144 | + * byte aligned (based on comments from Nick Piggin). But since | |
145 | + * bit_spin_lock doesn't actually set that lock bit in a non-debug | |
146 | + * uniprocessor kernel, we should avoid setting it here too. | |
145 | 147 | */ |
146 | 148 | #define PAGE_CGROUP_LOCK_BIT 0x0 |
147 | -#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | |
149 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
150 | +#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | |
151 | +#else | |
152 | +#define PAGE_CGROUP_LOCK 0x0 | |
153 | +#endif | |
148 | 154 | |
149 | 155 | /* |
150 | 156 | * A page_cgroup page is associated with every page descriptor. The |
151 | 157 | |
... | ... | @@ -271,19 +277,10 @@ |
271 | 277 | &page->page_cgroup); |
272 | 278 | } |
273 | 279 | |
274 | -void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) | |
280 | +static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) | |
275 | 281 | { |
276 | - int locked; | |
277 | - | |
278 | - /* | |
279 | - * While resetting the page_cgroup we might not hold the | |
280 | - * page_cgroup lock. free_hot_cold_page() is an example | |
281 | - * of such a scenario | |
282 | - */ | |
283 | - if (pc) | |
284 | - VM_BUG_ON(!page_cgroup_locked(page)); | |
285 | - locked = (page->page_cgroup & PAGE_CGROUP_LOCK); | |
286 | - page->page_cgroup = ((unsigned long)pc | locked); | |
282 | + VM_BUG_ON(!page_cgroup_locked(page)); | |
283 | + page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); | |
287 | 284 | } |
288 | 285 | |
289 | 286 | struct page_cgroup *page_get_page_cgroup(struct page *page) |
mm/page_alloc.c
... | ... | @@ -222,13 +222,19 @@ |
222 | 222 | |
223 | 223 | static void bad_page(struct page *page) |
224 | 224 | { |
225 | - printk(KERN_EMERG "Bad page state in process '%s'\n" | |
226 | - KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" | |
227 | - KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | |
228 | - KERN_EMERG "Backtrace:\n", | |
225 | + void *pc = page_get_page_cgroup(page); | |
226 | + | |
227 | + printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG | |
228 | + "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", | |
229 | 229 | current->comm, page, (int)(2*sizeof(unsigned long)), |
230 | 230 | (unsigned long)page->flags, page->mapping, |
231 | 231 | page_mapcount(page), page_count(page)); |
232 | + if (pc) { | |
233 | + printk(KERN_EMERG "cgroup:%p\n", pc); | |
234 | + page_reset_bad_cgroup(page); | |
235 | + } | |
236 | + printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | |
237 | + KERN_EMERG "Backtrace:\n"); | |
232 | 238 | dump_stack(); |
233 | 239 | page->flags &= ~(1 << PG_lru | |
234 | 240 | 1 << PG_private | |
... | ... | @@ -454,6 +460,7 @@ |
454 | 460 | { |
455 | 461 | if (unlikely(page_mapcount(page) | |
456 | 462 | (page->mapping != NULL) | |
463 | + (page_get_page_cgroup(page) != NULL) | | |
457 | 464 | (page_count(page) != 0) | |
458 | 465 | (page->flags & ( |
459 | 466 | 1 << PG_lru | |
... | ... | @@ -603,6 +610,7 @@ |
603 | 610 | { |
604 | 611 | if (unlikely(page_mapcount(page) | |
605 | 612 | (page->mapping != NULL) | |
613 | + (page_get_page_cgroup(page) != NULL) | | |
606 | 614 | (page_count(page) != 0) | |
607 | 615 | (page->flags & ( |
608 | 616 | 1 << PG_lru | |
... | ... | @@ -989,7 +997,6 @@ |
989 | 997 | |
990 | 998 | if (!PageHighMem(page)) |
991 | 999 | debug_check_no_locks_freed(page_address(page), PAGE_SIZE); |
992 | - VM_BUG_ON(page_get_page_cgroup(page)); | |
993 | 1000 | arch_free_page(page, 0); |
994 | 1001 | kernel_map_pages(page, 1, 0); |
995 | 1002 | |
... | ... | @@ -2528,7 +2535,6 @@ |
2528 | 2535 | set_page_links(page, zone, nid, pfn); |
2529 | 2536 | init_page_count(page); |
2530 | 2537 | reset_page_mapcount(page); |
2531 | - page_assign_page_cgroup(page, NULL); | |
2532 | 2538 | SetPageReserved(page); |
2533 | 2539 | |
2534 | 2540 | /* |