Commit 3ad3d901bbcfb15a5e4690e55350db0899095a68
Committed by
Linus Torvalds
1 parent
bdf4f4d216
Exists in
master
and in
20 other branches
mm: mmu_notifier: fix freed page still mapped in secondary MMU
mmu_notifier_release() is called when the process is exiting. It will delete all the mmu notifiers. But at this time the page belonging to the process is still present in page tables and is present on the LRU list, so this race will happen:

          CPU 0                            CPU 1
    mmu_notifier_release:              try_to_unmap:
      hlist_del_init_rcu(&mn->hlist);
                                         ptep_clear_flush_notify:
                                           mmu notifier not found
                                         free page  !!!!!!
                                         /*
                                          * At this point, the page has been
                                          * freed, but it is still mapped in
                                          * the secondary MMU.
                                          */
      mn->ops->release(mn, mm);

Then the box is not stable and sometimes we can get this bug:

    [  738.075923] BUG: Bad page state in process migrate-perf  pfn:03bec
    [  738.075931] page:ffffea00000efb00 count:0 mapcount:0 mapping: (null) index:0x8076
    [  738.075936] page flags: 0x20000000000014(referenced|dirty)

The same issue is present in mmu_notifier_unregister(). We can call ->release before deleting the notifier to ensure the page has been unmapped from the secondary MMU before it is freed.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Cc: Avi Kivity <avi@redhat.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 23 additions and 22 deletions Side-by-side Diff
mm/mmu_notifier.c
... | ... | @@ -33,7 +33,25 @@ |
33 | 33 | void __mmu_notifier_release(struct mm_struct *mm) |
34 | 34 | { |
35 | 35 | struct mmu_notifier *mn; |
36 | + struct hlist_node *n; | |
36 | 37 | |
38 | + /* | |
39 | + * RCU here will block mmu_notifier_unregister until | |
40 | + * ->release returns. | |
41 | + */ | |
42 | + rcu_read_lock(); | |
43 | + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) | |
44 | + /* | |
45 | + * if ->release runs before mmu_notifier_unregister it | |
46 | + * must be handled as it's the only way for the driver | |
47 | + * to flush all existing sptes and stop the driver | |
48 | + * from establishing any more sptes before all the | |
49 | + * pages in the mm are freed. | |
50 | + */ | |
51 | + if (mn->ops->release) | |
52 | + mn->ops->release(mn, mm); | |
53 | + rcu_read_unlock(); | |
54 | + | |
37 | 55 | spin_lock(&mm->mmu_notifier_mm->lock); |
38 | 56 | while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { |
39 | 57 | mn = hlist_entry(mm->mmu_notifier_mm->list.first, |
... | ... | @@ -46,23 +64,6 @@ |
46 | 64 | * mmu_notifier_unregister to return. |
47 | 65 | */ |
48 | 66 | hlist_del_init_rcu(&mn->hlist); |
49 | - /* | |
50 | - * RCU here will block mmu_notifier_unregister until | |
51 | - * ->release returns. | |
52 | - */ | |
53 | - rcu_read_lock(); | |
54 | - spin_unlock(&mm->mmu_notifier_mm->lock); | |
55 | - /* | |
56 | - * if ->release runs before mmu_notifier_unregister it | |
57 | - * must be handled as it's the only way for the driver | |
58 | - * to flush all existing sptes and stop the driver | |
59 | - * from establishing any more sptes before all the | |
60 | - * pages in the mm are freed. | |
61 | - */ | |
62 | - if (mn->ops->release) | |
63 | - mn->ops->release(mn, mm); | |
64 | - rcu_read_unlock(); | |
65 | - spin_lock(&mm->mmu_notifier_mm->lock); | |
66 | 67 | } |
67 | 68 | spin_unlock(&mm->mmu_notifier_mm->lock); |
68 | 69 | |
69 | 70 | |
70 | 71 | |
... | ... | @@ -284,16 +285,13 @@ |
284 | 285 | { |
285 | 286 | BUG_ON(atomic_read(&mm->mm_count) <= 0); |
286 | 287 | |
287 | - spin_lock(&mm->mmu_notifier_mm->lock); | |
288 | 288 | if (!hlist_unhashed(&mn->hlist)) { |
289 | - hlist_del_rcu(&mn->hlist); | |
290 | - | |
291 | 289 | /* |
292 | 290 | * RCU here will force exit_mmap to wait ->release to finish |
293 | 291 | * before freeing the pages. |
294 | 292 | */ |
295 | 293 | rcu_read_lock(); |
296 | - spin_unlock(&mm->mmu_notifier_mm->lock); | |
294 | + | |
297 | 295 | /* |
298 | 296 | * exit_mmap will block in mmu_notifier_release to |
299 | 297 | * guarantee ->release is called before freeing the |
300 | 298 | |
... | ... | @@ -302,8 +300,11 @@ |
302 | 300 | if (mn->ops->release) |
303 | 301 | mn->ops->release(mn, mm); |
304 | 302 | rcu_read_unlock(); |
305 | - } else | |
303 | + | |
304 | + spin_lock(&mm->mmu_notifier_mm->lock); | |
305 | + hlist_del_rcu(&mn->hlist); | |
306 | 306 | spin_unlock(&mm->mmu_notifier_mm->lock); |
307 | + } | |
307 | 308 | |
308 | 309 | /* |
309 | 310 | * Wait any running method to finish, of course including |