Commit 751efd8610d3d7d67b7bdf7f62646edea7365dd7
Committed by
Linus Torvalds
1 parent
c1f1949527
Exists in
master
and in
20 other branches
mmu_notifier_unregister NULL Pointer deref and multiple ->release() callouts
There is a race condition between mmu_notifier_unregister() and
__mmu_notifier_release().  Assume two tasks, one calling
mmu_notifier_unregister() as a result of a filp_close() ->flush()
callout (task A), and the other calling mmu_notifier_release() from
an mmput() (task B).

        A                           B
t1                                  srcu_read_lock()
t2      if (!hlist_unhashed())
t3                                  srcu_read_unlock()
t4      srcu_read_lock()
t5                                  hlist_del_init_rcu()
t6                                  synchronize_srcu()
t7      srcu_read_unlock()
t8      hlist_del_rcu()  <--- NULL pointer deref.

Additionally, the list traversal in __mmu_notifier_release() is not
protected by the mmu_notifier_mm->hlist_lock, which can result in
callouts to the ->release() notifier from both mmu_notifier_unregister()
and __mmu_notifier_release().

-stable suggestions: The stable trees prior to 3.7.y need commits
21a92735f660 and 70400303ce0c cherry-picked in that order prior to
cherry-picking this commit.  The 3.7.y tree already has those two
commits.

Signed-off-by: Robin Holt <holt@sgi.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Sagi Grimberg <sagig@mellanox.co.il>
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 42 additions and 40 deletions (side-by-side diff)
mm/mmu_notifier.c
... | ... | @@ -37,50 +37,52 @@ |
37 | 37 | void __mmu_notifier_release(struct mm_struct *mm) |
38 | 38 | { |
39 | 39 | struct mmu_notifier *mn; |
40 | - struct hlist_node *n; | |
41 | 40 | int id; |
42 | 41 | |
43 | 42 | /* |
44 | - * SRCU here will block mmu_notifier_unregister until | |
45 | - * ->release returns. | |
43 | + * srcu_read_lock() here will block synchronize_srcu() in | |
44 | + * mmu_notifier_unregister() until all registered | |
45 | + * ->release() callouts this function makes have | |
46 | + * returned. | |
46 | 47 | */ |
47 | 48 | id = srcu_read_lock(&srcu); |
48 | - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) | |
49 | - /* | |
50 | - * if ->release runs before mmu_notifier_unregister it | |
51 | - * must be handled as it's the only way for the driver | |
52 | - * to flush all existing sptes and stop the driver | |
53 | - * from establishing any more sptes before all the | |
54 | - * pages in the mm are freed. | |
55 | - */ | |
56 | - if (mn->ops->release) | |
57 | - mn->ops->release(mn, mm); | |
58 | - srcu_read_unlock(&srcu, id); | |
59 | - | |
60 | 49 | spin_lock(&mm->mmu_notifier_mm->lock); |
61 | 50 | while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { |
62 | 51 | mn = hlist_entry(mm->mmu_notifier_mm->list.first, |
63 | 52 | struct mmu_notifier, |
64 | 53 | hlist); |
54 | + | |
65 | 55 | /* |
66 | - * We arrived before mmu_notifier_unregister so | |
67 | - * mmu_notifier_unregister will do nothing other than | |
68 | - * to wait ->release to finish and | |
69 | - * mmu_notifier_unregister to return. | |
56 | + * Unlink. This will prevent mmu_notifier_unregister() | |
57 | + * from also making the ->release() callout. | |
70 | 58 | */ |
71 | 59 | hlist_del_init_rcu(&mn->hlist); |
60 | + spin_unlock(&mm->mmu_notifier_mm->lock); | |
61 | + | |
62 | + /* | |
63 | + * Clear sptes. (see 'release' description in mmu_notifier.h) | |
64 | + */ | |
65 | + if (mn->ops->release) | |
66 | + mn->ops->release(mn, mm); | |
67 | + | |
68 | + spin_lock(&mm->mmu_notifier_mm->lock); | |
72 | 69 | } |
73 | 70 | spin_unlock(&mm->mmu_notifier_mm->lock); |
74 | 71 | |
75 | 72 | /* |
76 | - * synchronize_srcu here prevents mmu_notifier_release to | |
77 | - * return to exit_mmap (which would proceed freeing all pages | |
78 | - * in the mm) until the ->release method returns, if it was | |
79 | - * invoked by mmu_notifier_unregister. | |
80 | - * | |
81 | - * The mmu_notifier_mm can't go away from under us because one | |
82 | - * mm_count is hold by exit_mmap. | |
73 | + * All callouts to ->release() which we have done are complete. | |
74 | + * Allow synchronize_srcu() in mmu_notifier_unregister() to complete | |
83 | 75 | */ |
76 | + srcu_read_unlock(&srcu, id); | |
77 | + | |
78 | + /* | |
79 | + * mmu_notifier_unregister() may have unlinked a notifier and may | |
80 | + * still be calling out to it. Additionally, other notifiers | |
81 | + * may have been active via vmtruncate() et. al. Block here | |
82 | + * to ensure that all notifier callouts for this mm have been | |
83 | + * completed and the sptes are really cleaned up before returning | |
84 | + * to exit_mmap(). | |
85 | + */ | |
84 | 86 | synchronize_srcu(&srcu); |
85 | 87 | } |
86 | 88 | |
87 | 89 | |
88 | 90 | |
89 | 91 | |
90 | 92 | |
91 | 93 | |
92 | 94 | |
93 | 95 | |
94 | 96 | |
... | ... | @@ -294,31 +296,31 @@ |
294 | 296 | { |
295 | 297 | BUG_ON(atomic_read(&mm->mm_count) <= 0); |
296 | 298 | |
299 | + spin_lock(&mm->mmu_notifier_mm->lock); | |
297 | 300 | if (!hlist_unhashed(&mn->hlist)) { |
298 | - /* | |
299 | - * SRCU here will force exit_mmap to wait ->release to finish | |
300 | - * before freeing the pages. | |
301 | - */ | |
302 | 301 | int id; |
303 | 302 | |
304 | - id = srcu_read_lock(&srcu); | |
305 | 303 | /* |
306 | - * exit_mmap will block in mmu_notifier_release to | |
307 | - * guarantee ->release is called before freeing the | |
308 | - * pages. | |
304 | + * Ensure we synchronize up with __mmu_notifier_release(). | |
309 | 305 | */ |
306 | + id = srcu_read_lock(&srcu); | |
307 | + | |
308 | + hlist_del_rcu(&mn->hlist); | |
309 | + spin_unlock(&mm->mmu_notifier_mm->lock); | |
310 | + | |
310 | 311 | if (mn->ops->release) |
311 | 312 | mn->ops->release(mn, mm); |
312 | - srcu_read_unlock(&srcu, id); | |
313 | 313 | |
314 | - spin_lock(&mm->mmu_notifier_mm->lock); | |
315 | - hlist_del_rcu(&mn->hlist); | |
314 | + /* | |
315 | + * Allow __mmu_notifier_release() to complete. | |
316 | + */ | |
317 | + srcu_read_unlock(&srcu, id); | |
318 | + } else | |
316 | 319 | spin_unlock(&mm->mmu_notifier_mm->lock); |
317 | - } | |
318 | 320 | |
319 | 321 | /* |
320 | - * Wait any running method to finish, of course including | |
321 | - * ->release if it was run by mmu_notifier_relase instead of us. | |
322 | + * Wait for any running method to finish, including ->release() if it | |
323 | + * was run by __mmu_notifier_release() instead of us. | |
322 | 324 | */ |
323 | 325 | synchronize_srcu(&srcu); |
324 | 326 |