Commit 971316f0503a5c50633d07b83b6db2f15a3a5b00

Authored by Oleg Nesterov
Committed by Linus Torvalds
1 parent d80e731eca

epoll: ep_unregister_pollwait() can use the freed pwq->whead

signalfd_cleanup() ensures that ->signalfd_wqh is not used, but
this is not enough. eppoll_entry->whead still points to the memory
we are going to free, so ep_unregister_pollwait()->remove_wait_queue()
is obviously unsafe.

Change ep_poll_callback(POLLFREE) to set eppoll_entry->whead = NULL,
change ep_unregister_pollwait() to check pwq->whead != NULL under
rcu_read_lock() before remove_wait_queue(). We add the new helper,
ep_remove_wait_queue(), for this.

This works because sighand_cachep is SLAB_DESTROY_BY_RCU and because
->signalfd_wqh is initialized in sighand_ctor(), not in copy_sighand().
ep_unregister_pollwait()->remove_wait_queue() can play with an already
freed and potentially reused ->sighand, but this is fine. This memory
must have a valid ->signalfd_wqh until rcu_read_unlock().

Reported-by: Maxime Bizon <mbizon@freebox.fr>
Cc: <stable@kernel.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 32 additions and 4 deletions (side-by-side diff)

... ... @@ -320,6 +320,11 @@
320 320 return !list_empty(p);
321 321 }
322 322  
  323 +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
  324 +{
  325 + return container_of(p, struct eppoll_entry, wait);
  326 +}
  327 +
323 328 /* Get the "struct epitem" from a wait queue pointer */
324 329 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
325 330 {
... ... @@ -467,6 +472,18 @@
467 472 put_cpu();
468 473 }
469 474  
  475 +static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  476 +{
  477 + wait_queue_head_t *whead;
  478 +
  479 + rcu_read_lock();
  480 + /* If it is cleared by POLLFREE, it should be rcu-safe */
  481 + whead = rcu_dereference(pwq->whead);
  482 + if (whead)
  483 + remove_wait_queue(whead, &pwq->wait);
  484 + rcu_read_unlock();
  485 +}
  486 +
470 487 /*
471 488 * This function unregisters poll callbacks from the associated file
472 489 * descriptor. Must be called with "mtx" held (or "epmutex" if called from
... ... @@ -481,7 +498,7 @@
481 498 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
482 499  
483 500 list_del(&pwq->llink);
484   - remove_wait_queue(pwq->whead, &pwq->wait);
  501 + ep_remove_wait_queue(pwq);
485 502 kmem_cache_free(pwq_cache, pwq);
486 503 }
487 504 }
488 505  
... ... @@ -842,9 +859,16 @@
842 859 struct epitem *epi = ep_item_from_wait(wait);
843 860 struct eventpoll *ep = epi->ep;
844 861  
845   - /* the caller holds eppoll_entry->whead->lock */
846   - if ((unsigned long)key & POLLFREE)
  862 + if ((unsigned long)key & POLLFREE) {
  863 + ep_pwq_from_wait(wait)->whead = NULL;
  864 + /*
  865 + * whead = NULL above can race with ep_remove_wait_queue()
  866 + * which can do another remove_wait_queue() after us, so we
  867 + * can't use __remove_wait_queue(). whead->lock is held by
  868 + * the caller.
  869 + */
847 870 list_del_init(&wait->task_list);
  871 + }
848 872  
849 873 spin_lock_irqsave(&ep->lock, flags);
850 874  
... ... @@ -33,7 +33,11 @@
33 33 void signalfd_cleanup(struct sighand_struct *sighand)
34 34 {
35 35 wait_queue_head_t *wqh = &sighand->signalfd_wqh;
36   -
  36 + /*
  37 + * The lockless check can race with remove_wait_queue() in progress,
  38 + * but in this case its caller should run under rcu_read_lock() and
  39 + * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
  40 + */
37 41 if (likely(!waitqueue_active(wqh)))
38 42 return;
39 43