Commit b1f9d594668d008cacd5679cfde675dcdb9f5f8f

Authored by Thomas Gleixner
Committed by Greg Kroah-Hartman
1 parent 19040c57ac

futex: Make lookup_pi_state more robust

commit 54a217887a7b658e2650c3feff22756ab80c7339 upstream.

The current implementation of lookup_pi_state has ambigous handling of
the TID value 0 in the user space futex.  We can get into the kernel
even if the TID value is 0, because either there is a stale waiters bit
or the owner died bit is set or we are called from the requeue_pi path
or from user space just for fun.

The current code avoids an explicit sanity check for pid = 0 in case
that kernel internal state (waiters) are found for the user space
address.  This can lead to state leakage and worse under some
circumstances.

Handle the cases explicit:

       Waiter | pi_state | pi->owner | uTID      | uODIED | ?

  [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
  [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid

  [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid

  [4]  Found  | Found    | NULL      | 0         | 1      | Valid
  [5]  Found  | Found    | NULL      | >0        | 1      | Invalid

  [6]  Found  | Found    | task      | 0         | 1      | Valid

  [7]  Found  | Found    | NULL      | Any       | 0      | Invalid

  [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
  [9]  Found  | Found    | task      | 0         | 0      | Invalid
  [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid

 [1] Indicates that the kernel can acquire the futex atomically. We
     came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.

 [2] Valid, if TID does not belong to a kernel thread. If no matching
     thread is found then it indicates that the owner TID has died.

 [3] Invalid. The waiter is queued on a non PI futex

 [4] Valid state after exit_robust_list(), which sets the user space
     value to FUTEX_WAITERS | FUTEX_OWNER_DIED.

 [5] The user space value got manipulated between exit_robust_list()
     and exit_pi_state_list()

 [6] Valid state after exit_pi_state_list() which sets the new owner in
     the pi_state but cannot access the user space value.

 [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.

 [8] Owner and user space value match

 [9] There is no transient state which sets the user space TID to 0
     except exit_robust_list(), but this is indicated by the
     FUTEX_OWNER_DIED bit. See [4]

[10] There is no transient state which leaves owner and user space
     TID out of sync.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Will Drewry <wad@chromium.org>
Cc: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 106 additions and 28 deletions Side-by-side Diff

... ... @@ -729,10 +729,58 @@
729 729 raw_spin_unlock_irq(&curr->pi_lock);
730 730 }
731 731  
  732 +/*
  733 + * We need to check the following states:
  734 + *
  735 + * Waiter | pi_state | pi->owner | uTID | uODIED | ?
  736 + *
  737 + * [1] NULL | --- | --- | 0 | 0/1 | Valid
  738 + * [2] NULL | --- | --- | >0 | 0/1 | Valid
  739 + *
  740 + * [3] Found | NULL | -- | Any | 0/1 | Invalid
  741 + *
  742 + * [4] Found | Found | NULL | 0 | 1 | Valid
  743 + * [5] Found | Found | NULL | >0 | 1 | Invalid
  744 + *
  745 + * [6] Found | Found | task | 0 | 1 | Valid
  746 + *
  747 + * [7] Found | Found | NULL | Any | 0 | Invalid
  748 + *
  749 + * [8] Found | Found | task | ==taskTID | 0/1 | Valid
  750 + * [9] Found | Found | task | 0 | 0 | Invalid
  751 + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
  752 + *
  753 + * [1] Indicates that the kernel can acquire the futex atomically. We
  754 + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
  755 + *
  756 + * [2] Valid, if TID does not belong to a kernel thread. If no matching
  757 + * thread is found then it indicates that the owner TID has died.
  758 + *
  759 + * [3] Invalid. The waiter is queued on a non PI futex
  760 + *
  761 + * [4] Valid state after exit_robust_list(), which sets the user space
  762 + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
  763 + *
  764 + * [5] The user space value got manipulated between exit_robust_list()
  765 + * and exit_pi_state_list()
  766 + *
  767 + * [6] Valid state after exit_pi_state_list() which sets the new owner in
  768 + * the pi_state but cannot access the user space value.
  769 + *
  770 + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
  771 + *
  772 + * [8] Owner and user space value match
  773 + *
  774 + * [9] There is no transient state which sets the user space TID to 0
  775 + * except exit_robust_list(), but this is indicated by the
  776 + * FUTEX_OWNER_DIED bit. See [4]
  777 + *
  778 + * [10] There is no transient state which leaves owner and user space
  779 + * TID out of sync.
  780 + */
732 781 static int
733 782 lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
734   - union futex_key *key, struct futex_pi_state **ps,
735   - struct task_struct *task)
  783 + union futex_key *key, struct futex_pi_state **ps)
736 784 {
737 785 struct futex_pi_state *pi_state = NULL;
738 786 struct futex_q *this, *next;
739 787  
... ... @@ -742,12 +790,13 @@
742 790 plist_for_each_entry_safe(this, next, &hb->chain, list) {
743 791 if (match_futex(&this->key, key)) {
744 792 /*
745   - * Another waiter already exists - bump up
746   - * the refcount and return its pi_state:
  793 + * Sanity check the waiter before increasing
  794 + * the refcount and attaching to it.
747 795 */
748 796 pi_state = this->pi_state;
749 797 /*
750   - * Userspace might have messed up non-PI and PI futexes
  798 + * Userspace might have messed up non-PI and
  799 + * PI futexes [3]
751 800 */
752 801 if (unlikely(!pi_state))
753 802 return -EINVAL;
754 803  
755 804  
756 805  
757 806  
758 807  
759 808  
760 809  
761 810  
... ... @@ -755,44 +804,70 @@
755 804 WARN_ON(!atomic_read(&pi_state->refcount));
756 805  
757 806 /*
758   - * When pi_state->owner is NULL then the owner died
759   - * and another waiter is on the fly. pi_state->owner
760   - * is fixed up by the task which acquires
761   - * pi_state->rt_mutex.
762   - *
763   - * We do not check for pid == 0 which can happen when
764   - * the owner died and robust_list_exit() cleared the
765   - * TID.
  807 + * Handle the owner died case:
766 808 */
767   - if (pid && pi_state->owner) {
  809 + if (uval & FUTEX_OWNER_DIED) {
768 810 /*
769   - * Bail out if user space manipulated the
770   - * futex value.
  811 + * exit_pi_state_list sets owner to NULL and
  812 + * wakes the topmost waiter. The task which
  813 + * acquires the pi_state->rt_mutex will fixup
  814 + * owner.
771 815 */
772   - if (pid != task_pid_vnr(pi_state->owner))
  816 + if (!pi_state->owner) {
  817 + /*
  818 + * No pi state owner, but the user
  819 + * space TID is not 0. Inconsistent
  820 + * state. [5]
  821 + */
  822 + if (pid)
  823 + return -EINVAL;
  824 + /*
  825 + * Take a ref on the state and
  826 + * return. [4]
  827 + */
  828 + goto out_state;
  829 + }
  830 +
  831 + /*
  832 + * If TID is 0, then either the dying owner
  833 + * has not yet executed exit_pi_state_list()
  834 + * or some waiter acquired the rtmutex in the
  835 + * pi state, but did not yet fixup the TID in
  836 + * user space.
  837 + *
  838 + * Take a ref on the state and return. [6]
  839 + */
  840 + if (!pid)
  841 + goto out_state;
  842 + } else {
  843 + /*
  844 + * If the owner died bit is not set,
  845 + * then the pi_state must have an
  846 + * owner. [7]
  847 + */
  848 + if (!pi_state->owner)
773 849 return -EINVAL;
774 850 }
775 851  
776 852 /*
777   - * Protect against a corrupted uval. If uval
778   - * is 0x80000000 then pid is 0 and the waiter
779   - * bit is set. So the deadlock check in the
780   - * calling code has failed and we did not fall
781   - * into the check above due to !pid.
  853 + * Bail out if user space manipulated the
  854 + * futex value. If pi state exists then the
  855 + * owner TID must be the same as the user
  856 + * space TID. [9/10]
782 857 */
783   - if (task && pi_state->owner == task)
784   - return -EDEADLK;
  858 + if (pid != task_pid_vnr(pi_state->owner))
  859 + return -EINVAL;
785 860  
  861 + out_state:
786 862 atomic_inc(&pi_state->refcount);
787 863 *ps = pi_state;
788   -
789 864 return 0;
790 865 }
791 866 }
792 867  
793 868 /*
794 869 * We are the first waiter - try to look up the real owner and attach
795   - * the new pi_state to it, but bail out when TID = 0
  870 + * the new pi_state to it, but bail out when TID = 0 [1]
796 871 */
797 872 if (!pid)
798 873 return -ESRCH;
... ... @@ -825,6 +900,9 @@
825 900 return ret;
826 901 }
827 902  
  903 + /*
  904 + * No existing pi state. First waiter. [2]
  905 + */
828 906 pi_state = alloc_pi_state();
829 907  
830 908 /*
... ... @@ -945,7 +1023,7 @@
945 1023 * We dont have the lock. Look up the PI state (or create it if
946 1024 * we are the first waiter):
947 1025 */
948   - ret = lookup_pi_state(uval, hb, key, ps, task);
  1026 + ret = lookup_pi_state(uval, hb, key, ps);
949 1027  
950 1028 if (unlikely(ret)) {
951 1029 switch (ret) {
... ... @@ -1551,7 +1629,7 @@
1551 1629 * rereading and handing potential crap to
1552 1630 * lookup_pi_state.
1553 1631 */
1554   - ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
  1632 + ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
1555 1633 }
1556 1634  
1557 1635 switch (ret) {