Commit b1f9d594668d008cacd5679cfde675dcdb9f5f8f
Committed by
Greg Kroah-Hartman
1 parent
19040c57ac
Exists in
ti-linux-3.14.y
and in
2 other branches
futex: Make lookup_pi_state more robust
commit 54a217887a7b658e2650c3feff22756ab80c7339 upstream. The current implementation of lookup_pi_state has ambigous handling of the TID value 0 in the user space futex. We can get into the kernel even if the TID value is 0, because either there is a stale waiters bit or the owner died bit is set or we are called from the requeue_pi path or from user space just for fun. The current code avoids an explicit sanity check for pid = 0 in case that kernel internal state (waiters) are found for the user space address. This can lead to state leakage and worse under some circumstances. Handle the cases explicit: Waiter | pi_state | pi->owner | uTID | uODIED | ? [1] NULL | --- | --- | 0 | 0/1 | Valid [2] NULL | --- | --- | >0 | 0/1 | Valid [3] Found | NULL | -- | Any | 0/1 | Invalid [4] Found | Found | NULL | 0 | 1 | Valid [5] Found | Found | NULL | >0 | 1 | Invalid [6] Found | Found | task | 0 | 1 | Valid [7] Found | Found | NULL | Any | 0 | Invalid [8] Found | Found | task | ==taskTID | 0/1 | Valid [9] Found | Found | task | 0 | 0 | Invalid [10] Found | Found | task | !=taskTID | 0/1 | Invalid [1] Indicates that the kernel can acquire the futex atomically. We came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. [2] Valid, if TID does not belong to a kernel thread. If no matching thread is found then it indicates that the owner TID has died. [3] Invalid. The waiter is queued on a non PI futex [4] Valid state after exit_robust_list(), which sets the user space value to FUTEX_WAITERS | FUTEX_OWNER_DIED. [5] The user space value got manipulated between exit_robust_list() and exit_pi_state_list() [6] Valid state after exit_pi_state_list() which sets the new owner in the pi_state but cannot access the user space value. [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. [8] Owner and user space value match [9] There is no transient state which sets the user space TID to 0 except exit_robust_list(), but this is indicated by the FUTEX_OWNER_DIED bit. See [4] [10] There is no transient state which leaves owner and user space TID out of sync. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Kees Cook <keescook@chromium.org> Cc: Will Drewry <wad@chromium.org> Cc: Darren Hart <dvhart@linux.intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Showing 1 changed file with 106 additions and 28 deletions Side-by-side Diff
kernel/futex.c
... | ... | @@ -729,10 +729,58 @@ |
729 | 729 | raw_spin_unlock_irq(&curr->pi_lock); |
730 | 730 | } |
731 | 731 | |
732 | +/* | |
733 | + * We need to check the following states: | |
734 | + * | |
735 | + * Waiter | pi_state | pi->owner | uTID | uODIED | ? | |
736 | + * | |
737 | + * [1] NULL | --- | --- | 0 | 0/1 | Valid | |
738 | + * [2] NULL | --- | --- | >0 | 0/1 | Valid | |
739 | + * | |
740 | + * [3] Found | NULL | -- | Any | 0/1 | Invalid | |
741 | + * | |
742 | + * [4] Found | Found | NULL | 0 | 1 | Valid | |
743 | + * [5] Found | Found | NULL | >0 | 1 | Invalid | |
744 | + * | |
745 | + * [6] Found | Found | task | 0 | 1 | Valid | |
746 | + * | |
747 | + * [7] Found | Found | NULL | Any | 0 | Invalid | |
748 | + * | |
749 | + * [8] Found | Found | task | ==taskTID | 0/1 | Valid | |
750 | + * [9] Found | Found | task | 0 | 0 | Invalid | |
751 | + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid | |
752 | + * | |
753 | + * [1] Indicates that the kernel can acquire the futex atomically. We | |
754 | + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. | |
755 | + * | |
756 | + * [2] Valid, if TID does not belong to a kernel thread. If no matching | |
757 | + * thread is found then it indicates that the owner TID has died. | |
758 | + * | |
759 | + * [3] Invalid. The waiter is queued on a non PI futex | |
760 | + * | |
761 | + * [4] Valid state after exit_robust_list(), which sets the user space | |
762 | + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. | |
763 | + * | |
764 | + * [5] The user space value got manipulated between exit_robust_list() | |
765 | + * and exit_pi_state_list() | |
766 | + * | |
767 | + * [6] Valid state after exit_pi_state_list() which sets the new owner in | |
768 | + * the pi_state but cannot access the user space value. | |
769 | + * | |
770 | + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. | |
771 | + * | |
772 | + * [8] Owner and user space value match | |
773 | + * | |
774 | + * [9] There is no transient state which sets the user space TID to 0 | |
775 | + * except exit_robust_list(), but this is indicated by the | |
776 | + * FUTEX_OWNER_DIED bit. See [4] | |
777 | + * | |
778 | + * [10] There is no transient state which leaves owner and user space | |
779 | + * TID out of sync. | |
780 | + */ | |
732 | 781 | static int |
733 | 782 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
734 | - union futex_key *key, struct futex_pi_state **ps, | |
735 | - struct task_struct *task) | |
783 | + union futex_key *key, struct futex_pi_state **ps) | |
736 | 784 | { |
737 | 785 | struct futex_pi_state *pi_state = NULL; |
738 | 786 | struct futex_q *this, *next; |
739 | 787 | |
... | ... | @@ -742,12 +790,13 @@ |
742 | 790 | plist_for_each_entry_safe(this, next, &hb->chain, list) { |
743 | 791 | if (match_futex(&this->key, key)) { |
744 | 792 | /* |
745 | - * Another waiter already exists - bump up | |
746 | - * the refcount and return its pi_state: | |
793 | + * Sanity check the waiter before increasing | |
794 | + * the refcount and attaching to it. | |
747 | 795 | */ |
748 | 796 | pi_state = this->pi_state; |
749 | 797 | /* |
750 | - * Userspace might have messed up non-PI and PI futexes | |
798 | + * Userspace might have messed up non-PI and | |
799 | + * PI futexes [3] | |
751 | 800 | */ |
752 | 801 | if (unlikely(!pi_state)) |
753 | 802 | return -EINVAL; |
754 | 803 | |
755 | 804 | |
756 | 805 | |
757 | 806 | |
758 | 807 | |
759 | 808 | |
760 | 809 | |
761 | 810 | |
... | ... | @@ -755,44 +804,70 @@ |
755 | 804 | WARN_ON(!atomic_read(&pi_state->refcount)); |
756 | 805 | |
757 | 806 | /* |
758 | - * When pi_state->owner is NULL then the owner died | |
759 | - * and another waiter is on the fly. pi_state->owner | |
760 | - * is fixed up by the task which acquires | |
761 | - * pi_state->rt_mutex. | |
762 | - * | |
763 | - * We do not check for pid == 0 which can happen when | |
764 | - * the owner died and robust_list_exit() cleared the | |
765 | - * TID. | |
807 | + * Handle the owner died case: | |
766 | 808 | */ |
767 | - if (pid && pi_state->owner) { | |
809 | + if (uval & FUTEX_OWNER_DIED) { | |
768 | 810 | /* |
769 | - * Bail out if user space manipulated the | |
770 | - * futex value. | |
811 | + * exit_pi_state_list sets owner to NULL and | |
812 | + * wakes the topmost waiter. The task which | |
813 | + * acquires the pi_state->rt_mutex will fixup | |
814 | + * owner. | |
771 | 815 | */ |
772 | - if (pid != task_pid_vnr(pi_state->owner)) | |
816 | + if (!pi_state->owner) { | |
817 | + /* | |
818 | + * No pi state owner, but the user | |
819 | + * space TID is not 0. Inconsistent | |
820 | + * state. [5] | |
821 | + */ | |
822 | + if (pid) | |
823 | + return -EINVAL; | |
824 | + /* | |
825 | + * Take a ref on the state and | |
826 | + * return. [4] | |
827 | + */ | |
828 | + goto out_state; | |
829 | + } | |
830 | + | |
831 | + /* | |
832 | + * If TID is 0, then either the dying owner | |
833 | + * has not yet executed exit_pi_state_list() | |
834 | + * or some waiter acquired the rtmutex in the | |
835 | + * pi state, but did not yet fixup the TID in | |
836 | + * user space. | |
837 | + * | |
838 | + * Take a ref on the state and return. [6] | |
839 | + */ | |
840 | + if (!pid) | |
841 | + goto out_state; | |
842 | + } else { | |
843 | + /* | |
844 | + * If the owner died bit is not set, | |
845 | + * then the pi_state must have an | |
846 | + * owner. [7] | |
847 | + */ | |
848 | + if (!pi_state->owner) | |
773 | 849 | return -EINVAL; |
774 | 850 | } |
775 | 851 | |
776 | 852 | /* |
777 | - * Protect against a corrupted uval. If uval | |
778 | - * is 0x80000000 then pid is 0 and the waiter | |
779 | - * bit is set. So the deadlock check in the | |
780 | - * calling code has failed and we did not fall | |
781 | - * into the check above due to !pid. | |
853 | + * Bail out if user space manipulated the | |
854 | + * futex value. If pi state exists then the | |
855 | + * owner TID must be the same as the user | |
856 | + * space TID. [9/10] | |
782 | 857 | */ |
783 | - if (task && pi_state->owner == task) | |
784 | - return -EDEADLK; | |
858 | + if (pid != task_pid_vnr(pi_state->owner)) | |
859 | + return -EINVAL; | |
785 | 860 | |
861 | + out_state: | |
786 | 862 | atomic_inc(&pi_state->refcount); |
787 | 863 | *ps = pi_state; |
788 | - | |
789 | 864 | return 0; |
790 | 865 | } |
791 | 866 | } |
792 | 867 | |
793 | 868 | /* |
794 | 869 | * We are the first waiter - try to look up the real owner and attach |
795 | - * the new pi_state to it, but bail out when TID = 0 | |
870 | + * the new pi_state to it, but bail out when TID = 0 [1] | |
796 | 871 | */ |
797 | 872 | if (!pid) |
798 | 873 | return -ESRCH; |
... | ... | @@ -825,6 +900,9 @@ |
825 | 900 | return ret; |
826 | 901 | } |
827 | 902 | |
903 | + /* | |
904 | + * No existing pi state. First waiter. [2] | |
905 | + */ | |
828 | 906 | pi_state = alloc_pi_state(); |
829 | 907 | |
830 | 908 | /* |
... | ... | @@ -945,7 +1023,7 @@ |
945 | 1023 | * We dont have the lock. Look up the PI state (or create it if |
946 | 1024 | * we are the first waiter): |
947 | 1025 | */ |
948 | - ret = lookup_pi_state(uval, hb, key, ps, task); | |
1026 | + ret = lookup_pi_state(uval, hb, key, ps); | |
949 | 1027 | |
950 | 1028 | if (unlikely(ret)) { |
951 | 1029 | switch (ret) { |
... | ... | @@ -1551,7 +1629,7 @@ |
1551 | 1629 | * rereading and handing potential crap to |
1552 | 1630 | * lookup_pi_state. |
1553 | 1631 | */ |
1554 | - ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); | |
1632 | + ret = lookup_pi_state(ret, hb2, &key2, &pi_state); | |
1555 | 1633 | } |
1556 | 1634 | |
1557 | 1635 | switch (ret) { |