Commit c845acb324aa85a39650a14e7696982ceea75dc1
Committed by
David S. Miller
1 parent
b5bdacf3bb
Exists in
smarc_imx_lf-5.15.y
and in
27 other branches
af_unix: Fix splice-bind deadlock
On 2015/11/06, Dmitry Vyukov reported a deadlock involving the splice system call and AF_UNIX sockets, http://lists.openwall.net/netdev/2015/11/06/24 The situation was analyzed as (a while ago) A: socketpair() B: splice() from a pipe to /mnt/regular_file does sb_start_write() on /mnt C: try to freeze /mnt wait for B to finish with /mnt A: bind() try to bind our socket to /mnt/new_socket_name lock our socket, see it not bound yet decide that it needs to create something in /mnt try to do sb_start_write() on /mnt, block (it's waiting for C). D: splice() from the same pipe to our socket lock the pipe, see that socket is connected try to lock the socket, block waiting for A B: get around to actually feeding a chunk from pipe to file, try to lock the pipe. Deadlock. on 2015/11/10 by Al Viro, http://lists.openwall.net/netdev/2015/11/10/4 The patch fixes this by removing the kern_path_create related code from unix_mknod and executing it as part of unix_bind prior acquiring the readlock of the socket in question. This means that A (as used above) will sb_start_write on /mnt before it acquires the readlock, hence, it won't indirectly block B which first did a sb_start_write and then waited for a thread trying to acquire the readlock. Consequently, A being blocked by C waiting for B won't cause a deadlock anymore (effectively, both A and B acquire two locks in opposite order in the situation described above). Dmitry Vyukov(<dvyukov@google.com>) tested the original patch. Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 1 changed file with 40 additions and 26 deletions Side-by-side Diff
net/unix/af_unix.c
... | ... | @@ -953,32 +953,20 @@ |
953 | 953 | return NULL; |
954 | 954 | } |
955 | 955 | |
956 | -static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) | |
956 | +static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode, | |
957 | + struct path *res) | |
957 | 958 | { |
958 | - struct dentry *dentry; | |
959 | - struct path path; | |
960 | - int err = 0; | |
961 | - /* | |
962 | - * Get the parent directory, calculate the hash for last | |
963 | - * component. | |
964 | - */ | |
965 | - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | |
966 | - err = PTR_ERR(dentry); | |
967 | - if (IS_ERR(dentry)) | |
968 | - return err; | |
959 | + int err; | |
969 | 960 | |
970 | - /* | |
971 | - * All right, let's create it. | |
972 | - */ | |
973 | - err = security_path_mknod(&path, dentry, mode, 0); | |
961 | + err = security_path_mknod(path, dentry, mode, 0); | |
974 | 962 | if (!err) { |
975 | - err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); | |
963 | + err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); | |
976 | 964 | if (!err) { |
977 | - res->mnt = mntget(path.mnt); | |
965 | + res->mnt = mntget(path->mnt); | |
978 | 966 | res->dentry = dget(dentry); |
979 | 967 | } |
980 | 968 | } |
981 | - done_path_create(&path, dentry); | |
969 | + | |
982 | 970 | return err; |
983 | 971 | } |
984 | 972 | |
985 | 973 | |
... | ... | @@ -989,10 +977,12 @@ |
989 | 977 | struct unix_sock *u = unix_sk(sk); |
990 | 978 | struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; |
991 | 979 | char *sun_path = sunaddr->sun_path; |
992 | - int err; | |
980 | + int err, name_err; | |
993 | 981 | unsigned int hash; |
994 | 982 | struct unix_address *addr; |
995 | 983 | struct hlist_head *list; |
984 | + struct path path; | |
985 | + struct dentry *dentry; | |
996 | 986 | |
997 | 987 | err = -EINVAL; |
998 | 988 | if (sunaddr->sun_family != AF_UNIX) |
999 | 989 | |
1000 | 990 | |
... | ... | @@ -1008,14 +998,34 @@ |
1008 | 998 | goto out; |
1009 | 999 | addr_len = err; |
1010 | 1000 | |
1001 | + name_err = 0; | |
1002 | + dentry = NULL; | |
1003 | + if (sun_path[0]) { | |
1004 | + /* Get the parent directory, calculate the hash for last | |
1005 | + * component. | |
1006 | + */ | |
1007 | + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | |
1008 | + | |
1009 | + if (IS_ERR(dentry)) { | |
1010 | + /* delay report until after 'already bound' check */ | |
1011 | + name_err = PTR_ERR(dentry); | |
1012 | + dentry = NULL; | |
1013 | + } | |
1014 | + } | |
1015 | + | |
1011 | 1016 | err = mutex_lock_interruptible(&u->readlock); |
1012 | 1017 | if (err) |
1013 | - goto out; | |
1018 | + goto out_path; | |
1014 | 1019 | |
1015 | 1020 | err = -EINVAL; |
1016 | 1021 | if (u->addr) |
1017 | 1022 | goto out_up; |
1018 | 1023 | |
1024 | + if (name_err) { | |
1025 | + err = name_err == -EEXIST ? -EADDRINUSE : name_err; | |
1026 | + goto out_up; | |
1027 | + } | |
1028 | + | |
1019 | 1029 | err = -ENOMEM; |
1020 | 1030 | addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); |
1021 | 1031 | if (!addr) |
1022 | 1032 | |
... | ... | @@ -1026,11 +1036,11 @@ |
1026 | 1036 | addr->hash = hash ^ sk->sk_type; |
1027 | 1037 | atomic_set(&addr->refcnt, 1); |
1028 | 1038 | |
1029 | - if (sun_path[0]) { | |
1030 | - struct path path; | |
1039 | + if (dentry) { | |
1040 | + struct path u_path; | |
1031 | 1041 | umode_t mode = S_IFSOCK | |
1032 | 1042 | (SOCK_INODE(sock)->i_mode & ~current_umask()); |
1033 | - err = unix_mknod(sun_path, mode, &path); | |
1043 | + err = unix_mknod(dentry, &path, mode, &u_path); | |
1034 | 1044 | if (err) { |
1035 | 1045 | if (err == -EEXIST) |
1036 | 1046 | err = -EADDRINUSE; |
1037 | 1047 | |
... | ... | @@ -1038,9 +1048,9 @@ |
1038 | 1048 | goto out_up; |
1039 | 1049 | } |
1040 | 1050 | addr->hash = UNIX_HASH_SIZE; |
1041 | - hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); | |
1051 | + hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); | |
1042 | 1052 | spin_lock(&unix_table_lock); |
1043 | - u->path = path; | |
1053 | + u->path = u_path; | |
1044 | 1054 | list = &unix_socket_table[hash]; |
1045 | 1055 | } else { |
1046 | 1056 | spin_lock(&unix_table_lock); |
... | ... | @@ -1063,6 +1073,10 @@ |
1063 | 1073 | spin_unlock(&unix_table_lock); |
1064 | 1074 | out_up: |
1065 | 1075 | mutex_unlock(&u->readlock); |
1076 | +out_path: | |
1077 | + if (dentry) | |
1078 | + done_path_create(&path, dentry); | |
1079 | + | |
1066 | 1080 | out: |
1067 | 1081 | return err; |
1068 | 1082 | } |