Commit a2e2725541fad72416326798c2d7fa4dafb7d337
Committed by
David S. Miller
1 parent
c05e85a06e
Exists in
master
and in
20 other branches
net: Introduce recvmmsg socket syscall
recvmmsg means "receive multiple messages": it reduces the number of syscalls and net stack entry/exit operations needed to receive a batch of datagrams. Subsequent patches will introduce mechanisms whereby protocols that want to optimize this operation can provide an unlocked_recvmsg operation.

This takes into account comments made by:

- Paul Moore: sock_recvmsg is called only for the first datagram; sock_recvmsg_nosec is used for the rest.
- Caitlin Bestler: recvmmsg now has a struct timespec timeout that works in the same fashion as the ppoll one. If the underlying protocol returns a datagram with MSG_OOB set, recvmmsg returns right away with as many datagrams (plus the OOB one) as it has received so far.
- Rémi Denis-Courmont & Steven Whitehouse: if we receive N < vlen datagrams and then recvmsg returns an error, recvmmsg will return the successfully received datagrams, store the error, and return it in the next call.

This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg, where we will be able to acquire the lock only at batch start and end, not at every underlying recvmsg call.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 25 changed files with 261 additions and 50 deletions Side-by-side Diff
- arch/alpha/kernel/systbls.S
- arch/arm/kernel/calls.S
- arch/avr32/kernel/syscall_table.S
- arch/blackfin/mach-common/entry.S
- arch/ia64/kernel/entry.S
- arch/microblaze/kernel/syscall_table.S
- arch/mips/kernel/scall32-o32.S
- arch/mips/kernel/scall64-64.S
- arch/mips/kernel/scall64-n32.S
- arch/mips/kernel/scall64-o32.S
- arch/sh/kernel/syscalls_64.S
- arch/sparc/kernel/systbls_32.S
- arch/sparc/kernel/systbls_64.S
- arch/x86/ia32/ia32entry.S
- arch/x86/include/asm/unistd_32.h
- arch/x86/include/asm/unistd_64.h
- arch/x86/kernel/syscall_table_32.S
- arch/xtensa/include/asm/unistd.h
- include/linux/net.h
- include/linux/socket.h
- include/linux/syscalls.h
- include/net/compat.h
- kernel/sys_ni.c
- net/compat.c
- net/socket.c
arch/alpha/kernel/systbls.S
arch/arm/kernel/calls.S
arch/avr32/kernel/syscall_table.S
arch/blackfin/mach-common/entry.S
arch/ia64/kernel/entry.S
... | ... | @@ -1806,6 +1806,7 @@ |
1806 | 1806 | data8 sys_preadv |
1807 | 1807 | data8 sys_pwritev // 1320 |
1808 | 1808 | data8 sys_rt_tgsigqueueinfo |
1809 | + data8 sys_recvmmsg | |
1809 | 1810 | |
1810 | 1811 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls |
1811 | 1812 | #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ |
arch/microblaze/kernel/syscall_table.S
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-64.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-o32.S
arch/sh/kernel/syscalls_64.S
arch/sparc/kernel/systbls_32.S
... | ... | @@ -82,5 +82,5 @@ |
82 | 82 | /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate |
83 | 83 | /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 |
84 | 84 | /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv |
85 | -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open | |
85 | +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg |
arch/sparc/kernel/systbls_64.S
... | ... | @@ -83,7 +83,7 @@ |
83 | 83 | /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate |
84 | 84 | .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 |
85 | 85 | /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv |
86 | - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open | |
86 | + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg | |
87 | 87 | |
88 | 88 | #endif /* CONFIG_COMPAT */ |
89 | 89 | |
... | ... | @@ -158,5 +158,5 @@ |
158 | 158 | /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate |
159 | 159 | .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 |
160 | 160 | /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv |
161 | - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open | |
161 | + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg |
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/unistd_32.h
... | ... | @@ -342,10 +342,11 @@ |
342 | 342 | #define __NR_pwritev 334 |
343 | 343 | #define __NR_rt_tgsigqueueinfo 335 |
344 | 344 | #define __NR_perf_event_open 336 |
345 | +#define __NR_recvmmsg 337 | |
345 | 346 | |
346 | 347 | #ifdef __KERNEL__ |
347 | 348 | |
348 | -#define NR_syscalls 337 | |
349 | +#define NR_syscalls 338 | |
349 | 350 | |
350 | 351 | #define __ARCH_WANT_IPC_PARSE_VERSION |
351 | 352 | #define __ARCH_WANT_OLD_READDIR |
arch/x86/include/asm/unistd_64.h
... | ... | @@ -661,6 +661,8 @@ |
661 | 661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) |
662 | 662 | #define __NR_perf_event_open 298 |
663 | 663 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) |
664 | +#define __NR_recvmmsg 299 | |
665 | +__SYSCALL(__NR_recvmmsg, sys_recvmmsg) | |
664 | 666 | |
665 | 667 | #ifndef __NO_STUBS |
666 | 668 | #define __ARCH_WANT_OLD_READDIR |
arch/x86/kernel/syscall_table_32.S
arch/xtensa/include/asm/unistd.h
... | ... | @@ -681,8 +681,10 @@ |
681 | 681 | __SYSCALL(305, sys_ni_syscall, 0) |
682 | 682 | #define __NR_eventfd 306 |
683 | 683 | __SYSCALL(306, sys_eventfd, 1) |
684 | +#define __NR_recvmmsg 307 | |
685 | +__SYSCALL(307, sys_recvmmsg, 5) | |
684 | 686 | |
685 | -#define __NR_syscall_count 307 | |
687 | +#define __NR_syscall_count 308 | |
686 | 688 | |
687 | 689 | /* |
688 | 690 | * sysxtensa syscall handler |
include/linux/net.h
include/linux/socket.h
... | ... | @@ -65,6 +65,12 @@ |
65 | 65 | unsigned msg_flags; |
66 | 66 | }; |
67 | 67 | |
68 | +/* For recvmmsg/sendmmsg */ | |
69 | +struct mmsghdr { | |
70 | + struct msghdr msg_hdr; | |
71 | + unsigned msg_len; | |
72 | +}; | |
73 | + | |
68 | 74 | /* |
69 | 75 | * POSIX 1003.1g - ancillary data object information |
70 | 76 | * Ancillary data consits of a sequence of pairs of |
... | ... | @@ -312,6 +318,10 @@ |
312 | 318 | extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); |
313 | 319 | extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); |
314 | 320 | |
321 | +struct timespec; | |
322 | + | |
323 | +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | |
324 | + unsigned int flags, struct timespec *timeout); | |
315 | 325 | #endif |
316 | 326 | #endif /* not kernel and not glibc */ |
317 | 327 | #endif /* _LINUX_SOCKET_H */ |
include/linux/syscalls.h
... | ... | @@ -25,6 +25,7 @@ |
25 | 25 | struct list_head; |
26 | 26 | struct msgbuf; |
27 | 27 | struct msghdr; |
28 | +struct mmsghdr; | |
28 | 29 | struct msqid_ds; |
29 | 30 | struct new_utsname; |
30 | 31 | struct nfsctl_arg; |
... | ... | @@ -677,6 +678,9 @@ |
677 | 678 | asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, |
678 | 679 | struct sockaddr __user *, int __user *); |
679 | 680 | asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); |
681 | +asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, | |
682 | + unsigned int vlen, unsigned flags, | |
683 | + struct timespec __user *timeout); | |
680 | 684 | asmlinkage long sys_socket(int, int, int); |
681 | 685 | asmlinkage long sys_socketpair(int, int, int, int __user *); |
682 | 686 | asmlinkage long sys_socketcall(int call, unsigned long __user *args); |
include/net/compat.h
... | ... | @@ -18,6 +18,11 @@ |
18 | 18 | compat_uint_t msg_flags; |
19 | 19 | }; |
20 | 20 | |
21 | +struct compat_mmsghdr { | |
22 | + struct compat_msghdr msg_hdr; | |
23 | + compat_uint_t msg_len; | |
24 | +}; | |
25 | + | |
21 | 26 | struct compat_cmsghdr { |
22 | 27 | compat_size_t cmsg_len; |
23 | 28 | compat_int_t cmsg_level; |
... | ... | @@ -35,6 +40,9 @@ |
35 | 40 | extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); |
36 | 41 | extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); |
37 | 42 | extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); |
43 | +extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, | |
44 | + unsigned, unsigned, | |
45 | + struct timespec __user *); | |
38 | 46 | extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); |
39 | 47 | extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); |
40 | 48 |
kernel/sys_ni.c
... | ... | @@ -48,8 +48,10 @@ |
48 | 48 | cond_syscall(sys_sendmsg); |
49 | 49 | cond_syscall(compat_sys_sendmsg); |
50 | 50 | cond_syscall(sys_recvmsg); |
51 | +cond_syscall(sys_recvmmsg); | |
51 | 52 | cond_syscall(compat_sys_recvmsg); |
52 | 53 | cond_syscall(compat_sys_recvfrom); |
54 | +cond_syscall(compat_sys_recvmmsg); | |
53 | 55 | cond_syscall(sys_socketcall); |
54 | 56 | cond_syscall(sys_futex); |
55 | 57 | cond_syscall(compat_sys_futex); |
net/compat.c
... | ... | @@ -727,10 +727,10 @@ |
727 | 727 | |
728 | 728 | /* Argument list sizes for compat_sys_socketcall */ |
729 | 729 | #define AL(x) ((x) * sizeof(u32)) |
730 | -static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), | |
730 | +static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), | |
731 | 731 | AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), |
732 | 732 | AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), |
733 | - AL(4)}; | |
733 | + AL(4),AL(5)}; | |
734 | 734 | #undef AL |
735 | 735 | |
736 | 736 | asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) |
737 | 737 | |
... | ... | @@ -755,13 +755,36 @@ |
755 | 755 | return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); |
756 | 756 | } |
757 | 757 | |
758 | +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, | |
759 | + unsigned vlen, unsigned int flags, | |
760 | + struct timespec __user *timeout) | |
761 | +{ | |
762 | + int datagrams; | |
763 | + struct timespec ktspec; | |
764 | + struct compat_timespec __user *utspec = | |
765 | + (struct compat_timespec __user *)timeout; | |
766 | + | |
767 | + if (get_user(ktspec.tv_sec, &utspec->tv_sec) || | |
768 | + get_user(ktspec.tv_nsec, &utspec->tv_nsec)) | |
769 | + return -EFAULT; | |
770 | + | |
771 | + datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, | |
772 | + flags | MSG_CMSG_COMPAT, &ktspec); | |
773 | + if (datagrams > 0 && | |
774 | + (put_user(ktspec.tv_sec, &utspec->tv_sec) || | |
775 | + put_user(ktspec.tv_nsec, &utspec->tv_nsec))) | |
776 | + datagrams = -EFAULT; | |
777 | + | |
778 | + return datagrams; | |
779 | +} | |
780 | + | |
758 | 781 | asmlinkage long compat_sys_socketcall(int call, u32 __user *args) |
759 | 782 | { |
760 | 783 | int ret; |
761 | 784 | u32 a[6]; |
762 | 785 | u32 a0, a1; |
763 | 786 | |
764 | - if (call < SYS_SOCKET || call > SYS_ACCEPT4) | |
787 | + if (call < SYS_SOCKET || call > SYS_RECVMMSG) | |
765 | 788 | return -EINVAL; |
766 | 789 | if (copy_from_user(a, args, nas[call])) |
767 | 790 | return -EFAULT; |
... | ... | @@ -822,6 +845,10 @@ |
822 | 845 | break; |
823 | 846 | case SYS_RECVMSG: |
824 | 847 | ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); |
848 | + break; | |
849 | + case SYS_RECVMMSG: | |
850 | + ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], | |
851 | + compat_ptr(a[4])); | |
825 | 852 | break; |
826 | 853 | case SYS_ACCEPT4: |
827 | 854 | ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); |
net/socket.c
... | ... | @@ -683,10 +683,9 @@ |
683 | 683 | } |
684 | 684 | EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); |
685 | 685 | |
686 | -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, | |
687 | - struct msghdr *msg, size_t size, int flags) | |
686 | +static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, | |
687 | + struct msghdr *msg, size_t size, int flags) | |
688 | 688 | { |
689 | - int err; | |
690 | 689 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
691 | 690 | |
692 | 691 | si->sock = sock; |
693 | 692 | |
... | ... | @@ -695,13 +694,17 @@ |
695 | 694 | si->size = size; |
696 | 695 | si->flags = flags; |
697 | 696 | |
698 | - err = security_socket_recvmsg(sock, msg, size, flags); | |
699 | - if (err) | |
700 | - return err; | |
701 | - | |
702 | 697 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); |
703 | 698 | } |
704 | 699 | |
700 | +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, | |
701 | + struct msghdr *msg, size_t size, int flags) | |
702 | +{ | |
703 | + int err = security_socket_recvmsg(sock, msg, size, flags); | |
704 | + | |
705 | + return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); | |
706 | +} | |
707 | + | |
705 | 708 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, |
706 | 709 | size_t size, int flags) |
707 | 710 | { |
... | ... | @@ -717,6 +720,21 @@ |
717 | 720 | return ret; |
718 | 721 | } |
719 | 722 | |
723 | +static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, | |
724 | + size_t size, int flags) | |
725 | +{ | |
726 | + struct kiocb iocb; | |
727 | + struct sock_iocb siocb; | |
728 | + int ret; | |
729 | + | |
730 | + init_sync_kiocb(&iocb, NULL); | |
731 | + iocb.private = &siocb; | |
732 | + ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); | |
733 | + if (-EIOCBQUEUED == ret) | |
734 | + ret = wait_on_sync_kiocb(&iocb); | |
735 | + return ret; | |
736 | +} | |
737 | + | |
720 | 738 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, |
721 | 739 | struct kvec *vec, size_t num, size_t size, int flags) |
722 | 740 | { |
723 | 741 | |
724 | 742 | |
725 | 743 | |
... | ... | @@ -1983,22 +2001,15 @@ |
1983 | 2001 | return err; |
1984 | 2002 | } |
1985 | 2003 | |
1986 | -/* | |
1987 | - * BSD recvmsg interface | |
1988 | - */ | |
1989 | - | |
1990 | -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, | |
1991 | - unsigned int, flags) | |
2004 | +static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, | |
2005 | + struct msghdr *msg_sys, unsigned flags, int nosec) | |
1992 | 2006 | { |
1993 | 2007 | struct compat_msghdr __user *msg_compat = |
1994 | 2008 | (struct compat_msghdr __user *)msg; |
1995 | - struct socket *sock; | |
1996 | 2009 | struct iovec iovstack[UIO_FASTIOV]; |
1997 | 2010 | struct iovec *iov = iovstack; |
1998 | - struct msghdr msg_sys; | |
1999 | 2011 | unsigned long cmsg_ptr; |
2000 | 2012 | int err, iov_size, total_len, len; |
2001 | - int fput_needed; | |
2002 | 2013 | |
2003 | 2014 | /* kernel mode address */ |
2004 | 2015 | struct sockaddr_storage addr; |
2005 | 2016 | |
2006 | 2017 | |
2007 | 2018 | |
2008 | 2019 | |
2009 | 2020 | |
... | ... | @@ -2008,27 +2019,23 @@ |
2008 | 2019 | int __user *uaddr_len; |
2009 | 2020 | |
2010 | 2021 | if (MSG_CMSG_COMPAT & flags) { |
2011 | - if (get_compat_msghdr(&msg_sys, msg_compat)) | |
2022 | + if (get_compat_msghdr(msg_sys, msg_compat)) | |
2012 | 2023 | return -EFAULT; |
2013 | 2024 | } |
2014 | - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | |
2025 | + else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) | |
2015 | 2026 | return -EFAULT; |
2016 | 2027 | |
2017 | - sock = sockfd_lookup_light(fd, &err, &fput_needed); | |
2018 | - if (!sock) | |
2028 | + err = -EMSGSIZE; | |
2029 | + if (msg_sys->msg_iovlen > UIO_MAXIOV) | |
2019 | 2030 | goto out; |
2020 | 2031 | |
2021 | - err = -EMSGSIZE; | |
2022 | - if (msg_sys.msg_iovlen > UIO_MAXIOV) | |
2023 | - goto out_put; | |
2024 | - | |
2025 | 2032 | /* Check whether to allocate the iovec area */ |
2026 | 2033 | err = -ENOMEM; |
2027 | - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | |
2028 | - if (msg_sys.msg_iovlen > UIO_FASTIOV) { | |
2034 | + iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); | |
2035 | + if (msg_sys->msg_iovlen > UIO_FASTIOV) { | |
2029 | 2036 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
2030 | 2037 | if (!iov) |
2031 | - goto out_put; | |
2038 | + goto out; | |
2032 | 2039 | } |
2033 | 2040 | |
2034 | 2041 | /* |
2035 | 2042 | |
2036 | 2043 | |
2037 | 2044 | |
2038 | 2045 | |
2039 | 2046 | |
2040 | 2047 | |
2041 | 2048 | |
2042 | 2049 | |
... | ... | @@ -2036,46 +2043,47 @@ |
2036 | 2043 | * kernel msghdr to use the kernel address space) |
2037 | 2044 | */ |
2038 | 2045 | |
2039 | - uaddr = (__force void __user *)msg_sys.msg_name; | |
2046 | + uaddr = (__force void __user *)msg_sys->msg_name; | |
2040 | 2047 | uaddr_len = COMPAT_NAMELEN(msg); |
2041 | 2048 | if (MSG_CMSG_COMPAT & flags) { |
2042 | - err = verify_compat_iovec(&msg_sys, iov, | |
2049 | + err = verify_compat_iovec(msg_sys, iov, | |
2043 | 2050 | (struct sockaddr *)&addr, |
2044 | 2051 | VERIFY_WRITE); |
2045 | 2052 | } else |
2046 | - err = verify_iovec(&msg_sys, iov, | |
2053 | + err = verify_iovec(msg_sys, iov, | |
2047 | 2054 | (struct sockaddr *)&addr, |
2048 | 2055 | VERIFY_WRITE); |
2049 | 2056 | if (err < 0) |
2050 | 2057 | goto out_freeiov; |
2051 | 2058 | total_len = err; |
2052 | 2059 | |
2053 | - cmsg_ptr = (unsigned long)msg_sys.msg_control; | |
2054 | - msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); | |
2060 | + cmsg_ptr = (unsigned long)msg_sys->msg_control; | |
2061 | + msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); | |
2055 | 2062 | |
2056 | 2063 | if (sock->file->f_flags & O_NONBLOCK) |
2057 | 2064 | flags |= MSG_DONTWAIT; |
2058 | - err = sock_recvmsg(sock, &msg_sys, total_len, flags); | |
2065 | + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, | |
2066 | + total_len, flags); | |
2059 | 2067 | if (err < 0) |
2060 | 2068 | goto out_freeiov; |
2061 | 2069 | len = err; |
2062 | 2070 | |
2063 | 2071 | if (uaddr != NULL) { |
2064 | 2072 | err = move_addr_to_user((struct sockaddr *)&addr, |
2065 | - msg_sys.msg_namelen, uaddr, | |
2073 | + msg_sys->msg_namelen, uaddr, | |
2066 | 2074 | uaddr_len); |
2067 | 2075 | if (err < 0) |
2068 | 2076 | goto out_freeiov; |
2069 | 2077 | } |
2070 | - err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), | |
2078 | + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), | |
2071 | 2079 | COMPAT_FLAGS(msg)); |
2072 | 2080 | if (err) |
2073 | 2081 | goto out_freeiov; |
2074 | 2082 | if (MSG_CMSG_COMPAT & flags) |
2075 | - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, | |
2083 | + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, | |
2076 | 2084 | &msg_compat->msg_controllen); |
2077 | 2085 | else |
2078 | - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, | |
2086 | + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, | |
2079 | 2087 | &msg->msg_controllen); |
2080 | 2088 | if (err) |
2081 | 2089 | goto out_freeiov; |
2082 | 2090 | |
2083 | 2091 | |
2084 | 2092 | |
2085 | 2093 | |
... | ... | @@ -2084,21 +2092,150 @@ |
2084 | 2092 | out_freeiov: |
2085 | 2093 | if (iov != iovstack) |
2086 | 2094 | sock_kfree_s(sock->sk, iov, iov_size); |
2087 | -out_put: | |
2095 | +out: | |
2096 | + return err; | |
2097 | +} | |
2098 | + | |
2099 | +/* | |
2100 | + * BSD recvmsg interface | |
2101 | + */ | |
2102 | + | |
2103 | +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, | |
2104 | + unsigned int, flags) | |
2105 | +{ | |
2106 | + int fput_needed, err; | |
2107 | + struct msghdr msg_sys; | |
2108 | + struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); | |
2109 | + | |
2110 | + if (!sock) | |
2111 | + goto out; | |
2112 | + | |
2113 | + err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); | |
2114 | + | |
2088 | 2115 | fput_light(sock->file, fput_needed); |
2089 | 2116 | out: |
2090 | 2117 | return err; |
2091 | 2118 | } |
2092 | 2119 | |
2093 | -#ifdef __ARCH_WANT_SYS_SOCKETCALL | |
2120 | +/* | |
2121 | + * Linux recvmmsg interface | |
2122 | + */ | |
2094 | 2123 | |
2124 | +int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | |
2125 | + unsigned int flags, struct timespec *timeout) | |
2126 | +{ | |
2127 | + int fput_needed, err, datagrams; | |
2128 | + struct socket *sock; | |
2129 | + struct mmsghdr __user *entry; | |
2130 | + struct msghdr msg_sys; | |
2131 | + struct timespec end_time; | |
2132 | + | |
2133 | + if (timeout && | |
2134 | + poll_select_set_timeout(&end_time, timeout->tv_sec, | |
2135 | + timeout->tv_nsec)) | |
2136 | + return -EINVAL; | |
2137 | + | |
2138 | + datagrams = 0; | |
2139 | + | |
2140 | + sock = sockfd_lookup_light(fd, &err, &fput_needed); | |
2141 | + if (!sock) | |
2142 | + return err; | |
2143 | + | |
2144 | + err = sock_error(sock->sk); | |
2145 | + if (err) | |
2146 | + goto out_put; | |
2147 | + | |
2148 | + entry = mmsg; | |
2149 | + | |
2150 | + while (datagrams < vlen) { | |
2151 | + /* | |
2152 | + * No need to ask LSM for more than the first datagram. | |
2153 | + */ | |
2154 | + err = __sys_recvmsg(sock, (struct msghdr __user *)entry, | |
2155 | + &msg_sys, flags, datagrams); | |
2156 | + if (err < 0) | |
2157 | + break; | |
2158 | + err = put_user(err, &entry->msg_len); | |
2159 | + if (err) | |
2160 | + break; | |
2161 | + ++entry; | |
2162 | + ++datagrams; | |
2163 | + | |
2164 | + if (timeout) { | |
2165 | + ktime_get_ts(timeout); | |
2166 | + *timeout = timespec_sub(end_time, *timeout); | |
2167 | + if (timeout->tv_sec < 0) { | |
2168 | + timeout->tv_sec = timeout->tv_nsec = 0; | |
2169 | + break; | |
2170 | + } | |
2171 | + | |
2172 | + /* Timeout, return less than vlen datagrams */ | |
2173 | + if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) | |
2174 | + break; | |
2175 | + } | |
2176 | + | |
2177 | + /* Out of band data, return right away */ | |
2178 | + if (msg_sys.msg_flags & MSG_OOB) | |
2179 | + break; | |
2180 | + } | |
2181 | + | |
2182 | +out_put: | |
2183 | + fput_light(sock->file, fput_needed); | |
2184 | + | |
2185 | + if (err == 0) | |
2186 | + return datagrams; | |
2187 | + | |
2188 | + if (datagrams != 0) { | |
2189 | + /* | |
2190 | + * We may return less entries than requested (vlen) if the | |
2191 | + * sock is non block and there aren't enough datagrams... | |
2192 | + */ | |
2193 | + if (err != -EAGAIN) { | |
2194 | + /* | |
2195 | + * ... or if recvmsg returns an error after we | |
2196 | + * received some datagrams, where we record the | |
2197 | + * error to return on the next call or if the | |
2198 | + * app asks about it using getsockopt(SO_ERROR). | |
2199 | + */ | |
2200 | + sock->sk->sk_err = -err; | |
2201 | + } | |
2202 | + | |
2203 | + return datagrams; | |
2204 | + } | |
2205 | + | |
2206 | + return err; | |
2207 | +} | |
2208 | + | |
2209 | +SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, | |
2210 | + unsigned int, vlen, unsigned int, flags, | |
2211 | + struct timespec __user *, timeout) | |
2212 | +{ | |
2213 | + int datagrams; | |
2214 | + struct timespec timeout_sys; | |
2215 | + | |
2216 | + if (!timeout) | |
2217 | + return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); | |
2218 | + | |
2219 | + if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) | |
2220 | + return -EFAULT; | |
2221 | + | |
2222 | + datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); | |
2223 | + | |
2224 | + if (datagrams > 0 && | |
2225 | + copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) | |
2226 | + datagrams = -EFAULT; | |
2227 | + | |
2228 | + return datagrams; | |
2229 | +} | |
2230 | + | |
2231 | +#ifdef __ARCH_WANT_SYS_SOCKETCALL | |
2095 | 2232 | /* Argument list sizes for sys_socketcall */ |
2096 | 2233 | #define AL(x) ((x) * sizeof(unsigned long)) |
2097 | -static const unsigned char nargs[19]={ | |
2234 | +static const unsigned char nargs[20] = { | |
2098 | 2235 | AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), |
2099 | 2236 | AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), |
2100 | 2237 | AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), |
2101 | - AL(4) | |
2238 | + AL(4),AL(5) | |
2102 | 2239 | }; |
2103 | 2240 | |
2104 | 2241 | #undef AL |
... | ... | @@ -2118,7 +2255,7 @@ |
2118 | 2255 | int err; |
2119 | 2256 | unsigned int len; |
2120 | 2257 | |
2121 | - if (call < 1 || call > SYS_ACCEPT4) | |
2258 | + if (call < 1 || call > SYS_RECVMMSG) | |
2122 | 2259 | return -EINVAL; |
2123 | 2260 | |
2124 | 2261 | len = nargs[call]; |
... | ... | @@ -2195,6 +2332,10 @@ |
2195 | 2332 | break; |
2196 | 2333 | case SYS_RECVMSG: |
2197 | 2334 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); |
2335 | + break; | |
2336 | + case SYS_RECVMMSG: | |
2337 | + err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], | |
2338 | + (struct timespec __user *)a[4]); | |
2198 | 2339 | break; |
2199 | 2340 | case SYS_ACCEPT4: |
2200 | 2341 | err = sys_accept4(a0, (struct sockaddr __user *)a1, |