Commit 77d2720059618b9b6e827a8b73831eb6c6fad63c

Authored by Ulrich Drepper
Committed by Linus Torvalds
1 parent 99829b8329

flag parameters: NONBLOCK in socket and socketpair

This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept.  To do this, the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.

Given that in modern, scalable programs almost all socket connections are
non-blocking, and given the minimal additional cost of the new functionality,
I see no reason not to add this code.

The following test must be adjusted for architectures other than x86 and
x86-64, and also if the syscall numbers have changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

/* Pick the way paccept is invoked when the system headers do not provide
   __NR_paccept: x86-64 uses a dedicated syscall number, i386 goes through
   the socketcall multiplexer with the SYS_PACCEPT sub-call number.  Other
   architectures must supply the number themselves.  */
#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

/* paccept (fd, addr, addrlen, mask, flags): raw invocation of the paccept
   system call.  Every argument is parenthesized so that arbitrary
   expressions (e.g. a conditional) can be passed safely.
   NOTE(review): the constant 8 following MASK is presumably the size of
   the signal set in bytes -- confirm against the kernel's paccept
   signature.  */
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) (fd), (long) (addr), (long) (addrlen), (long) (mask), 8, \
       (long) (flags) }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, (fd), (addr), (addrlen), (mask), 8, (flags))
#endif

/* TCP port on the loopback interface used by the paccept tests.  */
#define PORT 57392

/* Only provide SOCK_NONBLOCK when the system headers do not already
   define it, so an existing definition is never overridden.  */
#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK O_NONBLOCK
#endif

static pthread_barrier_t b;

/* Open a TCP connection to the loopback listener on PORT and close it
   again immediately.  Failures are reported on stdout but not propagated:
   the caller must keep taking part in the barrier protocol regardless.  */
static void
connect_to_listener (void)
{
  int s = socket (AF_INET, SOCK_STREAM, 0);
  if (s == -1)
    {
      puts ("socket in client thread failed");
      return;
    }

  /* Zero the whole structure so no uninitialized padding is handed to
     the kernel.  */
  struct sockaddr_in sin = { 0 };
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  if (connect (s, (const struct sockaddr *) &sin, sizeof (sin)) == -1)
    puts ("connect in client thread failed");
  close (s);
}

/* Client thread: performs two connection rounds against the listener set
   up by main.  Each round is bracketed by two waits on barrier B -- the
   first wait precedes the connection attempt, the second follows the
   close of the client socket.  ARG is unused.  Always returns NULL.  */
static void *
tf (void *arg)
{
  (void) arg;

  for (int round = 0; round < 2; ++round)
    {
      pthread_barrier_wait (&b);
      connect_to_listener ();
      pthread_barrier_wait (&b);
    }

  return NULL;
}

int
main (void)
{
  int fd;
  fd = socket (PF_INET, SOCK_STREAM, 0);
  if (fd == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("socket(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
  if (fd == -1)
    {
      puts ("socket(SOCK_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  int fds[2];
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (fl & O_NONBLOCK)
        {
          printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
    {
      puts ("socketpair(SOCK_NONBLOCK) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((fl & O_NONBLOCK) == 0)
        {
          printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  if (fl & O_NONBLOCK)
    {
      puts ("paccept(0) set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);

  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_NONBLOCK) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);
  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 11 additions and 11 deletions Inline Diff

1 /* 1 /*
2 * NET An implementation of the SOCKET network access protocol. 2 * NET An implementation of the SOCKET network access protocol.
3 * This is the master header file for the Linux NET layer, 3 * This is the master header file for the Linux NET layer,
4 * or, in plain English: the networking handling part of the 4 * or, in plain English: the networking handling part of the
5 * kernel. 5 * kernel.
6 * 6 *
7 * Version: @(#)net.h 1.0.3 05/25/93 7 * Version: @(#)net.h 1.0.3 05/25/93
8 * 8 *
9 * Authors: Orest Zborowski, <obz@Kodak.COM> 9 * Authors: Orest Zborowski, <obz@Kodak.COM>
10 * Ross Biro 10 * Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * 12 *
13 * This program is free software; you can redistribute it and/or 13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License 14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version. 16 * 2 of the License, or (at your option) any later version.
17 */ 17 */
18 #ifndef _LINUX_NET_H 18 #ifndef _LINUX_NET_H
19 #define _LINUX_NET_H 19 #define _LINUX_NET_H
20 20
21 #include <linux/wait.h> 21 #include <linux/wait.h>
22 #include <linux/socket.h> 22 #include <linux/socket.h>
23 #include <linux/fcntl.h> /* For O_CLOEXEC */ 23 #include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
24 #include <asm/socket.h> 24 #include <asm/socket.h>
25 25
26 struct poll_table_struct; 26 struct poll_table_struct;
27 struct pipe_inode_info; 27 struct pipe_inode_info;
28 struct inode; 28 struct inode;
29 struct net; 29 struct net;
30 30
31 #define NPROTO AF_MAX 31 #define NPROTO AF_MAX
32 32
33 #define SYS_SOCKET 1 /* sys_socket(2) */ 33 #define SYS_SOCKET 1 /* sys_socket(2) */
34 #define SYS_BIND 2 /* sys_bind(2) */ 34 #define SYS_BIND 2 /* sys_bind(2) */
35 #define SYS_CONNECT 3 /* sys_connect(2) */ 35 #define SYS_CONNECT 3 /* sys_connect(2) */
36 #define SYS_LISTEN 4 /* sys_listen(2) */ 36 #define SYS_LISTEN 4 /* sys_listen(2) */
37 #define SYS_ACCEPT 5 /* sys_accept(2) */ 37 #define SYS_ACCEPT 5 /* sys_accept(2) */
38 #define SYS_GETSOCKNAME 6 /* sys_getsockname(2) */ 38 #define SYS_GETSOCKNAME 6 /* sys_getsockname(2) */
39 #define SYS_GETPEERNAME 7 /* sys_getpeername(2) */ 39 #define SYS_GETPEERNAME 7 /* sys_getpeername(2) */
40 #define SYS_SOCKETPAIR 8 /* sys_socketpair(2) */ 40 #define SYS_SOCKETPAIR 8 /* sys_socketpair(2) */
41 #define SYS_SEND 9 /* sys_send(2) */ 41 #define SYS_SEND 9 /* sys_send(2) */
42 #define SYS_RECV 10 /* sys_recv(2) */ 42 #define SYS_RECV 10 /* sys_recv(2) */
43 #define SYS_SENDTO 11 /* sys_sendto(2) */ 43 #define SYS_SENDTO 11 /* sys_sendto(2) */
44 #define SYS_RECVFROM 12 /* sys_recvfrom(2) */ 44 #define SYS_RECVFROM 12 /* sys_recvfrom(2) */
45 #define SYS_SHUTDOWN 13 /* sys_shutdown(2) */ 45 #define SYS_SHUTDOWN 13 /* sys_shutdown(2) */
46 #define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */ 46 #define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */
47 #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ 47 #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
48 #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ 48 #define SYS_SENDMSG 16 /* sys_sendmsg(2) */
49 #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ 49 #define SYS_RECVMSG 17 /* sys_recvmsg(2) */
50 #define SYS_PACCEPT 18 /* sys_paccept(2) */ 50 #define SYS_PACCEPT 18 /* sys_paccept(2) */
51 51
52 typedef enum { 52 typedef enum {
53 SS_FREE = 0, /* not allocated */ 53 SS_FREE = 0, /* not allocated */
54 SS_UNCONNECTED, /* unconnected to any socket */ 54 SS_UNCONNECTED, /* unconnected to any socket */
55 SS_CONNECTING, /* in process of connecting */ 55 SS_CONNECTING, /* in process of connecting */
56 SS_CONNECTED, /* connected to socket */ 56 SS_CONNECTED, /* connected to socket */
57 SS_DISCONNECTING /* in process of disconnecting */ 57 SS_DISCONNECTING /* in process of disconnecting */
58 } socket_state; 58 } socket_state;
59 59
60 #define __SO_ACCEPTCON (1 << 16) /* performed a listen */ 60 #define __SO_ACCEPTCON (1 << 16) /* performed a listen */
61 61
62 #ifdef __KERNEL__ 62 #ifdef __KERNEL__
63 #include <linux/stringify.h> 63 #include <linux/stringify.h>
64 #include <linux/random.h> 64 #include <linux/random.h>
65 65
66 #define SOCK_ASYNC_NOSPACE 0 66 #define SOCK_ASYNC_NOSPACE 0
67 #define SOCK_ASYNC_WAITDATA 1 67 #define SOCK_ASYNC_WAITDATA 1
68 #define SOCK_NOSPACE 2 68 #define SOCK_NOSPACE 2
69 #define SOCK_PASSCRED 3 69 #define SOCK_PASSCRED 3
70 #define SOCK_PASSSEC 4 70 #define SOCK_PASSSEC 4
71 71
72 #ifndef ARCH_HAS_SOCKET_TYPES 72 #ifndef ARCH_HAS_SOCKET_TYPES
73 /** 73 /**
74 * enum sock_type - Socket types 74 * enum sock_type - Socket types
75 * @SOCK_STREAM: stream (connection) socket 75 * @SOCK_STREAM: stream (connection) socket
76 * @SOCK_DGRAM: datagram (conn.less) socket 76 * @SOCK_DGRAM: datagram (conn.less) socket
77 * @SOCK_RAW: raw socket 77 * @SOCK_RAW: raw socket
78 * @SOCK_RDM: reliably-delivered message 78 * @SOCK_RDM: reliably-delivered message
79 * @SOCK_SEQPACKET: sequential packet socket 79 * @SOCK_SEQPACKET: sequential packet socket
80 * @SOCK_DCCP: Datagram Congestion Control Protocol socket 80 * @SOCK_DCCP: Datagram Congestion Control Protocol socket
81 * @SOCK_PACKET: linux specific way of getting packets at the dev level. 81 * @SOCK_PACKET: linux specific way of getting packets at the dev level.
82 * For writing rarp and other similar things on the user level. 82 * For writing rarp and other similar things on the user level.
83 * 83 *
84 * When adding some new socket type please 84 * When adding some new socket type please
85 * grep ARCH_HAS_SOCKET_TYPE include/asm-* /socket.h, at least MIPS 85 * grep ARCH_HAS_SOCKET_TYPE include/asm-* /socket.h, at least MIPS
86 * overrides this enum for binary compat reasons. 86 * overrides this enum for binary compat reasons.
87 */ 87 */
88 enum sock_type { 88 enum sock_type {
89 SOCK_STREAM = 1, 89 SOCK_STREAM = 1,
90 SOCK_DGRAM = 2, 90 SOCK_DGRAM = 2,
91 SOCK_RAW = 3, 91 SOCK_RAW = 3,
92 SOCK_RDM = 4, 92 SOCK_RDM = 4,
93 SOCK_SEQPACKET = 5, 93 SOCK_SEQPACKET = 5,
94 SOCK_DCCP = 6, 94 SOCK_DCCP = 6,
95 SOCK_PACKET = 10, 95 SOCK_PACKET = 10,
96 }; 96 };
97 97
98 #define SOCK_MAX (SOCK_PACKET + 1) 98 #define SOCK_MAX (SOCK_PACKET + 1)
99 /* Mask which covers at least up to SOCK_MASK-1. The 99 /* Mask which covers at least up to SOCK_MASK-1. The
100 * remaining bits are used as flags. */ 100 * remaining bits are used as flags. */
101 #define SOCK_TYPE_MASK 0xf 101 #define SOCK_TYPE_MASK 0xf
102 102
103 /* Flags for socket, socketpair, paccept */ 103 /* Flags for socket, socketpair, paccept */
104 #define SOCK_CLOEXEC O_CLOEXEC 104 #define SOCK_CLOEXEC O_CLOEXEC
105 #ifndef SOCK_NONBLOCK 105 #ifndef SOCK_NONBLOCK
106 #define SOCK_NONBLOCK O_NONBLOCK 106 #define SOCK_NONBLOCK O_NONBLOCK
107 #endif 107 #endif
108 108
109 #endif /* ARCH_HAS_SOCKET_TYPES */ 109 #endif /* ARCH_HAS_SOCKET_TYPES */
110 110
111 enum sock_shutdown_cmd { 111 enum sock_shutdown_cmd {
112 SHUT_RD = 0, 112 SHUT_RD = 0,
113 SHUT_WR = 1, 113 SHUT_WR = 1,
114 SHUT_RDWR = 2, 114 SHUT_RDWR = 2,
115 }; 115 };
116 116
117 /** 117 /**
118 * struct socket - general BSD socket 118 * struct socket - general BSD socket
119 * @state: socket state (%SS_CONNECTED, etc) 119 * @state: socket state (%SS_CONNECTED, etc)
120 * @type: socket type (%SOCK_STREAM, etc) 120 * @type: socket type (%SOCK_STREAM, etc)
121 * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) 121 * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
122 * @ops: protocol specific socket operations 122 * @ops: protocol specific socket operations
123 * @fasync_list: Asynchronous wake up list 123 * @fasync_list: Asynchronous wake up list
124 * @file: File back pointer for gc 124 * @file: File back pointer for gc
125 * @sk: internal networking protocol agnostic socket representation 125 * @sk: internal networking protocol agnostic socket representation
126 * @wait: wait queue for several uses 126 * @wait: wait queue for several uses
127 */ 127 */
128 struct socket { 128 struct socket {
129 socket_state state; 129 socket_state state;
130 short type; 130 short type;
131 unsigned long flags; 131 unsigned long flags;
132 const struct proto_ops *ops; 132 const struct proto_ops *ops;
133 struct fasync_struct *fasync_list; 133 struct fasync_struct *fasync_list;
134 struct file *file; 134 struct file *file;
135 struct sock *sk; 135 struct sock *sk;
136 wait_queue_head_t wait; 136 wait_queue_head_t wait;
137 }; 137 };
138 138
139 struct vm_area_struct; 139 struct vm_area_struct;
140 struct page; 140 struct page;
141 struct kiocb; 141 struct kiocb;
142 struct sockaddr; 142 struct sockaddr;
143 struct msghdr; 143 struct msghdr;
144 struct module; 144 struct module;
145 145
146 struct proto_ops { 146 struct proto_ops {
147 int family; 147 int family;
148 struct module *owner; 148 struct module *owner;
149 int (*release) (struct socket *sock); 149 int (*release) (struct socket *sock);
150 int (*bind) (struct socket *sock, 150 int (*bind) (struct socket *sock,
151 struct sockaddr *myaddr, 151 struct sockaddr *myaddr,
152 int sockaddr_len); 152 int sockaddr_len);
153 int (*connect) (struct socket *sock, 153 int (*connect) (struct socket *sock,
154 struct sockaddr *vaddr, 154 struct sockaddr *vaddr,
155 int sockaddr_len, int flags); 155 int sockaddr_len, int flags);
156 int (*socketpair)(struct socket *sock1, 156 int (*socketpair)(struct socket *sock1,
157 struct socket *sock2); 157 struct socket *sock2);
158 int (*accept) (struct socket *sock, 158 int (*accept) (struct socket *sock,
159 struct socket *newsock, int flags); 159 struct socket *newsock, int flags);
160 int (*getname) (struct socket *sock, 160 int (*getname) (struct socket *sock,
161 struct sockaddr *addr, 161 struct sockaddr *addr,
162 int *sockaddr_len, int peer); 162 int *sockaddr_len, int peer);
163 unsigned int (*poll) (struct file *file, struct socket *sock, 163 unsigned int (*poll) (struct file *file, struct socket *sock,
164 struct poll_table_struct *wait); 164 struct poll_table_struct *wait);
165 int (*ioctl) (struct socket *sock, unsigned int cmd, 165 int (*ioctl) (struct socket *sock, unsigned int cmd,
166 unsigned long arg); 166 unsigned long arg);
167 int (*compat_ioctl) (struct socket *sock, unsigned int cmd, 167 int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
168 unsigned long arg); 168 unsigned long arg);
169 int (*listen) (struct socket *sock, int len); 169 int (*listen) (struct socket *sock, int len);
170 int (*shutdown) (struct socket *sock, int flags); 170 int (*shutdown) (struct socket *sock, int flags);
171 int (*setsockopt)(struct socket *sock, int level, 171 int (*setsockopt)(struct socket *sock, int level,
172 int optname, char __user *optval, int optlen); 172 int optname, char __user *optval, int optlen);
173 int (*getsockopt)(struct socket *sock, int level, 173 int (*getsockopt)(struct socket *sock, int level,
174 int optname, char __user *optval, int __user *optlen); 174 int optname, char __user *optval, int __user *optlen);
175 int (*compat_setsockopt)(struct socket *sock, int level, 175 int (*compat_setsockopt)(struct socket *sock, int level,
176 int optname, char __user *optval, int optlen); 176 int optname, char __user *optval, int optlen);
177 int (*compat_getsockopt)(struct socket *sock, int level, 177 int (*compat_getsockopt)(struct socket *sock, int level,
178 int optname, char __user *optval, int __user *optlen); 178 int optname, char __user *optval, int __user *optlen);
179 int (*sendmsg) (struct kiocb *iocb, struct socket *sock, 179 int (*sendmsg) (struct kiocb *iocb, struct socket *sock,
180 struct msghdr *m, size_t total_len); 180 struct msghdr *m, size_t total_len);
181 int (*recvmsg) (struct kiocb *iocb, struct socket *sock, 181 int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
182 struct msghdr *m, size_t total_len, 182 struct msghdr *m, size_t total_len,
183 int flags); 183 int flags);
184 int (*mmap) (struct file *file, struct socket *sock, 184 int (*mmap) (struct file *file, struct socket *sock,
185 struct vm_area_struct * vma); 185 struct vm_area_struct * vma);
186 ssize_t (*sendpage) (struct socket *sock, struct page *page, 186 ssize_t (*sendpage) (struct socket *sock, struct page *page,
187 int offset, size_t size, int flags); 187 int offset, size_t size, int flags);
188 ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, 188 ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
189 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 189 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
190 }; 190 };
191 191
192 struct net_proto_family { 192 struct net_proto_family {
193 int family; 193 int family;
194 int (*create)(struct net *net, struct socket *sock, int protocol); 194 int (*create)(struct net *net, struct socket *sock, int protocol);
195 struct module *owner; 195 struct module *owner;
196 }; 196 };
197 197
198 struct iovec; 198 struct iovec;
199 struct kvec; 199 struct kvec;
200 200
201 enum { 201 enum {
202 SOCK_WAKE_IO, 202 SOCK_WAKE_IO,
203 SOCK_WAKE_WAITD, 203 SOCK_WAKE_WAITD,
204 SOCK_WAKE_SPACE, 204 SOCK_WAKE_SPACE,
205 SOCK_WAKE_URG, 205 SOCK_WAKE_URG,
206 }; 206 };
207 207
208 extern int sock_wake_async(struct socket *sk, int how, int band); 208 extern int sock_wake_async(struct socket *sk, int how, int band);
209 extern int sock_register(const struct net_proto_family *fam); 209 extern int sock_register(const struct net_proto_family *fam);
210 extern void sock_unregister(int family); 210 extern void sock_unregister(int family);
211 extern int sock_create(int family, int type, int proto, 211 extern int sock_create(int family, int type, int proto,
212 struct socket **res); 212 struct socket **res);
213 extern int sock_create_kern(int family, int type, int proto, 213 extern int sock_create_kern(int family, int type, int proto,
214 struct socket **res); 214 struct socket **res);
215 extern int sock_create_lite(int family, int type, int proto, 215 extern int sock_create_lite(int family, int type, int proto,
216 struct socket **res); 216 struct socket **res);
217 extern void sock_release(struct socket *sock); 217 extern void sock_release(struct socket *sock);
218 extern int sock_sendmsg(struct socket *sock, struct msghdr *msg, 218 extern int sock_sendmsg(struct socket *sock, struct msghdr *msg,
219 size_t len); 219 size_t len);
220 extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, 220 extern int sock_recvmsg(struct socket *sock, struct msghdr *msg,
221 size_t size, int flags); 221 size_t size, int flags);
222 extern int sock_map_fd(struct socket *sock, int flags); 222 extern int sock_map_fd(struct socket *sock, int flags);
223 extern struct socket *sockfd_lookup(int fd, int *err); 223 extern struct socket *sockfd_lookup(int fd, int *err);
224 #define sockfd_put(sock) fput(sock->file) 224 #define sockfd_put(sock) fput(sock->file)
225 extern int net_ratelimit(void); 225 extern int net_ratelimit(void);
226 extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, 226 extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
227 int __user *upeer_addrlen, int flags); 227 int __user *upeer_addrlen, int flags);
228 228
229 #define net_random() random32() 229 #define net_random() random32()
230 #define net_srandom(seed) srandom32((__force u32)seed) 230 #define net_srandom(seed) srandom32((__force u32)seed)
231 231
232 extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 232 extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
233 struct kvec *vec, size_t num, size_t len); 233 struct kvec *vec, size_t num, size_t len);
234 extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 234 extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
235 struct kvec *vec, size_t num, 235 struct kvec *vec, size_t num,
236 size_t len, int flags); 236 size_t len, int flags);
237 237
238 extern int kernel_bind(struct socket *sock, struct sockaddr *addr, 238 extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
239 int addrlen); 239 int addrlen);
240 extern int kernel_listen(struct socket *sock, int backlog); 240 extern int kernel_listen(struct socket *sock, int backlog);
241 extern int kernel_accept(struct socket *sock, struct socket **newsock, 241 extern int kernel_accept(struct socket *sock, struct socket **newsock,
242 int flags); 242 int flags);
243 extern int kernel_connect(struct socket *sock, struct sockaddr *addr, 243 extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
244 int addrlen, int flags); 244 int addrlen, int flags);
245 extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 245 extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
246 int *addrlen); 246 int *addrlen);
247 extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 247 extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
248 int *addrlen); 248 int *addrlen);
249 extern int kernel_getsockopt(struct socket *sock, int level, int optname, 249 extern int kernel_getsockopt(struct socket *sock, int level, int optname,
250 char *optval, int *optlen); 250 char *optval, int *optlen);
251 extern int kernel_setsockopt(struct socket *sock, int level, int optname, 251 extern int kernel_setsockopt(struct socket *sock, int level, int optname,
252 char *optval, int optlen); 252 char *optval, int optlen);
253 extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, 253 extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
254 size_t size, int flags); 254 size_t size, int flags);
255 extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); 255 extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
256 extern int kernel_sock_shutdown(struct socket *sock, 256 extern int kernel_sock_shutdown(struct socket *sock,
257 enum sock_shutdown_cmd how); 257 enum sock_shutdown_cmd how);
258 258
259 #ifndef CONFIG_SMP 259 #ifndef CONFIG_SMP
260 #define SOCKOPS_WRAPPED(name) name 260 #define SOCKOPS_WRAPPED(name) name
261 #define SOCKOPS_WRAP(name, fam) 261 #define SOCKOPS_WRAP(name, fam)
262 #else 262 #else
263 263
264 #define SOCKOPS_WRAPPED(name) __unlocked_##name 264 #define SOCKOPS_WRAPPED(name) __unlocked_##name
265 265
266 #define SOCKCALL_WRAP(name, call, parms, args) \ 266 #define SOCKCALL_WRAP(name, call, parms, args) \
267 static int __lock_##name##_##call parms \ 267 static int __lock_##name##_##call parms \
268 { \ 268 { \
269 int ret; \ 269 int ret; \
270 lock_kernel(); \ 270 lock_kernel(); \
271 ret = __unlocked_##name##_ops.call args ;\ 271 ret = __unlocked_##name##_ops.call args ;\
272 unlock_kernel(); \ 272 unlock_kernel(); \
273 return ret; \ 273 return ret; \
274 } 274 }
275 275
276 #define SOCKCALL_UWRAP(name, call, parms, args) \ 276 #define SOCKCALL_UWRAP(name, call, parms, args) \
277 static unsigned int __lock_##name##_##call parms \ 277 static unsigned int __lock_##name##_##call parms \
278 { \ 278 { \
279 int ret; \ 279 int ret; \
280 lock_kernel(); \ 280 lock_kernel(); \
281 ret = __unlocked_##name##_ops.call args ;\ 281 ret = __unlocked_##name##_ops.call args ;\
282 unlock_kernel(); \ 282 unlock_kernel(); \
283 return ret; \ 283 return ret; \
284 } 284 }
285 285
286 286
287 #define SOCKOPS_WRAP(name, fam) \ 287 #define SOCKOPS_WRAP(name, fam) \
288 SOCKCALL_WRAP(name, release, (struct socket *sock), (sock)) \ 288 SOCKCALL_WRAP(name, release, (struct socket *sock), (sock)) \
289 SOCKCALL_WRAP(name, bind, (struct socket *sock, struct sockaddr *uaddr, int addr_len), \ 289 SOCKCALL_WRAP(name, bind, (struct socket *sock, struct sockaddr *uaddr, int addr_len), \
290 (sock, uaddr, addr_len)) \ 290 (sock, uaddr, addr_len)) \
291 SOCKCALL_WRAP(name, connect, (struct socket *sock, struct sockaddr * uaddr, \ 291 SOCKCALL_WRAP(name, connect, (struct socket *sock, struct sockaddr * uaddr, \
292 int addr_len, int flags), \ 292 int addr_len, int flags), \
293 (sock, uaddr, addr_len, flags)) \ 293 (sock, uaddr, addr_len, flags)) \
294 SOCKCALL_WRAP(name, socketpair, (struct socket *sock1, struct socket *sock2), \ 294 SOCKCALL_WRAP(name, socketpair, (struct socket *sock1, struct socket *sock2), \
295 (sock1, sock2)) \ 295 (sock1, sock2)) \
296 SOCKCALL_WRAP(name, accept, (struct socket *sock, struct socket *newsock, \ 296 SOCKCALL_WRAP(name, accept, (struct socket *sock, struct socket *newsock, \
297 int flags), (sock, newsock, flags)) \ 297 int flags), (sock, newsock, flags)) \
298 SOCKCALL_WRAP(name, getname, (struct socket *sock, struct sockaddr *uaddr, \ 298 SOCKCALL_WRAP(name, getname, (struct socket *sock, struct sockaddr *uaddr, \
299 int *addr_len, int peer), (sock, uaddr, addr_len, peer)) \ 299 int *addr_len, int peer), (sock, uaddr, addr_len, peer)) \
300 SOCKCALL_UWRAP(name, poll, (struct file *file, struct socket *sock, struct poll_table_struct *wait), \ 300 SOCKCALL_UWRAP(name, poll, (struct file *file, struct socket *sock, struct poll_table_struct *wait), \
301 (file, sock, wait)) \ 301 (file, sock, wait)) \
302 SOCKCALL_WRAP(name, ioctl, (struct socket *sock, unsigned int cmd, \ 302 SOCKCALL_WRAP(name, ioctl, (struct socket *sock, unsigned int cmd, \
303 unsigned long arg), (sock, cmd, arg)) \ 303 unsigned long arg), (sock, cmd, arg)) \
304 SOCKCALL_WRAP(name, compat_ioctl, (struct socket *sock, unsigned int cmd, \ 304 SOCKCALL_WRAP(name, compat_ioctl, (struct socket *sock, unsigned int cmd, \
305 unsigned long arg), (sock, cmd, arg)) \ 305 unsigned long arg), (sock, cmd, arg)) \
306 SOCKCALL_WRAP(name, listen, (struct socket *sock, int len), (sock, len)) \ 306 SOCKCALL_WRAP(name, listen, (struct socket *sock, int len), (sock, len)) \
307 SOCKCALL_WRAP(name, shutdown, (struct socket *sock, int flags), (sock, flags)) \ 307 SOCKCALL_WRAP(name, shutdown, (struct socket *sock, int flags), (sock, flags)) \
308 SOCKCALL_WRAP(name, setsockopt, (struct socket *sock, int level, int optname, \ 308 SOCKCALL_WRAP(name, setsockopt, (struct socket *sock, int level, int optname, \
309 char __user *optval, int optlen), (sock, level, optname, optval, optlen)) \ 309 char __user *optval, int optlen), (sock, level, optname, optval, optlen)) \
310 SOCKCALL_WRAP(name, getsockopt, (struct socket *sock, int level, int optname, \ 310 SOCKCALL_WRAP(name, getsockopt, (struct socket *sock, int level, int optname, \
311 char __user *optval, int __user *optlen), (sock, level, optname, optval, optlen)) \ 311 char __user *optval, int __user *optlen), (sock, level, optname, optval, optlen)) \
312 SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len), \ 312 SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len), \
313 (iocb, sock, m, len)) \ 313 (iocb, sock, m, len)) \
314 SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags), \ 314 SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags), \
315 (iocb, sock, m, len, flags)) \ 315 (iocb, sock, m, len, flags)) \
316 SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \ 316 SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \
317 (file, sock, vma)) \ 317 (file, sock, vma)) \
318 \ 318 \
319 static const struct proto_ops name##_ops = { \ 319 static const struct proto_ops name##_ops = { \
320 .family = fam, \ 320 .family = fam, \
321 .owner = THIS_MODULE, \ 321 .owner = THIS_MODULE, \
322 .release = __lock_##name##_release, \ 322 .release = __lock_##name##_release, \
323 .bind = __lock_##name##_bind, \ 323 .bind = __lock_##name##_bind, \
324 .connect = __lock_##name##_connect, \ 324 .connect = __lock_##name##_connect, \
325 .socketpair = __lock_##name##_socketpair, \ 325 .socketpair = __lock_##name##_socketpair, \
326 .accept = __lock_##name##_accept, \ 326 .accept = __lock_##name##_accept, \
327 .getname = __lock_##name##_getname, \ 327 .getname = __lock_##name##_getname, \
328 .poll = __lock_##name##_poll, \ 328 .poll = __lock_##name##_poll, \
329 .ioctl = __lock_##name##_ioctl, \ 329 .ioctl = __lock_##name##_ioctl, \
330 .compat_ioctl = __lock_##name##_compat_ioctl, \ 330 .compat_ioctl = __lock_##name##_compat_ioctl, \
331 .listen = __lock_##name##_listen, \ 331 .listen = __lock_##name##_listen, \
332 .shutdown = __lock_##name##_shutdown, \ 332 .shutdown = __lock_##name##_shutdown, \
333 .setsockopt = __lock_##name##_setsockopt, \ 333 .setsockopt = __lock_##name##_setsockopt, \
334 .getsockopt = __lock_##name##_getsockopt, \ 334 .getsockopt = __lock_##name##_getsockopt, \
335 .sendmsg = __lock_##name##_sendmsg, \ 335 .sendmsg = __lock_##name##_sendmsg, \
336 .recvmsg = __lock_##name##_recvmsg, \ 336 .recvmsg = __lock_##name##_recvmsg, \
337 .mmap = __lock_##name##_mmap, \ 337 .mmap = __lock_##name##_mmap, \
338 }; 338 };
339 339
340 #endif 340 #endif
341 341
342 #define MODULE_ALIAS_NETPROTO(proto) \ 342 #define MODULE_ALIAS_NETPROTO(proto) \
343 MODULE_ALIAS("net-pf-" __stringify(proto)) 343 MODULE_ALIAS("net-pf-" __stringify(proto))
344 344
345 #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ 345 #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
346 MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) 346 MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))
347 347
348 #define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) \ 348 #define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) \
349 MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \ 349 MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
350 "-type-" __stringify(type)) 350 "-type-" __stringify(type))
351 351
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
/* Declarations only; both variables are defined outside this header. */
extern int net_msg_cost;
extern int net_msg_burst;
#endif
357 357
358 #endif /* __KERNEL__ */ 358 #endif /* __KERNEL__ */
359 #endif /* _LINUX_NET_H */ 359 #endif /* _LINUX_NET_H */
360 360
1 /* 1 /*
2 * NET An implementation of the SOCKET network access protocol. 2 * NET An implementation of the SOCKET network access protocol.
3 * 3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95 4 * Version: @(#)socket.c 1.1.93 18/02/95
5 * 5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM> 6 * Authors: Orest Zborowski, <obz@Kodak.COM>
7 * Ross Biro 7 * Ross Biro
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 * 9 *
10 * Fixes: 10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in 11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown() 12 * shutdown()
13 * Alan Cox : verify_area() fixes 13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI 14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug 15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very 16 * Alan Cox : Moved a load of checks to the very
17 * top level. 17 * top level.
18 * Alan Cox : Move address structures to/from user 18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers. 19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends. 20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the 21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers). 22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) 23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line 24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable. 25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic, 26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr. 27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be 28 * Uphoff's max is used as max to be
29 * allowed to allocate. 29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation 30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now. 31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public 32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type 33 * for NetROM and future kernel nfsd type
34 * stuff. 34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics. 35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols. 36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n". 37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls 38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the 39 * for sockets. May have errors at the
40 * moment. 40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above. 41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations, 42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug. 43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
45 * Tigran Aivazian : Made listen(2) backlog sanity checks 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
46 * protocol-independent 46 * protocol-independent
47 * 47 *
48 * 48 *
49 * This program is free software; you can redistribute it and/or 49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License 50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version 51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version. 52 * 2 of the License, or (at your option) any later version.
53 * 53 *
54 * 54 *
55 * This module is effectively the top level interface to the BSD socket 55 * This module is effectively the top level interface to the BSD socket
56 * paradigm. 56 * paradigm.
57 * 57 *
58 * Based upon Swansea University Computer Society NET3.039 58 * Based upon Swansea University Computer Society NET3.039
59 */ 59 */
60 60
61 #include <linux/mm.h> 61 #include <linux/mm.h>
62 #include <linux/socket.h> 62 #include <linux/socket.h>
63 #include <linux/file.h> 63 #include <linux/file.h>
64 #include <linux/net.h> 64 #include <linux/net.h>
65 #include <linux/interrupt.h> 65 #include <linux/interrupt.h>
66 #include <linux/thread_info.h> 66 #include <linux/thread_info.h>
67 #include <linux/rcupdate.h> 67 #include <linux/rcupdate.h>
68 #include <linux/netdevice.h> 68 #include <linux/netdevice.h>
69 #include <linux/proc_fs.h> 69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h> 70 #include <linux/seq_file.h>
71 #include <linux/mutex.h> 71 #include <linux/mutex.h>
72 #include <linux/thread_info.h> 72 #include <linux/thread_info.h>
73 #include <linux/wanrouter.h> 73 #include <linux/wanrouter.h>
74 #include <linux/if_bridge.h> 74 #include <linux/if_bridge.h>
75 #include <linux/if_frad.h> 75 #include <linux/if_frad.h>
76 #include <linux/if_vlan.h> 76 #include <linux/if_vlan.h>
77 #include <linux/init.h> 77 #include <linux/init.h>
78 #include <linux/poll.h> 78 #include <linux/poll.h>
79 #include <linux/cache.h> 79 #include <linux/cache.h>
80 #include <linux/module.h> 80 #include <linux/module.h>
81 #include <linux/highmem.h> 81 #include <linux/highmem.h>
82 #include <linux/mount.h> 82 #include <linux/mount.h>
83 #include <linux/security.h> 83 #include <linux/security.h>
84 #include <linux/syscalls.h> 84 #include <linux/syscalls.h>
85 #include <linux/compat.h> 85 #include <linux/compat.h>
86 #include <linux/kmod.h> 86 #include <linux/kmod.h>
87 #include <linux/audit.h> 87 #include <linux/audit.h>
88 #include <linux/wireless.h> 88 #include <linux/wireless.h>
89 #include <linux/nsproxy.h> 89 #include <linux/nsproxy.h>
90 90
91 #include <asm/uaccess.h> 91 #include <asm/uaccess.h>
92 #include <asm/unistd.h> 92 #include <asm/unistd.h>
93 93
94 #include <net/compat.h> 94 #include <net/compat.h>
95 #include <net/wext.h> 95 #include <net/wext.h>
96 96
97 #include <net/sock.h> 97 #include <net/sock.h>
98 #include <linux/netfilter.h> 98 #include <linux/netfilter.h>
99 99
100 static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 100 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
101 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 101 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos); 102 unsigned long nr_segs, loff_t pos);
103 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 103 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
104 unsigned long nr_segs, loff_t pos); 104 unsigned long nr_segs, loff_t pos);
105 static int sock_mmap(struct file *file, struct vm_area_struct *vma); 105 static int sock_mmap(struct file *file, struct vm_area_struct *vma);
106 106
107 static int sock_close(struct inode *inode, struct file *file); 107 static int sock_close(struct inode *inode, struct file *file);
108 static unsigned int sock_poll(struct file *file, 108 static unsigned int sock_poll(struct file *file,
109 struct poll_table_struct *wait); 109 struct poll_table_struct *wait);
110 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 110 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
111 #ifdef CONFIG_COMPAT 111 #ifdef CONFIG_COMPAT
112 static long compat_sock_ioctl(struct file *file, 112 static long compat_sock_ioctl(struct file *file,
113 unsigned int cmd, unsigned long arg); 113 unsigned int cmd, unsigned long arg);
114 #endif 114 #endif
115 static int sock_fasync(int fd, struct file *filp, int on); 115 static int sock_fasync(int fd, struct file *filp, int on);
116 static ssize_t sock_sendpage(struct file *file, struct page *page, 116 static ssize_t sock_sendpage(struct file *file, struct page *page,
117 int offset, size_t size, loff_t *ppos, int more); 117 int offset, size_t size, loff_t *ppos, int more);
118 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 118 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
119 struct pipe_inode_info *pipe, size_t len, 119 struct pipe_inode_info *pipe, size_t len,
120 unsigned int flags); 120 unsigned int flags);
121 121
122 /* 122 /*
123 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 123 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
124 * in the operation structures but are done directly via the socketcall() multiplexor. 124 * in the operation structures but are done directly via the socketcall() multiplexor.
125 */ 125 */
126 126
127 static const struct file_operations socket_file_ops = { 127 static const struct file_operations socket_file_ops = {
128 .owner = THIS_MODULE, 128 .owner = THIS_MODULE,
129 .llseek = no_llseek, 129 .llseek = no_llseek,
130 .aio_read = sock_aio_read, 130 .aio_read = sock_aio_read,
131 .aio_write = sock_aio_write, 131 .aio_write = sock_aio_write,
132 .poll = sock_poll, 132 .poll = sock_poll,
133 .unlocked_ioctl = sock_ioctl, 133 .unlocked_ioctl = sock_ioctl,
134 #ifdef CONFIG_COMPAT 134 #ifdef CONFIG_COMPAT
135 .compat_ioctl = compat_sock_ioctl, 135 .compat_ioctl = compat_sock_ioctl,
136 #endif 136 #endif
137 .mmap = sock_mmap, 137 .mmap = sock_mmap,
138 .open = sock_no_open, /* special open code to disallow open via /proc */ 138 .open = sock_no_open, /* special open code to disallow open via /proc */
139 .release = sock_close, 139 .release = sock_close,
140 .fasync = sock_fasync, 140 .fasync = sock_fasync,
141 .sendpage = sock_sendpage, 141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage, 142 .splice_write = generic_splice_sendpage,
143 .splice_read = sock_splice_read, 143 .splice_read = sock_splice_read,
144 }; 144 };
145 145
146 /* 146 /*
147 * The protocol list. Each protocol is registered in here. 147 * The protocol list. Each protocol is registered in here.
148 */ 148 */
149 149
150 static DEFINE_SPINLOCK(net_family_lock); 150 static DEFINE_SPINLOCK(net_family_lock);
151 static const struct net_proto_family *net_families[NPROTO] __read_mostly; 151 static const struct net_proto_family *net_families[NPROTO] __read_mostly;
152 152
153 /* 153 /*
154 * Statistics counters of the socket lists 154 * Statistics counters of the socket lists
155 */ 155 */
156 156
157 static DEFINE_PER_CPU(int, sockets_in_use) = 0; 157 static DEFINE_PER_CPU(int, sockets_in_use) = 0;
158 158
159 /* 159 /*
160 * Support routines. 160 * Support routines.
161 * Move socket addresses back and forth across the kernel/user 161 * Move socket addresses back and forth across the kernel/user
162 * divide and look after the messy bits. 162 * divide and look after the messy bits.
163 */ 163 */
164 164
/*
 * Address sizes this limit was chosen for: 108 for Unix domain, 16 for IP,
 * 16 for IPX, 24 for IPv6, about 80 for AX.25. It must be at least one
 * bigger than the AF_UNIX size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
173 173
174 /** 174 /**
175 * move_addr_to_kernel - copy a socket address into kernel space 175 * move_addr_to_kernel - copy a socket address into kernel space
176 * @uaddr: Address in user space 176 * @uaddr: Address in user space
177 * @kaddr: Address in kernel space 177 * @kaddr: Address in kernel space
178 * @ulen: Length in user space 178 * @ulen: Length in user space
179 * 179 *
180 * The address is copied into kernel space. If the provided address is 180 * The address is copied into kernel space. If the provided address is
181 * too long an error code of -EINVAL is returned. If the copy gives 181 * too long an error code of -EINVAL is returned. If the copy gives
182 * invalid addresses -EFAULT is returned. On a success 0 is returned. 182 * invalid addresses -EFAULT is returned. On a success 0 is returned.
183 */ 183 */
184 184
185 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) 185 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
186 { 186 {
187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
188 return -EINVAL; 188 return -EINVAL;
189 if (ulen == 0) 189 if (ulen == 0)
190 return 0; 190 return 0;
191 if (copy_from_user(kaddr, uaddr, ulen)) 191 if (copy_from_user(kaddr, uaddr, ulen))
192 return -EFAULT; 192 return -EFAULT;
193 return audit_sockaddr(ulen, kaddr); 193 return audit_sockaddr(ulen, kaddr);
194 } 194 }
195 195
196 /** 196 /**
197 * move_addr_to_user - copy an address to user space 197 * move_addr_to_user - copy an address to user space
198 * @kaddr: kernel space address 198 * @kaddr: kernel space address
199 * @klen: length of address in kernel 199 * @klen: length of address in kernel
200 * @uaddr: user space address 200 * @uaddr: user space address
201 * @ulen: pointer to user length field 201 * @ulen: pointer to user length field
202 * 202 *
203 * The value pointed to by ulen on entry is the buffer length available. 203 * The value pointed to by ulen on entry is the buffer length available.
204 * This is overwritten with the buffer space used. -EINVAL is returned 204 * This is overwritten with the buffer space used. -EINVAL is returned
205 * if an overlong buffer is specified or a negative buffer size. -EFAULT 205 * if an overlong buffer is specified or a negative buffer size. -EFAULT
206 * is returned if either the buffer or the length field are not 206 * is returned if either the buffer or the length field are not
207 * accessible. 207 * accessible.
208 * After copying the data up to the limit the user specifies, the true 208 * After copying the data up to the limit the user specifies, the true
209 * length of the data is written over the length limit the user 209 * length of the data is written over the length limit the user
210 * specified. Zero is returned for a success. 210 * specified. Zero is returned for a success.
211 */ 211 */
212 212
213 int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 213 int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
214 int __user *ulen) 214 int __user *ulen)
215 { 215 {
216 int err; 216 int err;
217 int len; 217 int len;
218 218
219 err = get_user(len, ulen); 219 err = get_user(len, ulen);
220 if (err) 220 if (err)
221 return err; 221 return err;
222 if (len > klen) 222 if (len > klen)
223 len = klen; 223 len = klen;
224 if (len < 0 || len > sizeof(struct sockaddr_storage)) 224 if (len < 0 || len > sizeof(struct sockaddr_storage))
225 return -EINVAL; 225 return -EINVAL;
226 if (len) { 226 if (len) {
227 if (audit_sockaddr(klen, kaddr)) 227 if (audit_sockaddr(klen, kaddr))
228 return -ENOMEM; 228 return -ENOMEM;
229 if (copy_to_user(uaddr, kaddr, len)) 229 if (copy_to_user(uaddr, kaddr, len))
230 return -EFAULT; 230 return -EFAULT;
231 } 231 }
232 /* 232 /*
233 * "fromlen shall refer to the value before truncation.." 233 * "fromlen shall refer to the value before truncation.."
234 * 1003.1g 234 * 1003.1g
235 */ 235 */
236 return __put_user(klen, ulen); 236 return __put_user(klen, ulen);
237 } 237 }
238 238
239 #define SOCKFS_MAGIC 0x534F434B 239 #define SOCKFS_MAGIC 0x534F434B
240 240
241 static struct kmem_cache *sock_inode_cachep __read_mostly; 241 static struct kmem_cache *sock_inode_cachep __read_mostly;
242 242
243 static struct inode *sock_alloc_inode(struct super_block *sb) 243 static struct inode *sock_alloc_inode(struct super_block *sb)
244 { 244 {
245 struct socket_alloc *ei; 245 struct socket_alloc *ei;
246 246
247 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 247 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
248 if (!ei) 248 if (!ei)
249 return NULL; 249 return NULL;
250 init_waitqueue_head(&ei->socket.wait); 250 init_waitqueue_head(&ei->socket.wait);
251 251
252 ei->socket.fasync_list = NULL; 252 ei->socket.fasync_list = NULL;
253 ei->socket.state = SS_UNCONNECTED; 253 ei->socket.state = SS_UNCONNECTED;
254 ei->socket.flags = 0; 254 ei->socket.flags = 0;
255 ei->socket.ops = NULL; 255 ei->socket.ops = NULL;
256 ei->socket.sk = NULL; 256 ei->socket.sk = NULL;
257 ei->socket.file = NULL; 257 ei->socket.file = NULL;
258 258
259 return &ei->vfs_inode; 259 return &ei->vfs_inode;
260 } 260 }
261 261
262 static void sock_destroy_inode(struct inode *inode) 262 static void sock_destroy_inode(struct inode *inode)
263 { 263 {
264 kmem_cache_free(sock_inode_cachep, 264 kmem_cache_free(sock_inode_cachep,
265 container_of(inode, struct socket_alloc, vfs_inode)); 265 container_of(inode, struct socket_alloc, vfs_inode));
266 } 266 }
267 267
268 static void init_once(struct kmem_cache *cachep, void *foo) 268 static void init_once(struct kmem_cache *cachep, void *foo)
269 { 269 {
270 struct socket_alloc *ei = (struct socket_alloc *)foo; 270 struct socket_alloc *ei = (struct socket_alloc *)foo;
271 271
272 inode_init_once(&ei->vfs_inode); 272 inode_init_once(&ei->vfs_inode);
273 } 273 }
274 274
275 static int init_inodecache(void) 275 static int init_inodecache(void)
276 { 276 {
277 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 277 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
278 sizeof(struct socket_alloc), 278 sizeof(struct socket_alloc),
279 0, 279 0,
280 (SLAB_HWCACHE_ALIGN | 280 (SLAB_HWCACHE_ALIGN |
281 SLAB_RECLAIM_ACCOUNT | 281 SLAB_RECLAIM_ACCOUNT |
282 SLAB_MEM_SPREAD), 282 SLAB_MEM_SPREAD),
283 init_once); 283 init_once);
284 if (sock_inode_cachep == NULL) 284 if (sock_inode_cachep == NULL)
285 return -ENOMEM; 285 return -ENOMEM;
286 return 0; 286 return 0;
287 } 287 }
288 288
289 static struct super_operations sockfs_ops = { 289 static struct super_operations sockfs_ops = {
290 .alloc_inode = sock_alloc_inode, 290 .alloc_inode = sock_alloc_inode,
291 .destroy_inode =sock_destroy_inode, 291 .destroy_inode =sock_destroy_inode,
292 .statfs = simple_statfs, 292 .statfs = simple_statfs,
293 }; 293 };
294 294
295 static int sockfs_get_sb(struct file_system_type *fs_type, 295 static int sockfs_get_sb(struct file_system_type *fs_type,
296 int flags, const char *dev_name, void *data, 296 int flags, const char *dev_name, void *data,
297 struct vfsmount *mnt) 297 struct vfsmount *mnt)
298 { 298 {
299 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 299 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
300 mnt); 300 mnt);
301 } 301 }
302 302
303 static struct vfsmount *sock_mnt __read_mostly; 303 static struct vfsmount *sock_mnt __read_mostly;
304 304
305 static struct file_system_type sock_fs_type = { 305 static struct file_system_type sock_fs_type = {
306 .name = "sockfs", 306 .name = "sockfs",
307 .get_sb = sockfs_get_sb, 307 .get_sb = sockfs_get_sb,
308 .kill_sb = kill_anon_super, 308 .kill_sb = kill_anon_super,
309 }; 309 };
310 310
311 static int sockfs_delete_dentry(struct dentry *dentry) 311 static int sockfs_delete_dentry(struct dentry *dentry)
312 { 312 {
313 /* 313 /*
314 * At creation time, we pretended this dentry was hashed 314 * At creation time, we pretended this dentry was hashed
315 * (by clearing DCACHE_UNHASHED bit in d_flags) 315 * (by clearing DCACHE_UNHASHED bit in d_flags)
316 * At delete time, we restore the truth : not hashed. 316 * At delete time, we restore the truth : not hashed.
317 * (so that dput() can proceed correctly) 317 * (so that dput() can proceed correctly)
318 */ 318 */
319 dentry->d_flags |= DCACHE_UNHASHED; 319 dentry->d_flags |= DCACHE_UNHASHED;
320 return 0; 320 return 0;
321 } 321 }
322 322
323 /* 323 /*
324 * sockfs_dname() is called from d_path(). 324 * sockfs_dname() is called from d_path().
325 */ 325 */
326 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 326 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
327 { 327 {
328 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 328 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
329 dentry->d_inode->i_ino); 329 dentry->d_inode->i_ino);
330 } 330 }
331 331
332 static struct dentry_operations sockfs_dentry_operations = { 332 static struct dentry_operations sockfs_dentry_operations = {
333 .d_delete = sockfs_delete_dentry, 333 .d_delete = sockfs_delete_dentry,
334 .d_dname = sockfs_dname, 334 .d_dname = sockfs_dname,
335 }; 335 };
336 336
337 /* 337 /*
338 * Obtains the first available file descriptor and sets it up for use. 338 * Obtains the first available file descriptor and sets it up for use.
339 * 339 *
340 * These functions create file structures and maps them to fd space 340 * These functions create file structures and maps them to fd space
341 * of the current process. On success it returns file descriptor 341 * of the current process. On success it returns file descriptor
342 * and file struct implicitly stored in sock->file. 342 * and file struct implicitly stored in sock->file.
343 * Note that another thread may close file descriptor before we return 343 * Note that another thread may close file descriptor before we return
344 * from this function. We use the fact that now we do not refer 344 * from this function. We use the fact that now we do not refer
345 * to socket after mapping. If one day we will need it, this 345 * to socket after mapping. If one day we will need it, this
346 * function will increment ref. count on file by 1. 346 * function will increment ref. count on file by 1.
347 * 347 *
348 * In any case returned fd MAY BE not valid! 348 * In any case returned fd MAY BE not valid!
349 * This race condition is unavoidable 349 * This race condition is unavoidable
350 * with shared fd spaces, we cannot solve it inside kernel, 350 * with shared fd spaces, we cannot solve it inside kernel,
351 * but we take care of internal coherence yet. 351 * but we take care of internal coherence yet.
352 */ 352 */
353 353
354 static int sock_alloc_fd(struct file **filep, int flags) 354 static int sock_alloc_fd(struct file **filep, int flags)
355 { 355 {
356 int fd; 356 int fd;
357 357
358 fd = get_unused_fd_flags(flags); 358 fd = get_unused_fd_flags(flags);
359 if (likely(fd >= 0)) { 359 if (likely(fd >= 0)) {
360 struct file *file = get_empty_filp(); 360 struct file *file = get_empty_filp();
361 361
362 *filep = file; 362 *filep = file;
363 if (unlikely(!file)) { 363 if (unlikely(!file)) {
364 put_unused_fd(fd); 364 put_unused_fd(fd);
365 return -ENFILE; 365 return -ENFILE;
366 } 366 }
367 } else 367 } else
368 *filep = NULL; 368 *filep = NULL;
369 return fd; 369 return fd;
370 } 370 }
371 371
372 static int sock_attach_fd(struct socket *sock, struct file *file) 372 static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
373 { 373 {
374 struct dentry *dentry; 374 struct dentry *dentry;
375 struct qstr name = { .name = "" }; 375 struct qstr name = { .name = "" };
376 376
377 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 377 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
378 if (unlikely(!dentry)) 378 if (unlikely(!dentry))
379 return -ENOMEM; 379 return -ENOMEM;
380 380
381 dentry->d_op = &sockfs_dentry_operations; 381 dentry->d_op = &sockfs_dentry_operations;
382 /* 382 /*
383 * We dont want to push this dentry into global dentry hash table. 383 * We dont want to push this dentry into global dentry hash table.
384 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED 384 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
385 * This permits a working /proc/$pid/fd/XXX on sockets 385 * This permits a working /proc/$pid/fd/XXX on sockets
386 */ 386 */
387 dentry->d_flags &= ~DCACHE_UNHASHED; 387 dentry->d_flags &= ~DCACHE_UNHASHED;
388 d_instantiate(dentry, SOCK_INODE(sock)); 388 d_instantiate(dentry, SOCK_INODE(sock));
389 389
390 sock->file = file; 390 sock->file = file;
391 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, 391 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
392 &socket_file_ops); 392 &socket_file_ops);
393 SOCK_INODE(sock)->i_fop = &socket_file_ops; 393 SOCK_INODE(sock)->i_fop = &socket_file_ops;
394 file->f_flags = O_RDWR; 394 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
395 file->f_pos = 0; 395 file->f_pos = 0;
396 file->private_data = sock; 396 file->private_data = sock;
397 397
398 return 0; 398 return 0;
399 } 399 }
400 400
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table. @flags is passed both to sock_alloc_fd()
 * and to sock_attach_fd(). Returns the new descriptor, or a negative error
 * code; on an attach failure the file and the descriptor are released.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile, flags);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
418 418
419 static struct socket *sock_from_file(struct file *file, int *err) 419 static struct socket *sock_from_file(struct file *file, int *err)
420 { 420 {
421 if (file->f_op == &socket_file_ops) 421 if (file->f_op == &socket_file_ops)
422 return file->private_data; /* set in sock_map_fd */ 422 return file->private_data; /* set in sock_map_fd */
423 423
424 *err = -ENOTSOCK; 424 *err = -ENOTSOCK;
425 return NULL; 425 return NULL;
426 } 426 }
427 427
428 /** 428 /**
429 * sockfd_lookup - Go from a file number to its socket slot 429 * sockfd_lookup - Go from a file number to its socket slot
430 * @fd: file handle 430 * @fd: file handle
431 * @err: pointer to an error code return 431 * @err: pointer to an error code return
432 * 432 *
433 * The file handle passed in is locked and the socket it is bound 433 * The file handle passed in is locked and the socket it is bound
434 * too is returned. If an error occurs the err pointer is overwritten 434 * too is returned. If an error occurs the err pointer is overwritten
435 * with a negative errno code and NULL is returned. The function checks 435 * with a negative errno code and NULL is returned. The function checks
436 * for both invalid handles and passing a handle which is not a socket. 436 * for both invalid handles and passing a handle which is not a socket.
437 * 437 *
438 * On a success the socket object pointer is returned. 438 * On a success the socket object pointer is returned.
439 */ 439 */
440 440
441 struct socket *sockfd_lookup(int fd, int *err) 441 struct socket *sockfd_lookup(int fd, int *err)
442 { 442 {
443 struct file *file; 443 struct file *file;
444 struct socket *sock; 444 struct socket *sock;
445 445
446 file = fget(fd); 446 file = fget(fd);
447 if (!file) { 447 if (!file) {
448 *err = -EBADF; 448 *err = -EBADF;
449 return NULL; 449 return NULL;
450 } 450 }
451 451
452 sock = sock_from_file(file, err); 452 sock = sock_from_file(file, err);
453 if (!sock) 453 if (!sock)
454 fput(file); 454 fput(file);
455 return sock; 455 return sock;
456 } 456 }
457 457
458 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 458 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
459 { 459 {
460 struct file *file; 460 struct file *file;
461 struct socket *sock; 461 struct socket *sock;
462 462
463 *err = -EBADF; 463 *err = -EBADF;
464 file = fget_light(fd, fput_needed); 464 file = fget_light(fd, fput_needed);
465 if (file) { 465 if (file) {
466 sock = sock_from_file(file, err); 466 sock = sock_from_file(file, err);
467 if (sock) 467 if (sock)
468 return sock; 468 return sock;
469 fput_light(file, *fput_needed); 469 fput_light(file, *fput_needed);
470 } 470 }
471 return NULL; 471 return NULL;
472 } 472 }
473 473
474 /** 474 /**
475 * sock_alloc - allocate a socket 475 * sock_alloc - allocate a socket
476 * 476 *
477 * Allocate a new inode and socket object. The two are bound together 477 * Allocate a new inode and socket object. The two are bound together
478 * and initialised. The socket is then returned. If we are out of inodes 478 * and initialised. The socket is then returned. If we are out of inodes
479 * NULL is returned. 479 * NULL is returned.
480 */ 480 */
481 481
482 static struct socket *sock_alloc(void) 482 static struct socket *sock_alloc(void)
483 { 483 {
484 struct inode *inode; 484 struct inode *inode;
485 struct socket *sock; 485 struct socket *sock;
486 486
487 inode = new_inode(sock_mnt->mnt_sb); 487 inode = new_inode(sock_mnt->mnt_sb);
488 if (!inode) 488 if (!inode)
489 return NULL; 489 return NULL;
490 490
491 sock = SOCKET_I(inode); 491 sock = SOCKET_I(inode);
492 492
493 inode->i_mode = S_IFSOCK | S_IRWXUGO; 493 inode->i_mode = S_IFSOCK | S_IRWXUGO;
494 inode->i_uid = current->fsuid; 494 inode->i_uid = current->fsuid;
495 inode->i_gid = current->fsgid; 495 inode->i_gid = current->fsgid;
496 496
497 get_cpu_var(sockets_in_use)++; 497 get_cpu_var(sockets_in_use)++;
498 put_cpu_var(sockets_in_use); 498 put_cpu_var(sockets_in_use);
499 return sock; 499 return sock;
500 } 500 }
501 501
502 /* 502 /*
503 * In theory you can't get an open on this inode, but /proc provides 503 * In theory you can't get an open on this inode, but /proc provides
504 * a back door. Remember to keep it shut otherwise you'll let the 504 * a back door. Remember to keep it shut otherwise you'll let the
505 * creepy crawlies in. 505 * creepy crawlies in.
506 */ 506 */
507 507
508 static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 508 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
509 { 509 {
510 return -ENXIO; 510 return -ENXIO;
511 } 511 }
512 512
513 const struct file_operations bad_sock_fops = { 513 const struct file_operations bad_sock_fops = {
514 .owner = THIS_MODULE, 514 .owner = THIS_MODULE,
515 .open = sock_no_open, 515 .open = sock_no_open,
516 }; 516 };
517 517
518 /** 518 /**
519 * sock_release - close a socket 519 * sock_release - close a socket
520 * @sock: socket to close 520 * @sock: socket to close
521 * 521 *
522 * The socket is released from the protocol stack if it has a release 522 * The socket is released from the protocol stack if it has a release
523 * callback, and the inode is then released if the socket is bound to 523 * callback, and the inode is then released if the socket is bound to
524 * an inode not a file. 524 * an inode not a file.
525 */ 525 */
526 526
527 void sock_release(struct socket *sock) 527 void sock_release(struct socket *sock)
528 { 528 {
529 if (sock->ops) { 529 if (sock->ops) {
530 struct module *owner = sock->ops->owner; 530 struct module *owner = sock->ops->owner;
531 531
532 sock->ops->release(sock); 532 sock->ops->release(sock);
533 sock->ops = NULL; 533 sock->ops = NULL;
534 module_put(owner); 534 module_put(owner);
535 } 535 }
536 536
537 if (sock->fasync_list) 537 if (sock->fasync_list)
538 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 538 printk(KERN_ERR "sock_release: fasync list not empty!\n");
539 539
540 get_cpu_var(sockets_in_use)--; 540 get_cpu_var(sockets_in_use)--;
541 put_cpu_var(sockets_in_use); 541 put_cpu_var(sockets_in_use);
542 if (!sock->file) { 542 if (!sock->file) {
543 iput(SOCK_INODE(sock)); 543 iput(SOCK_INODE(sock));
544 return; 544 return;
545 } 545 }
546 sock->file = NULL; 546 sock->file = NULL;
547 } 547 }
548 548
549 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 549 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
550 struct msghdr *msg, size_t size) 550 struct msghdr *msg, size_t size)
551 { 551 {
552 struct sock_iocb *si = kiocb_to_siocb(iocb); 552 struct sock_iocb *si = kiocb_to_siocb(iocb);
553 int err; 553 int err;
554 554
555 si->sock = sock; 555 si->sock = sock;
556 si->scm = NULL; 556 si->scm = NULL;
557 si->msg = msg; 557 si->msg = msg;
558 si->size = size; 558 si->size = size;
559 559
560 err = security_socket_sendmsg(sock, msg, size); 560 err = security_socket_sendmsg(sock, msg, size);
561 if (err) 561 if (err)
562 return err; 562 return err;
563 563
564 return sock->ops->sendmsg(iocb, sock, msg, size); 564 return sock->ops->sendmsg(iocb, sock, msg, size);
565 } 565 }
566 566
567 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 567 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
568 { 568 {
569 struct kiocb iocb; 569 struct kiocb iocb;
570 struct sock_iocb siocb; 570 struct sock_iocb siocb;
571 int ret; 571 int ret;
572 572
573 init_sync_kiocb(&iocb, NULL); 573 init_sync_kiocb(&iocb, NULL);
574 iocb.private = &siocb; 574 iocb.private = &siocb;
575 ret = __sock_sendmsg(&iocb, sock, msg, size); 575 ret = __sock_sendmsg(&iocb, sock, msg, size);
576 if (-EIOCBQUEUED == ret) 576 if (-EIOCBQUEUED == ret)
577 ret = wait_on_sync_kiocb(&iocb); 577 ret = wait_on_sync_kiocb(&iocb);
578 return ret; 578 return ret;
579 } 579 }
580 580
581 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 581 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
582 struct kvec *vec, size_t num, size_t size) 582 struct kvec *vec, size_t num, size_t size)
583 { 583 {
584 mm_segment_t oldfs = get_fs(); 584 mm_segment_t oldfs = get_fs();
585 int result; 585 int result;
586 586
587 set_fs(KERNEL_DS); 587 set_fs(KERNEL_DS);
588 /* 588 /*
589 * the following is safe, since for compiler definitions of kvec and 589 * the following is safe, since for compiler definitions of kvec and
590 * iovec are identical, yielding the same in-core layout and alignment 590 * iovec are identical, yielding the same in-core layout and alignment
591 */ 591 */
592 msg->msg_iov = (struct iovec *)vec; 592 msg->msg_iov = (struct iovec *)vec;
593 msg->msg_iovlen = num; 593 msg->msg_iovlen = num;
594 result = sock_sendmsg(sock, msg, size); 594 result = sock_sendmsg(sock, msg, size);
595 set_fs(oldfs); 595 set_fs(oldfs);
596 return result; 596 return result;
597 } 597 }
598 598
599 /* 599 /*
600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
601 */ 601 */
602 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 602 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
603 struct sk_buff *skb) 603 struct sk_buff *skb)
604 { 604 {
605 ktime_t kt = skb->tstamp; 605 ktime_t kt = skb->tstamp;
606 606
607 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 607 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
608 struct timeval tv; 608 struct timeval tv;
609 /* Race occurred between timestamp enabling and packet 609 /* Race occurred between timestamp enabling and packet
610 receiving. Fill in the current time for now. */ 610 receiving. Fill in the current time for now. */
611 if (kt.tv64 == 0) 611 if (kt.tv64 == 0)
612 kt = ktime_get_real(); 612 kt = ktime_get_real();
613 skb->tstamp = kt; 613 skb->tstamp = kt;
614 tv = ktime_to_timeval(kt); 614 tv = ktime_to_timeval(kt);
615 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); 615 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
616 } else { 616 } else {
617 struct timespec ts; 617 struct timespec ts;
618 /* Race occurred between timestamp enabling and packet 618 /* Race occurred between timestamp enabling and packet
619 receiving. Fill in the current time for now. */ 619 receiving. Fill in the current time for now. */
620 if (kt.tv64 == 0) 620 if (kt.tv64 == 0)
621 kt = ktime_get_real(); 621 kt = ktime_get_real();
622 skb->tstamp = kt; 622 skb->tstamp = kt;
623 ts = ktime_to_timespec(kt); 623 ts = ktime_to_timespec(kt);
624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); 624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
625 } 625 }
626 } 626 }
627 627
628 EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 628 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
629 629
630 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 630 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
631 struct msghdr *msg, size_t size, int flags) 631 struct msghdr *msg, size_t size, int flags)
632 { 632 {
633 int err; 633 int err;
634 struct sock_iocb *si = kiocb_to_siocb(iocb); 634 struct sock_iocb *si = kiocb_to_siocb(iocb);
635 635
636 si->sock = sock; 636 si->sock = sock;
637 si->scm = NULL; 637 si->scm = NULL;
638 si->msg = msg; 638 si->msg = msg;
639 si->size = size; 639 si->size = size;
640 si->flags = flags; 640 si->flags = flags;
641 641
642 err = security_socket_recvmsg(sock, msg, size, flags); 642 err = security_socket_recvmsg(sock, msg, size, flags);
643 if (err) 643 if (err)
644 return err; 644 return err;
645 645
646 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 646 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
647 } 647 }
648 648
649 int sock_recvmsg(struct socket *sock, struct msghdr *msg, 649 int sock_recvmsg(struct socket *sock, struct msghdr *msg,
650 size_t size, int flags) 650 size_t size, int flags)
651 { 651 {
652 struct kiocb iocb; 652 struct kiocb iocb;
653 struct sock_iocb siocb; 653 struct sock_iocb siocb;
654 int ret; 654 int ret;
655 655
656 init_sync_kiocb(&iocb, NULL); 656 init_sync_kiocb(&iocb, NULL);
657 iocb.private = &siocb; 657 iocb.private = &siocb;
658 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 658 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
659 if (-EIOCBQUEUED == ret) 659 if (-EIOCBQUEUED == ret)
660 ret = wait_on_sync_kiocb(&iocb); 660 ret = wait_on_sync_kiocb(&iocb);
661 return ret; 661 return ret;
662 } 662 }
663 663
664 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 664 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
665 struct kvec *vec, size_t num, size_t size, int flags) 665 struct kvec *vec, size_t num, size_t size, int flags)
666 { 666 {
667 mm_segment_t oldfs = get_fs(); 667 mm_segment_t oldfs = get_fs();
668 int result; 668 int result;
669 669
670 set_fs(KERNEL_DS); 670 set_fs(KERNEL_DS);
671 /* 671 /*
672 * the following is safe, since for compiler definitions of kvec and 672 * the following is safe, since for compiler definitions of kvec and
673 * iovec are identical, yielding the same in-core layout and alignment 673 * iovec are identical, yielding the same in-core layout and alignment
674 */ 674 */
675 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 675 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
676 result = sock_recvmsg(sock, msg, size, flags); 676 result = sock_recvmsg(sock, msg, size, flags);
677 set_fs(oldfs); 677 set_fs(oldfs);
678 return result; 678 return result;
679 } 679 }
680 680
681 static void sock_aio_dtor(struct kiocb *iocb) 681 static void sock_aio_dtor(struct kiocb *iocb)
682 { 682 {
683 kfree(iocb->private); 683 kfree(iocb->private);
684 } 684 }
685 685
686 static ssize_t sock_sendpage(struct file *file, struct page *page, 686 static ssize_t sock_sendpage(struct file *file, struct page *page,
687 int offset, size_t size, loff_t *ppos, int more) 687 int offset, size_t size, loff_t *ppos, int more)
688 { 688 {
689 struct socket *sock; 689 struct socket *sock;
690 int flags; 690 int flags;
691 691
692 sock = file->private_data; 692 sock = file->private_data;
693 693
694 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; 694 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
695 if (more) 695 if (more)
696 flags |= MSG_MORE; 696 flags |= MSG_MORE;
697 697
698 return sock->ops->sendpage(sock, page, offset, size, flags); 698 return sock->ops->sendpage(sock, page, offset, size, flags);
699 } 699 }
700 700
701 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 701 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
702 struct pipe_inode_info *pipe, size_t len, 702 struct pipe_inode_info *pipe, size_t len,
703 unsigned int flags) 703 unsigned int flags)
704 { 704 {
705 struct socket *sock = file->private_data; 705 struct socket *sock = file->private_data;
706 706
707 if (unlikely(!sock->ops->splice_read)) 707 if (unlikely(!sock->ops->splice_read))
708 return -EINVAL; 708 return -EINVAL;
709 709
710 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 710 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
711 } 711 }
712 712
713 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 713 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
714 struct sock_iocb *siocb) 714 struct sock_iocb *siocb)
715 { 715 {
716 if (!is_sync_kiocb(iocb)) { 716 if (!is_sync_kiocb(iocb)) {
717 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 717 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
718 if (!siocb) 718 if (!siocb)
719 return NULL; 719 return NULL;
720 iocb->ki_dtor = sock_aio_dtor; 720 iocb->ki_dtor = sock_aio_dtor;
721 } 721 }
722 722
723 siocb->kiocb = iocb; 723 siocb->kiocb = iocb;
724 iocb->private = siocb; 724 iocb->private = siocb;
725 return siocb; 725 return siocb;
726 } 726 }
727 727
728 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 728 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
729 struct file *file, const struct iovec *iov, 729 struct file *file, const struct iovec *iov,
730 unsigned long nr_segs) 730 unsigned long nr_segs)
731 { 731 {
732 struct socket *sock = file->private_data; 732 struct socket *sock = file->private_data;
733 size_t size = 0; 733 size_t size = 0;
734 int i; 734 int i;
735 735
736 for (i = 0; i < nr_segs; i++) 736 for (i = 0; i < nr_segs; i++)
737 size += iov[i].iov_len; 737 size += iov[i].iov_len;
738 738
739 msg->msg_name = NULL; 739 msg->msg_name = NULL;
740 msg->msg_namelen = 0; 740 msg->msg_namelen = 0;
741 msg->msg_control = NULL; 741 msg->msg_control = NULL;
742 msg->msg_controllen = 0; 742 msg->msg_controllen = 0;
743 msg->msg_iov = (struct iovec *)iov; 743 msg->msg_iov = (struct iovec *)iov;
744 msg->msg_iovlen = nr_segs; 744 msg->msg_iovlen = nr_segs;
745 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 745 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
746 746
747 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 747 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
748 } 748 }
749 749
750 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 750 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
751 unsigned long nr_segs, loff_t pos) 751 unsigned long nr_segs, loff_t pos)
752 { 752 {
753 struct sock_iocb siocb, *x; 753 struct sock_iocb siocb, *x;
754 754
755 if (pos != 0) 755 if (pos != 0)
756 return -ESPIPE; 756 return -ESPIPE;
757 757
758 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 758 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
759 return 0; 759 return 0;
760 760
761 761
762 x = alloc_sock_iocb(iocb, &siocb); 762 x = alloc_sock_iocb(iocb, &siocb);
763 if (!x) 763 if (!x)
764 return -ENOMEM; 764 return -ENOMEM;
765 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 765 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
766 } 766 }
767 767
768 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 768 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
769 struct file *file, const struct iovec *iov, 769 struct file *file, const struct iovec *iov,
770 unsigned long nr_segs) 770 unsigned long nr_segs)
771 { 771 {
772 struct socket *sock = file->private_data; 772 struct socket *sock = file->private_data;
773 size_t size = 0; 773 size_t size = 0;
774 int i; 774 int i;
775 775
776 for (i = 0; i < nr_segs; i++) 776 for (i = 0; i < nr_segs; i++)
777 size += iov[i].iov_len; 777 size += iov[i].iov_len;
778 778
779 msg->msg_name = NULL; 779 msg->msg_name = NULL;
780 msg->msg_namelen = 0; 780 msg->msg_namelen = 0;
781 msg->msg_control = NULL; 781 msg->msg_control = NULL;
782 msg->msg_controllen = 0; 782 msg->msg_controllen = 0;
783 msg->msg_iov = (struct iovec *)iov; 783 msg->msg_iov = (struct iovec *)iov;
784 msg->msg_iovlen = nr_segs; 784 msg->msg_iovlen = nr_segs;
785 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 785 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
786 if (sock->type == SOCK_SEQPACKET) 786 if (sock->type == SOCK_SEQPACKET)
787 msg->msg_flags |= MSG_EOR; 787 msg->msg_flags |= MSG_EOR;
788 788
789 return __sock_sendmsg(iocb, sock, msg, size); 789 return __sock_sendmsg(iocb, sock, msg, size);
790 } 790 }
791 791
792 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 792 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
793 unsigned long nr_segs, loff_t pos) 793 unsigned long nr_segs, loff_t pos)
794 { 794 {
795 struct sock_iocb siocb, *x; 795 struct sock_iocb siocb, *x;
796 796
797 if (pos != 0) 797 if (pos != 0)
798 return -ESPIPE; 798 return -ESPIPE;
799 799
800 x = alloc_sock_iocb(iocb, &siocb); 800 x = alloc_sock_iocb(iocb, &siocb);
801 if (!x) 801 if (!x)
802 return -ENOMEM; 802 return -ENOMEM;
803 803
804 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 804 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
805 } 805 }
806 806
807 /* 807 /*
808 * Atomic setting of ioctl hooks to avoid race 808 * Atomic setting of ioctl hooks to avoid race
809 * with module unload. 809 * with module unload.
810 */ 810 */
811 811
812 static DEFINE_MUTEX(br_ioctl_mutex); 812 static DEFINE_MUTEX(br_ioctl_mutex);
813 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; 813 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
814 814
815 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 815 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
816 { 816 {
817 mutex_lock(&br_ioctl_mutex); 817 mutex_lock(&br_ioctl_mutex);
818 br_ioctl_hook = hook; 818 br_ioctl_hook = hook;
819 mutex_unlock(&br_ioctl_mutex); 819 mutex_unlock(&br_ioctl_mutex);
820 } 820 }
821 821
822 EXPORT_SYMBOL(brioctl_set); 822 EXPORT_SYMBOL(brioctl_set);
823 823
824 static DEFINE_MUTEX(vlan_ioctl_mutex); 824 static DEFINE_MUTEX(vlan_ioctl_mutex);
825 static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 825 static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
826 826
827 void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 827 void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
828 { 828 {
829 mutex_lock(&vlan_ioctl_mutex); 829 mutex_lock(&vlan_ioctl_mutex);
830 vlan_ioctl_hook = hook; 830 vlan_ioctl_hook = hook;
831 mutex_unlock(&vlan_ioctl_mutex); 831 mutex_unlock(&vlan_ioctl_mutex);
832 } 832 }
833 833
834 EXPORT_SYMBOL(vlan_ioctl_set); 834 EXPORT_SYMBOL(vlan_ioctl_set);
835 835
836 static DEFINE_MUTEX(dlci_ioctl_mutex); 836 static DEFINE_MUTEX(dlci_ioctl_mutex);
837 static int (*dlci_ioctl_hook) (unsigned int, void __user *); 837 static int (*dlci_ioctl_hook) (unsigned int, void __user *);
838 838
839 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 839 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
840 { 840 {
841 mutex_lock(&dlci_ioctl_mutex); 841 mutex_lock(&dlci_ioctl_mutex);
842 dlci_ioctl_hook = hook; 842 dlci_ioctl_hook = hook;
843 mutex_unlock(&dlci_ioctl_mutex); 843 mutex_unlock(&dlci_ioctl_mutex);
844 } 844 }
845 845
846 EXPORT_SYMBOL(dlci_ioctl_set); 846 EXPORT_SYMBOL(dlci_ioctl_set);
847 847
848 /* 848 /*
849 * With an ioctl, arg may well be a user mode pointer, but we don't know 849 * With an ioctl, arg may well be a user mode pointer, but we don't know
850 * what to do with it - that's up to the protocol still. 850 * what to do with it - that's up to the protocol still.
851 */ 851 */
852 852
853 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 853 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
854 { 854 {
855 struct socket *sock; 855 struct socket *sock;
856 struct sock *sk; 856 struct sock *sk;
857 void __user *argp = (void __user *)arg; 857 void __user *argp = (void __user *)arg;
858 int pid, err; 858 int pid, err;
859 struct net *net; 859 struct net *net;
860 860
861 sock = file->private_data; 861 sock = file->private_data;
862 sk = sock->sk; 862 sk = sock->sk;
863 net = sock_net(sk); 863 net = sock_net(sk);
864 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 864 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
865 err = dev_ioctl(net, cmd, argp); 865 err = dev_ioctl(net, cmd, argp);
866 } else 866 } else
867 #ifdef CONFIG_WIRELESS_EXT 867 #ifdef CONFIG_WIRELESS_EXT
868 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 868 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
869 err = dev_ioctl(net, cmd, argp); 869 err = dev_ioctl(net, cmd, argp);
870 } else 870 } else
871 #endif /* CONFIG_WIRELESS_EXT */ 871 #endif /* CONFIG_WIRELESS_EXT */
872 switch (cmd) { 872 switch (cmd) {
873 case FIOSETOWN: 873 case FIOSETOWN:
874 case SIOCSPGRP: 874 case SIOCSPGRP:
875 err = -EFAULT; 875 err = -EFAULT;
876 if (get_user(pid, (int __user *)argp)) 876 if (get_user(pid, (int __user *)argp))
877 break; 877 break;
878 err = f_setown(sock->file, pid, 1); 878 err = f_setown(sock->file, pid, 1);
879 break; 879 break;
880 case FIOGETOWN: 880 case FIOGETOWN:
881 case SIOCGPGRP: 881 case SIOCGPGRP:
882 err = put_user(f_getown(sock->file), 882 err = put_user(f_getown(sock->file),
883 (int __user *)argp); 883 (int __user *)argp);
884 break; 884 break;
885 case SIOCGIFBR: 885 case SIOCGIFBR:
886 case SIOCSIFBR: 886 case SIOCSIFBR:
887 case SIOCBRADDBR: 887 case SIOCBRADDBR:
888 case SIOCBRDELBR: 888 case SIOCBRDELBR:
889 err = -ENOPKG; 889 err = -ENOPKG;
890 if (!br_ioctl_hook) 890 if (!br_ioctl_hook)
891 request_module("bridge"); 891 request_module("bridge");
892 892
893 mutex_lock(&br_ioctl_mutex); 893 mutex_lock(&br_ioctl_mutex);
894 if (br_ioctl_hook) 894 if (br_ioctl_hook)
895 err = br_ioctl_hook(net, cmd, argp); 895 err = br_ioctl_hook(net, cmd, argp);
896 mutex_unlock(&br_ioctl_mutex); 896 mutex_unlock(&br_ioctl_mutex);
897 break; 897 break;
898 case SIOCGIFVLAN: 898 case SIOCGIFVLAN:
899 case SIOCSIFVLAN: 899 case SIOCSIFVLAN:
900 err = -ENOPKG; 900 err = -ENOPKG;
901 if (!vlan_ioctl_hook) 901 if (!vlan_ioctl_hook)
902 request_module("8021q"); 902 request_module("8021q");
903 903
904 mutex_lock(&vlan_ioctl_mutex); 904 mutex_lock(&vlan_ioctl_mutex);
905 if (vlan_ioctl_hook) 905 if (vlan_ioctl_hook)
906 err = vlan_ioctl_hook(net, argp); 906 err = vlan_ioctl_hook(net, argp);
907 mutex_unlock(&vlan_ioctl_mutex); 907 mutex_unlock(&vlan_ioctl_mutex);
908 break; 908 break;
909 case SIOCADDDLCI: 909 case SIOCADDDLCI:
910 case SIOCDELDLCI: 910 case SIOCDELDLCI:
911 err = -ENOPKG; 911 err = -ENOPKG;
912 if (!dlci_ioctl_hook) 912 if (!dlci_ioctl_hook)
913 request_module("dlci"); 913 request_module("dlci");
914 914
915 mutex_lock(&dlci_ioctl_mutex); 915 mutex_lock(&dlci_ioctl_mutex);
916 if (dlci_ioctl_hook) 916 if (dlci_ioctl_hook)
917 err = dlci_ioctl_hook(cmd, argp); 917 err = dlci_ioctl_hook(cmd, argp);
918 mutex_unlock(&dlci_ioctl_mutex); 918 mutex_unlock(&dlci_ioctl_mutex);
919 break; 919 break;
920 default: 920 default:
921 err = sock->ops->ioctl(sock, cmd, arg); 921 err = sock->ops->ioctl(sock, cmd, arg);
922 922
923 /* 923 /*
924 * If this ioctl is unknown try to hand it down 924 * If this ioctl is unknown try to hand it down
925 * to the NIC driver. 925 * to the NIC driver.
926 */ 926 */
927 if (err == -ENOIOCTLCMD) 927 if (err == -ENOIOCTLCMD)
928 err = dev_ioctl(net, cmd, argp); 928 err = dev_ioctl(net, cmd, argp);
929 break; 929 break;
930 } 930 }
931 return err; 931 return err;
932 } 932 }
933 933
934 int sock_create_lite(int family, int type, int protocol, struct socket **res) 934 int sock_create_lite(int family, int type, int protocol, struct socket **res)
935 { 935 {
936 int err; 936 int err;
937 struct socket *sock = NULL; 937 struct socket *sock = NULL;
938 938
939 err = security_socket_create(family, type, protocol, 1); 939 err = security_socket_create(family, type, protocol, 1);
940 if (err) 940 if (err)
941 goto out; 941 goto out;
942 942
943 sock = sock_alloc(); 943 sock = sock_alloc();
944 if (!sock) { 944 if (!sock) {
945 err = -ENOMEM; 945 err = -ENOMEM;
946 goto out; 946 goto out;
947 } 947 }
948 948
949 sock->type = type; 949 sock->type = type;
950 err = security_socket_post_create(sock, family, type, protocol, 1); 950 err = security_socket_post_create(sock, family, type, protocol, 1);
951 if (err) 951 if (err)
952 goto out_release; 952 goto out_release;
953 953
954 out: 954 out:
955 *res = sock; 955 *res = sock;
956 return err; 956 return err;
957 out_release: 957 out_release:
958 sock_release(sock); 958 sock_release(sock);
959 sock = NULL; 959 sock = NULL;
960 goto out; 960 goto out;
961 } 961 }
962 962
963 /* No kernel lock held - perfect */ 963 /* No kernel lock held - perfect */
964 static unsigned int sock_poll(struct file *file, poll_table *wait) 964 static unsigned int sock_poll(struct file *file, poll_table *wait)
965 { 965 {
966 struct socket *sock; 966 struct socket *sock;
967 967
968 /* 968 /*
969 * We can't return errors to poll, so it's either yes or no. 969 * We can't return errors to poll, so it's either yes or no.
970 */ 970 */
971 sock = file->private_data; 971 sock = file->private_data;
972 return sock->ops->poll(file, sock, wait); 972 return sock->ops->poll(file, sock, wait);
973 } 973 }
974 974
975 static int sock_mmap(struct file *file, struct vm_area_struct *vma) 975 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
976 { 976 {
977 struct socket *sock = file->private_data; 977 struct socket *sock = file->private_data;
978 978
979 return sock->ops->mmap(file, sock, vma); 979 return sock->ops->mmap(file, sock, vma);
980 } 980 }
981 981
982 static int sock_close(struct inode *inode, struct file *filp) 982 static int sock_close(struct inode *inode, struct file *filp)
983 { 983 {
984 /* 984 /*
985 * It was possible the inode is NULL we were 985 * It was possible the inode is NULL we were
986 * closing an unfinished socket. 986 * closing an unfinished socket.
987 */ 987 */
988 988
989 if (!inode) { 989 if (!inode) {
990 printk(KERN_DEBUG "sock_close: NULL inode\n"); 990 printk(KERN_DEBUG "sock_close: NULL inode\n");
991 return 0; 991 return 0;
992 } 992 }
993 sock_fasync(-1, filp, 0); 993 sock_fasync(-1, filp, 0);
994 sock_release(SOCKET_I(inode)); 994 sock_release(SOCKET_I(inode));
995 return 0; 995 return 0;
996 } 996 }
997 997
998 /* 998 /*
999 * Update the socket async list 999 * Update the socket async list
1000 * 1000 *
1001 * Fasync_list locking strategy. 1001 * Fasync_list locking strategy.
1002 * 1002 *
1003 * 1. fasync_list is modified only under process context socket lock 1003 * 1. fasync_list is modified only under process context socket lock
1004 * i.e. under semaphore. 1004 * i.e. under semaphore.
1005 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1005 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1006 * or under socket lock. 1006 * or under socket lock.
1007 * 3. fasync_list can be used from softirq context, so that 1007 * 3. fasync_list can be used from softirq context, so that
1008 * modification under socket lock have to be enhanced with 1008 * modification under socket lock have to be enhanced with
1009 * write_lock_bh(&sk->sk_callback_lock). 1009 * write_lock_bh(&sk->sk_callback_lock).
1010 * --ANK (990710) 1010 * --ANK (990710)
1011 */ 1011 */
1012 1012
1013 static int sock_fasync(int fd, struct file *filp, int on) 1013 static int sock_fasync(int fd, struct file *filp, int on)
1014 { 1014 {
1015 struct fasync_struct *fa, *fna = NULL, **prev; 1015 struct fasync_struct *fa, *fna = NULL, **prev;
1016 struct socket *sock; 1016 struct socket *sock;
1017 struct sock *sk; 1017 struct sock *sk;
1018 1018
1019 if (on) { 1019 if (on) {
1020 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); 1020 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1021 if (fna == NULL) 1021 if (fna == NULL)
1022 return -ENOMEM; 1022 return -ENOMEM;
1023 } 1023 }
1024 1024
1025 sock = filp->private_data; 1025 sock = filp->private_data;
1026 1026
1027 sk = sock->sk; 1027 sk = sock->sk;
1028 if (sk == NULL) { 1028 if (sk == NULL) {
1029 kfree(fna); 1029 kfree(fna);
1030 return -EINVAL; 1030 return -EINVAL;
1031 } 1031 }
1032 1032
1033 lock_sock(sk); 1033 lock_sock(sk);
1034 1034
1035 prev = &(sock->fasync_list); 1035 prev = &(sock->fasync_list);
1036 1036
1037 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 1037 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1038 if (fa->fa_file == filp) 1038 if (fa->fa_file == filp)
1039 break; 1039 break;
1040 1040
1041 if (on) { 1041 if (on) {
1042 if (fa != NULL) { 1042 if (fa != NULL) {
1043 write_lock_bh(&sk->sk_callback_lock); 1043 write_lock_bh(&sk->sk_callback_lock);
1044 fa->fa_fd = fd; 1044 fa->fa_fd = fd;
1045 write_unlock_bh(&sk->sk_callback_lock); 1045 write_unlock_bh(&sk->sk_callback_lock);
1046 1046
1047 kfree(fna); 1047 kfree(fna);
1048 goto out; 1048 goto out;
1049 } 1049 }
1050 fna->fa_file = filp; 1050 fna->fa_file = filp;
1051 fna->fa_fd = fd; 1051 fna->fa_fd = fd;
1052 fna->magic = FASYNC_MAGIC; 1052 fna->magic = FASYNC_MAGIC;
1053 fna->fa_next = sock->fasync_list; 1053 fna->fa_next = sock->fasync_list;
1054 write_lock_bh(&sk->sk_callback_lock); 1054 write_lock_bh(&sk->sk_callback_lock);
1055 sock->fasync_list = fna; 1055 sock->fasync_list = fna;
1056 write_unlock_bh(&sk->sk_callback_lock); 1056 write_unlock_bh(&sk->sk_callback_lock);
1057 } else { 1057 } else {
1058 if (fa != NULL) { 1058 if (fa != NULL) {
1059 write_lock_bh(&sk->sk_callback_lock); 1059 write_lock_bh(&sk->sk_callback_lock);
1060 *prev = fa->fa_next; 1060 *prev = fa->fa_next;
1061 write_unlock_bh(&sk->sk_callback_lock); 1061 write_unlock_bh(&sk->sk_callback_lock);
1062 kfree(fa); 1062 kfree(fa);
1063 } 1063 }
1064 } 1064 }
1065 1065
1066 out: 1066 out:
1067 release_sock(sock->sk); 1067 release_sock(sock->sk);
1068 return 0; 1068 return 0;
1069 } 1069 }
1070 1070
1071 /* This function may be called only under socket lock or callback_lock */ 1071 /* This function may be called only under socket lock or callback_lock */
1072 1072
1073 int sock_wake_async(struct socket *sock, int how, int band) 1073 int sock_wake_async(struct socket *sock, int how, int band)
1074 { 1074 {
1075 if (!sock || !sock->fasync_list) 1075 if (!sock || !sock->fasync_list)
1076 return -1; 1076 return -1;
1077 switch (how) { 1077 switch (how) {
1078 case SOCK_WAKE_WAITD: 1078 case SOCK_WAKE_WAITD:
1079 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1079 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1080 break; 1080 break;
1081 goto call_kill; 1081 goto call_kill;
1082 case SOCK_WAKE_SPACE: 1082 case SOCK_WAKE_SPACE:
1083 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1083 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1084 break; 1084 break;
1085 /* fall through */ 1085 /* fall through */
1086 case SOCK_WAKE_IO: 1086 case SOCK_WAKE_IO:
1087 call_kill: 1087 call_kill:
1088 __kill_fasync(sock->fasync_list, SIGIO, band); 1088 __kill_fasync(sock->fasync_list, SIGIO, band);
1089 break; 1089 break;
1090 case SOCK_WAKE_URG: 1090 case SOCK_WAKE_URG:
1091 __kill_fasync(sock->fasync_list, SIGURG, band); 1091 __kill_fasync(sock->fasync_list, SIGURG, band);
1092 } 1092 }
1093 return 0; 1093 return 0;
1094 } 1094 }
1095 1095
1096 static int __sock_create(struct net *net, int family, int type, int protocol, 1096 static int __sock_create(struct net *net, int family, int type, int protocol,
1097 struct socket **res, int kern) 1097 struct socket **res, int kern)
1098 { 1098 {
1099 int err; 1099 int err;
1100 struct socket *sock; 1100 struct socket *sock;
1101 const struct net_proto_family *pf; 1101 const struct net_proto_family *pf;
1102 1102
1103 /* 1103 /*
1104 * Check protocol is in range 1104 * Check protocol is in range
1105 */ 1105 */
1106 if (family < 0 || family >= NPROTO) 1106 if (family < 0 || family >= NPROTO)
1107 return -EAFNOSUPPORT; 1107 return -EAFNOSUPPORT;
1108 if (type < 0 || type >= SOCK_MAX) 1108 if (type < 0 || type >= SOCK_MAX)
1109 return -EINVAL; 1109 return -EINVAL;
1110 1110
1111 /* Compatibility. 1111 /* Compatibility.
1112 1112
1113 This uglymoron is moved from INET layer to here to avoid 1113 This uglymoron is moved from INET layer to here to avoid
1114 deadlock in module load. 1114 deadlock in module load.
1115 */ 1115 */
1116 if (family == PF_INET && type == SOCK_PACKET) { 1116 if (family == PF_INET && type == SOCK_PACKET) {
1117 static int warned; 1117 static int warned;
1118 if (!warned) { 1118 if (!warned) {
1119 warned = 1; 1119 warned = 1;
1120 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1120 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1121 current->comm); 1121 current->comm);
1122 } 1122 }
1123 family = PF_PACKET; 1123 family = PF_PACKET;
1124 } 1124 }
1125 1125
1126 err = security_socket_create(family, type, protocol, kern); 1126 err = security_socket_create(family, type, protocol, kern);
1127 if (err) 1127 if (err)
1128 return err; 1128 return err;
1129 1129
1130 /* 1130 /*
1131 * Allocate the socket and allow the family to set things up. if 1131 * Allocate the socket and allow the family to set things up. if
1132 * the protocol is 0, the family is instructed to select an appropriate 1132 * the protocol is 0, the family is instructed to select an appropriate
1133 * default. 1133 * default.
1134 */ 1134 */
1135 sock = sock_alloc(); 1135 sock = sock_alloc();
1136 if (!sock) { 1136 if (!sock) {
1137 if (net_ratelimit()) 1137 if (net_ratelimit())
1138 printk(KERN_WARNING "socket: no more sockets\n"); 1138 printk(KERN_WARNING "socket: no more sockets\n");
1139 return -ENFILE; /* Not exactly a match, but its the 1139 return -ENFILE; /* Not exactly a match, but its the
1140 closest posix thing */ 1140 closest posix thing */
1141 } 1141 }
1142 1142
1143 sock->type = type; 1143 sock->type = type;
1144 1144
1145 #if defined(CONFIG_KMOD) 1145 #if defined(CONFIG_KMOD)
1146 /* Attempt to load a protocol module if the find failed. 1146 /* Attempt to load a protocol module if the find failed.
1147 * 1147 *
1148 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1148 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1149 * requested real, full-featured networking support upon configuration. 1149 * requested real, full-featured networking support upon configuration.
1150 * Otherwise module support will break! 1150 * Otherwise module support will break!
1151 */ 1151 */
1152 if (net_families[family] == NULL) 1152 if (net_families[family] == NULL)
1153 request_module("net-pf-%d", family); 1153 request_module("net-pf-%d", family);
1154 #endif 1154 #endif
1155 1155
1156 rcu_read_lock(); 1156 rcu_read_lock();
1157 pf = rcu_dereference(net_families[family]); 1157 pf = rcu_dereference(net_families[family]);
1158 err = -EAFNOSUPPORT; 1158 err = -EAFNOSUPPORT;
1159 if (!pf) 1159 if (!pf)
1160 goto out_release; 1160 goto out_release;
1161 1161
1162 /* 1162 /*
1163 * We will call the ->create function, that possibly is in a loadable 1163 * We will call the ->create function, that possibly is in a loadable
1164 * module, so we have to bump that loadable module refcnt first. 1164 * module, so we have to bump that loadable module refcnt first.
1165 */ 1165 */
1166 if (!try_module_get(pf->owner)) 1166 if (!try_module_get(pf->owner))
1167 goto out_release; 1167 goto out_release;
1168 1168
1169 /* Now protected by module ref count */ 1169 /* Now protected by module ref count */
1170 rcu_read_unlock(); 1170 rcu_read_unlock();
1171 1171
1172 err = pf->create(net, sock, protocol); 1172 err = pf->create(net, sock, protocol);
1173 if (err < 0) 1173 if (err < 0)
1174 goto out_module_put; 1174 goto out_module_put;
1175 1175
1176 /* 1176 /*
1177 * Now to bump the refcnt of the [loadable] module that owns this 1177 * Now to bump the refcnt of the [loadable] module that owns this
1178 * socket at sock_release time we decrement its refcnt. 1178 * socket at sock_release time we decrement its refcnt.
1179 */ 1179 */
1180 if (!try_module_get(sock->ops->owner)) 1180 if (!try_module_get(sock->ops->owner))
1181 goto out_module_busy; 1181 goto out_module_busy;
1182 1182
1183 /* 1183 /*
1184 * Now that we're done with the ->create function, the [loadable] 1184 * Now that we're done with the ->create function, the [loadable]
1185 * module can have its refcnt decremented 1185 * module can have its refcnt decremented
1186 */ 1186 */
1187 module_put(pf->owner); 1187 module_put(pf->owner);
1188 err = security_socket_post_create(sock, family, type, protocol, kern); 1188 err = security_socket_post_create(sock, family, type, protocol, kern);
1189 if (err) 1189 if (err)
1190 goto out_sock_release; 1190 goto out_sock_release;
1191 *res = sock; 1191 *res = sock;
1192 1192
1193 return 0; 1193 return 0;
1194 1194
1195 out_module_busy: 1195 out_module_busy:
1196 err = -EAFNOSUPPORT; 1196 err = -EAFNOSUPPORT;
1197 out_module_put: 1197 out_module_put:
1198 sock->ops = NULL; 1198 sock->ops = NULL;
1199 module_put(pf->owner); 1199 module_put(pf->owner);
1200 out_sock_release: 1200 out_sock_release:
1201 sock_release(sock); 1201 sock_release(sock);
1202 return err; 1202 return err;
1203 1203
1204 out_release: 1204 out_release:
1205 rcu_read_unlock(); 1205 rcu_read_unlock();
1206 goto out_sock_release; 1206 goto out_sock_release;
1207 } 1207 }
1208 1208
1209 int sock_create(int family, int type, int protocol, struct socket **res) 1209 int sock_create(int family, int type, int protocol, struct socket **res)
1210 { 1210 {
1211 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1211 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1212 } 1212 }
1213 1213
1214 int sock_create_kern(int family, int type, int protocol, struct socket **res) 1214 int sock_create_kern(int family, int type, int protocol, struct socket **res)
1215 { 1215 {
1216 return __sock_create(&init_net, family, type, protocol, res, 1); 1216 return __sock_create(&init_net, family, type, protocol, res, 1);
1217 } 1217 }
1218 1218
1219 asmlinkage long sys_socket(int family, int type, int protocol) 1219 asmlinkage long sys_socket(int family, int type, int protocol)
1220 { 1220 {
1221 int retval; 1221 int retval;
1222 struct socket *sock; 1222 struct socket *sock;
1223 int flags; 1223 int flags;
1224 1224
1225 flags = type & ~SOCK_TYPE_MASK; 1225 flags = type & ~SOCK_TYPE_MASK;
1226 if (flags & ~SOCK_CLOEXEC) 1226 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1227 return -EINVAL; 1227 return -EINVAL;
1228 type &= SOCK_TYPE_MASK; 1228 type &= SOCK_TYPE_MASK;
1229 1229
1230 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1230 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1231 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1231 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1232 1232
1233 retval = sock_create(family, type, protocol, &sock); 1233 retval = sock_create(family, type, protocol, &sock);
1234 if (retval < 0) 1234 if (retval < 0)
1235 goto out; 1235 goto out;
1236 1236
1237 retval = sock_map_fd(sock, flags & O_CLOEXEC); 1237 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1238 if (retval < 0) 1238 if (retval < 0)
1239 goto out_release; 1239 goto out_release;
1240 1240
1241 out: 1241 out:
1242 /* It may be already another descriptor 8) Not kernel problem. */ 1242 /* It may be already another descriptor 8) Not kernel problem. */
1243 return retval; 1243 return retval;
1244 1244
1245 out_release: 1245 out_release:
1246 sock_release(sock); 1246 sock_release(sock);
1247 return retval; 1247 return retval;
1248 } 1248 }
1249 1249
1250 /* 1250 /*
1251 * Create a pair of connected sockets. 1251 * Create a pair of connected sockets.
1252 */ 1252 */
1253 1253
1254 asmlinkage long sys_socketpair(int family, int type, int protocol, 1254 asmlinkage long sys_socketpair(int family, int type, int protocol,
1255 int __user *usockvec) 1255 int __user *usockvec)
1256 { 1256 {
1257 struct socket *sock1, *sock2; 1257 struct socket *sock1, *sock2;
1258 int fd1, fd2, err; 1258 int fd1, fd2, err;
1259 struct file *newfile1, *newfile2; 1259 struct file *newfile1, *newfile2;
1260 int flags; 1260 int flags;
1261 1261
1262 flags = type & ~SOCK_TYPE_MASK; 1262 flags = type & ~SOCK_TYPE_MASK;
1263 if (flags & ~SOCK_CLOEXEC) 1263 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1264 return -EINVAL; 1264 return -EINVAL;
1265 type &= SOCK_TYPE_MASK; 1265 type &= SOCK_TYPE_MASK;
1266 1266
1267 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1267 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1268 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1268 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1269 1269
1270 /* 1270 /*
1271 * Obtain the first socket and check if the underlying protocol 1271 * Obtain the first socket and check if the underlying protocol
1272 * supports the socketpair call. 1272 * supports the socketpair call.
1273 */ 1273 */
1274 1274
1275 err = sock_create(family, type, protocol, &sock1); 1275 err = sock_create(family, type, protocol, &sock1);
1276 if (err < 0) 1276 if (err < 0)
1277 goto out; 1277 goto out;
1278 1278
1279 err = sock_create(family, type, protocol, &sock2); 1279 err = sock_create(family, type, protocol, &sock2);
1280 if (err < 0) 1280 if (err < 0)
1281 goto out_release_1; 1281 goto out_release_1;
1282 1282
1283 err = sock1->ops->socketpair(sock1, sock2); 1283 err = sock1->ops->socketpair(sock1, sock2);
1284 if (err < 0) 1284 if (err < 0)
1285 goto out_release_both; 1285 goto out_release_both;
1286 1286
1287 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); 1287 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
1288 if (unlikely(fd1 < 0)) { 1288 if (unlikely(fd1 < 0)) {
1289 err = fd1; 1289 err = fd1;
1290 goto out_release_both; 1290 goto out_release_both;
1291 } 1291 }
1292 1292
1293 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); 1293 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
1294 if (unlikely(fd2 < 0)) { 1294 if (unlikely(fd2 < 0)) {
1295 err = fd2; 1295 err = fd2;
1296 put_filp(newfile1); 1296 put_filp(newfile1);
1297 put_unused_fd(fd1); 1297 put_unused_fd(fd1);
1298 goto out_release_both; 1298 goto out_release_both;
1299 } 1299 }
1300 1300
1301 err = sock_attach_fd(sock1, newfile1); 1301 err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
1302 if (unlikely(err < 0)) { 1302 if (unlikely(err < 0)) {
1303 goto out_fd2; 1303 goto out_fd2;
1304 } 1304 }
1305 1305
1306 err = sock_attach_fd(sock2, newfile2); 1306 err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
1307 if (unlikely(err < 0)) { 1307 if (unlikely(err < 0)) {
1308 fput(newfile1); 1308 fput(newfile1);
1309 goto out_fd1; 1309 goto out_fd1;
1310 } 1310 }
1311 1311
1312 err = audit_fd_pair(fd1, fd2); 1312 err = audit_fd_pair(fd1, fd2);
1313 if (err < 0) { 1313 if (err < 0) {
1314 fput(newfile1); 1314 fput(newfile1);
1315 fput(newfile2); 1315 fput(newfile2);
1316 goto out_fd; 1316 goto out_fd;
1317 } 1317 }
1318 1318
1319 fd_install(fd1, newfile1); 1319 fd_install(fd1, newfile1);
1320 fd_install(fd2, newfile2); 1320 fd_install(fd2, newfile2);
1321 /* fd1 and fd2 may be already another descriptors. 1321 /* fd1 and fd2 may be already another descriptors.
1322 * Not kernel problem. 1322 * Not kernel problem.
1323 */ 1323 */
1324 1324
1325 err = put_user(fd1, &usockvec[0]); 1325 err = put_user(fd1, &usockvec[0]);
1326 if (!err) 1326 if (!err)
1327 err = put_user(fd2, &usockvec[1]); 1327 err = put_user(fd2, &usockvec[1]);
1328 if (!err) 1328 if (!err)
1329 return 0; 1329 return 0;
1330 1330
1331 sys_close(fd2); 1331 sys_close(fd2);
1332 sys_close(fd1); 1332 sys_close(fd1);
1333 return err; 1333 return err;
1334 1334
1335 out_release_both: 1335 out_release_both:
1336 sock_release(sock2); 1336 sock_release(sock2);
1337 out_release_1: 1337 out_release_1:
1338 sock_release(sock1); 1338 sock_release(sock1);
1339 out: 1339 out:
1340 return err; 1340 return err;
1341 1341
1342 out_fd2: 1342 out_fd2:
1343 put_filp(newfile1); 1343 put_filp(newfile1);
1344 sock_release(sock1); 1344 sock_release(sock1);
1345 out_fd1: 1345 out_fd1:
1346 put_filp(newfile2); 1346 put_filp(newfile2);
1347 sock_release(sock2); 1347 sock_release(sock2);
1348 out_fd: 1348 out_fd:
1349 put_unused_fd(fd1); 1349 put_unused_fd(fd1);
1350 put_unused_fd(fd2); 1350 put_unused_fd(fd2);
1351 goto out; 1351 goto out;
1352 } 1352 }
1353 1353
1354 /* 1354 /*
1355 * Bind a name to a socket. Nothing much to do here since it's 1355 * Bind a name to a socket. Nothing much to do here since it's
1356 * the protocol's responsibility to handle the local address. 1356 * the protocol's responsibility to handle the local address.
1357 * 1357 *
1358 * We move the socket address to kernel space before we call 1358 * We move the socket address to kernel space before we call
1359 * the protocol layer (having also checked the address is ok). 1359 * the protocol layer (having also checked the address is ok).
1360 */ 1360 */
1361 1361
1362 asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) 1362 asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1363 { 1363 {
1364 struct socket *sock; 1364 struct socket *sock;
1365 struct sockaddr_storage address; 1365 struct sockaddr_storage address;
1366 int err, fput_needed; 1366 int err, fput_needed;
1367 1367
1368 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1368 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1369 if (sock) { 1369 if (sock) {
1370 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); 1370 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
1371 if (err >= 0) { 1371 if (err >= 0) {
1372 err = security_socket_bind(sock, 1372 err = security_socket_bind(sock,
1373 (struct sockaddr *)&address, 1373 (struct sockaddr *)&address,
1374 addrlen); 1374 addrlen);
1375 if (!err) 1375 if (!err)
1376 err = sock->ops->bind(sock, 1376 err = sock->ops->bind(sock,
1377 (struct sockaddr *) 1377 (struct sockaddr *)
1378 &address, addrlen); 1378 &address, addrlen);
1379 } 1379 }
1380 fput_light(sock->file, fput_needed); 1380 fput_light(sock->file, fput_needed);
1381 } 1381 }
1382 return err; 1382 return err;
1383 } 1383 }
1384 1384
1385 /* 1385 /*
1386 * Perform a listen. Basically, we allow the protocol to do anything 1386 * Perform a listen. Basically, we allow the protocol to do anything
1387 * necessary for a listen, and if that works, we mark the socket as 1387 * necessary for a listen, and if that works, we mark the socket as
1388 * ready for listening. 1388 * ready for listening.
1389 */ 1389 */
1390 1390
1391 asmlinkage long sys_listen(int fd, int backlog) 1391 asmlinkage long sys_listen(int fd, int backlog)
1392 { 1392 {
1393 struct socket *sock; 1393 struct socket *sock;
1394 int err, fput_needed; 1394 int err, fput_needed;
1395 int somaxconn; 1395 int somaxconn;
1396 1396
1397 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1397 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1398 if (sock) { 1398 if (sock) {
1399 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1399 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1400 if ((unsigned)backlog > somaxconn) 1400 if ((unsigned)backlog > somaxconn)
1401 backlog = somaxconn; 1401 backlog = somaxconn;
1402 1402
1403 err = security_socket_listen(sock, backlog); 1403 err = security_socket_listen(sock, backlog);
1404 if (!err) 1404 if (!err)
1405 err = sock->ops->listen(sock, backlog); 1405 err = sock->ops->listen(sock, backlog);
1406 1406
1407 fput_light(sock->file, fput_needed); 1407 fput_light(sock->file, fput_needed);
1408 } 1408 }
1409 return err; 1409 return err;
1410 } 1410 }
1411 1411
1412 /* 1412 /*
1413 * For accept, we attempt to create a new socket, set up the link 1413 * For accept, we attempt to create a new socket, set up the link
1414 * with the client, wake up the client, then return the new 1414 * with the client, wake up the client, then return the new
1415 * connected fd. We collect the address of the connector in kernel 1415 * connected fd. We collect the address of the connector in kernel
1416 * space and move it to user at the very end. This is unclean because 1416 * space and move it to user at the very end. This is unclean because
1417 * we open the socket then return an error. 1417 * we open the socket then return an error.
1418 * 1418 *
1419 * 1003.1g adds the ability to recvmsg() to query connection pending 1419 * 1003.1g adds the ability to recvmsg() to query connection pending
1420 * status to recvmsg. We need to add that support in a way thats 1420 * status to recvmsg. We need to add that support in a way thats
1421 * clean when we restucture accept also. 1421 * clean when we restucture accept also.
1422 */ 1422 */
1423 1423
1424 long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1424 long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1425 int __user *upeer_addrlen, int flags) 1425 int __user *upeer_addrlen, int flags)
1426 { 1426 {
1427 struct socket *sock, *newsock; 1427 struct socket *sock, *newsock;
1428 struct file *newfile; 1428 struct file *newfile;
1429 int err, len, newfd, fput_needed; 1429 int err, len, newfd, fput_needed;
1430 struct sockaddr_storage address; 1430 struct sockaddr_storage address;
1431 1431
1432 if (flags & ~SOCK_CLOEXEC) 1432 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1433 return -EINVAL; 1433 return -EINVAL;
1434 1434
1435 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1435 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1436 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1436 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1437 1437
1438 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1438 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1439 if (!sock) 1439 if (!sock)
1440 goto out; 1440 goto out;
1441 1441
1442 err = -ENFILE; 1442 err = -ENFILE;
1443 if (!(newsock = sock_alloc())) 1443 if (!(newsock = sock_alloc()))
1444 goto out_put; 1444 goto out_put;
1445 1445
1446 newsock->type = sock->type; 1446 newsock->type = sock->type;
1447 newsock->ops = sock->ops; 1447 newsock->ops = sock->ops;
1448 1448
1449 /* 1449 /*
1450 * We don't need try_module_get here, as the listening socket (sock) 1450 * We don't need try_module_get here, as the listening socket (sock)
1451 * has the protocol module (sock->ops->owner) held. 1451 * has the protocol module (sock->ops->owner) held.
1452 */ 1452 */
1453 __module_get(newsock->ops->owner); 1453 __module_get(newsock->ops->owner);
1454 1454
1455 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); 1455 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
1456 if (unlikely(newfd < 0)) { 1456 if (unlikely(newfd < 0)) {
1457 err = newfd; 1457 err = newfd;
1458 sock_release(newsock); 1458 sock_release(newsock);
1459 goto out_put; 1459 goto out_put;
1460 } 1460 }
1461 1461
1462 err = sock_attach_fd(newsock, newfile); 1462 err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
1463 if (err < 0) 1463 if (err < 0)
1464 goto out_fd_simple; 1464 goto out_fd_simple;
1465 1465
1466 err = security_socket_accept(sock, newsock); 1466 err = security_socket_accept(sock, newsock);
1467 if (err) 1467 if (err)
1468 goto out_fd; 1468 goto out_fd;
1469 1469
1470 err = sock->ops->accept(sock, newsock, sock->file->f_flags); 1470 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1471 if (err < 0) 1471 if (err < 0)
1472 goto out_fd; 1472 goto out_fd;
1473 1473
1474 if (upeer_sockaddr) { 1474 if (upeer_sockaddr) {
1475 if (newsock->ops->getname(newsock, (struct sockaddr *)&address, 1475 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
1476 &len, 2) < 0) { 1476 &len, 2) < 0) {
1477 err = -ECONNABORTED; 1477 err = -ECONNABORTED;
1478 goto out_fd; 1478 goto out_fd;
1479 } 1479 }
1480 err = move_addr_to_user((struct sockaddr *)&address, 1480 err = move_addr_to_user((struct sockaddr *)&address,
1481 len, upeer_sockaddr, upeer_addrlen); 1481 len, upeer_sockaddr, upeer_addrlen);
1482 if (err < 0) 1482 if (err < 0)
1483 goto out_fd; 1483 goto out_fd;
1484 } 1484 }
1485 1485
1486 /* File flags are not inherited via accept() unlike another OSes. */ 1486 /* File flags are not inherited via accept() unlike another OSes. */
1487 1487
1488 fd_install(newfd, newfile); 1488 fd_install(newfd, newfile);
1489 err = newfd; 1489 err = newfd;
1490 1490
1491 security_socket_post_accept(sock, newsock); 1491 security_socket_post_accept(sock, newsock);
1492 1492
1493 out_put: 1493 out_put:
1494 fput_light(sock->file, fput_needed); 1494 fput_light(sock->file, fput_needed);
1495 out: 1495 out:
1496 return err; 1496 return err;
1497 out_fd_simple: 1497 out_fd_simple:
1498 sock_release(newsock); 1498 sock_release(newsock);
1499 put_filp(newfile); 1499 put_filp(newfile);
1500 put_unused_fd(newfd); 1500 put_unused_fd(newfd);
1501 goto out_put; 1501 goto out_put;
1502 out_fd: 1502 out_fd:
1503 fput(newfile); 1503 fput(newfile);
1504 put_unused_fd(newfd); 1504 put_unused_fd(newfd);
1505 goto out_put; 1505 goto out_put;
1506 } 1506 }
1507 1507
1508 #ifdef HAVE_SET_RESTORE_SIGMASK 1508 #ifdef HAVE_SET_RESTORE_SIGMASK
1509 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 1509 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
1510 int __user *upeer_addrlen, 1510 int __user *upeer_addrlen,
1511 const sigset_t __user *sigmask, 1511 const sigset_t __user *sigmask,
1512 size_t sigsetsize, int flags) 1512 size_t sigsetsize, int flags)
1513 { 1513 {
1514 sigset_t ksigmask, sigsaved; 1514 sigset_t ksigmask, sigsaved;
1515 int ret; 1515 int ret;
1516 1516
1517 if (sigmask) { 1517 if (sigmask) {
1518 /* XXX: Don't preclude handling different sized sigset_t's. */ 1518 /* XXX: Don't preclude handling different sized sigset_t's. */
1519 if (sigsetsize != sizeof(sigset_t)) 1519 if (sigsetsize != sizeof(sigset_t))
1520 return -EINVAL; 1520 return -EINVAL;
1521 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) 1521 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
1522 return -EFAULT; 1522 return -EFAULT;
1523 1523
1524 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1524 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1525 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1525 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1526 } 1526 }
1527 1527
1528 ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 1528 ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
1529 1529
1530 if (ret < 0 && signal_pending(current)) { 1530 if (ret < 0 && signal_pending(current)) {
1531 /* 1531 /*
1532 * Don't restore the signal mask yet. Let do_signal() deliver 1532 * Don't restore the signal mask yet. Let do_signal() deliver
1533 * the signal on the way back to userspace, before the signal 1533 * the signal on the way back to userspace, before the signal
1534 * mask is restored. 1534 * mask is restored.
1535 */ 1535 */
1536 if (sigmask) { 1536 if (sigmask) {
1537 memcpy(&current->saved_sigmask, &sigsaved, 1537 memcpy(&current->saved_sigmask, &sigsaved,
1538 sizeof(sigsaved)); 1538 sizeof(sigsaved));
1539 set_restore_sigmask(); 1539 set_restore_sigmask();
1540 } 1540 }
1541 } else if (sigmask) 1541 } else if (sigmask)
1542 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1542 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1543 1543
1544 return ret; 1544 return ret;
1545 } 1545 }
1546 #else 1546 #else
1547 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, 1547 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
1548 int __user *upeer_addrlen, 1548 int __user *upeer_addrlen,
1549 const sigset_t __user *sigmask, 1549 const sigset_t __user *sigmask,
1550 size_t sigsetsize, int flags) 1550 size_t sigsetsize, int flags)
1551 { 1551 {
1552 /* The platform does not support restoring the signal mask in the 1552 /* The platform does not support restoring the signal mask in the
1553 * return path. So we do not allow using paccept() with a signal 1553 * return path. So we do not allow using paccept() with a signal
1554 * mask. */ 1554 * mask. */
1555 if (sigmask) 1555 if (sigmask)
1556 return -EINVAL; 1556 return -EINVAL;
1557 1557
1558 return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); 1558 return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
1559 } 1559 }
1560 #endif 1560 #endif
1561 1561
1562 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, 1562 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1563 int __user *upeer_addrlen) 1563 int __user *upeer_addrlen)
1564 { 1564 {
1565 return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); 1565 return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
1566 } 1566 }
1567 1567
1568 /* 1568 /*
1569 * Attempt to connect to a socket with the server address. The address 1569 * Attempt to connect to a socket with the server address. The address
1570 * is in user space so we verify it is OK and move it to kernel space. 1570 * is in user space so we verify it is OK and move it to kernel space.
1571 * 1571 *
1572 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1572 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1573 * break bindings 1573 * break bindings
1574 * 1574 *
1575 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1575 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1576 * other SEQPACKET protocols that take time to connect() as it doesn't 1576 * other SEQPACKET protocols that take time to connect() as it doesn't
1577 * include the -EINPROGRESS status for such sockets. 1577 * include the -EINPROGRESS status for such sockets.
1578 */ 1578 */
1579 1579
1580 asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, 1580 asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1581 int addrlen) 1581 int addrlen)
1582 { 1582 {
1583 struct socket *sock; 1583 struct socket *sock;
1584 struct sockaddr_storage address; 1584 struct sockaddr_storage address;
1585 int err, fput_needed; 1585 int err, fput_needed;
1586 1586
1587 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1587 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1588 if (!sock) 1588 if (!sock)
1589 goto out; 1589 goto out;
1590 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); 1590 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1591 if (err < 0) 1591 if (err < 0)
1592 goto out_put; 1592 goto out_put;
1593 1593
1594 err = 1594 err =
1595 security_socket_connect(sock, (struct sockaddr *)&address, addrlen); 1595 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1596 if (err) 1596 if (err)
1597 goto out_put; 1597 goto out_put;
1598 1598
1599 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, 1599 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1600 sock->file->f_flags); 1600 sock->file->f_flags);
1601 out_put: 1601 out_put:
1602 fput_light(sock->file, fput_needed); 1602 fput_light(sock->file, fput_needed);
1603 out: 1603 out:
1604 return err; 1604 return err;
1605 } 1605 }
1606 1606
1607 /* 1607 /*
1608 * Get the local address ('name') of a socket object. Move the obtained 1608 * Get the local address ('name') of a socket object. Move the obtained
1609 * name to user space. 1609 * name to user space.
1610 */ 1610 */
1611 1611
1612 asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, 1612 asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1613 int __user *usockaddr_len) 1613 int __user *usockaddr_len)
1614 { 1614 {
1615 struct socket *sock; 1615 struct socket *sock;
1616 struct sockaddr_storage address; 1616 struct sockaddr_storage address;
1617 int len, err, fput_needed; 1617 int len, err, fput_needed;
1618 1618
1619 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1619 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1620 if (!sock) 1620 if (!sock)
1621 goto out; 1621 goto out;
1622 1622
1623 err = security_socket_getsockname(sock); 1623 err = security_socket_getsockname(sock);
1624 if (err) 1624 if (err)
1625 goto out_put; 1625 goto out_put;
1626 1626
1627 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); 1627 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1628 if (err) 1628 if (err)
1629 goto out_put; 1629 goto out_put;
1630 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); 1630 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1631 1631
1632 out_put: 1632 out_put:
1633 fput_light(sock->file, fput_needed); 1633 fput_light(sock->file, fput_needed);
1634 out: 1634 out:
1635 return err; 1635 return err;
1636 } 1636 }
1637 1637
1638 /* 1638 /*
1639 * Get the remote address ('name') of a socket object. Move the obtained 1639 * Get the remote address ('name') of a socket object. Move the obtained
1640 * name to user space. 1640 * name to user space.
1641 */ 1641 */
1642 1642
1643 asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, 1643 asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1644 int __user *usockaddr_len) 1644 int __user *usockaddr_len)
1645 { 1645 {
1646 struct socket *sock; 1646 struct socket *sock;
1647 struct sockaddr_storage address; 1647 struct sockaddr_storage address;
1648 int len, err, fput_needed; 1648 int len, err, fput_needed;
1649 1649
1650 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1650 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1651 if (sock != NULL) { 1651 if (sock != NULL) {
1652 err = security_socket_getpeername(sock); 1652 err = security_socket_getpeername(sock);
1653 if (err) { 1653 if (err) {
1654 fput_light(sock->file, fput_needed); 1654 fput_light(sock->file, fput_needed);
1655 return err; 1655 return err;
1656 } 1656 }
1657 1657
1658 err = 1658 err =
1659 sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1659 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
1660 1); 1660 1);
1661 if (!err) 1661 if (!err)
1662 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, 1662 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
1663 usockaddr_len); 1663 usockaddr_len);
1664 fput_light(sock->file, fput_needed); 1664 fput_light(sock->file, fput_needed);
1665 } 1665 }
1666 return err; 1666 return err;
1667 } 1667 }
1668 1668
1669 /* 1669 /*
1670 * Send a datagram to a given address. We move the address into kernel 1670 * Send a datagram to a given address. We move the address into kernel
1671 * space and check the user space data area is readable before invoking 1671 * space and check the user space data area is readable before invoking
1672 * the protocol. 1672 * the protocol.
1673 */ 1673 */
1674 1674
1675 asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, 1675 asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1676 unsigned flags, struct sockaddr __user *addr, 1676 unsigned flags, struct sockaddr __user *addr,
1677 int addr_len) 1677 int addr_len)
1678 { 1678 {
1679 struct socket *sock; 1679 struct socket *sock;
1680 struct sockaddr_storage address; 1680 struct sockaddr_storage address;
1681 int err; 1681 int err;
1682 struct msghdr msg; 1682 struct msghdr msg;
1683 struct iovec iov; 1683 struct iovec iov;
1684 int fput_needed; 1684 int fput_needed;
1685 1685
1686 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687 if (!sock) 1687 if (!sock)
1688 goto out; 1688 goto out;
1689 1689
1690 iov.iov_base = buff; 1690 iov.iov_base = buff;
1691 iov.iov_len = len; 1691 iov.iov_len = len;
1692 msg.msg_name = NULL; 1692 msg.msg_name = NULL;
1693 msg.msg_iov = &iov; 1693 msg.msg_iov = &iov;
1694 msg.msg_iovlen = 1; 1694 msg.msg_iovlen = 1;
1695 msg.msg_control = NULL; 1695 msg.msg_control = NULL;
1696 msg.msg_controllen = 0; 1696 msg.msg_controllen = 0;
1697 msg.msg_namelen = 0; 1697 msg.msg_namelen = 0;
1698 if (addr) { 1698 if (addr) {
1699 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); 1699 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1700 if (err < 0) 1700 if (err < 0)
1701 goto out_put; 1701 goto out_put;
1702 msg.msg_name = (struct sockaddr *)&address; 1702 msg.msg_name = (struct sockaddr *)&address;
1703 msg.msg_namelen = addr_len; 1703 msg.msg_namelen = addr_len;
1704 } 1704 }
1705 if (sock->file->f_flags & O_NONBLOCK) 1705 if (sock->file->f_flags & O_NONBLOCK)
1706 flags |= MSG_DONTWAIT; 1706 flags |= MSG_DONTWAIT;
1707 msg.msg_flags = flags; 1707 msg.msg_flags = flags;
1708 err = sock_sendmsg(sock, &msg, len); 1708 err = sock_sendmsg(sock, &msg, len);
1709 1709
1710 out_put: 1710 out_put:
1711 fput_light(sock->file, fput_needed); 1711 fput_light(sock->file, fput_needed);
1712 out: 1712 out:
1713 return err; 1713 return err;
1714 } 1714 }
1715 1715
1716 /* 1716 /*
1717 * Send a datagram down a socket. 1717 * Send a datagram down a socket.
1718 */ 1718 */
1719 1719
1720 asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) 1720 asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1721 { 1721 {
1722 return sys_sendto(fd, buff, len, flags, NULL, 0); 1722 return sys_sendto(fd, buff, len, flags, NULL, 0);
1723 } 1723 }
1724 1724
1725 /* 1725 /*
1726 * Receive a frame from the socket and optionally record the address of the 1726 * Receive a frame from the socket and optionally record the address of the
1727 * sender. We verify the buffers are writable and if needed move the 1727 * sender. We verify the buffers are writable and if needed move the
1728 * sender address from kernel to user space. 1728 * sender address from kernel to user space.
1729 */ 1729 */
1730 1730
1731 asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, 1731 asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1732 unsigned flags, struct sockaddr __user *addr, 1732 unsigned flags, struct sockaddr __user *addr,
1733 int __user *addr_len) 1733 int __user *addr_len)
1734 { 1734 {
1735 struct socket *sock; 1735 struct socket *sock;
1736 struct iovec iov; 1736 struct iovec iov;
1737 struct msghdr msg; 1737 struct msghdr msg;
1738 struct sockaddr_storage address; 1738 struct sockaddr_storage address;
1739 int err, err2; 1739 int err, err2;
1740 int fput_needed; 1740 int fput_needed;
1741 1741
1742 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1742 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1743 if (!sock) 1743 if (!sock)
1744 goto out; 1744 goto out;
1745 1745
1746 msg.msg_control = NULL; 1746 msg.msg_control = NULL;
1747 msg.msg_controllen = 0; 1747 msg.msg_controllen = 0;
1748 msg.msg_iovlen = 1; 1748 msg.msg_iovlen = 1;
1749 msg.msg_iov = &iov; 1749 msg.msg_iov = &iov;
1750 iov.iov_len = size; 1750 iov.iov_len = size;
1751 iov.iov_base = ubuf; 1751 iov.iov_base = ubuf;
1752 msg.msg_name = (struct sockaddr *)&address; 1752 msg.msg_name = (struct sockaddr *)&address;
1753 msg.msg_namelen = sizeof(address); 1753 msg.msg_namelen = sizeof(address);
1754 if (sock->file->f_flags & O_NONBLOCK) 1754 if (sock->file->f_flags & O_NONBLOCK)
1755 flags |= MSG_DONTWAIT; 1755 flags |= MSG_DONTWAIT;
1756 err = sock_recvmsg(sock, &msg, size, flags); 1756 err = sock_recvmsg(sock, &msg, size, flags);
1757 1757
1758 if (err >= 0 && addr != NULL) { 1758 if (err >= 0 && addr != NULL) {
1759 err2 = move_addr_to_user((struct sockaddr *)&address, 1759 err2 = move_addr_to_user((struct sockaddr *)&address,
1760 msg.msg_namelen, addr, addr_len); 1760 msg.msg_namelen, addr, addr_len);
1761 if (err2 < 0) 1761 if (err2 < 0)
1762 err = err2; 1762 err = err2;
1763 } 1763 }
1764 1764
1765 fput_light(sock->file, fput_needed); 1765 fput_light(sock->file, fput_needed);
1766 out: 1766 out:
1767 return err; 1767 return err;
1768 } 1768 }
1769 1769
1770 /* 1770 /*
1771 * Receive a datagram from a socket. 1771 * Receive a datagram from a socket.
1772 */ 1772 */
1773 1773
1774 asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 1774 asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1775 unsigned flags) 1775 unsigned flags)
1776 { 1776 {
1777 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1777 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1778 } 1778 }
1779 1779
1780 /* 1780 /*
1781 * Set a socket option. Because we don't know the option lengths we have 1781 * Set a socket option. Because we don't know the option lengths we have
1782 * to pass the user mode parameter for the protocols to sort out. 1782 * to pass the user mode parameter for the protocols to sort out.
1783 */ 1783 */
1784 1784
1785 asmlinkage long sys_setsockopt(int fd, int level, int optname, 1785 asmlinkage long sys_setsockopt(int fd, int level, int optname,
1786 char __user *optval, int optlen) 1786 char __user *optval, int optlen)
1787 { 1787 {
1788 int err, fput_needed; 1788 int err, fput_needed;
1789 struct socket *sock; 1789 struct socket *sock;
1790 1790
1791 if (optlen < 0) 1791 if (optlen < 0)
1792 return -EINVAL; 1792 return -EINVAL;
1793 1793
1794 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1794 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1795 if (sock != NULL) { 1795 if (sock != NULL) {
1796 err = security_socket_setsockopt(sock, level, optname); 1796 err = security_socket_setsockopt(sock, level, optname);
1797 if (err) 1797 if (err)
1798 goto out_put; 1798 goto out_put;
1799 1799
1800 if (level == SOL_SOCKET) 1800 if (level == SOL_SOCKET)
1801 err = 1801 err =
1802 sock_setsockopt(sock, level, optname, optval, 1802 sock_setsockopt(sock, level, optname, optval,
1803 optlen); 1803 optlen);
1804 else 1804 else
1805 err = 1805 err =
1806 sock->ops->setsockopt(sock, level, optname, optval, 1806 sock->ops->setsockopt(sock, level, optname, optval,
1807 optlen); 1807 optlen);
1808 out_put: 1808 out_put:
1809 fput_light(sock->file, fput_needed); 1809 fput_light(sock->file, fput_needed);
1810 } 1810 }
1811 return err; 1811 return err;
1812 } 1812 }
1813 1813
1814 /* 1814 /*
1815 * Get a socket option. Because we don't know the option lengths we have 1815 * Get a socket option. Because we don't know the option lengths we have
1816 * to pass a user mode parameter for the protocols to sort out. 1816 * to pass a user mode parameter for the protocols to sort out.
1817 */ 1817 */
1818 1818
1819 asmlinkage long sys_getsockopt(int fd, int level, int optname, 1819 asmlinkage long sys_getsockopt(int fd, int level, int optname,
1820 char __user *optval, int __user *optlen) 1820 char __user *optval, int __user *optlen)
1821 { 1821 {
1822 int err, fput_needed; 1822 int err, fput_needed;
1823 struct socket *sock; 1823 struct socket *sock;
1824 1824
1825 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1825 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1826 if (sock != NULL) { 1826 if (sock != NULL) {
1827 err = security_socket_getsockopt(sock, level, optname); 1827 err = security_socket_getsockopt(sock, level, optname);
1828 if (err) 1828 if (err)
1829 goto out_put; 1829 goto out_put;
1830 1830
1831 if (level == SOL_SOCKET) 1831 if (level == SOL_SOCKET)
1832 err = 1832 err =
1833 sock_getsockopt(sock, level, optname, optval, 1833 sock_getsockopt(sock, level, optname, optval,
1834 optlen); 1834 optlen);
1835 else 1835 else
1836 err = 1836 err =
1837 sock->ops->getsockopt(sock, level, optname, optval, 1837 sock->ops->getsockopt(sock, level, optname, optval,
1838 optlen); 1838 optlen);
1839 out_put: 1839 out_put:
1840 fput_light(sock->file, fput_needed); 1840 fput_light(sock->file, fput_needed);
1841 } 1841 }
1842 return err; 1842 return err;
1843 } 1843 }
1844 1844
1845 /* 1845 /*
1846 * Shutdown a socket. 1846 * Shutdown a socket.
1847 */ 1847 */
1848 1848
1849 asmlinkage long sys_shutdown(int fd, int how) 1849 asmlinkage long sys_shutdown(int fd, int how)
1850 { 1850 {
1851 int err, fput_needed; 1851 int err, fput_needed;
1852 struct socket *sock; 1852 struct socket *sock;
1853 1853
1854 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1854 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1855 if (sock != NULL) { 1855 if (sock != NULL) {
1856 err = security_socket_shutdown(sock, how); 1856 err = security_socket_shutdown(sock, how);
1857 if (!err) 1857 if (!err)
1858 err = sock->ops->shutdown(sock, how); 1858 err = sock->ops->shutdown(sock, how);
1859 fput_light(sock->file, fput_needed); 1859 fput_light(sock->file, fput_needed);
1860 } 1860 }
1861 return err; 1861 return err;
1862 } 1862 }
1863 1863
/*
 * Helper macros yielding the address of a msghdr member in either the
 * native or the compat layout; the fields concerned have the same type
 * (int / unsigned) in both.
 *
 * They expand in the caller's scope and rely on a local `flags` plus a
 * `<msg>_compat` pointer being visible there.
 */
#define COMPAT_MSG(msg, member)	\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1870 1870
1871 /* 1871 /*
1872 * BSD sendmsg interface 1872 * BSD sendmsg interface
1873 */ 1873 */
1874 1874
1875 asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) 1875 asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1876 { 1876 {
1877 struct compat_msghdr __user *msg_compat = 1877 struct compat_msghdr __user *msg_compat =
1878 (struct compat_msghdr __user *)msg; 1878 (struct compat_msghdr __user *)msg;
1879 struct socket *sock; 1879 struct socket *sock;
1880 struct sockaddr_storage address; 1880 struct sockaddr_storage address;
1881 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1881 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1882 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1882 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1883 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1883 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1884 /* 20 is size of ipv6_pktinfo */ 1884 /* 20 is size of ipv6_pktinfo */
1885 unsigned char *ctl_buf = ctl; 1885 unsigned char *ctl_buf = ctl;
1886 struct msghdr msg_sys; 1886 struct msghdr msg_sys;
1887 int err, ctl_len, iov_size, total_len; 1887 int err, ctl_len, iov_size, total_len;
1888 int fput_needed; 1888 int fput_needed;
1889 1889
1890 err = -EFAULT; 1890 err = -EFAULT;
1891 if (MSG_CMSG_COMPAT & flags) { 1891 if (MSG_CMSG_COMPAT & flags) {
1892 if (get_compat_msghdr(&msg_sys, msg_compat)) 1892 if (get_compat_msghdr(&msg_sys, msg_compat))
1893 return -EFAULT; 1893 return -EFAULT;
1894 } 1894 }
1895 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1895 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1896 return -EFAULT; 1896 return -EFAULT;
1897 1897
1898 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1898 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1899 if (!sock) 1899 if (!sock)
1900 goto out; 1900 goto out;
1901 1901
1902 /* do not move before msg_sys is valid */ 1902 /* do not move before msg_sys is valid */
1903 err = -EMSGSIZE; 1903 err = -EMSGSIZE;
1904 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1904 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1905 goto out_put; 1905 goto out_put;
1906 1906
1907 /* Check whether to allocate the iovec area */ 1907 /* Check whether to allocate the iovec area */
1908 err = -ENOMEM; 1908 err = -ENOMEM;
1909 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1909 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1910 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1910 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1911 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1911 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1912 if (!iov) 1912 if (!iov)
1913 goto out_put; 1913 goto out_put;
1914 } 1914 }
1915 1915
1916 /* This will also move the address data into kernel space */ 1916 /* This will also move the address data into kernel space */
1917 if (MSG_CMSG_COMPAT & flags) { 1917 if (MSG_CMSG_COMPAT & flags) {
1918 err = verify_compat_iovec(&msg_sys, iov, 1918 err = verify_compat_iovec(&msg_sys, iov,
1919 (struct sockaddr *)&address, 1919 (struct sockaddr *)&address,
1920 VERIFY_READ); 1920 VERIFY_READ);
1921 } else 1921 } else
1922 err = verify_iovec(&msg_sys, iov, 1922 err = verify_iovec(&msg_sys, iov,
1923 (struct sockaddr *)&address, 1923 (struct sockaddr *)&address,
1924 VERIFY_READ); 1924 VERIFY_READ);
1925 if (err < 0) 1925 if (err < 0)
1926 goto out_freeiov; 1926 goto out_freeiov;
1927 total_len = err; 1927 total_len = err;
1928 1928
1929 err = -ENOBUFS; 1929 err = -ENOBUFS;
1930 1930
1931 if (msg_sys.msg_controllen > INT_MAX) 1931 if (msg_sys.msg_controllen > INT_MAX)
1932 goto out_freeiov; 1932 goto out_freeiov;
1933 ctl_len = msg_sys.msg_controllen; 1933 ctl_len = msg_sys.msg_controllen;
1934 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1934 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1935 err = 1935 err =
1936 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1936 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1937 sizeof(ctl)); 1937 sizeof(ctl));
1938 if (err) 1938 if (err)
1939 goto out_freeiov; 1939 goto out_freeiov;
1940 ctl_buf = msg_sys.msg_control; 1940 ctl_buf = msg_sys.msg_control;
1941 ctl_len = msg_sys.msg_controllen; 1941 ctl_len = msg_sys.msg_controllen;
1942 } else if (ctl_len) { 1942 } else if (ctl_len) {
1943 if (ctl_len > sizeof(ctl)) { 1943 if (ctl_len > sizeof(ctl)) {
1944 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1944 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1945 if (ctl_buf == NULL) 1945 if (ctl_buf == NULL)
1946 goto out_freeiov; 1946 goto out_freeiov;
1947 } 1947 }
1948 err = -EFAULT; 1948 err = -EFAULT;
1949 /* 1949 /*
1950 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1950 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1951 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1951 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1952 * checking falls down on this. 1952 * checking falls down on this.
1953 */ 1953 */
1954 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1954 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1955 ctl_len)) 1955 ctl_len))
1956 goto out_freectl; 1956 goto out_freectl;
1957 msg_sys.msg_control = ctl_buf; 1957 msg_sys.msg_control = ctl_buf;
1958 } 1958 }
1959 msg_sys.msg_flags = flags; 1959 msg_sys.msg_flags = flags;
1960 1960
1961 if (sock->file->f_flags & O_NONBLOCK) 1961 if (sock->file->f_flags & O_NONBLOCK)
1962 msg_sys.msg_flags |= MSG_DONTWAIT; 1962 msg_sys.msg_flags |= MSG_DONTWAIT;
1963 err = sock_sendmsg(sock, &msg_sys, total_len); 1963 err = sock_sendmsg(sock, &msg_sys, total_len);
1964 1964
1965 out_freectl: 1965 out_freectl:
1966 if (ctl_buf != ctl) 1966 if (ctl_buf != ctl)
1967 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 1967 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1968 out_freeiov: 1968 out_freeiov:
1969 if (iov != iovstack) 1969 if (iov != iovstack)
1970 sock_kfree_s(sock->sk, iov, iov_size); 1970 sock_kfree_s(sock->sk, iov, iov_size);
1971 out_put: 1971 out_put:
1972 fput_light(sock->file, fput_needed); 1972 fput_light(sock->file, fput_needed);
1973 out: 1973 out:
1974 return err; 1974 return err;
1975 } 1975 }
1976 1976
1977 /* 1977 /*
1978 * BSD recvmsg interface 1978 * BSD recvmsg interface
1979 */ 1979 */
1980 1980
1981 asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, 1981 asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1982 unsigned int flags) 1982 unsigned int flags)
1983 { 1983 {
1984 struct compat_msghdr __user *msg_compat = 1984 struct compat_msghdr __user *msg_compat =
1985 (struct compat_msghdr __user *)msg; 1985 (struct compat_msghdr __user *)msg;
1986 struct socket *sock; 1986 struct socket *sock;
1987 struct iovec iovstack[UIO_FASTIOV]; 1987 struct iovec iovstack[UIO_FASTIOV];
1988 struct iovec *iov = iovstack; 1988 struct iovec *iov = iovstack;
1989 struct msghdr msg_sys; 1989 struct msghdr msg_sys;
1990 unsigned long cmsg_ptr; 1990 unsigned long cmsg_ptr;
1991 int err, iov_size, total_len, len; 1991 int err, iov_size, total_len, len;
1992 int fput_needed; 1992 int fput_needed;
1993 1993
1994 /* kernel mode address */ 1994 /* kernel mode address */
1995 struct sockaddr_storage addr; 1995 struct sockaddr_storage addr;
1996 1996
1997 /* user mode address pointers */ 1997 /* user mode address pointers */
1998 struct sockaddr __user *uaddr; 1998 struct sockaddr __user *uaddr;
1999 int __user *uaddr_len; 1999 int __user *uaddr_len;
2000 2000
2001 if (MSG_CMSG_COMPAT & flags) { 2001 if (MSG_CMSG_COMPAT & flags) {
2002 if (get_compat_msghdr(&msg_sys, msg_compat)) 2002 if (get_compat_msghdr(&msg_sys, msg_compat))
2003 return -EFAULT; 2003 return -EFAULT;
2004 } 2004 }
2005 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 2005 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
2006 return -EFAULT; 2006 return -EFAULT;
2007 2007
2008 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2008 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2009 if (!sock) 2009 if (!sock)
2010 goto out; 2010 goto out;
2011 2011
2012 err = -EMSGSIZE; 2012 err = -EMSGSIZE;
2013 if (msg_sys.msg_iovlen > UIO_MAXIOV) 2013 if (msg_sys.msg_iovlen > UIO_MAXIOV)
2014 goto out_put; 2014 goto out_put;
2015 2015
2016 /* Check whether to allocate the iovec area */ 2016 /* Check whether to allocate the iovec area */
2017 err = -ENOMEM; 2017 err = -ENOMEM;
2018 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 2018 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
2019 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 2019 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
2020 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 2020 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2021 if (!iov) 2021 if (!iov)
2022 goto out_put; 2022 goto out_put;
2023 } 2023 }
2024 2024
2025 /* 2025 /*
2026 * Save the user-mode address (verify_iovec will change the 2026 * Save the user-mode address (verify_iovec will change the
2027 * kernel msghdr to use the kernel address space) 2027 * kernel msghdr to use the kernel address space)
2028 */ 2028 */
2029 2029
2030 uaddr = (__force void __user *)msg_sys.msg_name; 2030 uaddr = (__force void __user *)msg_sys.msg_name;
2031 uaddr_len = COMPAT_NAMELEN(msg); 2031 uaddr_len = COMPAT_NAMELEN(msg);
2032 if (MSG_CMSG_COMPAT & flags) { 2032 if (MSG_CMSG_COMPAT & flags) {
2033 err = verify_compat_iovec(&msg_sys, iov, 2033 err = verify_compat_iovec(&msg_sys, iov,
2034 (struct sockaddr *)&addr, 2034 (struct sockaddr *)&addr,
2035 VERIFY_WRITE); 2035 VERIFY_WRITE);
2036 } else 2036 } else
2037 err = verify_iovec(&msg_sys, iov, 2037 err = verify_iovec(&msg_sys, iov,
2038 (struct sockaddr *)&addr, 2038 (struct sockaddr *)&addr,
2039 VERIFY_WRITE); 2039 VERIFY_WRITE);
2040 if (err < 0) 2040 if (err < 0)
2041 goto out_freeiov; 2041 goto out_freeiov;
2042 total_len = err; 2042 total_len = err;
2043 2043
2044 cmsg_ptr = (unsigned long)msg_sys.msg_control; 2044 cmsg_ptr = (unsigned long)msg_sys.msg_control;
2045 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2045 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2046 2046
2047 if (sock->file->f_flags & O_NONBLOCK) 2047 if (sock->file->f_flags & O_NONBLOCK)
2048 flags |= MSG_DONTWAIT; 2048 flags |= MSG_DONTWAIT;
2049 err = sock_recvmsg(sock, &msg_sys, total_len, flags); 2049 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
2050 if (err < 0) 2050 if (err < 0)
2051 goto out_freeiov; 2051 goto out_freeiov;
2052 len = err; 2052 len = err;
2053 2053
2054 if (uaddr != NULL) { 2054 if (uaddr != NULL) {
2055 err = move_addr_to_user((struct sockaddr *)&addr, 2055 err = move_addr_to_user((struct sockaddr *)&addr,
2056 msg_sys.msg_namelen, uaddr, 2056 msg_sys.msg_namelen, uaddr,
2057 uaddr_len); 2057 uaddr_len);
2058 if (err < 0) 2058 if (err < 0)
2059 goto out_freeiov; 2059 goto out_freeiov;
2060 } 2060 }
2061 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), 2061 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
2062 COMPAT_FLAGS(msg)); 2062 COMPAT_FLAGS(msg));
2063 if (err) 2063 if (err)
2064 goto out_freeiov; 2064 goto out_freeiov;
2065 if (MSG_CMSG_COMPAT & flags) 2065 if (MSG_CMSG_COMPAT & flags)
2066 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 2066 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
2067 &msg_compat->msg_controllen); 2067 &msg_compat->msg_controllen);
2068 else 2068 else
2069 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 2069 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
2070 &msg->msg_controllen); 2070 &msg->msg_controllen);
2071 if (err) 2071 if (err)
2072 goto out_freeiov; 2072 goto out_freeiov;
2073 err = len; 2073 err = len;
2074 2074
2075 out_freeiov: 2075 out_freeiov:
2076 if (iov != iovstack) 2076 if (iov != iovstack)
2077 sock_kfree_s(sock->sk, iov, iov_size); 2077 sock_kfree_s(sock->sk, iov, iov_size);
2078 out_put: 2078 out_put:
2079 fput_light(sock->file, fput_needed); 2079 fput_light(sock->file, fput_needed);
2080 out: 2080 out:
2081 return err; 2081 return err;
2082 } 2082 }
2083 2083
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Size in bytes of the argument block for each sys_socketcall number,
 * indexed by call number (index 0 is unused).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[19]={
	AL(0),
	AL(3),AL(3),AL(3),AL(2),AL(3),AL(3),
	AL(3),AL(4),AL(4),AL(4),AL(6),AL(6),
	AL(2),AL(5),AL(5),AL(3),AL(3),AL(6)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexes the socket system calls: @call selects the operation
 *	and @args points to a user-space array holding its arguments.
 *
 *	Returns -EINVAL for a call number outside 1..SYS_PACCEPT, -EFAULT
 *	if the argument block cannot be copied in, the audit hook's error
 *	if it is non-zero, and otherwise the selected call's result.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long arg[6];
	unsigned int len;
	int err;

	if (call < 1 || call > SYS_PACCEPT)
		return -EINVAL;

	len = nargs[call];

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(arg, args, len))
		return -EFAULT;

	err = audit_socketcall(len / sizeof(unsigned long), arg);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		return sys_socket(arg[0], arg[1], arg[2]);
	case SYS_BIND:
		return sys_bind(arg[0], (struct sockaddr __user *)arg[1],
				arg[2]);
	case SYS_CONNECT:
		return sys_connect(arg[0], (struct sockaddr __user *)arg[1],
				   arg[2]);
	case SYS_LISTEN:
		return sys_listen(arg[0], arg[1]);
	case SYS_ACCEPT:
		return do_accept(arg[0], (struct sockaddr __user *)arg[1],
				 (int __user *)arg[2], 0);
	case SYS_GETSOCKNAME:
		return sys_getsockname(arg[0],
				       (struct sockaddr __user *)arg[1],
				       (int __user *)arg[2]);
	case SYS_GETPEERNAME:
		return sys_getpeername(arg[0],
				       (struct sockaddr __user *)arg[1],
				       (int __user *)arg[2]);
	case SYS_SOCKETPAIR:
		return sys_socketpair(arg[0], arg[1], arg[2],
				      (int __user *)arg[3]);
	case SYS_SEND:
		return sys_send(arg[0], (void __user *)arg[1], arg[2],
				arg[3]);
	case SYS_SENDTO:
		return sys_sendto(arg[0], (void __user *)arg[1], arg[2],
				  arg[3], (struct sockaddr __user *)arg[4],
				  arg[5]);
	case SYS_RECV:
		return sys_recv(arg[0], (void __user *)arg[1], arg[2],
				arg[3]);
	case SYS_RECVFROM:
		return sys_recvfrom(arg[0], (void __user *)arg[1], arg[2],
				    arg[3], (struct sockaddr __user *)arg[4],
				    (int __user *)arg[5]);
	case SYS_SHUTDOWN:
		return sys_shutdown(arg[0], arg[1]);
	case SYS_SETSOCKOPT:
		return sys_setsockopt(arg[0], arg[1], arg[2],
				      (char __user *)arg[3], arg[4]);
	case SYS_GETSOCKOPT:
		return sys_getsockopt(arg[0], arg[1], arg[2],
				      (char __user *)arg[3],
				      (int __user *)arg[4]);
	case SYS_SENDMSG:
		return sys_sendmsg(arg[0], (struct msghdr __user *)arg[1],
				   arg[2]);
	case SYS_RECVMSG:
		return sys_recvmsg(arg[0], (struct msghdr __user *)arg[1],
				   arg[2]);
	case SYS_PACCEPT:
		return sys_paccept(arg[0], (struct sockaddr __user *)arg[1],
				   (int __user *)arg[2],
				   (const sigset_t __user *) arg[3],
				   arg[4], arg[5]);
	default:
		return -EINVAL;
	}
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
2203 2203
2204 /** 2204 /**
2205 * sock_register - add a socket protocol handler 2205 * sock_register - add a socket protocol handler
2206 * @ops: description of protocol 2206 * @ops: description of protocol
2207 * 2207 *
2208 * This function is called by a protocol handler that wants to 2208 * This function is called by a protocol handler that wants to
2209 * advertise its address family, and have it linked into the 2209 * advertise its address family, and have it linked into the
2210 * socket interface. The value ops->family coresponds to the 2210 * socket interface. The value ops->family coresponds to the
2211 * socket system call protocol family. 2211 * socket system call protocol family.
2212 */ 2212 */
2213 int sock_register(const struct net_proto_family *ops) 2213 int sock_register(const struct net_proto_family *ops)
2214 { 2214 {
2215 int err; 2215 int err;
2216 2216
2217 if (ops->family >= NPROTO) { 2217 if (ops->family >= NPROTO) {
2218 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, 2218 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2219 NPROTO); 2219 NPROTO);
2220 return -ENOBUFS; 2220 return -ENOBUFS;
2221 } 2221 }
2222 2222
2223 spin_lock(&net_family_lock); 2223 spin_lock(&net_family_lock);
2224 if (net_families[ops->family]) 2224 if (net_families[ops->family])
2225 err = -EEXIST; 2225 err = -EEXIST;
2226 else { 2226 else {
2227 net_families[ops->family] = ops; 2227 net_families[ops->family] = ops;
2228 err = 0; 2228 err = 0;
2229 } 2229 }
2230 spin_unlock(&net_family_lock); 2230 spin_unlock(&net_family_lock);
2231 2231
2232 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 2232 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2233 return err; 2233 return err;
2234 } 2234 }
2235 2235
2236 /** 2236 /**
2237 * sock_unregister - remove a protocol handler 2237 * sock_unregister - remove a protocol handler
2238 * @family: protocol family to remove 2238 * @family: protocol family to remove
2239 * 2239 *
2240 * This function is called by a protocol handler that wants to 2240 * This function is called by a protocol handler that wants to
2241 * remove its address family, and have it unlinked from the 2241 * remove its address family, and have it unlinked from the
2242 * new socket creation. 2242 * new socket creation.
2243 * 2243 *
2244 * If protocol handler is a module, then it can use module reference 2244 * If protocol handler is a module, then it can use module reference
2245 * counts to protect against new references. If protocol handler is not 2245 * counts to protect against new references. If protocol handler is not
2246 * a module then it needs to provide its own protection in 2246 * a module then it needs to provide its own protection in
2247 * the ops->create routine. 2247 * the ops->create routine.
2248 */ 2248 */
2249 void sock_unregister(int family) 2249 void sock_unregister(int family)
2250 { 2250 {
2251 BUG_ON(family < 0 || family >= NPROTO); 2251 BUG_ON(family < 0 || family >= NPROTO);
2252 2252
2253 spin_lock(&net_family_lock); 2253 spin_lock(&net_family_lock);
2254 net_families[family] = NULL; 2254 net_families[family] = NULL;
2255 spin_unlock(&net_family_lock); 2255 spin_unlock(&net_family_lock);
2256 2256
2257 synchronize_rcu(); 2257 synchronize_rcu();
2258 2258
2259 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 2259 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2260 } 2260 }
2261 2261
2262 static int __init sock_init(void) 2262 static int __init sock_init(void)
2263 { 2263 {
2264 /* 2264 /*
2265 * Initialize sock SLAB cache. 2265 * Initialize sock SLAB cache.
2266 */ 2266 */
2267 2267
2268 sk_init(); 2268 sk_init();
2269 2269
2270 /* 2270 /*
2271 * Initialize skbuff SLAB cache 2271 * Initialize skbuff SLAB cache
2272 */ 2272 */
2273 skb_init(); 2273 skb_init();
2274 2274
2275 /* 2275 /*
2276 * Initialize the protocols module. 2276 * Initialize the protocols module.
2277 */ 2277 */
2278 2278
2279 init_inodecache(); 2279 init_inodecache();
2280 register_filesystem(&sock_fs_type); 2280 register_filesystem(&sock_fs_type);
2281 sock_mnt = kern_mount(&sock_fs_type); 2281 sock_mnt = kern_mount(&sock_fs_type);
2282 2282
2283 /* The real protocol initialization is performed in later initcalls. 2283 /* The real protocol initialization is performed in later initcalls.
2284 */ 2284 */
2285 2285
2286 #ifdef CONFIG_NETFILTER 2286 #ifdef CONFIG_NETFILTER
2287 netfilter_init(); 2287 netfilter_init();
2288 #endif 2288 #endif
2289 2289
2290 return 0; 2290 return 0;
2291 } 2291 }
2292 2292
2293 core_initcall(sock_init); /* early initcall */ 2293 core_initcall(sock_init); /* early initcall */
2294 2294
#ifdef CONFIG_PROC_FS
/*
 *	Print the number of sockets in use to @seq, obtained by summing
 *	the per-cpu sockets_in_use counters over all possible CPUs.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
2311 2311
#ifdef CONFIG_COMPAT
/*
 *	Compat ioctl entry point for sockets.
 *
 *	The protocol's compat_ioctl handler, when present, gets the first
 *	try. If the command is still unhandled (-ENOIOCTLCMD) and lies in
 *	the SIOCIWFIRST..SIOCIWLAST range, it is passed on to
 *	compat_wext_handle_ioctl().
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	struct net *net = sock_net(sock->sk);
	int ret;

	if (sock->ops->compat_ioctl)
		ret = sock->ops->compat_ioctl(sock, cmd, arg);
	else
		ret = -ENOIOCTLCMD;

	/* Handled (or failed) by the protocol: nothing more to do. */
	if (ret != -ENOIOCTLCMD)
		return ret;

	/* Outside the SIOCIW range: report "unhandled". */
	if (cmd < SIOCIWFIRST || cmd > SIOCIWLAST)
		return ret;

	return compat_wext_handle_ioctl(net, cmd, arg);
}
#endif
2334 2334
2335 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 2335 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2336 { 2336 {
2337 return sock->ops->bind(sock, addr, addrlen); 2337 return sock->ops->bind(sock, addr, addrlen);
2338 } 2338 }
2339 2339
2340 int kernel_listen(struct socket *sock, int backlog) 2340 int kernel_listen(struct socket *sock, int backlog)
2341 { 2341 {
2342 return sock->ops->listen(sock, backlog); 2342 return sock->ops->listen(sock, backlog);
2343 } 2343 }
2344 2344
2345 int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 2345 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2346 { 2346 {
2347 struct sock *sk = sock->sk; 2347 struct sock *sk = sock->sk;
2348 int err; 2348 int err;
2349 2349
2350 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, 2350 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2351 newsock); 2351 newsock);
2352 if (err < 0) 2352 if (err < 0)
2353 goto done; 2353 goto done;
2354 2354
2355 err = sock->ops->accept(sock, *newsock, flags); 2355 err = sock->ops->accept(sock, *newsock, flags);
2356 if (err < 0) { 2356 if (err < 0) {
2357 sock_release(*newsock); 2357 sock_release(*newsock);
2358 *newsock = NULL; 2358 *newsock = NULL;
2359 goto done; 2359 goto done;
2360 } 2360 }
2361 2361
2362 (*newsock)->ops = sock->ops; 2362 (*newsock)->ops = sock->ops;
2363 2363
2364 done: 2364 done:
2365 return err; 2365 return err;
2366 } 2366 }
2367 2367
2368 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 2368 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2369 int flags) 2369 int flags)
2370 { 2370 {
2371 return sock->ops->connect(sock, addr, addrlen, flags); 2371 return sock->ops->connect(sock, addr, addrlen, flags);
2372 } 2372 }
2373 2373
2374 int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 2374 int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2375 int *addrlen) 2375 int *addrlen)
2376 { 2376 {
2377 return sock->ops->getname(sock, addr, addrlen, 0); 2377 return sock->ops->getname(sock, addr, addrlen, 0);
2378 } 2378 }
2379 2379
2380 int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 2380 int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2381 int *addrlen) 2381 int *addrlen)
2382 { 2382 {
2383 return sock->ops->getname(sock, addr, addrlen, 1); 2383 return sock->ops->getname(sock, addr, addrlen, 1);
2384 } 2384 }
2385 2385
2386 int kernel_getsockopt(struct socket *sock, int level, int optname, 2386 int kernel_getsockopt(struct socket *sock, int level, int optname,
2387 char *optval, int *optlen) 2387 char *optval, int *optlen)
2388 { 2388 {
2389 mm_segment_t oldfs = get_fs(); 2389 mm_segment_t oldfs = get_fs();
2390 int err; 2390 int err;
2391 2391
2392 set_fs(KERNEL_DS); 2392 set_fs(KERNEL_DS);
2393 if (level == SOL_SOCKET) 2393 if (level == SOL_SOCKET)
2394 err = sock_getsockopt(sock, level, optname, optval, optlen); 2394 err = sock_getsockopt(sock, level, optname, optval, optlen);
2395 else 2395 else
2396 err = sock->ops->getsockopt(sock, level, optname, optval, 2396 err = sock->ops->getsockopt(sock, level, optname, optval,
2397 optlen); 2397 optlen);
2398 set_fs(oldfs); 2398 set_fs(oldfs);
2399 return err; 2399 return err;
2400 } 2400 }
2401 2401
2402 int kernel_setsockopt(struct socket *sock, int level, int optname, 2402 int kernel_setsockopt(struct socket *sock, int level, int optname,
2403 char *optval, int optlen) 2403 char *optval, int optlen)
2404 { 2404 {
2405 mm_segment_t oldfs = get_fs(); 2405 mm_segment_t oldfs = get_fs();
2406 int err; 2406 int err;
2407 2407
2408 set_fs(KERNEL_DS); 2408 set_fs(KERNEL_DS);
2409 if (level == SOL_SOCKET) 2409 if (level == SOL_SOCKET)
2410 err = sock_setsockopt(sock, level, optname, optval, optlen); 2410 err = sock_setsockopt(sock, level, optname, optval, optlen);
2411 else 2411 else
2412 err = sock->ops->setsockopt(sock, level, optname, optval, 2412 err = sock->ops->setsockopt(sock, level, optname, optval,
2413 optlen); 2413 optlen);
2414 set_fs(oldfs); 2414 set_fs(oldfs);
2415 return err; 2415 return err;
2416 } 2416 }
2417 2417
2418 int kernel_sendpage(struct socket *sock, struct page *page, int offset, 2418 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2419 size_t size, int flags) 2419 size_t size, int flags)
2420 { 2420 {
2421 if (sock->ops->sendpage) 2421 if (sock->ops->sendpage)
2422 return sock->ops->sendpage(sock, page, offset, size, flags); 2422 return sock->ops->sendpage(sock, page, offset, size, flags);
2423 2423
2424 return sock_no_sendpage(sock, page, offset, size, flags); 2424 return sock_no_sendpage(sock, page, offset, size, flags);
2425 } 2425 }
2426 2426
2427 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 2427 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2428 { 2428 {
2429 mm_segment_t oldfs = get_fs(); 2429 mm_segment_t oldfs = get_fs();
2430 int err; 2430 int err;
2431 2431
2432 set_fs(KERNEL_DS); 2432 set_fs(KERNEL_DS);
2433 err = sock->ops->ioctl(sock, cmd, arg); 2433 err = sock->ops->ioctl(sock, cmd, arg);
2434 set_fs(oldfs); 2434 set_fs(oldfs);
2435 2435
2436 return err; 2436 return err;
2437 } 2437 }
2438 2438
2439 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 2439 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2440 { 2440 {
2441 return sock->ops->shutdown(sock, how); 2441 return sock->ops->shutdown(sock, how);
2442 } 2442 }
2443 2443
/* Exported sock_* and sockfd_* symbols. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* Exported kernel_* socket helpers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);
2468 2468