Commit ab2910921064b657610a3b501358a305e13087ea
Committed by
David S. Miller
1 parent
6d9f239a1e
Exists in
master
and in
7 other branches
net: remove two duplicated #include
Removed duplicated #include <rdma/ib_verbs.h> in net/9p/trans_rdma.c and #include <linux/thread_info.h> in net/socket.c
Signed-off-by: Jianjun Kong <jianjun@zeuux.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 2 changed files with 0 additions and 2 deletions Inline Diff
net/9p/trans_rdma.c
1 | /* | 1 | /* |
2 | * linux/fs/9p/trans_rdma.c | 2 | * linux/fs/9p/trans_rdma.c |
3 | * | 3 | * |
4 | * RDMA transport layer based on the trans_fd.c implementation. | 4 | * RDMA transport layer based on the trans_fd.c implementation. |
5 | * | 5 | * |
6 | * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> | 6 | * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> |
7 | * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> | 7 | * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> |
8 | * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> | 8 | * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> |
9 | * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> | 9 | * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> |
10 | * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> | 10 | * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or modify | 12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License version 2 | 13 | * it under the terms of the GNU General Public License version 2 |
14 | * as published by the Free Software Foundation. | 14 | * as published by the Free Software Foundation. |
15 | * | 15 | * |
16 | * This program is distributed in the hope that it will be useful, | 16 | * This program is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | * GNU General Public License for more details. | 19 | * GNU General Public License for more details. |
20 | * | 20 | * |
21 | * You should have received a copy of the GNU General Public License | 21 | * You should have received a copy of the GNU General Public License |
22 | * along with this program; if not, write to: | 22 | * along with this program; if not, write to: |
23 | * Free Software Foundation | 23 | * Free Software Foundation |
24 | * 51 Franklin Street, Fifth Floor | 24 | * 51 Franklin Street, Fifth Floor |
25 | * Boston, MA 02111-1301 USA | 25 | * Boston, MA 02111-1301 USA |
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/in.h> | 29 | #include <linux/in.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/net.h> | 31 | #include <linux/net.h> |
32 | #include <linux/ipv6.h> | 32 | #include <linux/ipv6.h> |
33 | #include <linux/kthread.h> | 33 | #include <linux/kthread.h> |
34 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
35 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
36 | #include <linux/un.h> | 36 | #include <linux/un.h> |
37 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
38 | #include <linux/inet.h> | 38 | #include <linux/inet.h> |
39 | #include <linux/idr.h> | 39 | #include <linux/idr.h> |
40 | #include <linux/file.h> | 40 | #include <linux/file.h> |
41 | #include <linux/parser.h> | 41 | #include <linux/parser.h> |
42 | #include <linux/semaphore.h> | 42 | #include <linux/semaphore.h> |
43 | #include <net/9p/9p.h> | 43 | #include <net/9p/9p.h> |
44 | #include <net/9p/client.h> | 44 | #include <net/9p/client.h> |
45 | #include <net/9p/transport.h> | 45 | #include <net/9p/transport.h> |
46 | #include <rdma/ib_verbs.h> | 46 | #include <rdma/ib_verbs.h> |
47 | #include <rdma/rdma_cm.h> | 47 | #include <rdma/rdma_cm.h> |
48 | #include <rdma/ib_verbs.h> | ||
49 | 48 | ||
50 | #define P9_PORT 5640 | 49 | #define P9_PORT 5640 |
51 | #define P9_RDMA_SQ_DEPTH 32 | 50 | #define P9_RDMA_SQ_DEPTH 32 |
52 | #define P9_RDMA_RQ_DEPTH 32 | 51 | #define P9_RDMA_RQ_DEPTH 32 |
53 | #define P9_RDMA_SEND_SGE 4 | 52 | #define P9_RDMA_SEND_SGE 4 |
54 | #define P9_RDMA_RECV_SGE 4 | 53 | #define P9_RDMA_RECV_SGE 4 |
55 | #define P9_RDMA_IRD 0 | 54 | #define P9_RDMA_IRD 0 |
56 | #define P9_RDMA_ORD 0 | 55 | #define P9_RDMA_ORD 0 |
57 | #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ | 56 | #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ |
58 | #define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can | 57 | #define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can |
59 | * safely advertise a maxsize | 58 | * safely advertise a maxsize |
60 | * of 16k */ | 59 | * of 16k */ |
61 | 60 | ||
62 | #define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) | 61 | #define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) |
63 | /** | 62 | /** |
64 | * struct p9_trans_rdma - RDMA transport instance | 63 | * struct p9_trans_rdma - RDMA transport instance |
65 | * | 64 | * |
66 | * @state: tracks the transport state machine for connection setup and tear down | 65 | * @state: tracks the transport state machine for connection setup and tear down |
67 | * @cm_id: The RDMA CM ID | 66 | * @cm_id: The RDMA CM ID |
68 | * @pd: Protection Domain pointer | 67 | * @pd: Protection Domain pointer |
69 | * @qp: Queue Pair pointer | 68 | * @qp: Queue Pair pointer |
70 | * @cq: Completion Queue pointer | 69 | * @cq: Completion Queue pointer |
71 | * @lkey: The local access only memory region key | 70 | * @lkey: The local access only memory region key |
72 | * @timeout: Number of msecs to wait for connection management events | 71 | * @timeout: Number of msecs to wait for connection management events |
73 | * @sq_depth: The depth of the Send Queue | 72 | * @sq_depth: The depth of the Send Queue |
74 | * @sq_sem: Semaphore for the SQ | 73 | * @sq_sem: Semaphore for the SQ |
75 | * @rq_depth: The depth of the Receive Queue. | 74 | * @rq_depth: The depth of the Receive Queue. |
76 | * @addr: The remote peer's address | 75 | * @addr: The remote peer's address |
77 | * @req_lock: Protects the active request list | 76 | * @req_lock: Protects the active request list |
78 | * @send_wait: Wait list when the SQ fills up | 77 | * @send_wait: Wait list when the SQ fills up |
79 | * @cm_done: Completion event for connection management tracking | 78 | * @cm_done: Completion event for connection management tracking |
80 | */ | 79 | */ |
81 | struct p9_trans_rdma { | 80 | struct p9_trans_rdma { |
82 | enum { | 81 | enum { |
83 | P9_RDMA_INIT, | 82 | P9_RDMA_INIT, |
84 | P9_RDMA_ADDR_RESOLVED, | 83 | P9_RDMA_ADDR_RESOLVED, |
85 | P9_RDMA_ROUTE_RESOLVED, | 84 | P9_RDMA_ROUTE_RESOLVED, |
86 | P9_RDMA_CONNECTED, | 85 | P9_RDMA_CONNECTED, |
87 | P9_RDMA_FLUSHING, | 86 | P9_RDMA_FLUSHING, |
88 | P9_RDMA_CLOSING, | 87 | P9_RDMA_CLOSING, |
89 | P9_RDMA_CLOSED, | 88 | P9_RDMA_CLOSED, |
90 | } state; | 89 | } state; |
91 | struct rdma_cm_id *cm_id; | 90 | struct rdma_cm_id *cm_id; |
92 | struct ib_pd *pd; | 91 | struct ib_pd *pd; |
93 | struct ib_qp *qp; | 92 | struct ib_qp *qp; |
94 | struct ib_cq *cq; | 93 | struct ib_cq *cq; |
95 | struct ib_mr *dma_mr; | 94 | struct ib_mr *dma_mr; |
96 | u32 lkey; | 95 | u32 lkey; |
97 | long timeout; | 96 | long timeout; |
98 | int sq_depth; | 97 | int sq_depth; |
99 | struct semaphore sq_sem; | 98 | struct semaphore sq_sem; |
100 | int rq_depth; | 99 | int rq_depth; |
101 | atomic_t rq_count; | 100 | atomic_t rq_count; |
102 | struct sockaddr_in addr; | 101 | struct sockaddr_in addr; |
103 | spinlock_t req_lock; | 102 | spinlock_t req_lock; |
104 | 103 | ||
105 | struct completion cm_done; | 104 | struct completion cm_done; |
106 | }; | 105 | }; |
107 | 106 | ||
108 | /** | 107 | /** |
109 | * p9_rdma_context - Keeps track of in-process WR | 108 | * p9_rdma_context - Keeps track of in-process WR |
110 | * | 109 | * |
111 | * @wc_op: The original WR op for when the CQE completes in error. | 110 | * @wc_op: The original WR op for when the CQE completes in error. |
112 | * @busa: Bus address to unmap when the WR completes | 111 | * @busa: Bus address to unmap when the WR completes |
113 | * @req: Keeps track of requests (send) | 112 | * @req: Keeps track of requests (send) |
114 | * @rc: Keeps track of replies (receive) | 113 | * @rc: Keeps track of replies (receive) |
115 | */ | 114 | */ |
116 | struct p9_rdma_req; | 115 | struct p9_rdma_req; |
117 | struct p9_rdma_context { | 116 | struct p9_rdma_context { |
118 | enum ib_wc_opcode wc_op; | 117 | enum ib_wc_opcode wc_op; |
119 | dma_addr_t busa; | 118 | dma_addr_t busa; |
120 | union { | 119 | union { |
121 | struct p9_req_t *req; | 120 | struct p9_req_t *req; |
122 | struct p9_fcall *rc; | 121 | struct p9_fcall *rc; |
123 | }; | 122 | }; |
124 | }; | 123 | }; |
125 | 124 | ||
126 | /** | 125 | /** |
127 | * p9_rdma_opts - Collection of mount options | 126 | * p9_rdma_opts - Collection of mount options |
128 | * @port: port of connection | 127 | * @port: port of connection |
129 | * @sq_depth: The requested depth of the SQ. This really doesn't need | 128 | * @sq_depth: The requested depth of the SQ. This really doesn't need |
130 | * to be any deeper than the number of threads used in the client | 129 | * to be any deeper than the number of threads used in the client |
131 | * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth | 130 | * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth |
132 | * @timeout: Time to wait in msecs for CM events | 131 | * @timeout: Time to wait in msecs for CM events |
133 | */ | 132 | */ |
134 | struct p9_rdma_opts { | 133 | struct p9_rdma_opts { |
135 | short port; | 134 | short port; |
136 | int sq_depth; | 135 | int sq_depth; |
137 | int rq_depth; | 136 | int rq_depth; |
138 | long timeout; | 137 | long timeout; |
139 | }; | 138 | }; |
140 | 139 | ||
141 | /* | 140 | /* |
142 | * Option Parsing (code inspired by NFS code) | 141 | * Option Parsing (code inspired by NFS code) |
143 | */ | 142 | */ |
144 | enum { | 143 | enum { |
145 | /* Options that take integer arguments */ | 144 | /* Options that take integer arguments */ |
146 | Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err, | 145 | Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err, |
147 | }; | 146 | }; |
148 | 147 | ||
149 | static match_table_t tokens = { | 148 | static match_table_t tokens = { |
150 | {Opt_port, "port=%u"}, | 149 | {Opt_port, "port=%u"}, |
151 | {Opt_sq_depth, "sq=%u"}, | 150 | {Opt_sq_depth, "sq=%u"}, |
152 | {Opt_rq_depth, "rq=%u"}, | 151 | {Opt_rq_depth, "rq=%u"}, |
153 | {Opt_timeout, "timeout=%u"}, | 152 | {Opt_timeout, "timeout=%u"}, |
154 | {Opt_err, NULL}, | 153 | {Opt_err, NULL}, |
155 | }; | 154 | }; |
156 | 155 | ||
157 | /** | 156 | /** |
158 | * parse_opts - parse mount options into session structure | 157 | * parse_opts - parse mount options into session structure |
159 | * @params: options string passed from mount | 158 | * @params: options string passed from mount |
160 | * @opts: transport-specific structure to parse options into | 159 | * @opts: transport-specific structure to parse options into |
161 | * | 160 | * |
162 | * Returns 0 upon success, -ERRNO upon failure | 161 | * Returns 0 upon success, -ERRNO upon failure |
163 | */ | 162 | */ |
164 | static int parse_opts(char *params, struct p9_rdma_opts *opts) | 163 | static int parse_opts(char *params, struct p9_rdma_opts *opts) |
165 | { | 164 | { |
166 | char *p; | 165 | char *p; |
167 | substring_t args[MAX_OPT_ARGS]; | 166 | substring_t args[MAX_OPT_ARGS]; |
168 | int option; | 167 | int option; |
169 | char *options; | 168 | char *options; |
170 | int ret; | 169 | int ret; |
171 | 170 | ||
172 | opts->port = P9_PORT; | 171 | opts->port = P9_PORT; |
173 | opts->sq_depth = P9_RDMA_SQ_DEPTH; | 172 | opts->sq_depth = P9_RDMA_SQ_DEPTH; |
174 | opts->rq_depth = P9_RDMA_RQ_DEPTH; | 173 | opts->rq_depth = P9_RDMA_RQ_DEPTH; |
175 | opts->timeout = P9_RDMA_TIMEOUT; | 174 | opts->timeout = P9_RDMA_TIMEOUT; |
176 | 175 | ||
177 | if (!params) | 176 | if (!params) |
178 | return 0; | 177 | return 0; |
179 | 178 | ||
180 | options = kstrdup(params, GFP_KERNEL); | 179 | options = kstrdup(params, GFP_KERNEL); |
181 | if (!options) { | 180 | if (!options) { |
182 | P9_DPRINTK(P9_DEBUG_ERROR, | 181 | P9_DPRINTK(P9_DEBUG_ERROR, |
183 | "failed to allocate copy of option string\n"); | 182 | "failed to allocate copy of option string\n"); |
184 | return -ENOMEM; | 183 | return -ENOMEM; |
185 | } | 184 | } |
186 | 185 | ||
187 | while ((p = strsep(&options, ",")) != NULL) { | 186 | while ((p = strsep(&options, ",")) != NULL) { |
188 | int token; | 187 | int token; |
189 | int r; | 188 | int r; |
190 | if (!*p) | 189 | if (!*p) |
191 | continue; | 190 | continue; |
192 | token = match_token(p, tokens, args); | 191 | token = match_token(p, tokens, args); |
193 | r = match_int(&args[0], &option); | 192 | r = match_int(&args[0], &option); |
194 | if (r < 0) { | 193 | if (r < 0) { |
195 | P9_DPRINTK(P9_DEBUG_ERROR, | 194 | P9_DPRINTK(P9_DEBUG_ERROR, |
196 | "integer field, but no integer?\n"); | 195 | "integer field, but no integer?\n"); |
197 | ret = r; | 196 | ret = r; |
198 | continue; | 197 | continue; |
199 | } | 198 | } |
200 | switch (token) { | 199 | switch (token) { |
201 | case Opt_port: | 200 | case Opt_port: |
202 | opts->port = option; | 201 | opts->port = option; |
203 | break; | 202 | break; |
204 | case Opt_sq_depth: | 203 | case Opt_sq_depth: |
205 | opts->sq_depth = option; | 204 | opts->sq_depth = option; |
206 | break; | 205 | break; |
207 | case Opt_rq_depth: | 206 | case Opt_rq_depth: |
208 | opts->rq_depth = option; | 207 | opts->rq_depth = option; |
209 | break; | 208 | break; |
210 | case Opt_timeout: | 209 | case Opt_timeout: |
211 | opts->timeout = option; | 210 | opts->timeout = option; |
212 | break; | 211 | break; |
213 | default: | 212 | default: |
214 | continue; | 213 | continue; |
215 | } | 214 | } |
216 | } | 215 | } |
217 | /* RQ must be at least as large as the SQ */ | 216 | /* RQ must be at least as large as the SQ */ |
218 | opts->rq_depth = max(opts->rq_depth, opts->sq_depth); | 217 | opts->rq_depth = max(opts->rq_depth, opts->sq_depth); |
219 | kfree(options); | 218 | kfree(options); |
220 | return 0; | 219 | return 0; |
221 | } | 220 | } |
222 | 221 | ||
223 | static int | 222 | static int |
224 | p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) | 223 | p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) |
225 | { | 224 | { |
226 | struct p9_client *c = id->context; | 225 | struct p9_client *c = id->context; |
227 | struct p9_trans_rdma *rdma = c->trans; | 226 | struct p9_trans_rdma *rdma = c->trans; |
228 | switch (event->event) { | 227 | switch (event->event) { |
229 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 228 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
230 | BUG_ON(rdma->state != P9_RDMA_INIT); | 229 | BUG_ON(rdma->state != P9_RDMA_INIT); |
231 | rdma->state = P9_RDMA_ADDR_RESOLVED; | 230 | rdma->state = P9_RDMA_ADDR_RESOLVED; |
232 | break; | 231 | break; |
233 | 232 | ||
234 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 233 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
235 | BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); | 234 | BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); |
236 | rdma->state = P9_RDMA_ROUTE_RESOLVED; | 235 | rdma->state = P9_RDMA_ROUTE_RESOLVED; |
237 | break; | 236 | break; |
238 | 237 | ||
239 | case RDMA_CM_EVENT_ESTABLISHED: | 238 | case RDMA_CM_EVENT_ESTABLISHED: |
240 | BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); | 239 | BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); |
241 | rdma->state = P9_RDMA_CONNECTED; | 240 | rdma->state = P9_RDMA_CONNECTED; |
242 | break; | 241 | break; |
243 | 242 | ||
244 | case RDMA_CM_EVENT_DISCONNECTED: | 243 | case RDMA_CM_EVENT_DISCONNECTED: |
245 | if (rdma) | 244 | if (rdma) |
246 | rdma->state = P9_RDMA_CLOSED; | 245 | rdma->state = P9_RDMA_CLOSED; |
247 | if (c) | 246 | if (c) |
248 | c->status = Disconnected; | 247 | c->status = Disconnected; |
249 | break; | 248 | break; |
250 | 249 | ||
251 | case RDMA_CM_EVENT_TIMEWAIT_EXIT: | 250 | case RDMA_CM_EVENT_TIMEWAIT_EXIT: |
252 | break; | 251 | break; |
253 | 252 | ||
254 | case RDMA_CM_EVENT_ADDR_CHANGE: | 253 | case RDMA_CM_EVENT_ADDR_CHANGE: |
255 | case RDMA_CM_EVENT_ROUTE_ERROR: | 254 | case RDMA_CM_EVENT_ROUTE_ERROR: |
256 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | 255 | case RDMA_CM_EVENT_DEVICE_REMOVAL: |
257 | case RDMA_CM_EVENT_MULTICAST_JOIN: | 256 | case RDMA_CM_EVENT_MULTICAST_JOIN: |
258 | case RDMA_CM_EVENT_MULTICAST_ERROR: | 257 | case RDMA_CM_EVENT_MULTICAST_ERROR: |
259 | case RDMA_CM_EVENT_REJECTED: | 258 | case RDMA_CM_EVENT_REJECTED: |
260 | case RDMA_CM_EVENT_CONNECT_REQUEST: | 259 | case RDMA_CM_EVENT_CONNECT_REQUEST: |
261 | case RDMA_CM_EVENT_CONNECT_RESPONSE: | 260 | case RDMA_CM_EVENT_CONNECT_RESPONSE: |
262 | case RDMA_CM_EVENT_CONNECT_ERROR: | 261 | case RDMA_CM_EVENT_CONNECT_ERROR: |
263 | case RDMA_CM_EVENT_ADDR_ERROR: | 262 | case RDMA_CM_EVENT_ADDR_ERROR: |
264 | case RDMA_CM_EVENT_UNREACHABLE: | 263 | case RDMA_CM_EVENT_UNREACHABLE: |
265 | c->status = Disconnected; | 264 | c->status = Disconnected; |
266 | rdma_disconnect(rdma->cm_id); | 265 | rdma_disconnect(rdma->cm_id); |
267 | break; | 266 | break; |
268 | default: | 267 | default: |
269 | BUG(); | 268 | BUG(); |
270 | } | 269 | } |
271 | complete(&rdma->cm_done); | 270 | complete(&rdma->cm_done); |
272 | return 0; | 271 | return 0; |
273 | } | 272 | } |
274 | 273 | ||
275 | static void | 274 | static void |
276 | handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, | 275 | handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, |
277 | struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) | 276 | struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) |
278 | { | 277 | { |
279 | struct p9_req_t *req; | 278 | struct p9_req_t *req; |
280 | int err = 0; | 279 | int err = 0; |
281 | int16_t tag; | 280 | int16_t tag; |
282 | 281 | ||
283 | req = NULL; | 282 | req = NULL; |
284 | ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, | 283 | ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, |
285 | DMA_FROM_DEVICE); | 284 | DMA_FROM_DEVICE); |
286 | 285 | ||
287 | if (status != IB_WC_SUCCESS) | 286 | if (status != IB_WC_SUCCESS) |
288 | goto err_out; | 287 | goto err_out; |
289 | 288 | ||
290 | err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); | 289 | err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); |
291 | if (err) | 290 | if (err) |
292 | goto err_out; | 291 | goto err_out; |
293 | 292 | ||
294 | req = p9_tag_lookup(client, tag); | 293 | req = p9_tag_lookup(client, tag); |
295 | if (!req) | 294 | if (!req) |
296 | goto err_out; | 295 | goto err_out; |
297 | 296 | ||
298 | req->rc = c->rc; | 297 | req->rc = c->rc; |
299 | p9_client_cb(client, req); | 298 | p9_client_cb(client, req); |
300 | 299 | ||
301 | return; | 300 | return; |
302 | 301 | ||
303 | err_out: | 302 | err_out: |
304 | P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", | 303 | P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", |
305 | req, err, status); | 304 | req, err, status); |
306 | rdma->state = P9_RDMA_FLUSHING; | 305 | rdma->state = P9_RDMA_FLUSHING; |
307 | client->status = Disconnected; | 306 | client->status = Disconnected; |
308 | return; | 307 | return; |
309 | } | 308 | } |
310 | 309 | ||
311 | static void | 310 | static void |
312 | handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, | 311 | handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, |
313 | struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) | 312 | struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) |
314 | { | 313 | { |
315 | ib_dma_unmap_single(rdma->cm_id->device, | 314 | ib_dma_unmap_single(rdma->cm_id->device, |
316 | c->busa, c->req->tc->size, | 315 | c->busa, c->req->tc->size, |
317 | DMA_TO_DEVICE); | 316 | DMA_TO_DEVICE); |
318 | } | 317 | } |
319 | 318 | ||
320 | static void qp_event_handler(struct ib_event *event, void *context) | 319 | static void qp_event_handler(struct ib_event *event, void *context) |
321 | { | 320 | { |
322 | P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, | 321 | P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, |
323 | context); | 322 | context); |
324 | } | 323 | } |
325 | 324 | ||
326 | static void cq_comp_handler(struct ib_cq *cq, void *cq_context) | 325 | static void cq_comp_handler(struct ib_cq *cq, void *cq_context) |
327 | { | 326 | { |
328 | struct p9_client *client = cq_context; | 327 | struct p9_client *client = cq_context; |
329 | struct p9_trans_rdma *rdma = client->trans; | 328 | struct p9_trans_rdma *rdma = client->trans; |
330 | int ret; | 329 | int ret; |
331 | struct ib_wc wc; | 330 | struct ib_wc wc; |
332 | 331 | ||
333 | ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); | 332 | ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); |
334 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | 333 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { |
335 | struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; | 334 | struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; |
336 | 335 | ||
337 | switch (c->wc_op) { | 336 | switch (c->wc_op) { |
338 | case IB_WC_RECV: | 337 | case IB_WC_RECV: |
339 | atomic_dec(&rdma->rq_count); | 338 | atomic_dec(&rdma->rq_count); |
340 | handle_recv(client, rdma, c, wc.status, wc.byte_len); | 339 | handle_recv(client, rdma, c, wc.status, wc.byte_len); |
341 | break; | 340 | break; |
342 | 341 | ||
343 | case IB_WC_SEND: | 342 | case IB_WC_SEND: |
344 | handle_send(client, rdma, c, wc.status, wc.byte_len); | 343 | handle_send(client, rdma, c, wc.status, wc.byte_len); |
345 | up(&rdma->sq_sem); | 344 | up(&rdma->sq_sem); |
346 | break; | 345 | break; |
347 | 346 | ||
348 | default: | 347 | default: |
349 | printk(KERN_ERR "9prdma: unexpected completion type, " | 348 | printk(KERN_ERR "9prdma: unexpected completion type, " |
350 | "c->wc_op=%d, wc.opcode=%d, status=%d\n", | 349 | "c->wc_op=%d, wc.opcode=%d, status=%d\n", |
351 | c->wc_op, wc.opcode, wc.status); | 350 | c->wc_op, wc.opcode, wc.status); |
352 | break; | 351 | break; |
353 | } | 352 | } |
354 | kfree(c); | 353 | kfree(c); |
355 | } | 354 | } |
356 | } | 355 | } |
357 | 356 | ||
358 | static void cq_event_handler(struct ib_event *e, void *v) | 357 | static void cq_event_handler(struct ib_event *e, void *v) |
359 | { | 358 | { |
360 | P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); | 359 | P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); |
361 | } | 360 | } |
362 | 361 | ||
363 | static void rdma_destroy_trans(struct p9_trans_rdma *rdma) | 362 | static void rdma_destroy_trans(struct p9_trans_rdma *rdma) |
364 | { | 363 | { |
365 | if (!rdma) | 364 | if (!rdma) |
366 | return; | 365 | return; |
367 | 366 | ||
368 | if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) | 367 | if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) |
369 | ib_dereg_mr(rdma->dma_mr); | 368 | ib_dereg_mr(rdma->dma_mr); |
370 | 369 | ||
371 | if (rdma->qp && !IS_ERR(rdma->qp)) | 370 | if (rdma->qp && !IS_ERR(rdma->qp)) |
372 | ib_destroy_qp(rdma->qp); | 371 | ib_destroy_qp(rdma->qp); |
373 | 372 | ||
374 | if (rdma->pd && !IS_ERR(rdma->pd)) | 373 | if (rdma->pd && !IS_ERR(rdma->pd)) |
375 | ib_dealloc_pd(rdma->pd); | 374 | ib_dealloc_pd(rdma->pd); |
376 | 375 | ||
377 | if (rdma->cq && !IS_ERR(rdma->cq)) | 376 | if (rdma->cq && !IS_ERR(rdma->cq)) |
378 | ib_destroy_cq(rdma->cq); | 377 | ib_destroy_cq(rdma->cq); |
379 | 378 | ||
380 | if (rdma->cm_id && !IS_ERR(rdma->cm_id)) | 379 | if (rdma->cm_id && !IS_ERR(rdma->cm_id)) |
381 | rdma_destroy_id(rdma->cm_id); | 380 | rdma_destroy_id(rdma->cm_id); |
382 | 381 | ||
383 | kfree(rdma); | 382 | kfree(rdma); |
384 | } | 383 | } |
385 | 384 | ||
386 | static int | 385 | static int |
387 | post_recv(struct p9_client *client, struct p9_rdma_context *c) | 386 | post_recv(struct p9_client *client, struct p9_rdma_context *c) |
388 | { | 387 | { |
389 | struct p9_trans_rdma *rdma = client->trans; | 388 | struct p9_trans_rdma *rdma = client->trans; |
390 | struct ib_recv_wr wr, *bad_wr; | 389 | struct ib_recv_wr wr, *bad_wr; |
391 | struct ib_sge sge; | 390 | struct ib_sge sge; |
392 | 391 | ||
393 | c->busa = ib_dma_map_single(rdma->cm_id->device, | 392 | c->busa = ib_dma_map_single(rdma->cm_id->device, |
394 | c->rc->sdata, client->msize, | 393 | c->rc->sdata, client->msize, |
395 | DMA_FROM_DEVICE); | 394 | DMA_FROM_DEVICE); |
396 | if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) | 395 | if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) |
397 | goto error; | 396 | goto error; |
398 | 397 | ||
399 | sge.addr = c->busa; | 398 | sge.addr = c->busa; |
400 | sge.length = client->msize; | 399 | sge.length = client->msize; |
401 | sge.lkey = rdma->lkey; | 400 | sge.lkey = rdma->lkey; |
402 | 401 | ||
403 | wr.next = NULL; | 402 | wr.next = NULL; |
404 | c->wc_op = IB_WC_RECV; | 403 | c->wc_op = IB_WC_RECV; |
405 | wr.wr_id = (unsigned long) c; | 404 | wr.wr_id = (unsigned long) c; |
406 | wr.sg_list = &sge; | 405 | wr.sg_list = &sge; |
407 | wr.num_sge = 1; | 406 | wr.num_sge = 1; |
408 | return ib_post_recv(rdma->qp, &wr, &bad_wr); | 407 | return ib_post_recv(rdma->qp, &wr, &bad_wr); |
409 | 408 | ||
410 | error: | 409 | error: |
411 | P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); | 410 | P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); |
412 | return -EIO; | 411 | return -EIO; |
413 | } | 412 | } |
414 | 413 | ||
415 | static int rdma_request(struct p9_client *client, struct p9_req_t *req) | 414 | static int rdma_request(struct p9_client *client, struct p9_req_t *req) |
416 | { | 415 | { |
417 | struct p9_trans_rdma *rdma = client->trans; | 416 | struct p9_trans_rdma *rdma = client->trans; |
418 | struct ib_send_wr wr, *bad_wr; | 417 | struct ib_send_wr wr, *bad_wr; |
419 | struct ib_sge sge; | 418 | struct ib_sge sge; |
420 | int err = 0; | 419 | int err = 0; |
421 | unsigned long flags; | 420 | unsigned long flags; |
422 | struct p9_rdma_context *c = NULL; | 421 | struct p9_rdma_context *c = NULL; |
423 | struct p9_rdma_context *rpl_context = NULL; | 422 | struct p9_rdma_context *rpl_context = NULL; |
424 | 423 | ||
425 | /* Allocate an fcall for the reply */ | 424 | /* Allocate an fcall for the reply */ |
426 | rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); | 425 | rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); |
427 | if (!rpl_context) | 426 | if (!rpl_context) |
428 | goto err_close; | 427 | goto err_close; |
429 | 428 | ||
430 | /* | 429 | /* |
431 | * If the request has a buffer, steal it, otherwise | 430 | * If the request has a buffer, steal it, otherwise |
432 | * allocate a new one. Typically, requests should already | 431 | * allocate a new one. Typically, requests should already |
433 | * have receive buffers allocated and just swap them around | 432 | * have receive buffers allocated and just swap them around |
434 | */ | 433 | */ |
435 | if (!req->rc) { | 434 | if (!req->rc) { |
436 | req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, | 435 | req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, |
437 | GFP_KERNEL); | 436 | GFP_KERNEL); |
438 | if (req->rc) { | 437 | if (req->rc) { |
439 | req->rc->sdata = (char *) req->rc + | 438 | req->rc->sdata = (char *) req->rc + |
440 | sizeof(struct p9_fcall); | 439 | sizeof(struct p9_fcall); |
441 | req->rc->capacity = client->msize; | 440 | req->rc->capacity = client->msize; |
442 | } | 441 | } |
443 | } | 442 | } |
444 | rpl_context->rc = req->rc; | 443 | rpl_context->rc = req->rc; |
445 | if (!rpl_context->rc) { | 444 | if (!rpl_context->rc) { |
446 | kfree(rpl_context); | 445 | kfree(rpl_context); |
447 | goto err_close; | 446 | goto err_close; |
448 | } | 447 | } |
449 | 448 | ||
450 | /* | 449 | /* |
451 | * Post a receive buffer for this request. We need to ensure | 450 | * Post a receive buffer for this request. We need to ensure |
452 | * there is a reply buffer available for every outstanding | 451 | * there is a reply buffer available for every outstanding |
453 | * request. A flushed request can result in no reply for an | 452 | * request. A flushed request can result in no reply for an |
454 | * outstanding request, so we must keep a count to avoid | 453 | * outstanding request, so we must keep a count to avoid |
455 | * overflowing the RQ. | 454 | * overflowing the RQ. |
456 | */ | 455 | */ |
457 | if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { | 456 | if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { |
458 | err = post_recv(client, rpl_context); | 457 | err = post_recv(client, rpl_context); |
459 | if (err) { | 458 | if (err) { |
460 | kfree(rpl_context->rc); | 459 | kfree(rpl_context->rc); |
461 | kfree(rpl_context); | 460 | kfree(rpl_context); |
462 | goto err_close; | 461 | goto err_close; |
463 | } | 462 | } |
464 | } else | 463 | } else |
465 | atomic_dec(&rdma->rq_count); | 464 | atomic_dec(&rdma->rq_count); |
466 | 465 | ||
467 | /* remove posted receive buffer from request structure */ | 466 | /* remove posted receive buffer from request structure */ |
468 | req->rc = NULL; | 467 | req->rc = NULL; |
469 | 468 | ||
470 | /* Post the request */ | 469 | /* Post the request */ |
471 | c = kmalloc(sizeof *c, GFP_KERNEL); | 470 | c = kmalloc(sizeof *c, GFP_KERNEL); |
472 | if (!c) | 471 | if (!c) |
473 | goto err_close; | 472 | goto err_close; |
474 | c->req = req; | 473 | c->req = req; |
475 | 474 | ||
476 | c->busa = ib_dma_map_single(rdma->cm_id->device, | 475 | c->busa = ib_dma_map_single(rdma->cm_id->device, |
477 | c->req->tc->sdata, c->req->tc->size, | 476 | c->req->tc->sdata, c->req->tc->size, |
478 | DMA_TO_DEVICE); | 477 | DMA_TO_DEVICE); |
479 | if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) | 478 | if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) |
480 | goto error; | 479 | goto error; |
481 | 480 | ||
482 | sge.addr = c->busa; | 481 | sge.addr = c->busa; |
483 | sge.length = c->req->tc->size; | 482 | sge.length = c->req->tc->size; |
484 | sge.lkey = rdma->lkey; | 483 | sge.lkey = rdma->lkey; |
485 | 484 | ||
486 | wr.next = NULL; | 485 | wr.next = NULL; |
487 | c->wc_op = IB_WC_SEND; | 486 | c->wc_op = IB_WC_SEND; |
488 | wr.wr_id = (unsigned long) c; | 487 | wr.wr_id = (unsigned long) c; |
489 | wr.opcode = IB_WR_SEND; | 488 | wr.opcode = IB_WR_SEND; |
490 | wr.send_flags = IB_SEND_SIGNALED; | 489 | wr.send_flags = IB_SEND_SIGNALED; |
491 | wr.sg_list = &sge; | 490 | wr.sg_list = &sge; |
492 | wr.num_sge = 1; | 491 | wr.num_sge = 1; |
493 | 492 | ||
494 | if (down_interruptible(&rdma->sq_sem)) | 493 | if (down_interruptible(&rdma->sq_sem)) |
495 | goto error; | 494 | goto error; |
496 | 495 | ||
497 | return ib_post_send(rdma->qp, &wr, &bad_wr); | 496 | return ib_post_send(rdma->qp, &wr, &bad_wr); |
498 | 497 | ||
499 | error: | 498 | error: |
500 | P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); | 499 | P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); |
501 | return -EIO; | 500 | return -EIO; |
502 | 501 | ||
503 | err_close: | 502 | err_close: |
504 | spin_lock_irqsave(&rdma->req_lock, flags); | 503 | spin_lock_irqsave(&rdma->req_lock, flags); |
505 | if (rdma->state < P9_RDMA_CLOSING) { | 504 | if (rdma->state < P9_RDMA_CLOSING) { |
506 | rdma->state = P9_RDMA_CLOSING; | 505 | rdma->state = P9_RDMA_CLOSING; |
507 | spin_unlock_irqrestore(&rdma->req_lock, flags); | 506 | spin_unlock_irqrestore(&rdma->req_lock, flags); |
508 | rdma_disconnect(rdma->cm_id); | 507 | rdma_disconnect(rdma->cm_id); |
509 | } else | 508 | } else |
510 | spin_unlock_irqrestore(&rdma->req_lock, flags); | 509 | spin_unlock_irqrestore(&rdma->req_lock, flags); |
511 | return err; | 510 | return err; |
512 | } | 511 | } |
513 | 512 | ||
514 | static void rdma_close(struct p9_client *client) | 513 | static void rdma_close(struct p9_client *client) |
515 | { | 514 | { |
516 | struct p9_trans_rdma *rdma; | 515 | struct p9_trans_rdma *rdma; |
517 | 516 | ||
518 | if (!client) | 517 | if (!client) |
519 | return; | 518 | return; |
520 | 519 | ||
521 | rdma = client->trans; | 520 | rdma = client->trans; |
522 | if (!rdma) | 521 | if (!rdma) |
523 | return; | 522 | return; |
524 | 523 | ||
525 | client->status = Disconnected; | 524 | client->status = Disconnected; |
526 | rdma_disconnect(rdma->cm_id); | 525 | rdma_disconnect(rdma->cm_id); |
527 | rdma_destroy_trans(rdma); | 526 | rdma_destroy_trans(rdma); |
528 | } | 527 | } |
529 | 528 | ||
530 | /** | 529 | /** |
531 | * alloc_rdma - Allocate and initialize the rdma transport structure | 530 | * alloc_rdma - Allocate and initialize the rdma transport structure |
532 | * @msize: MTU | 531 | * @msize: MTU |
533 | * @dotu: Extension attribute | 532 | * @dotu: Extension attribute |
534 | * @opts: Mount options structure | 533 | * @opts: Mount options structure |
535 | */ | 534 | */ |
536 | static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) | 535 | static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) |
537 | { | 536 | { |
538 | struct p9_trans_rdma *rdma; | 537 | struct p9_trans_rdma *rdma; |
539 | 538 | ||
540 | rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); | 539 | rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); |
541 | if (!rdma) | 540 | if (!rdma) |
542 | return NULL; | 541 | return NULL; |
543 | 542 | ||
544 | rdma->sq_depth = opts->sq_depth; | 543 | rdma->sq_depth = opts->sq_depth; |
545 | rdma->rq_depth = opts->rq_depth; | 544 | rdma->rq_depth = opts->rq_depth; |
546 | rdma->timeout = opts->timeout; | 545 | rdma->timeout = opts->timeout; |
547 | spin_lock_init(&rdma->req_lock); | 546 | spin_lock_init(&rdma->req_lock); |
548 | init_completion(&rdma->cm_done); | 547 | init_completion(&rdma->cm_done); |
549 | sema_init(&rdma->sq_sem, rdma->sq_depth); | 548 | sema_init(&rdma->sq_sem, rdma->sq_depth); |
550 | atomic_set(&rdma->rq_count, 0); | 549 | atomic_set(&rdma->rq_count, 0); |
551 | 550 | ||
552 | return rdma; | 551 | return rdma; |
553 | } | 552 | } |
554 | 553 | ||
/*
 * rdma_cancel - cancel hook of the RDMA transport
 *
 * Unconditionally returns 1 and touches neither @client nor @req: it is
 * not clear that anything can be done once the send has been posted.
 */
static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
	return 1;
}
560 | 559 | ||
561 | /** | 560 | /** |
562 | * trans_create_rdma - Transport method for creating atransport instance | 561 | * trans_create_rdma - Transport method for creating atransport instance |
563 | * @client: client instance | 562 | * @client: client instance |
564 | * @addr: IP address string | 563 | * @addr: IP address string |
565 | * @args: Mount options string | 564 | * @args: Mount options string |
566 | */ | 565 | */ |
567 | static int | 566 | static int |
568 | rdma_create_trans(struct p9_client *client, const char *addr, char *args) | 567 | rdma_create_trans(struct p9_client *client, const char *addr, char *args) |
569 | { | 568 | { |
570 | int err; | 569 | int err; |
571 | struct p9_rdma_opts opts; | 570 | struct p9_rdma_opts opts; |
572 | struct p9_trans_rdma *rdma; | 571 | struct p9_trans_rdma *rdma; |
573 | struct rdma_conn_param conn_param; | 572 | struct rdma_conn_param conn_param; |
574 | struct ib_qp_init_attr qp_attr; | 573 | struct ib_qp_init_attr qp_attr; |
575 | struct ib_device_attr devattr; | 574 | struct ib_device_attr devattr; |
576 | 575 | ||
577 | /* Parse the transport specific mount options */ | 576 | /* Parse the transport specific mount options */ |
578 | err = parse_opts(args, &opts); | 577 | err = parse_opts(args, &opts); |
579 | if (err < 0) | 578 | if (err < 0) |
580 | return err; | 579 | return err; |
581 | 580 | ||
582 | /* Create and initialize the RDMA transport structure */ | 581 | /* Create and initialize the RDMA transport structure */ |
583 | rdma = alloc_rdma(&opts); | 582 | rdma = alloc_rdma(&opts); |
584 | if (!rdma) | 583 | if (!rdma) |
585 | return -ENOMEM; | 584 | return -ENOMEM; |
586 | 585 | ||
587 | /* Create the RDMA CM ID */ | 586 | /* Create the RDMA CM ID */ |
588 | rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); | 587 | rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); |
589 | if (IS_ERR(rdma->cm_id)) | 588 | if (IS_ERR(rdma->cm_id)) |
590 | goto error; | 589 | goto error; |
591 | 590 | ||
592 | /* Resolve the server's address */ | 591 | /* Resolve the server's address */ |
593 | rdma->addr.sin_family = AF_INET; | 592 | rdma->addr.sin_family = AF_INET; |
594 | rdma->addr.sin_addr.s_addr = in_aton(addr); | 593 | rdma->addr.sin_addr.s_addr = in_aton(addr); |
595 | rdma->addr.sin_port = htons(opts.port); | 594 | rdma->addr.sin_port = htons(opts.port); |
596 | err = rdma_resolve_addr(rdma->cm_id, NULL, | 595 | err = rdma_resolve_addr(rdma->cm_id, NULL, |
597 | (struct sockaddr *)&rdma->addr, | 596 | (struct sockaddr *)&rdma->addr, |
598 | rdma->timeout); | 597 | rdma->timeout); |
599 | if (err) | 598 | if (err) |
600 | goto error; | 599 | goto error; |
601 | err = wait_for_completion_interruptible(&rdma->cm_done); | 600 | err = wait_for_completion_interruptible(&rdma->cm_done); |
602 | if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) | 601 | if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) |
603 | goto error; | 602 | goto error; |
604 | 603 | ||
605 | /* Resolve the route to the server */ | 604 | /* Resolve the route to the server */ |
606 | err = rdma_resolve_route(rdma->cm_id, rdma->timeout); | 605 | err = rdma_resolve_route(rdma->cm_id, rdma->timeout); |
607 | if (err) | 606 | if (err) |
608 | goto error; | 607 | goto error; |
609 | err = wait_for_completion_interruptible(&rdma->cm_done); | 608 | err = wait_for_completion_interruptible(&rdma->cm_done); |
610 | if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) | 609 | if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) |
611 | goto error; | 610 | goto error; |
612 | 611 | ||
613 | /* Query the device attributes */ | 612 | /* Query the device attributes */ |
614 | err = ib_query_device(rdma->cm_id->device, &devattr); | 613 | err = ib_query_device(rdma->cm_id->device, &devattr); |
615 | if (err) | 614 | if (err) |
616 | goto error; | 615 | goto error; |
617 | 616 | ||
618 | /* Create the Completion Queue */ | 617 | /* Create the Completion Queue */ |
619 | rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, | 618 | rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, |
620 | cq_event_handler, client, | 619 | cq_event_handler, client, |
621 | opts.sq_depth + opts.rq_depth + 1, 0); | 620 | opts.sq_depth + opts.rq_depth + 1, 0); |
622 | if (IS_ERR(rdma->cq)) | 621 | if (IS_ERR(rdma->cq)) |
623 | goto error; | 622 | goto error; |
624 | ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); | 623 | ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); |
625 | 624 | ||
626 | /* Create the Protection Domain */ | 625 | /* Create the Protection Domain */ |
627 | rdma->pd = ib_alloc_pd(rdma->cm_id->device); | 626 | rdma->pd = ib_alloc_pd(rdma->cm_id->device); |
628 | if (IS_ERR(rdma->pd)) | 627 | if (IS_ERR(rdma->pd)) |
629 | goto error; | 628 | goto error; |
630 | 629 | ||
631 | /* Cache the DMA lkey in the transport */ | 630 | /* Cache the DMA lkey in the transport */ |
632 | rdma->dma_mr = NULL; | 631 | rdma->dma_mr = NULL; |
633 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) | 632 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) |
634 | rdma->lkey = rdma->cm_id->device->local_dma_lkey; | 633 | rdma->lkey = rdma->cm_id->device->local_dma_lkey; |
635 | else { | 634 | else { |
636 | rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); | 635 | rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); |
637 | if (IS_ERR(rdma->dma_mr)) | 636 | if (IS_ERR(rdma->dma_mr)) |
638 | goto error; | 637 | goto error; |
639 | rdma->lkey = rdma->dma_mr->lkey; | 638 | rdma->lkey = rdma->dma_mr->lkey; |
640 | } | 639 | } |
641 | 640 | ||
642 | /* Create the Queue Pair */ | 641 | /* Create the Queue Pair */ |
643 | memset(&qp_attr, 0, sizeof qp_attr); | 642 | memset(&qp_attr, 0, sizeof qp_attr); |
644 | qp_attr.event_handler = qp_event_handler; | 643 | qp_attr.event_handler = qp_event_handler; |
645 | qp_attr.qp_context = client; | 644 | qp_attr.qp_context = client; |
646 | qp_attr.cap.max_send_wr = opts.sq_depth; | 645 | qp_attr.cap.max_send_wr = opts.sq_depth; |
647 | qp_attr.cap.max_recv_wr = opts.rq_depth; | 646 | qp_attr.cap.max_recv_wr = opts.rq_depth; |
648 | qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; | 647 | qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; |
649 | qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; | 648 | qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; |
650 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; | 649 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
651 | qp_attr.qp_type = IB_QPT_RC; | 650 | qp_attr.qp_type = IB_QPT_RC; |
652 | qp_attr.send_cq = rdma->cq; | 651 | qp_attr.send_cq = rdma->cq; |
653 | qp_attr.recv_cq = rdma->cq; | 652 | qp_attr.recv_cq = rdma->cq; |
654 | err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); | 653 | err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); |
655 | if (err) | 654 | if (err) |
656 | goto error; | 655 | goto error; |
657 | rdma->qp = rdma->cm_id->qp; | 656 | rdma->qp = rdma->cm_id->qp; |
658 | 657 | ||
659 | /* Request a connection */ | 658 | /* Request a connection */ |
660 | memset(&conn_param, 0, sizeof(conn_param)); | 659 | memset(&conn_param, 0, sizeof(conn_param)); |
661 | conn_param.private_data = NULL; | 660 | conn_param.private_data = NULL; |
662 | conn_param.private_data_len = 0; | 661 | conn_param.private_data_len = 0; |
663 | conn_param.responder_resources = P9_RDMA_IRD; | 662 | conn_param.responder_resources = P9_RDMA_IRD; |
664 | conn_param.initiator_depth = P9_RDMA_ORD; | 663 | conn_param.initiator_depth = P9_RDMA_ORD; |
665 | err = rdma_connect(rdma->cm_id, &conn_param); | 664 | err = rdma_connect(rdma->cm_id, &conn_param); |
666 | if (err) | 665 | if (err) |
667 | goto error; | 666 | goto error; |
668 | err = wait_for_completion_interruptible(&rdma->cm_done); | 667 | err = wait_for_completion_interruptible(&rdma->cm_done); |
669 | if (err || (rdma->state != P9_RDMA_CONNECTED)) | 668 | if (err || (rdma->state != P9_RDMA_CONNECTED)) |
670 | goto error; | 669 | goto error; |
671 | 670 | ||
672 | client->trans = rdma; | 671 | client->trans = rdma; |
673 | client->status = Connected; | 672 | client->status = Connected; |
674 | 673 | ||
675 | return 0; | 674 | return 0; |
676 | 675 | ||
677 | error: | 676 | error: |
678 | rdma_destroy_trans(rdma); | 677 | rdma_destroy_trans(rdma); |
679 | return -ENOTCONN; | 678 | return -ENOTCONN; |
680 | } | 679 | } |
681 | 680 | ||
682 | static struct p9_trans_module p9_rdma_trans = { | 681 | static struct p9_trans_module p9_rdma_trans = { |
683 | .name = "rdma", | 682 | .name = "rdma", |
684 | .maxsize = P9_RDMA_MAXSIZE, | 683 | .maxsize = P9_RDMA_MAXSIZE, |
685 | .def = 0, | 684 | .def = 0, |
686 | .owner = THIS_MODULE, | 685 | .owner = THIS_MODULE, |
687 | .create = rdma_create_trans, | 686 | .create = rdma_create_trans, |
688 | .close = rdma_close, | 687 | .close = rdma_close, |
689 | .request = rdma_request, | 688 | .request = rdma_request, |
690 | .cancel = rdma_cancel, | 689 | .cancel = rdma_cancel, |
691 | }; | 690 | }; |
692 | 691 | ||
693 | /** | 692 | /** |
694 | * p9_trans_rdma_init - Register the 9P RDMA transport driver | 693 | * p9_trans_rdma_init - Register the 9P RDMA transport driver |
695 | */ | 694 | */ |
696 | static int __init p9_trans_rdma_init(void) | 695 | static int __init p9_trans_rdma_init(void) |
697 | { | 696 | { |
698 | v9fs_register_trans(&p9_rdma_trans); | 697 | v9fs_register_trans(&p9_rdma_trans); |
699 | return 0; | 698 | return 0; |
700 | } | 699 | } |
701 | 700 | ||
702 | static void __exit p9_trans_rdma_exit(void) | 701 | static void __exit p9_trans_rdma_exit(void) |
703 | { | 702 | { |
704 | v9fs_unregister_trans(&p9_rdma_trans); | 703 | v9fs_unregister_trans(&p9_rdma_trans); |
705 | } | 704 | } |
706 | 705 | ||
707 | module_init(p9_trans_rdma_init); | 706 | module_init(p9_trans_rdma_init); |
708 | module_exit(p9_trans_rdma_exit); | 707 | module_exit(p9_trans_rdma_exit); |
709 | 708 | ||
710 | MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); | 709 | MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); |
711 | MODULE_DESCRIPTION("RDMA Transport for 9P"); | 710 | MODULE_DESCRIPTION("RDMA Transport for 9P"); |
712 | MODULE_LICENSE("Dual BSD/GPL"); | 711 | MODULE_LICENSE("Dual BSD/GPL"); |
713 | 712 |
net/socket.c
1 | /* | 1 | /* |
2 | * NET An implementation of the SOCKET network access protocol. | 2 | * NET An implementation of the SOCKET network access protocol. |
3 | * | 3 | * |
4 | * Version: @(#)socket.c 1.1.93 18/02/95 | 4 | * Version: @(#)socket.c 1.1.93 18/02/95 |
5 | * | 5 | * |
6 | * Authors: Orest Zborowski, <obz@Kodak.COM> | 6 | * Authors: Orest Zborowski, <obz@Kodak.COM> |
7 | * Ross Biro | 7 | * Ross Biro |
8 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 8 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
9 | * | 9 | * |
10 | * Fixes: | 10 | * Fixes: |
11 | * Anonymous : NOTSOCK/BADF cleanup. Error fix in | 11 | * Anonymous : NOTSOCK/BADF cleanup. Error fix in |
12 | * shutdown() | 12 | * shutdown() |
13 | * Alan Cox : verify_area() fixes | 13 | * Alan Cox : verify_area() fixes |
14 | * Alan Cox : Removed DDI | 14 | * Alan Cox : Removed DDI |
15 | * Jonathan Kamens : SOCK_DGRAM reconnect bug | 15 | * Jonathan Kamens : SOCK_DGRAM reconnect bug |
16 | * Alan Cox : Moved a load of checks to the very | 16 | * Alan Cox : Moved a load of checks to the very |
17 | * top level. | 17 | * top level. |
18 | * Alan Cox : Move address structures to/from user | 18 | * Alan Cox : Move address structures to/from user |
19 | * mode above the protocol layers. | 19 | * mode above the protocol layers. |
20 | * Rob Janssen : Allow 0 length sends. | 20 | * Rob Janssen : Allow 0 length sends. |
21 | * Alan Cox : Asynchronous I/O support (cribbed from the | 21 | * Alan Cox : Asynchronous I/O support (cribbed from the |
22 | * tty drivers). | 22 | * tty drivers). |
23 | * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) | 23 | * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) |
24 | * Jeff Uphoff : Made max number of sockets command-line | 24 | * Jeff Uphoff : Made max number of sockets command-line |
25 | * configurable. | 25 | * configurable. |
26 | * Matti Aarnio : Made the number of sockets dynamic, | 26 | * Matti Aarnio : Made the number of sockets dynamic, |
27 | * to be allocated when needed, and mr. | 27 | * to be allocated when needed, and mr. |
28 | * Uphoff's max is used as max to be | 28 | * Uphoff's max is used as max to be |
29 | * allowed to allocate. | 29 | * allowed to allocate. |
30 | * Linus : Argh. removed all the socket allocation | 30 | * Linus : Argh. removed all the socket allocation |
31 | * altogether: it's in the inode now. | 31 | * altogether: it's in the inode now. |
32 | * Alan Cox : Made sock_alloc()/sock_release() public | 32 | * Alan Cox : Made sock_alloc()/sock_release() public |
33 | * for NetROM and future kernel nfsd type | 33 | * for NetROM and future kernel nfsd type |
34 | * stuff. | 34 | * stuff. |
35 | * Alan Cox : sendmsg/recvmsg basics. | 35 | * Alan Cox : sendmsg/recvmsg basics. |
36 | * Tom Dyas : Export net symbols. | 36 | * Tom Dyas : Export net symbols. |
37 | * Marcin Dalecki : Fixed problems with CONFIG_NET="n". | 37 | * Marcin Dalecki : Fixed problems with CONFIG_NET="n". |
38 | * Alan Cox : Added thread locking to sys_* calls | 38 | * Alan Cox : Added thread locking to sys_* calls |
39 | * for sockets. May have errors at the | 39 | * for sockets. May have errors at the |
40 | * moment. | 40 | * moment. |
41 | * Kevin Buhr : Fixed the dumb errors in the above. | 41 | * Kevin Buhr : Fixed the dumb errors in the above. |
42 | * Andi Kleen : Some small cleanups, optimizations, | 42 | * Andi Kleen : Some small cleanups, optimizations, |
43 | * and fixed a copy_from_user() bug. | 43 | * and fixed a copy_from_user() bug. |
44 | * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) | 44 | * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) |
45 | * Tigran Aivazian : Made listen(2) backlog sanity checks | 45 | * Tigran Aivazian : Made listen(2) backlog sanity checks |
46 | * protocol-independent | 46 | * protocol-independent |
47 | * | 47 | * |
48 | * | 48 | * |
49 | * This program is free software; you can redistribute it and/or | 49 | * This program is free software; you can redistribute it and/or |
50 | * modify it under the terms of the GNU General Public License | 50 | * modify it under the terms of the GNU General Public License |
51 | * as published by the Free Software Foundation; either version | 51 | * as published by the Free Software Foundation; either version |
52 | * 2 of the License, or (at your option) any later version. | 52 | * 2 of the License, or (at your option) any later version. |
53 | * | 53 | * |
54 | * | 54 | * |
55 | * This module is effectively the top level interface to the BSD socket | 55 | * This module is effectively the top level interface to the BSD socket |
56 | * paradigm. | 56 | * paradigm. |
57 | * | 57 | * |
58 | * Based upon Swansea University Computer Society NET3.039 | 58 | * Based upon Swansea University Computer Society NET3.039 |
59 | */ | 59 | */ |
60 | 60 | ||
61 | #include <linux/mm.h> | 61 | #include <linux/mm.h> |
62 | #include <linux/socket.h> | 62 | #include <linux/socket.h> |
63 | #include <linux/file.h> | 63 | #include <linux/file.h> |
64 | #include <linux/net.h> | 64 | #include <linux/net.h> |
65 | #include <linux/interrupt.h> | 65 | #include <linux/interrupt.h> |
66 | #include <linux/thread_info.h> | 66 | #include <linux/thread_info.h> |
67 | #include <linux/rcupdate.h> | 67 | #include <linux/rcupdate.h> |
68 | #include <linux/netdevice.h> | 68 | #include <linux/netdevice.h> |
69 | #include <linux/proc_fs.h> | 69 | #include <linux/proc_fs.h> |
70 | #include <linux/seq_file.h> | 70 | #include <linux/seq_file.h> |
71 | #include <linux/mutex.h> | 71 | #include <linux/mutex.h> |
72 | #include <linux/thread_info.h> | ||
73 | #include <linux/wanrouter.h> | 72 | #include <linux/wanrouter.h> |
74 | #include <linux/if_bridge.h> | 73 | #include <linux/if_bridge.h> |
75 | #include <linux/if_frad.h> | 74 | #include <linux/if_frad.h> |
76 | #include <linux/if_vlan.h> | 75 | #include <linux/if_vlan.h> |
77 | #include <linux/init.h> | 76 | #include <linux/init.h> |
78 | #include <linux/poll.h> | 77 | #include <linux/poll.h> |
79 | #include <linux/cache.h> | 78 | #include <linux/cache.h> |
80 | #include <linux/module.h> | 79 | #include <linux/module.h> |
81 | #include <linux/highmem.h> | 80 | #include <linux/highmem.h> |
82 | #include <linux/mount.h> | 81 | #include <linux/mount.h> |
83 | #include <linux/security.h> | 82 | #include <linux/security.h> |
84 | #include <linux/syscalls.h> | 83 | #include <linux/syscalls.h> |
85 | #include <linux/compat.h> | 84 | #include <linux/compat.h> |
86 | #include <linux/kmod.h> | 85 | #include <linux/kmod.h> |
87 | #include <linux/audit.h> | 86 | #include <linux/audit.h> |
88 | #include <linux/wireless.h> | 87 | #include <linux/wireless.h> |
89 | #include <linux/nsproxy.h> | 88 | #include <linux/nsproxy.h> |
90 | 89 | ||
91 | #include <asm/uaccess.h> | 90 | #include <asm/uaccess.h> |
92 | #include <asm/unistd.h> | 91 | #include <asm/unistd.h> |
93 | 92 | ||
94 | #include <net/compat.h> | 93 | #include <net/compat.h> |
95 | #include <net/wext.h> | 94 | #include <net/wext.h> |
96 | 95 | ||
97 | #include <net/sock.h> | 96 | #include <net/sock.h> |
98 | #include <linux/netfilter.h> | 97 | #include <linux/netfilter.h> |
99 | 98 | ||
100 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare); | 99 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare); |
101 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, | 100 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, |
102 | unsigned long nr_segs, loff_t pos); | 101 | unsigned long nr_segs, loff_t pos); |
103 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, | 102 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, |
104 | unsigned long nr_segs, loff_t pos); | 103 | unsigned long nr_segs, loff_t pos); |
105 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); | 104 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); |
106 | 105 | ||
107 | static int sock_close(struct inode *inode, struct file *file); | 106 | static int sock_close(struct inode *inode, struct file *file); |
108 | static unsigned int sock_poll(struct file *file, | 107 | static unsigned int sock_poll(struct file *file, |
109 | struct poll_table_struct *wait); | 108 | struct poll_table_struct *wait); |
110 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 109 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
111 | #ifdef CONFIG_COMPAT | 110 | #ifdef CONFIG_COMPAT |
112 | static long compat_sock_ioctl(struct file *file, | 111 | static long compat_sock_ioctl(struct file *file, |
113 | unsigned int cmd, unsigned long arg); | 112 | unsigned int cmd, unsigned long arg); |
114 | #endif | 113 | #endif |
115 | static int sock_fasync(int fd, struct file *filp, int on); | 114 | static int sock_fasync(int fd, struct file *filp, int on); |
116 | static ssize_t sock_sendpage(struct file *file, struct page *page, | 115 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
117 | int offset, size_t size, loff_t *ppos, int more); | 116 | int offset, size_t size, loff_t *ppos, int more); |
118 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, | 117 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, |
119 | struct pipe_inode_info *pipe, size_t len, | 118 | struct pipe_inode_info *pipe, size_t len, |
120 | unsigned int flags); | 119 | unsigned int flags); |
121 | 120 | ||
122 | /* | 121 | /* |
123 | * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear | 122 | * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear |
124 | * in the operation structures but are done directly via the socketcall() multiplexor. | 123 | * in the operation structures but are done directly via the socketcall() multiplexor. |
125 | */ | 124 | */ |
126 | 125 | ||
127 | static const struct file_operations socket_file_ops = { | 126 | static const struct file_operations socket_file_ops = { |
128 | .owner = THIS_MODULE, | 127 | .owner = THIS_MODULE, |
129 | .llseek = no_llseek, | 128 | .llseek = no_llseek, |
130 | .aio_read = sock_aio_read, | 129 | .aio_read = sock_aio_read, |
131 | .aio_write = sock_aio_write, | 130 | .aio_write = sock_aio_write, |
132 | .poll = sock_poll, | 131 | .poll = sock_poll, |
133 | .unlocked_ioctl = sock_ioctl, | 132 | .unlocked_ioctl = sock_ioctl, |
134 | #ifdef CONFIG_COMPAT | 133 | #ifdef CONFIG_COMPAT |
135 | .compat_ioctl = compat_sock_ioctl, | 134 | .compat_ioctl = compat_sock_ioctl, |
136 | #endif | 135 | #endif |
137 | .mmap = sock_mmap, | 136 | .mmap = sock_mmap, |
138 | .open = sock_no_open, /* special open code to disallow open via /proc */ | 137 | .open = sock_no_open, /* special open code to disallow open via /proc */ |
139 | .release = sock_close, | 138 | .release = sock_close, |
140 | .fasync = sock_fasync, | 139 | .fasync = sock_fasync, |
141 | .sendpage = sock_sendpage, | 140 | .sendpage = sock_sendpage, |
142 | .splice_write = generic_splice_sendpage, | 141 | .splice_write = generic_splice_sendpage, |
143 | .splice_read = sock_splice_read, | 142 | .splice_read = sock_splice_read, |
144 | }; | 143 | }; |
145 | 144 | ||
146 | /* | 145 | /* |
147 | * The protocol list. Each protocol is registered in here. | 146 | * The protocol list. Each protocol is registered in here. |
148 | */ | 147 | */ |
149 | 148 | ||
150 | static DEFINE_SPINLOCK(net_family_lock); | 149 | static DEFINE_SPINLOCK(net_family_lock); |
151 | static const struct net_proto_family *net_families[NPROTO] __read_mostly; | 150 | static const struct net_proto_family *net_families[NPROTO] __read_mostly; |
152 | 151 | ||
153 | /* | 152 | /* |
154 | * Statistics counters of the socket lists | 153 | * Statistics counters of the socket lists |
155 | */ | 154 | */ |
156 | 155 | ||
157 | static DEFINE_PER_CPU(int, sockets_in_use) = 0; | 156 | static DEFINE_PER_CPU(int, sockets_in_use) = 0; |
158 | 157 | ||
159 | /* | 158 | /* |
160 | * Support routines. | 159 | * Support routines. |
161 | * Move socket addresses back and forth across the kernel/user | 160 | * Move socket addresses back and forth across the kernel/user |
162 | * divide and look after the messy bits. | 161 | * divide and look after the messy bits. |
163 | */ | 162 | */ |
164 | 163 | ||
165 | #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - | 164 | #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - |
166 | 16 for IP, 16 for IPX, | 165 | 16 for IP, 16 for IPX, |
167 | 24 for IPv6, | 166 | 24 for IPv6, |
168 | about 80 for AX.25 | 167 | about 80 for AX.25 |
169 | must be at least one bigger than | 168 | must be at least one bigger than |
170 | the AF_UNIX size (see net/unix/af_unix.c | 169 | the AF_UNIX size (see net/unix/af_unix.c |
171 | :unix_mkname()). | 170 | :unix_mkname()). |
172 | */ | 171 | */ |
173 | 172 | ||
174 | /** | 173 | /** |
175 | * move_addr_to_kernel - copy a socket address into kernel space | 174 | * move_addr_to_kernel - copy a socket address into kernel space |
176 | * @uaddr: Address in user space | 175 | * @uaddr: Address in user space |
177 | * @kaddr: Address in kernel space | 176 | * @kaddr: Address in kernel space |
178 | * @ulen: Length in user space | 177 | * @ulen: Length in user space |
179 | * | 178 | * |
180 | * The address is copied into kernel space. If the provided address is | 179 | * The address is copied into kernel space. If the provided address is |
181 | * too long an error code of -EINVAL is returned. If the copy gives | 180 | * too long an error code of -EINVAL is returned. If the copy gives |
182 | * invalid addresses -EFAULT is returned. On a success 0 is returned. | 181 | * invalid addresses -EFAULT is returned. On a success 0 is returned. |
183 | */ | 182 | */ |
184 | 183 | ||
185 | int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) | 184 | int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) |
186 | { | 185 | { |
187 | if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) | 186 | if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) |
188 | return -EINVAL; | 187 | return -EINVAL; |
189 | if (ulen == 0) | 188 | if (ulen == 0) |
190 | return 0; | 189 | return 0; |
191 | if (copy_from_user(kaddr, uaddr, ulen)) | 190 | if (copy_from_user(kaddr, uaddr, ulen)) |
192 | return -EFAULT; | 191 | return -EFAULT; |
193 | return audit_sockaddr(ulen, kaddr); | 192 | return audit_sockaddr(ulen, kaddr); |
194 | } | 193 | } |
195 | 194 | ||
196 | /** | 195 | /** |
197 | * move_addr_to_user - copy an address to user space | 196 | * move_addr_to_user - copy an address to user space |
198 | * @kaddr: kernel space address | 197 | * @kaddr: kernel space address |
199 | * @klen: length of address in kernel | 198 | * @klen: length of address in kernel |
200 | * @uaddr: user space address | 199 | * @uaddr: user space address |
201 | * @ulen: pointer to user length field | 200 | * @ulen: pointer to user length field |
202 | * | 201 | * |
203 | * The value pointed to by ulen on entry is the buffer length available. | 202 | * The value pointed to by ulen on entry is the buffer length available. |
204 | * This is overwritten with the buffer space used. -EINVAL is returned | 203 | * This is overwritten with the buffer space used. -EINVAL is returned |
205 | * if an overlong buffer is specified or a negative buffer size. -EFAULT | 204 | * if an overlong buffer is specified or a negative buffer size. -EFAULT |
206 | * is returned if either the buffer or the length field are not | 205 | * is returned if either the buffer or the length field are not |
207 | * accessible. | 206 | * accessible. |
208 | * After copying the data up to the limit the user specifies, the true | 207 | * After copying the data up to the limit the user specifies, the true |
209 | * length of the data is written over the length limit the user | 208 | * length of the data is written over the length limit the user |
210 | * specified. Zero is returned for a success. | 209 | * specified. Zero is returned for a success. |
211 | */ | 210 | */ |
212 | 211 | ||
213 | int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, | 212 | int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, |
214 | int __user *ulen) | 213 | int __user *ulen) |
215 | { | 214 | { |
216 | int err; | 215 | int err; |
217 | int len; | 216 | int len; |
218 | 217 | ||
219 | err = get_user(len, ulen); | 218 | err = get_user(len, ulen); |
220 | if (err) | 219 | if (err) |
221 | return err; | 220 | return err; |
222 | if (len > klen) | 221 | if (len > klen) |
223 | len = klen; | 222 | len = klen; |
224 | if (len < 0 || len > sizeof(struct sockaddr_storage)) | 223 | if (len < 0 || len > sizeof(struct sockaddr_storage)) |
225 | return -EINVAL; | 224 | return -EINVAL; |
226 | if (len) { | 225 | if (len) { |
227 | if (audit_sockaddr(klen, kaddr)) | 226 | if (audit_sockaddr(klen, kaddr)) |
228 | return -ENOMEM; | 227 | return -ENOMEM; |
229 | if (copy_to_user(uaddr, kaddr, len)) | 228 | if (copy_to_user(uaddr, kaddr, len)) |
230 | return -EFAULT; | 229 | return -EFAULT; |
231 | } | 230 | } |
232 | /* | 231 | /* |
233 | * "fromlen shall refer to the value before truncation.." | 232 | * "fromlen shall refer to the value before truncation.." |
234 | * 1003.1g | 233 | * 1003.1g |
235 | */ | 234 | */ |
236 | return __put_user(klen, ulen); | 235 | return __put_user(klen, ulen); |
237 | } | 236 | } |
238 | 237 | ||
239 | #define SOCKFS_MAGIC 0x534F434B | 238 | #define SOCKFS_MAGIC 0x534F434B |
240 | 239 | ||
241 | static struct kmem_cache *sock_inode_cachep __read_mostly; | 240 | static struct kmem_cache *sock_inode_cachep __read_mostly; |
242 | 241 | ||
243 | static struct inode *sock_alloc_inode(struct super_block *sb) | 242 | static struct inode *sock_alloc_inode(struct super_block *sb) |
244 | { | 243 | { |
245 | struct socket_alloc *ei; | 244 | struct socket_alloc *ei; |
246 | 245 | ||
247 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); | 246 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); |
248 | if (!ei) | 247 | if (!ei) |
249 | return NULL; | 248 | return NULL; |
250 | init_waitqueue_head(&ei->socket.wait); | 249 | init_waitqueue_head(&ei->socket.wait); |
251 | 250 | ||
252 | ei->socket.fasync_list = NULL; | 251 | ei->socket.fasync_list = NULL; |
253 | ei->socket.state = SS_UNCONNECTED; | 252 | ei->socket.state = SS_UNCONNECTED; |
254 | ei->socket.flags = 0; | 253 | ei->socket.flags = 0; |
255 | ei->socket.ops = NULL; | 254 | ei->socket.ops = NULL; |
256 | ei->socket.sk = NULL; | 255 | ei->socket.sk = NULL; |
257 | ei->socket.file = NULL; | 256 | ei->socket.file = NULL; |
258 | 257 | ||
259 | return &ei->vfs_inode; | 258 | return &ei->vfs_inode; |
260 | } | 259 | } |
261 | 260 | ||
262 | static void sock_destroy_inode(struct inode *inode) | 261 | static void sock_destroy_inode(struct inode *inode) |
263 | { | 262 | { |
264 | kmem_cache_free(sock_inode_cachep, | 263 | kmem_cache_free(sock_inode_cachep, |
265 | container_of(inode, struct socket_alloc, vfs_inode)); | 264 | container_of(inode, struct socket_alloc, vfs_inode)); |
266 | } | 265 | } |
267 | 266 | ||
268 | static void init_once(void *foo) | 267 | static void init_once(void *foo) |
269 | { | 268 | { |
270 | struct socket_alloc *ei = (struct socket_alloc *)foo; | 269 | struct socket_alloc *ei = (struct socket_alloc *)foo; |
271 | 270 | ||
272 | inode_init_once(&ei->vfs_inode); | 271 | inode_init_once(&ei->vfs_inode); |
273 | } | 272 | } |
274 | 273 | ||
275 | static int init_inodecache(void) | 274 | static int init_inodecache(void) |
276 | { | 275 | { |
277 | sock_inode_cachep = kmem_cache_create("sock_inode_cache", | 276 | sock_inode_cachep = kmem_cache_create("sock_inode_cache", |
278 | sizeof(struct socket_alloc), | 277 | sizeof(struct socket_alloc), |
279 | 0, | 278 | 0, |
280 | (SLAB_HWCACHE_ALIGN | | 279 | (SLAB_HWCACHE_ALIGN | |
281 | SLAB_RECLAIM_ACCOUNT | | 280 | SLAB_RECLAIM_ACCOUNT | |
282 | SLAB_MEM_SPREAD), | 281 | SLAB_MEM_SPREAD), |
283 | init_once); | 282 | init_once); |
284 | if (sock_inode_cachep == NULL) | 283 | if (sock_inode_cachep == NULL) |
285 | return -ENOMEM; | 284 | return -ENOMEM; |
286 | return 0; | 285 | return 0; |
287 | } | 286 | } |
288 | 287 | ||
289 | static struct super_operations sockfs_ops = { | 288 | static struct super_operations sockfs_ops = { |
290 | .alloc_inode = sock_alloc_inode, | 289 | .alloc_inode = sock_alloc_inode, |
291 | .destroy_inode =sock_destroy_inode, | 290 | .destroy_inode =sock_destroy_inode, |
292 | .statfs = simple_statfs, | 291 | .statfs = simple_statfs, |
293 | }; | 292 | }; |
294 | 293 | ||
295 | static int sockfs_get_sb(struct file_system_type *fs_type, | 294 | static int sockfs_get_sb(struct file_system_type *fs_type, |
296 | int flags, const char *dev_name, void *data, | 295 | int flags, const char *dev_name, void *data, |
297 | struct vfsmount *mnt) | 296 | struct vfsmount *mnt) |
298 | { | 297 | { |
299 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, | 298 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, |
300 | mnt); | 299 | mnt); |
301 | } | 300 | } |
302 | 301 | ||
303 | static struct vfsmount *sock_mnt __read_mostly; | 302 | static struct vfsmount *sock_mnt __read_mostly; |
304 | 303 | ||
305 | static struct file_system_type sock_fs_type = { | 304 | static struct file_system_type sock_fs_type = { |
306 | .name = "sockfs", | 305 | .name = "sockfs", |
307 | .get_sb = sockfs_get_sb, | 306 | .get_sb = sockfs_get_sb, |
308 | .kill_sb = kill_anon_super, | 307 | .kill_sb = kill_anon_super, |
309 | }; | 308 | }; |
310 | 309 | ||
311 | static int sockfs_delete_dentry(struct dentry *dentry) | 310 | static int sockfs_delete_dentry(struct dentry *dentry) |
312 | { | 311 | { |
313 | /* | 312 | /* |
314 | * At creation time, we pretended this dentry was hashed | 313 | * At creation time, we pretended this dentry was hashed |
315 | * (by clearing DCACHE_UNHASHED bit in d_flags) | 314 | * (by clearing DCACHE_UNHASHED bit in d_flags) |
316 | * At delete time, we restore the truth : not hashed. | 315 | * At delete time, we restore the truth : not hashed. |
317 | * (so that dput() can proceed correctly) | 316 | * (so that dput() can proceed correctly) |
318 | */ | 317 | */ |
319 | dentry->d_flags |= DCACHE_UNHASHED; | 318 | dentry->d_flags |= DCACHE_UNHASHED; |
320 | return 0; | 319 | return 0; |
321 | } | 320 | } |
322 | 321 | ||
323 | /* | 322 | /* |
324 | * sockfs_dname() is called from d_path(). | 323 | * sockfs_dname() is called from d_path(). |
325 | */ | 324 | */ |
326 | static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) | 325 | static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) |
327 | { | 326 | { |
328 | return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", | 327 | return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", |
329 | dentry->d_inode->i_ino); | 328 | dentry->d_inode->i_ino); |
330 | } | 329 | } |
331 | 330 | ||
332 | static struct dentry_operations sockfs_dentry_operations = { | 331 | static struct dentry_operations sockfs_dentry_operations = { |
333 | .d_delete = sockfs_delete_dentry, | 332 | .d_delete = sockfs_delete_dentry, |
334 | .d_dname = sockfs_dname, | 333 | .d_dname = sockfs_dname, |
335 | }; | 334 | }; |
336 | 335 | ||
337 | /* | 336 | /* |
338 | * Obtains the first available file descriptor and sets it up for use. | 337 | * Obtains the first available file descriptor and sets it up for use. |
339 | * | 338 | * |
340 | * These functions create file structures and maps them to fd space | 339 | * These functions create file structures and maps them to fd space |
341 | * of the current process. On success it returns file descriptor | 340 | * of the current process. On success it returns file descriptor |
342 | * and file struct implicitly stored in sock->file. | 341 | * and file struct implicitly stored in sock->file. |
343 | * Note that another thread may close file descriptor before we return | 342 | * Note that another thread may close file descriptor before we return |
344 | * from this function. We use the fact that now we do not refer | 343 | * from this function. We use the fact that now we do not refer |
345 | * to socket after mapping. If one day we will need it, this | 344 | * to socket after mapping. If one day we will need it, this |
346 | * function will increment ref. count on file by 1. | 345 | * function will increment ref. count on file by 1. |
347 | * | 346 | * |
348 | * In any case returned fd MAY BE not valid! | 347 | * In any case returned fd MAY BE not valid! |
349 | * This race condition is unavoidable | 348 | * This race condition is unavoidable |
350 | * with shared fd spaces, we cannot solve it inside kernel, | 349 | * with shared fd spaces, we cannot solve it inside kernel, |
351 | * but we take care of internal coherence yet. | 350 | * but we take care of internal coherence yet. |
352 | */ | 351 | */ |
353 | 352 | ||
354 | static int sock_alloc_fd(struct file **filep, int flags) | 353 | static int sock_alloc_fd(struct file **filep, int flags) |
355 | { | 354 | { |
356 | int fd; | 355 | int fd; |
357 | 356 | ||
358 | fd = get_unused_fd_flags(flags); | 357 | fd = get_unused_fd_flags(flags); |
359 | if (likely(fd >= 0)) { | 358 | if (likely(fd >= 0)) { |
360 | struct file *file = get_empty_filp(); | 359 | struct file *file = get_empty_filp(); |
361 | 360 | ||
362 | *filep = file; | 361 | *filep = file; |
363 | if (unlikely(!file)) { | 362 | if (unlikely(!file)) { |
364 | put_unused_fd(fd); | 363 | put_unused_fd(fd); |
365 | return -ENFILE; | 364 | return -ENFILE; |
366 | } | 365 | } |
367 | } else | 366 | } else |
368 | *filep = NULL; | 367 | *filep = NULL; |
369 | return fd; | 368 | return fd; |
370 | } | 369 | } |
371 | 370 | ||
372 | static int sock_attach_fd(struct socket *sock, struct file *file, int flags) | 371 | static int sock_attach_fd(struct socket *sock, struct file *file, int flags) |
373 | { | 372 | { |
374 | struct dentry *dentry; | 373 | struct dentry *dentry; |
375 | struct qstr name = { .name = "" }; | 374 | struct qstr name = { .name = "" }; |
376 | 375 | ||
377 | dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); | 376 | dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); |
378 | if (unlikely(!dentry)) | 377 | if (unlikely(!dentry)) |
379 | return -ENOMEM; | 378 | return -ENOMEM; |
380 | 379 | ||
381 | dentry->d_op = &sockfs_dentry_operations; | 380 | dentry->d_op = &sockfs_dentry_operations; |
382 | /* | 381 | /* |
383 | * We dont want to push this dentry into global dentry hash table. | 382 | * We dont want to push this dentry into global dentry hash table. |
384 | * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED | 383 | * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED |
385 | * This permits a working /proc/$pid/fd/XXX on sockets | 384 | * This permits a working /proc/$pid/fd/XXX on sockets |
386 | */ | 385 | */ |
387 | dentry->d_flags &= ~DCACHE_UNHASHED; | 386 | dentry->d_flags &= ~DCACHE_UNHASHED; |
388 | d_instantiate(dentry, SOCK_INODE(sock)); | 387 | d_instantiate(dentry, SOCK_INODE(sock)); |
389 | 388 | ||
390 | sock->file = file; | 389 | sock->file = file; |
391 | init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, | 390 | init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, |
392 | &socket_file_ops); | 391 | &socket_file_ops); |
393 | SOCK_INODE(sock)->i_fop = &socket_file_ops; | 392 | SOCK_INODE(sock)->i_fop = &socket_file_ops; |
394 | file->f_flags = O_RDWR | (flags & O_NONBLOCK); | 393 | file->f_flags = O_RDWR | (flags & O_NONBLOCK); |
395 | file->f_pos = 0; | 394 | file->f_pos = 0; |
396 | file->private_data = sock; | 395 | file->private_data = sock; |
397 | 396 | ||
398 | return 0; | 397 | return 0; |
399 | } | 398 | } |
400 | 399 | ||
/*
 * Allocate an fd and a file for @sock, attach them, and install the file
 * in the fd table.
 *
 * Returns the new fd on success. On failure returns the negative error
 * from sock_alloc_fd() or sock_attach_fd(); in the latter case the file
 * and the fd are released again.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile, flags);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
418 | 417 | ||
419 | static struct socket *sock_from_file(struct file *file, int *err) | 418 | static struct socket *sock_from_file(struct file *file, int *err) |
420 | { | 419 | { |
421 | if (file->f_op == &socket_file_ops) | 420 | if (file->f_op == &socket_file_ops) |
422 | return file->private_data; /* set in sock_map_fd */ | 421 | return file->private_data; /* set in sock_map_fd */ |
423 | 422 | ||
424 | *err = -ENOTSOCK; | 423 | *err = -ENOTSOCK; |
425 | return NULL; | 424 | return NULL; |
426 | } | 425 | } |
427 | 426 | ||
428 | /** | 427 | /** |
429 | * sockfd_lookup - Go from a file number to its socket slot | 428 | * sockfd_lookup - Go from a file number to its socket slot |
430 | * @fd: file handle | 429 | * @fd: file handle |
431 | * @err: pointer to an error code return | 430 | * @err: pointer to an error code return |
432 | * | 431 | * |
433 | * The file handle passed in is locked and the socket it is bound | 432 | * The file handle passed in is locked and the socket it is bound |
434 | * too is returned. If an error occurs the err pointer is overwritten | 433 | * too is returned. If an error occurs the err pointer is overwritten |
435 | * with a negative errno code and NULL is returned. The function checks | 434 | * with a negative errno code and NULL is returned. The function checks |
436 | * for both invalid handles and passing a handle which is not a socket. | 435 | * for both invalid handles and passing a handle which is not a socket. |
437 | * | 436 | * |
438 | * On a success the socket object pointer is returned. | 437 | * On a success the socket object pointer is returned. |
439 | */ | 438 | */ |
440 | 439 | ||
441 | struct socket *sockfd_lookup(int fd, int *err) | 440 | struct socket *sockfd_lookup(int fd, int *err) |
442 | { | 441 | { |
443 | struct file *file; | 442 | struct file *file; |
444 | struct socket *sock; | 443 | struct socket *sock; |
445 | 444 | ||
446 | file = fget(fd); | 445 | file = fget(fd); |
447 | if (!file) { | 446 | if (!file) { |
448 | *err = -EBADF; | 447 | *err = -EBADF; |
449 | return NULL; | 448 | return NULL; |
450 | } | 449 | } |
451 | 450 | ||
452 | sock = sock_from_file(file, err); | 451 | sock = sock_from_file(file, err); |
453 | if (!sock) | 452 | if (!sock) |
454 | fput(file); | 453 | fput(file); |
455 | return sock; | 454 | return sock; |
456 | } | 455 | } |
457 | 456 | ||
458 | static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) | 457 | static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) |
459 | { | 458 | { |
460 | struct file *file; | 459 | struct file *file; |
461 | struct socket *sock; | 460 | struct socket *sock; |
462 | 461 | ||
463 | *err = -EBADF; | 462 | *err = -EBADF; |
464 | file = fget_light(fd, fput_needed); | 463 | file = fget_light(fd, fput_needed); |
465 | if (file) { | 464 | if (file) { |
466 | sock = sock_from_file(file, err); | 465 | sock = sock_from_file(file, err); |
467 | if (sock) | 466 | if (sock) |
468 | return sock; | 467 | return sock; |
469 | fput_light(file, *fput_needed); | 468 | fput_light(file, *fput_needed); |
470 | } | 469 | } |
471 | return NULL; | 470 | return NULL; |
472 | } | 471 | } |
473 | 472 | ||
474 | /** | 473 | /** |
475 | * sock_alloc - allocate a socket | 474 | * sock_alloc - allocate a socket |
476 | * | 475 | * |
477 | * Allocate a new inode and socket object. The two are bound together | 476 | * Allocate a new inode and socket object. The two are bound together |
478 | * and initialised. The socket is then returned. If we are out of inodes | 477 | * and initialised. The socket is then returned. If we are out of inodes |
479 | * NULL is returned. | 478 | * NULL is returned. |
480 | */ | 479 | */ |
481 | 480 | ||
482 | static struct socket *sock_alloc(void) | 481 | static struct socket *sock_alloc(void) |
483 | { | 482 | { |
484 | struct inode *inode; | 483 | struct inode *inode; |
485 | struct socket *sock; | 484 | struct socket *sock; |
486 | 485 | ||
487 | inode = new_inode(sock_mnt->mnt_sb); | 486 | inode = new_inode(sock_mnt->mnt_sb); |
488 | if (!inode) | 487 | if (!inode) |
489 | return NULL; | 488 | return NULL; |
490 | 489 | ||
491 | sock = SOCKET_I(inode); | 490 | sock = SOCKET_I(inode); |
492 | 491 | ||
493 | inode->i_mode = S_IFSOCK | S_IRWXUGO; | 492 | inode->i_mode = S_IFSOCK | S_IRWXUGO; |
494 | inode->i_uid = current->fsuid; | 493 | inode->i_uid = current->fsuid; |
495 | inode->i_gid = current->fsgid; | 494 | inode->i_gid = current->fsgid; |
496 | 495 | ||
497 | get_cpu_var(sockets_in_use)++; | 496 | get_cpu_var(sockets_in_use)++; |
498 | put_cpu_var(sockets_in_use); | 497 | put_cpu_var(sockets_in_use); |
499 | return sock; | 498 | return sock; |
500 | } | 499 | } |
501 | 500 | ||
502 | /* | 501 | /* |
503 | * In theory you can't get an open on this inode, but /proc provides | 502 | * In theory you can't get an open on this inode, but /proc provides |
504 | * a back door. Remember to keep it shut otherwise you'll let the | 503 | * a back door. Remember to keep it shut otherwise you'll let the |
505 | * creepy crawlies in. | 504 | * creepy crawlies in. |
506 | */ | 505 | */ |
507 | 506 | ||
508 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) | 507 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) |
509 | { | 508 | { |
510 | return -ENXIO; | 509 | return -ENXIO; |
511 | } | 510 | } |
512 | 511 | ||
513 | const struct file_operations bad_sock_fops = { | 512 | const struct file_operations bad_sock_fops = { |
514 | .owner = THIS_MODULE, | 513 | .owner = THIS_MODULE, |
515 | .open = sock_no_open, | 514 | .open = sock_no_open, |
516 | }; | 515 | }; |
517 | 516 | ||
518 | /** | 517 | /** |
519 | * sock_release - close a socket | 518 | * sock_release - close a socket |
520 | * @sock: socket to close | 519 | * @sock: socket to close |
521 | * | 520 | * |
522 | * The socket is released from the protocol stack if it has a release | 521 | * The socket is released from the protocol stack if it has a release |
523 | * callback, and the inode is then released if the socket is bound to | 522 | * callback, and the inode is then released if the socket is bound to |
524 | * an inode not a file. | 523 | * an inode not a file. |
525 | */ | 524 | */ |
526 | 525 | ||
527 | void sock_release(struct socket *sock) | 526 | void sock_release(struct socket *sock) |
528 | { | 527 | { |
529 | if (sock->ops) { | 528 | if (sock->ops) { |
530 | struct module *owner = sock->ops->owner; | 529 | struct module *owner = sock->ops->owner; |
531 | 530 | ||
532 | sock->ops->release(sock); | 531 | sock->ops->release(sock); |
533 | sock->ops = NULL; | 532 | sock->ops = NULL; |
534 | module_put(owner); | 533 | module_put(owner); |
535 | } | 534 | } |
536 | 535 | ||
537 | if (sock->fasync_list) | 536 | if (sock->fasync_list) |
538 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); | 537 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); |
539 | 538 | ||
540 | get_cpu_var(sockets_in_use)--; | 539 | get_cpu_var(sockets_in_use)--; |
541 | put_cpu_var(sockets_in_use); | 540 | put_cpu_var(sockets_in_use); |
542 | if (!sock->file) { | 541 | if (!sock->file) { |
543 | iput(SOCK_INODE(sock)); | 542 | iput(SOCK_INODE(sock)); |
544 | return; | 543 | return; |
545 | } | 544 | } |
546 | sock->file = NULL; | 545 | sock->file = NULL; |
547 | } | 546 | } |
548 | 547 | ||
549 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | 548 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, |
550 | struct msghdr *msg, size_t size) | 549 | struct msghdr *msg, size_t size) |
551 | { | 550 | { |
552 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 551 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
553 | int err; | 552 | int err; |
554 | 553 | ||
555 | si->sock = sock; | 554 | si->sock = sock; |
556 | si->scm = NULL; | 555 | si->scm = NULL; |
557 | si->msg = msg; | 556 | si->msg = msg; |
558 | si->size = size; | 557 | si->size = size; |
559 | 558 | ||
560 | err = security_socket_sendmsg(sock, msg, size); | 559 | err = security_socket_sendmsg(sock, msg, size); |
561 | if (err) | 560 | if (err) |
562 | return err; | 561 | return err; |
563 | 562 | ||
564 | return sock->ops->sendmsg(iocb, sock, msg, size); | 563 | return sock->ops->sendmsg(iocb, sock, msg, size); |
565 | } | 564 | } |
566 | 565 | ||
567 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) | 566 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) |
568 | { | 567 | { |
569 | struct kiocb iocb; | 568 | struct kiocb iocb; |
570 | struct sock_iocb siocb; | 569 | struct sock_iocb siocb; |
571 | int ret; | 570 | int ret; |
572 | 571 | ||
573 | init_sync_kiocb(&iocb, NULL); | 572 | init_sync_kiocb(&iocb, NULL); |
574 | iocb.private = &siocb; | 573 | iocb.private = &siocb; |
575 | ret = __sock_sendmsg(&iocb, sock, msg, size); | 574 | ret = __sock_sendmsg(&iocb, sock, msg, size); |
576 | if (-EIOCBQUEUED == ret) | 575 | if (-EIOCBQUEUED == ret) |
577 | ret = wait_on_sync_kiocb(&iocb); | 576 | ret = wait_on_sync_kiocb(&iocb); |
578 | return ret; | 577 | return ret; |
579 | } | 578 | } |
580 | 579 | ||
581 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, | 580 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, |
582 | struct kvec *vec, size_t num, size_t size) | 581 | struct kvec *vec, size_t num, size_t size) |
583 | { | 582 | { |
584 | mm_segment_t oldfs = get_fs(); | 583 | mm_segment_t oldfs = get_fs(); |
585 | int result; | 584 | int result; |
586 | 585 | ||
587 | set_fs(KERNEL_DS); | 586 | set_fs(KERNEL_DS); |
588 | /* | 587 | /* |
589 | * the following is safe, since for compiler definitions of kvec and | 588 | * the following is safe, since for compiler definitions of kvec and |
590 | * iovec are identical, yielding the same in-core layout and alignment | 589 | * iovec are identical, yielding the same in-core layout and alignment |
591 | */ | 590 | */ |
592 | msg->msg_iov = (struct iovec *)vec; | 591 | msg->msg_iov = (struct iovec *)vec; |
593 | msg->msg_iovlen = num; | 592 | msg->msg_iovlen = num; |
594 | result = sock_sendmsg(sock, msg, size); | 593 | result = sock_sendmsg(sock, msg, size); |
595 | set_fs(oldfs); | 594 | set_fs(oldfs); |
596 | return result; | 595 | return result; |
597 | } | 596 | } |
598 | 597 | ||
599 | /* | 598 | /* |
600 | * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) | 599 | * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) |
601 | */ | 600 | */ |
602 | void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, | 601 | void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, |
603 | struct sk_buff *skb) | 602 | struct sk_buff *skb) |
604 | { | 603 | { |
605 | ktime_t kt = skb->tstamp; | 604 | ktime_t kt = skb->tstamp; |
606 | 605 | ||
607 | if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { | 606 | if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { |
608 | struct timeval tv; | 607 | struct timeval tv; |
609 | /* Race occurred between timestamp enabling and packet | 608 | /* Race occurred between timestamp enabling and packet |
610 | receiving. Fill in the current time for now. */ | 609 | receiving. Fill in the current time for now. */ |
611 | if (kt.tv64 == 0) | 610 | if (kt.tv64 == 0) |
612 | kt = ktime_get_real(); | 611 | kt = ktime_get_real(); |
613 | skb->tstamp = kt; | 612 | skb->tstamp = kt; |
614 | tv = ktime_to_timeval(kt); | 613 | tv = ktime_to_timeval(kt); |
615 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); | 614 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); |
616 | } else { | 615 | } else { |
617 | struct timespec ts; | 616 | struct timespec ts; |
618 | /* Race occurred between timestamp enabling and packet | 617 | /* Race occurred between timestamp enabling and packet |
619 | receiving. Fill in the current time for now. */ | 618 | receiving. Fill in the current time for now. */ |
620 | if (kt.tv64 == 0) | 619 | if (kt.tv64 == 0) |
621 | kt = ktime_get_real(); | 620 | kt = ktime_get_real(); |
622 | skb->tstamp = kt; | 621 | skb->tstamp = kt; |
623 | ts = ktime_to_timespec(kt); | 622 | ts = ktime_to_timespec(kt); |
624 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); | 623 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); |
625 | } | 624 | } |
626 | } | 625 | } |
627 | 626 | ||
628 | EXPORT_SYMBOL_GPL(__sock_recv_timestamp); | 627 | EXPORT_SYMBOL_GPL(__sock_recv_timestamp); |
629 | 628 | ||
630 | static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, | 629 | static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, |
631 | struct msghdr *msg, size_t size, int flags) | 630 | struct msghdr *msg, size_t size, int flags) |
632 | { | 631 | { |
633 | int err; | 632 | int err; |
634 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 633 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
635 | 634 | ||
636 | si->sock = sock; | 635 | si->sock = sock; |
637 | si->scm = NULL; | 636 | si->scm = NULL; |
638 | si->msg = msg; | 637 | si->msg = msg; |
639 | si->size = size; | 638 | si->size = size; |
640 | si->flags = flags; | 639 | si->flags = flags; |
641 | 640 | ||
642 | err = security_socket_recvmsg(sock, msg, size, flags); | 641 | err = security_socket_recvmsg(sock, msg, size, flags); |
643 | if (err) | 642 | if (err) |
644 | return err; | 643 | return err; |
645 | 644 | ||
646 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); | 645 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); |
647 | } | 646 | } |
648 | 647 | ||
649 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, | 648 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, |
650 | size_t size, int flags) | 649 | size_t size, int flags) |
651 | { | 650 | { |
652 | struct kiocb iocb; | 651 | struct kiocb iocb; |
653 | struct sock_iocb siocb; | 652 | struct sock_iocb siocb; |
654 | int ret; | 653 | int ret; |
655 | 654 | ||
656 | init_sync_kiocb(&iocb, NULL); | 655 | init_sync_kiocb(&iocb, NULL); |
657 | iocb.private = &siocb; | 656 | iocb.private = &siocb; |
658 | ret = __sock_recvmsg(&iocb, sock, msg, size, flags); | 657 | ret = __sock_recvmsg(&iocb, sock, msg, size, flags); |
659 | if (-EIOCBQUEUED == ret) | 658 | if (-EIOCBQUEUED == ret) |
660 | ret = wait_on_sync_kiocb(&iocb); | 659 | ret = wait_on_sync_kiocb(&iocb); |
661 | return ret; | 660 | return ret; |
662 | } | 661 | } |
663 | 662 | ||
664 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, | 663 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, |
665 | struct kvec *vec, size_t num, size_t size, int flags) | 664 | struct kvec *vec, size_t num, size_t size, int flags) |
666 | { | 665 | { |
667 | mm_segment_t oldfs = get_fs(); | 666 | mm_segment_t oldfs = get_fs(); |
668 | int result; | 667 | int result; |
669 | 668 | ||
670 | set_fs(KERNEL_DS); | 669 | set_fs(KERNEL_DS); |
671 | /* | 670 | /* |
672 | * the following is safe, since for compiler definitions of kvec and | 671 | * the following is safe, since for compiler definitions of kvec and |
673 | * iovec are identical, yielding the same in-core layout and alignment | 672 | * iovec are identical, yielding the same in-core layout and alignment |
674 | */ | 673 | */ |
675 | msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; | 674 | msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; |
676 | result = sock_recvmsg(sock, msg, size, flags); | 675 | result = sock_recvmsg(sock, msg, size, flags); |
677 | set_fs(oldfs); | 676 | set_fs(oldfs); |
678 | return result; | 677 | return result; |
679 | } | 678 | } |
680 | 679 | ||
681 | static void sock_aio_dtor(struct kiocb *iocb) | 680 | static void sock_aio_dtor(struct kiocb *iocb) |
682 | { | 681 | { |
683 | kfree(iocb->private); | 682 | kfree(iocb->private); |
684 | } | 683 | } |
685 | 684 | ||
686 | static ssize_t sock_sendpage(struct file *file, struct page *page, | 685 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
687 | int offset, size_t size, loff_t *ppos, int more) | 686 | int offset, size_t size, loff_t *ppos, int more) |
688 | { | 687 | { |
689 | struct socket *sock; | 688 | struct socket *sock; |
690 | int flags; | 689 | int flags; |
691 | 690 | ||
692 | sock = file->private_data; | 691 | sock = file->private_data; |
693 | 692 | ||
694 | flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; | 693 | flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; |
695 | if (more) | 694 | if (more) |
696 | flags |= MSG_MORE; | 695 | flags |= MSG_MORE; |
697 | 696 | ||
698 | return sock->ops->sendpage(sock, page, offset, size, flags); | 697 | return sock->ops->sendpage(sock, page, offset, size, flags); |
699 | } | 698 | } |
700 | 699 | ||
701 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, | 700 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, |
702 | struct pipe_inode_info *pipe, size_t len, | 701 | struct pipe_inode_info *pipe, size_t len, |
703 | unsigned int flags) | 702 | unsigned int flags) |
704 | { | 703 | { |
705 | struct socket *sock = file->private_data; | 704 | struct socket *sock = file->private_data; |
706 | 705 | ||
707 | if (unlikely(!sock->ops->splice_read)) | 706 | if (unlikely(!sock->ops->splice_read)) |
708 | return -EINVAL; | 707 | return -EINVAL; |
709 | 708 | ||
710 | return sock->ops->splice_read(sock, ppos, pipe, len, flags); | 709 | return sock->ops->splice_read(sock, ppos, pipe, len, flags); |
711 | } | 710 | } |
712 | 711 | ||
713 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, | 712 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, |
714 | struct sock_iocb *siocb) | 713 | struct sock_iocb *siocb) |
715 | { | 714 | { |
716 | if (!is_sync_kiocb(iocb)) { | 715 | if (!is_sync_kiocb(iocb)) { |
717 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); | 716 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); |
718 | if (!siocb) | 717 | if (!siocb) |
719 | return NULL; | 718 | return NULL; |
720 | iocb->ki_dtor = sock_aio_dtor; | 719 | iocb->ki_dtor = sock_aio_dtor; |
721 | } | 720 | } |
722 | 721 | ||
723 | siocb->kiocb = iocb; | 722 | siocb->kiocb = iocb; |
724 | iocb->private = siocb; | 723 | iocb->private = siocb; |
725 | return siocb; | 724 | return siocb; |
726 | } | 725 | } |
727 | 726 | ||
728 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, | 727 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, |
729 | struct file *file, const struct iovec *iov, | 728 | struct file *file, const struct iovec *iov, |
730 | unsigned long nr_segs) | 729 | unsigned long nr_segs) |
731 | { | 730 | { |
732 | struct socket *sock = file->private_data; | 731 | struct socket *sock = file->private_data; |
733 | size_t size = 0; | 732 | size_t size = 0; |
734 | int i; | 733 | int i; |
735 | 734 | ||
736 | for (i = 0; i < nr_segs; i++) | 735 | for (i = 0; i < nr_segs; i++) |
737 | size += iov[i].iov_len; | 736 | size += iov[i].iov_len; |
738 | 737 | ||
739 | msg->msg_name = NULL; | 738 | msg->msg_name = NULL; |
740 | msg->msg_namelen = 0; | 739 | msg->msg_namelen = 0; |
741 | msg->msg_control = NULL; | 740 | msg->msg_control = NULL; |
742 | msg->msg_controllen = 0; | 741 | msg->msg_controllen = 0; |
743 | msg->msg_iov = (struct iovec *)iov; | 742 | msg->msg_iov = (struct iovec *)iov; |
744 | msg->msg_iovlen = nr_segs; | 743 | msg->msg_iovlen = nr_segs; |
745 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | 744 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; |
746 | 745 | ||
747 | return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); | 746 | return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); |
748 | } | 747 | } |
749 | 748 | ||
750 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, | 749 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, |
751 | unsigned long nr_segs, loff_t pos) | 750 | unsigned long nr_segs, loff_t pos) |
752 | { | 751 | { |
753 | struct sock_iocb siocb, *x; | 752 | struct sock_iocb siocb, *x; |
754 | 753 | ||
755 | if (pos != 0) | 754 | if (pos != 0) |
756 | return -ESPIPE; | 755 | return -ESPIPE; |
757 | 756 | ||
758 | if (iocb->ki_left == 0) /* Match SYS5 behaviour */ | 757 | if (iocb->ki_left == 0) /* Match SYS5 behaviour */ |
759 | return 0; | 758 | return 0; |
760 | 759 | ||
761 | 760 | ||
762 | x = alloc_sock_iocb(iocb, &siocb); | 761 | x = alloc_sock_iocb(iocb, &siocb); |
763 | if (!x) | 762 | if (!x) |
764 | return -ENOMEM; | 763 | return -ENOMEM; |
765 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); | 764 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); |
766 | } | 765 | } |
767 | 766 | ||
768 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, | 767 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, |
769 | struct file *file, const struct iovec *iov, | 768 | struct file *file, const struct iovec *iov, |
770 | unsigned long nr_segs) | 769 | unsigned long nr_segs) |
771 | { | 770 | { |
772 | struct socket *sock = file->private_data; | 771 | struct socket *sock = file->private_data; |
773 | size_t size = 0; | 772 | size_t size = 0; |
774 | int i; | 773 | int i; |
775 | 774 | ||
776 | for (i = 0; i < nr_segs; i++) | 775 | for (i = 0; i < nr_segs; i++) |
777 | size += iov[i].iov_len; | 776 | size += iov[i].iov_len; |
778 | 777 | ||
779 | msg->msg_name = NULL; | 778 | msg->msg_name = NULL; |
780 | msg->msg_namelen = 0; | 779 | msg->msg_namelen = 0; |
781 | msg->msg_control = NULL; | 780 | msg->msg_control = NULL; |
782 | msg->msg_controllen = 0; | 781 | msg->msg_controllen = 0; |
783 | msg->msg_iov = (struct iovec *)iov; | 782 | msg->msg_iov = (struct iovec *)iov; |
784 | msg->msg_iovlen = nr_segs; | 783 | msg->msg_iovlen = nr_segs; |
785 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | 784 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; |
786 | if (sock->type == SOCK_SEQPACKET) | 785 | if (sock->type == SOCK_SEQPACKET) |
787 | msg->msg_flags |= MSG_EOR; | 786 | msg->msg_flags |= MSG_EOR; |
788 | 787 | ||
789 | return __sock_sendmsg(iocb, sock, msg, size); | 788 | return __sock_sendmsg(iocb, sock, msg, size); |
790 | } | 789 | } |
791 | 790 | ||
792 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, | 791 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, |
793 | unsigned long nr_segs, loff_t pos) | 792 | unsigned long nr_segs, loff_t pos) |
794 | { | 793 | { |
795 | struct sock_iocb siocb, *x; | 794 | struct sock_iocb siocb, *x; |
796 | 795 | ||
797 | if (pos != 0) | 796 | if (pos != 0) |
798 | return -ESPIPE; | 797 | return -ESPIPE; |
799 | 798 | ||
800 | x = alloc_sock_iocb(iocb, &siocb); | 799 | x = alloc_sock_iocb(iocb, &siocb); |
801 | if (!x) | 800 | if (!x) |
802 | return -ENOMEM; | 801 | return -ENOMEM; |
803 | 802 | ||
804 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); | 803 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); |
805 | } | 804 | } |
806 | 805 | ||
807 | /* | 806 | /* |
808 | * Atomic setting of ioctl hooks to avoid race | 807 | * Atomic setting of ioctl hooks to avoid race |
809 | * with module unload. | 808 | * with module unload. |
810 | */ | 809 | */ |
811 | 810 | ||
812 | static DEFINE_MUTEX(br_ioctl_mutex); | 811 | static DEFINE_MUTEX(br_ioctl_mutex); |
813 | static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; | 812 | static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; |
814 | 813 | ||
815 | void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) | 814 | void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) |
816 | { | 815 | { |
817 | mutex_lock(&br_ioctl_mutex); | 816 | mutex_lock(&br_ioctl_mutex); |
818 | br_ioctl_hook = hook; | 817 | br_ioctl_hook = hook; |
819 | mutex_unlock(&br_ioctl_mutex); | 818 | mutex_unlock(&br_ioctl_mutex); |
820 | } | 819 | } |
821 | 820 | ||
822 | EXPORT_SYMBOL(brioctl_set); | 821 | EXPORT_SYMBOL(brioctl_set); |
823 | 822 | ||
824 | static DEFINE_MUTEX(vlan_ioctl_mutex); | 823 | static DEFINE_MUTEX(vlan_ioctl_mutex); |
825 | static int (*vlan_ioctl_hook) (struct net *, void __user *arg); | 824 | static int (*vlan_ioctl_hook) (struct net *, void __user *arg); |
826 | 825 | ||
827 | void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) | 826 | void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) |
828 | { | 827 | { |
829 | mutex_lock(&vlan_ioctl_mutex); | 828 | mutex_lock(&vlan_ioctl_mutex); |
830 | vlan_ioctl_hook = hook; | 829 | vlan_ioctl_hook = hook; |
831 | mutex_unlock(&vlan_ioctl_mutex); | 830 | mutex_unlock(&vlan_ioctl_mutex); |
832 | } | 831 | } |
833 | 832 | ||
834 | EXPORT_SYMBOL(vlan_ioctl_set); | 833 | EXPORT_SYMBOL(vlan_ioctl_set); |
835 | 834 | ||
836 | static DEFINE_MUTEX(dlci_ioctl_mutex); | 835 | static DEFINE_MUTEX(dlci_ioctl_mutex); |
837 | static int (*dlci_ioctl_hook) (unsigned int, void __user *); | 836 | static int (*dlci_ioctl_hook) (unsigned int, void __user *); |
838 | 837 | ||
839 | void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) | 838 | void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) |
840 | { | 839 | { |
841 | mutex_lock(&dlci_ioctl_mutex); | 840 | mutex_lock(&dlci_ioctl_mutex); |
842 | dlci_ioctl_hook = hook; | 841 | dlci_ioctl_hook = hook; |
843 | mutex_unlock(&dlci_ioctl_mutex); | 842 | mutex_unlock(&dlci_ioctl_mutex); |
844 | } | 843 | } |
845 | 844 | ||
846 | EXPORT_SYMBOL(dlci_ioctl_set); | 845 | EXPORT_SYMBOL(dlci_ioctl_set); |
847 | 846 | ||
848 | /* | 847 | /* |
849 | * With an ioctl, arg may well be a user mode pointer, but we don't know | 848 | * With an ioctl, arg may well be a user mode pointer, but we don't know |
850 | * what to do with it - that's up to the protocol still. | 849 | * what to do with it - that's up to the protocol still. |
851 | */ | 850 | */ |
852 | 851 | ||
853 | static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 852 | static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
854 | { | 853 | { |
855 | struct socket *sock; | 854 | struct socket *sock; |
856 | struct sock *sk; | 855 | struct sock *sk; |
857 | void __user *argp = (void __user *)arg; | 856 | void __user *argp = (void __user *)arg; |
858 | int pid, err; | 857 | int pid, err; |
859 | struct net *net; | 858 | struct net *net; |
860 | 859 | ||
861 | sock = file->private_data; | 860 | sock = file->private_data; |
862 | sk = sock->sk; | 861 | sk = sock->sk; |
863 | net = sock_net(sk); | 862 | net = sock_net(sk); |
864 | if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { | 863 | if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { |
865 | err = dev_ioctl(net, cmd, argp); | 864 | err = dev_ioctl(net, cmd, argp); |
866 | } else | 865 | } else |
867 | #ifdef CONFIG_WIRELESS_EXT | 866 | #ifdef CONFIG_WIRELESS_EXT |
868 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { | 867 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { |
869 | err = dev_ioctl(net, cmd, argp); | 868 | err = dev_ioctl(net, cmd, argp); |
870 | } else | 869 | } else |
871 | #endif /* CONFIG_WIRELESS_EXT */ | 870 | #endif /* CONFIG_WIRELESS_EXT */ |
872 | switch (cmd) { | 871 | switch (cmd) { |
873 | case FIOSETOWN: | 872 | case FIOSETOWN: |
874 | case SIOCSPGRP: | 873 | case SIOCSPGRP: |
875 | err = -EFAULT; | 874 | err = -EFAULT; |
876 | if (get_user(pid, (int __user *)argp)) | 875 | if (get_user(pid, (int __user *)argp)) |
877 | break; | 876 | break; |
878 | err = f_setown(sock->file, pid, 1); | 877 | err = f_setown(sock->file, pid, 1); |
879 | break; | 878 | break; |
880 | case FIOGETOWN: | 879 | case FIOGETOWN: |
881 | case SIOCGPGRP: | 880 | case SIOCGPGRP: |
882 | err = put_user(f_getown(sock->file), | 881 | err = put_user(f_getown(sock->file), |
883 | (int __user *)argp); | 882 | (int __user *)argp); |
884 | break; | 883 | break; |
885 | case SIOCGIFBR: | 884 | case SIOCGIFBR: |
886 | case SIOCSIFBR: | 885 | case SIOCSIFBR: |
887 | case SIOCBRADDBR: | 886 | case SIOCBRADDBR: |
888 | case SIOCBRDELBR: | 887 | case SIOCBRDELBR: |
889 | err = -ENOPKG; | 888 | err = -ENOPKG; |
890 | if (!br_ioctl_hook) | 889 | if (!br_ioctl_hook) |
891 | request_module("bridge"); | 890 | request_module("bridge"); |
892 | 891 | ||
893 | mutex_lock(&br_ioctl_mutex); | 892 | mutex_lock(&br_ioctl_mutex); |
894 | if (br_ioctl_hook) | 893 | if (br_ioctl_hook) |
895 | err = br_ioctl_hook(net, cmd, argp); | 894 | err = br_ioctl_hook(net, cmd, argp); |
896 | mutex_unlock(&br_ioctl_mutex); | 895 | mutex_unlock(&br_ioctl_mutex); |
897 | break; | 896 | break; |
898 | case SIOCGIFVLAN: | 897 | case SIOCGIFVLAN: |
899 | case SIOCSIFVLAN: | 898 | case SIOCSIFVLAN: |
900 | err = -ENOPKG; | 899 | err = -ENOPKG; |
901 | if (!vlan_ioctl_hook) | 900 | if (!vlan_ioctl_hook) |
902 | request_module("8021q"); | 901 | request_module("8021q"); |
903 | 902 | ||
904 | mutex_lock(&vlan_ioctl_mutex); | 903 | mutex_lock(&vlan_ioctl_mutex); |
905 | if (vlan_ioctl_hook) | 904 | if (vlan_ioctl_hook) |
906 | err = vlan_ioctl_hook(net, argp); | 905 | err = vlan_ioctl_hook(net, argp); |
907 | mutex_unlock(&vlan_ioctl_mutex); | 906 | mutex_unlock(&vlan_ioctl_mutex); |
908 | break; | 907 | break; |
909 | case SIOCADDDLCI: | 908 | case SIOCADDDLCI: |
910 | case SIOCDELDLCI: | 909 | case SIOCDELDLCI: |
911 | err = -ENOPKG; | 910 | err = -ENOPKG; |
912 | if (!dlci_ioctl_hook) | 911 | if (!dlci_ioctl_hook) |
913 | request_module("dlci"); | 912 | request_module("dlci"); |
914 | 913 | ||
915 | mutex_lock(&dlci_ioctl_mutex); | 914 | mutex_lock(&dlci_ioctl_mutex); |
916 | if (dlci_ioctl_hook) | 915 | if (dlci_ioctl_hook) |
917 | err = dlci_ioctl_hook(cmd, argp); | 916 | err = dlci_ioctl_hook(cmd, argp); |
918 | mutex_unlock(&dlci_ioctl_mutex); | 917 | mutex_unlock(&dlci_ioctl_mutex); |
919 | break; | 918 | break; |
920 | default: | 919 | default: |
921 | err = sock->ops->ioctl(sock, cmd, arg); | 920 | err = sock->ops->ioctl(sock, cmd, arg); |
922 | 921 | ||
923 | /* | 922 | /* |
924 | * If this ioctl is unknown try to hand it down | 923 | * If this ioctl is unknown try to hand it down |
925 | * to the NIC driver. | 924 | * to the NIC driver. |
926 | */ | 925 | */ |
927 | if (err == -ENOIOCTLCMD) | 926 | if (err == -ENOIOCTLCMD) |
928 | err = dev_ioctl(net, cmd, argp); | 927 | err = dev_ioctl(net, cmd, argp); |
929 | break; | 928 | break; |
930 | } | 929 | } |
931 | return err; | 930 | return err; |
932 | } | 931 | } |
933 | 932 | ||
934 | int sock_create_lite(int family, int type, int protocol, struct socket **res) | 933 | int sock_create_lite(int family, int type, int protocol, struct socket **res) |
935 | { | 934 | { |
936 | int err; | 935 | int err; |
937 | struct socket *sock = NULL; | 936 | struct socket *sock = NULL; |
938 | 937 | ||
939 | err = security_socket_create(family, type, protocol, 1); | 938 | err = security_socket_create(family, type, protocol, 1); |
940 | if (err) | 939 | if (err) |
941 | goto out; | 940 | goto out; |
942 | 941 | ||
943 | sock = sock_alloc(); | 942 | sock = sock_alloc(); |
944 | if (!sock) { | 943 | if (!sock) { |
945 | err = -ENOMEM; | 944 | err = -ENOMEM; |
946 | goto out; | 945 | goto out; |
947 | } | 946 | } |
948 | 947 | ||
949 | sock->type = type; | 948 | sock->type = type; |
950 | err = security_socket_post_create(sock, family, type, protocol, 1); | 949 | err = security_socket_post_create(sock, family, type, protocol, 1); |
951 | if (err) | 950 | if (err) |
952 | goto out_release; | 951 | goto out_release; |
953 | 952 | ||
954 | out: | 953 | out: |
955 | *res = sock; | 954 | *res = sock; |
956 | return err; | 955 | return err; |
957 | out_release: | 956 | out_release: |
958 | sock_release(sock); | 957 | sock_release(sock); |
959 | sock = NULL; | 958 | sock = NULL; |
960 | goto out; | 959 | goto out; |
961 | } | 960 | } |
962 | 961 | ||
963 | /* No kernel lock held - perfect */ | 962 | /* No kernel lock held - perfect */ |
964 | static unsigned int sock_poll(struct file *file, poll_table *wait) | 963 | static unsigned int sock_poll(struct file *file, poll_table *wait) |
965 | { | 964 | { |
966 | struct socket *sock; | 965 | struct socket *sock; |
967 | 966 | ||
968 | /* | 967 | /* |
969 | * We can't return errors to poll, so it's either yes or no. | 968 | * We can't return errors to poll, so it's either yes or no. |
970 | */ | 969 | */ |
971 | sock = file->private_data; | 970 | sock = file->private_data; |
972 | return sock->ops->poll(file, sock, wait); | 971 | return sock->ops->poll(file, sock, wait); |
973 | } | 972 | } |
974 | 973 | ||
975 | static int sock_mmap(struct file *file, struct vm_area_struct *vma) | 974 | static int sock_mmap(struct file *file, struct vm_area_struct *vma) |
976 | { | 975 | { |
977 | struct socket *sock = file->private_data; | 976 | struct socket *sock = file->private_data; |
978 | 977 | ||
979 | return sock->ops->mmap(file, sock, vma); | 978 | return sock->ops->mmap(file, sock, vma); |
980 | } | 979 | } |
981 | 980 | ||
982 | static int sock_close(struct inode *inode, struct file *filp) | 981 | static int sock_close(struct inode *inode, struct file *filp) |
983 | { | 982 | { |
984 | /* | 983 | /* |
985 | * It was possible the inode is NULL we were | 984 | * It was possible the inode is NULL we were |
986 | * closing an unfinished socket. | 985 | * closing an unfinished socket. |
987 | */ | 986 | */ |
988 | 987 | ||
989 | if (!inode) { | 988 | if (!inode) { |
990 | printk(KERN_DEBUG "sock_close: NULL inode\n"); | 989 | printk(KERN_DEBUG "sock_close: NULL inode\n"); |
991 | return 0; | 990 | return 0; |
992 | } | 991 | } |
993 | sock_fasync(-1, filp, 0); | 992 | sock_fasync(-1, filp, 0); |
994 | sock_release(SOCKET_I(inode)); | 993 | sock_release(SOCKET_I(inode)); |
995 | return 0; | 994 | return 0; |
996 | } | 995 | } |
997 | 996 | ||
998 | /* | 997 | /* |
999 | * Update the socket async list | 998 | * Update the socket async list |
1000 | * | 999 | * |
1001 | * Fasync_list locking strategy. | 1000 | * Fasync_list locking strategy. |
1002 | * | 1001 | * |
1003 | * 1. fasync_list is modified only under process context socket lock | 1002 | * 1. fasync_list is modified only under process context socket lock |
1004 | * i.e. under semaphore. | 1003 | * i.e. under semaphore. |
1005 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) | 1004 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) |
1006 | * or under socket lock. | 1005 | * or under socket lock. |
1007 | * 3. fasync_list can be used from softirq context, so that | 1006 | * 3. fasync_list can be used from softirq context, so that |
1008 | * modification under socket lock have to be enhanced with | 1007 | * modification under socket lock have to be enhanced with |
1009 | * write_lock_bh(&sk->sk_callback_lock). | 1008 | * write_lock_bh(&sk->sk_callback_lock). |
1010 | * --ANK (990710) | 1009 | * --ANK (990710) |
1011 | */ | 1010 | */ |
1012 | 1011 | ||
1013 | static int sock_fasync(int fd, struct file *filp, int on) | 1012 | static int sock_fasync(int fd, struct file *filp, int on) |
1014 | { | 1013 | { |
1015 | struct fasync_struct *fa, *fna = NULL, **prev; | 1014 | struct fasync_struct *fa, *fna = NULL, **prev; |
1016 | struct socket *sock; | 1015 | struct socket *sock; |
1017 | struct sock *sk; | 1016 | struct sock *sk; |
1018 | 1017 | ||
1019 | if (on) { | 1018 | if (on) { |
1020 | fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); | 1019 | fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); |
1021 | if (fna == NULL) | 1020 | if (fna == NULL) |
1022 | return -ENOMEM; | 1021 | return -ENOMEM; |
1023 | } | 1022 | } |
1024 | 1023 | ||
1025 | sock = filp->private_data; | 1024 | sock = filp->private_data; |
1026 | 1025 | ||
1027 | sk = sock->sk; | 1026 | sk = sock->sk; |
1028 | if (sk == NULL) { | 1027 | if (sk == NULL) { |
1029 | kfree(fna); | 1028 | kfree(fna); |
1030 | return -EINVAL; | 1029 | return -EINVAL; |
1031 | } | 1030 | } |
1032 | 1031 | ||
1033 | lock_sock(sk); | 1032 | lock_sock(sk); |
1034 | 1033 | ||
1035 | prev = &(sock->fasync_list); | 1034 | prev = &(sock->fasync_list); |
1036 | 1035 | ||
1037 | for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) | 1036 | for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) |
1038 | if (fa->fa_file == filp) | 1037 | if (fa->fa_file == filp) |
1039 | break; | 1038 | break; |
1040 | 1039 | ||
1041 | if (on) { | 1040 | if (on) { |
1042 | if (fa != NULL) { | 1041 | if (fa != NULL) { |
1043 | write_lock_bh(&sk->sk_callback_lock); | 1042 | write_lock_bh(&sk->sk_callback_lock); |
1044 | fa->fa_fd = fd; | 1043 | fa->fa_fd = fd; |
1045 | write_unlock_bh(&sk->sk_callback_lock); | 1044 | write_unlock_bh(&sk->sk_callback_lock); |
1046 | 1045 | ||
1047 | kfree(fna); | 1046 | kfree(fna); |
1048 | goto out; | 1047 | goto out; |
1049 | } | 1048 | } |
1050 | fna->fa_file = filp; | 1049 | fna->fa_file = filp; |
1051 | fna->fa_fd = fd; | 1050 | fna->fa_fd = fd; |
1052 | fna->magic = FASYNC_MAGIC; | 1051 | fna->magic = FASYNC_MAGIC; |
1053 | fna->fa_next = sock->fasync_list; | 1052 | fna->fa_next = sock->fasync_list; |
1054 | write_lock_bh(&sk->sk_callback_lock); | 1053 | write_lock_bh(&sk->sk_callback_lock); |
1055 | sock->fasync_list = fna; | 1054 | sock->fasync_list = fna; |
1056 | write_unlock_bh(&sk->sk_callback_lock); | 1055 | write_unlock_bh(&sk->sk_callback_lock); |
1057 | } else { | 1056 | } else { |
1058 | if (fa != NULL) { | 1057 | if (fa != NULL) { |
1059 | write_lock_bh(&sk->sk_callback_lock); | 1058 | write_lock_bh(&sk->sk_callback_lock); |
1060 | *prev = fa->fa_next; | 1059 | *prev = fa->fa_next; |
1061 | write_unlock_bh(&sk->sk_callback_lock); | 1060 | write_unlock_bh(&sk->sk_callback_lock); |
1062 | kfree(fa); | 1061 | kfree(fa); |
1063 | } | 1062 | } |
1064 | } | 1063 | } |
1065 | 1064 | ||
1066 | out: | 1065 | out: |
1067 | release_sock(sock->sk); | 1066 | release_sock(sock->sk); |
1068 | return 0; | 1067 | return 0; |
1069 | } | 1068 | } |
1070 | 1069 | ||
1071 | /* This function may be called only under socket lock or callback_lock */ | 1070 | /* This function may be called only under socket lock or callback_lock */ |
1072 | 1071 | ||
1073 | int sock_wake_async(struct socket *sock, int how, int band) | 1072 | int sock_wake_async(struct socket *sock, int how, int band) |
1074 | { | 1073 | { |
1075 | if (!sock || !sock->fasync_list) | 1074 | if (!sock || !sock->fasync_list) |
1076 | return -1; | 1075 | return -1; |
1077 | switch (how) { | 1076 | switch (how) { |
1078 | case SOCK_WAKE_WAITD: | 1077 | case SOCK_WAKE_WAITD: |
1079 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) | 1078 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) |
1080 | break; | 1079 | break; |
1081 | goto call_kill; | 1080 | goto call_kill; |
1082 | case SOCK_WAKE_SPACE: | 1081 | case SOCK_WAKE_SPACE: |
1083 | if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) | 1082 | if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) |
1084 | break; | 1083 | break; |
1085 | /* fall through */ | 1084 | /* fall through */ |
1086 | case SOCK_WAKE_IO: | 1085 | case SOCK_WAKE_IO: |
1087 | call_kill: | 1086 | call_kill: |
1088 | __kill_fasync(sock->fasync_list, SIGIO, band); | 1087 | __kill_fasync(sock->fasync_list, SIGIO, band); |
1089 | break; | 1088 | break; |
1090 | case SOCK_WAKE_URG: | 1089 | case SOCK_WAKE_URG: |
1091 | __kill_fasync(sock->fasync_list, SIGURG, band); | 1090 | __kill_fasync(sock->fasync_list, SIGURG, band); |
1092 | } | 1091 | } |
1093 | return 0; | 1092 | return 0; |
1094 | } | 1093 | } |
1095 | 1094 | ||
1096 | static int __sock_create(struct net *net, int family, int type, int protocol, | 1095 | static int __sock_create(struct net *net, int family, int type, int protocol, |
1097 | struct socket **res, int kern) | 1096 | struct socket **res, int kern) |
1098 | { | 1097 | { |
1099 | int err; | 1098 | int err; |
1100 | struct socket *sock; | 1099 | struct socket *sock; |
1101 | const struct net_proto_family *pf; | 1100 | const struct net_proto_family *pf; |
1102 | 1101 | ||
1103 | /* | 1102 | /* |
1104 | * Check protocol is in range | 1103 | * Check protocol is in range |
1105 | */ | 1104 | */ |
1106 | if (family < 0 || family >= NPROTO) | 1105 | if (family < 0 || family >= NPROTO) |
1107 | return -EAFNOSUPPORT; | 1106 | return -EAFNOSUPPORT; |
1108 | if (type < 0 || type >= SOCK_MAX) | 1107 | if (type < 0 || type >= SOCK_MAX) |
1109 | return -EINVAL; | 1108 | return -EINVAL; |
1110 | 1109 | ||
1111 | /* Compatibility. | 1110 | /* Compatibility. |
1112 | 1111 | ||
1113 | This uglymoron is moved from INET layer to here to avoid | 1112 | This uglymoron is moved from INET layer to here to avoid |
1114 | deadlock in module load. | 1113 | deadlock in module load. |
1115 | */ | 1114 | */ |
1116 | if (family == PF_INET && type == SOCK_PACKET) { | 1115 | if (family == PF_INET && type == SOCK_PACKET) { |
1117 | static int warned; | 1116 | static int warned; |
1118 | if (!warned) { | 1117 | if (!warned) { |
1119 | warned = 1; | 1118 | warned = 1; |
1120 | printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", | 1119 | printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", |
1121 | current->comm); | 1120 | current->comm); |
1122 | } | 1121 | } |
1123 | family = PF_PACKET; | 1122 | family = PF_PACKET; |
1124 | } | 1123 | } |
1125 | 1124 | ||
1126 | err = security_socket_create(family, type, protocol, kern); | 1125 | err = security_socket_create(family, type, protocol, kern); |
1127 | if (err) | 1126 | if (err) |
1128 | return err; | 1127 | return err; |
1129 | 1128 | ||
1130 | /* | 1129 | /* |
1131 | * Allocate the socket and allow the family to set things up. if | 1130 | * Allocate the socket and allow the family to set things up. if |
1132 | * the protocol is 0, the family is instructed to select an appropriate | 1131 | * the protocol is 0, the family is instructed to select an appropriate |
1133 | * default. | 1132 | * default. |
1134 | */ | 1133 | */ |
1135 | sock = sock_alloc(); | 1134 | sock = sock_alloc(); |
1136 | if (!sock) { | 1135 | if (!sock) { |
1137 | if (net_ratelimit()) | 1136 | if (net_ratelimit()) |
1138 | printk(KERN_WARNING "socket: no more sockets\n"); | 1137 | printk(KERN_WARNING "socket: no more sockets\n"); |
1139 | return -ENFILE; /* Not exactly a match, but its the | 1138 | return -ENFILE; /* Not exactly a match, but its the |
1140 | closest posix thing */ | 1139 | closest posix thing */ |
1141 | } | 1140 | } |
1142 | 1141 | ||
1143 | sock->type = type; | 1142 | sock->type = type; |
1144 | 1143 | ||
1145 | #ifdef CONFIG_MODULES | 1144 | #ifdef CONFIG_MODULES |
1146 | /* Attempt to load a protocol module if the find failed. | 1145 | /* Attempt to load a protocol module if the find failed. |
1147 | * | 1146 | * |
1148 | * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user | 1147 | * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user |
1149 | * requested real, full-featured networking support upon configuration. | 1148 | * requested real, full-featured networking support upon configuration. |
1150 | * Otherwise module support will break! | 1149 | * Otherwise module support will break! |
1151 | */ | 1150 | */ |
1152 | if (net_families[family] == NULL) | 1151 | if (net_families[family] == NULL) |
1153 | request_module("net-pf-%d", family); | 1152 | request_module("net-pf-%d", family); |
1154 | #endif | 1153 | #endif |
1155 | 1154 | ||
1156 | rcu_read_lock(); | 1155 | rcu_read_lock(); |
1157 | pf = rcu_dereference(net_families[family]); | 1156 | pf = rcu_dereference(net_families[family]); |
1158 | err = -EAFNOSUPPORT; | 1157 | err = -EAFNOSUPPORT; |
1159 | if (!pf) | 1158 | if (!pf) |
1160 | goto out_release; | 1159 | goto out_release; |
1161 | 1160 | ||
1162 | /* | 1161 | /* |
1163 | * We will call the ->create function, that possibly is in a loadable | 1162 | * We will call the ->create function, that possibly is in a loadable |
1164 | * module, so we have to bump that loadable module refcnt first. | 1163 | * module, so we have to bump that loadable module refcnt first. |
1165 | */ | 1164 | */ |
1166 | if (!try_module_get(pf->owner)) | 1165 | if (!try_module_get(pf->owner)) |
1167 | goto out_release; | 1166 | goto out_release; |
1168 | 1167 | ||
1169 | /* Now protected by module ref count */ | 1168 | /* Now protected by module ref count */ |
1170 | rcu_read_unlock(); | 1169 | rcu_read_unlock(); |
1171 | 1170 | ||
1172 | err = pf->create(net, sock, protocol); | 1171 | err = pf->create(net, sock, protocol); |
1173 | if (err < 0) | 1172 | if (err < 0) |
1174 | goto out_module_put; | 1173 | goto out_module_put; |
1175 | 1174 | ||
1176 | /* | 1175 | /* |
1177 | * Now to bump the refcnt of the [loadable] module that owns this | 1176 | * Now to bump the refcnt of the [loadable] module that owns this |
1178 | * socket at sock_release time we decrement its refcnt. | 1177 | * socket at sock_release time we decrement its refcnt. |
1179 | */ | 1178 | */ |
1180 | if (!try_module_get(sock->ops->owner)) | 1179 | if (!try_module_get(sock->ops->owner)) |
1181 | goto out_module_busy; | 1180 | goto out_module_busy; |
1182 | 1181 | ||
1183 | /* | 1182 | /* |
1184 | * Now that we're done with the ->create function, the [loadable] | 1183 | * Now that we're done with the ->create function, the [loadable] |
1185 | * module can have its refcnt decremented | 1184 | * module can have its refcnt decremented |
1186 | */ | 1185 | */ |
1187 | module_put(pf->owner); | 1186 | module_put(pf->owner); |
1188 | err = security_socket_post_create(sock, family, type, protocol, kern); | 1187 | err = security_socket_post_create(sock, family, type, protocol, kern); |
1189 | if (err) | 1188 | if (err) |
1190 | goto out_sock_release; | 1189 | goto out_sock_release; |
1191 | *res = sock; | 1190 | *res = sock; |
1192 | 1191 | ||
1193 | return 0; | 1192 | return 0; |
1194 | 1193 | ||
1195 | out_module_busy: | 1194 | out_module_busy: |
1196 | err = -EAFNOSUPPORT; | 1195 | err = -EAFNOSUPPORT; |
1197 | out_module_put: | 1196 | out_module_put: |
1198 | sock->ops = NULL; | 1197 | sock->ops = NULL; |
1199 | module_put(pf->owner); | 1198 | module_put(pf->owner); |
1200 | out_sock_release: | 1199 | out_sock_release: |
1201 | sock_release(sock); | 1200 | sock_release(sock); |
1202 | return err; | 1201 | return err; |
1203 | 1202 | ||
1204 | out_release: | 1203 | out_release: |
1205 | rcu_read_unlock(); | 1204 | rcu_read_unlock(); |
1206 | goto out_sock_release; | 1205 | goto out_sock_release; |
1207 | } | 1206 | } |
1208 | 1207 | ||
1209 | int sock_create(int family, int type, int protocol, struct socket **res) | 1208 | int sock_create(int family, int type, int protocol, struct socket **res) |
1210 | { | 1209 | { |
1211 | return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); | 1210 | return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); |
1212 | } | 1211 | } |
1213 | 1212 | ||
1214 | int sock_create_kern(int family, int type, int protocol, struct socket **res) | 1213 | int sock_create_kern(int family, int type, int protocol, struct socket **res) |
1215 | { | 1214 | { |
1216 | return __sock_create(&init_net, family, type, protocol, res, 1); | 1215 | return __sock_create(&init_net, family, type, protocol, res, 1); |
1217 | } | 1216 | } |
1218 | 1217 | ||
1219 | asmlinkage long sys_socket(int family, int type, int protocol) | 1218 | asmlinkage long sys_socket(int family, int type, int protocol) |
1220 | { | 1219 | { |
1221 | int retval; | 1220 | int retval; |
1222 | struct socket *sock; | 1221 | struct socket *sock; |
1223 | int flags; | 1222 | int flags; |
1224 | 1223 | ||
1225 | /* Check the SOCK_* constants for consistency. */ | 1224 | /* Check the SOCK_* constants for consistency. */ |
1226 | BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); | 1225 | BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); |
1227 | BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); | 1226 | BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); |
1228 | BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); | 1227 | BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); |
1229 | BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); | 1228 | BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); |
1230 | 1229 | ||
1231 | flags = type & ~SOCK_TYPE_MASK; | 1230 | flags = type & ~SOCK_TYPE_MASK; |
1232 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) | 1231 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) |
1233 | return -EINVAL; | 1232 | return -EINVAL; |
1234 | type &= SOCK_TYPE_MASK; | 1233 | type &= SOCK_TYPE_MASK; |
1235 | 1234 | ||
1236 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) | 1235 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) |
1237 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1236 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1238 | 1237 | ||
1239 | retval = sock_create(family, type, protocol, &sock); | 1238 | retval = sock_create(family, type, protocol, &sock); |
1240 | if (retval < 0) | 1239 | if (retval < 0) |
1241 | goto out; | 1240 | goto out; |
1242 | 1241 | ||
1243 | retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); | 1242 | retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); |
1244 | if (retval < 0) | 1243 | if (retval < 0) |
1245 | goto out_release; | 1244 | goto out_release; |
1246 | 1245 | ||
1247 | out: | 1246 | out: |
1248 | /* It may be already another descriptor 8) Not kernel problem. */ | 1247 | /* It may be already another descriptor 8) Not kernel problem. */ |
1249 | return retval; | 1248 | return retval; |
1250 | 1249 | ||
1251 | out_release: | 1250 | out_release: |
1252 | sock_release(sock); | 1251 | sock_release(sock); |
1253 | return retval; | 1252 | return retval; |
1254 | } | 1253 | } |
1255 | 1254 | ||
1256 | /* | 1255 | /* |
1257 | * Create a pair of connected sockets. | 1256 | * Create a pair of connected sockets. |
1258 | */ | 1257 | */ |
1259 | 1258 | ||
1260 | asmlinkage long sys_socketpair(int family, int type, int protocol, | 1259 | asmlinkage long sys_socketpair(int family, int type, int protocol, |
1261 | int __user *usockvec) | 1260 | int __user *usockvec) |
1262 | { | 1261 | { |
1263 | struct socket *sock1, *sock2; | 1262 | struct socket *sock1, *sock2; |
1264 | int fd1, fd2, err; | 1263 | int fd1, fd2, err; |
1265 | struct file *newfile1, *newfile2; | 1264 | struct file *newfile1, *newfile2; |
1266 | int flags; | 1265 | int flags; |
1267 | 1266 | ||
1268 | flags = type & ~SOCK_TYPE_MASK; | 1267 | flags = type & ~SOCK_TYPE_MASK; |
1269 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) | 1268 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) |
1270 | return -EINVAL; | 1269 | return -EINVAL; |
1271 | type &= SOCK_TYPE_MASK; | 1270 | type &= SOCK_TYPE_MASK; |
1272 | 1271 | ||
1273 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) | 1272 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) |
1274 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1273 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1275 | 1274 | ||
1276 | /* | 1275 | /* |
1277 | * Obtain the first socket and check if the underlying protocol | 1276 | * Obtain the first socket and check if the underlying protocol |
1278 | * supports the socketpair call. | 1277 | * supports the socketpair call. |
1279 | */ | 1278 | */ |
1280 | 1279 | ||
1281 | err = sock_create(family, type, protocol, &sock1); | 1280 | err = sock_create(family, type, protocol, &sock1); |
1282 | if (err < 0) | 1281 | if (err < 0) |
1283 | goto out; | 1282 | goto out; |
1284 | 1283 | ||
1285 | err = sock_create(family, type, protocol, &sock2); | 1284 | err = sock_create(family, type, protocol, &sock2); |
1286 | if (err < 0) | 1285 | if (err < 0) |
1287 | goto out_release_1; | 1286 | goto out_release_1; |
1288 | 1287 | ||
1289 | err = sock1->ops->socketpair(sock1, sock2); | 1288 | err = sock1->ops->socketpair(sock1, sock2); |
1290 | if (err < 0) | 1289 | if (err < 0) |
1291 | goto out_release_both; | 1290 | goto out_release_both; |
1292 | 1291 | ||
1293 | fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); | 1292 | fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); |
1294 | if (unlikely(fd1 < 0)) { | 1293 | if (unlikely(fd1 < 0)) { |
1295 | err = fd1; | 1294 | err = fd1; |
1296 | goto out_release_both; | 1295 | goto out_release_both; |
1297 | } | 1296 | } |
1298 | 1297 | ||
1299 | fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); | 1298 | fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); |
1300 | if (unlikely(fd2 < 0)) { | 1299 | if (unlikely(fd2 < 0)) { |
1301 | err = fd2; | 1300 | err = fd2; |
1302 | put_filp(newfile1); | 1301 | put_filp(newfile1); |
1303 | put_unused_fd(fd1); | 1302 | put_unused_fd(fd1); |
1304 | goto out_release_both; | 1303 | goto out_release_both; |
1305 | } | 1304 | } |
1306 | 1305 | ||
1307 | err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); | 1306 | err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); |
1308 | if (unlikely(err < 0)) { | 1307 | if (unlikely(err < 0)) { |
1309 | goto out_fd2; | 1308 | goto out_fd2; |
1310 | } | 1309 | } |
1311 | 1310 | ||
1312 | err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); | 1311 | err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); |
1313 | if (unlikely(err < 0)) { | 1312 | if (unlikely(err < 0)) { |
1314 | fput(newfile1); | 1313 | fput(newfile1); |
1315 | goto out_fd1; | 1314 | goto out_fd1; |
1316 | } | 1315 | } |
1317 | 1316 | ||
1318 | err = audit_fd_pair(fd1, fd2); | 1317 | err = audit_fd_pair(fd1, fd2); |
1319 | if (err < 0) { | 1318 | if (err < 0) { |
1320 | fput(newfile1); | 1319 | fput(newfile1); |
1321 | fput(newfile2); | 1320 | fput(newfile2); |
1322 | goto out_fd; | 1321 | goto out_fd; |
1323 | } | 1322 | } |
1324 | 1323 | ||
1325 | fd_install(fd1, newfile1); | 1324 | fd_install(fd1, newfile1); |
1326 | fd_install(fd2, newfile2); | 1325 | fd_install(fd2, newfile2); |
1327 | /* fd1 and fd2 may be already another descriptors. | 1326 | /* fd1 and fd2 may be already another descriptors. |
1328 | * Not kernel problem. | 1327 | * Not kernel problem. |
1329 | */ | 1328 | */ |
1330 | 1329 | ||
1331 | err = put_user(fd1, &usockvec[0]); | 1330 | err = put_user(fd1, &usockvec[0]); |
1332 | if (!err) | 1331 | if (!err) |
1333 | err = put_user(fd2, &usockvec[1]); | 1332 | err = put_user(fd2, &usockvec[1]); |
1334 | if (!err) | 1333 | if (!err) |
1335 | return 0; | 1334 | return 0; |
1336 | 1335 | ||
1337 | sys_close(fd2); | 1336 | sys_close(fd2); |
1338 | sys_close(fd1); | 1337 | sys_close(fd1); |
1339 | return err; | 1338 | return err; |
1340 | 1339 | ||
1341 | out_release_both: | 1340 | out_release_both: |
1342 | sock_release(sock2); | 1341 | sock_release(sock2); |
1343 | out_release_1: | 1342 | out_release_1: |
1344 | sock_release(sock1); | 1343 | sock_release(sock1); |
1345 | out: | 1344 | out: |
1346 | return err; | 1345 | return err; |
1347 | 1346 | ||
1348 | out_fd2: | 1347 | out_fd2: |
1349 | put_filp(newfile1); | 1348 | put_filp(newfile1); |
1350 | sock_release(sock1); | 1349 | sock_release(sock1); |
1351 | out_fd1: | 1350 | out_fd1: |
1352 | put_filp(newfile2); | 1351 | put_filp(newfile2); |
1353 | sock_release(sock2); | 1352 | sock_release(sock2); |
1354 | out_fd: | 1353 | out_fd: |
1355 | put_unused_fd(fd1); | 1354 | put_unused_fd(fd1); |
1356 | put_unused_fd(fd2); | 1355 | put_unused_fd(fd2); |
1357 | goto out; | 1356 | goto out; |
1358 | } | 1357 | } |
1359 | 1358 | ||
1360 | /* | 1359 | /* |
1361 | * Bind a name to a socket. Nothing much to do here since it's | 1360 | * Bind a name to a socket. Nothing much to do here since it's |
1362 | * the protocol's responsibility to handle the local address. | 1361 | * the protocol's responsibility to handle the local address. |
1363 | * | 1362 | * |
1364 | * We move the socket address to kernel space before we call | 1363 | * We move the socket address to kernel space before we call |
1365 | * the protocol layer (having also checked the address is ok). | 1364 | * the protocol layer (having also checked the address is ok). |
1366 | */ | 1365 | */ |
1367 | 1366 | ||
1368 | asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) | 1367 | asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) |
1369 | { | 1368 | { |
1370 | struct socket *sock; | 1369 | struct socket *sock; |
1371 | struct sockaddr_storage address; | 1370 | struct sockaddr_storage address; |
1372 | int err, fput_needed; | 1371 | int err, fput_needed; |
1373 | 1372 | ||
1374 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1373 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1375 | if (sock) { | 1374 | if (sock) { |
1376 | err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); | 1375 | err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); |
1377 | if (err >= 0) { | 1376 | if (err >= 0) { |
1378 | err = security_socket_bind(sock, | 1377 | err = security_socket_bind(sock, |
1379 | (struct sockaddr *)&address, | 1378 | (struct sockaddr *)&address, |
1380 | addrlen); | 1379 | addrlen); |
1381 | if (!err) | 1380 | if (!err) |
1382 | err = sock->ops->bind(sock, | 1381 | err = sock->ops->bind(sock, |
1383 | (struct sockaddr *) | 1382 | (struct sockaddr *) |
1384 | &address, addrlen); | 1383 | &address, addrlen); |
1385 | } | 1384 | } |
1386 | fput_light(sock->file, fput_needed); | 1385 | fput_light(sock->file, fput_needed); |
1387 | } | 1386 | } |
1388 | return err; | 1387 | return err; |
1389 | } | 1388 | } |
1390 | 1389 | ||
1391 | /* | 1390 | /* |
1392 | * Perform a listen. Basically, we allow the protocol to do anything | 1391 | * Perform a listen. Basically, we allow the protocol to do anything |
1393 | * necessary for a listen, and if that works, we mark the socket as | 1392 | * necessary for a listen, and if that works, we mark the socket as |
1394 | * ready for listening. | 1393 | * ready for listening. |
1395 | */ | 1394 | */ |
1396 | 1395 | ||
1397 | asmlinkage long sys_listen(int fd, int backlog) | 1396 | asmlinkage long sys_listen(int fd, int backlog) |
1398 | { | 1397 | { |
1399 | struct socket *sock; | 1398 | struct socket *sock; |
1400 | int err, fput_needed; | 1399 | int err, fput_needed; |
1401 | int somaxconn; | 1400 | int somaxconn; |
1402 | 1401 | ||
1403 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1402 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1404 | if (sock) { | 1403 | if (sock) { |
1405 | somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; | 1404 | somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; |
1406 | if ((unsigned)backlog > somaxconn) | 1405 | if ((unsigned)backlog > somaxconn) |
1407 | backlog = somaxconn; | 1406 | backlog = somaxconn; |
1408 | 1407 | ||
1409 | err = security_socket_listen(sock, backlog); | 1408 | err = security_socket_listen(sock, backlog); |
1410 | if (!err) | 1409 | if (!err) |
1411 | err = sock->ops->listen(sock, backlog); | 1410 | err = sock->ops->listen(sock, backlog); |
1412 | 1411 | ||
1413 | fput_light(sock->file, fput_needed); | 1412 | fput_light(sock->file, fput_needed); |
1414 | } | 1413 | } |
1415 | return err; | 1414 | return err; |
1416 | } | 1415 | } |
1417 | 1416 | ||
1418 | /* | 1417 | /* |
1419 | * For accept, we attempt to create a new socket, set up the link | 1418 | * For accept, we attempt to create a new socket, set up the link |
1420 | * with the client, wake up the client, then return the new | 1419 | * with the client, wake up the client, then return the new |
1421 | * connected fd. We collect the address of the connector in kernel | 1420 | * connected fd. We collect the address of the connector in kernel |
1422 | * space and move it to user at the very end. This is unclean because | 1421 | * space and move it to user at the very end. This is unclean because |
1423 | * we open the socket then return an error. | 1422 | * we open the socket then return an error. |
1424 | * | 1423 | * |
1425 | * 1003.1g adds the ability to recvmsg() to query connection pending | 1424 | * 1003.1g adds the ability to recvmsg() to query connection pending |
1426 | * status to recvmsg. We need to add that support in a way thats | 1425 | * status to recvmsg. We need to add that support in a way thats |
1427 | * clean when we restucture accept also. | 1426 | * clean when we restucture accept also. |
1428 | */ | 1427 | */ |
1429 | 1428 | ||
1430 | long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, | 1429 | long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, |
1431 | int __user *upeer_addrlen, int flags) | 1430 | int __user *upeer_addrlen, int flags) |
1432 | { | 1431 | { |
1433 | struct socket *sock, *newsock; | 1432 | struct socket *sock, *newsock; |
1434 | struct file *newfile; | 1433 | struct file *newfile; |
1435 | int err, len, newfd, fput_needed; | 1434 | int err, len, newfd, fput_needed; |
1436 | struct sockaddr_storage address; | 1435 | struct sockaddr_storage address; |
1437 | 1436 | ||
1438 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) | 1437 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) |
1439 | return -EINVAL; | 1438 | return -EINVAL; |
1440 | 1439 | ||
1441 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) | 1440 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) |
1442 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1441 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1443 | 1442 | ||
1444 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1443 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1445 | if (!sock) | 1444 | if (!sock) |
1446 | goto out; | 1445 | goto out; |
1447 | 1446 | ||
1448 | err = -ENFILE; | 1447 | err = -ENFILE; |
1449 | if (!(newsock = sock_alloc())) | 1448 | if (!(newsock = sock_alloc())) |
1450 | goto out_put; | 1449 | goto out_put; |
1451 | 1450 | ||
1452 | newsock->type = sock->type; | 1451 | newsock->type = sock->type; |
1453 | newsock->ops = sock->ops; | 1452 | newsock->ops = sock->ops; |
1454 | 1453 | ||
1455 | /* | 1454 | /* |
1456 | * We don't need try_module_get here, as the listening socket (sock) | 1455 | * We don't need try_module_get here, as the listening socket (sock) |
1457 | * has the protocol module (sock->ops->owner) held. | 1456 | * has the protocol module (sock->ops->owner) held. |
1458 | */ | 1457 | */ |
1459 | __module_get(newsock->ops->owner); | 1458 | __module_get(newsock->ops->owner); |
1460 | 1459 | ||
1461 | newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); | 1460 | newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); |
1462 | if (unlikely(newfd < 0)) { | 1461 | if (unlikely(newfd < 0)) { |
1463 | err = newfd; | 1462 | err = newfd; |
1464 | sock_release(newsock); | 1463 | sock_release(newsock); |
1465 | goto out_put; | 1464 | goto out_put; |
1466 | } | 1465 | } |
1467 | 1466 | ||
1468 | err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); | 1467 | err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); |
1469 | if (err < 0) | 1468 | if (err < 0) |
1470 | goto out_fd_simple; | 1469 | goto out_fd_simple; |
1471 | 1470 | ||
1472 | err = security_socket_accept(sock, newsock); | 1471 | err = security_socket_accept(sock, newsock); |
1473 | if (err) | 1472 | if (err) |
1474 | goto out_fd; | 1473 | goto out_fd; |
1475 | 1474 | ||
1476 | err = sock->ops->accept(sock, newsock, sock->file->f_flags); | 1475 | err = sock->ops->accept(sock, newsock, sock->file->f_flags); |
1477 | if (err < 0) | 1476 | if (err < 0) |
1478 | goto out_fd; | 1477 | goto out_fd; |
1479 | 1478 | ||
1480 | if (upeer_sockaddr) { | 1479 | if (upeer_sockaddr) { |
1481 | if (newsock->ops->getname(newsock, (struct sockaddr *)&address, | 1480 | if (newsock->ops->getname(newsock, (struct sockaddr *)&address, |
1482 | &len, 2) < 0) { | 1481 | &len, 2) < 0) { |
1483 | err = -ECONNABORTED; | 1482 | err = -ECONNABORTED; |
1484 | goto out_fd; | 1483 | goto out_fd; |
1485 | } | 1484 | } |
1486 | err = move_addr_to_user((struct sockaddr *)&address, | 1485 | err = move_addr_to_user((struct sockaddr *)&address, |
1487 | len, upeer_sockaddr, upeer_addrlen); | 1486 | len, upeer_sockaddr, upeer_addrlen); |
1488 | if (err < 0) | 1487 | if (err < 0) |
1489 | goto out_fd; | 1488 | goto out_fd; |
1490 | } | 1489 | } |
1491 | 1490 | ||
1492 | /* File flags are not inherited via accept() unlike another OSes. */ | 1491 | /* File flags are not inherited via accept() unlike another OSes. */ |
1493 | 1492 | ||
1494 | fd_install(newfd, newfile); | 1493 | fd_install(newfd, newfile); |
1495 | err = newfd; | 1494 | err = newfd; |
1496 | 1495 | ||
1497 | security_socket_post_accept(sock, newsock); | 1496 | security_socket_post_accept(sock, newsock); |
1498 | 1497 | ||
1499 | out_put: | 1498 | out_put: |
1500 | fput_light(sock->file, fput_needed); | 1499 | fput_light(sock->file, fput_needed); |
1501 | out: | 1500 | out: |
1502 | return err; | 1501 | return err; |
1503 | out_fd_simple: | 1502 | out_fd_simple: |
1504 | sock_release(newsock); | 1503 | sock_release(newsock); |
1505 | put_filp(newfile); | 1504 | put_filp(newfile); |
1506 | put_unused_fd(newfd); | 1505 | put_unused_fd(newfd); |
1507 | goto out_put; | 1506 | goto out_put; |
1508 | out_fd: | 1507 | out_fd: |
1509 | fput(newfile); | 1508 | fput(newfile); |
1510 | put_unused_fd(newfd); | 1509 | put_unused_fd(newfd); |
1511 | goto out_put; | 1510 | goto out_put; |
1512 | } | 1511 | } |
1513 | 1512 | ||
1514 | #if 0 | 1513 | #if 0 |
1515 | #ifdef HAVE_SET_RESTORE_SIGMASK | 1514 | #ifdef HAVE_SET_RESTORE_SIGMASK |
1516 | asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, | 1515 | asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, |
1517 | int __user *upeer_addrlen, | 1516 | int __user *upeer_addrlen, |
1518 | const sigset_t __user *sigmask, | 1517 | const sigset_t __user *sigmask, |
1519 | size_t sigsetsize, int flags) | 1518 | size_t sigsetsize, int flags) |
1520 | { | 1519 | { |
1521 | sigset_t ksigmask, sigsaved; | 1520 | sigset_t ksigmask, sigsaved; |
1522 | int ret; | 1521 | int ret; |
1523 | 1522 | ||
1524 | if (sigmask) { | 1523 | if (sigmask) { |
1525 | /* XXX: Don't preclude handling different sized sigset_t's. */ | 1524 | /* XXX: Don't preclude handling different sized sigset_t's. */ |
1526 | if (sigsetsize != sizeof(sigset_t)) | 1525 | if (sigsetsize != sizeof(sigset_t)) |
1527 | return -EINVAL; | 1526 | return -EINVAL; |
1528 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | 1527 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) |
1529 | return -EFAULT; | 1528 | return -EFAULT; |
1530 | 1529 | ||
1531 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 1530 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
1532 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1531 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
1533 | } | 1532 | } |
1534 | 1533 | ||
1535 | ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); | 1534 | ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); |
1536 | 1535 | ||
1537 | if (ret < 0 && signal_pending(current)) { | 1536 | if (ret < 0 && signal_pending(current)) { |
1538 | /* | 1537 | /* |
1539 | * Don't restore the signal mask yet. Let do_signal() deliver | 1538 | * Don't restore the signal mask yet. Let do_signal() deliver |
1540 | * the signal on the way back to userspace, before the signal | 1539 | * the signal on the way back to userspace, before the signal |
1541 | * mask is restored. | 1540 | * mask is restored. |
1542 | */ | 1541 | */ |
1543 | if (sigmask) { | 1542 | if (sigmask) { |
1544 | memcpy(&current->saved_sigmask, &sigsaved, | 1543 | memcpy(&current->saved_sigmask, &sigsaved, |
1545 | sizeof(sigsaved)); | 1544 | sizeof(sigsaved)); |
1546 | set_restore_sigmask(); | 1545 | set_restore_sigmask(); |
1547 | } | 1546 | } |
1548 | } else if (sigmask) | 1547 | } else if (sigmask) |
1549 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1548 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
1550 | 1549 | ||
1551 | return ret; | 1550 | return ret; |
1552 | } | 1551 | } |
1553 | #else | 1552 | #else |
1554 | asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, | 1553 | asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, |
1555 | int __user *upeer_addrlen, | 1554 | int __user *upeer_addrlen, |
1556 | const sigset_t __user *sigmask, | 1555 | const sigset_t __user *sigmask, |
1557 | size_t sigsetsize, int flags) | 1556 | size_t sigsetsize, int flags) |
1558 | { | 1557 | { |
1559 | /* The platform does not support restoring the signal mask in the | 1558 | /* The platform does not support restoring the signal mask in the |
1560 | * return path. So we do not allow using paccept() with a signal | 1559 | * return path. So we do not allow using paccept() with a signal |
1561 | * mask. */ | 1560 | * mask. */ |
1562 | if (sigmask) | 1561 | if (sigmask) |
1563 | return -EINVAL; | 1562 | return -EINVAL; |
1564 | 1563 | ||
1565 | return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); | 1564 | return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); |
1566 | } | 1565 | } |
1567 | #endif | 1566 | #endif |
1568 | #endif | 1567 | #endif |
1569 | 1568 | ||
1570 | asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, | 1569 | asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, |
1571 | int __user *upeer_addrlen) | 1570 | int __user *upeer_addrlen) |
1572 | { | 1571 | { |
1573 | return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); | 1572 | return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); |
1574 | } | 1573 | } |
1575 | 1574 | ||
1576 | /* | 1575 | /* |
1577 | * Attempt to connect to a socket with the server address. The address | 1576 | * Attempt to connect to a socket with the server address. The address |
1578 | * is in user space so we verify it is OK and move it to kernel space. | 1577 | * is in user space so we verify it is OK and move it to kernel space. |
1579 | * | 1578 | * |
1580 | * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to | 1579 | * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to |
1581 | * break bindings | 1580 | * break bindings |
1582 | * | 1581 | * |
1583 | * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and | 1582 | * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and |
1584 | * other SEQPACKET protocols that take time to connect() as it doesn't | 1583 | * other SEQPACKET protocols that take time to connect() as it doesn't |
1585 | * include the -EINPROGRESS status for such sockets. | 1584 | * include the -EINPROGRESS status for such sockets. |
1586 | */ | 1585 | */ |
1587 | 1586 | ||
1588 | asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, | 1587 | asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, |
1589 | int addrlen) | 1588 | int addrlen) |
1590 | { | 1589 | { |
1591 | struct socket *sock; | 1590 | struct socket *sock; |
1592 | struct sockaddr_storage address; | 1591 | struct sockaddr_storage address; |
1593 | int err, fput_needed; | 1592 | int err, fput_needed; |
1594 | 1593 | ||
1595 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1594 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1596 | if (!sock) | 1595 | if (!sock) |
1597 | goto out; | 1596 | goto out; |
1598 | err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); | 1597 | err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); |
1599 | if (err < 0) | 1598 | if (err < 0) |
1600 | goto out_put; | 1599 | goto out_put; |
1601 | 1600 | ||
1602 | err = | 1601 | err = |
1603 | security_socket_connect(sock, (struct sockaddr *)&address, addrlen); | 1602 | security_socket_connect(sock, (struct sockaddr *)&address, addrlen); |
1604 | if (err) | 1603 | if (err) |
1605 | goto out_put; | 1604 | goto out_put; |
1606 | 1605 | ||
1607 | err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, | 1606 | err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, |
1608 | sock->file->f_flags); | 1607 | sock->file->f_flags); |
1609 | out_put: | 1608 | out_put: |
1610 | fput_light(sock->file, fput_needed); | 1609 | fput_light(sock->file, fput_needed); |
1611 | out: | 1610 | out: |
1612 | return err; | 1611 | return err; |
1613 | } | 1612 | } |
1614 | 1613 | ||
1615 | /* | 1614 | /* |
1616 | * Get the local address ('name') of a socket object. Move the obtained | 1615 | * Get the local address ('name') of a socket object. Move the obtained |
1617 | * name to user space. | 1616 | * name to user space. |
1618 | */ | 1617 | */ |
1619 | 1618 | ||
1620 | asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, | 1619 | asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, |
1621 | int __user *usockaddr_len) | 1620 | int __user *usockaddr_len) |
1622 | { | 1621 | { |
1623 | struct socket *sock; | 1622 | struct socket *sock; |
1624 | struct sockaddr_storage address; | 1623 | struct sockaddr_storage address; |
1625 | int len, err, fput_needed; | 1624 | int len, err, fput_needed; |
1626 | 1625 | ||
1627 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1626 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1628 | if (!sock) | 1627 | if (!sock) |
1629 | goto out; | 1628 | goto out; |
1630 | 1629 | ||
1631 | err = security_socket_getsockname(sock); | 1630 | err = security_socket_getsockname(sock); |
1632 | if (err) | 1631 | if (err) |
1633 | goto out_put; | 1632 | goto out_put; |
1634 | 1633 | ||
1635 | err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); | 1634 | err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); |
1636 | if (err) | 1635 | if (err) |
1637 | goto out_put; | 1636 | goto out_put; |
1638 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); | 1637 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); |
1639 | 1638 | ||
1640 | out_put: | 1639 | out_put: |
1641 | fput_light(sock->file, fput_needed); | 1640 | fput_light(sock->file, fput_needed); |
1642 | out: | 1641 | out: |
1643 | return err; | 1642 | return err; |
1644 | } | 1643 | } |
1645 | 1644 | ||
1646 | /* | 1645 | /* |
1647 | * Get the remote address ('name') of a socket object. Move the obtained | 1646 | * Get the remote address ('name') of a socket object. Move the obtained |
1648 | * name to user space. | 1647 | * name to user space. |
1649 | */ | 1648 | */ |
1650 | 1649 | ||
1651 | asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, | 1650 | asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, |
1652 | int __user *usockaddr_len) | 1651 | int __user *usockaddr_len) |
1653 | { | 1652 | { |
1654 | struct socket *sock; | 1653 | struct socket *sock; |
1655 | struct sockaddr_storage address; | 1654 | struct sockaddr_storage address; |
1656 | int len, err, fput_needed; | 1655 | int len, err, fput_needed; |
1657 | 1656 | ||
1658 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1657 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1659 | if (sock != NULL) { | 1658 | if (sock != NULL) { |
1660 | err = security_socket_getpeername(sock); | 1659 | err = security_socket_getpeername(sock); |
1661 | if (err) { | 1660 | if (err) { |
1662 | fput_light(sock->file, fput_needed); | 1661 | fput_light(sock->file, fput_needed); |
1663 | return err; | 1662 | return err; |
1664 | } | 1663 | } |
1665 | 1664 | ||
1666 | err = | 1665 | err = |
1667 | sock->ops->getname(sock, (struct sockaddr *)&address, &len, | 1666 | sock->ops->getname(sock, (struct sockaddr *)&address, &len, |
1668 | 1); | 1667 | 1); |
1669 | if (!err) | 1668 | if (!err) |
1670 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, | 1669 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, |
1671 | usockaddr_len); | 1670 | usockaddr_len); |
1672 | fput_light(sock->file, fput_needed); | 1671 | fput_light(sock->file, fput_needed); |
1673 | } | 1672 | } |
1674 | return err; | 1673 | return err; |
1675 | } | 1674 | } |
1676 | 1675 | ||
1677 | /* | 1676 | /* |
1678 | * Send a datagram to a given address. We move the address into kernel | 1677 | * Send a datagram to a given address. We move the address into kernel |
1679 | * space and check the user space data area is readable before invoking | 1678 | * space and check the user space data area is readable before invoking |
1680 | * the protocol. | 1679 | * the protocol. |
1681 | */ | 1680 | */ |
1682 | 1681 | ||
1683 | asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, | 1682 | asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, |
1684 | unsigned flags, struct sockaddr __user *addr, | 1683 | unsigned flags, struct sockaddr __user *addr, |
1685 | int addr_len) | 1684 | int addr_len) |
1686 | { | 1685 | { |
1687 | struct socket *sock; | 1686 | struct socket *sock; |
1688 | struct sockaddr_storage address; | 1687 | struct sockaddr_storage address; |
1689 | int err; | 1688 | int err; |
1690 | struct msghdr msg; | 1689 | struct msghdr msg; |
1691 | struct iovec iov; | 1690 | struct iovec iov; |
1692 | int fput_needed; | 1691 | int fput_needed; |
1693 | 1692 | ||
1694 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1693 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1695 | if (!sock) | 1694 | if (!sock) |
1696 | goto out; | 1695 | goto out; |
1697 | 1696 | ||
1698 | iov.iov_base = buff; | 1697 | iov.iov_base = buff; |
1699 | iov.iov_len = len; | 1698 | iov.iov_len = len; |
1700 | msg.msg_name = NULL; | 1699 | msg.msg_name = NULL; |
1701 | msg.msg_iov = &iov; | 1700 | msg.msg_iov = &iov; |
1702 | msg.msg_iovlen = 1; | 1701 | msg.msg_iovlen = 1; |
1703 | msg.msg_control = NULL; | 1702 | msg.msg_control = NULL; |
1704 | msg.msg_controllen = 0; | 1703 | msg.msg_controllen = 0; |
1705 | msg.msg_namelen = 0; | 1704 | msg.msg_namelen = 0; |
1706 | if (addr) { | 1705 | if (addr) { |
1707 | err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); | 1706 | err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); |
1708 | if (err < 0) | 1707 | if (err < 0) |
1709 | goto out_put; | 1708 | goto out_put; |
1710 | msg.msg_name = (struct sockaddr *)&address; | 1709 | msg.msg_name = (struct sockaddr *)&address; |
1711 | msg.msg_namelen = addr_len; | 1710 | msg.msg_namelen = addr_len; |
1712 | } | 1711 | } |
1713 | if (sock->file->f_flags & O_NONBLOCK) | 1712 | if (sock->file->f_flags & O_NONBLOCK) |
1714 | flags |= MSG_DONTWAIT; | 1713 | flags |= MSG_DONTWAIT; |
1715 | msg.msg_flags = flags; | 1714 | msg.msg_flags = flags; |
1716 | err = sock_sendmsg(sock, &msg, len); | 1715 | err = sock_sendmsg(sock, &msg, len); |
1717 | 1716 | ||
1718 | out_put: | 1717 | out_put: |
1719 | fput_light(sock->file, fput_needed); | 1718 | fput_light(sock->file, fput_needed); |
1720 | out: | 1719 | out: |
1721 | return err; | 1720 | return err; |
1722 | } | 1721 | } |
1723 | 1722 | ||
1724 | /* | 1723 | /* |
1725 | * Send a datagram down a socket. | 1724 | * Send a datagram down a socket. |
1726 | */ | 1725 | */ |
1727 | 1726 | ||
1728 | asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) | 1727 | asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) |
1729 | { | 1728 | { |
1730 | return sys_sendto(fd, buff, len, flags, NULL, 0); | 1729 | return sys_sendto(fd, buff, len, flags, NULL, 0); |
1731 | } | 1730 | } |
1732 | 1731 | ||
1733 | /* | 1732 | /* |
1734 | * Receive a frame from the socket and optionally record the address of the | 1733 | * Receive a frame from the socket and optionally record the address of the |
1735 | * sender. We verify the buffers are writable and if needed move the | 1734 | * sender. We verify the buffers are writable and if needed move the |
1736 | * sender address from kernel to user space. | 1735 | * sender address from kernel to user space. |
1737 | */ | 1736 | */ |
1738 | 1737 | ||
1739 | asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, | 1738 | asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, |
1740 | unsigned flags, struct sockaddr __user *addr, | 1739 | unsigned flags, struct sockaddr __user *addr, |
1741 | int __user *addr_len) | 1740 | int __user *addr_len) |
1742 | { | 1741 | { |
1743 | struct socket *sock; | 1742 | struct socket *sock; |
1744 | struct iovec iov; | 1743 | struct iovec iov; |
1745 | struct msghdr msg; | 1744 | struct msghdr msg; |
1746 | struct sockaddr_storage address; | 1745 | struct sockaddr_storage address; |
1747 | int err, err2; | 1746 | int err, err2; |
1748 | int fput_needed; | 1747 | int fput_needed; |
1749 | 1748 | ||
1750 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1749 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1751 | if (!sock) | 1750 | if (!sock) |
1752 | goto out; | 1751 | goto out; |
1753 | 1752 | ||
1754 | msg.msg_control = NULL; | 1753 | msg.msg_control = NULL; |
1755 | msg.msg_controllen = 0; | 1754 | msg.msg_controllen = 0; |
1756 | msg.msg_iovlen = 1; | 1755 | msg.msg_iovlen = 1; |
1757 | msg.msg_iov = &iov; | 1756 | msg.msg_iov = &iov; |
1758 | iov.iov_len = size; | 1757 | iov.iov_len = size; |
1759 | iov.iov_base = ubuf; | 1758 | iov.iov_base = ubuf; |
1760 | msg.msg_name = (struct sockaddr *)&address; | 1759 | msg.msg_name = (struct sockaddr *)&address; |
1761 | msg.msg_namelen = sizeof(address); | 1760 | msg.msg_namelen = sizeof(address); |
1762 | if (sock->file->f_flags & O_NONBLOCK) | 1761 | if (sock->file->f_flags & O_NONBLOCK) |
1763 | flags |= MSG_DONTWAIT; | 1762 | flags |= MSG_DONTWAIT; |
1764 | err = sock_recvmsg(sock, &msg, size, flags); | 1763 | err = sock_recvmsg(sock, &msg, size, flags); |
1765 | 1764 | ||
1766 | if (err >= 0 && addr != NULL) { | 1765 | if (err >= 0 && addr != NULL) { |
1767 | err2 = move_addr_to_user((struct sockaddr *)&address, | 1766 | err2 = move_addr_to_user((struct sockaddr *)&address, |
1768 | msg.msg_namelen, addr, addr_len); | 1767 | msg.msg_namelen, addr, addr_len); |
1769 | if (err2 < 0) | 1768 | if (err2 < 0) |
1770 | err = err2; | 1769 | err = err2; |
1771 | } | 1770 | } |
1772 | 1771 | ||
1773 | fput_light(sock->file, fput_needed); | 1772 | fput_light(sock->file, fput_needed); |
1774 | out: | 1773 | out: |
1775 | return err; | 1774 | return err; |
1776 | } | 1775 | } |
1777 | 1776 | ||
1778 | /* | 1777 | /* |
1779 | * Receive a datagram from a socket. | 1778 | * Receive a datagram from a socket. |
1780 | */ | 1779 | */ |
1781 | 1780 | ||
1782 | asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, | 1781 | asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, |
1783 | unsigned flags) | 1782 | unsigned flags) |
1784 | { | 1783 | { |
1785 | return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); | 1784 | return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); |
1786 | } | 1785 | } |
1787 | 1786 | ||
1788 | /* | 1787 | /* |
1789 | * Set a socket option. Because we don't know the option lengths we have | 1788 | * Set a socket option. Because we don't know the option lengths we have |
1790 | * to pass the user mode parameter for the protocols to sort out. | 1789 | * to pass the user mode parameter for the protocols to sort out. |
1791 | */ | 1790 | */ |
1792 | 1791 | ||
1793 | asmlinkage long sys_setsockopt(int fd, int level, int optname, | 1792 | asmlinkage long sys_setsockopt(int fd, int level, int optname, |
1794 | char __user *optval, int optlen) | 1793 | char __user *optval, int optlen) |
1795 | { | 1794 | { |
1796 | int err, fput_needed; | 1795 | int err, fput_needed; |
1797 | struct socket *sock; | 1796 | struct socket *sock; |
1798 | 1797 | ||
1799 | if (optlen < 0) | 1798 | if (optlen < 0) |
1800 | return -EINVAL; | 1799 | return -EINVAL; |
1801 | 1800 | ||
1802 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1801 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1803 | if (sock != NULL) { | 1802 | if (sock != NULL) { |
1804 | err = security_socket_setsockopt(sock, level, optname); | 1803 | err = security_socket_setsockopt(sock, level, optname); |
1805 | if (err) | 1804 | if (err) |
1806 | goto out_put; | 1805 | goto out_put; |
1807 | 1806 | ||
1808 | if (level == SOL_SOCKET) | 1807 | if (level == SOL_SOCKET) |
1809 | err = | 1808 | err = |
1810 | sock_setsockopt(sock, level, optname, optval, | 1809 | sock_setsockopt(sock, level, optname, optval, |
1811 | optlen); | 1810 | optlen); |
1812 | else | 1811 | else |
1813 | err = | 1812 | err = |
1814 | sock->ops->setsockopt(sock, level, optname, optval, | 1813 | sock->ops->setsockopt(sock, level, optname, optval, |
1815 | optlen); | 1814 | optlen); |
1816 | out_put: | 1815 | out_put: |
1817 | fput_light(sock->file, fput_needed); | 1816 | fput_light(sock->file, fput_needed); |
1818 | } | 1817 | } |
1819 | return err; | 1818 | return err; |
1820 | } | 1819 | } |
1821 | 1820 | ||
1822 | /* | 1821 | /* |
1823 | * Get a socket option. Because we don't know the option lengths we have | 1822 | * Get a socket option. Because we don't know the option lengths we have |
1824 | * to pass a user mode parameter for the protocols to sort out. | 1823 | * to pass a user mode parameter for the protocols to sort out. |
1825 | */ | 1824 | */ |
1826 | 1825 | ||
1827 | asmlinkage long sys_getsockopt(int fd, int level, int optname, | 1826 | asmlinkage long sys_getsockopt(int fd, int level, int optname, |
1828 | char __user *optval, int __user *optlen) | 1827 | char __user *optval, int __user *optlen) |
1829 | { | 1828 | { |
1830 | int err, fput_needed; | 1829 | int err, fput_needed; |
1831 | struct socket *sock; | 1830 | struct socket *sock; |
1832 | 1831 | ||
1833 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1832 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1834 | if (sock != NULL) { | 1833 | if (sock != NULL) { |
1835 | err = security_socket_getsockopt(sock, level, optname); | 1834 | err = security_socket_getsockopt(sock, level, optname); |
1836 | if (err) | 1835 | if (err) |
1837 | goto out_put; | 1836 | goto out_put; |
1838 | 1837 | ||
1839 | if (level == SOL_SOCKET) | 1838 | if (level == SOL_SOCKET) |
1840 | err = | 1839 | err = |
1841 | sock_getsockopt(sock, level, optname, optval, | 1840 | sock_getsockopt(sock, level, optname, optval, |
1842 | optlen); | 1841 | optlen); |
1843 | else | 1842 | else |
1844 | err = | 1843 | err = |
1845 | sock->ops->getsockopt(sock, level, optname, optval, | 1844 | sock->ops->getsockopt(sock, level, optname, optval, |
1846 | optlen); | 1845 | optlen); |
1847 | out_put: | 1846 | out_put: |
1848 | fput_light(sock->file, fput_needed); | 1847 | fput_light(sock->file, fput_needed); |
1849 | } | 1848 | } |
1850 | return err; | 1849 | return err; |
1851 | } | 1850 | } |
1852 | 1851 | ||
1853 | /* | 1852 | /* |
1854 | * Shutdown a socket. | 1853 | * Shutdown a socket. |
1855 | */ | 1854 | */ |
1856 | 1855 | ||
1857 | asmlinkage long sys_shutdown(int fd, int how) | 1856 | asmlinkage long sys_shutdown(int fd, int how) |
1858 | { | 1857 | { |
1859 | int err, fput_needed; | 1858 | int err, fput_needed; |
1860 | struct socket *sock; | 1859 | struct socket *sock; |
1861 | 1860 | ||
1862 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1861 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1863 | if (sock != NULL) { | 1862 | if (sock != NULL) { |
1864 | err = security_socket_shutdown(sock, how); | 1863 | err = security_socket_shutdown(sock, how); |
1865 | if (!err) | 1864 | if (!err) |
1866 | err = sock->ops->shutdown(sock, how); | 1865 | err = sock->ops->shutdown(sock, how); |
1867 | fput_light(sock->file, fput_needed); | 1866 | fput_light(sock->file, fput_needed); |
1868 | } | 1867 | } |
1869 | return err; | 1868 | return err; |
1870 | } | 1869 | } |
1871 | 1870 | ||
1872 | /* A couple of helpful macros for getting the address of the 32/64 bit | 1871 | /* A couple of helpful macros for getting the address of the 32/64 bit |
1873 | * fields which are the same type (int / unsigned) on our platforms. | 1872 | * fields which are the same type (int / unsigned) on our platforms. |
1874 | */ | 1873 | */ |
1875 | #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) | 1874 | #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) |
1876 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) | 1875 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) |
1877 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) | 1876 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) |
1878 | 1877 | ||
1879 | /* | 1878 | /* |
1880 | * BSD sendmsg interface | 1879 | * BSD sendmsg interface |
1881 | */ | 1880 | */ |
1882 | 1881 | ||
1883 | asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | 1882 | asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) |
1884 | { | 1883 | { |
1885 | struct compat_msghdr __user *msg_compat = | 1884 | struct compat_msghdr __user *msg_compat = |
1886 | (struct compat_msghdr __user *)msg; | 1885 | (struct compat_msghdr __user *)msg; |
1887 | struct socket *sock; | 1886 | struct socket *sock; |
1888 | struct sockaddr_storage address; | 1887 | struct sockaddr_storage address; |
1889 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; | 1888 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; |
1890 | unsigned char ctl[sizeof(struct cmsghdr) + 20] | 1889 | unsigned char ctl[sizeof(struct cmsghdr) + 20] |
1891 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); | 1890 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); |
1892 | /* 20 is size of ipv6_pktinfo */ | 1891 | /* 20 is size of ipv6_pktinfo */ |
1893 | unsigned char *ctl_buf = ctl; | 1892 | unsigned char *ctl_buf = ctl; |
1894 | struct msghdr msg_sys; | 1893 | struct msghdr msg_sys; |
1895 | int err, ctl_len, iov_size, total_len; | 1894 | int err, ctl_len, iov_size, total_len; |
1896 | int fput_needed; | 1895 | int fput_needed; |
1897 | 1896 | ||
1898 | err = -EFAULT; | 1897 | err = -EFAULT; |
1899 | if (MSG_CMSG_COMPAT & flags) { | 1898 | if (MSG_CMSG_COMPAT & flags) { |
1900 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 1899 | if (get_compat_msghdr(&msg_sys, msg_compat)) |
1901 | return -EFAULT; | 1900 | return -EFAULT; |
1902 | } | 1901 | } |
1903 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | 1902 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) |
1904 | return -EFAULT; | 1903 | return -EFAULT; |
1905 | 1904 | ||
1906 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1905 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1907 | if (!sock) | 1906 | if (!sock) |
1908 | goto out; | 1907 | goto out; |
1909 | 1908 | ||
1910 | /* do not move before msg_sys is valid */ | 1909 | /* do not move before msg_sys is valid */ |
1911 | err = -EMSGSIZE; | 1910 | err = -EMSGSIZE; |
1912 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 1911 | if (msg_sys.msg_iovlen > UIO_MAXIOV) |
1913 | goto out_put; | 1912 | goto out_put; |
1914 | 1913 | ||
1915 | /* Check whether to allocate the iovec area */ | 1914 | /* Check whether to allocate the iovec area */ |
1916 | err = -ENOMEM; | 1915 | err = -ENOMEM; |
1917 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 1916 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); |
1918 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 1917 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { |
1919 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); | 1918 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
1920 | if (!iov) | 1919 | if (!iov) |
1921 | goto out_put; | 1920 | goto out_put; |
1922 | } | 1921 | } |
1923 | 1922 | ||
1924 | /* This will also move the address data into kernel space */ | 1923 | /* This will also move the address data into kernel space */ |
1925 | if (MSG_CMSG_COMPAT & flags) { | 1924 | if (MSG_CMSG_COMPAT & flags) { |
1926 | err = verify_compat_iovec(&msg_sys, iov, | 1925 | err = verify_compat_iovec(&msg_sys, iov, |
1927 | (struct sockaddr *)&address, | 1926 | (struct sockaddr *)&address, |
1928 | VERIFY_READ); | 1927 | VERIFY_READ); |
1929 | } else | 1928 | } else |
1930 | err = verify_iovec(&msg_sys, iov, | 1929 | err = verify_iovec(&msg_sys, iov, |
1931 | (struct sockaddr *)&address, | 1930 | (struct sockaddr *)&address, |
1932 | VERIFY_READ); | 1931 | VERIFY_READ); |
1933 | if (err < 0) | 1932 | if (err < 0) |
1934 | goto out_freeiov; | 1933 | goto out_freeiov; |
1935 | total_len = err; | 1934 | total_len = err; |
1936 | 1935 | ||
1937 | err = -ENOBUFS; | 1936 | err = -ENOBUFS; |
1938 | 1937 | ||
1939 | if (msg_sys.msg_controllen > INT_MAX) | 1938 | if (msg_sys.msg_controllen > INT_MAX) |
1940 | goto out_freeiov; | 1939 | goto out_freeiov; |
1941 | ctl_len = msg_sys.msg_controllen; | 1940 | ctl_len = msg_sys.msg_controllen; |
1942 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { | 1941 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { |
1943 | err = | 1942 | err = |
1944 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, | 1943 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, |
1945 | sizeof(ctl)); | 1944 | sizeof(ctl)); |
1946 | if (err) | 1945 | if (err) |
1947 | goto out_freeiov; | 1946 | goto out_freeiov; |
1948 | ctl_buf = msg_sys.msg_control; | 1947 | ctl_buf = msg_sys.msg_control; |
1949 | ctl_len = msg_sys.msg_controllen; | 1948 | ctl_len = msg_sys.msg_controllen; |
1950 | } else if (ctl_len) { | 1949 | } else if (ctl_len) { |
1951 | if (ctl_len > sizeof(ctl)) { | 1950 | if (ctl_len > sizeof(ctl)) { |
1952 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); | 1951 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); |
1953 | if (ctl_buf == NULL) | 1952 | if (ctl_buf == NULL) |
1954 | goto out_freeiov; | 1953 | goto out_freeiov; |
1955 | } | 1954 | } |
1956 | err = -EFAULT; | 1955 | err = -EFAULT; |
1957 | /* | 1956 | /* |
1958 | * Careful! Before this, msg_sys.msg_control contains a user pointer. | 1957 | * Careful! Before this, msg_sys.msg_control contains a user pointer. |
1959 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted | 1958 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted |
1960 | * checking falls down on this. | 1959 | * checking falls down on this. |
1961 | */ | 1960 | */ |
1962 | if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, | 1961 | if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, |
1963 | ctl_len)) | 1962 | ctl_len)) |
1964 | goto out_freectl; | 1963 | goto out_freectl; |
1965 | msg_sys.msg_control = ctl_buf; | 1964 | msg_sys.msg_control = ctl_buf; |
1966 | } | 1965 | } |
1967 | msg_sys.msg_flags = flags; | 1966 | msg_sys.msg_flags = flags; |
1968 | 1967 | ||
1969 | if (sock->file->f_flags & O_NONBLOCK) | 1968 | if (sock->file->f_flags & O_NONBLOCK) |
1970 | msg_sys.msg_flags |= MSG_DONTWAIT; | 1969 | msg_sys.msg_flags |= MSG_DONTWAIT; |
1971 | err = sock_sendmsg(sock, &msg_sys, total_len); | 1970 | err = sock_sendmsg(sock, &msg_sys, total_len); |
1972 | 1971 | ||
1973 | out_freectl: | 1972 | out_freectl: |
1974 | if (ctl_buf != ctl) | 1973 | if (ctl_buf != ctl) |
1975 | sock_kfree_s(sock->sk, ctl_buf, ctl_len); | 1974 | sock_kfree_s(sock->sk, ctl_buf, ctl_len); |
1976 | out_freeiov: | 1975 | out_freeiov: |
1977 | if (iov != iovstack) | 1976 | if (iov != iovstack) |
1978 | sock_kfree_s(sock->sk, iov, iov_size); | 1977 | sock_kfree_s(sock->sk, iov, iov_size); |
1979 | out_put: | 1978 | out_put: |
1980 | fput_light(sock->file, fput_needed); | 1979 | fput_light(sock->file, fput_needed); |
1981 | out: | 1980 | out: |
1982 | return err; | 1981 | return err; |
1983 | } | 1982 | } |
1984 | 1983 | ||
1985 | /* | 1984 | /* |
1986 | * BSD recvmsg interface | 1985 | * BSD recvmsg interface |
1987 | */ | 1986 | */ |
1988 | 1987 | ||
1989 | asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, | 1988 | asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, |
1990 | unsigned int flags) | 1989 | unsigned int flags) |
1991 | { | 1990 | { |
1992 | struct compat_msghdr __user *msg_compat = | 1991 | struct compat_msghdr __user *msg_compat = |
1993 | (struct compat_msghdr __user *)msg; | 1992 | (struct compat_msghdr __user *)msg; |
1994 | struct socket *sock; | 1993 | struct socket *sock; |
1995 | struct iovec iovstack[UIO_FASTIOV]; | 1994 | struct iovec iovstack[UIO_FASTIOV]; |
1996 | struct iovec *iov = iovstack; | 1995 | struct iovec *iov = iovstack; |
1997 | struct msghdr msg_sys; | 1996 | struct msghdr msg_sys; |
1998 | unsigned long cmsg_ptr; | 1997 | unsigned long cmsg_ptr; |
1999 | int err, iov_size, total_len, len; | 1998 | int err, iov_size, total_len, len; |
2000 | int fput_needed; | 1999 | int fput_needed; |
2001 | 2000 | ||
2002 | /* kernel mode address */ | 2001 | /* kernel mode address */ |
2003 | struct sockaddr_storage addr; | 2002 | struct sockaddr_storage addr; |
2004 | 2003 | ||
2005 | /* user mode address pointers */ | 2004 | /* user mode address pointers */ |
2006 | struct sockaddr __user *uaddr; | 2005 | struct sockaddr __user *uaddr; |
2007 | int __user *uaddr_len; | 2006 | int __user *uaddr_len; |
2008 | 2007 | ||
2009 | if (MSG_CMSG_COMPAT & flags) { | 2008 | if (MSG_CMSG_COMPAT & flags) { |
2010 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 2009 | if (get_compat_msghdr(&msg_sys, msg_compat)) |
2011 | return -EFAULT; | 2010 | return -EFAULT; |
2012 | } | 2011 | } |
2013 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | 2012 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) |
2014 | return -EFAULT; | 2013 | return -EFAULT; |
2015 | 2014 | ||
2016 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 2015 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
2017 | if (!sock) | 2016 | if (!sock) |
2018 | goto out; | 2017 | goto out; |
2019 | 2018 | ||
2020 | err = -EMSGSIZE; | 2019 | err = -EMSGSIZE; |
2021 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 2020 | if (msg_sys.msg_iovlen > UIO_MAXIOV) |
2022 | goto out_put; | 2021 | goto out_put; |
2023 | 2022 | ||
2024 | /* Check whether to allocate the iovec area */ | 2023 | /* Check whether to allocate the iovec area */ |
2025 | err = -ENOMEM; | 2024 | err = -ENOMEM; |
2026 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 2025 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); |
2027 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 2026 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { |
2028 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); | 2027 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
2029 | if (!iov) | 2028 | if (!iov) |
2030 | goto out_put; | 2029 | goto out_put; |
2031 | } | 2030 | } |
2032 | 2031 | ||
2033 | /* | 2032 | /* |
2034 | * Save the user-mode address (verify_iovec will change the | 2033 | * Save the user-mode address (verify_iovec will change the |
2035 | * kernel msghdr to use the kernel address space) | 2034 | * kernel msghdr to use the kernel address space) |
2036 | */ | 2035 | */ |
2037 | 2036 | ||
2038 | uaddr = (__force void __user *)msg_sys.msg_name; | 2037 | uaddr = (__force void __user *)msg_sys.msg_name; |
2039 | uaddr_len = COMPAT_NAMELEN(msg); | 2038 | uaddr_len = COMPAT_NAMELEN(msg); |
2040 | if (MSG_CMSG_COMPAT & flags) { | 2039 | if (MSG_CMSG_COMPAT & flags) { |
2041 | err = verify_compat_iovec(&msg_sys, iov, | 2040 | err = verify_compat_iovec(&msg_sys, iov, |
2042 | (struct sockaddr *)&addr, | 2041 | (struct sockaddr *)&addr, |
2043 | VERIFY_WRITE); | 2042 | VERIFY_WRITE); |
2044 | } else | 2043 | } else |
2045 | err = verify_iovec(&msg_sys, iov, | 2044 | err = verify_iovec(&msg_sys, iov, |
2046 | (struct sockaddr *)&addr, | 2045 | (struct sockaddr *)&addr, |
2047 | VERIFY_WRITE); | 2046 | VERIFY_WRITE); |
2048 | if (err < 0) | 2047 | if (err < 0) |
2049 | goto out_freeiov; | 2048 | goto out_freeiov; |
2050 | total_len = err; | 2049 | total_len = err; |
2051 | 2050 | ||
2052 | cmsg_ptr = (unsigned long)msg_sys.msg_control; | 2051 | cmsg_ptr = (unsigned long)msg_sys.msg_control; |
2053 | msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); | 2052 | msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); |
2054 | 2053 | ||
2055 | if (sock->file->f_flags & O_NONBLOCK) | 2054 | if (sock->file->f_flags & O_NONBLOCK) |
2056 | flags |= MSG_DONTWAIT; | 2055 | flags |= MSG_DONTWAIT; |
2057 | err = sock_recvmsg(sock, &msg_sys, total_len, flags); | 2056 | err = sock_recvmsg(sock, &msg_sys, total_len, flags); |
2058 | if (err < 0) | 2057 | if (err < 0) |
2059 | goto out_freeiov; | 2058 | goto out_freeiov; |
2060 | len = err; | 2059 | len = err; |
2061 | 2060 | ||
2062 | if (uaddr != NULL) { | 2061 | if (uaddr != NULL) { |
2063 | err = move_addr_to_user((struct sockaddr *)&addr, | 2062 | err = move_addr_to_user((struct sockaddr *)&addr, |
2064 | msg_sys.msg_namelen, uaddr, | 2063 | msg_sys.msg_namelen, uaddr, |
2065 | uaddr_len); | 2064 | uaddr_len); |
2066 | if (err < 0) | 2065 | if (err < 0) |
2067 | goto out_freeiov; | 2066 | goto out_freeiov; |
2068 | } | 2067 | } |
2069 | err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), | 2068 | err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), |
2070 | COMPAT_FLAGS(msg)); | 2069 | COMPAT_FLAGS(msg)); |
2071 | if (err) | 2070 | if (err) |
2072 | goto out_freeiov; | 2071 | goto out_freeiov; |
2073 | if (MSG_CMSG_COMPAT & flags) | 2072 | if (MSG_CMSG_COMPAT & flags) |
2074 | err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, | 2073 | err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, |
2075 | &msg_compat->msg_controllen); | 2074 | &msg_compat->msg_controllen); |
2076 | else | 2075 | else |
2077 | err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, | 2076 | err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, |
2078 | &msg->msg_controllen); | 2077 | &msg->msg_controllen); |
2079 | if (err) | 2078 | if (err) |
2080 | goto out_freeiov; | 2079 | goto out_freeiov; |
2081 | err = len; | 2080 | err = len; |
2082 | 2081 | ||
2083 | out_freeiov: | 2082 | out_freeiov: |
2084 | if (iov != iovstack) | 2083 | if (iov != iovstack) |
2085 | sock_kfree_s(sock->sk, iov, iov_size); | 2084 | sock_kfree_s(sock->sk, iov, iov_size); |
2086 | out_put: | 2085 | out_put: |
2087 | fput_light(sock->file, fput_needed); | 2086 | fput_light(sock->file, fput_needed); |
2088 | out: | 2087 | out: |
2089 | return err; | 2088 | return err; |
2090 | } | 2089 | } |
2091 | 2090 | ||
2092 | #ifdef __ARCH_WANT_SYS_SOCKETCALL | 2091 | #ifdef __ARCH_WANT_SYS_SOCKETCALL |
2093 | 2092 | ||
2094 | /* Argument list sizes for sys_socketcall */ | 2093 | /* Argument list sizes for sys_socketcall */ |
2095 | #define AL(x) ((x) * sizeof(unsigned long)) | 2094 | #define AL(x) ((x) * sizeof(unsigned long)) |
2096 | static const unsigned char nargs[19]={ | 2095 | static const unsigned char nargs[19]={ |
2097 | AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), | 2096 | AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), |
2098 | AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), | 2097 | AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), |
2099 | AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), | 2098 | AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), |
2100 | AL(6) | 2099 | AL(6) |
2101 | }; | 2100 | }; |
2102 | 2101 | ||
2103 | #undef AL | 2102 | #undef AL |
2104 | 2103 | ||
2105 | /* | 2104 | /* |
2106 | * System call vectors. | 2105 | * System call vectors. |
2107 | * | 2106 | * |
2108 | * Argument checking cleaned up. Saved 20% in size. | 2107 | * Argument checking cleaned up. Saved 20% in size. |
2109 | * This function doesn't need to set the kernel lock because | 2108 | * This function doesn't need to set the kernel lock because |
2110 | * it is set by the callees. | 2109 | * it is set by the callees. |
2111 | */ | 2110 | */ |
2112 | 2111 | ||
2113 | asmlinkage long sys_socketcall(int call, unsigned long __user *args) | 2112 | asmlinkage long sys_socketcall(int call, unsigned long __user *args) |
2114 | { | 2113 | { |
2115 | unsigned long a[6]; | 2114 | unsigned long a[6]; |
2116 | unsigned long a0, a1; | 2115 | unsigned long a0, a1; |
2117 | int err; | 2116 | int err; |
2118 | 2117 | ||
2119 | if (call < 1 || call > SYS_PACCEPT) | 2118 | if (call < 1 || call > SYS_PACCEPT) |
2120 | return -EINVAL; | 2119 | return -EINVAL; |
2121 | 2120 | ||
2122 | /* copy_from_user should be SMP safe. */ | 2121 | /* copy_from_user should be SMP safe. */ |
2123 | if (copy_from_user(a, args, nargs[call])) | 2122 | if (copy_from_user(a, args, nargs[call])) |
2124 | return -EFAULT; | 2123 | return -EFAULT; |
2125 | 2124 | ||
2126 | err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); | 2125 | err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); |
2127 | if (err) | 2126 | if (err) |
2128 | return err; | 2127 | return err; |
2129 | 2128 | ||
2130 | a0 = a[0]; | 2129 | a0 = a[0]; |
2131 | a1 = a[1]; | 2130 | a1 = a[1]; |
2132 | 2131 | ||
2133 | switch (call) { | 2132 | switch (call) { |
2134 | case SYS_SOCKET: | 2133 | case SYS_SOCKET: |
2135 | err = sys_socket(a0, a1, a[2]); | 2134 | err = sys_socket(a0, a1, a[2]); |
2136 | break; | 2135 | break; |
2137 | case SYS_BIND: | 2136 | case SYS_BIND: |
2138 | err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); | 2137 | err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); |
2139 | break; | 2138 | break; |
2140 | case SYS_CONNECT: | 2139 | case SYS_CONNECT: |
2141 | err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); | 2140 | err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); |
2142 | break; | 2141 | break; |
2143 | case SYS_LISTEN: | 2142 | case SYS_LISTEN: |
2144 | err = sys_listen(a0, a1); | 2143 | err = sys_listen(a0, a1); |
2145 | break; | 2144 | break; |
2146 | case SYS_ACCEPT: | 2145 | case SYS_ACCEPT: |
2147 | err = | 2146 | err = |
2148 | do_accept(a0, (struct sockaddr __user *)a1, | 2147 | do_accept(a0, (struct sockaddr __user *)a1, |
2149 | (int __user *)a[2], 0); | 2148 | (int __user *)a[2], 0); |
2150 | break; | 2149 | break; |
2151 | case SYS_GETSOCKNAME: | 2150 | case SYS_GETSOCKNAME: |
2152 | err = | 2151 | err = |
2153 | sys_getsockname(a0, (struct sockaddr __user *)a1, | 2152 | sys_getsockname(a0, (struct sockaddr __user *)a1, |
2154 | (int __user *)a[2]); | 2153 | (int __user *)a[2]); |
2155 | break; | 2154 | break; |
2156 | case SYS_GETPEERNAME: | 2155 | case SYS_GETPEERNAME: |
2157 | err = | 2156 | err = |
2158 | sys_getpeername(a0, (struct sockaddr __user *)a1, | 2157 | sys_getpeername(a0, (struct sockaddr __user *)a1, |
2159 | (int __user *)a[2]); | 2158 | (int __user *)a[2]); |
2160 | break; | 2159 | break; |
2161 | case SYS_SOCKETPAIR: | 2160 | case SYS_SOCKETPAIR: |
2162 | err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); | 2161 | err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); |
2163 | break; | 2162 | break; |
2164 | case SYS_SEND: | 2163 | case SYS_SEND: |
2165 | err = sys_send(a0, (void __user *)a1, a[2], a[3]); | 2164 | err = sys_send(a0, (void __user *)a1, a[2], a[3]); |
2166 | break; | 2165 | break; |
2167 | case SYS_SENDTO: | 2166 | case SYS_SENDTO: |
2168 | err = sys_sendto(a0, (void __user *)a1, a[2], a[3], | 2167 | err = sys_sendto(a0, (void __user *)a1, a[2], a[3], |
2169 | (struct sockaddr __user *)a[4], a[5]); | 2168 | (struct sockaddr __user *)a[4], a[5]); |
2170 | break; | 2169 | break; |
2171 | case SYS_RECV: | 2170 | case SYS_RECV: |
2172 | err = sys_recv(a0, (void __user *)a1, a[2], a[3]); | 2171 | err = sys_recv(a0, (void __user *)a1, a[2], a[3]); |
2173 | break; | 2172 | break; |
2174 | case SYS_RECVFROM: | 2173 | case SYS_RECVFROM: |
2175 | err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], | 2174 | err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], |
2176 | (struct sockaddr __user *)a[4], | 2175 | (struct sockaddr __user *)a[4], |
2177 | (int __user *)a[5]); | 2176 | (int __user *)a[5]); |
2178 | break; | 2177 | break; |
2179 | case SYS_SHUTDOWN: | 2178 | case SYS_SHUTDOWN: |
2180 | err = sys_shutdown(a0, a1); | 2179 | err = sys_shutdown(a0, a1); |
2181 | break; | 2180 | break; |
2182 | case SYS_SETSOCKOPT: | 2181 | case SYS_SETSOCKOPT: |
2183 | err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); | 2182 | err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); |
2184 | break; | 2183 | break; |
2185 | case SYS_GETSOCKOPT: | 2184 | case SYS_GETSOCKOPT: |
2186 | err = | 2185 | err = |
2187 | sys_getsockopt(a0, a1, a[2], (char __user *)a[3], | 2186 | sys_getsockopt(a0, a1, a[2], (char __user *)a[3], |
2188 | (int __user *)a[4]); | 2187 | (int __user *)a[4]); |
2189 | break; | 2188 | break; |
2190 | case SYS_SENDMSG: | 2189 | case SYS_SENDMSG: |
2191 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); | 2190 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); |
2192 | break; | 2191 | break; |
2193 | case SYS_RECVMSG: | 2192 | case SYS_RECVMSG: |
2194 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); | 2193 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); |
2195 | break; | 2194 | break; |
2196 | case SYS_PACCEPT: | 2195 | case SYS_PACCEPT: |
2197 | err = | 2196 | err = |
2198 | sys_paccept(a0, (struct sockaddr __user *)a1, | 2197 | sys_paccept(a0, (struct sockaddr __user *)a1, |
2199 | (int __user *)a[2], | 2198 | (int __user *)a[2], |
2200 | (const sigset_t __user *) a[3], | 2199 | (const sigset_t __user *) a[3], |
2201 | a[4], a[5]); | 2200 | a[4], a[5]); |
2202 | break; | 2201 | break; |
2203 | default: | 2202 | default: |
2204 | err = -EINVAL; | 2203 | err = -EINVAL; |
2205 | break; | 2204 | break; |
2206 | } | 2205 | } |
2207 | return err; | 2206 | return err; |
2208 | } | 2207 | } |
2209 | 2208 | ||
2210 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ | 2209 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ |
2211 | 2210 | ||
2212 | /** | 2211 | /** |
2213 | * sock_register - add a socket protocol handler | 2212 | * sock_register - add a socket protocol handler |
2214 | * @ops: description of protocol | 2213 | * @ops: description of protocol |
2215 | * | 2214 | * |
2216 | * This function is called by a protocol handler that wants to | 2215 | * This function is called by a protocol handler that wants to |
2217 | * advertise its address family, and have it linked into the | 2216 | * advertise its address family, and have it linked into the |
2218 | * socket interface. The value ops->family coresponds to the | 2217 | * socket interface. The value ops->family coresponds to the |
2219 | * socket system call protocol family. | 2218 | * socket system call protocol family. |
2220 | */ | 2219 | */ |
2221 | int sock_register(const struct net_proto_family *ops) | 2220 | int sock_register(const struct net_proto_family *ops) |
2222 | { | 2221 | { |
2223 | int err; | 2222 | int err; |
2224 | 2223 | ||
2225 | if (ops->family >= NPROTO) { | 2224 | if (ops->family >= NPROTO) { |
2226 | printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, | 2225 | printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, |
2227 | NPROTO); | 2226 | NPROTO); |
2228 | return -ENOBUFS; | 2227 | return -ENOBUFS; |
2229 | } | 2228 | } |
2230 | 2229 | ||
2231 | spin_lock(&net_family_lock); | 2230 | spin_lock(&net_family_lock); |
2232 | if (net_families[ops->family]) | 2231 | if (net_families[ops->family]) |
2233 | err = -EEXIST; | 2232 | err = -EEXIST; |
2234 | else { | 2233 | else { |
2235 | net_families[ops->family] = ops; | 2234 | net_families[ops->family] = ops; |
2236 | err = 0; | 2235 | err = 0; |
2237 | } | 2236 | } |
2238 | spin_unlock(&net_family_lock); | 2237 | spin_unlock(&net_family_lock); |
2239 | 2238 | ||
2240 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); | 2239 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); |
2241 | return err; | 2240 | return err; |
2242 | } | 2241 | } |
2243 | 2242 | ||
2244 | /** | 2243 | /** |
2245 | * sock_unregister - remove a protocol handler | 2244 | * sock_unregister - remove a protocol handler |
2246 | * @family: protocol family to remove | 2245 | * @family: protocol family to remove |
2247 | * | 2246 | * |
2248 | * This function is called by a protocol handler that wants to | 2247 | * This function is called by a protocol handler that wants to |
2249 | * remove its address family, and have it unlinked from the | 2248 | * remove its address family, and have it unlinked from the |
2250 | * new socket creation. | 2249 | * new socket creation. |
2251 | * | 2250 | * |
2252 | * If protocol handler is a module, then it can use module reference | 2251 | * If protocol handler is a module, then it can use module reference |
2253 | * counts to protect against new references. If protocol handler is not | 2252 | * counts to protect against new references. If protocol handler is not |
2254 | * a module then it needs to provide its own protection in | 2253 | * a module then it needs to provide its own protection in |
2255 | * the ops->create routine. | 2254 | * the ops->create routine. |
2256 | */ | 2255 | */ |
2257 | void sock_unregister(int family) | 2256 | void sock_unregister(int family) |
2258 | { | 2257 | { |
2259 | BUG_ON(family < 0 || family >= NPROTO); | 2258 | BUG_ON(family < 0 || family >= NPROTO); |
2260 | 2259 | ||
2261 | spin_lock(&net_family_lock); | 2260 | spin_lock(&net_family_lock); |
2262 | net_families[family] = NULL; | 2261 | net_families[family] = NULL; |
2263 | spin_unlock(&net_family_lock); | 2262 | spin_unlock(&net_family_lock); |
2264 | 2263 | ||
2265 | synchronize_rcu(); | 2264 | synchronize_rcu(); |
2266 | 2265 | ||
2267 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); | 2266 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); |
2268 | } | 2267 | } |
2269 | 2268 | ||
2270 | static int __init sock_init(void) | 2269 | static int __init sock_init(void) |
2271 | { | 2270 | { |
2272 | /* | 2271 | /* |
2273 | * Initialize sock SLAB cache. | 2272 | * Initialize sock SLAB cache. |
2274 | */ | 2273 | */ |
2275 | 2274 | ||
2276 | sk_init(); | 2275 | sk_init(); |
2277 | 2276 | ||
2278 | /* | 2277 | /* |
2279 | * Initialize skbuff SLAB cache | 2278 | * Initialize skbuff SLAB cache |
2280 | */ | 2279 | */ |
2281 | skb_init(); | 2280 | skb_init(); |
2282 | 2281 | ||
2283 | /* | 2282 | /* |
2284 | * Initialize the protocols module. | 2283 | * Initialize the protocols module. |
2285 | */ | 2284 | */ |
2286 | 2285 | ||
2287 | init_inodecache(); | 2286 | init_inodecache(); |
2288 | register_filesystem(&sock_fs_type); | 2287 | register_filesystem(&sock_fs_type); |
2289 | sock_mnt = kern_mount(&sock_fs_type); | 2288 | sock_mnt = kern_mount(&sock_fs_type); |
2290 | 2289 | ||
2291 | /* The real protocol initialization is performed in later initcalls. | 2290 | /* The real protocol initialization is performed in later initcalls. |
2292 | */ | 2291 | */ |
2293 | 2292 | ||
2294 | #ifdef CONFIG_NETFILTER | 2293 | #ifdef CONFIG_NETFILTER |
2295 | netfilter_init(); | 2294 | netfilter_init(); |
2296 | #endif | 2295 | #endif |
2297 | 2296 | ||
2298 | return 0; | 2297 | return 0; |
2299 | } | 2298 | } |
2300 | 2299 | ||
2301 | core_initcall(sock_init); /* early initcall */ | 2300 | core_initcall(sock_init); /* early initcall */ |
2302 | 2301 | ||
2303 | #ifdef CONFIG_PROC_FS | 2302 | #ifdef CONFIG_PROC_FS |
2304 | void socket_seq_show(struct seq_file *seq) | 2303 | void socket_seq_show(struct seq_file *seq) |
2305 | { | 2304 | { |
2306 | int cpu; | 2305 | int cpu; |
2307 | int counter = 0; | 2306 | int counter = 0; |
2308 | 2307 | ||
2309 | for_each_possible_cpu(cpu) | 2308 | for_each_possible_cpu(cpu) |
2310 | counter += per_cpu(sockets_in_use, cpu); | 2309 | counter += per_cpu(sockets_in_use, cpu); |
2311 | 2310 | ||
2312 | /* It can be negative, by the way. 8) */ | 2311 | /* It can be negative, by the way. 8) */ |
2313 | if (counter < 0) | 2312 | if (counter < 0) |
2314 | counter = 0; | 2313 | counter = 0; |
2315 | 2314 | ||
2316 | seq_printf(seq, "sockets: used %d\n", counter); | 2315 | seq_printf(seq, "sockets: used %d\n", counter); |
2317 | } | 2316 | } |
2318 | #endif /* CONFIG_PROC_FS */ | 2317 | #endif /* CONFIG_PROC_FS */ |
2319 | 2318 | ||
2320 | #ifdef CONFIG_COMPAT | 2319 | #ifdef CONFIG_COMPAT |
2321 | static long compat_sock_ioctl(struct file *file, unsigned cmd, | 2320 | static long compat_sock_ioctl(struct file *file, unsigned cmd, |
2322 | unsigned long arg) | 2321 | unsigned long arg) |
2323 | { | 2322 | { |
2324 | struct socket *sock = file->private_data; | 2323 | struct socket *sock = file->private_data; |
2325 | int ret = -ENOIOCTLCMD; | 2324 | int ret = -ENOIOCTLCMD; |
2326 | struct sock *sk; | 2325 | struct sock *sk; |
2327 | struct net *net; | 2326 | struct net *net; |
2328 | 2327 | ||
2329 | sk = sock->sk; | 2328 | sk = sock->sk; |
2330 | net = sock_net(sk); | 2329 | net = sock_net(sk); |
2331 | 2330 | ||
2332 | if (sock->ops->compat_ioctl) | 2331 | if (sock->ops->compat_ioctl) |
2333 | ret = sock->ops->compat_ioctl(sock, cmd, arg); | 2332 | ret = sock->ops->compat_ioctl(sock, cmd, arg); |
2334 | 2333 | ||
2335 | if (ret == -ENOIOCTLCMD && | 2334 | if (ret == -ENOIOCTLCMD && |
2336 | (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) | 2335 | (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) |
2337 | ret = compat_wext_handle_ioctl(net, cmd, arg); | 2336 | ret = compat_wext_handle_ioctl(net, cmd, arg); |
2338 | 2337 | ||
2339 | return ret; | 2338 | return ret; |
2340 | } | 2339 | } |
2341 | #endif | 2340 | #endif |
2342 | 2341 | ||
2343 | int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) | 2342 | int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) |
2344 | { | 2343 | { |
2345 | return sock->ops->bind(sock, addr, addrlen); | 2344 | return sock->ops->bind(sock, addr, addrlen); |
2346 | } | 2345 | } |
2347 | 2346 | ||
2348 | int kernel_listen(struct socket *sock, int backlog) | 2347 | int kernel_listen(struct socket *sock, int backlog) |
2349 | { | 2348 | { |
2350 | return sock->ops->listen(sock, backlog); | 2349 | return sock->ops->listen(sock, backlog); |
2351 | } | 2350 | } |
2352 | 2351 | ||
2353 | int kernel_accept(struct socket *sock, struct socket **newsock, int flags) | 2352 | int kernel_accept(struct socket *sock, struct socket **newsock, int flags) |
2354 | { | 2353 | { |
2355 | struct sock *sk = sock->sk; | 2354 | struct sock *sk = sock->sk; |
2356 | int err; | 2355 | int err; |
2357 | 2356 | ||
2358 | err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, | 2357 | err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, |
2359 | newsock); | 2358 | newsock); |
2360 | if (err < 0) | 2359 | if (err < 0) |
2361 | goto done; | 2360 | goto done; |
2362 | 2361 | ||
2363 | err = sock->ops->accept(sock, *newsock, flags); | 2362 | err = sock->ops->accept(sock, *newsock, flags); |
2364 | if (err < 0) { | 2363 | if (err < 0) { |
2365 | sock_release(*newsock); | 2364 | sock_release(*newsock); |
2366 | *newsock = NULL; | 2365 | *newsock = NULL; |
2367 | goto done; | 2366 | goto done; |
2368 | } | 2367 | } |
2369 | 2368 | ||
2370 | (*newsock)->ops = sock->ops; | 2369 | (*newsock)->ops = sock->ops; |
2371 | 2370 | ||
2372 | done: | 2371 | done: |
2373 | return err; | 2372 | return err; |
2374 | } | 2373 | } |
2375 | 2374 | ||
2376 | int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, | 2375 | int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, |
2377 | int flags) | 2376 | int flags) |
2378 | { | 2377 | { |
2379 | return sock->ops->connect(sock, addr, addrlen, flags); | 2378 | return sock->ops->connect(sock, addr, addrlen, flags); |
2380 | } | 2379 | } |
2381 | 2380 | ||
2382 | int kernel_getsockname(struct socket *sock, struct sockaddr *addr, | 2381 | int kernel_getsockname(struct socket *sock, struct sockaddr *addr, |
2383 | int *addrlen) | 2382 | int *addrlen) |
2384 | { | 2383 | { |
2385 | return sock->ops->getname(sock, addr, addrlen, 0); | 2384 | return sock->ops->getname(sock, addr, addrlen, 0); |
2386 | } | 2385 | } |
2387 | 2386 | ||
2388 | int kernel_getpeername(struct socket *sock, struct sockaddr *addr, | 2387 | int kernel_getpeername(struct socket *sock, struct sockaddr *addr, |
2389 | int *addrlen) | 2388 | int *addrlen) |
2390 | { | 2389 | { |
2391 | return sock->ops->getname(sock, addr, addrlen, 1); | 2390 | return sock->ops->getname(sock, addr, addrlen, 1); |
2392 | } | 2391 | } |
2393 | 2392 | ||
2394 | int kernel_getsockopt(struct socket *sock, int level, int optname, | 2393 | int kernel_getsockopt(struct socket *sock, int level, int optname, |
2395 | char *optval, int *optlen) | 2394 | char *optval, int *optlen) |
2396 | { | 2395 | { |
2397 | mm_segment_t oldfs = get_fs(); | 2396 | mm_segment_t oldfs = get_fs(); |
2398 | int err; | 2397 | int err; |
2399 | 2398 | ||
2400 | set_fs(KERNEL_DS); | 2399 | set_fs(KERNEL_DS); |
2401 | if (level == SOL_SOCKET) | 2400 | if (level == SOL_SOCKET) |
2402 | err = sock_getsockopt(sock, level, optname, optval, optlen); | 2401 | err = sock_getsockopt(sock, level, optname, optval, optlen); |
2403 | else | 2402 | else |
2404 | err = sock->ops->getsockopt(sock, level, optname, optval, | 2403 | err = sock->ops->getsockopt(sock, level, optname, optval, |
2405 | optlen); | 2404 | optlen); |
2406 | set_fs(oldfs); | 2405 | set_fs(oldfs); |
2407 | return err; | 2406 | return err; |
2408 | } | 2407 | } |
2409 | 2408 | ||
2410 | int kernel_setsockopt(struct socket *sock, int level, int optname, | 2409 | int kernel_setsockopt(struct socket *sock, int level, int optname, |
2411 | char *optval, int optlen) | 2410 | char *optval, int optlen) |
2412 | { | 2411 | { |
2413 | mm_segment_t oldfs = get_fs(); | 2412 | mm_segment_t oldfs = get_fs(); |
2414 | int err; | 2413 | int err; |
2415 | 2414 | ||
2416 | set_fs(KERNEL_DS); | 2415 | set_fs(KERNEL_DS); |
2417 | if (level == SOL_SOCKET) | 2416 | if (level == SOL_SOCKET) |
2418 | err = sock_setsockopt(sock, level, optname, optval, optlen); | 2417 | err = sock_setsockopt(sock, level, optname, optval, optlen); |
2419 | else | 2418 | else |
2420 | err = sock->ops->setsockopt(sock, level, optname, optval, | 2419 | err = sock->ops->setsockopt(sock, level, optname, optval, |
2421 | optlen); | 2420 | optlen); |
2422 | set_fs(oldfs); | 2421 | set_fs(oldfs); |
2423 | return err; | 2422 | return err; |
2424 | } | 2423 | } |
2425 | 2424 | ||
2426 | int kernel_sendpage(struct socket *sock, struct page *page, int offset, | 2425 | int kernel_sendpage(struct socket *sock, struct page *page, int offset, |
2427 | size_t size, int flags) | 2426 | size_t size, int flags) |
2428 | { | 2427 | { |
2429 | if (sock->ops->sendpage) | 2428 | if (sock->ops->sendpage) |
2430 | return sock->ops->sendpage(sock, page, offset, size, flags); | 2429 | return sock->ops->sendpage(sock, page, offset, size, flags); |
2431 | 2430 | ||
2432 | return sock_no_sendpage(sock, page, offset, size, flags); | 2431 | return sock_no_sendpage(sock, page, offset, size, flags); |
2433 | } | 2432 | } |
2434 | 2433 | ||
2435 | int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) | 2434 | int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) |
2436 | { | 2435 | { |
2437 | mm_segment_t oldfs = get_fs(); | 2436 | mm_segment_t oldfs = get_fs(); |
2438 | int err; | 2437 | int err; |
2439 | 2438 | ||
2440 | set_fs(KERNEL_DS); | 2439 | set_fs(KERNEL_DS); |
2441 | err = sock->ops->ioctl(sock, cmd, arg); | 2440 | err = sock->ops->ioctl(sock, cmd, arg); |
2442 | set_fs(oldfs); | 2441 | set_fs(oldfs); |
2443 | 2442 | ||
2444 | return err; | 2443 | return err; |
2445 | } | 2444 | } |
2446 | 2445 | ||
2447 | int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) | 2446 | int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) |
2448 | { | 2447 | { |
2449 | return sock->ops->shutdown(sock, how); | 2448 | return sock->ops->shutdown(sock, how); |
2450 | } | 2449 | } |
2451 | 2450 | ||
/* Core socket-layer entry points exported from this file. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() wrappers exported from this file. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);
2476 | 2475 |