Commit 8edf19c2fe028563fc6ea9cb1995b8ee4172d4b6

Authored by Eric Dumazet
Committed by David S. Miller
1 parent c720c7e838

net: sk_drops consolidation part 2

- skb_kill_datagram() can increment sk->sk_drops itself, not callers.

- UDP on IPv4 & IPv6: frames dropped because of a bad checksum or failed policy checks now increment sk_drops

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 6 additions and 2 deletions Inline Diff

1 /* 1 /*
2 * SUCS NET3: 2 * SUCS NET3:
3 * 3 *
4 * Generic datagram handling routines. These are generic for all 4 * Generic datagram handling routines. These are generic for all
5 * protocols. Possibly a generic IP version on top of these would 5 * protocols. Possibly a generic IP version on top of these would
6 * make sense. Not tonight however 8-). 6 * make sense. Not tonight however 8-).
7 * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and 7 * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
8 * NetROM layer all have identical poll code and mostly 8 * NetROM layer all have identical poll code and mostly
9 * identical recvmsg() code. So we share it here. The poll was 9 * identical recvmsg() code. So we share it here. The poll was
10 * shared before but buried in udp.c so I moved it. 10 * shared before but buried in udp.c so I moved it.
11 * 11 *
12 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old 12 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
13 * udp.c code) 13 * udp.c code)
14 * 14 *
15 * Fixes: 15 * Fixes:
16 * Alan Cox : NULL return from skb_peek_copy() 16 * Alan Cox : NULL return from skb_peek_copy()
17 * understood 17 * understood
18 * Alan Cox : Rewrote skb_read_datagram to avoid the 18 * Alan Cox : Rewrote skb_read_datagram to avoid the
19 * skb_peek_copy stuff. 19 * skb_peek_copy stuff.
20 * Alan Cox : Added support for SOCK_SEQPACKET. 20 * Alan Cox : Added support for SOCK_SEQPACKET.
21 * IPX can no longer use the SO_TYPE hack 21 * IPX can no longer use the SO_TYPE hack
22 * but AX.25 now works right, and SPX is 22 * but AX.25 now works right, and SPX is
23 * feasible. 23 * feasible.
24 * Alan Cox : Fixed write poll of non IP protocol 24 * Alan Cox : Fixed write poll of non IP protocol
25 * crash. 25 * crash.
26 * Florian La Roche: Changed for my new skbuff handling. 26 * Florian La Roche: Changed for my new skbuff handling.
27 * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. 27 * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
28 * Linus Torvalds : BSD semantic fixes. 28 * Linus Torvalds : BSD semantic fixes.
29 * Alan Cox : Datagram iovec handling 29 * Alan Cox : Datagram iovec handling
30 * Darryl Miles : Fixed non-blocking SOCK_STREAM. 30 * Darryl Miles : Fixed non-blocking SOCK_STREAM.
31 * Alan Cox : POSIXisms 31 * Alan Cox : POSIXisms
32 * Pete Wyckoff : Unconnected accept() fix. 32 * Pete Wyckoff : Unconnected accept() fix.
33 * 33 *
34 */ 34 */
35 35
36 #include <linux/module.h> 36 #include <linux/module.h>
37 #include <linux/types.h> 37 #include <linux/types.h>
38 #include <linux/kernel.h> 38 #include <linux/kernel.h>
39 #include <asm/uaccess.h> 39 #include <asm/uaccess.h>
40 #include <asm/system.h> 40 #include <asm/system.h>
41 #include <linux/mm.h> 41 #include <linux/mm.h>
42 #include <linux/interrupt.h> 42 #include <linux/interrupt.h>
43 #include <linux/errno.h> 43 #include <linux/errno.h>
44 #include <linux/sched.h> 44 #include <linux/sched.h>
45 #include <linux/inet.h> 45 #include <linux/inet.h>
46 #include <linux/netdevice.h> 46 #include <linux/netdevice.h>
47 #include <linux/rtnetlink.h> 47 #include <linux/rtnetlink.h>
48 #include <linux/poll.h> 48 #include <linux/poll.h>
49 #include <linux/highmem.h> 49 #include <linux/highmem.h>
50 #include <linux/spinlock.h> 50 #include <linux/spinlock.h>
51 51
52 #include <net/protocol.h> 52 #include <net/protocol.h>
53 #include <linux/skbuff.h> 53 #include <linux/skbuff.h>
54 54
55 #include <net/checksum.h> 55 #include <net/checksum.h>
56 #include <net/sock.h> 56 #include <net/sock.h>
57 #include <net/tcp_states.h> 57 #include <net/tcp_states.h>
58 #include <trace/events/skb.h> 58 #include <trace/events/skb.h>
59 59
60 /* 60 /*
61 * Is a socket 'connection oriented' ? 61 * Is a socket 'connection oriented' ?
62 */ 62 */
63 static inline int connection_based(struct sock *sk) 63 static inline int connection_based(struct sock *sk)
64 { 64 {
65 return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; 65 return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
66 } 66 }
67 67
68 static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, 68 static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
69 void *key) 69 void *key)
70 { 70 {
71 unsigned long bits = (unsigned long)key; 71 unsigned long bits = (unsigned long)key;
72 72
73 /* 73 /*
74 * Avoid a wakeup if event not interesting for us 74 * Avoid a wakeup if event not interesting for us
75 */ 75 */
76 if (bits && !(bits & (POLLIN | POLLERR))) 76 if (bits && !(bits & (POLLIN | POLLERR)))
77 return 0; 77 return 0;
78 return autoremove_wake_function(wait, mode, sync, key); 78 return autoremove_wake_function(wait, mode, sync, key);
79 } 79 }
80 /* 80 /*
81 * Wait for a packet.. 81 * Wait for a packet..
82 */ 82 */
/*
 * wait_for_packet - sleep until a datagram may be available on @sk.
 * @sk: socket to wait on
 * @err: out-parameter for the error code on the non-zero return paths
 * @timeo_p: in/out remaining timeout in jiffies; decremented by the sleep
 *
 * Returns 0 when the caller should re-check the receive queue (data may
 * have arrived), and non-zero when waiting must stop: either a negative
 * error (also stored in *err) or 1 with *err == 0 on RCV_SHUTDOWN.
 *
 * The order of the checks below follows POSIX 1003.1g (see the comment
 * on __skb_recv_datagram) — do not reorder them.
 */
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
{
	int error;
	/* Use receiver_wake_function so only POLLIN/POLLERR wake us. */
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	/* Data may have raced in between the caller's check and ours. */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	/* Sleep; schedule_timeout() returns the jiffies left. */
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk->sk_sleep, &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	/* Orderly shutdown: not an error, but stop waiting. */
	*err = 0;
	error = 1;
	goto out;
}
129 129
/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@err: error code returned
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	NOTE(review): the next paragraph contradicts the ANK note below —
 *	the code only takes the receive-queue spinlock, never lock_sock().
 *	Presumably the "locks the socket" paragraph is stale; confirm and
 *	remove it.
 *
 *	This function will lock the socket if a skb is returned, so the caller
 *	needs to unlock the socket in that case (usually by calling
 *	skb_free_datagram)
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	* 8) Great win.)
 *	*			                    --ANK (980729)
 *
 *	The order of the tests when we find no data waiting are specified
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
				    int *peeked, int *err)
{
	struct sk_buff *skb;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	/* MSG_DONTWAIT yields a zero timeout, i.e. non-blocking. */
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		unsigned long cpu_flags;

		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb) {
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				/* Leave the skb queued but take an extra
				 * reference so it cannot be freed while
				 * the caller still looks at it.
				 */
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, &sk->sk_receive_queue);
		}
		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);

		if (skb)
			return skb;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_packet(sk, err, &timeo));

	/* wait_for_packet() already stored the error in *err. */
	return NULL;

no_packet:
	*err = error;
	return NULL;
}
211 EXPORT_SYMBOL(__skb_recv_datagram); 211 EXPORT_SYMBOL(__skb_recv_datagram);
212 212
/*
 * skb_recv_datagram - legacy wrapper around __skb_recv_datagram.
 *
 * Translates the historical @noblock flag into MSG_DONTWAIT and discards
 * the "already peeked" indication that most callers do not need.
 */
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
				  int noblock, int *err)
{
	int peeked;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, err);
}
221 221
/*
 * skb_free_datagram - release a datagram obtained from skb_recv_datagram.
 *
 * Frees the skb as a normal consume (not accounted as a drop, unlike
 * skb_kill_datagram) and returns any excess forward-allocated receive
 * memory on @sk back to the system.
 */
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
227 227
/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram. The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock. Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		/* A peeked skb is still queued: unlink it (dropping the
		 * queue's reference) only if it is still at the head —
		 * another reader may have consumed it meanwhile.
		 */
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	/* A killed datagram counts as a drop on this socket, so callers
	 * no longer need to bump sk_drops themselves.
	 */
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
269 270
270 EXPORT_SYMBOL(skb_kill_datagram); 271 EXPORT_SYMBOL(skb_kill_datagram);
271 272
/**
 *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Note: the iovec is modified during the copy.
 *	Returns 0 on success or -EFAULT if copying to userspace failed.
 *
 *	The skb is walked in three stages: the linear header, the paged
 *	fragments, and finally any chained skbs (via recursion).
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
			    struct iovec *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			/* Fragment pages may live in highmem; map briefly. */
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset - start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	/* Recurse into the chained (frag_list) skbs, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iovec(frag_iter,
						    offset - start,
						    to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
354 355
/**
 *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@to_offset: offset in the io vector to start copying to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy; @to_offset tracks
 *	the write position instead, so the same iovec can be reused.
 */
int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
				  const struct iovec *to, int to_offset,
				  int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			/* Fragment pages may live in highmem; map briefly. */
			vaddr = kmap(page);
			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
						offset - start, to_offset, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}

	/* Recurse into the chained (frag_list) skbs, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_const_iovec(frag_iter,
							  offset - start,
							  to, to_offset,
							  copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
442 EXPORT_SYMBOL(skb_copy_datagram_const_iovec); 443 EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
443 444
/**
 *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: io vector to copy from
 *	@from_offset: offset in the io vector to start copying from
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy; @from_offset tracks
 *	the read position instead. Walks the linear header, the paged
 *	fragments, then chained skbs (via recursion).
 */
int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
				 const struct iovec *from, int from_offset,
				 int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
					copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			/* Fragment pages may live in highmem; map briefly. */
			vaddr = kmap(page);
			err = memcpy_fromiovecend(vaddr + frag->page_offset +
						  offset - start,
						  from, from_offset, copy);
			kunmap(page);
			if (err)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}

	/* Recurse into the chained (frag_list) skbs, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iovec(frag_iter,
							 offset - start,
							 from,
							 from_offset,
							 copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
535 EXPORT_SYMBOL(skb_copy_datagram_from_iovec); 536 EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
536 537
/*
 * skb_copy_and_csum_datagram - copy @len bytes of @skb (from @offset) to
 * the user buffer @to while folding the copied data into *@csump.
 *
 * *@csump must be initialised by the caller (typically with the
 * pseudo-header checksum). Partial checksums from fragments and chained
 * skbs are combined with csum_block_add() at byte position @pos, so the
 * statement order here is significant. Returns 0 or -EFAULT.
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;	/* bytes copied so far; block offset for csum folds */

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			/* Fragment pages may live in highmem; map briefly. */
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
							frag->page_offset +
							offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			/* Fold the fragment's checksum in at its position. */
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	/* Recurse into the chained (frag_list) skbs, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
625 626
626 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) 627 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
627 { 628 {
628 __sum16 sum; 629 __sum16 sum;
629 630
630 sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); 631 sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
631 if (likely(!sum)) { 632 if (likely(!sum)) {
632 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) 633 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
633 netdev_rx_csum_fault(skb->dev); 634 netdev_rx_csum_fault(skb->dev);
634 skb->ip_summed = CHECKSUM_UNNECESSARY; 635 skb->ip_summed = CHECKSUM_UNNECESSARY;
635 } 636 }
636 return sum; 637 return sum;
637 } 638 }
638 EXPORT_SYMBOL(__skb_checksum_complete_head); 639 EXPORT_SYMBOL(__skb_checksum_complete_head);
639 640
/*
 * __skb_checksum_complete - verify the checksum over an entire buffer
 * @skb: buffer to verify
 *
 * Convenience wrapper around __skb_checksum_complete_head() covering
 * all skb->len bytes.
 */
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete_head(skb, skb->len);
}
EXPORT_SYMBOL(__skb_checksum_complete);
645 646
/**
 *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@iov: io vector
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy. Beware, in this case iovec
 *			   can be modified!
 */
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
				     int hlen, struct iovec *iov)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	/* Nothing beyond the first hlen bytes: nothing to copy or verify. */
	if (!chunk)
		return 0;

	/* Skip filled elements.
	 * Pretty silly, look at memcpy_toiovec, though 8)
	 */
	while (!iov->iov_len)
		iov++;

	if (iov->iov_len < chunk) {
		/* Payload spans several iovec entries: verify the whole
		 * checksum first, then fall back to a plain copy.
		 */
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		/* Single destination buffer: checksum while copying,
		 * folding in the first hlen bytes that stay in the kernel.
		 */
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		/* Checksum was fine although the device claimed
		 * CHECKSUM_COMPLETE earlier - report the hardware fault.
		 */
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		/* Consume the bytes we just wrote into this iovec entry. */
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
697 698
/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	/* Register the caller on the socket's poll wait queue. */
	sock_poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable?  Receive-side shutdown also reads as "readable" so
	 * blocked readers wake up and see EOF.
	 */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable?  If not, ask for a write_space callback when
	 * buffer space frees up.
	 */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

EXPORT_SYMBOL(datagram_poll);
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
EXPORT_SYMBOL(skb_copy_datagram_iovec);
EXPORT_SYMBOL(skb_free_datagram);
EXPORT_SYMBOL(skb_recv_datagram);
757 758
1 /* 1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level. 4 * interface as the means of communication with the user level.
5 * 5 *
6 * The User Datagram Protocol (UDP). 6 * The User Datagram Protocol (UDP).
7 * 7 *
8 * Authors: Ross Biro 8 * Authors: Ross Biro
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 10 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
11 * Alan Cox, <alan@lxorguk.ukuu.org.uk> 11 * Alan Cox, <alan@lxorguk.ukuu.org.uk>
12 * Hirokazu Takahashi, <taka@valinux.co.jp> 12 * Hirokazu Takahashi, <taka@valinux.co.jp>
13 * 13 *
14 * Fixes: 14 * Fixes:
15 * Alan Cox : verify_area() calls 15 * Alan Cox : verify_area() calls
16 * Alan Cox : stopped close while in use off icmp 16 * Alan Cox : stopped close while in use off icmp
17 * messages. Not a fix but a botch that 17 * messages. Not a fix but a botch that
18 * for udp at least is 'valid'. 18 * for udp at least is 'valid'.
19 * Alan Cox : Fixed icmp handling properly 19 * Alan Cox : Fixed icmp handling properly
20 * Alan Cox : Correct error for oversized datagrams 20 * Alan Cox : Correct error for oversized datagrams
21 * Alan Cox : Tidied select() semantics. 21 * Alan Cox : Tidied select() semantics.
22 * Alan Cox : udp_err() fixed properly, also now 22 * Alan Cox : udp_err() fixed properly, also now
23 * select and read wake correctly on errors 23 * select and read wake correctly on errors
24 * Alan Cox : udp_send verify_area moved to avoid mem leak 24 * Alan Cox : udp_send verify_area moved to avoid mem leak
25 * Alan Cox : UDP can count its memory 25 * Alan Cox : UDP can count its memory
26 * Alan Cox : send to an unknown connection causes 26 * Alan Cox : send to an unknown connection causes
27 * an ECONNREFUSED off the icmp, but 27 * an ECONNREFUSED off the icmp, but
28 * does NOT close. 28 * does NOT close.
29 * Alan Cox : Switched to new sk_buff handlers. No more backlog! 29 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
30 * Alan Cox : Using generic datagram code. Even smaller and the PEEK 30 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
31 * bug no longer crashes it. 31 * bug no longer crashes it.
32 * Fred Van Kempen : Net2e support for sk->broadcast. 32 * Fred Van Kempen : Net2e support for sk->broadcast.
33 * Alan Cox : Uses skb_free_datagram 33 * Alan Cox : Uses skb_free_datagram
34 * Alan Cox : Added get/set sockopt support. 34 * Alan Cox : Added get/set sockopt support.
35 * Alan Cox : Broadcasting without option set returns EACCES. 35 * Alan Cox : Broadcasting without option set returns EACCES.
36 * Alan Cox : No wakeup calls. Instead we now use the callbacks. 36 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
37 * Alan Cox : Use ip_tos and ip_ttl 37 * Alan Cox : Use ip_tos and ip_ttl
38 * Alan Cox : SNMP Mibs 38 * Alan Cox : SNMP Mibs
39 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. 39 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
40 * Matt Dillon : UDP length checks. 40 * Matt Dillon : UDP length checks.
41 * Alan Cox : Smarter af_inet used properly. 41 * Alan Cox : Smarter af_inet used properly.
42 * Alan Cox : Use new kernel side addressing. 42 * Alan Cox : Use new kernel side addressing.
43 * Alan Cox : Incorrect return on truncated datagram receive. 43 * Alan Cox : Incorrect return on truncated datagram receive.
44 * Arnt Gulbrandsen : New udp_send and stuff 44 * Arnt Gulbrandsen : New udp_send and stuff
45 * Alan Cox : Cache last socket 45 * Alan Cox : Cache last socket
46 * Alan Cox : Route cache 46 * Alan Cox : Route cache
47 * Jon Peatfield : Minor efficiency fix to sendto(). 47 * Jon Peatfield : Minor efficiency fix to sendto().
48 * Mike Shaver : RFC1122 checks. 48 * Mike Shaver : RFC1122 checks.
49 * Alan Cox : Nonblocking error fix. 49 * Alan Cox : Nonblocking error fix.
50 * Willy Konynenberg : Transparent proxying support. 50 * Willy Konynenberg : Transparent proxying support.
51 * Mike McLagan : Routing by source 51 * Mike McLagan : Routing by source
52 * David S. Miller : New socket lookup architecture. 52 * David S. Miller : New socket lookup architecture.
53 * Last socket cache retained as it 53 * Last socket cache retained as it
54 * does have a high hit rate. 54 * does have a high hit rate.
55 * Olaf Kirch : Don't linearise iovec on sendmsg. 55 * Olaf Kirch : Don't linearise iovec on sendmsg.
56 * Andi Kleen : Some cleanups, cache destination entry 56 * Andi Kleen : Some cleanups, cache destination entry
57 * for connect. 57 * for connect.
58 * Vitaly E. Lavrov : Transparent proxy revived after year coma. 58 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
59 * Melvin Smith : Check msg_name not msg_namelen in sendto(), 59 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
60 * return ENOTCONN for unconnected sockets (POSIX) 60 * return ENOTCONN for unconnected sockets (POSIX)
61 * Janos Farkas : don't deliver multi/broadcasts to a different 61 * Janos Farkas : don't deliver multi/broadcasts to a different
62 * bound-to-device socket 62 * bound-to-device socket
63 * Hirokazu Takahashi : HW checksumming for outgoing UDP 63 * Hirokazu Takahashi : HW checksumming for outgoing UDP
64 * datagrams. 64 * datagrams.
65 * Hirokazu Takahashi : sendfile() on UDP works now. 65 * Hirokazu Takahashi : sendfile() on UDP works now.
66 * Arnaldo C. Melo : convert /proc/net/udp to seq_file 66 * Arnaldo C. Melo : convert /proc/net/udp to seq_file
67 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 67 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
68 * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind 68 * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
69 * a single port at the same time. 69 * a single port at the same time.
 70 * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support 70 * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
71 * James Chapman : Add L2TP encapsulation type. 71 * James Chapman : Add L2TP encapsulation type.
72 * 72 *
73 * 73 *
74 * This program is free software; you can redistribute it and/or 74 * This program is free software; you can redistribute it and/or
75 * modify it under the terms of the GNU General Public License 75 * modify it under the terms of the GNU General Public License
76 * as published by the Free Software Foundation; either version 76 * as published by the Free Software Foundation; either version
77 * 2 of the License, or (at your option) any later version. 77 * 2 of the License, or (at your option) any later version.
78 */ 78 */
79 79
80 #include <asm/system.h> 80 #include <asm/system.h>
81 #include <asm/uaccess.h> 81 #include <asm/uaccess.h>
82 #include <asm/ioctls.h> 82 #include <asm/ioctls.h>
83 #include <linux/bootmem.h> 83 #include <linux/bootmem.h>
84 #include <linux/highmem.h> 84 #include <linux/highmem.h>
85 #include <linux/swap.h> 85 #include <linux/swap.h>
86 #include <linux/types.h> 86 #include <linux/types.h>
87 #include <linux/fcntl.h> 87 #include <linux/fcntl.h>
88 #include <linux/module.h> 88 #include <linux/module.h>
89 #include <linux/socket.h> 89 #include <linux/socket.h>
90 #include <linux/sockios.h> 90 #include <linux/sockios.h>
91 #include <linux/igmp.h> 91 #include <linux/igmp.h>
92 #include <linux/in.h> 92 #include <linux/in.h>
93 #include <linux/errno.h> 93 #include <linux/errno.h>
94 #include <linux/timer.h> 94 #include <linux/timer.h>
95 #include <linux/mm.h> 95 #include <linux/mm.h>
96 #include <linux/inet.h> 96 #include <linux/inet.h>
97 #include <linux/netdevice.h> 97 #include <linux/netdevice.h>
98 #include <net/tcp_states.h> 98 #include <net/tcp_states.h>
99 #include <linux/skbuff.h> 99 #include <linux/skbuff.h>
100 #include <linux/proc_fs.h> 100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h> 101 #include <linux/seq_file.h>
102 #include <net/net_namespace.h> 102 #include <net/net_namespace.h>
103 #include <net/icmp.h> 103 #include <net/icmp.h>
104 #include <net/route.h> 104 #include <net/route.h>
105 #include <net/checksum.h> 105 #include <net/checksum.h>
106 #include <net/xfrm.h> 106 #include <net/xfrm.h>
107 #include "udp_impl.h" 107 #include "udp_impl.h"
108 108
/* Global hash table of UDP sockets, used by both IPv4 and IPv6 lookups. */
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);

/* Tunable memory-accounting limits for UDP.
 * NOTE(review): the three sysctl_udp_mem elements presumably follow the
 * usual *_mem min/pressure/max convention - confirm against the sysctl
 * registration, which is outside this file chunk.
 */
int sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);

/* Per-socket receive-buffer floor used by UDP memory accounting. */
int sysctl_udp_rmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_rmem_min);

/* Per-socket send-buffer floor used by UDP memory accounting. */
int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);

/* Running total of memory currently charged to UDP sockets. */
atomic_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);

#define MAX_UDP_PORTS 65536
/* Worst-case number of distinct port values hashing onto one chain. */
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
126 126
/*
 * udp_lib_lport_inuse - scan one hash chain for conflicting port bindings
 * @net:	network namespace of @sk
 * @num:	port number being bound (ignored when @bitmap is given)
 * @hslot:	hash slot (chain) to scan
 * @bitmap:	if non-NULL, record every conflicting socket's port in the
 *		bitmap (bit index sk_hash >> @log) instead of stopping early
 * @sk:		socket attempting to bind
 * @saddr_comp:	AF-specific callback deciding if two local addresses clash
 * @log:	log2 scaling applied to sk_hash for bitmap indexing
 *
 * Returns 1 as soon as a conflict is found in non-bitmap mode, else 0.
 * A socket conflicts when it is in the same namespace, is not @sk itself,
 * matches the port (or we are populating the bitmap), neither side has
 * SO_REUSEADDR, the device bindings overlap, and the local addresses
 * compare as clashing.
 */
static int udp_lib_lport_inuse(struct net *net, __u16 num,
			       const struct udp_hslot *hslot,
			       unsigned long *bitmap,
			       struct sock *sk,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2),
			       unsigned int log)
{
	struct sock *sk2;
	struct hlist_nulls_node *node;

	sk_nulls_for_each(sk2, node, &hslot->head)
		if (net_eq(sock_net(sk2), net) &&
		    sk2 != sk &&
		    (bitmap || sk2->sk_hash == num) &&
		    (!sk2->sk_reuse || !sk->sk_reuse) &&
		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
		     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    (*saddr_comp)(sk, sk2)) {
			if (bitmap)
				__set_bit(sk2->sk_hash >> log, bitmap);
			else
				return 1;
		}
	return 0;
}
153 153
/**
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up; 0 requests automatic selection
 *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
 *
 *  Returns 0 on success (the socket is hashed with the chosen port),
 *  1 if the port is in use or no free port could be found.
 */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
		     int (*saddr_comp)(const struct sock *sk1,
				       const struct sock *sk2))
{
	struct udp_hslot *hslot;
	struct udp_table *udptable = sk->sk_prot->h.udp_table;
	int error = 1;
	struct net *net = sock_net(sk);

	if (!snum) {
		/* Automatic selection: visit each hash chain once,
		 * starting at a random slot.
		 */
		int low, high, remaining;
		unsigned rand;
		unsigned short first, last;
		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);

		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		rand = net_random();
		first = (((u64)rand * remaining) >> 32) + low;
		/*
		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
		 */
		rand = (rand | 1) * (udptable->mask + 1);
		for (last = first + udptable->mask + 1;
		     first != last;
		     first++) {
			hslot = udp_hashslot(udptable, net, first);
			bitmap_zero(bitmap, PORTS_PER_CHAIN);
			spin_lock_bh(&hslot->lock);
			/* Mark every port on this chain that clashes with
			 * @sk; snum is ignored in bitmap mode.
			 */
			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
					    saddr_comp, udptable->log);

			snum = first;
			/*
			 * Iterate on all possible values of snum for this hash.
			 * Using steps of an odd multiple of UDP_HTABLE_SIZE
			 * give us randomization and full range coverage.
			 */
			do {
				if (low <= snum && snum <= high &&
				    !test_bit(snum >> udptable->log, bitmap))
					goto found;
				snum += rand;
			} while (snum != first);
			spin_unlock_bh(&hslot->lock);
		}
		goto fail;
	} else {
		/* Explicit port request: only its own chain can conflict. */
		hslot = udp_hashslot(udptable, net, snum);
		spin_lock_bh(&hslot->lock);
		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
					saddr_comp, 0))
			goto fail_unlock;
	}
found:
	/* hslot->lock is held here on both paths; publish the binding. */
	inet_sk(sk)->inet_num = snum;
	sk->sk_hash = snum;
	if (sk_unhashed(sk)) {
		sk_nulls_add_node_rcu(sk, &hslot->head);
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	}
	error = 0;
fail_unlock:
	spin_unlock_bh(&hslot->lock);
fail:
	return error;
}
EXPORT_SYMBOL(udp_lib_get_port);
230 230
231 static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 231 static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
232 { 232 {
233 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 233 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
234 234
235 return (!ipv6_only_sock(sk2) && 235 return (!ipv6_only_sock(sk2) &&
236 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || 236 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
237 inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); 237 inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
238 } 238 }
239 239
/*
 * udp_v4_get_port - bind an IPv4 UDP socket to @snum (0 = pick a free port)
 *
 * Thin wrapper feeding the IPv4 local-address comparator into the
 * family-independent port allocator.
 */
int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
244 244
245 static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, 245 static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
246 unsigned short hnum, 246 unsigned short hnum,
247 __be16 sport, __be32 daddr, __be16 dport, int dif) 247 __be16 sport, __be32 daddr, __be16 dport, int dif)
248 { 248 {
249 int score = -1; 249 int score = -1;
250 250
251 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 251 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
252 !ipv6_only_sock(sk)) { 252 !ipv6_only_sock(sk)) {
253 struct inet_sock *inet = inet_sk(sk); 253 struct inet_sock *inet = inet_sk(sk);
254 254
255 score = (sk->sk_family == PF_INET ? 1 : 0); 255 score = (sk->sk_family == PF_INET ? 1 : 0);
256 if (inet->inet_rcv_saddr) { 256 if (inet->inet_rcv_saddr) {
257 if (inet->inet_rcv_saddr != daddr) 257 if (inet->inet_rcv_saddr != daddr)
258 return -1; 258 return -1;
259 score += 2; 259 score += 2;
260 } 260 }
261 if (inet->inet_daddr) { 261 if (inet->inet_daddr) {
262 if (inet->inet_daddr != saddr) 262 if (inet->inet_daddr != saddr)
263 return -1; 263 return -1;
264 score += 2; 264 score += 2;
265 } 265 }
266 if (inet->inet_dport) { 266 if (inet->inet_dport) {
267 if (inet->inet_dport != sport) 267 if (inet->inet_dport != sport)
268 return -1; 268 return -1;
269 score += 2; 269 score += 2;
270 } 270 }
271 if (sk->sk_bound_dev_if) { 271 if (sk->sk_bound_dev_if) {
272 if (sk->sk_bound_dev_if != dif) 272 if (sk->sk_bound_dev_if != dif)
273 return -1; 273 return -1;
274 score += 2; 274 score += 2;
275 } 275 }
276 } 276 }
277 return score; 277 return score;
278 } 278 }
279 279
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 * harder than this. -DaveM
 */
static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
		__be16 sport, __be32 daddr, __be16 dport,
		int dif, struct udp_table *udptable)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned short hnum = ntohs(dport);
	unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot = &udptable->hash[hash];
	int score, badness;

	rcu_read_lock();
begin:
	result = NULL;
	badness = -1;
	/* Lockless scan of the chain, remembering the best-scoring
	 * (most specific) matching socket.
	 */
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, saddr, hnum, sport,
				      daddr, dport, dif);
		if (score > badness) {
			result = sk;
			badness = score;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash)
		goto begin;

	if (result) {
		/* The socket may be freed or reused concurrently: take a
		 * reference only if it is still live, then re-score it to
		 * make sure it was not recycled for a different binding.
		 */
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
				  daddr, dport, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}
326 326
327 static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, 327 static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
328 __be16 sport, __be16 dport, 328 __be16 sport, __be16 dport,
329 struct udp_table *udptable) 329 struct udp_table *udptable)
330 { 330 {
331 struct sock *sk; 331 struct sock *sk;
332 const struct iphdr *iph = ip_hdr(skb); 332 const struct iphdr *iph = ip_hdr(skb);
333 333
334 if (unlikely(sk = skb_steal_sock(skb))) 334 if (unlikely(sk = skb_steal_sock(skb)))
335 return sk; 335 return sk;
336 else 336 else
337 return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, 337 return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
338 iph->daddr, dport, inet_iif(skb), 338 iph->daddr, dport, inet_iif(skb),
339 udptable); 339 udptable);
340 } 340 }
341 341
/*
 * udp4_lib_lookup - public 4-tuple socket lookup in the global UDP table
 *
 * Convenience wrapper around __udp4_lib_lookup() using &udp_table.
 */
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
			     __be32 daddr, __be16 dport, int dif)
{
	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
348 348
/*
 * udp_v4_mcast_next - next multicast-eligible socket on a hash chain
 * @net:      namespace the packet arrived in
 * @sk:       chain position to start scanning from (inclusive)
 * @loc_port: destination (local) port of the packet
 * @loc_addr: destination (local) address
 * @rmt_port: source (remote) port
 * @rmt_addr: source (remote) address
 * @dif:      receiving interface index
 *
 * Returns the first socket at or after @sk whose bindings and multicast
 * source filter accept the packet, or NULL when the chain is exhausted.
 */
static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
					     __be16 loc_port, __be32 loc_addr,
					     __be16 rmt_port, __be32 rmt_addr,
					     int dif)
{
	struct hlist_nulls_node *node;
	struct sock *s = sk;
	unsigned short hnum = ntohs(loc_port);

	sk_nulls_for_each_from(s, node) {
		struct inet_sock *inet = inet_sk(s);

		/* Skip sockets whose namespace, port hash, connected peer,
		 * bound local address, address family or bound device
		 * rules this packet out.  Zero-valued fields are wildcards.
		 */
		if (!net_eq(sock_net(s), net) ||
		    s->sk_hash != hnum ||
		    (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
		    (inet->inet_dport != rmt_port && inet->inet_dport) ||
		    (inet->inet_rcv_saddr &&
		     inet->inet_rcv_saddr != loc_addr) ||
		    ipv6_only_sock(s) ||
		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
			continue;
		/* Per-socket multicast source filtering (IGMPv3). */
		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
			continue;
		goto found;
	}
	s = NULL;
found:
	return s;
}
378 378
379 /* 379 /*
380 * This routine is called by the ICMP module when it gets some 380 * This routine is called by the ICMP module when it gets some
381 * sort of error condition. If err < 0 then the socket should 381 * sort of error condition. If err < 0 then the socket should
382 * be closed and the error returned to the user. If err > 0 382 * be closed and the error returned to the user. If err > 0
383 * it's just the icmp type << 8 | icmp code. 383 * it's just the icmp type << 8 | icmp code.
384 * Header points to the ip header of the error packet. We move 384 * Header points to the ip header of the error packet. We move
385 * on past this. Then (as it used to claim before adjustment) 385 * on past this. Then (as it used to claim before adjustment)
386 * header points to the first 8 bytes of the udp header. We need 386 * header points to the first 8 bytes of the udp header. We need
387 * to find the appropriate port. 387 * to find the appropriate port.
388 */ 388 */
389 389
390 void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 390 void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
391 { 391 {
392 struct inet_sock *inet; 392 struct inet_sock *inet;
393 struct iphdr *iph = (struct iphdr *)skb->data; 393 struct iphdr *iph = (struct iphdr *)skb->data;
394 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 394 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
395 const int type = icmp_hdr(skb)->type; 395 const int type = icmp_hdr(skb)->type;
396 const int code = icmp_hdr(skb)->code; 396 const int code = icmp_hdr(skb)->code;
397 struct sock *sk; 397 struct sock *sk;
398 int harderr; 398 int harderr;
399 int err; 399 int err;
400 struct net *net = dev_net(skb->dev); 400 struct net *net = dev_net(skb->dev);
401 401
402 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, 402 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
403 iph->saddr, uh->source, skb->dev->ifindex, udptable); 403 iph->saddr, uh->source, skb->dev->ifindex, udptable);
404 if (sk == NULL) { 404 if (sk == NULL) {
405 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 405 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
406 return; /* No socket for error */ 406 return; /* No socket for error */
407 } 407 }
408 408
409 err = 0; 409 err = 0;
410 harderr = 0; 410 harderr = 0;
411 inet = inet_sk(sk); 411 inet = inet_sk(sk);
412 412
413 switch (type) { 413 switch (type) {
414 default: 414 default:
415 case ICMP_TIME_EXCEEDED: 415 case ICMP_TIME_EXCEEDED:
416 err = EHOSTUNREACH; 416 err = EHOSTUNREACH;
417 break; 417 break;
418 case ICMP_SOURCE_QUENCH: 418 case ICMP_SOURCE_QUENCH:
419 goto out; 419 goto out;
420 case ICMP_PARAMETERPROB: 420 case ICMP_PARAMETERPROB:
421 err = EPROTO; 421 err = EPROTO;
422 harderr = 1; 422 harderr = 1;
423 break; 423 break;
424 case ICMP_DEST_UNREACH: 424 case ICMP_DEST_UNREACH:
425 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ 425 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
426 if (inet->pmtudisc != IP_PMTUDISC_DONT) { 426 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
427 err = EMSGSIZE; 427 err = EMSGSIZE;
428 harderr = 1; 428 harderr = 1;
429 break; 429 break;
430 } 430 }
431 goto out; 431 goto out;
432 } 432 }
433 err = EHOSTUNREACH; 433 err = EHOSTUNREACH;
434 if (code <= NR_ICMP_UNREACH) { 434 if (code <= NR_ICMP_UNREACH) {
435 harderr = icmp_err_convert[code].fatal; 435 harderr = icmp_err_convert[code].fatal;
436 err = icmp_err_convert[code].errno; 436 err = icmp_err_convert[code].errno;
437 } 437 }
438 break; 438 break;
439 } 439 }
440 440
441 /* 441 /*
442 * RFC1122: OK. Passes ICMP errors back to application, as per 442 * RFC1122: OK. Passes ICMP errors back to application, as per
443 * 4.1.3.3. 443 * 4.1.3.3.
444 */ 444 */
445 if (!inet->recverr) { 445 if (!inet->recverr) {
446 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 446 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
447 goto out; 447 goto out;
448 } else { 448 } else {
449 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); 449 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
450 } 450 }
451 sk->sk_err = err; 451 sk->sk_err = err;
452 sk->sk_error_report(sk); 452 sk->sk_error_report(sk);
453 out: 453 out:
454 sock_put(sk); 454 sock_put(sk);
455 } 455 }
456 456
457 void udp_err(struct sk_buff *skb, u32 info) 457 void udp_err(struct sk_buff *skb, u32 info)
458 { 458 {
459 __udp4_lib_err(skb, info, &udp_table); 459 __udp4_lib_err(skb, info, &udp_table);
460 } 460 }
461 461
462 /* 462 /*
463 * Throw away all pending data and cancel the corking. Socket is locked. 463 * Throw away all pending data and cancel the corking. Socket is locked.
464 */ 464 */
465 void udp_flush_pending_frames(struct sock *sk) 465 void udp_flush_pending_frames(struct sock *sk)
466 { 466 {
467 struct udp_sock *up = udp_sk(sk); 467 struct udp_sock *up = udp_sk(sk);
468 468
469 if (up->pending) { 469 if (up->pending) {
470 up->len = 0; 470 up->len = 0;
471 up->pending = 0; 471 up->pending = 0;
472 ip_flush_pending_frames(sk); 472 ip_flush_pending_frames(sk);
473 } 473 }
474 } 474 }
475 EXPORT_SYMBOL(udp_flush_pending_frames); 475 EXPORT_SYMBOL(udp_flush_pending_frames);
476 476
477 /** 477 /**
478 * udp4_hwcsum_outgoing - handle outgoing HW checksumming 478 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
479 * @sk: socket we are sending on 479 * @sk: socket we are sending on
480 * @skb: sk_buff containing the filled-in UDP header 480 * @skb: sk_buff containing the filled-in UDP header
481 * (checksum field must be zeroed out) 481 * (checksum field must be zeroed out)
482 */ 482 */
483 static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, 483 static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
484 __be32 src, __be32 dst, int len) 484 __be32 src, __be32 dst, int len)
485 { 485 {
486 unsigned int offset; 486 unsigned int offset;
487 struct udphdr *uh = udp_hdr(skb); 487 struct udphdr *uh = udp_hdr(skb);
488 __wsum csum = 0; 488 __wsum csum = 0;
489 489
490 if (skb_queue_len(&sk->sk_write_queue) == 1) { 490 if (skb_queue_len(&sk->sk_write_queue) == 1) {
491 /* 491 /*
492 * Only one fragment on the socket. 492 * Only one fragment on the socket.
493 */ 493 */
494 skb->csum_start = skb_transport_header(skb) - skb->head; 494 skb->csum_start = skb_transport_header(skb) - skb->head;
495 skb->csum_offset = offsetof(struct udphdr, check); 495 skb->csum_offset = offsetof(struct udphdr, check);
496 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); 496 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
497 } else { 497 } else {
498 /* 498 /*
499 * HW-checksum won't work as there are two or more 499 * HW-checksum won't work as there are two or more
500 * fragments on the socket so that all csums of sk_buffs 500 * fragments on the socket so that all csums of sk_buffs
501 * should be together 501 * should be together
502 */ 502 */
503 offset = skb_transport_offset(skb); 503 offset = skb_transport_offset(skb);
504 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 504 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
505 505
506 skb->ip_summed = CHECKSUM_NONE; 506 skb->ip_summed = CHECKSUM_NONE;
507 507
508 skb_queue_walk(&sk->sk_write_queue, skb) { 508 skb_queue_walk(&sk->sk_write_queue, skb) {
509 csum = csum_add(csum, skb->csum); 509 csum = csum_add(csum, skb->csum);
510 } 510 }
511 511
512 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); 512 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
513 if (uh->check == 0) 513 if (uh->check == 0)
514 uh->check = CSUM_MANGLED_0; 514 uh->check = CSUM_MANGLED_0;
515 } 515 }
516 } 516 }
517 517
518 /* 518 /*
519 * Push out all pending data as one UDP datagram. Socket is locked. 519 * Push out all pending data as one UDP datagram. Socket is locked.
520 */ 520 */
521 static int udp_push_pending_frames(struct sock *sk) 521 static int udp_push_pending_frames(struct sock *sk)
522 { 522 {
523 struct udp_sock *up = udp_sk(sk); 523 struct udp_sock *up = udp_sk(sk);
524 struct inet_sock *inet = inet_sk(sk); 524 struct inet_sock *inet = inet_sk(sk);
525 struct flowi *fl = &inet->cork.fl; 525 struct flowi *fl = &inet->cork.fl;
526 struct sk_buff *skb; 526 struct sk_buff *skb;
527 struct udphdr *uh; 527 struct udphdr *uh;
528 int err = 0; 528 int err = 0;
529 int is_udplite = IS_UDPLITE(sk); 529 int is_udplite = IS_UDPLITE(sk);
530 __wsum csum = 0; 530 __wsum csum = 0;
531 531
532 /* Grab the skbuff where UDP header space exists. */ 532 /* Grab the skbuff where UDP header space exists. */
533 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) 533 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
534 goto out; 534 goto out;
535 535
536 /* 536 /*
537 * Create a UDP header 537 * Create a UDP header
538 */ 538 */
539 uh = udp_hdr(skb); 539 uh = udp_hdr(skb);
540 uh->source = fl->fl_ip_sport; 540 uh->source = fl->fl_ip_sport;
541 uh->dest = fl->fl_ip_dport; 541 uh->dest = fl->fl_ip_dport;
542 uh->len = htons(up->len); 542 uh->len = htons(up->len);
543 uh->check = 0; 543 uh->check = 0;
544 544
545 if (is_udplite) /* UDP-Lite */ 545 if (is_udplite) /* UDP-Lite */
546 csum = udplite_csum_outgoing(sk, skb); 546 csum = udplite_csum_outgoing(sk, skb);
547 547
548 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 548 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
549 549
550 skb->ip_summed = CHECKSUM_NONE; 550 skb->ip_summed = CHECKSUM_NONE;
551 goto send; 551 goto send;
552 552
553 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 553 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
554 554
555 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); 555 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len);
556 goto send; 556 goto send;
557 557
558 } else /* `normal' UDP */ 558 } else /* `normal' UDP */
559 csum = udp_csum_outgoing(sk, skb); 559 csum = udp_csum_outgoing(sk, skb);
560 560
561 /* add protocol-dependent pseudo-header */ 561 /* add protocol-dependent pseudo-header */
562 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, 562 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
563 sk->sk_protocol, csum); 563 sk->sk_protocol, csum);
564 if (uh->check == 0) 564 if (uh->check == 0)
565 uh->check = CSUM_MANGLED_0; 565 uh->check = CSUM_MANGLED_0;
566 566
567 send: 567 send:
568 err = ip_push_pending_frames(sk); 568 err = ip_push_pending_frames(sk);
569 if (err) { 569 if (err) {
570 if (err == -ENOBUFS && !inet->recverr) { 570 if (err == -ENOBUFS && !inet->recverr) {
571 UDP_INC_STATS_USER(sock_net(sk), 571 UDP_INC_STATS_USER(sock_net(sk),
572 UDP_MIB_SNDBUFERRORS, is_udplite); 572 UDP_MIB_SNDBUFERRORS, is_udplite);
573 err = 0; 573 err = 0;
574 } 574 }
575 } else 575 } else
576 UDP_INC_STATS_USER(sock_net(sk), 576 UDP_INC_STATS_USER(sock_net(sk),
577 UDP_MIB_OUTDATAGRAMS, is_udplite); 577 UDP_MIB_OUTDATAGRAMS, is_udplite);
578 out: 578 out:
579 up->len = 0; 579 up->len = 0;
580 up->pending = 0; 580 up->pending = 0;
581 return err; 581 return err;
582 } 582 }
583 583
584 int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 584 int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
585 size_t len) 585 size_t len)
586 { 586 {
587 struct inet_sock *inet = inet_sk(sk); 587 struct inet_sock *inet = inet_sk(sk);
588 struct udp_sock *up = udp_sk(sk); 588 struct udp_sock *up = udp_sk(sk);
589 int ulen = len; 589 int ulen = len;
590 struct ipcm_cookie ipc; 590 struct ipcm_cookie ipc;
591 struct rtable *rt = NULL; 591 struct rtable *rt = NULL;
592 int free = 0; 592 int free = 0;
593 int connected = 0; 593 int connected = 0;
594 __be32 daddr, faddr, saddr; 594 __be32 daddr, faddr, saddr;
595 __be16 dport; 595 __be16 dport;
596 u8 tos; 596 u8 tos;
597 int err, is_udplite = IS_UDPLITE(sk); 597 int err, is_udplite = IS_UDPLITE(sk);
598 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 598 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
599 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 599 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
600 600
601 if (len > 0xFFFF) 601 if (len > 0xFFFF)
602 return -EMSGSIZE; 602 return -EMSGSIZE;
603 603
604 /* 604 /*
605 * Check the flags. 605 * Check the flags.
606 */ 606 */
607 607
608 if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ 608 if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
609 return -EOPNOTSUPP; 609 return -EOPNOTSUPP;
610 610
611 ipc.opt = NULL; 611 ipc.opt = NULL;
612 ipc.shtx.flags = 0; 612 ipc.shtx.flags = 0;
613 613
614 if (up->pending) { 614 if (up->pending) {
615 /* 615 /*
616 * There are pending frames. 616 * There are pending frames.
617 * The socket lock must be held while it's corked. 617 * The socket lock must be held while it's corked.
618 */ 618 */
619 lock_sock(sk); 619 lock_sock(sk);
620 if (likely(up->pending)) { 620 if (likely(up->pending)) {
621 if (unlikely(up->pending != AF_INET)) { 621 if (unlikely(up->pending != AF_INET)) {
622 release_sock(sk); 622 release_sock(sk);
623 return -EINVAL; 623 return -EINVAL;
624 } 624 }
625 goto do_append_data; 625 goto do_append_data;
626 } 626 }
627 release_sock(sk); 627 release_sock(sk);
628 } 628 }
629 ulen += sizeof(struct udphdr); 629 ulen += sizeof(struct udphdr);
630 630
631 /* 631 /*
632 * Get and verify the address. 632 * Get and verify the address.
633 */ 633 */
634 if (msg->msg_name) { 634 if (msg->msg_name) {
635 struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; 635 struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name;
636 if (msg->msg_namelen < sizeof(*usin)) 636 if (msg->msg_namelen < sizeof(*usin))
637 return -EINVAL; 637 return -EINVAL;
638 if (usin->sin_family != AF_INET) { 638 if (usin->sin_family != AF_INET) {
639 if (usin->sin_family != AF_UNSPEC) 639 if (usin->sin_family != AF_UNSPEC)
640 return -EAFNOSUPPORT; 640 return -EAFNOSUPPORT;
641 } 641 }
642 642
643 daddr = usin->sin_addr.s_addr; 643 daddr = usin->sin_addr.s_addr;
644 dport = usin->sin_port; 644 dport = usin->sin_port;
645 if (dport == 0) 645 if (dport == 0)
646 return -EINVAL; 646 return -EINVAL;
647 } else { 647 } else {
648 if (sk->sk_state != TCP_ESTABLISHED) 648 if (sk->sk_state != TCP_ESTABLISHED)
649 return -EDESTADDRREQ; 649 return -EDESTADDRREQ;
650 daddr = inet->inet_daddr; 650 daddr = inet->inet_daddr;
651 dport = inet->inet_dport; 651 dport = inet->inet_dport;
652 /* Open fast path for connected socket. 652 /* Open fast path for connected socket.
653 Route will not be used, if at least one option is set. 653 Route will not be used, if at least one option is set.
654 */ 654 */
655 connected = 1; 655 connected = 1;
656 } 656 }
657 ipc.addr = inet->inet_saddr; 657 ipc.addr = inet->inet_saddr;
658 658
659 ipc.oif = sk->sk_bound_dev_if; 659 ipc.oif = sk->sk_bound_dev_if;
660 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 660 err = sock_tx_timestamp(msg, sk, &ipc.shtx);
661 if (err) 661 if (err)
662 return err; 662 return err;
663 if (msg->msg_controllen) { 663 if (msg->msg_controllen) {
664 err = ip_cmsg_send(sock_net(sk), msg, &ipc); 664 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
665 if (err) 665 if (err)
666 return err; 666 return err;
667 if (ipc.opt) 667 if (ipc.opt)
668 free = 1; 668 free = 1;
669 connected = 0; 669 connected = 0;
670 } 670 }
671 if (!ipc.opt) 671 if (!ipc.opt)
672 ipc.opt = inet->opt; 672 ipc.opt = inet->opt;
673 673
674 saddr = ipc.addr; 674 saddr = ipc.addr;
675 ipc.addr = faddr = daddr; 675 ipc.addr = faddr = daddr;
676 676
677 if (ipc.opt && ipc.opt->srr) { 677 if (ipc.opt && ipc.opt->srr) {
678 if (!daddr) 678 if (!daddr)
679 return -EINVAL; 679 return -EINVAL;
680 faddr = ipc.opt->faddr; 680 faddr = ipc.opt->faddr;
681 connected = 0; 681 connected = 0;
682 } 682 }
683 tos = RT_TOS(inet->tos); 683 tos = RT_TOS(inet->tos);
684 if (sock_flag(sk, SOCK_LOCALROUTE) || 684 if (sock_flag(sk, SOCK_LOCALROUTE) ||
685 (msg->msg_flags & MSG_DONTROUTE) || 685 (msg->msg_flags & MSG_DONTROUTE) ||
686 (ipc.opt && ipc.opt->is_strictroute)) { 686 (ipc.opt && ipc.opt->is_strictroute)) {
687 tos |= RTO_ONLINK; 687 tos |= RTO_ONLINK;
688 connected = 0; 688 connected = 0;
689 } 689 }
690 690
691 if (ipv4_is_multicast(daddr)) { 691 if (ipv4_is_multicast(daddr)) {
692 if (!ipc.oif) 692 if (!ipc.oif)
693 ipc.oif = inet->mc_index; 693 ipc.oif = inet->mc_index;
694 if (!saddr) 694 if (!saddr)
695 saddr = inet->mc_addr; 695 saddr = inet->mc_addr;
696 connected = 0; 696 connected = 0;
697 } 697 }
698 698
699 if (connected) 699 if (connected)
700 rt = (struct rtable *)sk_dst_check(sk, 0); 700 rt = (struct rtable *)sk_dst_check(sk, 0);
701 701
702 if (rt == NULL) { 702 if (rt == NULL) {
703 struct flowi fl = { .oif = ipc.oif, 703 struct flowi fl = { .oif = ipc.oif,
704 .mark = sk->sk_mark, 704 .mark = sk->sk_mark,
705 .nl_u = { .ip4_u = 705 .nl_u = { .ip4_u =
706 { .daddr = faddr, 706 { .daddr = faddr,
707 .saddr = saddr, 707 .saddr = saddr,
708 .tos = tos } }, 708 .tos = tos } },
709 .proto = sk->sk_protocol, 709 .proto = sk->sk_protocol,
710 .flags = inet_sk_flowi_flags(sk), 710 .flags = inet_sk_flowi_flags(sk),
711 .uli_u = { .ports = 711 .uli_u = { .ports =
712 { .sport = inet->inet_sport, 712 { .sport = inet->inet_sport,
713 .dport = dport } } }; 713 .dport = dport } } };
714 struct net *net = sock_net(sk); 714 struct net *net = sock_net(sk);
715 715
716 security_sk_classify_flow(sk, &fl); 716 security_sk_classify_flow(sk, &fl);
717 err = ip_route_output_flow(net, &rt, &fl, sk, 1); 717 err = ip_route_output_flow(net, &rt, &fl, sk, 1);
718 if (err) { 718 if (err) {
719 if (err == -ENETUNREACH) 719 if (err == -ENETUNREACH)
720 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 720 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
721 goto out; 721 goto out;
722 } 722 }
723 723
724 err = -EACCES; 724 err = -EACCES;
725 if ((rt->rt_flags & RTCF_BROADCAST) && 725 if ((rt->rt_flags & RTCF_BROADCAST) &&
726 !sock_flag(sk, SOCK_BROADCAST)) 726 !sock_flag(sk, SOCK_BROADCAST))
727 goto out; 727 goto out;
728 if (connected) 728 if (connected)
729 sk_dst_set(sk, dst_clone(&rt->u.dst)); 729 sk_dst_set(sk, dst_clone(&rt->u.dst));
730 } 730 }
731 731
732 if (msg->msg_flags&MSG_CONFIRM) 732 if (msg->msg_flags&MSG_CONFIRM)
733 goto do_confirm; 733 goto do_confirm;
734 back_from_confirm: 734 back_from_confirm:
735 735
736 saddr = rt->rt_src; 736 saddr = rt->rt_src;
737 if (!ipc.addr) 737 if (!ipc.addr)
738 daddr = ipc.addr = rt->rt_dst; 738 daddr = ipc.addr = rt->rt_dst;
739 739
740 lock_sock(sk); 740 lock_sock(sk);
741 if (unlikely(up->pending)) { 741 if (unlikely(up->pending)) {
742 /* The socket is already corked while preparing it. */ 742 /* The socket is already corked while preparing it. */
743 /* ... which is an evident application bug. --ANK */ 743 /* ... which is an evident application bug. --ANK */
744 release_sock(sk); 744 release_sock(sk);
745 745
746 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); 746 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
747 err = -EINVAL; 747 err = -EINVAL;
748 goto out; 748 goto out;
749 } 749 }
750 /* 750 /*
751 * Now cork the socket to pend data. 751 * Now cork the socket to pend data.
752 */ 752 */
753 inet->cork.fl.fl4_dst = daddr; 753 inet->cork.fl.fl4_dst = daddr;
754 inet->cork.fl.fl_ip_dport = dport; 754 inet->cork.fl.fl_ip_dport = dport;
755 inet->cork.fl.fl4_src = saddr; 755 inet->cork.fl.fl4_src = saddr;
756 inet->cork.fl.fl_ip_sport = inet->inet_sport; 756 inet->cork.fl.fl_ip_sport = inet->inet_sport;
757 up->pending = AF_INET; 757 up->pending = AF_INET;
758 758
759 do_append_data: 759 do_append_data:
760 up->len += ulen; 760 up->len += ulen;
761 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 761 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
762 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, 762 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
763 sizeof(struct udphdr), &ipc, &rt, 763 sizeof(struct udphdr), &ipc, &rt,
764 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 764 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
765 if (err) 765 if (err)
766 udp_flush_pending_frames(sk); 766 udp_flush_pending_frames(sk);
767 else if (!corkreq) 767 else if (!corkreq)
768 err = udp_push_pending_frames(sk); 768 err = udp_push_pending_frames(sk);
769 else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) 769 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
770 up->pending = 0; 770 up->pending = 0;
771 release_sock(sk); 771 release_sock(sk);
772 772
773 out: 773 out:
774 ip_rt_put(rt); 774 ip_rt_put(rt);
775 if (free) 775 if (free)
776 kfree(ipc.opt); 776 kfree(ipc.opt);
777 if (!err) 777 if (!err)
778 return len; 778 return len;
779 /* 779 /*
780 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting 780 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
781 * ENOBUFS might not be good (it's not tunable per se), but otherwise 781 * ENOBUFS might not be good (it's not tunable per se), but otherwise
782 * we don't have a good statistic (IpOutDiscards but it can be too many 782 * we don't have a good statistic (IpOutDiscards but it can be too many
783 * things). We could add another new stat but at least for now that 783 * things). We could add another new stat but at least for now that
784 * seems like overkill. 784 * seems like overkill.
785 */ 785 */
786 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 786 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
787 UDP_INC_STATS_USER(sock_net(sk), 787 UDP_INC_STATS_USER(sock_net(sk),
788 UDP_MIB_SNDBUFERRORS, is_udplite); 788 UDP_MIB_SNDBUFERRORS, is_udplite);
789 } 789 }
790 return err; 790 return err;
791 791
792 do_confirm: 792 do_confirm:
793 dst_confirm(&rt->u.dst); 793 dst_confirm(&rt->u.dst);
794 if (!(msg->msg_flags&MSG_PROBE) || len) 794 if (!(msg->msg_flags&MSG_PROBE) || len)
795 goto back_from_confirm; 795 goto back_from_confirm;
796 err = 0; 796 err = 0;
797 goto out; 797 goto out;
798 } 798 }
799 EXPORT_SYMBOL(udp_sendmsg); 799 EXPORT_SYMBOL(udp_sendmsg);
800 800
801 int udp_sendpage(struct sock *sk, struct page *page, int offset, 801 int udp_sendpage(struct sock *sk, struct page *page, int offset,
802 size_t size, int flags) 802 size_t size, int flags)
803 { 803 {
804 struct udp_sock *up = udp_sk(sk); 804 struct udp_sock *up = udp_sk(sk);
805 int ret; 805 int ret;
806 806
807 if (!up->pending) { 807 if (!up->pending) {
808 struct msghdr msg = { .msg_flags = flags|MSG_MORE }; 808 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
809 809
810 /* Call udp_sendmsg to specify destination address which 810 /* Call udp_sendmsg to specify destination address which
811 * sendpage interface can't pass. 811 * sendpage interface can't pass.
812 * This will succeed only when the socket is connected. 812 * This will succeed only when the socket is connected.
813 */ 813 */
814 ret = udp_sendmsg(NULL, sk, &msg, 0); 814 ret = udp_sendmsg(NULL, sk, &msg, 0);
815 if (ret < 0) 815 if (ret < 0)
816 return ret; 816 return ret;
817 } 817 }
818 818
819 lock_sock(sk); 819 lock_sock(sk);
820 820
821 if (unlikely(!up->pending)) { 821 if (unlikely(!up->pending)) {
822 release_sock(sk); 822 release_sock(sk);
823 823
824 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); 824 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
825 return -EINVAL; 825 return -EINVAL;
826 } 826 }
827 827
828 ret = ip_append_page(sk, page, offset, size, flags); 828 ret = ip_append_page(sk, page, offset, size, flags);
829 if (ret == -EOPNOTSUPP) { 829 if (ret == -EOPNOTSUPP) {
830 release_sock(sk); 830 release_sock(sk);
831 return sock_no_sendpage(sk->sk_socket, page, offset, 831 return sock_no_sendpage(sk->sk_socket, page, offset,
832 size, flags); 832 size, flags);
833 } 833 }
834 if (ret < 0) { 834 if (ret < 0) {
835 udp_flush_pending_frames(sk); 835 udp_flush_pending_frames(sk);
836 goto out; 836 goto out;
837 } 837 }
838 838
839 up->len += size; 839 up->len += size;
840 if (!(up->corkflag || (flags&MSG_MORE))) 840 if (!(up->corkflag || (flags&MSG_MORE)))
841 ret = udp_push_pending_frames(sk); 841 ret = udp_push_pending_frames(sk);
842 if (!ret) 842 if (!ret)
843 ret = size; 843 ret = size;
844 out: 844 out:
845 release_sock(sk); 845 release_sock(sk);
846 return ret; 846 return ret;
847 } 847 }
848 848
849 849
850 /** 850 /**
851 * first_packet_length - return length of first packet in receive queue 851 * first_packet_length - return length of first packet in receive queue
852 * @sk: socket 852 * @sk: socket
853 * 853 *
854 * Drops all bad checksum frames, until a valid one is found. 854 * Drops all bad checksum frames, until a valid one is found.
855 * Returns the length of found skb, or 0 if none is found. 855 * Returns the length of found skb, or 0 if none is found.
856 */ 856 */
857 static unsigned int first_packet_length(struct sock *sk) 857 static unsigned int first_packet_length(struct sock *sk)
858 { 858 {
859 struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; 859 struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
860 struct sk_buff *skb; 860 struct sk_buff *skb;
861 unsigned int res; 861 unsigned int res;
862 862
863 __skb_queue_head_init(&list_kill); 863 __skb_queue_head_init(&list_kill);
864 864
865 spin_lock_bh(&rcvq->lock); 865 spin_lock_bh(&rcvq->lock);
866 while ((skb = skb_peek(rcvq)) != NULL && 866 while ((skb = skb_peek(rcvq)) != NULL &&
867 udp_lib_checksum_complete(skb)) { 867 udp_lib_checksum_complete(skb)) {
868 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 868 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
869 IS_UDPLITE(sk)); 869 IS_UDPLITE(sk));
870 atomic_inc(&sk->sk_drops);
870 __skb_unlink(skb, rcvq); 871 __skb_unlink(skb, rcvq);
871 __skb_queue_tail(&list_kill, skb); 872 __skb_queue_tail(&list_kill, skb);
872 } 873 }
873 res = skb ? skb->len : 0; 874 res = skb ? skb->len : 0;
874 spin_unlock_bh(&rcvq->lock); 875 spin_unlock_bh(&rcvq->lock);
875 876
876 if (!skb_queue_empty(&list_kill)) { 877 if (!skb_queue_empty(&list_kill)) {
877 lock_sock(sk); 878 lock_sock(sk);
878 __skb_queue_purge(&list_kill); 879 __skb_queue_purge(&list_kill);
879 sk_mem_reclaim_partial(sk); 880 sk_mem_reclaim_partial(sk);
880 release_sock(sk); 881 release_sock(sk);
881 } 882 }
882 return res; 883 return res;
883 } 884 }
884 885
885 /* 886 /*
886 * IOCTL requests applicable to the UDP protocol 887 * IOCTL requests applicable to the UDP protocol
887 */ 888 */
888 889
889 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) 890 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
890 { 891 {
891 switch (cmd) { 892 switch (cmd) {
892 case SIOCOUTQ: 893 case SIOCOUTQ:
893 { 894 {
894 int amount = sk_wmem_alloc_get(sk); 895 int amount = sk_wmem_alloc_get(sk);
895 896
896 return put_user(amount, (int __user *)arg); 897 return put_user(amount, (int __user *)arg);
897 } 898 }
898 899
899 case SIOCINQ: 900 case SIOCINQ:
900 { 901 {
901 unsigned int amount = first_packet_length(sk); 902 unsigned int amount = first_packet_length(sk);
902 903
903 if (amount) 904 if (amount)
904 /* 905 /*
905 * We will only return the amount 906 * We will only return the amount
906 * of this packet since that is all 907 * of this packet since that is all
907 * that will be read. 908 * that will be read.
908 */ 909 */
909 amount -= sizeof(struct udphdr); 910 amount -= sizeof(struct udphdr);
910 911
911 return put_user(amount, (int __user *)arg); 912 return put_user(amount, (int __user *)arg);
912 } 913 }
913 914
914 default: 915 default:
915 return -ENOIOCTLCMD; 916 return -ENOIOCTLCMD;
916 } 917 }
917 918
918 return 0; 919 return 0;
919 } 920 }
920 EXPORT_SYMBOL(udp_ioctl); 921 EXPORT_SYMBOL(udp_ioctl);
921 922
922 /* 923 /*
923 * This should be easy, if there is something there we 924 * This should be easy, if there is something there we
924 * return it, otherwise we block. 925 * return it, otherwise we block.
925 */ 926 */
926 927
927 int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 928 int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
928 size_t len, int noblock, int flags, int *addr_len) 929 size_t len, int noblock, int flags, int *addr_len)
929 { 930 {
930 struct inet_sock *inet = inet_sk(sk); 931 struct inet_sock *inet = inet_sk(sk);
931 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 932 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
932 struct sk_buff *skb; 933 struct sk_buff *skb;
933 unsigned int ulen, copied; 934 unsigned int ulen, copied;
934 int peeked; 935 int peeked;
935 int err; 936 int err;
936 int is_udplite = IS_UDPLITE(sk); 937 int is_udplite = IS_UDPLITE(sk);
937 938
938 /* 939 /*
939 * Check any passed addresses 940 * Check any passed addresses
940 */ 941 */
941 if (addr_len) 942 if (addr_len)
942 *addr_len = sizeof(*sin); 943 *addr_len = sizeof(*sin);
943 944
944 if (flags & MSG_ERRQUEUE) 945 if (flags & MSG_ERRQUEUE)
945 return ip_recv_error(sk, msg, len); 946 return ip_recv_error(sk, msg, len);
946 947
947 try_again: 948 try_again:
948 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 949 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
949 &peeked, &err); 950 &peeked, &err);
950 if (!skb) 951 if (!skb)
951 goto out; 952 goto out;
952 953
953 ulen = skb->len - sizeof(struct udphdr); 954 ulen = skb->len - sizeof(struct udphdr);
954 copied = len; 955 copied = len;
955 if (copied > ulen) 956 if (copied > ulen)
956 copied = ulen; 957 copied = ulen;
957 else if (copied < ulen) 958 else if (copied < ulen)
958 msg->msg_flags |= MSG_TRUNC; 959 msg->msg_flags |= MSG_TRUNC;
959 960
960 /* 961 /*
961 * If checksum is needed at all, try to do it while copying the 962 * If checksum is needed at all, try to do it while copying the
962 * data. If the data is truncated, or if we only want a partial 963 * data. If the data is truncated, or if we only want a partial
963 * coverage checksum (UDP-Lite), do it before the copy. 964 * coverage checksum (UDP-Lite), do it before the copy.
964 */ 965 */
965 966
966 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { 967 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
967 if (udp_lib_checksum_complete(skb)) 968 if (udp_lib_checksum_complete(skb))
968 goto csum_copy_err; 969 goto csum_copy_err;
969 } 970 }
970 971
971 if (skb_csum_unnecessary(skb)) 972 if (skb_csum_unnecessary(skb))
972 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 973 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
973 msg->msg_iov, copied); 974 msg->msg_iov, copied);
974 else { 975 else {
975 err = skb_copy_and_csum_datagram_iovec(skb, 976 err = skb_copy_and_csum_datagram_iovec(skb,
976 sizeof(struct udphdr), 977 sizeof(struct udphdr),
977 msg->msg_iov); 978 msg->msg_iov);
978 979
979 if (err == -EINVAL) 980 if (err == -EINVAL)
980 goto csum_copy_err; 981 goto csum_copy_err;
981 } 982 }
982 983
983 if (err) 984 if (err)
984 goto out_free; 985 goto out_free;
985 986
986 if (!peeked) 987 if (!peeked)
987 UDP_INC_STATS_USER(sock_net(sk), 988 UDP_INC_STATS_USER(sock_net(sk),
988 UDP_MIB_INDATAGRAMS, is_udplite); 989 UDP_MIB_INDATAGRAMS, is_udplite);
989 990
990 sock_recv_ts_and_drops(msg, sk, skb); 991 sock_recv_ts_and_drops(msg, sk, skb);
991 992
992 /* Copy the address. */ 993 /* Copy the address. */
993 if (sin) { 994 if (sin) {
994 sin->sin_family = AF_INET; 995 sin->sin_family = AF_INET;
995 sin->sin_port = udp_hdr(skb)->source; 996 sin->sin_port = udp_hdr(skb)->source;
996 sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 997 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
997 memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 998 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
998 } 999 }
999 if (inet->cmsg_flags) 1000 if (inet->cmsg_flags)
1000 ip_cmsg_recv(msg, skb); 1001 ip_cmsg_recv(msg, skb);
1001 1002
1002 err = copied; 1003 err = copied;
1003 if (flags & MSG_TRUNC) 1004 if (flags & MSG_TRUNC)
1004 err = ulen; 1005 err = ulen;
1005 1006
1006 out_free: 1007 out_free:
1007 lock_sock(sk); 1008 lock_sock(sk);
1008 skb_free_datagram(sk, skb); 1009 skb_free_datagram(sk, skb);
1009 release_sock(sk); 1010 release_sock(sk);
1010 out: 1011 out:
1011 return err; 1012 return err;
1012 1013
1013 csum_copy_err: 1014 csum_copy_err:
1014 lock_sock(sk); 1015 lock_sock(sk);
1015 if (!skb_kill_datagram(sk, skb, flags)) 1016 if (!skb_kill_datagram(sk, skb, flags))
1016 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1017 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1017 release_sock(sk); 1018 release_sock(sk);
1018 1019
1019 if (noblock) 1020 if (noblock)
1020 return -EAGAIN; 1021 return -EAGAIN;
1021 goto try_again; 1022 goto try_again;
1022 } 1023 }
1023 1024
1024 1025
/*
 *	udp_disconnect - break a UDP socket's association (RFC 1003.1g)
 *	@sk:	socket being disconnected
 *	@flags:	unused here (kept for the proto->disconnect signature)
 *
 *	Resets the connected 4-tuple, optionally releases the autobound
 *	source address/port, and drops the cached route.  Always returns 0.
 */
int udp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	/*
	 *	1003.1g - break association.
	 */

	sk->sk_state = TCP_CLOSE;
	inet->inet_daddr = 0;
	inet->inet_dport = 0;
	sk->sk_bound_dev_if = 0;
	/* Only forget the source address if the user did not bind() it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	/* Likewise only release the local port if it was autobound;
	 * unhash before clearing inet_sport so lookups stay consistent.
	 */
	if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
		sk->sk_prot->unhash(sk);
		inet->inet_sport = 0;
	}
	/* Invalidate the cached destination route. */
	sk_dst_reset(sk);
	return 0;
}
EXPORT_SYMBOL(udp_disconnect);
1047 1048
/*
 *	udp_lib_unhash - remove a socket from the UDP hash table
 *	@sk: socket to unhash
 *
 *	Takes the hash-slot spinlock (BH-safe, receive path runs in BH
 *	context) and removes @sk from its slot via an RCU-safe deletion.
 *	The protocol inuse counter is only decremented if the socket was
 *	actually on the list.
 */
void udp_lib_unhash(struct sock *sk)
{
	if (sk_hashed(sk)) {
		struct udp_table *udptable = sk->sk_prot->h.udp_table;
		struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
						       sk->sk_hash);

		spin_lock_bh(&hslot->lock);
		if (sk_nulls_del_node_init_rcu(sk)) {
			inet_sk(sk)->inet_num = 0;
			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
		}
		spin_unlock_bh(&hslot->lock);
	}
}
EXPORT_SYMBOL(udp_lib_unhash);
1064 1065
1065 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1066 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1066 { 1067 {
1067 int rc = sock_queue_rcv_skb(sk, skb); 1068 int rc = sock_queue_rcv_skb(sk, skb);
1068 1069
1069 if (rc < 0) { 1070 if (rc < 0) {
1070 int is_udplite = IS_UDPLITE(sk); 1071 int is_udplite = IS_UDPLITE(sk);
1071 1072
1072 /* Note that an ENOMEM error is charged twice */ 1073 /* Note that an ENOMEM error is charged twice */
1073 if (rc == -ENOMEM) 1074 if (rc == -ENOMEM)
1074 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1075 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1075 is_udplite); 1076 is_udplite);
1076 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1077 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1077 kfree_skb(skb); 1078 kfree_skb(skb);
1078 return -1; 1079 return -1;
1079 } 1080 }
1080 1081
1081 return 0; 1082 return 0;
1082 1083
1083 } 1084 }
1084 1085
/* returns:
 *  -1: error
 *   0: success
 *  >0: "udp encap" protocol resubmission
 *
 * Note that in the success and error cases, the skb is assumed to
 * have either been requeued or freed.
 */
int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	struct udp_sock *up = udp_sk(sk);
	int rc;
	int is_udplite = IS_UDPLITE(sk);

	/*
	 *	Charge it to the socket, dropping if the queue is full.
	 */
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto drop;
	nf_reset(skb);

	if (up->encap_type) {
		/*
		 * This is an encapsulation socket so pass the skb to
		 * the socket's udp_encap_rcv() hook. Otherwise, just
		 * fall through and pass this up the UDP socket.
		 * up->encap_rcv() returns the following value:
		 * =0 if skb was successfully passed to the encap
		 *    handler or was discarded by it.
		 * >0 if skb should be passed on to UDP.
		 * <0 if skb should be resubmitted as proto -N
		 */

		/* if we're overly short, let UDP handle it */
		if (skb->len > sizeof(struct udphdr) &&
		    up->encap_rcv != NULL) {
			int ret;

			ret = (*up->encap_rcv)(sk, skb);
			if (ret <= 0) {
				UDP_INC_STATS_BH(sock_net(sk),
						 UDP_MIB_INDATAGRAMS,
						 is_udplite);
				return -ret;
			}
		}

		/* FALLTHROUGH -- it's a UDP Packet */
	}

	/*
	 *	UDP-Lite specific tests, ignored on UDP sockets
	 */
	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {

		/*
		 * MIB statistics other than incrementing the error count are
		 * disabled for the following two types of errors: these depend
		 * on the application settings, not on the functioning of the
		 * protocol stack as such.
		 *
		 * RFC 3828 here recommends (sec 3.3): "There should also be a
		 * way ... to ... at least let the receiving application block
		 * delivery of packets with coverage values less than a value
		 * provided by the application."
		 */
		if (up->pcrlen == 0) {          /* full coverage was set  */
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
				"%d while full coverage %d requested\n",
				UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
		}
		/* The next case involves violating the min. coverage requested
		 * by the receiver. This is subtle: if receiver wants x and x is
		 * greater than the buffersize/MTU then receiver will complain
		 * that it wants x while sender emits packets of smaller size y.
		 * Therefore the above ...()->partial_cov statement is essential.
		 */
		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
			LIMIT_NETDEBUG(KERN_WARNING
				"UDPLITE: coverage %d too small, need min %d\n",
				UDP_SKB_CB(skb)->cscov, up->pcrlen);
			goto drop;
		}
	}

	/* If a BPF filter is attached, the checksum must be verified
	 * now, before the filter can inspect the payload.
	 */
	if (sk->sk_filter) {
		if (udp_lib_checksum_complete(skb))
			goto drop;
	}

	rc = 0;

	/* Queue directly if the socket is not owned by a user context,
	 * otherwise defer to the backlog (processed on release_sock()).
	 */
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		rc = __udp_queue_rcv_skb(sk, skb);
	else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	return rc;

drop:
	/* Policy/coverage/checksum failures count against both the MIB
	 * error counter and this socket's drop counter.
	 */
	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	atomic_inc(&sk->sk_drops);
	kfree_skb(skb);
	return -1;
}
1192 1194
/*
 *	Multicasts and broadcasts go to each listener.
 *
 *	Note: called only from the BH handler context,
 *	so we don't need to lock the hashes.
 */
static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
				    struct udphdr  *uh,
				    __be32 saddr, __be32 daddr,
				    struct udp_table *udptable)
{
	struct sock *sk;
	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
	int dif;

	spin_lock(&hslot->lock);
	sk = sk_nulls_head(&hslot->head);
	dif = skb->dev->ifindex;
	sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
	if (sk) {
		struct sock *sknext = NULL;

		do {
			struct sk_buff *skb1 = skb;

			/* Look ahead for another matching listener; if one
			 * exists, this listener gets a clone and the original
			 * skb is kept for the remaining sockets.
			 */
			sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
						   daddr, uh->source, saddr,
						   dif);
			if (sknext)
				skb1 = skb_clone(skb, GFP_ATOMIC);

			/* skb1 may be NULL if the clone failed; the
			 * datagram is then silently lost for this socket.
			 */
			if (skb1) {
				int ret = udp_queue_rcv_skb(sk, skb1);
				if (ret > 0)
					/* we should probably re-process instead
					 * of dropping packets here. */
					kfree_skb(skb1);
			}
			sk = sknext;
		} while (sknext);
	} else
		consume_skb(skb);	/* no listeners: not an error drop */
	spin_unlock(&hslot->lock);
	return 0;
}
1238 1240
/* Initialize UDP checksum. If exited with zero value (success),
 * CHECKSUM_UNNECESSARY means, that no more checks are required.
 * Otherwise, csum completion requires checksumming packet body,
 * including udp header and folding it to skb->csum.
 */
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
				 int proto)
{
	const struct iphdr *iph;
	int err;

	UDP_SKB_CB(skb)->partial_cov = 0;
	UDP_SKB_CB(skb)->cscov = skb->len;

	if (proto == IPPROTO_UDPLITE) {
		err = udplite_checksum_init(skb, uh);
		if (err)
			return err;
	}

	iph = ip_hdr(skb);
	if (uh->check == 0) {
		/* A zero UDP checksum means "no checksum" over IPv4. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* Hardware summed the whole packet: fold in the
		 * pseudo-header; a zero result verifies the checksum.
		 */
		if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
				      proto, skb->csum))
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	/* Not verified yet: seed skb->csum with the pseudo-header so a
	 * later full checksum over the payload can complete it.
	 */
	if (!skb_csum_unnecessary(skb))
		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
					       skb->len, proto, 0);
	/* Probably, we should checksum udp header (it should be in cache
	 * in any case) and data in tiny packets (< rx copybreak).
	 */

	return 0;
}
1276 1278
/*
 *	All we need to do is get the socket, and then do a checksum.
 *
 *	Main IPv4 receive entry for UDP and UDP-Lite (@proto selects which).
 *	Returns 0, or -protocol to request resubmission for encap sockets.
 */

int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
		   int proto)
{
	struct sock *sk;
	struct udphdr *uh;
	unsigned short ulen;
	struct rtable *rt = skb_rtable(skb);
	__be32 saddr, daddr;
	struct net *net = dev_net(skb->dev);

	/*
	 *  Validate the packet.
	 */
	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto drop;		/* No space for header. */

	uh   = udp_hdr(skb);
	ulen = ntohs(uh->len);
	if (ulen > skb->len)
		goto short_packet;

	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */
		if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
			goto short_packet;
		/* Re-read: pskb_trim_rcsum may have reallocated the head. */
		uh = udp_hdr(skb);
	}

	if (udp4_csum_init(skb, uh, proto))
		goto csum_error;

	saddr = ip_hdr(skb)->saddr;
	daddr = ip_hdr(skb)->daddr;

	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
		return __udp4_lib_mcast_deliver(net, skb, uh,
				saddr, daddr, udptable);

	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);

	if (sk != NULL) {
		int ret = udp_queue_rcv_skb(sk, skb);
		sock_put(sk);

		/* a return value > 0 means to resubmit the input, but
		 * it wants the return to be -protocol, or 0
		 */
		if (ret > 0)
			return -ret;
		return 0;
	}

	/* No matching socket: still run the inbound policy check. */
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto drop;
	nf_reset(skb);

	/* No socket. Drop packet silently, if checksum is wrong */
	if (udp_lib_checksum_complete(skb))
		goto csum_error;

	UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

	/*
	 * Hmm.  We got an UDP packet to a port to which we
	 * don't wanna listen.  Ignore it.
	 */
	kfree_skb(skb);
	return 0;

short_packet:
	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
		       &saddr,
		       ntohs(uh->source),
		       ulen,
		       skb->len,
		       &daddr,
		       ntohs(uh->dest));
	goto drop;

csum_error:
	/*
	 * RFC1122: OK.  Discards the bad packet silently (as far as
	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
	 */
	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
		       &saddr,
		       ntohs(uh->source),
		       &daddr,
		       ntohs(uh->dest),
		       ulen);
drop:
	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
	kfree_skb(skb);
	return 0;
}
1379 1381
/* Protocol handler entry point for plain UDP over IPv4. */
int udp_rcv(struct sk_buff *skb)
{
	return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
}
1384 1386
/* Socket teardown hook: flush any corked, not-yet-sent frames under
 * the socket lock before the socket is destroyed.
 */
void udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);
}
1391 1393
/*
 *	Socket option code for UDP
 *
 *	Shared by UDP and UDP-Lite (and their compat paths).
 *	@push_pending_frames is the protocol-specific flush routine run
 *	when UDP_CORK is cleared.  Returns 0 or a negative errno.
 */
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
		       char __user *optval, unsigned int optlen,
		       int (*push_pending_frames)(struct sock *))
{
	struct udp_sock *up = udp_sk(sk);
	int val;
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case UDP_CORK:
		if (val != 0) {
			up->corkflag = 1;
		} else {
			/* Uncorking sends out whatever was accumulated. */
			up->corkflag = 0;
			lock_sock(sk);
			(*push_pending_frames)(sk);
			release_sock(sk);
		}
		break;

	case UDP_ENCAP:
		switch (val) {
		case 0:
		case UDP_ENCAP_ESPINUDP:
		case UDP_ENCAP_ESPINUDP_NON_IKE:
			up->encap_rcv = xfrm4_udp_encap_rcv;
			/* FALLTHROUGH */
		case UDP_ENCAP_L2TPINUDP:
			up->encap_type = val;
			break;
		default:
			err = -ENOPROTOOPT;
			break;
		}
		break;

	/*
	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
	 */
	/* The sender sets actual checksum coverage length via this option.
	 * The case coverage > packet length is handled by send module. */
	case UDPLITE_SEND_CSCOV:
		if (!is_udplite)         /* Disable the option on UDP sockets */
			return -ENOPROTOOPT;
		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
			val = 8;
		else if (val > USHORT_MAX)
			val = USHORT_MAX;
		up->pcslen = val;
		up->pcflag |= UDPLITE_SEND_CC;
		break;

	/* The receiver specifies a minimum checksum coverage value. To make
	 * sense, this should be set to at least 8 (as done below). If zero is
	 * used, this again means full checksum coverage.                     */
	case UDPLITE_RECV_CSCOV:
		if (!is_udplite)         /* Disable the option on UDP sockets */
			return -ENOPROTOOPT;
		if (val != 0 && val < 8) /* Avoid silly minimal values.       */
			val = 8;
		else if (val > USHORT_MAX)
			val = USHORT_MAX;
		up->pcrlen = val;
		up->pcflag |= UDPLITE_RECV_CC;
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}

	return err;
}
EXPORT_SYMBOL(udp_lib_setsockopt);
1476 1478
1477 int udp_setsockopt(struct sock *sk, int level, int optname, 1479 int udp_setsockopt(struct sock *sk, int level, int optname,
1478 char __user *optval, unsigned int optlen) 1480 char __user *optval, unsigned int optlen)
1479 { 1481 {
1480 if (level == SOL_UDP || level == SOL_UDPLITE) 1482 if (level == SOL_UDP || level == SOL_UDPLITE)
1481 return udp_lib_setsockopt(sk, level, optname, optval, optlen, 1483 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1482 udp_push_pending_frames); 1484 udp_push_pending_frames);
1483 return ip_setsockopt(sk, level, optname, optval, optlen); 1485 return ip_setsockopt(sk, level, optname, optval, optlen);
1484 } 1486 }
1485 1487
#ifdef CONFIG_COMPAT
/* 32-bit compat variant of udp_setsockopt(); only the IP-level
 * fallback differs from the native path.
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval, optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1496 1498
/*
 *	udp_lib_getsockopt - getsockopt() helper shared by UDP and UDP-Lite
 *
 *	Copies at most sizeof(int) bytes of the requested option value to
 *	userspace and writes back the actual length.  Returns 0, -EFAULT
 *	on a bad user pointer, -EINVAL on a negative length, or
 *	-ENOPROTOOPT for an unknown option.
 */
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	struct udp_sock *up = udp_sk(sk);
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	/* Clamp to sizeof(int) first; the unsigned min_t maps negative
	 * lengths to huge values, so they survive to the check below.
	 */
	len = min_t(unsigned int, len, sizeof(int));

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case UDP_CORK:
		val = up->corkflag;
		break;

	case UDP_ENCAP:
		val = up->encap_type;
		break;

	/* The following two cannot be changed on UDP sockets, the return is
	 * always 0 (which corresponds to the full checksum coverage of UDP). */
	case UDPLITE_SEND_CSCOV:
		val = up->pcslen;
		break;

	case UDPLITE_RECV_CSCOV:
		val = up->pcrlen;
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL(udp_lib_getsockopt);
1541 1543
1542 int udp_getsockopt(struct sock *sk, int level, int optname, 1544 int udp_getsockopt(struct sock *sk, int level, int optname,
1543 char __user *optval, int __user *optlen) 1545 char __user *optval, int __user *optlen)
1544 { 1546 {
1545 if (level == SOL_UDP || level == SOL_UDPLITE) 1547 if (level == SOL_UDP || level == SOL_UDPLITE)
1546 return udp_lib_getsockopt(sk, level, optname, optval, optlen); 1548 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1547 return ip_getsockopt(sk, level, optname, optval, optlen); 1549 return ip_getsockopt(sk, level, optname, optval, optlen);
1548 } 1550 }
1549 1551
#ifdef CONFIG_COMPAT
/* 32-bit compat variant of udp_getsockopt(). */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval, optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
/**
 * 	udp_poll - wait for a UDP event.
 *	@file - file struct
 *	@sock - socket
 *	@wait - poll table
 *
 *	This is same as datagram poll, except for the special case of
 *	blocking sockets. If application is using a blocking fd
 *	and a packet with checksum error is in the queue;
 *	then it could get return from select indicating data available
 *	but then block when reading it.  Add special case code
 *	to work around these arguably broken applications.
 */
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;

	/* Check for false positives due to checksum errors:
	 * first_packet_length() discards bad-checksum datagrams, so a
	 * zero result means nothing is actually readable yet.
	 */
	if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
	    !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
		mask &= ~(POLLIN | POLLRDNORM);

	return mask;

}
EXPORT_SYMBOL(udp_poll);
1586 1588
1587 struct proto udp_prot = { 1589 struct proto udp_prot = {
1588 .name = "UDP", 1590 .name = "UDP",
1589 .owner = THIS_MODULE, 1591 .owner = THIS_MODULE,
1590 .close = udp_lib_close, 1592 .close = udp_lib_close,
1591 .connect = ip4_datagram_connect, 1593 .connect = ip4_datagram_connect,
1592 .disconnect = udp_disconnect, 1594 .disconnect = udp_disconnect,
1593 .ioctl = udp_ioctl, 1595 .ioctl = udp_ioctl,
1594 .destroy = udp_destroy_sock, 1596 .destroy = udp_destroy_sock,
1595 .setsockopt = udp_setsockopt, 1597 .setsockopt = udp_setsockopt,
1596 .getsockopt = udp_getsockopt, 1598 .getsockopt = udp_getsockopt,
1597 .sendmsg = udp_sendmsg, 1599 .sendmsg = udp_sendmsg,
1598 .recvmsg = udp_recvmsg, 1600 .recvmsg = udp_recvmsg,
1599 .sendpage = udp_sendpage, 1601 .sendpage = udp_sendpage,
1600 .backlog_rcv = __udp_queue_rcv_skb, 1602 .backlog_rcv = __udp_queue_rcv_skb,
1601 .hash = udp_lib_hash, 1603 .hash = udp_lib_hash,
1602 .unhash = udp_lib_unhash, 1604 .unhash = udp_lib_unhash,
1603 .get_port = udp_v4_get_port, 1605 .get_port = udp_v4_get_port,
1604 .memory_allocated = &udp_memory_allocated, 1606 .memory_allocated = &udp_memory_allocated,
1605 .sysctl_mem = sysctl_udp_mem, 1607 .sysctl_mem = sysctl_udp_mem,
1606 .sysctl_wmem = &sysctl_udp_wmem_min, 1608 .sysctl_wmem = &sysctl_udp_wmem_min,
1607 .sysctl_rmem = &sysctl_udp_rmem_min, 1609 .sysctl_rmem = &sysctl_udp_rmem_min,
1608 .obj_size = sizeof(struct udp_sock), 1610 .obj_size = sizeof(struct udp_sock),
1609 .slab_flags = SLAB_DESTROY_BY_RCU, 1611 .slab_flags = SLAB_DESTROY_BY_RCU,
1610 .h.udp_table = &udp_table, 1612 .h.udp_table = &udp_table,
1611 #ifdef CONFIG_COMPAT 1613 #ifdef CONFIG_COMPAT
1612 .compat_setsockopt = compat_udp_setsockopt, 1614 .compat_setsockopt = compat_udp_setsockopt,
1613 .compat_getsockopt = compat_udp_getsockopt, 1615 .compat_getsockopt = compat_udp_getsockopt,
1614 #endif 1616 #endif
1615 }; 1617 };
1616 EXPORT_SYMBOL(udp_prot); 1618 EXPORT_SYMBOL(udp_prot);
1617 1619
1618 /* ------------------------------------------------------------------------ */ 1620 /* ------------------------------------------------------------------------ */
1619 #ifdef CONFIG_PROC_FS 1621 #ifdef CONFIG_PROC_FS
1620 1622
1621 static struct sock *udp_get_first(struct seq_file *seq, int start) 1623 static struct sock *udp_get_first(struct seq_file *seq, int start)
1622 { 1624 {
1623 struct sock *sk; 1625 struct sock *sk;
1624 struct udp_iter_state *state = seq->private; 1626 struct udp_iter_state *state = seq->private;
1625 struct net *net = seq_file_net(seq); 1627 struct net *net = seq_file_net(seq);
1626 1628
1627 for (state->bucket = start; state->bucket <= state->udp_table->mask; 1629 for (state->bucket = start; state->bucket <= state->udp_table->mask;
1628 ++state->bucket) { 1630 ++state->bucket) {
1629 struct hlist_nulls_node *node; 1631 struct hlist_nulls_node *node;
1630 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 1632 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
1631 1633
1632 if (hlist_nulls_empty(&hslot->head)) 1634 if (hlist_nulls_empty(&hslot->head))
1633 continue; 1635 continue;
1634 1636
1635 spin_lock_bh(&hslot->lock); 1637 spin_lock_bh(&hslot->lock);
1636 sk_nulls_for_each(sk, node, &hslot->head) { 1638 sk_nulls_for_each(sk, node, &hslot->head) {
1637 if (!net_eq(sock_net(sk), net)) 1639 if (!net_eq(sock_net(sk), net))
1638 continue; 1640 continue;
1639 if (sk->sk_family == state->family) 1641 if (sk->sk_family == state->family)
1640 goto found; 1642 goto found;
1641 } 1643 }
1642 spin_unlock_bh(&hslot->lock); 1644 spin_unlock_bh(&hslot->lock);
1643 } 1645 }
1644 sk = NULL; 1646 sk = NULL;
1645 found: 1647 found:
1646 return sk; 1648 return sk;
1647 } 1649 }
1648 1650
1649 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) 1651 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1650 { 1652 {
1651 struct udp_iter_state *state = seq->private; 1653 struct udp_iter_state *state = seq->private;
1652 struct net *net = seq_file_net(seq); 1654 struct net *net = seq_file_net(seq);
1653 1655
1654 do { 1656 do {
1655 sk = sk_nulls_next(sk); 1657 sk = sk_nulls_next(sk);
1656 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 1658 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1657 1659
1658 if (!sk) { 1660 if (!sk) {
1659 if (state->bucket <= state->udp_table->mask) 1661 if (state->bucket <= state->udp_table->mask)
1660 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1662 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1661 return udp_get_first(seq, state->bucket + 1); 1663 return udp_get_first(seq, state->bucket + 1);
1662 } 1664 }
1663 return sk; 1665 return sk;
1664 } 1666 }
1665 1667
1666 static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) 1668 static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1667 { 1669 {
1668 struct sock *sk = udp_get_first(seq, 0); 1670 struct sock *sk = udp_get_first(seq, 0);
1669 1671
1670 if (sk) 1672 if (sk)
1671 while (pos && (sk = udp_get_next(seq, sk)) != NULL) 1673 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
1672 --pos; 1674 --pos;
1673 return pos ? NULL : sk; 1675 return pos ? NULL : sk;
1674 } 1676 }
1675 1677
1676 static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1678 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1677 { 1679 {
1678 struct udp_iter_state *state = seq->private; 1680 struct udp_iter_state *state = seq->private;
1679 state->bucket = MAX_UDP_PORTS; 1681 state->bucket = MAX_UDP_PORTS;
1680 1682
1681 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 1683 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1682 } 1684 }
1683 1685
1684 static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1686 static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1685 { 1687 {
1686 struct sock *sk; 1688 struct sock *sk;
1687 1689
1688 if (v == SEQ_START_TOKEN) 1690 if (v == SEQ_START_TOKEN)
1689 sk = udp_get_idx(seq, 0); 1691 sk = udp_get_idx(seq, 0);
1690 else 1692 else
1691 sk = udp_get_next(seq, v); 1693 sk = udp_get_next(seq, v);
1692 1694
1693 ++*pos; 1695 ++*pos;
1694 return sk; 1696 return sk;
1695 } 1697 }
1696 1698
1697 static void udp_seq_stop(struct seq_file *seq, void *v) 1699 static void udp_seq_stop(struct seq_file *seq, void *v)
1698 { 1700 {
1699 struct udp_iter_state *state = seq->private; 1701 struct udp_iter_state *state = seq->private;
1700 1702
1701 if (state->bucket <= state->udp_table->mask) 1703 if (state->bucket <= state->udp_table->mask)
1702 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1704 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1703 } 1705 }
1704 1706
1705 static int udp_seq_open(struct inode *inode, struct file *file) 1707 static int udp_seq_open(struct inode *inode, struct file *file)
1706 { 1708 {
1707 struct udp_seq_afinfo *afinfo = PDE(inode)->data; 1709 struct udp_seq_afinfo *afinfo = PDE(inode)->data;
1708 struct udp_iter_state *s; 1710 struct udp_iter_state *s;
1709 int err; 1711 int err;
1710 1712
1711 err = seq_open_net(inode, file, &afinfo->seq_ops, 1713 err = seq_open_net(inode, file, &afinfo->seq_ops,
1712 sizeof(struct udp_iter_state)); 1714 sizeof(struct udp_iter_state));
1713 if (err < 0) 1715 if (err < 0)
1714 return err; 1716 return err;
1715 1717
1716 s = ((struct seq_file *)file->private_data)->private; 1718 s = ((struct seq_file *)file->private_data)->private;
1717 s->family = afinfo->family; 1719 s->family = afinfo->family;
1718 s->udp_table = afinfo->udp_table; 1720 s->udp_table = afinfo->udp_table;
1719 return err; 1721 return err;
1720 } 1722 }
1721 1723
1722 /* ------------------------------------------------------------------------ */ 1724 /* ------------------------------------------------------------------------ */
1723 int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) 1725 int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
1724 { 1726 {
1725 struct proc_dir_entry *p; 1727 struct proc_dir_entry *p;
1726 int rc = 0; 1728 int rc = 0;
1727 1729
1728 afinfo->seq_fops.open = udp_seq_open; 1730 afinfo->seq_fops.open = udp_seq_open;
1729 afinfo->seq_fops.read = seq_read; 1731 afinfo->seq_fops.read = seq_read;
1730 afinfo->seq_fops.llseek = seq_lseek; 1732 afinfo->seq_fops.llseek = seq_lseek;
1731 afinfo->seq_fops.release = seq_release_net; 1733 afinfo->seq_fops.release = seq_release_net;
1732 1734
1733 afinfo->seq_ops.start = udp_seq_start; 1735 afinfo->seq_ops.start = udp_seq_start;
1734 afinfo->seq_ops.next = udp_seq_next; 1736 afinfo->seq_ops.next = udp_seq_next;
1735 afinfo->seq_ops.stop = udp_seq_stop; 1737 afinfo->seq_ops.stop = udp_seq_stop;
1736 1738
1737 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 1739 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
1738 &afinfo->seq_fops, afinfo); 1740 &afinfo->seq_fops, afinfo);
1739 if (!p) 1741 if (!p)
1740 rc = -ENOMEM; 1742 rc = -ENOMEM;
1741 return rc; 1743 return rc;
1742 } 1744 }
1743 EXPORT_SYMBOL(udp_proc_register); 1745 EXPORT_SYMBOL(udp_proc_register);
1744 1746
1745 void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) 1747 void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
1746 { 1748 {
1747 proc_net_remove(net, afinfo->name); 1749 proc_net_remove(net, afinfo->name);
1748 } 1750 }
1749 EXPORT_SYMBOL(udp_proc_unregister); 1751 EXPORT_SYMBOL(udp_proc_unregister);
1750 1752
1751 /* ------------------------------------------------------------------------ */ 1753 /* ------------------------------------------------------------------------ */
1752 static void udp4_format_sock(struct sock *sp, struct seq_file *f, 1754 static void udp4_format_sock(struct sock *sp, struct seq_file *f,
1753 int bucket, int *len) 1755 int bucket, int *len)
1754 { 1756 {
1755 struct inet_sock *inet = inet_sk(sp); 1757 struct inet_sock *inet = inet_sk(sp);
1756 __be32 dest = inet->inet_daddr; 1758 __be32 dest = inet->inet_daddr;
1757 __be32 src = inet->inet_rcv_saddr; 1759 __be32 src = inet->inet_rcv_saddr;
1758 __u16 destp = ntohs(inet->inet_dport); 1760 __u16 destp = ntohs(inet->inet_dport);
1759 __u16 srcp = ntohs(inet->inet_sport); 1761 __u16 srcp = ntohs(inet->inet_sport);
1760 1762
1761 seq_printf(f, "%5d: %08X:%04X %08X:%04X" 1763 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
1762 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", 1764 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
1763 bucket, src, srcp, dest, destp, sp->sk_state, 1765 bucket, src, srcp, dest, destp, sp->sk_state,
1764 sk_wmem_alloc_get(sp), 1766 sk_wmem_alloc_get(sp),
1765 sk_rmem_alloc_get(sp), 1767 sk_rmem_alloc_get(sp),
1766 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), 1768 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1767 atomic_read(&sp->sk_refcnt), sp, 1769 atomic_read(&sp->sk_refcnt), sp,
1768 atomic_read(&sp->sk_drops), len); 1770 atomic_read(&sp->sk_drops), len);
1769 } 1771 }
1770 1772
1771 int udp4_seq_show(struct seq_file *seq, void *v) 1773 int udp4_seq_show(struct seq_file *seq, void *v)
1772 { 1774 {
1773 if (v == SEQ_START_TOKEN) 1775 if (v == SEQ_START_TOKEN)
1774 seq_printf(seq, "%-127s\n", 1776 seq_printf(seq, "%-127s\n",
1775 " sl local_address rem_address st tx_queue " 1777 " sl local_address rem_address st tx_queue "
1776 "rx_queue tr tm->when retrnsmt uid timeout " 1778 "rx_queue tr tm->when retrnsmt uid timeout "
1777 "inode ref pointer drops"); 1779 "inode ref pointer drops");
1778 else { 1780 else {
1779 struct udp_iter_state *state = seq->private; 1781 struct udp_iter_state *state = seq->private;
1780 int len; 1782 int len;
1781 1783
1782 udp4_format_sock(v, seq, state->bucket, &len); 1784 udp4_format_sock(v, seq, state->bucket, &len);
1783 seq_printf(seq, "%*s\n", 127 - len, ""); 1785 seq_printf(seq, "%*s\n", 127 - len, "");
1784 } 1786 }
1785 return 0; 1787 return 0;
1786 } 1788 }
1787 1789
1788 /* ------------------------------------------------------------------------ */ 1790 /* ------------------------------------------------------------------------ */
1789 static struct udp_seq_afinfo udp4_seq_afinfo = { 1791 static struct udp_seq_afinfo udp4_seq_afinfo = {
1790 .name = "udp", 1792 .name = "udp",
1791 .family = AF_INET, 1793 .family = AF_INET,
1792 .udp_table = &udp_table, 1794 .udp_table = &udp_table,
1793 .seq_fops = { 1795 .seq_fops = {
1794 .owner = THIS_MODULE, 1796 .owner = THIS_MODULE,
1795 }, 1797 },
1796 .seq_ops = { 1798 .seq_ops = {
1797 .show = udp4_seq_show, 1799 .show = udp4_seq_show,
1798 }, 1800 },
1799 }; 1801 };
1800 1802
1801 static int udp4_proc_init_net(struct net *net) 1803 static int udp4_proc_init_net(struct net *net)
1802 { 1804 {
1803 return udp_proc_register(net, &udp4_seq_afinfo); 1805 return udp_proc_register(net, &udp4_seq_afinfo);
1804 } 1806 }
1805 1807
1806 static void udp4_proc_exit_net(struct net *net) 1808 static void udp4_proc_exit_net(struct net *net)
1807 { 1809 {
1808 udp_proc_unregister(net, &udp4_seq_afinfo); 1810 udp_proc_unregister(net, &udp4_seq_afinfo);
1809 } 1811 }
1810 1812
1811 static struct pernet_operations udp4_net_ops = { 1813 static struct pernet_operations udp4_net_ops = {
1812 .init = udp4_proc_init_net, 1814 .init = udp4_proc_init_net,
1813 .exit = udp4_proc_exit_net, 1815 .exit = udp4_proc_exit_net,
1814 }; 1816 };
1815 1817
1816 int __init udp4_proc_init(void) 1818 int __init udp4_proc_init(void)
1817 { 1819 {
1818 return register_pernet_subsys(&udp4_net_ops); 1820 return register_pernet_subsys(&udp4_net_ops);
1819 } 1821 }
1820 1822
1821 void udp4_proc_exit(void) 1823 void udp4_proc_exit(void)
1822 { 1824 {
1823 unregister_pernet_subsys(&udp4_net_ops); 1825 unregister_pernet_subsys(&udp4_net_ops);
1824 } 1826 }
1825 #endif /* CONFIG_PROC_FS */ 1827 #endif /* CONFIG_PROC_FS */
1826 1828
1827 static __initdata unsigned long uhash_entries; 1829 static __initdata unsigned long uhash_entries;
1828 static int __init set_uhash_entries(char *str) 1830 static int __init set_uhash_entries(char *str)
1829 { 1831 {
1830 if (!str) 1832 if (!str)
1831 return 0; 1833 return 0;
1832 uhash_entries = simple_strtoul(str, &str, 0); 1834 uhash_entries = simple_strtoul(str, &str, 0);
1833 if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) 1835 if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
1834 uhash_entries = UDP_HTABLE_SIZE_MIN; 1836 uhash_entries = UDP_HTABLE_SIZE_MIN;
1835 return 1; 1837 return 1;
1836 } 1838 }
1837 __setup("uhash_entries=", set_uhash_entries); 1839 __setup("uhash_entries=", set_uhash_entries);
1838 1840
1839 void __init udp_table_init(struct udp_table *table, const char *name) 1841 void __init udp_table_init(struct udp_table *table, const char *name)
1840 { 1842 {
1841 unsigned int i; 1843 unsigned int i;
1842 1844
1843 if (!CONFIG_BASE_SMALL) 1845 if (!CONFIG_BASE_SMALL)
1844 table->hash = alloc_large_system_hash(name, 1846 table->hash = alloc_large_system_hash(name,
1845 sizeof(struct udp_hslot), 1847 sizeof(struct udp_hslot),
1846 uhash_entries, 1848 uhash_entries,
1847 21, /* one slot per 2 MB */ 1849 21, /* one slot per 2 MB */
1848 0, 1850 0,
1849 &table->log, 1851 &table->log,
1850 &table->mask, 1852 &table->mask,
1851 64 * 1024); 1853 64 * 1024);
1852 /* 1854 /*
1853 * Make sure hash table has the minimum size 1855 * Make sure hash table has the minimum size
1854 */ 1856 */
1855 if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { 1857 if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
1856 table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * 1858 table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
1857 sizeof(struct udp_hslot), GFP_KERNEL); 1859 sizeof(struct udp_hslot), GFP_KERNEL);
1858 if (!table->hash) 1860 if (!table->hash)
1859 panic(name); 1861 panic(name);
1860 table->log = ilog2(UDP_HTABLE_SIZE_MIN); 1862 table->log = ilog2(UDP_HTABLE_SIZE_MIN);
1861 table->mask = UDP_HTABLE_SIZE_MIN - 1; 1863 table->mask = UDP_HTABLE_SIZE_MIN - 1;
1862 } 1864 }
1863 for (i = 0; i <= table->mask; i++) { 1865 for (i = 0; i <= table->mask; i++) {
1864 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); 1866 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
1865 spin_lock_init(&table->hash[i].lock); 1867 spin_lock_init(&table->hash[i].lock);
1866 } 1868 }
1867 } 1869 }
1868 1870
1869 void __init udp_init(void) 1871 void __init udp_init(void)
1870 { 1872 {
1871 unsigned long nr_pages, limit; 1873 unsigned long nr_pages, limit;
1872 1874
1873 udp_table_init(&udp_table, "UDP"); 1875 udp_table_init(&udp_table, "UDP");
1874 /* Set the pressure threshold up by the same strategy of TCP. It is a 1876 /* Set the pressure threshold up by the same strategy of TCP. It is a
1875 * fraction of global memory that is up to 1/2 at 256 MB, decreasing 1877 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1876 * toward zero with the amount of memory, with a floor of 128 pages. 1878 * toward zero with the amount of memory, with a floor of 128 pages.
1877 */ 1879 */
1878 nr_pages = totalram_pages - totalhigh_pages; 1880 nr_pages = totalram_pages - totalhigh_pages;
1879 limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); 1881 limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
1880 limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); 1882 limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
1881 limit = max(limit, 128UL); 1883 limit = max(limit, 128UL);
1882 sysctl_udp_mem[0] = limit / 4 * 3; 1884 sysctl_udp_mem[0] = limit / 4 * 3;
1883 sysctl_udp_mem[1] = limit; 1885 sysctl_udp_mem[1] = limit;
1884 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; 1886 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
1885 1887
1886 sysctl_udp_rmem_min = SK_MEM_QUANTUM; 1888 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
1887 sysctl_udp_wmem_min = SK_MEM_QUANTUM; 1889 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
1888 } 1890 }
1889 1891
1890 int udp4_ufo_send_check(struct sk_buff *skb) 1892 int udp4_ufo_send_check(struct sk_buff *skb)
1891 { 1893 {
1892 const struct iphdr *iph; 1894 const struct iphdr *iph;
1893 struct udphdr *uh; 1895 struct udphdr *uh;
1894 1896
1895 if (!pskb_may_pull(skb, sizeof(*uh))) 1897 if (!pskb_may_pull(skb, sizeof(*uh)))
1896 return -EINVAL; 1898 return -EINVAL;
1897 1899
1898 iph = ip_hdr(skb); 1900 iph = ip_hdr(skb);
1899 uh = udp_hdr(skb); 1901 uh = udp_hdr(skb);
1900 1902
1901 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 1903 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1902 IPPROTO_UDP, 0); 1904 IPPROTO_UDP, 0);
1903 skb->csum_start = skb_transport_header(skb) - skb->head; 1905 skb->csum_start = skb_transport_header(skb) - skb->head;
1904 skb->csum_offset = offsetof(struct udphdr, check); 1906 skb->csum_offset = offsetof(struct udphdr, check);
1905 skb->ip_summed = CHECKSUM_PARTIAL; 1907 skb->ip_summed = CHECKSUM_PARTIAL;
1906 return 0; 1908 return 0;
1907 } 1909 }
1908 1910
1909 struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) 1911 struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
1910 { 1912 {
1911 struct sk_buff *segs = ERR_PTR(-EINVAL); 1913 struct sk_buff *segs = ERR_PTR(-EINVAL);
1912 unsigned int mss; 1914 unsigned int mss;
1913 int offset; 1915 int offset;
1914 __wsum csum; 1916 __wsum csum;
1915 1917
1916 mss = skb_shinfo(skb)->gso_size; 1918 mss = skb_shinfo(skb)->gso_size;
1917 if (unlikely(skb->len <= mss)) 1919 if (unlikely(skb->len <= mss))
1918 goto out; 1920 goto out;
1919 1921
1920 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { 1922 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
1921 /* Packet is from an untrusted source, reset gso_segs. */ 1923 /* Packet is from an untrusted source, reset gso_segs. */
1922 int type = skb_shinfo(skb)->gso_type; 1924 int type = skb_shinfo(skb)->gso_type;
1923 1925
1924 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || 1926 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
1925 !(type & (SKB_GSO_UDP)))) 1927 !(type & (SKB_GSO_UDP))))
1926 goto out; 1928 goto out;
1927 1929
1928 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); 1930 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
1929 1931
1930 segs = NULL; 1932 segs = NULL;
1931 goto out; 1933 goto out;
1932 } 1934 }
1933 1935
1934 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot 1936 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
1935 * do checksum of UDP packets sent as multiple IP fragments. 1937 * do checksum of UDP packets sent as multiple IP fragments.
1936 */ 1938 */
1937 offset = skb->csum_start - skb_headroom(skb); 1939 offset = skb->csum_start - skb_headroom(skb);
1938 csum = skb_checksum(skb, offset, skb->len - offset, 0); 1940 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1939 offset += skb->csum_offset; 1941 offset += skb->csum_offset;
1940 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 1942 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1941 skb->ip_summed = CHECKSUM_NONE; 1943 skb->ip_summed = CHECKSUM_NONE;
1942 1944
1943 /* Fragment the skb. IP headers of the fragments are updated in 1945 /* Fragment the skb. IP headers of the fragments are updated in
1944 * inet_gso_segment() 1946 * inet_gso_segment()
1945 */ 1947 */
1946 segs = skb_segment(skb, features); 1948 segs = skb_segment(skb, features);
1947 out: 1949 out:
1948 return segs; 1950 return segs;
1949 } 1951 }
1950 1952
1951 1953
1 /* 1 /*
2 * RAW sockets for IPv6 2 * RAW sockets for IPv6
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt> 6 * Pedro Roque <roque@di.fc.ul.pt>
7 * 7 *
8 * Adapted from linux/net/ipv4/raw.c 8 * Adapted from linux/net/ipv4/raw.c
9 * 9 *
10 * Fixes: 10 * Fixes:
11 * Hideaki YOSHIFUJI : sin6_scope_id support 11 * Hideaki YOSHIFUJI : sin6_scope_id support
12 * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) 12 * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
13 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data 13 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
14 * 14 *
15 * This program is free software; you can redistribute it and/or 15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License 16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version. 18 * 2 of the License, or (at your option) any later version.
19 */ 19 */
20 20
21 #include <linux/errno.h> 21 #include <linux/errno.h>
22 #include <linux/types.h> 22 #include <linux/types.h>
23 #include <linux/socket.h> 23 #include <linux/socket.h>
24 #include <linux/sockios.h> 24 #include <linux/sockios.h>
25 #include <linux/net.h> 25 #include <linux/net.h>
26 #include <linux/in6.h> 26 #include <linux/in6.h>
27 #include <linux/netdevice.h> 27 #include <linux/netdevice.h>
28 #include <linux/if_arp.h> 28 #include <linux/if_arp.h>
29 #include <linux/icmpv6.h> 29 #include <linux/icmpv6.h>
30 #include <linux/netfilter.h> 30 #include <linux/netfilter.h>
31 #include <linux/netfilter_ipv6.h> 31 #include <linux/netfilter_ipv6.h>
32 #include <linux/skbuff.h> 32 #include <linux/skbuff.h>
33 #include <asm/uaccess.h> 33 #include <asm/uaccess.h>
34 #include <asm/ioctls.h> 34 #include <asm/ioctls.h>
35 35
36 #include <net/net_namespace.h> 36 #include <net/net_namespace.h>
37 #include <net/ip.h> 37 #include <net/ip.h>
38 #include <net/sock.h> 38 #include <net/sock.h>
39 #include <net/snmp.h> 39 #include <net/snmp.h>
40 40
41 #include <net/ipv6.h> 41 #include <net/ipv6.h>
42 #include <net/ndisc.h> 42 #include <net/ndisc.h>
43 #include <net/protocol.h> 43 #include <net/protocol.h>
44 #include <net/ip6_route.h> 44 #include <net/ip6_route.h>
45 #include <net/ip6_checksum.h> 45 #include <net/ip6_checksum.h>
46 #include <net/addrconf.h> 46 #include <net/addrconf.h>
47 #include <net/transp_v6.h> 47 #include <net/transp_v6.h>
48 #include <net/udp.h> 48 #include <net/udp.h>
49 #include <net/inet_common.h> 49 #include <net/inet_common.h>
50 #include <net/tcp_states.h> 50 #include <net/tcp_states.h>
51 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 51 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
52 #include <net/mip6.h> 52 #include <net/mip6.h>
53 #endif 53 #endif
54 #include <linux/mroute6.h> 54 #include <linux/mroute6.h>
55 55
56 #include <net/raw.h> 56 #include <net/raw.h>
57 #include <net/rawv6.h> 57 #include <net/rawv6.h>
58 #include <net/xfrm.h> 58 #include <net/xfrm.h>
59 59
60 #include <linux/proc_fs.h> 60 #include <linux/proc_fs.h>
61 #include <linux/seq_file.h> 61 #include <linux/seq_file.h>
62 62
63 static struct raw_hashinfo raw_v6_hashinfo = { 63 static struct raw_hashinfo raw_v6_hashinfo = {
64 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), 64 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
65 }; 65 };
66 66
67 static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, 67 static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
68 unsigned short num, struct in6_addr *loc_addr, 68 unsigned short num, struct in6_addr *loc_addr,
69 struct in6_addr *rmt_addr, int dif) 69 struct in6_addr *rmt_addr, int dif)
70 { 70 {
71 struct hlist_node *node; 71 struct hlist_node *node;
72 int is_multicast = ipv6_addr_is_multicast(loc_addr); 72 int is_multicast = ipv6_addr_is_multicast(loc_addr);
73 73
74 sk_for_each_from(sk, node) 74 sk_for_each_from(sk, node)
75 if (inet_sk(sk)->inet_num == num) { 75 if (inet_sk(sk)->inet_num == num) {
76 struct ipv6_pinfo *np = inet6_sk(sk); 76 struct ipv6_pinfo *np = inet6_sk(sk);
77 77
78 if (!net_eq(sock_net(sk), net)) 78 if (!net_eq(sock_net(sk), net))
79 continue; 79 continue;
80 80
81 if (!ipv6_addr_any(&np->daddr) && 81 if (!ipv6_addr_any(&np->daddr) &&
82 !ipv6_addr_equal(&np->daddr, rmt_addr)) 82 !ipv6_addr_equal(&np->daddr, rmt_addr))
83 continue; 83 continue;
84 84
85 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) 85 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
86 continue; 86 continue;
87 87
88 if (!ipv6_addr_any(&np->rcv_saddr)) { 88 if (!ipv6_addr_any(&np->rcv_saddr)) {
89 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) 89 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
90 goto found; 90 goto found;
91 if (is_multicast && 91 if (is_multicast &&
92 inet6_mc_check(sk, loc_addr, rmt_addr)) 92 inet6_mc_check(sk, loc_addr, rmt_addr))
93 goto found; 93 goto found;
94 continue; 94 continue;
95 } 95 }
96 goto found; 96 goto found;
97 } 97 }
98 sk = NULL; 98 sk = NULL;
99 found: 99 found:
100 return sk; 100 return sk;
101 } 101 }
102 102
103 /* 103 /*
104 * 0 - deliver 104 * 0 - deliver
105 * 1 - block 105 * 1 - block
106 */ 106 */
107 static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) 107 static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
108 { 108 {
109 struct icmp6hdr *icmph; 109 struct icmp6hdr *icmph;
110 struct raw6_sock *rp = raw6_sk(sk); 110 struct raw6_sock *rp = raw6_sk(sk);
111 111
112 if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { 112 if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
113 __u32 *data = &rp->filter.data[0]; 113 __u32 *data = &rp->filter.data[0];
114 int bit_nr; 114 int bit_nr;
115 115
116 icmph = (struct icmp6hdr *) skb->data; 116 icmph = (struct icmp6hdr *) skb->data;
117 bit_nr = icmph->icmp6_type; 117 bit_nr = icmph->icmp6_type;
118 118
119 return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; 119 return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
120 } 120 }
121 return 0; 121 return 0;
122 } 122 }
123 123
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/* Optional Mobility Header filter, installed by the MIP6 module. */
static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);

int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
					   struct sk_buff *skb))
{
	rcu_assign_pointer(mh_filter, filter);
	return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_register);

int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
					     struct sk_buff *skb))
{
	rcu_assign_pointer(mh_filter, NULL);
	/* Wait for in-flight readers before the module can go away. */
	synchronize_rcu();
	return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_unregister);

#endif
145 145
146 /* 146 /*
147 * demultiplex raw sockets. 147 * demultiplex raw sockets.
148 * (should consider queueing the skb in the sock receive_queue 148 * (should consider queueing the skb in the sock receive_queue
149 * without calling rawv6.c) 149 * without calling rawv6.c)
150 * 150 *
151 * Caller owns SKB so we must make clones. 151 * Caller owns SKB so we must make clones.
152 */ 152 */
153 static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) 153 static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
154 { 154 {
155 struct in6_addr *saddr; 155 struct in6_addr *saddr;
156 struct in6_addr *daddr; 156 struct in6_addr *daddr;
157 struct sock *sk; 157 struct sock *sk;
158 int delivered = 0; 158 int delivered = 0;
159 __u8 hash; 159 __u8 hash;
160 struct net *net; 160 struct net *net;
161 161
162 saddr = &ipv6_hdr(skb)->saddr; 162 saddr = &ipv6_hdr(skb)->saddr;
163 daddr = saddr + 1; 163 daddr = saddr + 1;
164 164
165 hash = nexthdr & (MAX_INET_PROTOS - 1); 165 hash = nexthdr & (MAX_INET_PROTOS - 1);
166 166
167 read_lock(&raw_v6_hashinfo.lock); 167 read_lock(&raw_v6_hashinfo.lock);
168 sk = sk_head(&raw_v6_hashinfo.ht[hash]); 168 sk = sk_head(&raw_v6_hashinfo.ht[hash]);
169 169
170 if (sk == NULL) 170 if (sk == NULL)
171 goto out; 171 goto out;
172 172
173 net = dev_net(skb->dev); 173 net = dev_net(skb->dev);
174 sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); 174 sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
175 175
176 while (sk) { 176 while (sk) {
177 int filtered; 177 int filtered;
178 178
179 delivered = 1; 179 delivered = 1;
180 switch (nexthdr) { 180 switch (nexthdr) {
181 case IPPROTO_ICMPV6: 181 case IPPROTO_ICMPV6:
182 filtered = icmpv6_filter(sk, skb); 182 filtered = icmpv6_filter(sk, skb);
183 break; 183 break;
184 184
185 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 185 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
186 case IPPROTO_MH: 186 case IPPROTO_MH:
187 { 187 {
188 /* XXX: To validate MH only once for each packet, 188 /* XXX: To validate MH only once for each packet,
189 * this is placed here. It should be after checking 189 * this is placed here. It should be after checking
190 * xfrm policy, however it doesn't. The checking xfrm 190 * xfrm policy, however it doesn't. The checking xfrm
191 * policy is placed in rawv6_rcv() because it is 191 * policy is placed in rawv6_rcv() because it is
192 * required for each socket. 192 * required for each socket.
193 */ 193 */
194 int (*filter)(struct sock *sock, struct sk_buff *skb); 194 int (*filter)(struct sock *sock, struct sk_buff *skb);
195 195
196 filter = rcu_dereference(mh_filter); 196 filter = rcu_dereference(mh_filter);
197 filtered = filter ? filter(sk, skb) : 0; 197 filtered = filter ? filter(sk, skb) : 0;
198 break; 198 break;
199 } 199 }
200 #endif 200 #endif
201 default: 201 default:
202 filtered = 0; 202 filtered = 0;
203 break; 203 break;
204 } 204 }
205 205
206 if (filtered < 0) 206 if (filtered < 0)
207 break; 207 break;
208 if (filtered == 0) { 208 if (filtered == 0) {
209 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 209 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
210 210
211 /* Not releasing hash table! */ 211 /* Not releasing hash table! */
212 if (clone) { 212 if (clone) {
213 nf_reset(clone); 213 nf_reset(clone);
214 rawv6_rcv(sk, clone); 214 rawv6_rcv(sk, clone);
215 } 215 }
216 } 216 }
217 sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, 217 sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
218 IP6CB(skb)->iif); 218 IP6CB(skb)->iif);
219 } 219 }
220 out: 220 out:
221 read_unlock(&raw_v6_hashinfo.lock); 221 read_unlock(&raw_v6_hashinfo.lock);
222 return delivered; 222 return delivered;
223 } 223 }
224 224
225 int raw6_local_deliver(struct sk_buff *skb, int nexthdr) 225 int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
226 { 226 {
227 struct sock *raw_sk; 227 struct sock *raw_sk;
228 228
229 raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); 229 raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
230 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) 230 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
231 raw_sk = NULL; 231 raw_sk = NULL;
232 232
233 return raw_sk != NULL; 233 return raw_sk != NULL;
234 } 234 }
235 235
236 /* This cleans up af_inet6 a bit. -DaveM */ 236 /* This cleans up af_inet6 a bit. -DaveM */
237 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 237 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
238 { 238 {
239 struct inet_sock *inet = inet_sk(sk); 239 struct inet_sock *inet = inet_sk(sk);
240 struct ipv6_pinfo *np = inet6_sk(sk); 240 struct ipv6_pinfo *np = inet6_sk(sk);
241 struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; 241 struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
242 __be32 v4addr = 0; 242 __be32 v4addr = 0;
243 int addr_type; 243 int addr_type;
244 int err; 244 int err;
245 245
246 if (addr_len < SIN6_LEN_RFC2133) 246 if (addr_len < SIN6_LEN_RFC2133)
247 return -EINVAL; 247 return -EINVAL;
248 addr_type = ipv6_addr_type(&addr->sin6_addr); 248 addr_type = ipv6_addr_type(&addr->sin6_addr);
249 249
250 /* Raw sockets are IPv6 only */ 250 /* Raw sockets are IPv6 only */
251 if (addr_type == IPV6_ADDR_MAPPED) 251 if (addr_type == IPV6_ADDR_MAPPED)
252 return(-EADDRNOTAVAIL); 252 return(-EADDRNOTAVAIL);
253 253
254 lock_sock(sk); 254 lock_sock(sk);
255 255
256 err = -EINVAL; 256 err = -EINVAL;
257 if (sk->sk_state != TCP_CLOSE) 257 if (sk->sk_state != TCP_CLOSE)
258 goto out; 258 goto out;
259 259
260 /* Check if the address belongs to the host. */ 260 /* Check if the address belongs to the host. */
261 if (addr_type != IPV6_ADDR_ANY) { 261 if (addr_type != IPV6_ADDR_ANY) {
262 struct net_device *dev = NULL; 262 struct net_device *dev = NULL;
263 263
264 if (addr_type & IPV6_ADDR_LINKLOCAL) { 264 if (addr_type & IPV6_ADDR_LINKLOCAL) {
265 if (addr_len >= sizeof(struct sockaddr_in6) && 265 if (addr_len >= sizeof(struct sockaddr_in6) &&
266 addr->sin6_scope_id) { 266 addr->sin6_scope_id) {
267 /* Override any existing binding, if another 267 /* Override any existing binding, if another
268 * one is supplied by user. 268 * one is supplied by user.
269 */ 269 */
270 sk->sk_bound_dev_if = addr->sin6_scope_id; 270 sk->sk_bound_dev_if = addr->sin6_scope_id;
271 } 271 }
272 272
273 /* Binding to link-local address requires an interface */ 273 /* Binding to link-local address requires an interface */
274 if (!sk->sk_bound_dev_if) 274 if (!sk->sk_bound_dev_if)
275 goto out; 275 goto out;
276 276
277 dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); 277 dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
278 if (!dev) { 278 if (!dev) {
279 err = -ENODEV; 279 err = -ENODEV;
280 goto out; 280 goto out;
281 } 281 }
282 } 282 }
283 283
284 /* ipv4 addr of the socket is invalid. Only the 284 /* ipv4 addr of the socket is invalid. Only the
285 * unspecified and mapped address have a v4 equivalent. 285 * unspecified and mapped address have a v4 equivalent.
286 */ 286 */
287 v4addr = LOOPBACK4_IPV6; 287 v4addr = LOOPBACK4_IPV6;
288 if (!(addr_type & IPV6_ADDR_MULTICAST)) { 288 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
289 err = -EADDRNOTAVAIL; 289 err = -EADDRNOTAVAIL;
290 if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, 290 if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
291 dev, 0)) { 291 dev, 0)) {
292 if (dev) 292 if (dev)
293 dev_put(dev); 293 dev_put(dev);
294 goto out; 294 goto out;
295 } 295 }
296 } 296 }
297 if (dev) 297 if (dev)
298 dev_put(dev); 298 dev_put(dev);
299 } 299 }
300 300
301 inet->inet_rcv_saddr = inet->inet_saddr = v4addr; 301 inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
302 ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); 302 ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
303 if (!(addr_type & IPV6_ADDR_MULTICAST)) 303 if (!(addr_type & IPV6_ADDR_MULTICAST))
304 ipv6_addr_copy(&np->saddr, &addr->sin6_addr); 304 ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
305 err = 0; 305 err = 0;
306 out: 306 out:
307 release_sock(sk); 307 release_sock(sk);
308 return err; 308 return err;
309 } 309 }
310 310
311 static void rawv6_err(struct sock *sk, struct sk_buff *skb, 311 static void rawv6_err(struct sock *sk, struct sk_buff *skb,
312 struct inet6_skb_parm *opt, 312 struct inet6_skb_parm *opt,
313 u8 type, u8 code, int offset, __be32 info) 313 u8 type, u8 code, int offset, __be32 info)
314 { 314 {
315 struct inet_sock *inet = inet_sk(sk); 315 struct inet_sock *inet = inet_sk(sk);
316 struct ipv6_pinfo *np = inet6_sk(sk); 316 struct ipv6_pinfo *np = inet6_sk(sk);
317 int err; 317 int err;
318 int harderr; 318 int harderr;
319 319
320 /* Report error on raw socket, if: 320 /* Report error on raw socket, if:
321 1. User requested recverr. 321 1. User requested recverr.
322 2. Socket is connected (otherwise the error indication 322 2. Socket is connected (otherwise the error indication
323 is useless without recverr and error is hard. 323 is useless without recverr and error is hard.
324 */ 324 */
325 if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) 325 if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
326 return; 326 return;
327 327
328 harderr = icmpv6_err_convert(type, code, &err); 328 harderr = icmpv6_err_convert(type, code, &err);
329 if (type == ICMPV6_PKT_TOOBIG) 329 if (type == ICMPV6_PKT_TOOBIG)
330 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); 330 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
331 331
332 if (np->recverr) { 332 if (np->recverr) {
333 u8 *payload = skb->data; 333 u8 *payload = skb->data;
334 if (!inet->hdrincl) 334 if (!inet->hdrincl)
335 payload += offset; 335 payload += offset;
336 ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); 336 ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
337 } 337 }
338 338
339 if (np->recverr || harderr) { 339 if (np->recverr || harderr) {
340 sk->sk_err = err; 340 sk->sk_err = err;
341 sk->sk_error_report(sk); 341 sk->sk_error_report(sk);
342 } 342 }
343 } 343 }
344 344
345 void raw6_icmp_error(struct sk_buff *skb, int nexthdr, 345 void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
346 u8 type, u8 code, int inner_offset, __be32 info) 346 u8 type, u8 code, int inner_offset, __be32 info)
347 { 347 {
348 struct sock *sk; 348 struct sock *sk;
349 int hash; 349 int hash;
350 struct in6_addr *saddr, *daddr; 350 struct in6_addr *saddr, *daddr;
351 struct net *net; 351 struct net *net;
352 352
353 hash = nexthdr & (RAW_HTABLE_SIZE - 1); 353 hash = nexthdr & (RAW_HTABLE_SIZE - 1);
354 354
355 read_lock(&raw_v6_hashinfo.lock); 355 read_lock(&raw_v6_hashinfo.lock);
356 sk = sk_head(&raw_v6_hashinfo.ht[hash]); 356 sk = sk_head(&raw_v6_hashinfo.ht[hash]);
357 if (sk != NULL) { 357 if (sk != NULL) {
358 /* Note: ipv6_hdr(skb) != skb->data */ 358 /* Note: ipv6_hdr(skb) != skb->data */
359 struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data; 359 struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
360 saddr = &ip6h->saddr; 360 saddr = &ip6h->saddr;
361 daddr = &ip6h->daddr; 361 daddr = &ip6h->daddr;
362 net = dev_net(skb->dev); 362 net = dev_net(skb->dev);
363 363
364 while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, 364 while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
365 IP6CB(skb)->iif))) { 365 IP6CB(skb)->iif))) {
366 rawv6_err(sk, skb, NULL, type, code, 366 rawv6_err(sk, skb, NULL, type, code,
367 inner_offset, info); 367 inner_offset, info);
368 sk = sk_next(sk); 368 sk = sk_next(sk);
369 } 369 }
370 } 370 }
371 read_unlock(&raw_v6_hashinfo.lock); 371 read_unlock(&raw_v6_hashinfo.lock);
372 } 372 }
373 373
374 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 374 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
375 { 375 {
376 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 376 if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
377 skb_checksum_complete(skb)) { 377 skb_checksum_complete(skb)) {
378 atomic_inc(&sk->sk_drops); 378 atomic_inc(&sk->sk_drops);
379 kfree_skb(skb); 379 kfree_skb(skb);
380 return NET_RX_DROP; 380 return NET_RX_DROP;
381 } 381 }
382 382
383 /* Charge it to the socket. */ 383 /* Charge it to the socket. */
384 if (sock_queue_rcv_skb(sk, skb) < 0) { 384 if (sock_queue_rcv_skb(sk, skb) < 0) {
385 kfree_skb(skb); 385 kfree_skb(skb);
386 return NET_RX_DROP; 386 return NET_RX_DROP;
387 } 387 }
388 388
389 return 0; 389 return 0;
390 } 390 }
391 391
392 /* 392 /*
393 * This is next to useless... 393 * This is next to useless...
394 * if we demultiplex in network layer we don't need the extra call 394 * if we demultiplex in network layer we don't need the extra call
395 * just to queue the skb... 395 * just to queue the skb...
396 * maybe we could have the network decide upon a hint if it 396 * maybe we could have the network decide upon a hint if it
397 * should call raw_rcv for demultiplexing 397 * should call raw_rcv for demultiplexing
398 */ 398 */
399 int rawv6_rcv(struct sock *sk, struct sk_buff *skb) 399 int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
400 { 400 {
401 struct inet_sock *inet = inet_sk(sk); 401 struct inet_sock *inet = inet_sk(sk);
402 struct raw6_sock *rp = raw6_sk(sk); 402 struct raw6_sock *rp = raw6_sk(sk);
403 403
404 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 404 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
405 atomic_inc(&sk->sk_drops); 405 atomic_inc(&sk->sk_drops);
406 kfree_skb(skb); 406 kfree_skb(skb);
407 return NET_RX_DROP; 407 return NET_RX_DROP;
408 } 408 }
409 409
410 if (!rp->checksum) 410 if (!rp->checksum)
411 skb->ip_summed = CHECKSUM_UNNECESSARY; 411 skb->ip_summed = CHECKSUM_UNNECESSARY;
412 412
413 if (skb->ip_summed == CHECKSUM_COMPLETE) { 413 if (skb->ip_summed == CHECKSUM_COMPLETE) {
414 skb_postpull_rcsum(skb, skb_network_header(skb), 414 skb_postpull_rcsum(skb, skb_network_header(skb),
415 skb_network_header_len(skb)); 415 skb_network_header_len(skb));
416 if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 416 if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
417 &ipv6_hdr(skb)->daddr, 417 &ipv6_hdr(skb)->daddr,
418 skb->len, inet->inet_num, skb->csum)) 418 skb->len, inet->inet_num, skb->csum))
419 skb->ip_summed = CHECKSUM_UNNECESSARY; 419 skb->ip_summed = CHECKSUM_UNNECESSARY;
420 } 420 }
421 if (!skb_csum_unnecessary(skb)) 421 if (!skb_csum_unnecessary(skb))
422 skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 422 skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
423 &ipv6_hdr(skb)->daddr, 423 &ipv6_hdr(skb)->daddr,
424 skb->len, 424 skb->len,
425 inet->inet_num, 0)); 425 inet->inet_num, 0));
426 426
427 if (inet->hdrincl) { 427 if (inet->hdrincl) {
428 if (skb_checksum_complete(skb)) { 428 if (skb_checksum_complete(skb)) {
429 atomic_inc(&sk->sk_drops); 429 atomic_inc(&sk->sk_drops);
430 kfree_skb(skb); 430 kfree_skb(skb);
431 return NET_RX_DROP; 431 return NET_RX_DROP;
432 } 432 }
433 } 433 }
434 434
435 rawv6_rcv_skb(sk, skb); 435 rawv6_rcv_skb(sk, skb);
436 return 0; 436 return 0;
437 } 437 }
438 438
439 439
440 /* 440 /*
441 * This should be easy, if there is something there 441 * This should be easy, if there is something there
442 * we return it, otherwise we block. 442 * we return it, otherwise we block.
443 */ 443 */
444 444
445 static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, 445 static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
446 struct msghdr *msg, size_t len, 446 struct msghdr *msg, size_t len,
447 int noblock, int flags, int *addr_len) 447 int noblock, int flags, int *addr_len)
448 { 448 {
449 struct ipv6_pinfo *np = inet6_sk(sk); 449 struct ipv6_pinfo *np = inet6_sk(sk);
450 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; 450 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
451 struct sk_buff *skb; 451 struct sk_buff *skb;
452 size_t copied; 452 size_t copied;
453 int err; 453 int err;
454 454
455 if (flags & MSG_OOB) 455 if (flags & MSG_OOB)
456 return -EOPNOTSUPP; 456 return -EOPNOTSUPP;
457 457
458 if (addr_len) 458 if (addr_len)
459 *addr_len=sizeof(*sin6); 459 *addr_len=sizeof(*sin6);
460 460
461 if (flags & MSG_ERRQUEUE) 461 if (flags & MSG_ERRQUEUE)
462 return ipv6_recv_error(sk, msg, len); 462 return ipv6_recv_error(sk, msg, len);
463 463
464 skb = skb_recv_datagram(sk, flags, noblock, &err); 464 skb = skb_recv_datagram(sk, flags, noblock, &err);
465 if (!skb) 465 if (!skb)
466 goto out; 466 goto out;
467 467
468 copied = skb->len; 468 copied = skb->len;
469 if (copied > len) { 469 if (copied > len) {
470 copied = len; 470 copied = len;
471 msg->msg_flags |= MSG_TRUNC; 471 msg->msg_flags |= MSG_TRUNC;
472 } 472 }
473 473
474 if (skb_csum_unnecessary(skb)) { 474 if (skb_csum_unnecessary(skb)) {
475 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 475 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
476 } else if (msg->msg_flags&MSG_TRUNC) { 476 } else if (msg->msg_flags&MSG_TRUNC) {
477 if (__skb_checksum_complete(skb)) 477 if (__skb_checksum_complete(skb))
478 goto csum_copy_err; 478 goto csum_copy_err;
479 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 479 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
480 } else { 480 } else {
481 err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); 481 err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
482 if (err == -EINVAL) 482 if (err == -EINVAL)
483 goto csum_copy_err; 483 goto csum_copy_err;
484 } 484 }
485 if (err) 485 if (err)
486 goto out_free; 486 goto out_free;
487 487
488 /* Copy the address. */ 488 /* Copy the address. */
489 if (sin6) { 489 if (sin6) {
490 sin6->sin6_family = AF_INET6; 490 sin6->sin6_family = AF_INET6;
491 sin6->sin6_port = 0; 491 sin6->sin6_port = 0;
492 ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); 492 ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
493 sin6->sin6_flowinfo = 0; 493 sin6->sin6_flowinfo = 0;
494 sin6->sin6_scope_id = 0; 494 sin6->sin6_scope_id = 0;
495 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 495 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
496 sin6->sin6_scope_id = IP6CB(skb)->iif; 496 sin6->sin6_scope_id = IP6CB(skb)->iif;
497 } 497 }
498 498
499 sock_recv_ts_and_drops(msg, sk, skb); 499 sock_recv_ts_and_drops(msg, sk, skb);
500 500
501 if (np->rxopt.all) 501 if (np->rxopt.all)
502 datagram_recv_ctl(sk, msg, skb); 502 datagram_recv_ctl(sk, msg, skb);
503 503
504 err = copied; 504 err = copied;
505 if (flags & MSG_TRUNC) 505 if (flags & MSG_TRUNC)
506 err = skb->len; 506 err = skb->len;
507 507
508 out_free: 508 out_free:
509 skb_free_datagram(sk, skb); 509 skb_free_datagram(sk, skb);
510 out: 510 out:
511 return err; 511 return err;
512 512
513 csum_copy_err: 513 csum_copy_err:
514 skb_kill_datagram(sk, skb, flags); 514 skb_kill_datagram(sk, skb, flags);
515 515
516 /* Error for blocking case is chosen to masquerade 516 /* Error for blocking case is chosen to masquerade
517 as some normal condition. 517 as some normal condition.
518 */ 518 */
519 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 519 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
520 atomic_inc(&sk->sk_drops);
521 goto out; 520 goto out;
522 } 521 }
523 522
524 static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, 523 static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
525 struct raw6_sock *rp) 524 struct raw6_sock *rp)
526 { 525 {
527 struct sk_buff *skb; 526 struct sk_buff *skb;
528 int err = 0; 527 int err = 0;
529 int offset; 528 int offset;
530 int len; 529 int len;
531 int total_len; 530 int total_len;
532 __wsum tmp_csum; 531 __wsum tmp_csum;
533 __sum16 csum; 532 __sum16 csum;
534 533
535 if (!rp->checksum) 534 if (!rp->checksum)
536 goto send; 535 goto send;
537 536
538 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) 537 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
539 goto out; 538 goto out;
540 539
541 offset = rp->offset; 540 offset = rp->offset;
542 total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) - 541 total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
543 skb->data); 542 skb->data);
544 if (offset >= total_len - 1) { 543 if (offset >= total_len - 1) {
545 err = -EINVAL; 544 err = -EINVAL;
546 ip6_flush_pending_frames(sk); 545 ip6_flush_pending_frames(sk);
547 goto out; 546 goto out;
548 } 547 }
549 548
550 /* should be check HW csum miyazawa */ 549 /* should be check HW csum miyazawa */
551 if (skb_queue_len(&sk->sk_write_queue) == 1) { 550 if (skb_queue_len(&sk->sk_write_queue) == 1) {
552 /* 551 /*
553 * Only one fragment on the socket. 552 * Only one fragment on the socket.
554 */ 553 */
555 tmp_csum = skb->csum; 554 tmp_csum = skb->csum;
556 } else { 555 } else {
557 struct sk_buff *csum_skb = NULL; 556 struct sk_buff *csum_skb = NULL;
558 tmp_csum = 0; 557 tmp_csum = 0;
559 558
560 skb_queue_walk(&sk->sk_write_queue, skb) { 559 skb_queue_walk(&sk->sk_write_queue, skb) {
561 tmp_csum = csum_add(tmp_csum, skb->csum); 560 tmp_csum = csum_add(tmp_csum, skb->csum);
562 561
563 if (csum_skb) 562 if (csum_skb)
564 continue; 563 continue;
565 564
566 len = skb->len - skb_transport_offset(skb); 565 len = skb->len - skb_transport_offset(skb);
567 if (offset >= len) { 566 if (offset >= len) {
568 offset -= len; 567 offset -= len;
569 continue; 568 continue;
570 } 569 }
571 570
572 csum_skb = skb; 571 csum_skb = skb;
573 } 572 }
574 573
575 skb = csum_skb; 574 skb = csum_skb;
576 } 575 }
577 576
578 offset += skb_transport_offset(skb); 577 offset += skb_transport_offset(skb);
579 if (skb_copy_bits(skb, offset, &csum, 2)) 578 if (skb_copy_bits(skb, offset, &csum, 2))
580 BUG(); 579 BUG();
581 580
582 /* in case cksum was not initialized */ 581 /* in case cksum was not initialized */
583 if (unlikely(csum)) 582 if (unlikely(csum))
584 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); 583 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
585 584
586 csum = csum_ipv6_magic(&fl->fl6_src, 585 csum = csum_ipv6_magic(&fl->fl6_src,
587 &fl->fl6_dst, 586 &fl->fl6_dst,
588 total_len, fl->proto, tmp_csum); 587 total_len, fl->proto, tmp_csum);
589 588
590 if (csum == 0 && fl->proto == IPPROTO_UDP) 589 if (csum == 0 && fl->proto == IPPROTO_UDP)
591 csum = CSUM_MANGLED_0; 590 csum = CSUM_MANGLED_0;
592 591
593 if (skb_store_bits(skb, offset, &csum, 2)) 592 if (skb_store_bits(skb, offset, &csum, 2))
594 BUG(); 593 BUG();
595 594
596 send: 595 send:
597 err = ip6_push_pending_frames(sk); 596 err = ip6_push_pending_frames(sk);
598 out: 597 out:
599 return err; 598 return err;
600 } 599 }
601 600
602 static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, 601 static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
603 struct flowi *fl, struct rt6_info *rt, 602 struct flowi *fl, struct rt6_info *rt,
604 unsigned int flags) 603 unsigned int flags)
605 { 604 {
606 struct ipv6_pinfo *np = inet6_sk(sk); 605 struct ipv6_pinfo *np = inet6_sk(sk);
607 struct ipv6hdr *iph; 606 struct ipv6hdr *iph;
608 struct sk_buff *skb; 607 struct sk_buff *skb;
609 int err; 608 int err;
610 609
611 if (length > rt->u.dst.dev->mtu) { 610 if (length > rt->u.dst.dev->mtu) {
612 ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); 611 ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
613 return -EMSGSIZE; 612 return -EMSGSIZE;
614 } 613 }
615 if (flags&MSG_PROBE) 614 if (flags&MSG_PROBE)
616 goto out; 615 goto out;
617 616
618 skb = sock_alloc_send_skb(sk, 617 skb = sock_alloc_send_skb(sk,
619 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, 618 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
620 flags & MSG_DONTWAIT, &err); 619 flags & MSG_DONTWAIT, &err);
621 if (skb == NULL) 620 if (skb == NULL)
622 goto error; 621 goto error;
623 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); 622 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
624 623
625 skb->priority = sk->sk_priority; 624 skb->priority = sk->sk_priority;
626 skb->mark = sk->sk_mark; 625 skb->mark = sk->sk_mark;
627 skb_dst_set(skb, dst_clone(&rt->u.dst)); 626 skb_dst_set(skb, dst_clone(&rt->u.dst));
628 627
629 skb_put(skb, length); 628 skb_put(skb, length);
630 skb_reset_network_header(skb); 629 skb_reset_network_header(skb);
631 iph = ipv6_hdr(skb); 630 iph = ipv6_hdr(skb);
632 631
633 skb->ip_summed = CHECKSUM_NONE; 632 skb->ip_summed = CHECKSUM_NONE;
634 633
635 skb->transport_header = skb->network_header; 634 skb->transport_header = skb->network_header;
636 err = memcpy_fromiovecend((void *)iph, from, 0, length); 635 err = memcpy_fromiovecend((void *)iph, from, 0, length);
637 if (err) 636 if (err)
638 goto error_fault; 637 goto error_fault;
639 638
640 IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 639 IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
641 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 640 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
642 dst_output); 641 dst_output);
643 if (err > 0) 642 if (err > 0)
644 err = net_xmit_errno(err); 643 err = net_xmit_errno(err);
645 if (err) 644 if (err)
646 goto error; 645 goto error;
647 out: 646 out:
648 return 0; 647 return 0;
649 648
650 error_fault: 649 error_fault:
651 err = -EFAULT; 650 err = -EFAULT;
652 kfree_skb(skb); 651 kfree_skb(skb);
653 error: 652 error:
654 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 653 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
655 if (err == -ENOBUFS && !np->recverr) 654 if (err == -ENOBUFS && !np->recverr)
656 err = 0; 655 err = 0;
657 return err; 656 return err;
658 } 657 }
659 658
660 static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) 659 static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
661 { 660 {
662 struct iovec *iov; 661 struct iovec *iov;
663 u8 __user *type = NULL; 662 u8 __user *type = NULL;
664 u8 __user *code = NULL; 663 u8 __user *code = NULL;
665 u8 len = 0; 664 u8 len = 0;
666 int probed = 0; 665 int probed = 0;
667 int i; 666 int i;
668 667
669 if (!msg->msg_iov) 668 if (!msg->msg_iov)
670 return 0; 669 return 0;
671 670
672 for (i = 0; i < msg->msg_iovlen; i++) { 671 for (i = 0; i < msg->msg_iovlen; i++) {
673 iov = &msg->msg_iov[i]; 672 iov = &msg->msg_iov[i];
674 if (!iov) 673 if (!iov)
675 continue; 674 continue;
676 675
677 switch (fl->proto) { 676 switch (fl->proto) {
678 case IPPROTO_ICMPV6: 677 case IPPROTO_ICMPV6:
679 /* check if one-byte field is readable or not. */ 678 /* check if one-byte field is readable or not. */
680 if (iov->iov_base && iov->iov_len < 1) 679 if (iov->iov_base && iov->iov_len < 1)
681 break; 680 break;
682 681
683 if (!type) { 682 if (!type) {
684 type = iov->iov_base; 683 type = iov->iov_base;
685 /* check if code field is readable or not. */ 684 /* check if code field is readable or not. */
686 if (iov->iov_len > 1) 685 if (iov->iov_len > 1)
687 code = type + 1; 686 code = type + 1;
688 } else if (!code) 687 } else if (!code)
689 code = iov->iov_base; 688 code = iov->iov_base;
690 689
691 if (type && code) { 690 if (type && code) {
692 if (get_user(fl->fl_icmp_type, type) || 691 if (get_user(fl->fl_icmp_type, type) ||
693 get_user(fl->fl_icmp_code, code)) 692 get_user(fl->fl_icmp_code, code))
694 return -EFAULT; 693 return -EFAULT;
695 probed = 1; 694 probed = 1;
696 } 695 }
697 break; 696 break;
698 case IPPROTO_MH: 697 case IPPROTO_MH:
699 if (iov->iov_base && iov->iov_len < 1) 698 if (iov->iov_base && iov->iov_len < 1)
700 break; 699 break;
701 /* check if type field is readable or not. */ 700 /* check if type field is readable or not. */
702 if (iov->iov_len > 2 - len) { 701 if (iov->iov_len > 2 - len) {
703 u8 __user *p = iov->iov_base; 702 u8 __user *p = iov->iov_base;
704 if (get_user(fl->fl_mh_type, &p[2 - len])) 703 if (get_user(fl->fl_mh_type, &p[2 - len]))
705 return -EFAULT; 704 return -EFAULT;
706 probed = 1; 705 probed = 1;
707 } else 706 } else
708 len += iov->iov_len; 707 len += iov->iov_len;
709 708
710 break; 709 break;
711 default: 710 default:
712 probed = 1; 711 probed = 1;
713 break; 712 break;
714 } 713 }
715 if (probed) 714 if (probed)
716 break; 715 break;
717 } 716 }
718 return 0; 717 return 0;
719 } 718 }
720 719
721 static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, 720 static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
722 struct msghdr *msg, size_t len) 721 struct msghdr *msg, size_t len)
723 { 722 {
724 struct ipv6_txoptions opt_space; 723 struct ipv6_txoptions opt_space;
725 struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; 724 struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
726 struct in6_addr *daddr, *final_p = NULL, final; 725 struct in6_addr *daddr, *final_p = NULL, final;
727 struct inet_sock *inet = inet_sk(sk); 726 struct inet_sock *inet = inet_sk(sk);
728 struct ipv6_pinfo *np = inet6_sk(sk); 727 struct ipv6_pinfo *np = inet6_sk(sk);
729 struct raw6_sock *rp = raw6_sk(sk); 728 struct raw6_sock *rp = raw6_sk(sk);
730 struct ipv6_txoptions *opt = NULL; 729 struct ipv6_txoptions *opt = NULL;
731 struct ip6_flowlabel *flowlabel = NULL; 730 struct ip6_flowlabel *flowlabel = NULL;
732 struct dst_entry *dst = NULL; 731 struct dst_entry *dst = NULL;
733 struct flowi fl; 732 struct flowi fl;
734 int addr_len = msg->msg_namelen; 733 int addr_len = msg->msg_namelen;
735 int hlimit = -1; 734 int hlimit = -1;
736 int tclass = -1; 735 int tclass = -1;
737 u16 proto; 736 u16 proto;
738 int err; 737 int err;
739 738
740 /* Rough check on arithmetic overflow, 739 /* Rough check on arithmetic overflow,
741 better check is made in ip6_append_data(). 740 better check is made in ip6_append_data().
742 */ 741 */
743 if (len > INT_MAX) 742 if (len > INT_MAX)
744 return -EMSGSIZE; 743 return -EMSGSIZE;
745 744
746 /* Mirror BSD error message compatibility */ 745 /* Mirror BSD error message compatibility */
747 if (msg->msg_flags & MSG_OOB) 746 if (msg->msg_flags & MSG_OOB)
748 return -EOPNOTSUPP; 747 return -EOPNOTSUPP;
749 748
750 /* 749 /*
751 * Get and verify the address. 750 * Get and verify the address.
752 */ 751 */
753 memset(&fl, 0, sizeof(fl)); 752 memset(&fl, 0, sizeof(fl));
754 753
755 fl.mark = sk->sk_mark; 754 fl.mark = sk->sk_mark;
756 755
757 if (sin6) { 756 if (sin6) {
758 if (addr_len < SIN6_LEN_RFC2133) 757 if (addr_len < SIN6_LEN_RFC2133)
759 return -EINVAL; 758 return -EINVAL;
760 759
761 if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 760 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
762 return(-EAFNOSUPPORT); 761 return(-EAFNOSUPPORT);
763 762
764 /* port is the proto value [0..255] carried in nexthdr */ 763 /* port is the proto value [0..255] carried in nexthdr */
765 proto = ntohs(sin6->sin6_port); 764 proto = ntohs(sin6->sin6_port);
766 765
767 if (!proto) 766 if (!proto)
768 proto = inet->inet_num; 767 proto = inet->inet_num;
769 else if (proto != inet->inet_num) 768 else if (proto != inet->inet_num)
770 return(-EINVAL); 769 return(-EINVAL);
771 770
772 if (proto > 255) 771 if (proto > 255)
773 return(-EINVAL); 772 return(-EINVAL);
774 773
775 daddr = &sin6->sin6_addr; 774 daddr = &sin6->sin6_addr;
776 if (np->sndflow) { 775 if (np->sndflow) {
777 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 776 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
778 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 777 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
779 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 778 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
780 if (flowlabel == NULL) 779 if (flowlabel == NULL)
781 return -EINVAL; 780 return -EINVAL;
782 daddr = &flowlabel->dst; 781 daddr = &flowlabel->dst;
783 } 782 }
784 } 783 }
785 784
786 /* 785 /*
787 * Otherwise it will be difficult to maintain 786 * Otherwise it will be difficult to maintain
788 * sk->sk_dst_cache. 787 * sk->sk_dst_cache.
789 */ 788 */
790 if (sk->sk_state == TCP_ESTABLISHED && 789 if (sk->sk_state == TCP_ESTABLISHED &&
791 ipv6_addr_equal(daddr, &np->daddr)) 790 ipv6_addr_equal(daddr, &np->daddr))
792 daddr = &np->daddr; 791 daddr = &np->daddr;
793 792
794 if (addr_len >= sizeof(struct sockaddr_in6) && 793 if (addr_len >= sizeof(struct sockaddr_in6) &&
795 sin6->sin6_scope_id && 794 sin6->sin6_scope_id &&
796 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 795 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
797 fl.oif = sin6->sin6_scope_id; 796 fl.oif = sin6->sin6_scope_id;
798 } else { 797 } else {
799 if (sk->sk_state != TCP_ESTABLISHED) 798 if (sk->sk_state != TCP_ESTABLISHED)
800 return -EDESTADDRREQ; 799 return -EDESTADDRREQ;
801 800
802 proto = inet->inet_num; 801 proto = inet->inet_num;
803 daddr = &np->daddr; 802 daddr = &np->daddr;
804 fl.fl6_flowlabel = np->flow_label; 803 fl.fl6_flowlabel = np->flow_label;
805 } 804 }
806 805
807 if (fl.oif == 0) 806 if (fl.oif == 0)
808 fl.oif = sk->sk_bound_dev_if; 807 fl.oif = sk->sk_bound_dev_if;
809 808
810 if (msg->msg_controllen) { 809 if (msg->msg_controllen) {
811 opt = &opt_space; 810 opt = &opt_space;
812 memset(opt, 0, sizeof(struct ipv6_txoptions)); 811 memset(opt, 0, sizeof(struct ipv6_txoptions));
813 opt->tot_len = sizeof(struct ipv6_txoptions); 812 opt->tot_len = sizeof(struct ipv6_txoptions);
814 813
815 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); 814 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
816 if (err < 0) { 815 if (err < 0) {
817 fl6_sock_release(flowlabel); 816 fl6_sock_release(flowlabel);
818 return err; 817 return err;
819 } 818 }
820 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 819 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
821 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 820 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
822 if (flowlabel == NULL) 821 if (flowlabel == NULL)
823 return -EINVAL; 822 return -EINVAL;
824 } 823 }
825 if (!(opt->opt_nflen|opt->opt_flen)) 824 if (!(opt->opt_nflen|opt->opt_flen))
826 opt = NULL; 825 opt = NULL;
827 } 826 }
828 if (opt == NULL) 827 if (opt == NULL)
829 opt = np->opt; 828 opt = np->opt;
830 if (flowlabel) 829 if (flowlabel)
831 opt = fl6_merge_options(&opt_space, flowlabel, opt); 830 opt = fl6_merge_options(&opt_space, flowlabel, opt);
832 opt = ipv6_fixup_options(&opt_space, opt); 831 opt = ipv6_fixup_options(&opt_space, opt);
833 832
834 fl.proto = proto; 833 fl.proto = proto;
835 err = rawv6_probe_proto_opt(&fl, msg); 834 err = rawv6_probe_proto_opt(&fl, msg);
836 if (err) 835 if (err)
837 goto out; 836 goto out;
838 837
839 if (!ipv6_addr_any(daddr)) 838 if (!ipv6_addr_any(daddr))
840 ipv6_addr_copy(&fl.fl6_dst, daddr); 839 ipv6_addr_copy(&fl.fl6_dst, daddr);
841 else 840 else
842 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 841 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
843 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 842 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
844 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 843 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
845 844
846 /* merge ip6_build_xmit from ip6_output */ 845 /* merge ip6_build_xmit from ip6_output */
847 if (opt && opt->srcrt) { 846 if (opt && opt->srcrt) {
848 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; 847 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
849 ipv6_addr_copy(&final, &fl.fl6_dst); 848 ipv6_addr_copy(&final, &fl.fl6_dst);
850 ipv6_addr_copy(&fl.fl6_dst, rt0->addr); 849 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
851 final_p = &final; 850 final_p = &final;
852 } 851 }
853 852
854 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 853 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
855 fl.oif = np->mcast_oif; 854 fl.oif = np->mcast_oif;
856 security_sk_classify_flow(sk, &fl); 855 security_sk_classify_flow(sk, &fl);
857 856
858 err = ip6_dst_lookup(sk, &dst, &fl); 857 err = ip6_dst_lookup(sk, &dst, &fl);
859 if (err) 858 if (err)
860 goto out; 859 goto out;
861 if (final_p) 860 if (final_p)
862 ipv6_addr_copy(&fl.fl6_dst, final_p); 861 ipv6_addr_copy(&fl.fl6_dst, final_p);
863 862
864 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); 863 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
865 if (err < 0) { 864 if (err < 0) {
866 if (err == -EREMOTE) 865 if (err == -EREMOTE)
867 err = ip6_dst_blackhole(sk, &dst, &fl); 866 err = ip6_dst_blackhole(sk, &dst, &fl);
868 if (err < 0) 867 if (err < 0)
869 goto out; 868 goto out;
870 } 869 }
871 870
872 if (hlimit < 0) { 871 if (hlimit < 0) {
873 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 872 if (ipv6_addr_is_multicast(&fl.fl6_dst))
874 hlimit = np->mcast_hops; 873 hlimit = np->mcast_hops;
875 else 874 else
876 hlimit = np->hop_limit; 875 hlimit = np->hop_limit;
877 if (hlimit < 0) 876 if (hlimit < 0)
878 hlimit = ip6_dst_hoplimit(dst); 877 hlimit = ip6_dst_hoplimit(dst);
879 } 878 }
880 879
881 if (tclass < 0) 880 if (tclass < 0)
882 tclass = np->tclass; 881 tclass = np->tclass;
883 882
884 if (msg->msg_flags&MSG_CONFIRM) 883 if (msg->msg_flags&MSG_CONFIRM)
885 goto do_confirm; 884 goto do_confirm;
886 885
887 back_from_confirm: 886 back_from_confirm:
888 if (inet->hdrincl) { 887 if (inet->hdrincl) {
889 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); 888 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
890 } else { 889 } else {
891 lock_sock(sk); 890 lock_sock(sk);
892 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, 891 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
893 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, 892 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
894 msg->msg_flags); 893 msg->msg_flags);
895 894
896 if (err) 895 if (err)
897 ip6_flush_pending_frames(sk); 896 ip6_flush_pending_frames(sk);
898 else if (!(msg->msg_flags & MSG_MORE)) 897 else if (!(msg->msg_flags & MSG_MORE))
899 err = rawv6_push_pending_frames(sk, &fl, rp); 898 err = rawv6_push_pending_frames(sk, &fl, rp);
900 release_sock(sk); 899 release_sock(sk);
901 } 900 }
902 done: 901 done:
903 dst_release(dst); 902 dst_release(dst);
904 out: 903 out:
905 fl6_sock_release(flowlabel); 904 fl6_sock_release(flowlabel);
906 return err<0?err:len; 905 return err<0?err:len;
907 do_confirm: 906 do_confirm:
908 dst_confirm(dst); 907 dst_confirm(dst);
909 if (!(msg->msg_flags & MSG_PROBE) || len) 908 if (!(msg->msg_flags & MSG_PROBE) || len)
910 goto back_from_confirm; 909 goto back_from_confirm;
911 err = 0; 910 err = 0;
912 goto done; 911 goto done;
913 } 912 }
914 913
915 static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 914 static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
916 char __user *optval, int optlen) 915 char __user *optval, int optlen)
917 { 916 {
918 switch (optname) { 917 switch (optname) {
919 case ICMPV6_FILTER: 918 case ICMPV6_FILTER:
920 if (optlen > sizeof(struct icmp6_filter)) 919 if (optlen > sizeof(struct icmp6_filter))
921 optlen = sizeof(struct icmp6_filter); 920 optlen = sizeof(struct icmp6_filter);
922 if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen)) 921 if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
923 return -EFAULT; 922 return -EFAULT;
924 return 0; 923 return 0;
925 default: 924 default:
926 return -ENOPROTOOPT; 925 return -ENOPROTOOPT;
927 } 926 }
928 927
929 return 0; 928 return 0;
930 } 929 }
931 930
932 static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, 931 static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
933 char __user *optval, int __user *optlen) 932 char __user *optval, int __user *optlen)
934 { 933 {
935 int len; 934 int len;
936 935
937 switch (optname) { 936 switch (optname) {
938 case ICMPV6_FILTER: 937 case ICMPV6_FILTER:
939 if (get_user(len, optlen)) 938 if (get_user(len, optlen))
940 return -EFAULT; 939 return -EFAULT;
941 if (len < 0) 940 if (len < 0)
942 return -EINVAL; 941 return -EINVAL;
943 if (len > sizeof(struct icmp6_filter)) 942 if (len > sizeof(struct icmp6_filter))
944 len = sizeof(struct icmp6_filter); 943 len = sizeof(struct icmp6_filter);
945 if (put_user(len, optlen)) 944 if (put_user(len, optlen))
946 return -EFAULT; 945 return -EFAULT;
947 if (copy_to_user(optval, &raw6_sk(sk)->filter, len)) 946 if (copy_to_user(optval, &raw6_sk(sk)->filter, len))
948 return -EFAULT; 947 return -EFAULT;
949 return 0; 948 return 0;
950 default: 949 default:
951 return -ENOPROTOOPT; 950 return -ENOPROTOOPT;
952 } 951 }
953 952
954 return 0; 953 return 0;
955 } 954 }
956 955
957 956
958 static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, 957 static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
959 char __user *optval, unsigned int optlen) 958 char __user *optval, unsigned int optlen)
960 { 959 {
961 struct raw6_sock *rp = raw6_sk(sk); 960 struct raw6_sock *rp = raw6_sk(sk);
962 int val; 961 int val;
963 962
964 if (get_user(val, (int __user *)optval)) 963 if (get_user(val, (int __user *)optval))
965 return -EFAULT; 964 return -EFAULT;
966 965
967 switch (optname) { 966 switch (optname) {
968 case IPV6_CHECKSUM: 967 case IPV6_CHECKSUM:
969 if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && 968 if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
970 level == IPPROTO_IPV6) { 969 level == IPPROTO_IPV6) {
971 /* 970 /*
972 * RFC3542 tells that IPV6_CHECKSUM socket 971 * RFC3542 tells that IPV6_CHECKSUM socket
973 * option in the IPPROTO_IPV6 level is not 972 * option in the IPPROTO_IPV6 level is not
974 * allowed on ICMPv6 sockets. 973 * allowed on ICMPv6 sockets.
975 * If you want to set it, use IPPROTO_RAW 974 * If you want to set it, use IPPROTO_RAW
976 * level IPV6_CHECKSUM socket option 975 * level IPV6_CHECKSUM socket option
977 * (Linux extension). 976 * (Linux extension).
978 */ 977 */
979 return -EINVAL; 978 return -EINVAL;
980 } 979 }
981 980
982 /* You may get strange result with a positive odd offset; 981 /* You may get strange result with a positive odd offset;
983 RFC2292bis agrees with me. */ 982 RFC2292bis agrees with me. */
984 if (val > 0 && (val&1)) 983 if (val > 0 && (val&1))
985 return(-EINVAL); 984 return(-EINVAL);
986 if (val < 0) { 985 if (val < 0) {
987 rp->checksum = 0; 986 rp->checksum = 0;
988 } else { 987 } else {
989 rp->checksum = 1; 988 rp->checksum = 1;
990 rp->offset = val; 989 rp->offset = val;
991 } 990 }
992 991
993 return 0; 992 return 0;
994 break; 993 break;
995 994
996 default: 995 default:
997 return(-ENOPROTOOPT); 996 return(-ENOPROTOOPT);
998 } 997 }
999 } 998 }
1000 999
1001 static int rawv6_setsockopt(struct sock *sk, int level, int optname, 1000 static int rawv6_setsockopt(struct sock *sk, int level, int optname,
1002 char __user *optval, unsigned int optlen) 1001 char __user *optval, unsigned int optlen)
1003 { 1002 {
1004 switch(level) { 1003 switch(level) {
1005 case SOL_RAW: 1004 case SOL_RAW:
1006 break; 1005 break;
1007 1006
1008 case SOL_ICMPV6: 1007 case SOL_ICMPV6:
1009 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1008 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1010 return -EOPNOTSUPP; 1009 return -EOPNOTSUPP;
1011 return rawv6_seticmpfilter(sk, level, optname, optval, 1010 return rawv6_seticmpfilter(sk, level, optname, optval,
1012 optlen); 1011 optlen);
1013 case SOL_IPV6: 1012 case SOL_IPV6:
1014 if (optname == IPV6_CHECKSUM) 1013 if (optname == IPV6_CHECKSUM)
1015 break; 1014 break;
1016 default: 1015 default:
1017 return ipv6_setsockopt(sk, level, optname, optval, 1016 return ipv6_setsockopt(sk, level, optname, optval,
1018 optlen); 1017 optlen);
1019 } 1018 }
1020 1019
1021 return do_rawv6_setsockopt(sk, level, optname, optval, optlen); 1020 return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
1022 } 1021 }
1023 1022
1024 #ifdef CONFIG_COMPAT 1023 #ifdef CONFIG_COMPAT
1025 static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, 1024 static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
1026 char __user *optval, unsigned int optlen) 1025 char __user *optval, unsigned int optlen)
1027 { 1026 {
1028 switch (level) { 1027 switch (level) {
1029 case SOL_RAW: 1028 case SOL_RAW:
1030 break; 1029 break;
1031 case SOL_ICMPV6: 1030 case SOL_ICMPV6:
1032 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1031 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1033 return -EOPNOTSUPP; 1032 return -EOPNOTSUPP;
1034 return rawv6_seticmpfilter(sk, level, optname, optval, optlen); 1033 return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
1035 case SOL_IPV6: 1034 case SOL_IPV6:
1036 if (optname == IPV6_CHECKSUM) 1035 if (optname == IPV6_CHECKSUM)
1037 break; 1036 break;
1038 default: 1037 default:
1039 return compat_ipv6_setsockopt(sk, level, optname, 1038 return compat_ipv6_setsockopt(sk, level, optname,
1040 optval, optlen); 1039 optval, optlen);
1041 } 1040 }
1042 return do_rawv6_setsockopt(sk, level, optname, optval, optlen); 1041 return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
1043 } 1042 }
1044 #endif 1043 #endif
1045 1044
1046 static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, 1045 static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
1047 char __user *optval, int __user *optlen) 1046 char __user *optval, int __user *optlen)
1048 { 1047 {
1049 struct raw6_sock *rp = raw6_sk(sk); 1048 struct raw6_sock *rp = raw6_sk(sk);
1050 int val, len; 1049 int val, len;
1051 1050
1052 if (get_user(len,optlen)) 1051 if (get_user(len,optlen))
1053 return -EFAULT; 1052 return -EFAULT;
1054 1053
1055 switch (optname) { 1054 switch (optname) {
1056 case IPV6_CHECKSUM: 1055 case IPV6_CHECKSUM:
1057 /* 1056 /*
1058 * We allow getsockopt() for IPPROTO_IPV6-level 1057 * We allow getsockopt() for IPPROTO_IPV6-level
1059 * IPV6_CHECKSUM socket option on ICMPv6 sockets 1058 * IPV6_CHECKSUM socket option on ICMPv6 sockets
1060 * since RFC3542 is silent about it. 1059 * since RFC3542 is silent about it.
1061 */ 1060 */
1062 if (rp->checksum == 0) 1061 if (rp->checksum == 0)
1063 val = -1; 1062 val = -1;
1064 else 1063 else
1065 val = rp->offset; 1064 val = rp->offset;
1066 break; 1065 break;
1067 1066
1068 default: 1067 default:
1069 return -ENOPROTOOPT; 1068 return -ENOPROTOOPT;
1070 } 1069 }
1071 1070
1072 len = min_t(unsigned int, sizeof(int), len); 1071 len = min_t(unsigned int, sizeof(int), len);
1073 1072
1074 if (put_user(len, optlen)) 1073 if (put_user(len, optlen))
1075 return -EFAULT; 1074 return -EFAULT;
1076 if (copy_to_user(optval,&val,len)) 1075 if (copy_to_user(optval,&val,len))
1077 return -EFAULT; 1076 return -EFAULT;
1078 return 0; 1077 return 0;
1079 } 1078 }
1080 1079
1081 static int rawv6_getsockopt(struct sock *sk, int level, int optname, 1080 static int rawv6_getsockopt(struct sock *sk, int level, int optname,
1082 char __user *optval, int __user *optlen) 1081 char __user *optval, int __user *optlen)
1083 { 1082 {
1084 switch(level) { 1083 switch(level) {
1085 case SOL_RAW: 1084 case SOL_RAW:
1086 break; 1085 break;
1087 1086
1088 case SOL_ICMPV6: 1087 case SOL_ICMPV6:
1089 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1088 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1090 return -EOPNOTSUPP; 1089 return -EOPNOTSUPP;
1091 return rawv6_geticmpfilter(sk, level, optname, optval, 1090 return rawv6_geticmpfilter(sk, level, optname, optval,
1092 optlen); 1091 optlen);
1093 case SOL_IPV6: 1092 case SOL_IPV6:
1094 if (optname == IPV6_CHECKSUM) 1093 if (optname == IPV6_CHECKSUM)
1095 break; 1094 break;
1096 default: 1095 default:
1097 return ipv6_getsockopt(sk, level, optname, optval, 1096 return ipv6_getsockopt(sk, level, optname, optval,
1098 optlen); 1097 optlen);
1099 } 1098 }
1100 1099
1101 return do_rawv6_getsockopt(sk, level, optname, optval, optlen); 1100 return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1102 } 1101 }
1103 1102
1104 #ifdef CONFIG_COMPAT 1103 #ifdef CONFIG_COMPAT
1105 static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, 1104 static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
1106 char __user *optval, int __user *optlen) 1105 char __user *optval, int __user *optlen)
1107 { 1106 {
1108 switch (level) { 1107 switch (level) {
1109 case SOL_RAW: 1108 case SOL_RAW:
1110 break; 1109 break;
1111 case SOL_ICMPV6: 1110 case SOL_ICMPV6:
1112 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1111 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1113 return -EOPNOTSUPP; 1112 return -EOPNOTSUPP;
1114 return rawv6_geticmpfilter(sk, level, optname, optval, optlen); 1113 return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
1115 case SOL_IPV6: 1114 case SOL_IPV6:
1116 if (optname == IPV6_CHECKSUM) 1115 if (optname == IPV6_CHECKSUM)
1117 break; 1116 break;
1118 default: 1117 default:
1119 return compat_ipv6_getsockopt(sk, level, optname, 1118 return compat_ipv6_getsockopt(sk, level, optname,
1120 optval, optlen); 1119 optval, optlen);
1121 } 1120 }
1122 return do_rawv6_getsockopt(sk, level, optname, optval, optlen); 1121 return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1123 } 1122 }
1124 #endif 1123 #endif
1125 1124
1126 static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) 1125 static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
1127 { 1126 {
1128 switch(cmd) { 1127 switch(cmd) {
1129 case SIOCOUTQ: 1128 case SIOCOUTQ:
1130 { 1129 {
1131 int amount = sk_wmem_alloc_get(sk); 1130 int amount = sk_wmem_alloc_get(sk);
1132 1131
1133 return put_user(amount, (int __user *)arg); 1132 return put_user(amount, (int __user *)arg);
1134 } 1133 }
1135 case SIOCINQ: 1134 case SIOCINQ:
1136 { 1135 {
1137 struct sk_buff *skb; 1136 struct sk_buff *skb;
1138 int amount = 0; 1137 int amount = 0;
1139 1138
1140 spin_lock_bh(&sk->sk_receive_queue.lock); 1139 spin_lock_bh(&sk->sk_receive_queue.lock);
1141 skb = skb_peek(&sk->sk_receive_queue); 1140 skb = skb_peek(&sk->sk_receive_queue);
1142 if (skb != NULL) 1141 if (skb != NULL)
1143 amount = skb->tail - skb->transport_header; 1142 amount = skb->tail - skb->transport_header;
1144 spin_unlock_bh(&sk->sk_receive_queue.lock); 1143 spin_unlock_bh(&sk->sk_receive_queue.lock);
1145 return put_user(amount, (int __user *)arg); 1144 return put_user(amount, (int __user *)arg);
1146 } 1145 }
1147 1146
1148 default: 1147 default:
1149 #ifdef CONFIG_IPV6_MROUTE 1148 #ifdef CONFIG_IPV6_MROUTE
1150 return ip6mr_ioctl(sk, cmd, (void __user *)arg); 1149 return ip6mr_ioctl(sk, cmd, (void __user *)arg);
1151 #else 1150 #else
1152 return -ENOIOCTLCMD; 1151 return -ENOIOCTLCMD;
1153 #endif 1152 #endif
1154 } 1153 }
1155 } 1154 }
1156 1155
1157 static void rawv6_close(struct sock *sk, long timeout) 1156 static void rawv6_close(struct sock *sk, long timeout)
1158 { 1157 {
1159 if (inet_sk(sk)->inet_num == IPPROTO_RAW) 1158 if (inet_sk(sk)->inet_num == IPPROTO_RAW)
1160 ip6_ra_control(sk, -1); 1159 ip6_ra_control(sk, -1);
1161 ip6mr_sk_done(sk); 1160 ip6mr_sk_done(sk);
1162 sk_common_release(sk); 1161 sk_common_release(sk);
1163 } 1162 }
1164 1163
1165 static void raw6_destroy(struct sock *sk) 1164 static void raw6_destroy(struct sock *sk)
1166 { 1165 {
1167 lock_sock(sk); 1166 lock_sock(sk);
1168 ip6_flush_pending_frames(sk); 1167 ip6_flush_pending_frames(sk);
1169 release_sock(sk); 1168 release_sock(sk);
1170 1169
1171 inet6_destroy_sock(sk); 1170 inet6_destroy_sock(sk);
1172 } 1171 }
1173 1172
1174 static int rawv6_init_sk(struct sock *sk) 1173 static int rawv6_init_sk(struct sock *sk)
1175 { 1174 {
1176 struct raw6_sock *rp = raw6_sk(sk); 1175 struct raw6_sock *rp = raw6_sk(sk);
1177 1176
1178 switch (inet_sk(sk)->inet_num) { 1177 switch (inet_sk(sk)->inet_num) {
1179 case IPPROTO_ICMPV6: 1178 case IPPROTO_ICMPV6:
1180 rp->checksum = 1; 1179 rp->checksum = 1;
1181 rp->offset = 2; 1180 rp->offset = 2;
1182 break; 1181 break;
1183 case IPPROTO_MH: 1182 case IPPROTO_MH:
1184 rp->checksum = 1; 1183 rp->checksum = 1;
1185 rp->offset = 4; 1184 rp->offset = 4;
1186 break; 1185 break;
1187 default: 1186 default:
1188 break; 1187 break;
1189 } 1188 }
1190 return(0); 1189 return(0);
1191 } 1190 }
1192 1191
1193 struct proto rawv6_prot = { 1192 struct proto rawv6_prot = {
1194 .name = "RAWv6", 1193 .name = "RAWv6",
1195 .owner = THIS_MODULE, 1194 .owner = THIS_MODULE,
1196 .close = rawv6_close, 1195 .close = rawv6_close,
1197 .destroy = raw6_destroy, 1196 .destroy = raw6_destroy,
1198 .connect = ip6_datagram_connect, 1197 .connect = ip6_datagram_connect,
1199 .disconnect = udp_disconnect, 1198 .disconnect = udp_disconnect,
1200 .ioctl = rawv6_ioctl, 1199 .ioctl = rawv6_ioctl,
1201 .init = rawv6_init_sk, 1200 .init = rawv6_init_sk,
1202 .setsockopt = rawv6_setsockopt, 1201 .setsockopt = rawv6_setsockopt,
1203 .getsockopt = rawv6_getsockopt, 1202 .getsockopt = rawv6_getsockopt,
1204 .sendmsg = rawv6_sendmsg, 1203 .sendmsg = rawv6_sendmsg,
1205 .recvmsg = rawv6_recvmsg, 1204 .recvmsg = rawv6_recvmsg,
1206 .bind = rawv6_bind, 1205 .bind = rawv6_bind,
1207 .backlog_rcv = rawv6_rcv_skb, 1206 .backlog_rcv = rawv6_rcv_skb,
1208 .hash = raw_hash_sk, 1207 .hash = raw_hash_sk,
1209 .unhash = raw_unhash_sk, 1208 .unhash = raw_unhash_sk,
1210 .obj_size = sizeof(struct raw6_sock), 1209 .obj_size = sizeof(struct raw6_sock),
1211 .h.raw_hash = &raw_v6_hashinfo, 1210 .h.raw_hash = &raw_v6_hashinfo,
1212 #ifdef CONFIG_COMPAT 1211 #ifdef CONFIG_COMPAT
1213 .compat_setsockopt = compat_rawv6_setsockopt, 1212 .compat_setsockopt = compat_rawv6_setsockopt,
1214 .compat_getsockopt = compat_rawv6_getsockopt, 1213 .compat_getsockopt = compat_rawv6_getsockopt,
1215 #endif 1214 #endif
1216 }; 1215 };
1217 1216
1218 #ifdef CONFIG_PROC_FS 1217 #ifdef CONFIG_PROC_FS
1219 static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 1218 static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1220 { 1219 {
1221 struct ipv6_pinfo *np = inet6_sk(sp); 1220 struct ipv6_pinfo *np = inet6_sk(sp);
1222 struct in6_addr *dest, *src; 1221 struct in6_addr *dest, *src;
1223 __u16 destp, srcp; 1222 __u16 destp, srcp;
1224 1223
1225 dest = &np->daddr; 1224 dest = &np->daddr;
1226 src = &np->rcv_saddr; 1225 src = &np->rcv_saddr;
1227 destp = 0; 1226 destp = 0;
1228 srcp = inet_sk(sp)->inet_num; 1227 srcp = inet_sk(sp)->inet_num;
1229 seq_printf(seq, 1228 seq_printf(seq,
1230 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1229 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1231 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", 1230 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
1232 i, 1231 i,
1233 src->s6_addr32[0], src->s6_addr32[1], 1232 src->s6_addr32[0], src->s6_addr32[1],
1234 src->s6_addr32[2], src->s6_addr32[3], srcp, 1233 src->s6_addr32[2], src->s6_addr32[3], srcp,
1235 dest->s6_addr32[0], dest->s6_addr32[1], 1234 dest->s6_addr32[0], dest->s6_addr32[1],
1236 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1235 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1237 sp->sk_state, 1236 sp->sk_state,
1238 sk_wmem_alloc_get(sp), 1237 sk_wmem_alloc_get(sp),
1239 sk_rmem_alloc_get(sp), 1238 sk_rmem_alloc_get(sp),
1240 0, 0L, 0, 1239 0, 0L, 0,
1241 sock_i_uid(sp), 0, 1240 sock_i_uid(sp), 0,
1242 sock_i_ino(sp), 1241 sock_i_ino(sp),
1243 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); 1242 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
1244 } 1243 }
1245 1244
1246 static int raw6_seq_show(struct seq_file *seq, void *v) 1245 static int raw6_seq_show(struct seq_file *seq, void *v)
1247 { 1246 {
1248 if (v == SEQ_START_TOKEN) 1247 if (v == SEQ_START_TOKEN)
1249 seq_printf(seq, 1248 seq_printf(seq,
1250 " sl " 1249 " sl "
1251 "local_address " 1250 "local_address "
1252 "remote_address " 1251 "remote_address "
1253 "st tx_queue rx_queue tr tm->when retrnsmt" 1252 "st tx_queue rx_queue tr tm->when retrnsmt"
1254 " uid timeout inode ref pointer drops\n"); 1253 " uid timeout inode ref pointer drops\n");
1255 else 1254 else
1256 raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); 1255 raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
1257 return 0; 1256 return 0;
1258 } 1257 }
1259 1258
1260 static const struct seq_operations raw6_seq_ops = { 1259 static const struct seq_operations raw6_seq_ops = {
1261 .start = raw_seq_start, 1260 .start = raw_seq_start,
1262 .next = raw_seq_next, 1261 .next = raw_seq_next,
1263 .stop = raw_seq_stop, 1262 .stop = raw_seq_stop,
1264 .show = raw6_seq_show, 1263 .show = raw6_seq_show,
1265 }; 1264 };
1266 1265
1267 static int raw6_seq_open(struct inode *inode, struct file *file) 1266 static int raw6_seq_open(struct inode *inode, struct file *file)
1268 { 1267 {
1269 return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops); 1268 return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
1270 } 1269 }
1271 1270
1272 static const struct file_operations raw6_seq_fops = { 1271 static const struct file_operations raw6_seq_fops = {
1273 .owner = THIS_MODULE, 1272 .owner = THIS_MODULE,
1274 .open = raw6_seq_open, 1273 .open = raw6_seq_open,
1275 .read = seq_read, 1274 .read = seq_read,
1276 .llseek = seq_lseek, 1275 .llseek = seq_lseek,
1277 .release = seq_release_net, 1276 .release = seq_release_net,
1278 }; 1277 };
1279 1278
1280 static int raw6_init_net(struct net *net) 1279 static int raw6_init_net(struct net *net)
1281 { 1280 {
1282 if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) 1281 if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
1283 return -ENOMEM; 1282 return -ENOMEM;
1284 1283
1285 return 0; 1284 return 0;
1286 } 1285 }
1287 1286
1288 static void raw6_exit_net(struct net *net) 1287 static void raw6_exit_net(struct net *net)
1289 { 1288 {
1290 proc_net_remove(net, "raw6"); 1289 proc_net_remove(net, "raw6");
1291 } 1290 }
1292 1291
1293 static struct pernet_operations raw6_net_ops = { 1292 static struct pernet_operations raw6_net_ops = {
1294 .init = raw6_init_net, 1293 .init = raw6_init_net,
1295 .exit = raw6_exit_net, 1294 .exit = raw6_exit_net,
1296 }; 1295 };
1297 1296
1298 int __init raw6_proc_init(void) 1297 int __init raw6_proc_init(void)
1299 { 1298 {
1300 return register_pernet_subsys(&raw6_net_ops); 1299 return register_pernet_subsys(&raw6_net_ops);
1301 } 1300 }
1302 1301
1303 void raw6_proc_exit(void) 1302 void raw6_proc_exit(void)
1304 { 1303 {
1305 unregister_pernet_subsys(&raw6_net_ops); 1304 unregister_pernet_subsys(&raw6_net_ops);
1306 } 1305 }
1307 #endif /* CONFIG_PROC_FS */ 1306 #endif /* CONFIG_PROC_FS */
1308 1307
1309 /* Same as inet6_dgram_ops, sans udp_poll. */ 1308 /* Same as inet6_dgram_ops, sans udp_poll. */
1310 static const struct proto_ops inet6_sockraw_ops = { 1309 static const struct proto_ops inet6_sockraw_ops = {
1311 .family = PF_INET6, 1310 .family = PF_INET6,
1312 .owner = THIS_MODULE, 1311 .owner = THIS_MODULE,
1313 .release = inet6_release, 1312 .release = inet6_release,
1314 .bind = inet6_bind, 1313 .bind = inet6_bind,
1315 .connect = inet_dgram_connect, /* ok */ 1314 .connect = inet_dgram_connect, /* ok */
1316 .socketpair = sock_no_socketpair, /* a do nothing */ 1315 .socketpair = sock_no_socketpair, /* a do nothing */
1317 .accept = sock_no_accept, /* a do nothing */ 1316 .accept = sock_no_accept, /* a do nothing */
1318 .getname = inet6_getname, 1317 .getname = inet6_getname,
1319 .poll = datagram_poll, /* ok */ 1318 .poll = datagram_poll, /* ok */
1320 .ioctl = inet6_ioctl, /* must change */ 1319 .ioctl = inet6_ioctl, /* must change */
1321 .listen = sock_no_listen, /* ok */ 1320 .listen = sock_no_listen, /* ok */
1322 .shutdown = inet_shutdown, /* ok */ 1321 .shutdown = inet_shutdown, /* ok */
1323 .setsockopt = sock_common_setsockopt, /* ok */ 1322 .setsockopt = sock_common_setsockopt, /* ok */
1324 .getsockopt = sock_common_getsockopt, /* ok */ 1323 .getsockopt = sock_common_getsockopt, /* ok */
1325 .sendmsg = inet_sendmsg, /* ok */ 1324 .sendmsg = inet_sendmsg, /* ok */
1326 .recvmsg = sock_common_recvmsg, /* ok */ 1325 .recvmsg = sock_common_recvmsg, /* ok */
1327 .mmap = sock_no_mmap, 1326 .mmap = sock_no_mmap,
1328 .sendpage = sock_no_sendpage, 1327 .sendpage = sock_no_sendpage,
1329 #ifdef CONFIG_COMPAT 1328 #ifdef CONFIG_COMPAT
1330 .compat_setsockopt = compat_sock_common_setsockopt, 1329 .compat_setsockopt = compat_sock_common_setsockopt,
1331 .compat_getsockopt = compat_sock_common_getsockopt, 1330 .compat_getsockopt = compat_sock_common_getsockopt,
1332 #endif 1331 #endif
1333 }; 1332 };
1334 1333
1335 static struct inet_protosw rawv6_protosw = { 1334 static struct inet_protosw rawv6_protosw = {
1336 .type = SOCK_RAW, 1335 .type = SOCK_RAW,
1337 .protocol = IPPROTO_IP, /* wild card */ 1336 .protocol = IPPROTO_IP, /* wild card */
1338 .prot = &rawv6_prot, 1337 .prot = &rawv6_prot,
1339 .ops = &inet6_sockraw_ops, 1338 .ops = &inet6_sockraw_ops,
1340 .capability = CAP_NET_RAW, 1339 .capability = CAP_NET_RAW,
1341 .no_check = UDP_CSUM_DEFAULT, 1340 .no_check = UDP_CSUM_DEFAULT,
1342 .flags = INET_PROTOSW_REUSE, 1341 .flags = INET_PROTOSW_REUSE,
1343 }; 1342 };
1344 1343
1345 int __init rawv6_init(void) 1344 int __init rawv6_init(void)
1346 { 1345 {
1347 int ret; 1346 int ret;
1348 1347
1349 ret = inet6_register_protosw(&rawv6_protosw); 1348 ret = inet6_register_protosw(&rawv6_protosw);
1350 if (ret) 1349 if (ret)
1351 goto out; 1350 goto out;
1352 out: 1351 out:
1353 return ret; 1352 return ret;
1354 } 1353 }
1355 1354
1356 void rawv6_exit(void) 1355 void rawv6_exit(void)
1357 { 1356 {
1358 inet6_unregister_protosw(&rawv6_protosw); 1357 inet6_unregister_protosw(&rawv6_protosw);
1359 } 1358 }
1360 1359
1 /* 1 /*
2 * UDP over IPv6 2 * UDP over IPv6
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt> 6 * Pedro Roque <roque@di.fc.ul.pt>
7 * 7 *
8 * Based on linux/ipv4/udp.c 8 * Based on linux/ipv4/udp.c
9 * 9 *
10 * Fixes: 10 * Fixes:
11 * Hideaki YOSHIFUJI : sin6_scope_id support 11 * Hideaki YOSHIFUJI : sin6_scope_id support
12 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 12 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
13 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 13 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
14 * a single port at the same time. 14 * a single port at the same time.
15 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data 15 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
16 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. 16 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file.
17 * 17 *
18 * This program is free software; you can redistribute it and/or 18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License 19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version 20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version. 21 * 2 of the License, or (at your option) any later version.
22 */ 22 */
23 23
24 #include <linux/errno.h> 24 #include <linux/errno.h>
25 #include <linux/types.h> 25 #include <linux/types.h>
26 #include <linux/socket.h> 26 #include <linux/socket.h>
27 #include <linux/sockios.h> 27 #include <linux/sockios.h>
28 #include <linux/net.h> 28 #include <linux/net.h>
29 #include <linux/in6.h> 29 #include <linux/in6.h>
30 #include <linux/netdevice.h> 30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h> 31 #include <linux/if_arp.h>
32 #include <linux/ipv6.h> 32 #include <linux/ipv6.h>
33 #include <linux/icmpv6.h> 33 #include <linux/icmpv6.h>
34 #include <linux/init.h> 34 #include <linux/init.h>
35 #include <linux/module.h> 35 #include <linux/module.h>
36 #include <linux/skbuff.h> 36 #include <linux/skbuff.h>
37 #include <asm/uaccess.h> 37 #include <asm/uaccess.h>
38 38
39 #include <net/ndisc.h> 39 #include <net/ndisc.h>
40 #include <net/protocol.h> 40 #include <net/protocol.h>
41 #include <net/transp_v6.h> 41 #include <net/transp_v6.h>
42 #include <net/ip6_route.h> 42 #include <net/ip6_route.h>
43 #include <net/raw.h> 43 #include <net/raw.h>
44 #include <net/tcp_states.h> 44 #include <net/tcp_states.h>
45 #include <net/ip6_checksum.h> 45 #include <net/ip6_checksum.h>
46 #include <net/xfrm.h> 46 #include <net/xfrm.h>
47 47
48 #include <linux/proc_fs.h> 48 #include <linux/proc_fs.h>
49 #include <linux/seq_file.h> 49 #include <linux/seq_file.h>
50 #include "udp_impl.h" 50 #include "udp_impl.h"
51 51
52 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) 52 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
53 { 53 {
54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; 54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); 55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
56 __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; 56 __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
57 __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); 57 __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
58 int sk_ipv6only = ipv6_only_sock(sk); 58 int sk_ipv6only = ipv6_only_sock(sk);
59 int sk2_ipv6only = inet_v6_ipv6only(sk2); 59 int sk2_ipv6only = inet_v6_ipv6only(sk2);
60 int addr_type = ipv6_addr_type(sk_rcv_saddr6); 60 int addr_type = ipv6_addr_type(sk_rcv_saddr6);
61 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 61 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
62 62
63 /* if both are mapped, treat as IPv4 */ 63 /* if both are mapped, treat as IPv4 */
64 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) 64 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
65 return (!sk2_ipv6only && 65 return (!sk2_ipv6only &&
66 (!sk1_rcv_saddr || !sk2_rcv_saddr || 66 (!sk1_rcv_saddr || !sk2_rcv_saddr ||
67 sk1_rcv_saddr == sk2_rcv_saddr)); 67 sk1_rcv_saddr == sk2_rcv_saddr));
68 68
69 if (addr_type2 == IPV6_ADDR_ANY && 69 if (addr_type2 == IPV6_ADDR_ANY &&
70 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 70 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
71 return 1; 71 return 1;
72 72
73 if (addr_type == IPV6_ADDR_ANY && 73 if (addr_type == IPV6_ADDR_ANY &&
74 !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 74 !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
75 return 1; 75 return 1;
76 76
77 if (sk2_rcv_saddr6 && 77 if (sk2_rcv_saddr6 &&
78 ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) 78 ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
79 return 1; 79 return 1;
80 80
81 return 0; 81 return 0;
82 } 82 }
83 83
/*
 * Bind-time port allocation: delegate to the generic UDP allocator,
 * passing the IPv6-aware receive-address comparator for conflict checks.
 */
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
}
88 88
89 static inline int compute_score(struct sock *sk, struct net *net, 89 static inline int compute_score(struct sock *sk, struct net *net,
90 unsigned short hnum, 90 unsigned short hnum,
91 struct in6_addr *saddr, __be16 sport, 91 struct in6_addr *saddr, __be16 sport,
92 struct in6_addr *daddr, __be16 dport, 92 struct in6_addr *daddr, __be16 dport,
93 int dif) 93 int dif)
94 { 94 {
95 int score = -1; 95 int score = -1;
96 96
97 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 97 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
98 sk->sk_family == PF_INET6) { 98 sk->sk_family == PF_INET6) {
99 struct ipv6_pinfo *np = inet6_sk(sk); 99 struct ipv6_pinfo *np = inet6_sk(sk);
100 struct inet_sock *inet = inet_sk(sk); 100 struct inet_sock *inet = inet_sk(sk);
101 101
102 score = 0; 102 score = 0;
103 if (inet->inet_dport) { 103 if (inet->inet_dport) {
104 if (inet->inet_dport != sport) 104 if (inet->inet_dport != sport)
105 return -1; 105 return -1;
106 score++; 106 score++;
107 } 107 }
108 if (!ipv6_addr_any(&np->rcv_saddr)) { 108 if (!ipv6_addr_any(&np->rcv_saddr)) {
109 if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) 109 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
110 return -1; 110 return -1;
111 score++; 111 score++;
112 } 112 }
113 if (!ipv6_addr_any(&np->daddr)) { 113 if (!ipv6_addr_any(&np->daddr)) {
114 if (!ipv6_addr_equal(&np->daddr, saddr)) 114 if (!ipv6_addr_equal(&np->daddr, saddr))
115 return -1; 115 return -1;
116 score++; 116 score++;
117 } 117 }
118 if (sk->sk_bound_dev_if) { 118 if (sk->sk_bound_dev_if) {
119 if (sk->sk_bound_dev_if != dif) 119 if (sk->sk_bound_dev_if != dif)
120 return -1; 120 return -1;
121 score++; 121 score++;
122 } 122 }
123 } 123 }
124 return score; 124 return score;
125 } 125 }
126 126
/*
 * Lockless (RCU) lookup of the best-matching UDPv6 socket for a datagram.
 * Walks the nulls-terminated hash chain, keeping the highest compute_score()
 * candidate, then takes a reference and re-validates it.
 */
static struct sock *__udp6_lib_lookup(struct net *net,
				      struct in6_addr *saddr, __be16 sport,
				      struct in6_addr *daddr, __be16 dport,
				      int dif, struct udp_table *udptable)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned short hnum = ntohs(dport);
	unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot = &udptable->hash[hash];
	int score, badness;

	rcu_read_lock();
begin:
	result = NULL;
	badness = -1;
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
		if (score > badness) {
			result = sk;
			badness = score;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash)
		goto begin;

	if (result) {
		/* refcount may already be zero if the socket is being freed */
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		/* NOTE(review): the socket memory can be reused under RCU, so
		 * re-score after taking the reference; restart if it no longer
		 * matches as well as it did during the scan. */
		else if (unlikely(compute_score(result, net, hnum, saddr, sport,
					daddr, dport, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}
170 170
171 static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, 171 static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
172 __be16 sport, __be16 dport, 172 __be16 sport, __be16 dport,
173 struct udp_table *udptable) 173 struct udp_table *udptable)
174 { 174 {
175 struct sock *sk; 175 struct sock *sk;
176 struct ipv6hdr *iph = ipv6_hdr(skb); 176 struct ipv6hdr *iph = ipv6_hdr(skb);
177 177
178 if (unlikely(sk = skb_steal_sock(skb))) 178 if (unlikely(sk = skb_steal_sock(skb)))
179 return sk; 179 return sk;
180 return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, 180 return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
181 &iph->daddr, dport, inet6_iif(skb), 181 &iph->daddr, dport, inet6_iif(skb),
182 udptable); 182 udptable);
183 } 183 }
184 184
/*
 *	This should be easy, if there is something there we
 *	return it, otherwise we block.
 */

/*
 * recvmsg() for UDPv6 sockets.  Dequeues one datagram (blocking unless
 * MSG_DONTWAIT/noblock), verifies/copies it to user iovecs, fills in the
 * source address and ancillary data, and returns the number of bytes
 * copied (or the full datagram length when MSG_TRUNC is set).
 * Handles both native IPv6 datagrams and IPv4 datagrams received on a
 * v4-mapped socket (is_udp4).
 */
int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
		  struct msghdr *msg, size_t len,
		  int noblock, int flags, int *addr_len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	unsigned int ulen, copied;
	int peeked;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	int is_udp4;

	if (addr_len)
		*addr_len=sizeof(struct sockaddr_in6);

	/* Queued ICMP errors are delivered through the error queue. */
	if (flags & MSG_ERRQUEUE)
		return ipv6_recv_error(sk, msg, len);

try_again:
	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				  &peeked, &err);
	if (!skb)
		goto out;

	/* Payload length excluding the UDP header. */
	ulen = skb->len - sizeof(struct udphdr);
	copied = len;
	if (copied > ulen)
		copied = ulen;
	else if (copied < ulen)
		msg->msg_flags |= MSG_TRUNC;

	is_udp4 = (skb->protocol == htons(ETH_P_IP));

	/*
	 * If checksum is needed at all, try to do it while copying the
	 * data. If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
	 */

	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
		if (udp_lib_checksum_complete(skb))
			goto csum_copy_err;
	}

	if (skb_csum_unnecessary(skb))
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
					      msg->msg_iov, copied );
	else {
		/* Copy and checksum in one pass; -EINVAL means bad csum. */
		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
		if (err == -EINVAL)
			goto csum_copy_err;
	}
	if (err)
		goto out_free;

	/* Only count the datagram once, not on every MSG_PEEK. */
	if (!peeked) {
		if (is_udp4)
			UDP_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INDATAGRAMS, is_udplite);
		else
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INDATAGRAMS, is_udplite);
	}

	sock_recv_ts_and_drops(msg, sk, skb);

	/* Copy the address. */
	if (msg->msg_name) {
		struct sockaddr_in6 *sin6;

		sin6 = (struct sockaddr_in6 *) msg->msg_name;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = udp_hdr(skb)->source;
		sin6->sin6_flowinfo = 0;
		sin6->sin6_scope_id = 0;

		if (is_udp4)
			/* IPv4 sender reported as a v4-mapped IPv6 address. */
			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
					       &sin6->sin6_addr);
		else {
			ipv6_addr_copy(&sin6->sin6_addr,
				       &ipv6_hdr(skb)->saddr);
			/* Link-local sources need the scope (interface) id. */
			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
				sin6->sin6_scope_id = IP6CB(skb)->iif;
		}

	}
	/* Ancillary data: IPv4 cmsgs vs IPv6 rxopts, by packet family. */
	if (is_udp4) {
		if (inet->cmsg_flags)
			ip_cmsg_recv(msg, skb);
	} else {
		if (np->rxopt.all)
			datagram_recv_ctl(sk, msg, skb);
	}

	err = copied;
	if (flags & MSG_TRUNC)
		err = ulen;

out_free:
	lock_sock(sk);
	skb_free_datagram(sk, skb);
	release_sock(sk);
out:
	return err;

csum_copy_err:
	/* Bad checksum discovered during copy: drop the datagram, account
	 * the error (unless it was only peeked), then retry or bail. */
	lock_sock(sk);
	if (!skb_kill_datagram(sk, skb, flags)) {
		if (is_udp4)
			UDP_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INERRORS, is_udplite);
		else
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INERRORS, is_udplite);
	}
	release_sock(sk);

	if (flags & MSG_DONTWAIT)
		return -EAGAIN;
	goto try_again;
}
313 313
/*
 * ICMPv6 error handler for UDPv6/UDP-Litev6.  Locates the socket that
 * sent the offending datagram and reports the error to it, honouring the
 * IPV6_RECVERR (np->recverr) setting.
 */
void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		    u8 type, u8 code, int offset, __be32 info,
		    struct udp_table *udptable)
{
	struct ipv6_pinfo *np;
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	struct in6_addr *saddr = &hdr->saddr;
	struct in6_addr *daddr = &hdr->daddr;
	struct udphdr *uh = (struct udphdr*)(skb->data+offset);
	struct sock *sk;
	int err;

	/* The embedded packet is ours going out, so dst/src are swapped. */
	sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
			       saddr, uh->source, inet6_iif(skb), udptable);
	if (sk == NULL)
		return;

	np = inet6_sk(sk);

	/* Soft errors are ignored unless the app asked for them (recverr). */
	if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
		goto out;

	/* Unconnected sockets only see errors when recverr is set. */
	if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
		goto out;

	if (np->recverr)
		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));

	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	sock_put(sk);	/* drop the lookup reference */
}
347 347
/* Protocol-handler entry point: plain UDPv6 errors go to the shared
 * handler with the global UDP hash table. */
static __inline__ void udpv6_err(struct sk_buff *skb,
				 struct inet6_skb_parm *opt, u8 type,
				 u8 code, int offset, __be32 info )
{
	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
354 354
/*
 * Queue an skb onto a UDPv6 socket's receive queue.
 * Returns 0 on success, -1 when the packet is dropped (the skb is freed
 * and UDP_MIB_INERRORS is bumped on every drop path).
 * sk->sk_drops is incremented for policy/coverage/checksum drops, but NOT
 * for receive-buffer overflow: sock_queue_rcv_skb() already charged it.
 */
int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
	struct udp_sock *up = udp_sk(sk);
	int rc;
	int is_udplite = IS_UDPLITE(sk);

	/* XFRM (IPsec) policy says this packet may not be delivered. */
	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto drop;

	/*
	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
	 */
	if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {

		if (up->pcrlen == 0) { /* full coverage was set */
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
				" %d while full coverage %d requested\n",
				UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
		}
		if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
				"too small, need min %d\n",
				UDP_SKB_CB(skb)->cscov, up->pcrlen);
			goto drop;
		}
	}

	/* Verify the checksum now if a socket filter will look at data. */
	if (sk->sk_filter) {
		if (udp_lib_checksum_complete(skb))
			goto drop;
	}

	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
		/* Note that an ENOMEM error is charged twice */
		if (rc == -ENOMEM)
			UDP6_INC_STATS_BH(sock_net(sk),
					UDP_MIB_RCVBUFERRORS, is_udplite);
		goto drop_no_sk_drops_inc;
	}

	return 0;
drop:
	atomic_inc(&sk->sk_drops);
drop_no_sk_drops_inc:
	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	kfree_skb(skb);
	return -1;
}
402 404
403 static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, 405 static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
404 __be16 loc_port, struct in6_addr *loc_addr, 406 __be16 loc_port, struct in6_addr *loc_addr,
405 __be16 rmt_port, struct in6_addr *rmt_addr, 407 __be16 rmt_port, struct in6_addr *rmt_addr,
406 int dif) 408 int dif)
407 { 409 {
408 struct hlist_nulls_node *node; 410 struct hlist_nulls_node *node;
409 struct sock *s = sk; 411 struct sock *s = sk;
410 unsigned short num = ntohs(loc_port); 412 unsigned short num = ntohs(loc_port);
411 413
412 sk_nulls_for_each_from(s, node) { 414 sk_nulls_for_each_from(s, node) {
413 struct inet_sock *inet = inet_sk(s); 415 struct inet_sock *inet = inet_sk(s);
414 416
415 if (!net_eq(sock_net(s), net)) 417 if (!net_eq(sock_net(s), net))
416 continue; 418 continue;
417 419
418 if (s->sk_hash == num && s->sk_family == PF_INET6) { 420 if (s->sk_hash == num && s->sk_family == PF_INET6) {
419 struct ipv6_pinfo *np = inet6_sk(s); 421 struct ipv6_pinfo *np = inet6_sk(s);
420 if (inet->inet_dport) { 422 if (inet->inet_dport) {
421 if (inet->inet_dport != rmt_port) 423 if (inet->inet_dport != rmt_port)
422 continue; 424 continue;
423 } 425 }
424 if (!ipv6_addr_any(&np->daddr) && 426 if (!ipv6_addr_any(&np->daddr) &&
425 !ipv6_addr_equal(&np->daddr, rmt_addr)) 427 !ipv6_addr_equal(&np->daddr, rmt_addr))
426 continue; 428 continue;
427 429
428 if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) 430 if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
429 continue; 431 continue;
430 432
431 if (!ipv6_addr_any(&np->rcv_saddr)) { 433 if (!ipv6_addr_any(&np->rcv_saddr)) {
432 if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) 434 if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
433 continue; 435 continue;
434 } 436 }
435 if (!inet6_mc_check(s, loc_addr, rmt_addr)) 437 if (!inet6_mc_check(s, loc_addr, rmt_addr))
436 continue; 438 continue;
437 return s; 439 return s;
438 } 440 }
439 } 441 }
440 return NULL; 442 return NULL;
441 } 443 }
442 444
/*
 * Note: called only from the BH handler context,
 * so we don't need to lock the hashes.
 */
/*
 * Deliver a multicast datagram to every matching socket in the hash slot.
 * The last matching socket consumes the original skb; every earlier match
 * gets a clone (silently skipped if the clone allocation fails).
 * Always returns 0 - the packet is consumed either way.
 */
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
		struct in6_addr *saddr, struct in6_addr *daddr,
		struct udp_table *udptable)
{
	struct sock *sk, *sk2;
	const struct udphdr *uh = udp_hdr(skb);
	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
	int dif;

	/* The slot lock keeps the chain stable while we walk it. */
	spin_lock(&hslot->lock);
	sk = sk_nulls_head(&hslot->head);
	dif = inet6_iif(skb);
	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
	if (!sk) {
		/* no listener: drop silently (multicast sends no ICMP) */
		kfree_skb(skb);
		goto out;
	}

	/* Clone for every additional match beyond the first. */
	sk2 = sk;
	while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr,
					uh->source, saddr, dif))) {
		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
		if (buff) {
			bh_lock_sock(sk2);
			if (!sock_owned_by_user(sk2))
				udpv6_queue_rcv_skb(sk2, buff);
			else
				/* owner busy: defer to the socket backlog */
				sk_add_backlog(sk2, buff);
			bh_unlock_sock(sk2);
		}
	}
	/* The first match receives the original skb. */
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		udpv6_queue_rcv_skb(sk, skb);
	else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
out:
	spin_unlock(&hslot->lock);
	return 0;
}
488 490
/*
 * Initialize checksum state for an incoming UDPv6/UDP-Litev6 packet.
 * Returns 0 on success, non-zero when the packet must be dropped
 * (zero checksum - forbidden for UDP over IPv6 - or UDP-Lite init error).
 */
static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
				 int proto)
{
	int err;

	/* Default: full coverage (plain UDP semantics). */
	UDP_SKB_CB(skb)->partial_cov = 0;
	UDP_SKB_CB(skb)->cscov = skb->len;

	if (proto == IPPROTO_UDPLITE) {
		/* May set partial_cov/cscov from the UDP-Lite header. */
		err = udplite_checksum_init(skb, uh);
		if (err)
			return err;
	}

	if (uh->check == 0) {
		/* RFC 2460 section 8.1 says that we SHOULD log
		   this error. Well, it is reasonable.
		 */
		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
		return 1;
	}
	/* Hardware gave us a full checksum: validate it against the
	 * pseudo-header now and mark the skb verified if it passes. */
	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     skb->len, proto, skb->csum))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	/* Otherwise seed skb->csum with the pseudo-header so later
	 * software verification only has to fold in the payload. */
	if (!skb_csum_unnecessary(skb))
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
							 &ipv6_hdr(skb)->daddr,
							 skb->len, proto, 0));

	return 0;
}
522 524
/*
 * Main receive entry for UDPv6 and UDP-Litev6 packets.
 * Validates header and length, initializes checksum state, then demuxes
 * to multicast delivery or a unicast socket; unmatched unicast packets
 * get an ICMPv6 port-unreachable.  Always returns 0 (packet consumed).
 */
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
		   int proto)
{
	struct sock *sk;
	struct udphdr *uh;
	struct net_device *dev = skb->dev;
	struct in6_addr *saddr, *daddr;
	u32 ulen = 0;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto short_packet;

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
	uh = udp_hdr(skb);

	ulen = ntohs(uh->len);
	if (ulen > skb->len)
		goto short_packet;

	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */

		/* Check for jumbo payload */
		if (ulen == 0)
			ulen = skb->len;

		if (ulen < sizeof(*uh))
			goto short_packet;

		if (ulen < skb->len) {
			if (pskb_trim_rcsum(skb, ulen))
				goto short_packet;
			/* Trimming may have reallocated the header area;
			 * refresh all header pointers. */
			saddr = &ipv6_hdr(skb)->saddr;
			daddr = &ipv6_hdr(skb)->daddr;
			uh = udp_hdr(skb);
		}
	}

	if (udp6_csum_init(skb, uh, proto))
		goto discard;

	/*
	 *	Multicast receive code
	 */
	if (ipv6_addr_is_multicast(daddr))
		return __udp6_lib_mcast_deliver(net, skb,
				saddr, daddr, udptable);

	/* Unicast */

	/*
	 * check socket cache ... must talk to Alan about his plans
	 * for sock caches... i'll skip this for now.
	 */
	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);

	if (sk == NULL) {
		/* No listener: verify the checksum before answering with
		 * ICMP, so corrupted packets are silently discarded. */
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto discard;

		if (udp_lib_checksum_complete(skb))
			goto discard;
		UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
				proto == IPPROTO_UDPLITE);

		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);

		kfree_skb(skb);
		return 0;
	}

	/* deliver */

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		udpv6_queue_rcv_skb(sk, skb);
	else
		/* socket locked by a process context: queue to backlog */
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
	sock_put(sk);	/* drop the lookup reference */
	return 0;

short_packet:
	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
		       ulen, skb->len);

discard:
	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
	kfree_skb(skb);
	return 0;
}
617 619
/* Protocol-handler entry point: plain UDPv6 receive using the global
 * UDP hash table. */
static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
}
622 624
623 /* 625 /*
624 * Throw away all pending data and cancel the corking. Socket is locked. 626 * Throw away all pending data and cancel the corking. Socket is locked.
625 */ 627 */
626 static void udp_v6_flush_pending_frames(struct sock *sk) 628 static void udp_v6_flush_pending_frames(struct sock *sk)
627 { 629 {
628 struct udp_sock *up = udp_sk(sk); 630 struct udp_sock *up = udp_sk(sk);
629 631
630 if (up->pending == AF_INET) 632 if (up->pending == AF_INET)
631 udp_flush_pending_frames(sk); 633 udp_flush_pending_frames(sk);
632 else if (up->pending) { 634 else if (up->pending) {
633 up->len = 0; 635 up->len = 0;
634 up->pending = 0; 636 up->pending = 0;
635 ip6_flush_pending_frames(sk); 637 ip6_flush_pending_frames(sk);
636 } 638 }
637 } 639 }
638 640
/**
 * 	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
 * 	@sk: 	socket we are sending on
 * 	@skb: 	sk_buff containing the filled-in UDP header
 * 	        (checksum field must be zeroed out)
 * 	@saddr: source address (for the pseudo-header)
 * 	@daddr: destination address (for the pseudo-header)
 * 	@len:	total UDP length (header + payload)
 */
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
				 const struct in6_addr *saddr,
				 const struct in6_addr *daddr, int len)
{
	unsigned int offset;
	struct udphdr *uh = udp_hdr(skb);
	__wsum csum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Only one fragment on the socket. */
		/* Let the hardware finish the checksum: point csum_start/
		 * csum_offset at the UDP check field and preload it with
		 * the complemented pseudo-header sum. */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
		offset = skb_transport_offset(skb);
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);

		skb->ip_summed = CHECKSUM_NONE;

		/* Fold the checksums of every queued fragment together. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			csum = csum_add(csum, skb->csum);
		}

		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
					    csum);
		/* 0 means "no checksum"; RFC 768 mangles it to all-ones. */
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}
679 681
680 /* 682 /*
681 * Sending 683 * Sending
682 */ 684 */
683 685
/*
 * Finalize and transmit the corked frames queued on this socket:
 * build the UDP header on the first fragment, compute or offload the
 * checksum, push the queue to the IPv6 output path, and update the
 * SNMP counters.  Always resets the corking state before returning.
 */
static int udp_v6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;
	struct udphdr *uh;
	struct udp_sock *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct flowi *fl = &inet->cork.fl;
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
	__wsum csum = 0;

	/* Grab the skbuff where UDP header space exists. */
	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
		goto out;

	/*
	 * Create a UDP header
	 */
	uh = udp_hdr(skb);
	uh->source = fl->fl_ip_sport;
	uh->dest = fl->fl_ip_dport;
	uh->len = htons(up->len);
	uh->check = 0;

	if (is_udplite)
		csum = udplite_csum_outgoing(sk, skb);
	else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
		udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst,
				     up->len);
		goto send;
	} else
		csum = udp_csum_outgoing(sk, skb);

	/* add protocol-dependent pseudo-header */
	uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
				    up->len, fl->proto, csum );
	/* 0 means "no checksum"; transmit the all-ones form instead. */
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

send:
	err = ip6_push_pending_frames(sk);
	if (err) {
		/* ENOBUFS is only reported to apps that enabled recverr;
		 * otherwise count it and pretend success. */
		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else
		UDP6_INC_STATS_USER(sock_net(sk),
				UDP_MIB_OUTDATAGRAMS, is_udplite);
out:
	up->len = 0;
	up->pending = 0;
	return err;
}
739 741
/*
 * sendmsg() entry point for UDPv6 (and UDP-Lite v6) sockets.
 *
 * Validates the destination address (falling back to the IPv4 udp_sendmsg()
 * for AF_INET and v4-mapped destinations), resolves flow label, tx options,
 * routing and xfrm policy, then appends the payload with ip6_append_data().
 * Unless the socket is corked (UDP_CORK / MSG_MORE), the datagram is pushed
 * immediately via udp_v6_push_pending_frames().
 *
 * Returns the number of bytes accepted (len) on success, or a negative
 * errno.  SNDBUFERRORS MIB accounting is done here for -ENOBUFS/no-space
 * failures.
 */
int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
		  struct msghdr *msg, size_t len)
{
	struct ipv6_txoptions opt_space;
	struct udp_sock *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
	struct in6_addr *daddr, *final_p = NULL, final;
	struct ipv6_txoptions *opt = NULL;
	struct ip6_flowlabel *flowlabel = NULL;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_len = msg->msg_namelen;
	int ulen = len;
	int hlimit = -1;		/* -1: not set by cmsg, use socket/route default */
	int tclass = -1;		/* ditto */
	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
	int err;
	int connected = 0;		/* 1 => dst may be cached on the socket */
	int is_udplite = IS_UDPLITE(sk);
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);

	/* destination address check */
	if (sin6) {
		if (addr_len < offsetof(struct sockaddr, sa_data))
			return -EINVAL;

		switch (sin6->sin6_family) {
		case AF_INET6:
			if (addr_len < SIN6_LEN_RFC2133)
				return -EINVAL;
			daddr = &sin6->sin6_addr;
			break;
		case AF_INET:
			/* Plain IPv4 destination: delegate to udp_sendmsg(). */
			goto do_udp_sendmsg;
		case AF_UNSPEC:
			msg->msg_name = sin6 = NULL;
			msg->msg_namelen = addr_len = 0;
			daddr = NULL;
			break;
		default:
			return -EINVAL;
		}
	} else if (!up->pending) {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = &np->daddr;
	} else
		daddr = NULL;

	if (daddr) {
		if (ipv6_addr_v4mapped(daddr)) {
			/* Rewrite the v4-mapped destination as a sockaddr_in
			 * and hand off to the IPv4 UDP path.
			 */
			struct sockaddr_in sin;
			sin.sin_family = AF_INET;
			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
			sin.sin_addr.s_addr = daddr->s6_addr32[3];
			msg->msg_name = &sin;
			msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
			if (__ipv6_only_sock(sk))
				return -ENETUNREACH;
			return udp_sendmsg(iocb, sk, msg, len);
		}
	}

	if (up->pending == AF_INET)
		return udp_sendmsg(iocb, sk, msg, len);

	/* Rough check on arithmetic overflow,
	   better check is made in ip6_append_data().
	   */
	if (len > INT_MAX - sizeof(struct udphdr))
		return -EMSGSIZE;

	if (up->pending) {
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		lock_sock(sk);
		/* Re-check under the lock: another thread may have pushed. */
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET6)) {
				release_sock(sk);
				return -EAFNOSUPPORT;
			}
			dst = NULL;	/* cork already holds the route */
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	memset(&fl, 0, sizeof(fl));

	if (sin6) {
		if (sin6->sin6_port == 0)
			return -EINVAL;

		fl.fl_ip_dport = sin6->sin6_port;
		daddr = &sin6->sin6_addr;

		if (np->sndflow) {
			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
				if (flowlabel == NULL)
					return -EINVAL;
				daddr = &flowlabel->dst;
			}
		}

		/*
		 * Otherwise it will be difficult to maintain
		 * sk->sk_dst_cache.
		 */
		if (sk->sk_state == TCP_ESTABLISHED &&
		    ipv6_addr_equal(daddr, &np->daddr))
			daddr = &np->daddr;

		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    sin6->sin6_scope_id &&
		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
			fl.oif = sin6->sin6_scope_id;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;

		/* Connected socket: take destination from socket state. */
		fl.fl_ip_dport = inet->inet_dport;
		daddr = &np->daddr;
		fl.fl6_flowlabel = np->flow_label;
		connected = 1;
	}

	if (!fl.oif)
		fl.oif = sk->sk_bound_dev_if;

	if (!fl.oif)
		fl.oif = np->sticky_pktinfo.ipi6_ifindex;

	fl.mark = sk->sk_mark;

	if (msg->msg_controllen) {
		/* Parse ancillary data (pktinfo, hoplimit, tclass, ...). */
		opt = &opt_space;
		memset(opt, 0, sizeof(struct ipv6_txoptions));
		opt->tot_len = sizeof(*opt);

		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
		if (err < 0) {
			fl6_sock_release(flowlabel);
			return err;
		}
		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
		}
		if (!(opt->opt_nflen|opt->opt_flen))
			opt = NULL;
		connected = 0;	/* per-call options defeat dst caching */
	}
	if (opt == NULL)
		opt = np->opt;
	if (flowlabel)
		opt = fl6_merge_options(&opt_space, flowlabel, opt);
	opt = ipv6_fixup_options(&opt_space, opt);

	fl.proto = sk->sk_protocol;
	if (!ipv6_addr_any(daddr))
		ipv6_addr_copy(&fl.fl6_dst, daddr);
	else
		fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl_ip_sport = inet->inet_sport;

	/* merge ip6_build_xmit from ip6_output */
	if (opt && opt->srcrt) {
		/* Route via the first routing-header hop; restore the real
		 * destination (final) after the route lookup.
		 */
		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
		connected = 0;
	}

	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
		fl.oif = np->mcast_oif;
		connected = 0;
	}

	security_sk_classify_flow(sk, &fl);

	err = ip6_sk_dst_lookup(sk, &dst, &fl);
	if (err)
		goto out;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
	if (err < 0) {
		if (err == -EREMOTE)
			err = ip6_dst_blackhole(sk, &dst, &fl);
		if (err < 0)
			goto out;
	}

	if (hlimit < 0) {
		if (ipv6_addr_is_multicast(&fl.fl6_dst))
			hlimit = np->mcast_hops;
		else
			hlimit = np->hop_limit;
		if (hlimit < 0)
			hlimit = ip6_dst_hoplimit(dst);
	}

	if (tclass < 0)
		tclass = np->tclass;

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
		err = -EINVAL;
		goto out;
	}

	up->pending = AF_INET6;

do_append_data:
	up->len += ulen;
	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
		sizeof(struct udphdr), hlimit, tclass, opt, &fl,
		(struct rt6_info*)dst,
		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
	if (err)
		udp_v6_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_v6_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		up->pending = 0;

	if (dst) {
		if (connected) {
			/* Cache the route on the socket for later sends. */
			ip6_dst_store(sk, dst,
				      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
				      &np->daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
				      ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
				      &np->saddr :
#endif
				      NULL);
		} else {
			dst_release(dst);
		}
		dst = NULL;
	}

	if (err > 0)
		err = np->recverr ? net_xmit_errno(err) : 0;
	release_sock(sk);
out:
	dst_release(dst);
	fl6_sock_release(flowlabel);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things). We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP6_INC_STATS_USER(sock_net(sk),
				UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	dst_confirm(dst);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
1033 1035
/*
 * Socket teardown for UDPv6: drop any corked-but-unsent frames under the
 * socket lock, then run the generic IPv6 socket destruction.
 */
void udpv6_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_v6_flush_pending_frames(sk);
	release_sock(sk);

	inet6_destroy_sock(sk);
}
1042 1044
1043 /* 1045 /*
1044 * Socket option code for UDP 1046 * Socket option code for UDP
1045 */ 1047 */
1046 int udpv6_setsockopt(struct sock *sk, int level, int optname, 1048 int udpv6_setsockopt(struct sock *sk, int level, int optname,
1047 char __user *optval, unsigned int optlen) 1049 char __user *optval, unsigned int optlen)
1048 { 1050 {
1049 if (level == SOL_UDP || level == SOL_UDPLITE) 1051 if (level == SOL_UDP || level == SOL_UDPLITE)
1050 return udp_lib_setsockopt(sk, level, optname, optval, optlen, 1052 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1051 udp_v6_push_pending_frames); 1053 udp_v6_push_pending_frames);
1052 return ipv6_setsockopt(sk, level, optname, optval, optlen); 1054 return ipv6_setsockopt(sk, level, optname, optval, optlen);
1053 } 1055 }
1054 1056
#ifdef CONFIG_COMPAT
/*
 * Compat (32-bit on 64-bit kernel) setsockopt(): same split as
 * udpv6_setsockopt(), but delegating to the compat IPv6 handler.
 */
int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, unsigned int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ipv6_setsockopt(sk, level, optname,
					      optval, optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_v6_push_pending_frames);
}
#endif
1065 1067
1066 int udpv6_getsockopt(struct sock *sk, int level, int optname, 1068 int udpv6_getsockopt(struct sock *sk, int level, int optname,
1067 char __user *optval, int __user *optlen) 1069 char __user *optval, int __user *optlen)
1068 { 1070 {
1069 if (level == SOL_UDP || level == SOL_UDPLITE) 1071 if (level == SOL_UDP || level == SOL_UDPLITE)
1070 return udp_lib_getsockopt(sk, level, optname, optval, optlen); 1072 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1071 return ipv6_getsockopt(sk, level, optname, optval, optlen); 1073 return ipv6_getsockopt(sk, level, optname, optval, optlen);
1072 } 1074 }
1073 1075
#ifdef CONFIG_COMPAT
/*
 * Compat (32-bit on 64-bit kernel) getsockopt(): same split as
 * udpv6_getsockopt(), but delegating to the compat IPv6 handler.
 */
int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ipv6_getsockopt(sk, level, optname,
					      optval, optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1083 1085
1084 static int udp6_ufo_send_check(struct sk_buff *skb) 1086 static int udp6_ufo_send_check(struct sk_buff *skb)
1085 { 1087 {
1086 struct ipv6hdr *ipv6h; 1088 struct ipv6hdr *ipv6h;
1087 struct udphdr *uh; 1089 struct udphdr *uh;
1088 1090
1089 if (!pskb_may_pull(skb, sizeof(*uh))) 1091 if (!pskb_may_pull(skb, sizeof(*uh)))
1090 return -EINVAL; 1092 return -EINVAL;
1091 1093
1092 ipv6h = ipv6_hdr(skb); 1094 ipv6h = ipv6_hdr(skb);
1093 uh = udp_hdr(skb); 1095 uh = udp_hdr(skb);
1094 1096
1095 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, 1097 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
1096 IPPROTO_UDP, 0); 1098 IPPROTO_UDP, 0);
1097 skb->csum_start = skb_transport_header(skb) - skb->head; 1099 skb->csum_start = skb_transport_header(skb) - skb->head;
1098 skb->csum_offset = offsetof(struct udphdr, check); 1100 skb->csum_offset = offsetof(struct udphdr, check);
1099 skb->ip_summed = CHECKSUM_PARTIAL; 1101 skb->ip_summed = CHECKSUM_PARTIAL;
1100 return 0; 1102 return 0;
1101 } 1103 }
1102 1104
/*
 * GSO segmentation callback for UDPv6 (UFO).
 *
 * If the device can do UFO itself, only (re)validates gso_type and fixes up
 * gso_segs, returning NULL so the skb is passed through unsegmented.
 * Otherwise performs software UFO: completes the UDP checksum, inserts an
 * IPv6 fragment header in front of the fragmentable part, and splits the
 * skb with skb_segment().
 *
 * Returns the segment list, NULL for pass-through, or ERR_PTR(-EINVAL).
 */
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	unsigned int unfrag_ip6hlen, unfrag_len;
	struct frag_hdr *fptr;
	u8 *mac_start, *prevhdr;
	u8 nexthdr;
	u8 frag_hdr_sz = sizeof(struct frag_hdr);
	int offset;
	__wsum csum;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
			     !(type & (SKB_GSO_UDP))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		/* NULL => caller transmits the skb unsegmented. */
		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
	 * do checksum of UDP packets sent as multiple IP fragments.
	 */
	offset = skb->csum_start - skb_headroom(skb);
	csum = skb_checksum(skb, offset, skb->len- offset, 0);
	offset += skb->csum_offset;
	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
	skb->ip_summed = CHECKSUM_NONE;

	/* Check if there is enough headroom to insert fragment header. */
	if ((skb_headroom(skb) < frag_hdr_sz) &&
	    pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
		goto out;

	/* Find the unfragmentable header and shift it left by frag_hdr_sz
	 * bytes to insert fragment header.
	 */
	unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;
	*prevhdr = NEXTHDR_FRAGMENT;
	unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
		     unfrag_ip6hlen;
	mac_start = skb_mac_header(skb);
	/* Move MAC + unfragmentable IPv6 headers down, opening a gap for
	 * the fragment header.
	 */
	memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);

	skb->mac_header -= frag_hdr_sz;
	skb->network_header -= frag_hdr_sz;

	fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
	fptr->nexthdr = nexthdr;
	fptr->reserved = 0;
	ipv6_select_ident(fptr);

	/* Fragment the skb. ipv6 header and the remaining fields of the
	 * fragment header are updated in ipv6_gso_segment()
	 */
	segs = skb_segment(skb, features);

out:
	return segs;
}
1174 1176
/* Transport-layer protocol hooks registered for IPPROTO_UDP over IPv6. */
static const struct inet6_protocol udpv6_protocol = {
	.handler	=	udpv6_rcv,		/* inbound datagram delivery */
	.err_handler	=	udpv6_err,		/* ICMPv6 error handling */
	.gso_send_check	=	udp6_ufo_send_check,
	.gso_segment	=	udp6_ufo_fragment,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
1182 1184
1183 /* ------------------------------------------------------------------------ */ 1185 /* ------------------------------------------------------------------------ */
1184 #ifdef CONFIG_PROC_FS 1186 #ifdef CONFIG_PROC_FS
1185 1187
/*
 * Emit one /proc/net/udp6 line describing socket @sp in hash @bucket.
 * The column layout must match the header printed by udp6_seq_show(),
 * including the trailing sk_drops counter.
 */
static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
{
	struct inet_sock *inet = inet_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);
	struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);
	seq_printf(seq,
		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
		   bucket,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   sk_wmem_alloc_get(sp),
		   sk_rmem_alloc_get(sp),
		   0, 0L, 0,		/* tr, tm->when, retrnsmt: unused for UDP */
		   sock_i_uid(sp), 0,	/* timeout: unused for UDP */
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   atomic_read(&sp->sk_drops));
}
1214 1216
/*
 * seq_file ->show for /proc/net/udp6: print the column header on the first
 * call (SEQ_START_TOKEN), otherwise one line per socket.
 */
int udp6_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq,
			   "  sl  "
			   "local_address                         "
			   "remote_address                        "
			   "st tx_queue rx_queue tr tm->when retrnsmt"
			   "   uid  timeout inode ref pointer drops\n");
	else
		udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
	return 0;
}
1228 1230
/* /proc/net/udp6 registration data for the shared UDP proc code. */
static struct udp_seq_afinfo udp6_seq_afinfo = {
	.name		= "udp6",
	.family		= AF_INET6,
	.udp_table	= &udp_table,	/* same hash table as the IPv4 side */
	.seq_fops	= {
		.owner	=	THIS_MODULE,
	},
	.seq_ops	= {
		.show		= udp6_seq_show,
	},
};
1240 1242
/* Register /proc/net/udp6 for network namespace @net. */
int udp6_proc_init(struct net *net)
{
	return udp_proc_register(net, &udp6_seq_afinfo);
}
1245 1247
1246 void udp6_proc_exit(struct net *net) { 1248 void udp6_proc_exit(struct net *net) {
1247 udp_proc_unregister(net, &udp6_seq_afinfo); 1249 udp_proc_unregister(net, &udp6_seq_afinfo);
1248 } 1250 }
1249 #endif /* CONFIG_PROC_FS */ 1251 #endif /* CONFIG_PROC_FS */
1250 1252
1251 /* ------------------------------------------------------------------------ */ 1253 /* ------------------------------------------------------------------------ */
1252 1254
/* struct proto for AF_INET6/SOCK_DGRAM sockets; shares the UDP hash table,
 * memory accounting and most operations with the IPv4 implementation.
 */
struct proto udpv6_prot = {
	.name		   = "UDPv6",
	.owner		   = THIS_MODULE,
	.close		   = udp_lib_close,
	.connect	   = ip6_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = udp_ioctl,
	.destroy	   = udpv6_destroy_sock,
	.setsockopt	   = udpv6_setsockopt,
	.getsockopt	   = udpv6_getsockopt,
	.sendmsg	   = udpv6_sendmsg,
	.recvmsg	   = udpv6_recvmsg,
	.backlog_rcv	   = udpv6_queue_rcv_skb,
	.hash		   = udp_lib_hash,
	.unhash		   = udp_lib_unhash,
	.get_port	   = udp_v6_get_port,
	/* Memory limits are shared with IPv4 UDP. */
	.memory_allocated  = &udp_memory_allocated,
	.sysctl_mem	   = sysctl_udp_mem,
	.sysctl_wmem	   = &sysctl_udp_wmem_min,
	.sysctl_rmem	   = &sysctl_udp_rmem_min,
	.obj_size	   = sizeof(struct udp6_sock),
	.slab_flags	   = SLAB_DESTROY_BY_RCU,	/* sockets are RCU-looked-up */
	.h.udp_table	   = &udp_table,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_udpv6_setsockopt,
	.compat_getsockopt = compat_udpv6_getsockopt,
#endif
};
1281 1283
1282 static struct inet_protosw udpv6_protosw = { 1284 static struct inet_protosw udpv6_protosw = {
1283 .type = SOCK_DGRAM, 1285 .type = SOCK_DGRAM,
1284 .protocol = IPPROTO_UDP, 1286 .protocol = IPPROTO_UDP,
1285 .prot = &udpv6_prot, 1287 .prot = &udpv6_prot,
1286 .ops = &inet6_dgram_ops, 1288 .ops = &inet6_dgram_ops,
1287 .capability =-1, 1289 .capability =-1,
1288 .no_check = UDP_CSUM_DEFAULT, 1290 .no_check = UDP_CSUM_DEFAULT,
1289 .flags = INET_PROTOSW_PERMANENT, 1291 .flags = INET_PROTOSW_PERMANENT,
1290 }; 1292 };
1291 1293
1292 1294
1293 int __init udpv6_init(void) 1295 int __init udpv6_init(void)
1294 { 1296 {
1295 int ret; 1297 int ret;
1296 1298
1297 ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); 1299 ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
1298 if (ret) 1300 if (ret)
1299 goto out; 1301 goto out;
1300 1302
1301 ret = inet6_register_protosw(&udpv6_protosw); 1303 ret = inet6_register_protosw(&udpv6_protosw);
1302 if (ret) 1304 if (ret)
1303 goto out_udpv6_protocol; 1305 goto out_udpv6_protocol;
1304 out: 1306 out:
1305 return ret; 1307 return ret;
1306 1308
1307 out_udpv6_protocol: 1309 out_udpv6_protocol:
1308 inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); 1310 inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1309 goto out; 1311 goto out;
1310 } 1312 }
1311 1313
/*
 * Detach UDPv6 from the IPv6 stack, undoing udpv6_init() in reverse
 * order: drop the protosw entry first so no new sockets can be
 * created, then remove the IPPROTO_UDP input handler.
 */
void udpv6_exit(void)
{
	inet6_unregister_protosw(&udpv6_protosw);
	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
}
1317 1319